Code listing below.  Click here for program description.

You can cut-n-paste the code below or you can
access an ASCII text version of the code (which you can Save As...) by clicking here.


#!/bin/sh
#
# SccsId[] = "@(#)biggest.sh 1.9 05/22/07 (List 'biggest' files in filesystem)"
#
#----------------------------------------------------------------------#
#                              biggest.sh                              #
# -------------------------------------------------------------------- #
#                                                                      #
#   Copyright (c) 1995-2007 by Bob Orlando.  All rights reserved.      #
#                                                                      #
#   Permission to use, copy, modify and distribute this software       #
#   and its documentation for any purpose and without fee is hereby    #
#   granted, provided that the above copyright notice appear in all    #
#   copies, and that both the copyright notice and this permission     #
#   notice appear in supporting documentation, and that the name of    #
#   Bob Orlando not be used in advertising or publicity pertaining     #
#   to distribution of the software without specific, written prior    #
#   permission.  Bob Orlando makes no representations about the        #
#   suitability of this software for any purpose.  It is provided      #
#   "as is" without express or implied warranty.                       #
#                                                                      #
#   BOB ORLANDO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS           #
#   SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY      #
#   AND FITNESS.  IN NO EVENT SHALL BOB ORLANDO BE LIABLE FOR ANY      #
#   SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES          #
#   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER    #
#   IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,     #
#   ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF     #
#   THIS SOFTWARE.                                                     #
#                                                                      #
# -------------------------------------------------------------------- #
#        Program documentation and notes located at the bottom.        #
#----------------------------------------------------------------------#

  #----------------------------------------------------------------#
  # Script_name assignment is necessary if there exists the        #
  # possibility that this process may be run by the 'at' command.  #
  # Run via 'at' and $0 simply returns '/bin/sh' or 'sh' (hardly   #
  # desirable if you run that into basename).                      #
  #----------------------------------------------------------------#
  script_name="biggest.sh"
  [ $0 = "/bin/sh" -o `dirname $0` = "." ] \
    && script_home=`pwd` || script_home=`dirname $0`
  bin=/usr/bin # Default
  moi=`expr "\`id\`" : 'uid=[0-9]*(\(.*\)) .*'`

  #----------------------------------------------------------------#
  # Do our best to find and assign $AWK executable variable.       #
  #----------------------------------------------------------------#
     { [ -x /usr/bin/nawk     ] && AWK=/usr/bin/nawk     ; } \
  || { [ -x /bin/nawk         ] && AWK=/bin/nawk         ; } \
  || { [ -x /usr/bin/gawk     ] && AWK=/usr/bin/gawk     ; } \
  || { [ -x /bin/gawk         ] && AWK=/bin/gawk         ; } \
  || { [ -x /usr/gnu/bin/gawk ] && AWK=/usr/gnu/bin/gawk ; } \
  || { [ -x /usr/bin/awk      ] && AWK=/usr/bin/awk      ; } \
  || { [ -x /bin/awk          ] && AWK=/bin/awk          ; }

#======================================================================#
#                    L O C A L    F U N C T I O N S                    #
#                       (in alphabetical order)                        #
#----------------------------------------------------------------------#
EXIT_USAGE()
#----------------------------------------------------------------------#
{
  echo "Usage: biggest.sh -fHh -l <nn> -s <nnn> -t <dir> -v fs\n" 1>&2
  echo "                  -f = follow links"                      1>&2
  echo "                  -H = Full documentation"                1>&2
  echo "                  -h = Usage brief"                       1>&2
  echo "                  -l = Displays <nn> lines"               1>&2
  echo "                  -s = Minimum file size is <nnn>"        1>&2
  echo "                  -t = Temp/work directory, <dir>"        1>&2
  echo "                  -v = Edit (vi) file list"               1>&2
  echo "                  fs = Required filesystem argument."     1>&2
  echo ""                                                         1>&2
  exit 1
}

#----------------------------------------------------------------------#
SHOW_DOCUMENTATION() # Function documentation located at bottom.       #
#----------------------------------------------------------------------#
{
  #----------------------------------------------------------------#
  # If the following variables are not set, use these as defaults. #
  #----------------------------------------------------------------#
  : ${script_name:=`basename $0`}
  : ${script_home:=`dirname  $0`}
  SD_script_home=`echo $script_home | sed 's/\/*$/\//'`

  #------------------------------------------------#
  # User wants help, so find the documentation     #
  # section and print everything from there down.  #
  #------------------------------------------------#
  $AWK -v script_name=$script_name \
    'BEGIN { n=0 }

     { #------------------------------------------#
       # Until we find the documentation section, #
       # keep looking at each line.               #
       #------------------------------------------#
       if (n == 0)
       {
         if ($0 ~ /^# +D O C U M E N T A T I O N/)
         {
           n = NR
           print line
           print $0
         }
         else
         {
           line = $0
         }

         next
       }    #-------------------------------------#
       else # Once we find it, print until EOF.   #
       {    #-------------------------------------#
         print
       }
     }

     END {
           if (n == 0) # Means there is no documentation section.
           {
            "date +%Y-%m-%d" | getline yyyy_mm_dd
             print yyyy_mm_dd" NO DOCUMENTATION",
               "section found for "script_name".\a" | "cat 1>&2"
             exit 1 # Exit failure
           }
           exit 0 # Else exit success
         }' ${SD_script_home}$script_name

  exit $?
} # "SD_" prefix identifies this function's variables


#======================================================================#
#                     I N I T I A L I Z A T I O N                      #
#======================================================================#
  opt_v=0 # Default 'vi' option (0 = Do NOT vi the file list)
  tmp=/var/tmp
  follow=""
  size="499999" # Default minimum filesize
  lines="500"   # Default maximum lines

  while getopts fHhl:s:t:v opt 2> /dev/null
  do
     case "$opt" in
        f ) follow='-follow'  ;;
        H ) SHOW_DOCUMENTATION;;
        h ) EXIT_USAGE        ;;
        l ) lines="$OPTARG"   ;; # Max number of lines to display.
        s ) size="$OPTARG"    ;; # Minimum file size.
        t ) tmp="$OPTARG"     ;; # Temp directory (if /var/tmp full)
        v ) opt_v=1           ;;
        * ) echo "Ignoring invalid option, $1.";;
     esac
  done
  #----------------------------------#
  # Shift past options to arguments. #
  #----------------------------------#
  shift `expr $OPTIND - 1`


#======================================================================#
#                                M A I N                               #
#======================================================================#

  [ $# -eq 0 ] && EXIT_USAGE

  #-----------------------------------------------------------------#
  # Ensure we have write-access to temp/work directory.             #
  #-----------------------------------------------------------------#
  if [ ! -d $tmp ]; then
     echo "Temp/work directory, $tmp not found!" \
          "\n$script_name terminated."
     exit 1
  elif [ ! -w $tmp ]; then
     echo "No write access to temp/work directory, $tmp!" \
          "\n$script_name terminated."
     exit 1
  fi

  #----------------------------------------------------------------#
  # File lists of remote filesystems is problematic, so we limit   #
  # our operations to local filesystems only.                      #
  #----------------------------------------------------------------#
  df -lk $1
  if [ $? -ne 0 ]; then
     echo "$1 MUST be a local filesystem--it is not!" \
          "\n$script_name terminated."
     exit 1
  fi

  #----------------------------------------------------------------#
  # Build a 'find' command with the necessary options/arguments.   #
  # Be sure to exclude anything with cdrom in it and include -xdev #
  # -xdev if the filesystem being searched is root (/).            #
  #----------------------------------------------------------------#
  outfile=$tmp/$moi"_biggest.files" # Formatted 'find' output
  include='-size +'"$size"'c -exec ls -lc {} \;'
  exclude='-o -fstype nfs -prune -o -name cdrom\* -prune'
  [ ."$1" = ."/" ] && find_opt="-xdev $follow" || find_opt="$follow"

  date "+%D %T"
  find_cmd="find $1 $find_opt $include $exclude -print"

  #----------------------------------------------------------------#
  # Display find command before running it.  Use [gn]awk to format #
  # the output and sort it in descending order (biggest on top).   #
  #----------------------------------------------------------------#
  echo "$find_cmd 2> /dev/null | $AWK"
  eval  $find_cmd 2> /dev/null | $AWK \
    'BEGIN \
     {
       i   = 0
       own = 3
       siz = 5
       mmm = 6
       day = 7
       yyy = 8 # This may actually be yyyy or hh:mi
       Mon = "^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)$"
     }
  # "! /^[bcd]/" skips block and character devices and directories
     ! /^[bcd]/ \
     {
       #-----------------------------------------------------------#
       # If it looks like owner and group fields are concatonated, #
       # try backing up the field ($n) list and work with that.    #
       #-----------------------------------------------------------#
       if ($siz !~ /[0-9]+/ && !match($mmm,Mon))
       {
         if ($(siz-1) ~ /[0-9]+/ && match($(mmm-1),Mon))
         {
           siz = 4 # 4th field
           mmm = 5 # Etc.
           day = 6
           yyy = 7
         }
       }

       gsub(/[\t ]+/," ")  # Squeeze whitespace.
       gsub(/./,"& ",$siz) # Isolate each digit,
       q=split($siz,a," ") #   then split the $siz into an array.
       $siz=""             # Clear $siz.
       for (p=1;q>0;q--)   # Insert commas into $siz.
       {
         $siz=a[q]""$siz
         if ((p%3) == 0 && q != 1) $siz=","$siz # Insert commas here
         p++
       }

       printf("%17s %-8s %s %02d %-5s %s\n",
         $siz, $own, $mmm, $day, $yyy, $NF)

       #-----------------------------------------------#
       # if size value is not 5, then reset it, et al. #
       #-----------------------------------------------#
       if (siz != 5)
       {
         siz = 5
         mmm = 6
         day = 7
         yyy = 8
       }
     }' | sort -r -k 1,2 | head -$lines > $outfile

  #----------------------------------------------------------------#
  # Unless 'vi' option was given, simply cat our file list.        #
  #----------------------------------------------------------------#
  if [ `wc -l < $outfile` -eq 0 ]; then
     echo "No files found in $1 > $size bytes in size."
  else
     [ $opt_v -eq 0 ] && cat $outfile || vi $outfile
  fi

  exit $?


#======================================================================#
#                      D O C U M E N T A T I O N                       #
#======================================================================#
#                                                                      #
#      Author: Bob Orlando (Bob@OrlandoKuntao.com)                     #
#                                                                      #
#        Date: April 8, 1995                                           #
#                                                                      #
#  Program ID: biggest.sh                                              #
#                                                                      #
# Code Contrl: aphrodite:~dmc/SCCS.                                    #
#                                                                      #
#       Usage: biggest.sh -fHh -l <nn> -v -t <dir> -s <nnn> fs         #
#                                                                      #
#                         -f = Follow links                            #
#                         -H = Displays detailed documentation         #
#                         -h = Provides usage brief                    #
#                         -l = Displays <nn> lines (default is 500)    #
#                         -s = Minimum file size is <nnn>              #
#                              (default is 500K)                       #
#                         -t = Use <dir> as temp/work directory        #
#                              (default is /var/tmp)                   #
#                         -v = Edit (vi) file list                     #
#                         fs = Required filesystem argument.           #
#                                                                      #
#     Purpose: List biggest files in a given filesystem (files         #
#              appear in descending order).                            #
#                                                                      #
# Description: Using the find command, descend through the specified   #
#              file system (fs) listing all files whose sizes exceed   #
#              either the default minimum size (500K) or the minimum   #
#              value provided via size (-s) option.  The filelist      #
#              is created in /var/tmp by default as it usually much    #
#              larger than /tmp.  However, in the event that /var      #
#              is the filesystem that's full (or is not writable to    #
#              the user), the temp dir (-t) option is available to     #
#              redirect the output elsewhere.                          #
#                                                                      #
#              When root is the directory being searched, -xdev is     #
#              supplied as a find argument so only root, and none      #
#              of its subdirectories, is searched.                     #
#                                                                      #
#              With the 'vi' option (-v) the user can edit the         #
#              normally cat'd file list.                               #
#                                                                      #
#    Modified: 2007-05-22 Bob Orlando                                  #
#                v1.9   * Change $LOGNAME to $moi to reduce issues     #
#                         that can pop up when the user runs the       #
#                         script su'd as root one time, then run       #
#                         as the general user the next.                #
#                                                                      #
#----------------------------------------------------------------------#

©Copyright Bob Orlando, 2003.
All rights reserved.
http://www.OrlandoKuntao.com
E-mail: Bob@OrlandoKuntao.com
Last update:  Sep. 16, 2007
by Bob Orlando