#!/bin/sh
              #
              # SccsId[] = "@(#)biggest.sh 1.9 05/22/07 (List 'biggest' files in filesystem)"
              #
              #----------------------------------------------------------------------#
              #                              biggest.sh                              #
              # -------------------------------------------------------------------- #
              #                                                                      #
              #   Copyright (c) 1995-2007 by Bob Orlando.  All rights reserved.      #
              #                                                                      #
              #   Permission to use, copy, modify and distribute this software       #
              #   and its documentation for any purpose and without fee is hereby    #
              #   granted, provided that the above copyright notice appear in all    #
              #   copies, and that both the copyright notice and this permission     #
              #   notice appear in supporting documentation, and that the name of    #
              #   Bob Orlando not be used in advertising or publicity pertaining     #
              #   to distribution of the software without specific, written prior    #
              #   permission.  Bob Orlando makes no representations about the        #
              #   suitability of this software for any purpose.  It is provided      #
              #   "as is" without express or implied warranty.                       #
              #                                                                      #
              #   BOB ORLANDO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS           #
              #   SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY      #
              #   AND FITNESS.  IN NO EVENT SHALL BOB ORLANDO BE LIABLE FOR ANY      #
              #   SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES          #
              #   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER    #
              #   IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,     #
              #   ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF     #
              #   THIS SOFTWARE.                                                     #
              #                                                                      #
              # -------------------------------------------------------------------- #
              #        Program documentation and notes located at the bottom.        #
              #----------------------------------------------------------------------#

                #----------------------------------------------------------------#
                # Script_name assignment is necessary if there exists the        #
                # possibility that this process may be run by the 'at' command.  #
                # Run via 'at' and $0 simply returns '/bin/sh' or 'sh' (hardly   #
                # desirable if you run that into basename).                      #
                #----------------------------------------------------------------#
                script_name="biggest.sh"

                # Home is the current directory whenever $0 carries no usable
                # directory part (started as /bin/sh, or as a bare name).
                script_dir=`dirname "$0"`
                if [ "$0" = "/bin/sh" ] || [ "$script_dir" = "." ]; then
                  script_home=`pwd`
                else
                  script_home=$script_dir
                fi
                bin=/usr/bin # Default

                # $moi is the invoking user's name, taken from the "uid=N(name)"
                # part of the id output.  [^)]* stops at the first ')' so a
                # following "gid=N(group) groups=..." list is never swallowed
                # into the name.  If id reports no name, use the numeric uid.
                id_output=`id`
                moi=`expr "$id_output" : 'uid=[0-9]*(\([^)]*\))'`
                [ -n "$moi" ] || moi=`expr "$id_output" : 'uid=\([0-9]*\)'`

                #----------------------------------------------------------------#
                # Do our best to find and assign $AWK executable variable.       #
                #----------------------------------------------------------------#
                # Candidates are tried in order of preference (nawk, gawk, awk);
                # the first one that is an executable file wins.
                for awk_candidate in \
                  /usr/bin/nawk /bin/nawk \
                  /usr/bin/gawk /bin/gawk /usr/gnu/bin/gawk \
                  /usr/bin/awk  /bin/awk
                do
                  if [ -f "$awk_candidate" ] && [ -x "$awk_candidate" ]; then
                    AWK=$awk_candidate
                    break
                  fi
                done

                # Never leave $AWK empty: an empty value would make a later
                # "$AWK ..." run its first argument as the command.  Fall back to
                # whatever 'awk' the PATH provides.
                : "${AWK:=awk}"

              #======================================================================#
              #                    L O C A L    F U N C T I O N S                    #
              #                       (in alphabetical order)                        #
              #----------------------------------------------------------------------#
              EXIT_USAGE()
              #----------------------------------------------------------------------#
              # Print the usage brief to stderr and terminate with exit status 1.    #
              #----------------------------------------------------------------------#
              {
                # printf is used instead of echo because shells disagree on
                # whether echo expands "\n"; each argument below is one line.
                printf '%s\n' \
                  "Usage: biggest.sh -fHh -l <nn> -s <nnn> -t <dir> -v fs" \
                  "" \
                  "                  -f = follow links" \
                  "                  -H = Full documentation" \
                  "                  -h = Usage brief" \
                  "                  -l = Displays <nn> lines" \
                  "                  -s = Minimum file size is <nnn>" \
                  "                  -t = Temp/work directory, <dir>" \
                  "                  -v = Edit (vi) file list" \
                  "                  fs = Required filesystem argument." \
                  "" 1>&2
                exit 1
              }

              #----------------------------------------------------------------------#
              SHOW_DOCUMENTATION() # Function documentation located at bottom.       #
              #----------------------------------------------------------------------#
              # Print the documentation section of this script and exit.             #
              #                                                                      #
              # Reads:   $AWK, $script_name, $script_home (defaults derived from $0) #
              # Output:  the line preceding the "D O C U M E N T A T I O N" banner,  #
              #          the banner itself and everything after it, on stdout.       #
              # Exits:   0 when the section was found, 1 (with a message on stderr)  #
              #          when it was not; otherwise the awk exit status.             #
              #----------------------------------------------------------------------#
              {
                #----------------------------------------------------------------#
                # If the following variables are not set, use these as defaults. #
                #----------------------------------------------------------------#
                : ${script_name:=`basename "$0"`}
                : ${script_home:=`dirname  "$0"`}

                # Normalize the directory so that it ends in exactly one slash.
                SD_script_home=`printf '%s\n' "$script_home" | sed 's/\/*$/\//'`

                #------------------------------------------------#
                # User wants help, so find the documentation     #
                # section and print everything from there down.  #
                #------------------------------------------------#
                # All expansions are quoted so that a script path containing
                # blanks still reaches awk as one file operand.
                "$AWK" -v script_name="$script_name" \
                  'BEGIN { n=0 }

                   { #------------------------------------------#
                     # Until we find the documentation section, #
                     # keep looking at each line.  The banner   #
                     # may be preceded by blanks or tabs.       #
                     #------------------------------------------#
                     if (n == 0)
                     {
                       if ($0 ~ /^[ \t]*# +D O C U M E N T A T I O N/)
                       {
                         n = NR
                         print line # The line just above the banner
                         print $0
                       }
                       else
                       {
                         line = $0  # Remember it in case the banner is next
                       }

                       next
                     }    #-------------------------------------#
                     else # Once we find it, print until EOF.   #
                     {    #-------------------------------------#
                       print
                     }
                   }

                   END {
                         if (n == 0) # Means there is no documentation section.
                         {
                          "date +%Y-%m-%d" | getline yyyy_mm_dd
                           print yyyy_mm_dd" NO DOCUMENTATION",
                             "section found for "script_name".\a" | "cat 1>&2"
                           exit 1 # Exit failure
                         }
                         exit 0 # Else exit success
                       }' "${SD_script_home}${script_name}"

                exit $?
              } # "SD_" prefix identifies this function's variables


              #======================================================================#
              #                     I N I T I A L I Z A T I O N                      #
              #======================================================================#
                opt_v=0 # Default 'vi' option (0 = Do NOT vi the file list)
                tmp=/var/tmp
                follow=""
                size="499999" # Default minimum filesize
                lines="500"   # Default maximum lines

                #----------------------------------------------------------------#
                # The leading ':' in the option string puts getopts in silent    #
                # mode: for an unknown option $opt is '?', for a missing         #
                # argument it is ':', and in both cases $OPTARG holds the        #
                # offending option letter (so we can name it in the message).    #
                #----------------------------------------------------------------#
                while getopts :fHhl:s:t:v opt
                do
                   case "$opt" in
                      f ) follow='-follow'  ;;
                      H ) SHOW_DOCUMENTATION;;
                      h ) EXIT_USAGE        ;;
                      l ) # Max number of lines to display (digits only).
                          case "$OPTARG" in
                             ''|*[!0-9]* )
                                echo "Ignoring non-numeric -l value, $OPTARG.";;
                             * ) lines="$OPTARG";;
                          esac ;;
                      s ) # Minimum file size in bytes (digits only).
                          case "$OPTARG" in
                             ''|*[!0-9]* )
                                echo "Ignoring non-numeric -s value, $OPTARG.";;
                             * ) size="$OPTARG";;
                          esac ;;
                      t ) tmp="$OPTARG"     ;; # Temp directory (if /var/tmp full)
                      v ) opt_v=1           ;;
                      : ) echo "Ignoring option -$OPTARG, it requires an argument.";;
                      * ) echo "Ignoring invalid option, -$OPTARG.";;
                   esac
                done
                #----------------------------------#
                # Shift past options to arguments. #
                #----------------------------------#
                shift `expr $OPTIND - 1`


              #======================================================================#
              #                                M A I N                               #
              #======================================================================#

                # The filesystem argument is mandatory.
                if [ $# -eq 0 ]; then
                   EXIT_USAGE
                fi

                #-----------------------------------------------------------------#
                # Ensure we have write-access to temp/work directory.             #
                #-----------------------------------------------------------------#
                # Diagnostics go to stderr; printf prints each argument on its
                # own line regardless of how the shell's echo treats "\n".
                if [ ! -d "$tmp" ]; then
                   printf '%s\n' "Temp/work directory, $tmp not found!" \
                                 "$script_name terminated." 1>&2
                   exit 1
                elif [ ! -w "$tmp" ]; then
                   printf '%s\n' "No write access to temp/work directory, $tmp!" \
                                 "$script_name terminated." 1>&2
                   exit 1
                fi

                #----------------------------------------------------------------#
                # Listing files on remote filesystems is problematic, so we      #
                # limit our operations to local filesystems only.                #
                #----------------------------------------------------------------#
                # df -l fails for a non-local argument; its normal report is
                # left on stdout as before.
                df -lk "$1" || {
                   printf '%s\n' "$1 MUST be a local filesystem--it is not!" \
                                 "$script_name terminated." 1>&2
                   exit 1
                }

                #----------------------------------------------------------------#
                # Build a 'find' command with the necessary options/arguments.   #
                # Be sure to exclude anything with cdrom in it and include       #
                # -xdev if the filesystem being searched is root (/).            #
                #----------------------------------------------------------------#
                # NOTE(review): $outfile has a predictable name inside a shared
                # directory; kept because the list is meant to remain available
                # after the run.
                outfile=$tmp/$moi"_biggest.files" # Formatted 'find' output
                if [ ."$1" = ."/" ]; then
                   find_opt="-xdev $follow"
                else
                   find_opt="$follow"
                fi

                date "+%D %T"

                # Printable form of the command that is run below.
                exclude='\( -fstype nfs -o -type d -name cdrom\* \) -prune'
                include="-size +${size}c"' -exec ls -lcd {} \;'
                find_cmd="find $1 $find_opt $exclude -o $include"

                #----------------------------------------------------------------#
                # Display find command before running it.  Use [gn]awk to format #
                # the output and sort it in descending order (biggest on top).   #
                #----------------------------------------------------------------#
                # * find is invoked directly (no eval), so "$1" is passed as one
                #   argument even when it contains blanks or shell characters.
                # * $find_opt is deliberately unquoted: it holds zero, one or two
                #   option words.
                # * Pruning comes first and nothing but ls prints, so pruned
                #   paths never reach awk as bare lines.
                # * ls -d lists a large directory itself (skipped below as a "d"
                #   line) instead of its contents.
                # * LC_ALL=C gives ls the "Mon DD time-or-year" layout the awk
                #   field logic expects and makes sort compare bytes, which is
                #   what ordering the right-aligned sizes relies on.
                printf '%s\n' "$find_cmd 2> /dev/null | $AWK"
                LC_ALL=C find "$1" $find_opt \
                    \( -fstype nfs -o -type d -name 'cdrom*' \) -prune \
                    -o -size +"$size"c -exec ls -lcd {} \; 2> /dev/null \
                | "$AWK" \
                  'BEGIN \
                   {
                     own = 3
                     siz = 5
                     mmm = 6
                     day = 7
                     yyy = 8 # This may actually be yyyy or hh:mi
                     Mon = "^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)$"
                   }
                # "! /^[bcd]/" skips block and character devices and directories
                   ! /^[bcd]/ \
                   {
                     #-----------------------------------------------------------#
                     # If it looks like owner and group fields are concatenated, #
                     # try backing up the field ($n) list and work with that.    #
                     #-----------------------------------------------------------#
                     if ($siz !~ /[0-9]+/ && !match($mmm,Mon))
                     {
                       if ($(siz-1) ~ /[0-9]+/ && match($(mmm-1),Mon))
                       {
                         siz = 4 # 4th field
                         mmm = 5 # Etc.
                         day = 6
                         yyy = 7
                       }
                     }

                     gsub(/[\t ]+/," ")  # Squeeze whitespace.

                     #-----------------------------------------------------------#
                     # The file name is everything after the time/year field, so #
                     # names containing blanks are kept whole.  Fall back to the #
                     # last field when the line is shorter than expected.        #
                     #-----------------------------------------------------------#
                     name = $NF
                     if (NF > yyy)
                     {
                       name = $(yyy+1)
                       for (f = yyy+2; f <= NF; f++) name = name" "$f
                     }

                     gsub(/./,"& ",$siz) # Isolate each digit,
                     q=split($siz,a," ") #   then split the $siz into an array.
                     $siz=""             # Clear $siz.
                     for (p=1;q>0;q--)   # Insert commas into $siz.
                     {
                       $siz=a[q]""$siz
                       if ((p%3) == 0 && q != 1) $siz=","$siz # Insert commas here
                       p++
                     }

                     printf("%17s %-8s %s %02d %-5s %s\n",
                       $siz, $own, $mmm, $day, $yyy, name)

                     #-----------------------------------------------#
                     # if size value is not 5, then reset it, et al. #
                     #-----------------------------------------------#
                     if (siz != 5)
                     {
                       siz = 5
                       mmm = 6
                       day = 7
                       yyy = 8
                     }
                   }' | LC_ALL=C sort -r -k 1,2 | head -n "$lines" > "$outfile"

                #----------------------------------------------------------------#
                # Unless 'vi' option was given, simply cat our file list.        #
                #----------------------------------------------------------------#
                # An explicit if/elif/else is used so that vi is started only
                # when -v was given, never as a side effect of cat failing.
                if [ ! -s "$outfile" ]; then
                   printf '%s\n' "No files found in $1 > $size bytes in size."
                elif [ "$opt_v" -eq 0 ]; then
                   cat "$outfile"
                else
                   vi "$outfile"
                fi

                exit $?


              #======================================================================#
              #                      D O C U M E N T A T I O N                       #
              #======================================================================#
              #                                                                      #
              #      Author: Bob Orlando (Bob@OrlandoKuntao.com)                     #
              #                                                                      #
              #        Date: April 8, 1995                                           #
              #                                                                      #
              #  Program ID: biggest.sh                                              #
              #                                                                      #
              # Code Contrl: aphrodite:~dmc/SCCS.                                    #
              #                                                                      #
              #       Usage: biggest.sh -fHh -l <nn> -v -t <dir> -s <nnn> fs         #
              #                                                                      #
              #                         -f = Follow links                            #
              #                         -H = Displays detailed documentation         #
              #                         -h = Provides usage brief                    #
              #                         -l = Displays <nn> lines (default is 500)    #
              #                         -s = Minimum file size is <nnn>              #
              #                              (default is 500K)                       #
              #                         -t = Use <dir> as temp/work directory        #
              #                              (default is /var/tmp)                   #
              #                         -v = Edit (vi) file list                     #
              #                         fs = Required filesystem argument.           #
              #                                                                      #
              #     Purpose: List biggest files in a given filesystem (files         #
              #              appear in descending order).                            #
              #                                                                      #
              # Description: Using the find command, descend through the specified   #
              #              file system (fs) listing all files whose sizes exceed   #
              #              either the default minimum size (500K) or the minimum   #
              #              value provided via size (-s) option.  The filelist      #
              #              is created in /var/tmp by default as it is usually much #
              #              larger than /tmp.  However, in the event that /var      #
              #              is the filesystem that's full (or is not writable to    #
              #              the user), the temp dir (-t) option is available to     #
              #              redirect the output elsewhere.                          #
              #                                                                      #
              #              When root is the directory being searched, -xdev is     #
              #              supplied as a find argument so only the root filesystem #
              #              itself, and none of the other filesystems mounted       #
              #              beneath it, is searched.                                #
              #                                                                      #
              #              With the 'vi' option (-v) the user can edit the         #
              #              normally cat'd file list.                               #
              #                                                                      #
              #    Modified: 2007-05-22 Bob Orlando                                  #
              #                v1.9   * Change $LOGNAME to $moi to reduce issues     #
              #                         that can pop up when the user runs the       #
              #                         script su'd as root one time, then run       #
              #                         as the general user the next.                #
              #                                                                      #
              #----------------------------------------------------------------------#
            
# Artificial Intelligence is no match for natural stupidity.
# ©Copyright Bob Orlando, 2003-2011
# All rights reserved.
# http://www.OrlandoKuntao.com
# E-mail: Bob@OrlandoKuntao.com
# Last update: Jan. 17, 2011
# by Bob Orlando