#!/usr/bin/perl

=head1 NAME

splitfs - split directory trees into files with total size limits

=head1 SYNOPSIS

B<splitfs> I<maxbytes> I<dir> [I<dir> ... ]

=head1 DESCRIPTION

The perl script recursively looks at all the files and directories in each
of the I<dir> directories. It collects one or more of these
file/directory names and writes them into report files as long as the total
byte count of all of them in each report file does not exceed the
I<maxbytes> given as the first command line argument.

The file names of the report files (which are either truncated or created
for writing) are derived from the name of the perl script by appending
three decimal digits; the first report file is splitfs000, the second
splitfs001 and so on.

The total number of report files created depends on I<maxbytes> (the larger
I<maxbytes>, the smaller the number of report files and the larger the
number of files listed in each report file), on the total number of
directories and files in the I<dir>'s, and on the (average) number of bytes
in the individual files in the I<dir>'s.

If directories (ie the files that contain their file names and inodes) or
files contain more than I<maxbytes> bytes, their names are not put into any
of the report files. A diagnostic message is written to stderr for each of
those.

The program does not optimize the aggregation into the report files so as
to bring the total byte count in each of those as close as possible to
I<maxbytes>. It will just run recursively through the directories in the
order mentioned on the command line, and skip to a new report file as soon
as the next entry met would overrun the total number of bytes represented
by the files already listed in the current report file.

=head1 EXAMPLE

B<splitfs> 800000 ./bin /tmp/mydir

creates files splitfs000, splitfs001 and so on where each of the splitfs???
files contains file names (one per line) ./bin* and/or /tmp/mydir* and the
total byte count of the files mentioned in each of splitfs000, splitfs001
and so on is at most 800 kBytes.
=head1 CAUTIONS

Report file numbers are zero-padded to three digits (splitfs000 through
splitfs999). Should more than 1000 report files be needed, the numbering
simply continues with four digits (splitfs1000 and so on).

Files within the I<dir> directories that are symbolic links to other files
will be counted with the full size of the file they point to, and be listed
in the report files (unless this full size is larger than I<maxbytes>).
The program does not follow symbolic links to directories found within the
I<dir> directories.

The total number of bytes reserved for files in blocked modes (as with
du(1M) or dd(1) or tar(1)) is larger, which is not considered here.

=head1 AUTHOR

Richard J. Mathar, Feb. 8, 2001

=cut

use strict;
use warnings;
use IO::File;

# Set to 1 to descend into symbolic links that point to directories.
# Left at 0: such links are listed like plain files instead.
my $follow = 0;

# Report files are named "<script name><counter>".  The counter is kept as a
# number and formatted on demand rather than string-incrementing the whole
# file name: Perl's magic string increment only works on purely alphanumeric
# strings, so a script invoked as "./splitfs" or "splitfs.pl" would otherwise
# turn the second report file name into "1".
my $report_prefix = $0;
my $report_index  = 0;

if (@ARGV < 2) {    # simple syntax check
    print STDERR "usage: $0 maxbytes dir [dir ...]\n";
    exit 1;
}

# Maximum number of bytes in each assembly; must consist of digits 0-9 only
# (the anchored match also rejects the empty string).
my $wantbytes = shift @ARGV;
if ($wantbytes !~ m/\A\d+\z/) {
    print STDERR "$0: $wantbytes not a positive number\n";
    exit 1;
}

my $gotbytes = 0;
my $outfh    = open_report(report_name());

foreach my $dir (@ARGV) {    # one directory argument at a time
    ($gotbytes, $outfh) = recurdir($gotbytes, $wantbytes, $dir, $outfh);
}

close($outfh) or die "$0: error closing " . report_name() . ": $!\n";
exit;

# Return the name of the current report file: the script name followed by
# the zero-padded (three digits minimum) report counter.
sub report_name {
    return sprintf("%s%03d", $report_prefix, $report_index);
}

# Open (create or truncate) the report file $name for writing and return its
# handle.  Dies with a diagnostic if the file cannot be opened.
sub open_report {
    my ($name) = @_;
    my $fh = IO::File->new($name, 'w')
        or die "$0: cannot open $name for writing: $!\n";
    return $fh;
}

# Run recursively through the directory $dir and append the names of its
# entries to the current report file, switching to the next report file
# whenever an entry would push the running byte total above the limit.
#
# Symbolic links to directories are not followed (unless $follow is 1) but
# are listed like files; symbolic links to files are listed and counted with
# the size of the file they point to.  The "." entry of every directory is
# listed too, counted with the size of the directory file itself.
#
# Parameters:
#   $gotb  - bytes already accounted for in the current report file
#   $wantb - maximum number of bytes per report file
#   $dir   - directory to scan
#   $outfh - handle of the current report file
# Returns:
#   ($gotb, $outfh) - the updated byte count and the handle of the report
#   file that is current after the scan (it changes when a new report file
#   had to be started).
sub recurdir {
    my ($gotb, $wantb, $dir, $outfh) = @_;

    # A lexical handle per call: a shared bareword handle would be reopened
    # and closed by the recursive calls below.
    my $dirh;
    if (!opendir($dirh, $dir)) {
        print STDERR "$0: cannot read directory $dir: $!\n";
        return ($gotb, $outfh);
    }
    my @fils = nodots(readdir($dirh));    # everything but the parent entry
    closedir($dirh);

    foreach my $entr (@fils) {
        my $fullfils = $dir . "/" . $entr;

        # A real sub-directory (or a link to one when following links):
        # descend.  "." is excluded here so it is listed as an entry below.
        if (-d $fullfils and $entr ne "." and ($follow == 1 or !-l $fullfils)) {
            ($gotb, $outfh) = recurdir($gotb, $wantb, $fullfils, $outfh);
            next;
        }

        # Either a plain file or the "$dir/." directory itself.
        # -s yields undef when the entry cannot be stat'ed (e.g. a dangling
        # symbolic link); count such entries as zero bytes.
        my $filsby = -s $fullfils;
        $filsby = 0 unless defined $filsby;

        if ($filsby > $wantb) {    # does not fit in any list of size $wantb
            print STDERR "skipping $fullfils with $filsby bytes\n";
            next;
        }

        if ($gotb + $filsby > $wantb) {    # does not fit in the current list
            # Close the old report file and continue with the next one.
            close($outfh)
                or die "$0: error closing " . report_name() . ": $!\n";
            $report_index++;
            $outfh = open_report(report_name());
            $gotb  = 0;
        }

        print $outfh "$fullfils\n";
        $gotb += $filsby;
    }

    return ($gotb, $outfh);
}

# Remove the '..' entries from a directory listing.  The '.' entry is kept
# on purpose so that each directory itself shows up in the reports.
# Takes the list of entry names and returns the filtered list.
sub nodots {
    return grep { $_ ne ".." } @_;
}