#!/usr/bin/env python
"""
This script generates a file tree with large and old files.
The size and last access time of the files are determined by
drawing random numbers (i.e., the files have random sizes and
random dates).

Usage: script [ -f ] directory
"""
import os
import random
import shutil
import sys
import time

_SECONDS_PER_DAY = 60 * 60 * 24


def _fillfile(filename, nKbytes):
    """Make a file of size nKbytes kilo bytes and with name filename.

    One kilo byte is counted as 1000 bytes here, and the file is filled
    with zero bytes.
    """
    # A single bytes object of the right length is written in one call.
    # (Writing one character at a time in a loop is *extremely* slow.)
    # The file is opened in binary mode so that no newline or encoding
    # translation can alter the number of bytes on disk.
    with open(filename, 'wb') as f:
        f.write(b'\0' * (nKbytes * 1000))


def _lastaccessed(filename, ndays):
    """Set the date of a file with name filename to ndays back in time.

    Both the access time and the modification time are set.
    """
    tm = time.time() - _SECONDS_PER_DAY * ndays  # seconds since the epoch
    os.utime(filename, (tm, tm))


def _filenamegenerator():
    """Generate a filename (relative to the current directory).

    The name has the form ./tmpf-N with N random in [1, 999999].
    A new number is drawn if the name already exists in the current
    directory, so that an existing file is never overwritten and
    os.mkdir never fails because of a name clash.
    """
    while True:
        name = os.path.join(os.curdir,
                            "tmpf-" + str(random.randrange(1, 1000000)))
        if not os.path.exists(name):
            return name


def _makefile():
    """
    Make a file or directory with probability 0.75 and 0.25, resp.
    The name of the file or directory is generated by
    _filenamegenerator(). The size is random (uniformly
    distributed) between 5Kb and 10Mb. The last date of
    access is random (uniformly distributed) between 0 and
    360 days in the past.

    Return (ftype, name), where ftype is 'dir' or 'file' and
    name is the name of the directory or file.
    """
    if random.uniform(0, 1) > 0.25:
        # generate a file:
        name = _filenamegenerator()
        minsize = 5       # 5Kb is minimum filesize
        maxsize = 10001   # exclusive upper bound: 10000 Kb = 10 Mb maximum
        _fillfile(name, random.randrange(minsize, maxsize))
        # set date:
        maxage = 361      # exclusive upper bound: max age is 360 days
        _lastaccessed(name, random.randrange(0, maxage))
        return ('file', name)
    # generate a directory:
    name = _filenamegenerator()
    os.mkdir(name)
    return ('dir', name)


def _random_count(upper):
    """Return a random integer in [1, upper-1]; return 1 if upper < 2.

    random.randrange(1, upper) raises ValueError for upper <= 1, so the
    upper bound is clamped to 2. For upper >= 2 the draw is identical
    to random.randrange(1, upper).
    """
    return random.randrange(1, max(upper, 2))


def maketree(max_leaves=10, max_files_in_leef=10):
    """
    Call _makefile() n times (n is a random number from 1 up to, but
    not including, max_files_in_leef) and take a chdir each time
    _makefile generates a dir. Jump to one of the created dirs (or the
    start directory) and repeat m times (m is a random number from 1 up
    to, but not including, max_leaves). Limits below 2 give one round.

    Note that the number of files and directories generated by
    this function is random. The current working directory is changed
    by this function. The directory count printed at the end includes
    the directory the function was started in.
    """
    known_dirs = [os.getcwd()]
    nfiles = 0
    for _ in range(_random_count(max_leaves)):
        for _ in range(_random_count(max_files_in_leef)):
            ftype, name = _makefile()
            if ftype == 'dir':
                known_dirs.append(os.path.join(os.getcwd(), name))
                os.chdir(name)
                print("dir: %s" % name)
            else:
                print(" file: %s %dKb" %
                      (name, os.path.getsize(name) // 1000))
                nfiles += 1
        # pick a random dir:
        target = known_dirs[random.randrange(0, len(known_dirs))]
        os.chdir(target)
        print("moved to %s" % target)
    print("generated %d files and %d directories" % (nfiles, len(known_dirs)))


def _usage(program):
    """Print the usage message and exit with status 1."""
    print("Usage: %s [ -f ] directory" % program)
    print("-f forces directory to be removed before a new is created")
    sys.exit(1)


def main(argv):
    """Create the directory named on the command line and fill it.

    argv has the layout of sys.argv: [program, [-f,] directory].
    Exits with status 1 if the directory argument is missing, or if
    the directory exists and -f was not given.
    """
    # The random will generate different sequences of random numbers
    # every time this script is run, resulting in different file
    # trees, unless we fix the seed:
    random.seed(1243)
    # (This fixing of the random number sequence is required if
    # the script is used to generate file trees for regression testing.)

    args = list(argv[1:])
    force = bool(args) and args[0] == '-f'   # remove existing filetree?
    if force:
        args = args[1:]
    if not args:
        # also covers "-f" given without a directory name
        _usage(argv[0])
    root = args[0]

    if os.path.isdir(root):
        if force:
            shutil.rmtree(root)
        else:
            print("%s exists; it must be removed before %s can make "
                  "a new directory" % (root, argv[0]))
            sys.exit(1)
    os.mkdir(root)
    os.chdir(root)
    maketree(7, 5)


if __name__ == '__main__':
    main(sys.argv)