#!/usr/bin/env python2
"""Archive the files found under a directory glob into a zip or tar file.

The output format is chosen from the output file name (.zip, .tar.gz or
.tar.bz2).  Files can be excluded by extension, by shell pattern or by
size, and whole directories can be excluded by shell pattern.
"""
import os.path
import os
import glob
import fnmatch
import re
import zipfile
from functools import partial
import itertools
import tarfile
from optparse import OptionParser

# Matches the first glob wildcard character in a pattern.  Stolen from glob.
magic_check = re.compile('[*?[]')

# (suffix pattern, multiplier) pairs understood by parseSize().
sizePostfixes = (
    (re.compile("kb?$", re.I), 1024),
    (re.compile("mb?$", re.I), 1024 ** 2),
    (re.compile("gb?$", re.I), 1024 ** 3),
    (re.compile("tb?$", re.I), 1024 ** 4),
)


def excludeFilesize(excludeSize, filename):
    """Return True when *filename* is at least *excludeSize* bytes long."""
    return os.path.getsize(filename) >= excludeSize


def fmatch(pattern, filename):
    """Return whether *filename* matches the shell *pattern*.

    The arguments are in the reverse order of fnmatch.fnmatch() so that the
    pattern can be bound with functools.partial().
    """
    return fnmatch.fnmatch(filename, pattern)


def parseSize(text):
    """Convert a size such as "512", "512b", "10k" or "3MB" to bytes.

    A bare number or a number followed by "b" is a byte count; the k, m, g
    and t suffixes (optionally followed by "b", any case) are powers of 1024.

    Raises ValueError when *text* is not a recognisable size.
    """
    text = text.strip()
    if text.isdigit():
        return int(text)
    stripped = text.rstrip("bB")
    if stripped.isdigit():
        return int(stripped)
    for pattern, multiplier in sizePostfixes:
        match = pattern.search(text)
        if match and text[:match.start()].isdigit():
            return int(text[:match.start()]) * multiplier
    raise ValueError("%s doesn't seem to be a valid filesize" % text)


def _splitOption(value):
    """Split a colon separated option value, dropping empty entries."""
    if not value:
        return []
    return [item for item in value.split(":") if item]


class SCompress(object):
    """Add every file found under a directory glob to an open archive."""

    def __init__(self, dirglob=os.path.join("/home", "*", "public_html"),
                 fileexcluders=None, direxcluders=None, zfile=None):
        """Set up the walker.

        dirglob       -- glob selecting the directories to walk
        fileexcluders -- callables taking a file path; a true result
                         excludes that file
        direxcluders  -- callables taking a directory name (not a path); a
                         true result prevents descending into it
        zfile         -- open zipfile.ZipFile or tarfile.TarFile to fill

        Raises TypeError when zfile is not a ZipFile or TarFile.
        """
        if not isinstance(zfile, (zipfile.ZipFile, tarfile.TarFile)):
            raise TypeError("zfile must be a ZipFile or TarFile object")

        self.dirglob = dirglob
        self.fileexcluders = fileexcluders or []
        self.direxcluders = direxcluders or []
        self.zfile = zfile

        # Both methods accept (name, arcname), so callers need not care
        # which kind of archive is being written.
        if isinstance(self.zfile, zipfile.ZipFile):
            self.addfunc = self.zfile.write
        else:
            self.addfunc = self.zfile.add

    def getFiles(self):
        """Yield (path on disk, name inside the archive) for each file.

        The archive name is the path with the part of the glob preceding
        its first wildcard component removed.
        """
        magic = magic_check.search(self.dirglob)
        if magic:
            # Strip everything up to the last separator before the first
            # wildcard.  rfind() returns -1 when there is none, giving 0,
            # so a component is never cut in half.
            prefixlen = self.dirglob.rfind(os.sep, 0, magic.start()) + 1
        else:
            prefixlen = 0

        for path in glob.glob(self.dirglob):
            for top, dirs, files in os.walk(path):
                # Prune in place so os.walk() skips the excluded
                # directories; rebuilding the list avoids skipping entries,
                # which removing while iterating does.
                dirs[:] = [d for d in dirs
                           if not any(excluder(d)
                                      for excluder in self.direxcluders)]
                for f in files:
                    fullpath = os.path.join(top, f)
                    # Only real files; checked first so that excluders are
                    # never handed dangling symlinks or special files.
                    if not os.path.isfile(fullpath):
                        continue
                    if any(excluder(fullpath)
                           for excluder in self.fileexcluders):
                        continue
                    yield fullpath, os.path.join(top[prefixlen:], f)

    def addFiles(self):
        """Add every file produced by getFiles() to the archive."""
        for fname, arcname in self.getFiles():
            self.addfunc(fname, arcname)


def buildOptParser():
    """Build the command line option parser."""
    usage = "%prog [options] output-file"
    parser = OptionParser(usage=usage)
    parser.add_option("-e", "--exclude-extensions", action="store",
                      type="string", dest="ext_excludes",
                      help="don't archive files with the specified extensions, colon separated list of extensions to exclude (example: -e \"mp3:mpg\")")
    parser.add_option("--exclude-dirs", action="store", type="string",
                      dest="dir_excludes",
                      help="exclude dirs with the specified shell patterns, colon separated")
    parser.add_option("--exclude-files", action="store", type="string",
                      dest="file_excludes",
                      help="exclude files with the specified shell patterns, colon separated")
    parser.add_option("--exclude-size", action="store", type="string",
                      dest="exclude_size",
                      help="exclude files who's filesize is atleast the specified size")
    parser.add_option("--glob", action="store", type="string", dest="glob",
                      default=os.path.join("/home", "*", "public_html"),
                      help="use the following glob to determine what files to add to the archive")
    return parser


def main():
    """Parse the command line and write the requested archive."""
    parser = buildOptParser()
    options, args = parser.parse_args()
    if len(args) < 1:
        parser.error("too few arguments")

    fexcludes = []
    dexcludes = []

    for e in _splitOption(options.ext_excludes):
        fexcludes.append(partial(fmatch, "*.%s" % e.lstrip(".")))
    for e in _splitOption(options.file_excludes):
        fexcludes.append(partial(fmatch, e))

    if options.exclude_size:
        try:
            limit = parseSize(options.exclude_size)
        except ValueError:
            # parser.error() exits, so limit is always bound below.
            parser.error("%s doesn't seem to be a valid filesize" % options.exclude_size)
        fexcludes.append(partial(excludeFilesize, limit))

    for e in _splitOption(options.dir_excludes):
        dexcludes.append(partial(fmatch, e))

    filename = args[0]
    fmappings = {"tar.gz": (tarfile.open, "w:gz"),
                 "tar.bz2": (tarfile.open, "w:bz2"),
                 "zip": (zipfile.ZipFile, "w")}

    opener = None
    mode = None
    for ext in fmappings:
        if filename.endswith(ext):
            opener, mode = fmappings[ext]
            break
    if opener is None:
        parser.error("don't know how to write %s files, use an extension of .zip, .tar.bz2 or .tar.gz" % os.path.splitext(filename)[1])

    f = opener(filename, mode)
    try:
        c = SCompress(options.glob, fexcludes, dexcludes, f)
        c.addFiles()
    finally:
        # Always finalise the archive, even if adding a file failed.
        f.close()


if __name__ == "__main__":
    main()