Add a cache around getpwuid/getgrgid to avoid repeated calls.
[cumulus.git] / cumulus-util
index ce80ff6..1608b63 100755 (executable)
@@ -12,28 +12,14 @@ sys.path.append(os.path.join(script_directory, 'python'))
 
 import cumulus
 
-# Compatibility
-lbs = cumulus
-
-# We support up to "LBS Snapshot v0.8" formats, but are also limited by the lbs
-# module.
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 8))
+# We support up to "Cumulus Snapshot v0.11" formats, but are also limited by
+# the cumulus module.
+FORMAT_VERSION = min(cumulus.FORMAT_VERSION, (0, 11))
 
 def check_version(format):
-    ver = lbs.parse_metadata_version(format)
+    ver = cumulus.parse_metadata_version(format)
     if ver > FORMAT_VERSION:
-        raise RuntimeError("Unsupported LBS format: " + format)
-
-parser = OptionParser(usage="%prog [option]... command [arg]...")
-parser.add_option("-v", action="store_true", dest="verbose", default=False,
-                  help="increase verbosity")
-parser.add_option("--store", dest="store",
-                  help="specify path to backup data store")
-parser.add_option("--localdb", dest="localdb",
-                  help="specify path to local database")
-parser.add_option("--intent", dest="intent", default=1.0,
-                  help="give expected next snapshot type when cleaning")
-(options, args) = parser.parse_args(sys.argv[1:])
+        raise RuntimeError("Unsupported Cumulus format: " + format)
 
 # Read a passphrase from the user and store it in the LBS_GPG_PASSPHRASE
 # environment variable.
@@ -42,20 +28,22 @@ def get_passphrase():
     if not os.environ.has_key(ENV_KEY):
         os.environ[ENV_KEY] = getpass.getpass()
 
-# Delete old snapshots from the local database, though do not actually schedule
-# any segment cleaning.
-# Syntax: $0 --localdb=LOCALDB prune-db
-def cmd_prune_db():
-    db = lbs.LocalDatabase(options.localdb)
+def cmd_prune_db(args):
+    """ Delete old snapshots from the local database, though do not
+        actually schedule any segment cleaning.
+        Syntax: $0 --localdb=LOCALDB prune-db
+    """
+    db = cumulus.LocalDatabase(options.localdb)
 
     # Delete old snapshots from the local database.
     #db.garbage_collect()
     #db.commit()
 
-# Run the segment cleaner.
-# Syntax: $0 --localdb=LOCALDB clean
-def cmd_clean(clean_threshold=7.0):
-    db = lbs.LocalDatabase(options.localdb)
+def cmd_clean(args, clean_threshold=7.0):
+    """ Run the segment cleaner.
+        Syntax: $0 --localdb=LOCALDB clean
+    """
+    db = cumulus.LocalDatabase(options.localdb)
 
     # Delete old snapshots from the local database.
     intent = float(options.intent)
@@ -73,21 +61,27 @@ def cmd_clean(clean_threshold=7.0):
     db.balance_expired_objects()
     db.commit()
 
-# List snapshots stored.
-# Syntax: $0 --data=DATADIR list-snapshots
-def cmd_list_snapshots():
-    store = lbs.LowlevelDataStore(options.store)
+def cmd_list_snapshots(args):
+    """ List snapshots stored.
+        Syntax: $0 --store=DATADIR list-snapshots
+    """
+    store = cumulus.LowlevelDataStore(options.store)
     for s in sorted(store.list_snapshots()):
         print s
 
-# List size of data needed for each snapshot.
-# Syntax: $0 --data=DATADIR list-snapshot-sizes
-def cmd_list_snapshot_sizes():
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
+def cmd_list_snapshot_sizes(args):
+    """ List size of data needed for each snapshot.
+        Syntax: $0 --store=DATADIR list-snapshot-sizes
+    """
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    lowlevel.scan()
+    store = cumulus.ObjectStore(lowlevel)
     previous = set()
+    exts = {}
+    for seg in lowlevel.store.list('segments'):
+        exts.update ([seg.split ('.', 1)])
     for s in sorted(lowlevel.list_snapshots()):
-        d = lbs.parse_full(store.load_snapshot(s))
+        d = cumulus.parse_full(store.load_snapshot(s))
         check_version(d['Format'])
 
         try:
@@ -96,51 +90,89 @@ def cmd_list_snapshot_sizes():
             intent = 1.0
 
         segments = d['Segments'].split()
-        (size, added, removed) = (0, 0, 0)
+        (size, added, removed, addcount, remcount) = (0, 0, 0, 0, 0)
+        lo_stat = lowlevel.lowlevel_stat
         for seg in segments:
-            segsize = lowlevel.lowlevel_stat(seg + ".tar.gpg")['size']
+            segsize = lo_stat('.'.join ((seg, exts[seg])))['size']
             size += segsize
-            if seg not in previous: added += segsize
+            if seg not in previous:
+                added += segsize
+                addcount += 1
         for seg in previous:
             if seg not in segments:
-                removed += lowlevel.lowlevel_stat(seg + ".tar.gpg")['size']
+                removed += lo_stat('.'.join((seg, exts[seg])))['size']
+                remcount += 1
         previous = set(segments)
-        print "%s [%s]: %.3f +%.3f -%.3f" % (s, intent, size / 1024.0**2, added / 1024.0**2, removed / 1024.0**2)
+        print "%s [%s]: %.3f +%.3f -%.3f (+%d/-%d segments)" % (s, intent, size / 1024.0**2, added / 1024.0**2, removed / 1024.0**2, addcount, remcount)
+
+def cmd_garbage_collect(args):
+    """ Search for any files which are not needed by any current
+        snapshots and offer to delete them.
+        Syntax: $0 --store=DATADIR gc
+    """
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    lowlevel.scan()
+    store = cumulus.ObjectStore(lowlevel)
+    snapshots = set(lowlevel.list_snapshots())
+    segments = set()
+    for s in snapshots:
+        d = cumulus.parse_full(store.load_snapshot(s))
+        check_version(d['Format'])
+        segments.update(d['Segments'].split())
+
+    referenced = snapshots.union(segments)
+    reclaimed = 0
+    for (t, r) in cumulus.store.type_patterns.items():
+        for f in lowlevel.store.list(t):
+            m = r.match(f)
+            if m is None or m.group(1) not in referenced:
+                print "Garbage:", (t, f)
+                reclaimed += lowlevel.store.stat(t, f)['size']
+                if not options.dry_run:
+                    lowlevel.store.delete(t, f)
+    print "Reclaimed space:", reclaimed
+
+cmd_gc = cmd_garbage_collect
 
-# Build checksum list for objects in the given segments, or all segments if
-# none are specified.
 def cmd_object_checksums(segments):
+    """ Build checksum list for objects in the given segments, or all
+        segments if none are specified.
+    """
     get_passphrase()
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    store = cumulus.ObjectStore(lowlevel)
     if len(segments) == 0:
         segments = sorted(lowlevel.list_segments())
     for s in segments:
         for (o, data) in store.load_segment(s):
-            csum = lbs.ChecksumCreator().update(data).compute()
+            csum = cumulus.ChecksumCreator().update(data).compute()
             print "%s/%s:%d:%s" % (s, o, len(data), csum)
     store.cleanup()
+cmd_object_sums = object_sums = cmd_object_checksums  # 'object-sums' is dispatched by its cmd_ prefix
 
-# Read a snapshot file
 def cmd_read_snapshots(snapshots):
+    """ Read a snapshot file
+    """
     get_passphrase()
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    store = cumulus.ObjectStore(lowlevel)
     for s in snapshots:
-        d = lbs.parse_full(store.load_snapshot(s))
+        d = cumulus.parse_full(store.load_snapshot(s))
         check_version(d['Format'])
         print d
         print d['Segments'].split()
     store.cleanup()
 
-# Produce a flattened metadata dump from a snapshot
-def cmd_read_metadata(snapshot):
+def cmd_read_metadata(args):
+    """ Produce a flattened metadata dump from a snapshot
+    """
+    snapshot = args [0]
     get_passphrase()
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
-    d = lbs.parse_full(store.load_snapshot(snapshot))
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    store = cumulus.ObjectStore(lowlevel)
+    d = cumulus.parse_full(store.load_snapshot(snapshot))
     check_version(d['Format'])
-    metadata = lbs.read_metadata(store, d['Root'])
+    metadata = cumulus.read_metadata(store, d['Root'])
     blank = True
     for l in metadata:
         if l == '\n':
@@ -151,22 +183,23 @@ def cmd_read_metadata(snapshot):
         sys.stdout.write(l)
     store.cleanup()
 
-# Verify snapshot integrity
 def cmd_verify_snapshots(snapshots):
+    """ Verify snapshot integrity
+    """
     get_passphrase()
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    store = cumulus.ObjectStore(lowlevel)
     for s in snapshots:
-        lbs.accessed_segments.clear()
+        cumulus.accessed_segments.clear()
         print "#### Snapshot", s
-        d = lbs.parse_full(store.load_snapshot(s))
+        d = cumulus.parse_full(store.load_snapshot(s))
         check_version(d['Format'])
         print "## Root:", d['Root']
-        metadata = lbs.iterate_metadata(store, d['Root'])
+        metadata = cumulus.iterate_metadata(store, d['Root'])
         for m in metadata:
             if m.fields['type'] not in ('-', 'f'): continue
             print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
-            verifier = lbs.ChecksumVerifier(m.fields['checksum'])
+            verifier = cumulus.ChecksumVerifier(m.fields['checksum'])
             size = 0
             for block in m.data():
                 data = store.get(block)
@@ -181,20 +214,21 @@ def cmd_verify_snapshots(snapshots):
         # actually accurate: covered all segments that were really read, and
         # doesn't contain duplicates.
         listed_segments = set(d['Segments'].split())
-        if lbs.accessed_segments - listed_segments:
+        if cumulus.accessed_segments - listed_segments:
             print "Error: Some segments not listed in descriptor!"
-            print sorted(list(lbs.accessed_segments - listed_segments))
-        if listed_segments - lbs.accessed_segments :
+            print sorted(list(cumulus.accessed_segments - listed_segments))
+        if listed_segments - cumulus.accessed_segments :
             print "Warning: Extra unused segments listed in descriptor!"
-            print sorted(list(listed_segments - lbs.accessed_segments))
+            print sorted(list(listed_segments - cumulus.accessed_segments))
     store.cleanup()
 
-# Restore a snapshot, or some subset of files from it
 def cmd_restore_snapshot(args):
+    """ Restore a snapshot, or some subset of files from it
+    """
     get_passphrase()
-    lowlevel = lbs.LowlevelDataStore(options.store)
-    store = lbs.ObjectStore(lowlevel)
-    snapshot = lbs.parse_full(store.load_snapshot(args[0]))
+    lowlevel = cumulus.LowlevelDataStore(options.store)
+    store = cumulus.ObjectStore(lowlevel)
+    snapshot = cumulus.parse_full(store.load_snapshot(args[0]))
     check_version(snapshot['Format'])
     destdir = args[1]
     paths = args[2:]
@@ -217,7 +251,7 @@ def cmd_restore_snapshot(args):
     metadata_items = []
     metadata_paths = {}
     metadata_segments = {}
-    for m in lbs.iterate_metadata(store, snapshot['Root']):
+    for m in cumulus.iterate_metadata(store, snapshot['Root']):
         pathname = os.path.normpath(m.items.name)
         while os.path.isabs(pathname):
             pathname = pathname[1:]
@@ -234,7 +268,7 @@ def cmd_restore_snapshot(args):
             metadata_paths[pathname] = m
             for block in m.data():
                 (segment, object, checksum, slice) \
-                    = lbs.ObjectStore.parse_ref(block)
+                    = cumulus.ObjectStore.parse_ref(block)
                 if segment not in metadata_segments:
                     metadata_segments[segment] = set()
                 metadata_segments[segment].add(pathname)
@@ -254,7 +288,7 @@ def cmd_restore_snapshot(args):
         destpath = os.path.join(destdir, pathname)
 
         file = open(destpath, 'wb')
-        verifier = lbs.ChecksumVerifier(m.items.checksum)
+        verifier = cumulus.ChecksumVerifier(m.items.checksum)
         size = 0
         for block in m.data():
             data = store.get(block)
@@ -341,29 +375,32 @@ def cmd_restore_snapshot(args):
 
     store.cleanup()
 
+usage = ["%prog [option]... command [arg]...", "", "Commands:"]
+cmd = method = None
+for cmd, method in locals().iteritems():
+    if cmd.startswith ('cmd_'):
+        usage.append(cmd[4:].replace('_', '-') + ':' + method.__doc__)
+parser = OptionParser(usage="\n".join(usage))
+parser.add_option("-v", action="store_true", dest="verbose", default=False,
+                  help="increase verbosity")
+parser.add_option("-n", action="store_true", dest="dry_run", default=False,
+                  help="dry run")
+parser.add_option("--store", dest="store",
+                  help="specify path to backup data store")
+parser.add_option("--localdb", dest="localdb",
+                  help="specify path to local database")
+parser.add_option("--intent", dest="intent", default=1.0,
+                  help="give expected next snapshot type when cleaning")
+(options, args) = parser.parse_args(sys.argv[1:])
+
 if len(args) == 0:
     parser.print_usage()
     sys.exit(1)
 cmd = args[0]
 args = args[1:]
-if cmd == 'clean':
-    cmd_clean()
-elif cmd == 'prune-db':
-    cmd_prune_db()
-elif cmd == 'list-snapshots':
-    cmd_list_snapshots()
-elif cmd == 'object-sums':
-    cmd_object_checksums(args)
-elif cmd == 'read-snapshots':
-    cmd_read_snapshots(args)
-elif cmd == 'read-metadata':
-    cmd_read_metadata(args[0])
-elif cmd == 'list-snapshot-sizes':
-    cmd_list_snapshot_sizes()
-elif cmd == 'verify-snapshots':
-    cmd_verify_snapshots(args)
-elif cmd == 'restore-snapshot':
-    cmd_restore_snapshot(args)
+method = locals().get('cmd_' + cmd.replace('-', '_'))
+if method:
+    method (args)
 else:
     print "Unknown command:", cmd
     parser.print_usage()