python/cumulus/cmd_util.py

   1 # Cumulus: Smart Filesystem Backup to Dumb Servers
   2 #
   3 # Copyright (C) 2006-2009  The Regents of the University of California
   4 # Copyright (C) 2012  Google Inc.
   5 # Written by Michael Vrable <mvrable@cs.ucsd.edu>
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License along
  18 # with this program; if not, write to the Free Software Foundation, Inc.,
  19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20
  21 """Implementation of the Cumulus command-line utility program."""
  22
  23 import getpass, os, stat, sys, time
  24 from optparse import OptionParser
  25
  26 import cumulus
  27
# The newest snapshot format this tool understands is "Cumulus Snapshot
# v0.11".  The cumulus module may support less than that, so the effective
# limit is the lower of the two versions.
FORMAT_VERSION = min(cumulus.FORMAT_VERSION, (0, 11))
  31
  32 def check_version(format):
  33     ver = cumulus.parse_metadata_version(format)
  34     if ver > FORMAT_VERSION:
  35         raise RuntimeError("Unsupported Cumulus format: " + format)
  36
  37 # Read a passphrase from the user and store it in the LBS_GPG_PASSPHRASE
  38 # environment variable.
  39 def get_passphrase():
  40     ENV_KEY = 'LBS_GPG_PASSPHRASE'
  41     if not os.environ.has_key(ENV_KEY):
  42         os.environ[ENV_KEY] = getpass.getpass()
  43
  44 def cmd_prune_db(args):
  45     """ Delete old snapshots from the local database, though do not
  46         actually schedule any segment cleaning.
  47         Syntax: $0 --localdb=LOCALDB prune-db
  48     """
  49     db = cumulus.LocalDatabase(options.localdb)
  50
  51     # Delete old snapshots from the local database.
  52     #db.garbage_collect()
  53     #db.commit()
  54
  55 def cmd_clean(args, clean_threshold=7.0):
  56     """ Run the segment cleaner.
  57         Syntax: $0 --localdb=LOCALDB clean
  58     """
  59     db = cumulus.LocalDatabase(options.localdb)
  60
  61     # Delete old snapshots from the local database.
  62     intent = float(options.intent)
  63     for s in db.list_schemes():
  64         db.garbage_collect(s, intent)
  65
  66     # Expire segments which are poorly-utilized.
  67     for s in db.get_segment_cleaning_list():
  68         if s.cleaning_benefit > clean_threshold:
  69             print "Cleaning segment %d (benefit %.2f)" % (s.id,
  70                                                           s.cleaning_benefit)
  71             db.mark_segment_expired(s)
  72         else:
  73             break
  74     db.balance_expired_objects()
  75     db.commit()
  76
  77 def cmd_list_snapshots(args):
  78     """ List snapshots stored.
  79         Syntax: $0 --data=DATADIR list-snapshots
  80     """
  81     store = cumulus.LowlevelDataStore(options.store)
  82     for s in sorted(store.list_snapshots()):
  83         print s
  84
  85 def cmd_list_snapshot_sizes(args):
  86     """ List size of data needed for each snapshot.
  87         Syntax: $0 --data=DATADIR list-snapshot-sizes
  88     """
  89     lowlevel = cumulus.LowlevelDataStore(options.store)
  90     lowlevel.scan()
  91     store = cumulus.ObjectStore(lowlevel)
  92     previous = set()
  93     exts = {}
  94     for seg in lowlevel.store.list('segments'):
  95         exts.update ([seg.split ('.', 1)])
  96     for s in sorted(lowlevel.list_snapshots()):
  97         d = cumulus.parse_full(store.load_snapshot(s))
  98         check_version(d['Format'])
  99
 100         try:
 101             intent = float(d['Backup-Intent'])
 102         except:
 103             intent = 1.0
 104
 105         segments = d['Segments'].split()
 106         (size, added, removed, addcount, remcount) = (0, 0, 0, 0, 0)
 107         lo_stat = lowlevel.lowlevel_stat
 108         for seg in segments:
 109             segsize = lo_stat('.'.join ((seg, exts[seg])))['size']
 110             size += segsize
 111             if seg not in previous:
 112                 added += segsize
 113                 addcount += 1
 114         for seg in previous:
 115             if seg not in segments:
 116                 removed += lo_stat('.'.join((seg, exts[seg])))['size']
 117                 remcount += 1
 118         previous = set(segments)
 119         print "%s [%s]: %.3f +%.3f -%.3f (+%d/-%d segments)" % (s, intent, size / 1024.0**2, added / 1024.0**2, removed / 1024.0**2, addcount, remcount)
 120
 121 def cmd_garbage_collect(args):
 122     """ Search for any files which are not needed by any current
 123         snapshots and offer to delete them.
 124         Syntax: $0 --store=DATADIR gc
 125     """
 126     lowlevel = cumulus.LowlevelDataStore(options.store)
 127     lowlevel.scan()
 128     store = cumulus.ObjectStore(lowlevel)
 129     snapshots = set(lowlevel.list_snapshots())
 130     segments = set()
 131     for s in snapshots:
 132         d = cumulus.parse_full(store.load_snapshot(s))
 133         check_version(d['Format'])
 134         segments.update(d['Segments'].split())
 135
 136     referenced = snapshots.union(segments)
 137     reclaimed = 0
 138     for (t, r) in cumulus.store.type_patterns.items():
 139         for f in lowlevel.store.list(t):
 140             m = r.match(f)
 141             if m is None or m.group(1) not in referenced:
 142                 print "Garbage:", (t, f)
 143                 reclaimed += lowlevel.store.stat(t, f)['size']
 144                 if not options.dry_run:
 145                     lowlevel.store.delete(t, f)
 146     print "Reclaimed space:", reclaimed
 147
 148 cmd_gc = cmd_garbage_collect
 149
 150 def cmd_object_checksums(segments):
 151     """ Build checksum list for objects in the given segments, or all
 152         segments if none are specified.
 153     """
 154     get_passphrase()
 155     lowlevel = cumulus.LowlevelDataStore(options.store)
 156     store = cumulus.ObjectStore(lowlevel)
 157     if len(segments) == 0:
 158         segments = sorted(lowlevel.list_segments())
 159     for s in segments:
 160         for (o, data) in store.load_segment(s):
 161             csum = cumulus.ChecksumCreator().update(data).compute()
 162             print "%s/%s:%d:%s" % (s, o, len(data), csum)
 163     store.cleanup()
 164 object_sums = cmd_object_checksums
 165
 166 def cmd_read_snapshots(snapshots):
 167     """ Read a snapshot file
 168     """
 169     get_passphrase()
 170     lowlevel = cumulus.LowlevelDataStore(options.store)
 171     store = cumulus.ObjectStore(lowlevel)
 172     for s in snapshots:
 173         d = cumulus.parse_full(store.load_snapshot(s))
 174         check_version(d['Format'])
 175         print d
 176         print d['Segments'].split()
 177     store.cleanup()
 178
 179 def cmd_read_metadata(args):
 180     """ Produce a flattened metadata dump from a snapshot
 181     """
 182     snapshot = args [0]
 183     get_passphrase()
 184     lowlevel = cumulus.LowlevelDataStore(options.store)
 185     store = cumulus.ObjectStore(lowlevel)
 186     d = cumulus.parse_full(store.load_snapshot(snapshot))
 187     check_version(d['Format'])
 188     metadata = cumulus.read_metadata(store, d['Root'])
 189     blank = True
 190     for l in metadata:
 191         if l == '\n':
 192             if blank: continue
 193             blank = True
 194         else:
 195             blank = False
 196         sys.stdout.write(l)
 197     store.cleanup()
 198
 199 def cmd_verify_snapshots(snapshots):
 200     """ Verify snapshot integrity
 201     """
 202     get_passphrase()
 203     lowlevel = cumulus.LowlevelDataStore(options.store)
 204     store = cumulus.ObjectStore(lowlevel)
 205     for s in snapshots:
 206         cumulus.accessed_segments.clear()
 207         print "#### Snapshot", s
 208         d = cumulus.parse_full(store.load_snapshot(s))
 209         check_version(d['Format'])
 210         print "## Root:", d['Root']
 211         metadata = cumulus.iterate_metadata(store, d['Root'])
 212         for m in metadata:
 213             if m.fields['type'] not in ('-', 'f'): continue
 214             print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
 215             verifier = cumulus.ChecksumVerifier(m.fields['checksum'])
 216             size = 0
 217             for block in m.data():
 218                 data = store.get(block)
 219                 verifier.update(data)
 220                 size += len(data)
 221             if int(m.fields['size']) != size:
 222                 raise ValueError("File size does not match!")
 223             if not verifier.valid():
 224                 raise ValueError("Bad checksum found")
 225
 226         # Verify that the list of segments included with the snapshot was
 227         # actually accurate: covered all segments that were really read, and
 228         # doesn't contain duplicates.
 229         listed_segments = set(d['Segments'].split())
 230         if cumulus.accessed_segments - listed_segments:
 231             print "Error: Some segments not listed in descriptor!"
 232             print sorted(list(cumulus.accessed_segments - listed_segments))
 233         if listed_segments - cumulus.accessed_segments :
 234             print "Warning: Extra unused segments listed in descriptor!"
 235             print sorted(list(listed_segments - cumulus.accessed_segments))
 236     store.cleanup()
 237
 238 def cmd_restore_snapshot(args):
 239     """ Restore a snapshot, or some subset of files from it
 240     """
 241     get_passphrase()
 242     lowlevel = cumulus.LowlevelDataStore(options.store)
 243     store = cumulus.ObjectStore(lowlevel)
 244     snapshot = cumulus.parse_full(store.load_snapshot(args[0]))
 245     check_version(snapshot['Format'])
 246     destdir = args[1]
 247     paths = args[2:]
 248
 249     def matchpath(path):
 250         "Return true if the specified path should be included in the restore."
 251
 252         # No specification of what to restore => restore everything
 253         if len(paths) == 0: return True
 254
 255         for p in paths:
 256             if path == p: return True
 257             if path.startswith(p + "/"): return True
 258         return False
 259
 260     def warn(m, msg):
 261         print "Warning: %s: %s" % (m.items.name, msg)
 262
 263     # Phase 1: Read the complete metadata log and create directory structure.
 264     metadata_items = []
 265     metadata_paths = {}
 266     metadata_segments = {}
 267     for m in cumulus.iterate_metadata(store, snapshot['Root']):
 268         pathname = os.path.normpath(m.items.name)
 269         while os.path.isabs(pathname):
 270             pathname = pathname[1:]
 271         if not matchpath(pathname): continue
 272
 273         destpath = os.path.join(destdir, pathname)
 274         if m.items.type == 'd':
 275             path = destpath
 276         else:
 277             (path, filename) = os.path.split(destpath)
 278
 279         metadata_items.append((pathname, m))
 280         if m.items.type in ('-', 'f'):
 281             metadata_paths[pathname] = m
 282             for block in m.data():
 283                 (segment, object, checksum, slice) \
 284                     = cumulus.ObjectStore.parse_ref(block)
 285                 if segment not in metadata_segments:
 286                     metadata_segments[segment] = set()
 287                 metadata_segments[segment].add(pathname)
 288
 289         try:
 290             if not os.path.isdir(path):
 291                 print "mkdir:", path
 292                 os.makedirs(path)
 293         except Exception, e:
 294             warn(m, "Error creating directory structure: %s" % (e,))
 295             continue
 296
 297     # Phase 2: Restore files, ordered by how data is stored in segments.
 298     def restore_file(pathname, m):
 299         assert m.items.type in ('-', 'f')
 300         print "extract:", pathname
 301         destpath = os.path.join(destdir, pathname)
 302
 303         file = open(destpath, 'wb')
 304         verifier = cumulus.ChecksumVerifier(m.items.checksum)
 305         size = 0
 306         for block in m.data():
 307             data = store.get(block)
 308             verifier.update(data)
 309             size += len(data)
 310             file.write(data)
 311         file.close()
 312         if int(m.fields['size']) != size:
 313             raise ValueError("File size does not match!")
 314         if not verifier.valid():
 315             raise ValueError("Bad checksum found")
 316
 317     while metadata_segments:
 318         (segment, items) = metadata_segments.popitem()
 319         print "+ Segment", segment
 320         for pathname in sorted(items):
 321             if pathname in metadata_paths:
 322                 restore_file(pathname, metadata_paths[pathname])
 323                 del metadata_paths[pathname]
 324
 325     print "+ Remaining files"
 326     while metadata_paths:
 327         (pathname, m) = metadata_paths.popitem()
 328         restore_file(pathname, m)
 329
 330     # Phase 3: Restore special files (symlinks, devices).
 331     # Phase 4: Restore directory permissions and modification times.
 332     for (pathname, m) in reversed(metadata_items):
 333         print "permissions:", pathname
 334         destpath = os.path.join(destdir, pathname)
 335         (path, filename) = os.path.split(destpath)
 336
 337         # TODO: Check for ../../../paths that might attempt to write outside
 338         # the destination directory.  Maybe also check attempts to follow
 339         # symlinks pointing outside?
 340
 341         try:
 342             if m.items.type in ('-', 'f', 'd'):
 343                 pass
 344             elif m.items.type == 'l':
 345                 try:
 346                     target = m.items.target
 347                 except:
 348                     # Old (v0.2 format) name for 'target'
 349                     target = m.items.contents
 350                 os.symlink(target, destpath)
 351             elif m.items.type == 'p':
 352                 os.mkfifo(destpath)
 353             elif m.items.type in ('c', 'b'):
 354                 if m.items.type == 'c':
 355                     mode = 0600 | stat.S_IFCHR
 356                 else:
 357                     mode = 0600 | stat.S_IFBLK
 358                 os.mknod(destpath, mode, os.makedev(*m.items.device))
 359             elif m.items.type == 's':
 360                 pass        # TODO: Implement
 361             else:
 362                 warn(m, "Unknown type code: " + m.items.type)
 363                 continue
 364
 365         except Exception, e:
 366             warn(m, "Error restoring: %s" % (e,))
 367             continue
 368
 369         try:
 370             uid = m.items.user[0]
 371             gid = m.items.group[0]
 372             os.lchown(destpath, uid, gid)
 373         except Exception, e:
 374             warn(m, "Error restoring file ownership: %s" % (e,))
 375
 376         if m.items.type == 'l':
 377             continue
 378
 379         try:
 380             os.chmod(destpath, m.items.mode)
 381         except Exception, e:
 382             warn(m, "Error restoring file permissions: %s" % (e,))
 383
 384         try:
 385             os.utime(destpath, (time.time(), m.items.mtime))
 386         except Exception, e:
 387             warn(m, "Error restoring file timestamps: %s" % (e,))
 388
 389     store.cleanup()
 390
 391 def main(argv):
 392     usage = ["%prog [option]... command [arg]...", "", "Commands:"]
 393     cmd = method = None
 394     for cmd, method in globals().iteritems():
 395         if cmd.startswith ('cmd_'):
 396             usage.append(cmd[4:].replace('_', '-') + ':' + method.__doc__)
 397     parser = OptionParser(usage="\n".join(usage))
 398     parser.add_option("-v", action="store_true", dest="verbose", default=False,
 399                       help="increase verbosity")
 400     parser.add_option("-n", action="store_true", dest="dry_run", default=False,
 401                       help="dry run")
 402     parser.add_option("--store", dest="store",
 403                       help="specify path to backup data store")
 404     parser.add_option("--localdb", dest="localdb",
 405                       help="specify path to local database")
 406     parser.add_option("--intent", dest="intent", default=1.0,
 407                       help="give expected next snapshot type when cleaning")
 408     global options
 409     (options, args) = parser.parse_args(argv[1:])
 410
 411     if len(args) == 0:
 412         parser.print_usage()
 413         sys.exit(1)
 414     cmd = args[0]
 415     args = args[1:]
 416     method = globals().get('cmd_' + cmd.replace('-', '_'))
 417     if method:
 418         method (args)
 419     else:
 420         print "Unknown command:", cmd
 421         parser.print_usage()
 422         sys.exit(1)