1 # Cumulus: Smart Filesystem Backup to Dumb Servers
3 # Copyright (C) 2006-2009 The Regents of the University of California
4 # Copyright (C) 2012 Google Inc.
5 # Written by Michael Vrable <mvrable@cs.ucsd.edu>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Implementation of the Cumulus command-line utility program."""
23 import getpass, os, stat, sys, time
24 from optparse import OptionParser
# We support up to "Cumulus Snapshot v0.11" formats, but are also limited by
# the format version implemented by the cumulus module itself — whichever of
# the two is lower wins.
FORMAT_VERSION = min(cumulus.FORMAT_VERSION, (0, 11))
def check_version(format):
    """Ensure the given snapshot format string is one this tool understands.

    Raises RuntimeError when the snapshot was written in a format newer
    than FORMAT_VERSION supports.
    """
    if cumulus.parse_metadata_version(format) > FORMAT_VERSION:
        raise RuntimeError("Unsupported Cumulus format: " + format)
# Read a passphrase from the user and store it in the LBS_GPG_PASSPHRASE
# environment variable.
def get_passphrase():
    """Prompt for the GPG passphrase and cache it in the environment.

    If LBS_GPG_PASSPHRASE is already set, the user is not prompted.
    """
    # NOTE(review): the `def` line was missing from this copy of the file;
    # restored here (the comment above describes a helper action, and running
    # these statements at import time would prompt on every invocation).
    # Confirm against the complete file.
    ENV_KEY = 'LBS_GPG_PASSPHRASE'
    # dict.has_key() is deprecated (and gone in Python 3); use `in` instead.
    if ENV_KEY not in os.environ:
        os.environ[ENV_KEY] = getpass.getpass()
def cmd_prune_db(args):
    """ Delete old snapshots from the local database, though do not
        actually schedule any segment cleaning.
        Syntax: $0 --localdb=LOCALDB prune-db
    """
    # Open the local (client-side) state database named by --localdb.
    db = cumulus.LocalDatabase(options.localdb)

    # Delete old snapshots from the local database.
    # NOTE(review): the statements that performed the deletion appear to be
    # missing from this copy of the file — as written, the function only
    # opens the database. Confirm against version control.
55 def cmd_clean(args, clean_threshold=7.0):
56 """ Run the segment cleaner.
57 Syntax: $0 --localdb=LOCALDB clean
59 db = cumulus.LocalDatabase(options.localdb)
61 # Delete old snapshots from the local database.
62 intent = float(options.intent)
63 for s in db.list_schemes():
64 db.garbage_collect(s, intent)
66 # Expire segments which are poorly-utilized.
67 for s in db.get_segment_cleaning_list():
68 if s.cleaning_benefit > clean_threshold:
69 print "Cleaning segment %d (benefit %.2f)" % (s.id,
71 db.mark_segment_expired(s)
74 db.balance_expired_objects()
77 def cmd_list_snapshots(args):
78 """ List snapshots stored.
79 Syntax: $0 --data=DATADIR list-snapshots
81 store = cumulus.LowlevelDataStore(options.store)
82 for s in sorted(store.list_snapshots()):
85 def cmd_list_snapshot_sizes(args):
86 """ List size of data needed for each snapshot.
87 Syntax: $0 --data=DATADIR list-snapshot-sizes
89 lowlevel = cumulus.LowlevelDataStore(options.store)
91 store = cumulus.ObjectStore(lowlevel)
94 for seg in lowlevel.store.list('segments'):
95 exts.update ([seg.split ('.', 1)])
96 for s in sorted(lowlevel.list_snapshots()):
97 d = cumulus.parse_full(store.load_snapshot(s))
98 check_version(d['Format'])
101 intent = float(d['Backup-Intent'])
105 segments = d['Segments'].split()
106 (size, added, removed, addcount, remcount) = (0, 0, 0, 0, 0)
107 lo_stat = lowlevel.lowlevel_stat
109 segsize = lo_stat('.'.join ((seg, exts[seg])))['size']
111 if seg not in previous:
115 if seg not in segments:
116 removed += lo_stat('.'.join((seg, exts[seg])))['size']
118 previous = set(segments)
119 print "%s [%s]: %.3f +%.3f -%.3f (+%d/-%d segments)" % (s, intent, size / 1024.0**2, added / 1024.0**2, removed / 1024.0**2, addcount, remcount)
121 def cmd_garbage_collect(args):
122 """ Search for any files which are not needed by any current
123 snapshots and offer to delete them.
124 Syntax: $0 --store=DATADIR gc
126 lowlevel = cumulus.LowlevelDataStore(options.store)
128 store = cumulus.ObjectStore(lowlevel)
129 snapshots = set(lowlevel.list_snapshots())
132 d = cumulus.parse_full(store.load_snapshot(s))
133 check_version(d['Format'])
134 segments.update(d['Segments'].split())
136 referenced = snapshots.union(segments)
138 for (t, r) in cumulus.store.type_patterns.items():
139 for f in lowlevel.store.list(t):
141 if m is None or m.group(1) not in referenced:
142 print "Garbage:", (t, f)
143 reclaimed += lowlevel.store.stat(t, f)['size']
144 if not options.dry_run:
145 lowlevel.store.delete(t, f)
146 print "Reclaimed space:", reclaimed
148 cmd_gc = cmd_garbage_collect
150 def cmd_object_checksums(segments):
151 """ Build checksum list for objects in the given segments, or all
152 segments if none are specified.
155 lowlevel = cumulus.LowlevelDataStore(options.store)
156 store = cumulus.ObjectStore(lowlevel)
157 if len(segments) == 0:
158 segments = sorted(lowlevel.list_segments())
160 for (o, data) in store.load_segment(s):
161 csum = cumulus.ChecksumCreator().update(data).compute()
162 print "%s/%s:%d:%s" % (s, o, len(data), csum)
164 object_sums = cmd_object_checksums
166 def cmd_read_snapshots(snapshots):
167 """ Read a snapshot file
170 lowlevel = cumulus.LowlevelDataStore(options.store)
171 store = cumulus.ObjectStore(lowlevel)
173 d = cumulus.parse_full(store.load_snapshot(s))
174 check_version(d['Format'])
176 print d['Segments'].split()
def cmd_read_metadata(args):
    """ Produce a flattened metadata dump from a snapshot
    """
    # NOTE(review): the binding of `snapshot` was missing from this copy
    # (sibling commands take the snapshot name as args[0]); restored.
    snapshot = args[0]
    lowlevel = cumulus.LowlevelDataStore(options.store)
    store = cumulus.ObjectStore(lowlevel)
    d = cumulus.parse_full(store.load_snapshot(snapshot))
    check_version(d['Format'])
    metadata = cumulus.read_metadata(store, d['Root'])
    # Write the flattened metadata log to stdout, collapsing runs of blank
    # lines to a single one.
    # NOTE(review): this output loop was truncated in this copy and has been
    # restored; confirm against the complete file.
    blank = True
    for l in metadata:
        if l == '\n':
            if blank: continue
            blank = True
        else:
            blank = False
        sys.stdout.write(l)
199 def cmd_verify_snapshots(snapshots):
200 """ Verify snapshot integrity
203 lowlevel = cumulus.LowlevelDataStore(options.store)
204 store = cumulus.ObjectStore(lowlevel)
206 cumulus.accessed_segments.clear()
207 print "#### Snapshot", s
208 d = cumulus.parse_full(store.load_snapshot(s))
209 check_version(d['Format'])
210 print "## Root:", d['Root']
211 metadata = cumulus.iterate_metadata(store, d['Root'])
213 if m.fields['type'] not in ('-', 'f'): continue
214 print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
215 verifier = cumulus.ChecksumVerifier(m.fields['checksum'])
217 for block in m.data():
218 data = store.get(block)
219 verifier.update(data)
221 if int(m.fields['size']) != size:
222 raise ValueError("File size does not match!")
223 if not verifier.valid():
224 raise ValueError("Bad checksum found")
226 # Verify that the list of segments included with the snapshot was
227 # actually accurate: covered all segments that were really read, and
228 # doesn't contain duplicates.
229 listed_segments = set(d['Segments'].split())
230 if cumulus.accessed_segments - listed_segments:
231 print "Error: Some segments not listed in descriptor!"
232 print sorted(list(cumulus.accessed_segments - listed_segments))
233 if listed_segments - cumulus.accessed_segments :
234 print "Warning: Extra unused segments listed in descriptor!"
235 print sorted(list(listed_segments - cumulus.accessed_segments))
238 def cmd_restore_snapshot(args):
239 """ Restore a snapshot, or some subset of files from it
242 lowlevel = cumulus.LowlevelDataStore(options.store)
243 store = cumulus.ObjectStore(lowlevel)
244 snapshot = cumulus.parse_full(store.load_snapshot(args[0]))
245 check_version(snapshot['Format'])
250 "Return true if the specified path should be included in the restore."
252 # No specification of what to restore => restore everything
253 if len(paths) == 0: return True
256 if path == p: return True
257 if path.startswith(p + "/"): return True
261 print "Warning: %s: %s" % (m.items.name, msg)
263 # Phase 1: Read the complete metadata log and create directory structure.
266 metadata_segments = {}
267 for m in cumulus.iterate_metadata(store, snapshot['Root']):
268 pathname = os.path.normpath(m.items.name)
269 while os.path.isabs(pathname):
270 pathname = pathname[1:]
271 if not matchpath(pathname): continue
273 destpath = os.path.join(destdir, pathname)
274 if m.items.type == 'd':
277 (path, filename) = os.path.split(destpath)
279 metadata_items.append((pathname, m))
280 if m.items.type in ('-', 'f'):
281 metadata_paths[pathname] = m
282 for block in m.data():
283 (segment, object, checksum, slice) \
284 = cumulus.ObjectStore.parse_ref(block)
285 if segment not in metadata_segments:
286 metadata_segments[segment] = set()
287 metadata_segments[segment].add(pathname)
290 if not os.path.isdir(path):
294 warn(m, "Error creating directory structure: %s" % (e,))
297 # Phase 2: Restore files, ordered by how data is stored in segments.
298 def restore_file(pathname, m):
299 assert m.items.type in ('-', 'f')
300 print "extract:", pathname
301 destpath = os.path.join(destdir, pathname)
303 file = open(destpath, 'wb')
304 verifier = cumulus.ChecksumVerifier(m.items.checksum)
306 for block in m.data():
307 data = store.get(block)
308 verifier.update(data)
312 if int(m.fields['size']) != size:
313 raise ValueError("File size does not match!")
314 if not verifier.valid():
315 raise ValueError("Bad checksum found")
317 while metadata_segments:
318 (segment, items) = metadata_segments.popitem()
319 print "+ Segment", segment
320 for pathname in sorted(items):
321 if pathname in metadata_paths:
322 restore_file(pathname, metadata_paths[pathname])
323 del metadata_paths[pathname]
325 print "+ Remaining files"
326 while metadata_paths:
327 (pathname, m) = metadata_paths.popitem()
328 restore_file(pathname, m)
330 # Phase 3: Restore special files (symlinks, devices).
331 # Phase 4: Restore directory permissions and modification times.
332 for (pathname, m) in reversed(metadata_items):
333 print "permissions:", pathname
334 destpath = os.path.join(destdir, pathname)
335 (path, filename) = os.path.split(destpath)
337 # TODO: Check for ../../../paths that might attempt to write outside
338 # the destination directory. Maybe also check attempts to follow
339 # symlinks pointing outside?
342 if m.items.type in ('-', 'f', 'd'):
344 elif m.items.type == 'l':
346 target = m.items.target
348 # Old (v0.2 format) name for 'target'
349 target = m.items.contents
350 os.symlink(target, destpath)
351 elif m.items.type == 'p':
353 elif m.items.type in ('c', 'b'):
354 if m.items.type == 'c':
355 mode = 0600 | stat.S_IFCHR
357 mode = 0600 | stat.S_IFBLK
358 os.mknod(destpath, mode, os.makedev(*m.items.device))
359 elif m.items.type == 's':
360 pass # TODO: Implement
362 warn(m, "Unknown type code: " + m.items.type)
366 warn(m, "Error restoring: %s" % (e,))
370 uid = m.items.user[0]
371 gid = m.items.group[0]
372 os.lchown(destpath, uid, gid)
374 warn(m, "Error restoring file ownership: %s" % (e,))
376 if m.items.type == 'l':
380 os.chmod(destpath, m.items.mode)
382 warn(m, "Error restoring file permissions: %s" % (e,))
385 os.utime(destpath, (time.time(), m.items.mtime))
387 warn(m, "Error restoring file timestamps: %s" % (e,))
# Command-line driver: build the usage text from the cmd_* functions defined
# above, declare the global options, parse the command line, and dispatch to
# the requested command handler.
#
# NOTE(review): this section appears truncated in this copy — `argv` and
# `cmd` are read below without being bound here (presumably an enclosing
# `def main(argv):` plus the argument-popping lines are missing), and the
# "-n" add_option call is cut off mid-argument-list.  Confirm against the
# complete file before editing further.
usage = ["%prog [option]... command [arg]...", "", "Commands:"]
# Every function named cmd_* doubles as a sub-command; its docstring becomes
# its usage text (underscores in the name map to hyphens on the command line).
for cmd, method in globals().iteritems():
    if cmd.startswith ('cmd_'):
        usage.append(cmd[4:].replace('_', '-') + ':' + method.__doc__)
parser = OptionParser(usage="\n".join(usage))
parser.add_option("-v", action="store_true", dest="verbose", default=False,
                  help="increase verbosity")
# NOTE(review): truncated call — the closing arguments (help text and the
# closing parenthesis) for the "-n" dry-run option are missing in this copy.
parser.add_option("-n", action="store_true", dest="dry_run", default=False,
parser.add_option("--store", dest="store",
                  help="specify path to backup data store")
parser.add_option("--localdb", dest="localdb",
                  help="specify path to local database")
parser.add_option("--intent", dest="intent", default=1.0,
                  help="give expected next snapshot type when cleaning")
# The resulting `options` object is the module-global consulted by all of
# the cmd_* handlers above.
(options, args) = parser.parse_args(argv[1:])
# Map the hyphenated command name back to its cmd_* implementation.
method = globals().get('cmd_' + cmd.replace('-', '_'))
print "Unknown command:", cmd