1 # Cumulus: Efficient Filesystem Backup to the Cloud
2 # Copyright (C) 2006-2009, 2012 The Cumulus Developers
3 # See the AUTHORS file for a list of contributors.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 """Implementation of the Cumulus command-line utility program."""
21 from __future__ import division, print_function, unicode_literals
23 import getpass, os, stat, sys, time
24 from optparse import OptionParser
28 # We support up to "Cumulus Snapshot v0.11" formats, but are also limited by
30 FORMAT_VERSION = min(cumulus.FORMAT_VERSION, (0, 11))
def check_version(format):
    """Raise RuntimeError if a snapshot format is newer than we support.

    `format` is the format string taken from a snapshot descriptor (callers
    pass the descriptor's 'Format' field).  It is parsed with
    cumulus.parse_metadata_version() and compared against the module-level
    FORMAT_VERSION limit.
    """
    too_new = cumulus.parse_metadata_version(format) > FORMAT_VERSION
    if too_new:
        raise RuntimeError("Unsupported Cumulus format: " + format)
37 # Read a passphrase from the user and store it in the LBS_GPG_PASSPHRASE
38 # environment variable.
40 ENV_KEY = 'LBS_GPG_PASSPHRASE'
41 if ENV_KEY not in os.environ:
42 os.environ[ENV_KEY] = getpass.getpass()
44 def cmd_prune_db(args):
45 """ Delete old snapshots from the local database, though do not
46 actually schedule any segment cleaning.
47 Syntax: $0 --localdb=LOCALDB prune-db
49 db = cumulus.LocalDatabase(options.localdb)
51 # Delete old snapshots from the local database.
55 def cmd_clean(args, clean_threshold=7.0):
56 """ Run the segment cleaner.
57 Syntax: $0 --localdb=LOCALDB clean
59 db = cumulus.LocalDatabase(options.localdb)
61 # Delete old snapshots from the local database.
62 intent = float(options.intent)
63 for s in db.list_schemes():
64 db.prune_old_snapshots(s, intent)
66 # Expire segments which are poorly-utilized.
67 for s in db.get_segment_cleaning_list():
68 if s.cleaning_benefit > clean_threshold:
69 print("Cleaning segment %d (benefit %.2f)" % (s.id,
71 db.mark_segment_expired(s)
74 db.balance_expired_objects()
def cmd_list_snapshots(args):
    """ List snapshots stored.
        Syntax: $0 --store=DATADIR list-snapshots
    """
    # NOTE: the docstring above is user-facing: it is appended verbatim to the
    # command list in the program's usage text, so keep it short.  The option
    # named there must match the one the parser defines (--store), since this
    # command reads options.store.
    #
    # `args` is accepted so every cmd_* function shares one signature; it is
    # not used here.
    store = cumulus.CumulusStore(options.store)

    # Sort so the listing is printed in a stable, predictable order.
    for name in sorted(store.list_snapshots()):
        print(name)
84 def cmd_list_snapshot_sizes(args):
85 """ List size of data needed for each snapshot.
86 Syntax: $0 --data=DATADIR list-snapshot-sizes
88 store = cumulus.CumulusStore(options.store)
89 backend = store.backend
90 backend.prefetch_generic()
93 def get_size(segment):
94 return backend.stat_generic(segment + ".tar", "segments")["size"]
95 for s in sorted(store.list_snapshots()):
96 d = cumulus.parse_full(store.load_snapshot(s))
97 check_version(d['Format'])
99 segments = set(d['Segments'].split())
100 (added, removed, addcount, remcount) = (0, 0, 0, 0)
101 for seg in segments.difference(previous):
102 added += get_size(seg)
104 for seg in previous.difference(segments):
105 removed += get_size(seg)
107 size += added - removed
109 print("%s: %.3f +%.3f -%.3f (+%d/-%d segments)" % (s, size / 1024.0**2, added / 1024.0**2, removed / 1024.0**2, addcount, remcount))
111 def cmd_garbage_collect(args):
112 """ Search for any files which are not needed by any current
113 snapshots and offer to delete them.
114 Syntax: $0 --store=DATADIR gc
116 store = cumulus.CumulusStore(options.store)
117 backend = store.backend
119 for s in store.list_snapshots():
120 d = cumulus.parse_full(store.load_snapshot(s))
121 check_version(d['Format'])
123 referenced.update(d['Segments'].split())
129 for filetype in cumulus.SEARCH_PATHS:
130 for (name, path) in store.backend.list_generic(filetype):
131 if name in referenced:
132 to_preserve.append(path)
134 to_delete.append(path)
139 raw_backend = backend.raw_backend
142 if not options.dry_run:
143 raw_backend.delete(f)
144 cmd_gc = cmd_garbage_collect
146 def cmd_read_snapshots(snapshots):
147 """ Read a snapshot file
150 store = cumulus.CumulusStore(options.store)
152 d = cumulus.parse_full(store.load_snapshot(s))
153 check_version(d['Format'])
155 print(d['Segments'].split())
158 def cmd_read_metadata(args):
159 """ Produce a flattened metadata dump from a snapshot
163 store = cumulus.CumulusStore(options.store)
164 d = cumulus.parse_full(store.load_snapshot(snapshot))
165 check_version(d['Format'])
166 metadata = cumulus.read_metadata(store, d['Root'])
177 def cmd_verify_snapshots(snapshots):
178 """ Verify snapshot integrity
181 store = cumulus.CumulusStore(options.store)
183 cumulus.accessed_segments.clear()
184 print("#### Snapshot", s)
185 d = cumulus.parse_full(store.load_snapshot(s))
186 check_version(d['Format'])
187 print("## Root:", d['Root'])
188 metadata = cumulus.iterate_metadata(store, d['Root'])
190 if m.fields['type'] not in ('-', 'f'): continue
191 print("%s [%d bytes]" % (m.fields['name'], int(m.fields['size'])))
192 verifier = cumulus.ChecksumVerifier(m.fields['checksum'])
194 for block in m.data():
195 data = store.get(block)
196 verifier.update(data)
198 if int(m.fields['size']) != size:
199 raise ValueError("File size does not match!")
200 if not verifier.valid():
201 raise ValueError("Bad checksum found")
203 # Verify that the list of segments included with the snapshot was
204 # actually accurate: covered all segments that were really read, and
205 # doesn't contain duplicates.
206 listed_segments = set(d['Segments'].split())
207 if cumulus.accessed_segments - listed_segments:
208 print("Error: Some segments not listed in descriptor!")
209 print(sorted(list(cumulus.accessed_segments - listed_segments)))
210 if listed_segments - cumulus.accessed_segments :
211 print("Warning: Extra unused segments listed in descriptor!")
212 print(sorted(list(listed_segments - cumulus.accessed_segments)))
215 def cmd_restore_snapshot(args):
216 """ Restore a snapshot, or some subset of files from it
219 store = cumulus.CumulusStore(options.store)
220 snapshot = cumulus.parse_full(store.load_snapshot(args[0]))
221 check_version(snapshot['Format'])
226 "Return true if the specified path should be included in the restore."
228 # No specification of what to restore => restore everything
229 if len(paths) == 0: return True
232 if path == p: return True
233 if path.startswith(p + "/"): return True
237 print("Warning: %s: %s" % (m.items.name, msg))
239 # Phase 1: Read the complete metadata log and create directory structure.
242 metadata_segments = {}
243 for m in cumulus.iterate_metadata(store, snapshot['Root']):
244 pathname = os.path.normpath(m.items.name)
245 while os.path.isabs(pathname):
246 pathname = pathname[1:]
247 if not matchpath(pathname): continue
249 destpath = os.path.join(destdir, pathname)
250 if m.items.type == 'd':
253 (path, filename) = os.path.split(destpath)
255 metadata_items.append((pathname, m))
256 if m.items.type in ('-', 'f'):
257 metadata_paths[pathname] = m
258 for block in m.data():
259 (segment, object, checksum, slice) \
260 = cumulus.CumulusStore.parse_ref(block)
261 if segment not in metadata_segments:
262 metadata_segments[segment] = set()
263 metadata_segments[segment].add(pathname)
266 if not os.path.isdir(path):
267 print("mkdir:", path)
269 except Exception as e:
270 warn(m, "Error creating directory structure: %s" % (e,))
273 # Phase 2: Restore files, ordered by how data is stored in segments.
274 def restore_file(pathname, m):
275 assert m.items.type in ('-', 'f')
276 print("extract:", pathname)
277 destpath = os.path.join(destdir, pathname)
279 file = open(destpath, 'wb')
280 verifier = cumulus.ChecksumVerifier(m.items.checksum)
282 for block in m.data():
283 data = store.get(block)
284 verifier.update(data)
288 if int(m.fields['size']) != size:
289 raise ValueError("File size does not match!")
290 if not verifier.valid():
291 raise ValueError("Bad checksum found")
293 while metadata_segments:
294 (segment, items) = metadata_segments.popitem()
295 print("+ Segment", segment)
296 for pathname in sorted(items):
297 if pathname in metadata_paths:
298 restore_file(pathname, metadata_paths[pathname])
299 del metadata_paths[pathname]
301 print("+ Remaining files")
302 while metadata_paths:
303 (pathname, m) = metadata_paths.popitem()
304 restore_file(pathname, m)
306 # Phase 3: Restore special files (symlinks, devices).
307 # Phase 4: Restore directory permissions and modification times.
308 for (pathname, m) in reversed(metadata_items):
309 print("permissions:", pathname)
310 destpath = os.path.join(destdir, pathname)
311 (path, filename) = os.path.split(destpath)
313 # TODO: Check for ../../../paths that might attempt to write outside
314 # the destination directory. Maybe also check attempts to follow
315 # symlinks pointing outside?
318 if m.items.type in ('-', 'f', 'd'):
320 elif m.items.type == 'l':
322 target = m.items.target
324 # Old (v0.2 format) name for 'target'
325 target = m.items.contents
326 os.symlink(target, destpath)
327 elif m.items.type == 'p':
329 elif m.items.type in ('c', 'b'):
330 if m.items.type == 'c':
331 mode = 0o600 | stat.S_IFCHR
333 mode = 0o600 | stat.S_IFBLK
334 os.mknod(destpath, mode, os.makedev(*m.items.device))
335 elif m.items.type == 's':
336 pass # TODO: Implement
338 warn(m, "Unknown type code: " + m.items.type)
341 except Exception as e:
342 warn(m, "Error restoring: %s" % (e,))
346 uid = m.items.user[0]
347 gid = m.items.group[0]
348 os.lchown(destpath, uid, gid)
349 except Exception as e:
350 warn(m, "Error restoring file ownership: %s" % (e,))
352 if m.items.type == 'l':
356 os.chmod(destpath, m.items.mode)
357 except Exception as e:
358 warn(m, "Error restoring file permissions: %s" % (e,))
361 os.utime(destpath, (time.time(), m.items.mtime))
362 except Exception as e:
363 warn(m, "Error restoring file timestamps: %s" % (e,))
368 usage = ["%prog [option]... command [arg]...", "", "Commands:"]
370 for cmd, method in globals().items():
371 if cmd.startswith ('cmd_'):
372 usage.append(cmd[4:].replace('_', '-') + ':' + method.__doc__)
373 parser = OptionParser(usage="\n".join(usage))
374 parser.add_option("-v", action="store_true", dest="verbose", default=False,
375 help="increase verbosity")
376 parser.add_option("-n", action="store_true", dest="dry_run", default=False,
378 parser.add_option("--store", dest="store",
379 help="specify path to backup data store")
380 parser.add_option("--localdb", dest="localdb",
381 help="specify path to local database")
382 parser.add_option("--intent", dest="intent", default=1.0,
383 help="give expected next snapshot type when cleaning")
385 (options, args) = parser.parse_args(argv[1:])
392 method = globals().get('cmd_' + cmd.replace('-', '_'))
396 print("Unknown command:", cmd)