From: Michael Vrable Date: Sat, 1 Mar 2008 00:08:35 +0000 (-0800) Subject: Make restoring from snapshots more efficient. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=a285d431d827a198c6eb45c44158048deca3d772;p=cumulus.git Make restoring from snapshots more efficient. When restoring a snapshot, restore files in order roughly determined by how they are stored in segments, instead of in pure lexicographic order. This should ensure that, for the most part, each segment only has to be unpacked once, instead of perhaps many times as could happen previously, and so should make restoring more efficient. This implementation loads all metadata into memory to determine the ordering, and so restores are now much more memory-intensive than before. It would be good to work on memory requirements later--either offer an option to use the old behavior, or perhaps load some of the data into a temporary database. --- diff --git a/lbs-util b/lbs-util index 6f97129..d617705 100755 --- a/lbs-util +++ b/lbs-util @@ -192,12 +192,77 @@ def cmd_restore_snapshot(args): def warn(m, msg): print "Warning: %s: %s" % (m.items.name, msg) + # Phase 1: Read the complete metadata log and create directory structure. + metadata_items = [] + metadata_paths = {} + metadata_segments = {} for m in lbs.iterate_metadata(store, snapshot['Root']): pathname = os.path.normpath(m.items.name) while os.path.isabs(pathname): pathname = pathname[1:] if not matchpath(pathname): continue - print pathname + + destpath = os.path.join(destdir, pathname) + if m.items.type == 'd': + path = destpath + else: + (path, filename) = os.path.split(destpath) + + metadata_items.append((pathname, m)) + if m.items.type in ('-', 'f'): + metadata_paths[pathname] = m + for block in m.data(): + (segment, object, checksum, slice) \ + = lbs.ObjectStore.parse_ref(block) + if segment not in metadata_segments: + metadata_segments[segment] = set() + metadata_segments[segment].add(pathname) + + try: + if not os.path.isdir(path): + print "mkdir:", path + os.makedirs(path) + except Exception, e: + warn(m, "Error creating directory structure: %s" % (e,)) + continue + + # Phase 2: Restore files, ordered by how data is stored in segments. + def restore_file(pathname, m): + assert m.items.type in ('-', 'f') + print "extract:", pathname + destpath = os.path.join(destdir, pathname) + + file = open(destpath, 'wb') + verifier = lbs.ChecksumVerifier(m.items.checksum) + size = 0 + for block in m.data(): + data = store.get(block) + verifier.update(data) + size += len(data) + file.write(data) + file.close() + if int(m.fields['size']) != size: + raise ValueError("File size does not match!") + if not verifier.valid(): + raise ValueError("Bad checksum found") + + while metadata_segments: + (segment, items) = metadata_segments.popitem() + print "+ Segment", segment + for pathname in sorted(items): + if pathname in metadata_paths: + restore_file(pathname, metadata_paths[pathname]) + del metadata_paths[pathname] + + print "+ Remaining files" + while metadata_paths: + (pathname, m) = metadata_paths.popitem() + restore_file(pathname, m) + + # Phase 3: Restore special files (symlinks, devices). + # Phase 4: Restore directory permissions and modification times. + for (pathname, m) in reversed(metadata_items): + print "permissions:", pathname destpath = os.path.join(destdir, pathname) (path, filename) = os.path.split(destpath) @@ -206,26 +271,8 @@ def cmd_restore_snapshot(args): # symlinks pointing outside? try: - if not os.path.isdir(path): - os.makedirs(path) - - if m.items.type in ('-', 'f'): - file = open(destpath, 'wb') - verifier = lbs.ChecksumVerifier(m.items.checksum) - size = 0 - for block in m.data(): - data = store.get(block) - verifier.update(data) - size += len(data) - file.write(data) - file.close() - if int(m.fields['size']) != size: - raise ValueError("File size does not match!") - if not verifier.valid(): - raise ValueError("Bad checksum found") - elif m.items.type == 'd': - if filename != '.': - os.mkdir(destpath) + if m.items.type in ('-', 'f', 'd'): + pass elif m.items.type == 'l': try: target = m.items.target