X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2Frebuild_database.py;h=4ab580f05b2ca0eb6af6c321ae45adb4c4e61c97;hb=8b4b764f036d9ac945d36a1966f2eb284c4f44a3;hp=07f054284e210747fb2cd6c0be45bf1fa2b86574;hpb=86852c7d1d491d73ae9ecc64ee5a8c3dcece4ca0;p=cumulus.git diff --git a/python/cumulus/rebuild_database.py b/python/cumulus/rebuild_database.py index 07f0542..4ab580f 100755 --- a/python/cumulus/rebuild_database.py +++ b/python/cumulus/rebuild_database.py @@ -26,6 +26,8 @@ the local database. This can be used to recover from a local database loss, given data from a previous backup. """ +from __future__ import division, print_function, unicode_literals + import base64 import hashlib import itertools @@ -38,6 +40,7 @@ import tarfile import time import cumulus +from cumulus import util CHECKSUM_ALGORITHM = "sha224" CHUNKER_PROGRAM = "cumulus-chunker-standalone" @@ -113,7 +116,7 @@ class Chunker(object): def compute_breaks(self, buf): breaks = [0] signature = self.window_init() - for i in xrange(len(buf)): + for i in range(len(buf)): self.window_update(signature, ord(buf[i])) block_len = i - breaks[-1] + 1 if ((signature[0] % self.TARGET_CHUNK_SIZE == self.BREAKMARK_VALUE @@ -163,9 +166,9 @@ class Chunker(object): n -= i position = 0 - for next_start, (size, digest) in sorted(signatures.iteritems()): + for next_start, (size, digest) in sorted(signatures.items()): if next_start < position: - print "Warning: overlapping signatures, ignoring" + print("Warning: overlapping signatures, ignoring") continue skip(next_start - position) records.append(struct.pack(">H", size) + digest) @@ -177,7 +180,7 @@ class Chunker(object): """Loads signatures from the binary format stored in the database.""" entry_size = 2 + self.hash_size if len(signatures) % entry_size != 0: - print "Warning: Invalid signatures to load" + print("Warning: Invalid signatures to load") return {} null_digest = "\x00" * self.hash_size @@ -250,15 +253,36 @@ class DatabaseRebuilder(object): if metadata.items.type not in ("-", "f"): continue try: path = os.path.join(reference_path, metadata.items.name) - print "Path:", path + print("Path:", path) # TODO: Check file size for early abort if different self.rebuild_file(open(path), metadata) except IOError as e: - print e + print(e) pass # Ignore the file self.database.commit() + def reload_segment_metadata(self, segment_metadata): + """Read a segment metadata (.meta) file into the local database. + + Updates the segments table in the local database with information from + a a segment metadata backup file. Old data is not overwritten, so + loading a .meta file with partial information is fine. + """ + for info in cumulus.parse(segment_metadata, + terminate=lambda l: len(l) == 0): + segment = info.pop("segment") + self.insert_segment_info(segment, info) + + self.database.commit() + + def insert_segment_info(self, segment, info): + id = self.segment_to_id(segment) + for k, v in info.items(): + self.cursor.execute("update segments set " + k + " = ? " + "where segmentid = ?", + (v, id)) + def rebuild_file(self, fp, metadata): """Recompute database signatures if a file is unchanged. @@ -280,6 +304,9 @@ class DatabaseRebuilder(object): buf = fp.read(length) verifier.update(buf) + # Zero blocks get no checksums, so skip further processing on them. + if object is None: continue + if exact: csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM) csum.update(buf) @@ -299,10 +326,10 @@ class DatabaseRebuilder(object): subblock[k] = self.chunker.dump_signatures(subblock[k]) self.store_checksums(checksums, subblock) else: - print "Checksum mismatch" + print("Checksum mismatch") def store_checksums(self, block_checksums, subblock_signatures): - for (segment, object), (size, checksum) in block_checksums.iteritems(): + for (segment, object), (size, checksum) in block_checksums.items(): segmentid = self.segment_to_id(segment) self.cursor.execute( """insert or ignore into block_index(segmentid, object) @@ -393,14 +420,15 @@ class SegmentStateRebuilder(object): data_size += tarinfo.size object_count += 1 - return {"segment": cumulus.uri_encode(segment_name), - "path": cumulus.uri_encode(relative_path), + return {"segment": util.uri_encode_pathname(segment_name), + "path": util.uri_encode_pathname(relative_path), "checksum": checksum, "data_size": data_size, "disk_size": disk_size, "timestamp": timestamp} if __name__ == "__main__": + # Sample code to reconstruct segment metadata--ought to be relocated. if False: segment_rebuilder = SegmentStateRebuilder() topdir = sys.argv[1] @@ -415,8 +443,15 @@ if __name__ == "__main__": os.path.relpath(f, topdir)) if metadata: for (k, v) in sorted(metadata.items()): - print "%s: %s" % (k, v) - print + print("%s: %s" % (k, v)) + print() + sys.exit(0) + + # Sample code to rebuild the segments table from metadata--needs to be + # merged with the code below. + if False: + rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1])) + rebuilder.reload_segment_metadata(open(sys.argv[2])) sys.exit(0) # Read metadata from stdin; filter out lines starting with "@@" so the