X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2Frebuild_database.py;h=10a5f9a92c969d9088b464246a9272f303a200e6;hb=710d0e959d9b79859df8568aac9741a79486d53a;hp=10513babf2f545d5e974442b6d9668f41c0c068c;hpb=a889b1bf040b9f9877f33aedc83b514a1e44fbbc;p=cumulus.git diff --git a/python/cumulus/rebuild_database.py b/python/cumulus/rebuild_database.py index 10513ba..10a5f9a 100755 --- a/python/cumulus/rebuild_database.py +++ b/python/cumulus/rebuild_database.py @@ -35,13 +35,16 @@ import struct import subprocess import sys import tarfile +import time import cumulus CHECKSUM_ALGORITHM = "sha224" - CHUNKER_PROGRAM = "cumulus-chunker-standalone" +# TODO: Move to somewhere common +SQLITE_TIMESTAMP = "%Y-%m-%d %H:%M:%S" + class Chunker(object): """Compute sub-file chunk boundaries using a sliding Rabin fingerprint. @@ -256,6 +259,27 @@ class DatabaseRebuilder(object): self.database.commit() + def reload_segment_metadata(self, segment_metadata): + """Read a segment metadata (.meta) file into the local database. + + Updates the segments table in the local database with information from + a segment metadata backup file. Old data is not overwritten, so + loading a .meta file with partial information is fine. + """ + for info in cumulus.parse(segment_metadata, + terminate=lambda l: len(l) == 0): + segment = info.pop("segment") + self.insert_segment_info(segment, info) + + self.database.commit() + + def insert_segment_info(self, segment, info): + id = self.segment_to_id(segment) + for k, v in info.items(): + self.cursor.execute("update segments set " + k + " = ? " + "where segmentid = ?", + (v, id)) + def rebuild_file(self, fp, metadata): """Recompute database signatures if a file is unchanged. @@ -277,6 +301,9 @@ class DatabaseRebuilder(object): buf = fp.read(length) verifier.update(buf) + # Zero blocks get no checksums, so skip further processing on them. 
+ if object is None: continue + if exact: csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM) csum.update(buf) @@ -363,6 +390,11 @@ class SegmentStateRebuilder(object): if extension not in self.filters: return filter_cmd = self.filters[extension] + # File attributes. + st_buf = os.stat(path) + timestamp = time.strftime(SQLITE_TIMESTAMP, + time.gmtime(st_buf.st_mtime)) + # Compute attributes of the compressed segment data. BLOCK_SIZE = 4096 with open(path) as segment: @@ -385,13 +417,15 @@ class SegmentStateRebuilder(object): data_size += tarinfo.size object_count += 1 - return {"segment": segment_name, - "path": relative_path, + return {"segment": cumulus.uri_encode(segment_name), + "path": cumulus.uri_encode(relative_path), "checksum": checksum, "data_size": data_size, - "disk_size": disk_size} + "disk_size": disk_size, + "timestamp": timestamp} if __name__ == "__main__": + # Sample code to reconstruct segment metadata--ought to be relocated. if False: segment_rebuilder = SegmentStateRebuilder() topdir = sys.argv[1] @@ -406,10 +440,17 @@ if __name__ == "__main__": os.path.relpath(f, topdir)) if metadata: for (k, v) in sorted(metadata.items()): - print "%s: %s" % (k, cumulus.uri_encode(str(v))) + print "%s: %s" % (k, v) print sys.exit(0) + # Sample code to rebuild the segments table from metadata--needs to be + # merged with the code below. + if False: + rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1])) + rebuilder.reload_segment_metadata(open(sys.argv[2])) + sys.exit(0) + # Read metadata from stdin; filter out lines starting with "@@" so the # statcache file can be parsed as well. metadata = (x for x in sys.stdin if not x.startswith("@@"))