Bugfix for database checksum reconstruction with zero blocks.
[cumulus.git] / python / cumulus / rebuild_database.py
index 10513ba..10a5f9a 100755 (executable)
@@ -35,13 +35,16 @@ import struct
 import subprocess
 import sys
 import tarfile
+import time
 
 import cumulus
 
 CHECKSUM_ALGORITHM = "sha224"
-
 CHUNKER_PROGRAM = "cumulus-chunker-standalone"
 
+# TODO: Move to somewhere common
+SQLITE_TIMESTAMP = "%Y-%m-%d %H:%M:%S"
+
 class Chunker(object):
     """Compute sub-file chunk boundaries using a sliding Rabin fingerprint.
 
@@ -256,6 +259,27 @@ class DatabaseRebuilder(object):
 
         self.database.commit()
 
+    def reload_segment_metadata(self, segment_metadata):
+        """Read a segment metadata (.meta) file into the local database.
+
+        Updates the segments table in the local database with information from
+ a segment metadata backup file.  Old data is not overwritten, so
+        loading a .meta file with partial information is fine.
+        """
+        for info in cumulus.parse(segment_metadata,
+                                     terminate=lambda l: len(l) == 0):
+            segment = info.pop("segment")
+            self.insert_segment_info(segment, info)
+
+        self.database.commit()
+
+    def insert_segment_info(self, segment, info):
+        id = self.segment_to_id(segment)
+        for k, v in info.items():
+            self.cursor.execute("update segments set " + k + " = ? "
+                                "where segmentid = ?",
+                                (v, id))
+
     def rebuild_file(self, fp, metadata):
         """Recompute database signatures if a file is unchanged.
 
@@ -277,6 +301,9 @@ class DatabaseRebuilder(object):
             buf = fp.read(length)
             verifier.update(buf)
 
+            # Zero blocks get no checksums, so skip further processing on them.
+            if object is None: continue
+
             if exact:
                 csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM)
                 csum.update(buf)
@@ -363,6 +390,11 @@ class SegmentStateRebuilder(object):
         if extension not in self.filters: return
         filter_cmd = self.filters[extension]
 
+        # File attributes.
+        st_buf = os.stat(path)
+        timestamp = time.strftime(SQLITE_TIMESTAMP,
+                                  time.gmtime(st_buf.st_mtime))
+
         # Compute attributes of the compressed segment data.
         BLOCK_SIZE = 4096
         with open(path) as segment:
@@ -385,13 +417,15 @@ class SegmentStateRebuilder(object):
                 data_size += tarinfo.size
                 object_count += 1
 
-        return {"segment": segment_name,
-                "path": relative_path,
+        return {"segment": cumulus.uri_encode(segment_name),
+                "path": cumulus.uri_encode(relative_path),
                 "checksum": checksum,
                 "data_size": data_size,
-                "disk_size": disk_size}
+                "disk_size": disk_size,
+                "timestamp": timestamp}
 
 if __name__ == "__main__":
+    # Sample code to reconstruct segment metadata--ought to be relocated.
     if False:
         segment_rebuilder = SegmentStateRebuilder()
         topdir = sys.argv[1]
@@ -406,10 +440,17 @@ if __name__ == "__main__":
                 os.path.relpath(f, topdir))
             if metadata:
                 for (k, v) in sorted(metadata.items()):
-                    print "%s: %s" % (k, cumulus.uri_encode(str(v)))
+                    print "%s: %s" % (k, v)
                 print
         sys.exit(0)
 
+    # Sample code to rebuild the segments table from metadata--needs to be
+    # merged with the code below.
+    if False:
+        rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1]))
+        rebuilder.reload_segment_metadata(open(sys.argv[2]))
+        sys.exit(0)
+
     # Read metadata from stdin; filter out lines starting with "@@" so the
     # statcache file can be parsed as well.
     metadata = (x for x in sys.stdin if not x.startswith("@@"))