Manual 2to3 fixups.
[cumulus.git] / python / cumulus / rebuild_database.py
index 10513ba..3f391d8 100755 (executable)
@@ -26,6 +26,8 @@ the local database.  This can be used to recover from a local database loss,
 given data from a previous backup.
 """
 
+from __future__ import division, print_function, unicode_literals
+
 import base64
 import hashlib
 import itertools
@@ -35,13 +37,16 @@ import struct
 import subprocess
 import sys
 import tarfile
+import time
 
 import cumulus
 
 CHECKSUM_ALGORITHM = "sha224"
-
 CHUNKER_PROGRAM = "cumulus-chunker-standalone"
 
+# TODO: Move to somewhere common
+SQLITE_TIMESTAMP = "%Y-%m-%d %H:%M:%S"
+
 class Chunker(object):
     """Compute sub-file chunk boundaries using a sliding Rabin fingerprint.
 
@@ -110,7 +115,7 @@ class Chunker(object):
     def compute_breaks(self, buf):
         breaks = [0]
         signature = self.window_init()
-        for i in xrange(len(buf)):
+        for i in range(len(buf)):
             self.window_update(signature, ord(buf[i]))
             block_len = i - breaks[-1] + 1
             if ((signature[0] % self.TARGET_CHUNK_SIZE == self.BREAKMARK_VALUE
@@ -160,9 +165,9 @@ class Chunker(object):
                 n -= i
 
         position = 0
-        for next_start, (size, digest) in sorted(signatures.iteritems()):
+        for next_start, (size, digest) in sorted(signatures.items()):
             if next_start < position:
-                print "Warning: overlapping signatures, ignoring"
+                print("Warning: overlapping signatures, ignoring")
                 continue
             skip(next_start - position)
             records.append(struct.pack(">H", size) + digest)
@@ -174,7 +179,7 @@ class Chunker(object):
         """Loads signatures from the binary format stored in the database."""
         entry_size = 2 + self.hash_size
         if len(signatures) % entry_size != 0:
-            print "Warning: Invalid signatures to load"
+            print("Warning: Invalid signatures to load")
             return {}
 
         null_digest = "\x00" * self.hash_size
@@ -247,15 +252,36 @@ class DatabaseRebuilder(object):
             if metadata.items.type not in ("-", "f"): continue
             try:
                 path = os.path.join(reference_path, metadata.items.name)
-                print "Path:", path
+                print("Path:", path)
                 # TODO: Check file size for early abort if different
                 self.rebuild_file(open(path), metadata)
             except IOError as e:
-                print e
+                print(e)
                 pass  # Ignore the file
 
         self.database.commit()
 
+    def reload_segment_metadata(self, segment_metadata):
+        """Read a segment metadata (.meta) file into the local database.
+
+        Updates the segments table in the local database with information from
+a segment metadata backup file.  Old data is not overwritten, so
+        loading a .meta file with partial information is fine.
+        """
+        for info in cumulus.parse(segment_metadata,
+                                     terminate=lambda l: len(l) == 0):
+            segment = info.pop("segment")
+            self.insert_segment_info(segment, info)
+
+        self.database.commit()
+
+    def insert_segment_info(self, segment, info):
+        id = self.segment_to_id(segment)
+        for k, v in info.items():
+            self.cursor.execute("update segments set " + k + " = ? "
+                                "where segmentid = ?",
+                                (v, id))
+
     def rebuild_file(self, fp, metadata):
         """Recompute database signatures if a file is unchanged.
 
@@ -277,6 +303,9 @@ class DatabaseRebuilder(object):
             buf = fp.read(length)
             verifier.update(buf)
 
+            # Zero blocks get no checksums, so skip further processing on them.
+            if object is None: continue
+
             if exact:
                 csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM)
                 csum.update(buf)
@@ -296,10 +325,10 @@ class DatabaseRebuilder(object):
                 subblock[k] = self.chunker.dump_signatures(subblock[k])
             self.store_checksums(checksums, subblock)
         else:
-            print "Checksum mismatch"
+            print("Checksum mismatch")
 
     def store_checksums(self, block_checksums, subblock_signatures):
-        for (segment, object), (size, checksum) in block_checksums.iteritems():
+        for (segment, object), (size, checksum) in block_checksums.items():
             segmentid = self.segment_to_id(segment)
             self.cursor.execute(
                 """insert or ignore into block_index(segmentid, object)
@@ -363,6 +392,11 @@ class SegmentStateRebuilder(object):
         if extension not in self.filters: return
         filter_cmd = self.filters[extension]
 
+        # File attributes.
+        st_buf = os.stat(path)
+        timestamp = time.strftime(SQLITE_TIMESTAMP,
+                                  time.gmtime(st_buf.st_mtime))
+
         # Compute attributes of the compressed segment data.
         BLOCK_SIZE = 4096
         with open(path) as segment:
@@ -385,13 +419,15 @@ class SegmentStateRebuilder(object):
                 data_size += tarinfo.size
                 object_count += 1
 
-        return {"segment": segment_name,
-                "path": relative_path,
+        return {"segment": cumulus.uri_encode(segment_name),
+                "path": cumulus.uri_encode(relative_path),
                 "checksum": checksum,
                 "data_size": data_size,
-                "disk_size": disk_size}
+                "disk_size": disk_size,
+                "timestamp": timestamp}
 
 if __name__ == "__main__":
+    # Sample code to reconstruct segment metadata--ought to be relocated.
     if False:
         segment_rebuilder = SegmentStateRebuilder()
         topdir = sys.argv[1]
@@ -406,8 +442,15 @@ if __name__ == "__main__":
                 os.path.relpath(f, topdir))
             if metadata:
                 for (k, v) in sorted(metadata.items()):
-                    print "%s: %s" % (k, cumulus.uri_encode(str(v)))
-                print
+                    print("%s: %s" % (k, v))
+                print()
+        sys.exit(0)
+
+    # Sample code to rebuild the segments table from metadata--needs to be
+    # merged with the code below.
+    if False:
+        rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1]))
+        rebuilder.reload_segment_metadata(open(sys.argv[2]))
         sys.exit(0)
 
     # Read metadata from stdin; filter out lines starting with "@@" so the