given data from a previous backup.
"""
+from __future__ import division, print_function, unicode_literals
+
import base64
import hashlib
import itertools
import subprocess
import sys
import tarfile
+import time
import cumulus
+from cumulus import util
CHECKSUM_ALGORITHM = "sha224"
-
CHUNKER_PROGRAM = "cumulus-chunker-standalone"
+# TODO: Move to somewhere common
+SQLITE_TIMESTAMP = "%Y-%m-%d %H:%M:%S"
+
class Chunker(object):
"""Compute sub-file chunk boundaries using a sliding Rabin fingerprint.
def compute_breaks(self, buf):
breaks = [0]
signature = self.window_init()
- for i in xrange(len(buf)):
+ for i in range(len(buf)):
self.window_update(signature, ord(buf[i]))
block_len = i - breaks[-1] + 1
if ((signature[0] % self.TARGET_CHUNK_SIZE == self.BREAKMARK_VALUE
n -= i
position = 0
- for next_start, (size, digest) in sorted(signatures.iteritems()):
+ for next_start, (size, digest) in sorted(signatures.items()):
if next_start < position:
- print "Warning: overlapping signatures, ignoring"
+ print("Warning: overlapping signatures, ignoring")
continue
skip(next_start - position)
records.append(struct.pack(">H", size) + digest)
"""Loads signatures from the binary format stored in the database."""
entry_size = 2 + self.hash_size
if len(signatures) % entry_size != 0:
- print "Warning: Invalid signatures to load"
+ print("Warning: Invalid signatures to load")
return {}
null_digest = "\x00" * self.hash_size
if metadata.items.type not in ("-", "f"): continue
try:
path = os.path.join(reference_path, metadata.items.name)
- print "Path:", path
+ print("Path:", path)
# TODO: Check file size for early abort if different
self.rebuild_file(open(path), metadata)
except IOError as e:
- print e
+ print(e)
pass # Ignore the file
self.database.commit()
+    def reload_segment_metadata(self, segment_metadata):
+        """Read a segment metadata (.meta) file into the local database.
+
+        Updates the segments table in the local database with information from
+        a segment metadata backup file. Old data is not overwritten, so
+        loading a .meta file with partial information is fine.
+
+        Args:
+            segment_metadata: file-like object containing segment metadata
+                records in the cumulus key/value format (records are
+                terminated by a blank line).
+        """
+        # Each parsed record must carry a "segment" key naming the segment;
+        # the remaining key/value pairs become column updates for that row.
+        for info in cumulus.parse(segment_metadata,
+                                  terminate=lambda l: len(l) == 0):
+            segment = info.pop("segment")
+            self.insert_segment_info(segment, info)
+
+        self.database.commit()
+
+    def insert_segment_info(self, segment, info):
+        """Store per-segment metadata values in the segments table.
+
+        Args:
+            segment: segment name; resolved to a database row id via
+                segment_to_id.
+            info: mapping of column name to value; each entry updates the
+                corresponding column of the segment's row.
+        """
+        id = self.segment_to_id(segment)
+        for k, v in info.items():
+            # NOTE(review): the column name k is concatenated directly into
+            # the SQL statement (values are bound safely); this assumes keys
+            # come only from trusted .meta files -- confirm before accepting
+            # metadata from untrusted sources.
+            self.cursor.execute("update segments set " + k + " = ? "
+                                "where segmentid = ?",
+                                (v, id))
+
def rebuild_file(self, fp, metadata):
"""Recompute database signatures if a file is unchanged.
buf = fp.read(length)
verifier.update(buf)
+ # Zero blocks get no checksums, so skip further processing on them.
+ if object is None: continue
+
if exact:
csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM)
csum.update(buf)
subblock[k] = self.chunker.dump_signatures(subblock[k])
self.store_checksums(checksums, subblock)
else:
- print "Checksum mismatch"
+ print("Checksum mismatch")
def store_checksums(self, block_checksums, subblock_signatures):
- for (segment, object), (size, checksum) in block_checksums.iteritems():
+ for (segment, object), (size, checksum) in block_checksums.items():
segmentid = self.segment_to_id(segment)
self.cursor.execute(
"""insert or ignore into block_index(segmentid, object)
if extension not in self.filters: return
filter_cmd = self.filters[extension]
+ # File attributes.
+ st_buf = os.stat(path)
+ timestamp = time.strftime(SQLITE_TIMESTAMP,
+ time.gmtime(st_buf.st_mtime))
+
# Compute attributes of the compressed segment data.
BLOCK_SIZE = 4096
with open(path) as segment:
data_size += tarinfo.size
object_count += 1
- return {"segment": segment_name,
- "path": relative_path,
+ return {"segment": util.uri_encode_pathname(segment_name),
+ "path": util.uri_encode_pathname(relative_path),
"checksum": checksum,
"data_size": data_size,
- "disk_size": disk_size}
+ "disk_size": disk_size,
+ "timestamp": timestamp}
if __name__ == "__main__":
+ # Sample code to reconstruct segment metadata--ought to be relocated.
if False:
segment_rebuilder = SegmentStateRebuilder()
topdir = sys.argv[1]
os.path.relpath(f, topdir))
if metadata:
for (k, v) in sorted(metadata.items()):
- print "%s: %s" % (k, cumulus.uri_encode(str(v)))
- print
+ print("%s: %s" % (k, v))
+ print()
+ sys.exit(0)
+
+ # Sample code to rebuild the segments table from metadata--needs to be
+ # merged with the code below.
+ if False:
+ rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1]))
+ rebuilder.reload_segment_metadata(open(sys.argv[2]))
sys.exit(0)
# Read metadata from stdin; filter out lines starting with "@@" so the