import subprocess
import sys
import tarfile
+import time
import cumulus
CHECKSUM_ALGORITHM = "sha224"
-
CHUNKER_PROGRAM = "cumulus-chunker-standalone"
+# TODO: Move to somewhere common
+SQLITE_TIMESTAMP = "%Y-%m-%d %H:%M:%S"
+
class Chunker(object):
"""Compute sub-file chunk boundaries using a sliding Rabin fingerprint.
self.database.commit()
def reload_segment_metadata(self, segment_metadata):
    """Load a segment metadata (.meta) file into the local database.

    Every record parsed from the file must carry a "segment" field naming
    the segment it describes; the remaining fields are written to that
    segment's row in the segments table.  Only the columns present in a
    record are updated, so a .meta file holding partial information is
    fine: columns it does not mention keep their current values.

    segment_metadata is handed unchanged to cumulus.parse (the sample code
    in this file passes an open file).  The transaction is committed once,
    after all records have been applied.
    """
    def is_empty(line):
        # Passed to cumulus.parse as its `terminate` predicate.
        return len(line) == 0

    records = cumulus.parse(segment_metadata, terminate=is_empty)
    for record in records:
        # Remove the key field so only column data is left in the record.
        name = record.pop("segment")
        self.insert_segment_info(name, record)

    self.database.commit()
+
def insert_segment_info(self, segment, info):
    """Update columns of one row in the segments table.

    segment -- segment name; resolved to a row id via self.segment_to_id.
    info    -- mapping of column name to new value.  Columns not listed
               are left untouched.

    Raises ValueError if any key in info is not a plain identifier
    (letters, digits and underscores, not starting with a digit).  All keys
    are checked before any statement is executed, so a rejected mapping
    makes no changes.

    This method does not commit; the caller is responsible for that.
    """
    # Local import: only this method needs it.
    import re

    # Column names cannot be bound as SQL parameters and must be spliced
    # into the statement text.  They come from a metadata file, so refuse
    # anything that is not a bare identifier to keep a crafted field name
    # from altering the statement.
    for column in info:
        if not re.match(r"[A-Za-z_][A-Za-z0-9_]*\Z", column):
            raise ValueError("invalid segment metadata field: %r"
                             % (column,))

    segment_id = self.segment_to_id(segment)
    for column, value in info.items():
        self.cursor.execute("update segments set " + column + " = ? "
                            "where segmentid = ?",
                            (value, segment_id))
+
def rebuild_file(self, fp, metadata):
"""Recompute database signatures if a file is unchanged.
buf = fp.read(length)
verifier.update(buf)
+ # Zero blocks get no checksums, so skip further processing on them.
+ if object is None: continue
+
if exact:
csum = cumulus.ChecksumCreator(CHECKSUM_ALGORITHM)
csum.update(buf)
if extension not in self.filters: return
filter_cmd = self.filters[extension]
+ # File attributes.
+ st_buf = os.stat(path)
+ timestamp = time.strftime(SQLITE_TIMESTAMP,
+ time.gmtime(st_buf.st_mtime))
+
# Compute attributes of the compressed segment data.
BLOCK_SIZE = 4096
with open(path) as segment:
data_size += tarinfo.size
object_count += 1
- return {"segment": segment_name,
- "path": relative_path,
+ return {"segment": cumulus.uri_encode(segment_name),
+ "path": cumulus.uri_encode(relative_path),
"checksum": checksum,
"data_size": data_size,
- "disk_size": disk_size}
+ "disk_size": disk_size,
+ "timestamp": timestamp}
if __name__ == "__main__":
+ # Sample code to reconstruct segment metadata--ought to be relocated.
if False:
segment_rebuilder = SegmentStateRebuilder()
topdir = sys.argv[1]
os.path.relpath(f, topdir))
if metadata:
for (k, v) in sorted(metadata.items()):
- print "%s: %s" % (k, cumulus.uri_encode(str(v)))
+ print "%s: %s" % (k, v)
print
sys.exit(0)
+ # Sample code to rebuild the segments table from metadata--needs to be
+ # merged with the code below.
+ if False:
+ rebuilder = DatabaseRebuilder(cumulus.LocalDatabase(sys.argv[1]))
+ rebuilder.reload_segment_metadata(open(sys.argv[2]))
+ sys.exit(0)
+
# Read metadata from stdin; filter out lines starting with "@@" so the
# statcache file can be parsed as well.
metadata = (x for x in sys.stdin if not x.startswith("@@"))