When verifying a snapshot, check that the segment list is accurate.
author Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 12 Dec 2007 05:49:23 +0000 (21:49 -0800)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Wed, 12 Dec 2007 05:49:23 +0000 (21:49 -0800)
This should help find bugs such as the one fixed in commit 1b39ce3ff11a.

lbs-util
lbs.py

index 8789a75..6f2dd07 100755 (executable)
--- a/lbs-util
+++ b/lbs-util
@@ -132,6 +132,7 @@ def cmd_verify_snapshots(snapshots):
     lowlevel = lbs.LowlevelDataStore(options.store)
     store = lbs.ObjectStore(lowlevel)
     for s in snapshots:
+        lbs.accessed_segments.clear()
         print "#### Snapshot", s
         d = lbs.parse_full(store.load_snapshot(s))
         check_version(d['Format'])
@@ -150,6 +151,17 @@ def cmd_verify_snapshots(snapshots):
                 raise ValueError("File size does not match!")
             if not verifier.valid():
                 raise ValueError("Bad checksum found")
+
+        # Verify that the list of segments included with the snapshot is
+        # actually accurate: it covers all segments that were really read,
+        # and doesn't list extra segments that were never accessed.
+        listed_segments = set(d['Segments'].split())
+        if lbs.accessed_segments - listed_segments:
+            print "Error: Some segments not listed in descriptor!"
+            print sorted(list(lbs.accessed_segments - listed_segments))
+        if listed_segments - lbs.accessed_segments :
+            print "Warning: Extra unused segments listed in descriptor!"
+            print sorted(list(listed_segments - lbs.accessed_segments))
     store.cleanup()
 
 # Restore a snapshot, or some subset of files from it
diff --git a/lbs.py b/lbs.py
index 712b072..3108998 100644 (file)
--- a/lbs.py
+++ b/lbs.py
@@ -18,6 +18,9 @@ FORMAT_VERSION = (0, 6)         # LBS Snapshot v0.6
 # Maximum number of nested indirect references allowed in a snapshot.
 MAX_RECURSION_DEPTH = 3
 
+# All segments which have been accessed this session.
+accessed_segments = set()
+
 class Struct:
     """A class which merely acts as a data container.
 
@@ -157,6 +160,7 @@ class ObjectStore:
         return (segment, object, checksum, slice)
 
     def get_segment(self, segment):
+        accessed_segments.add(segment)
         raw = self.store.lowlevel_open(segment + ".tar.gpg")
 
         (input, output) = os.popen2("lbs-filter-gpg --decrypt")
@@ -192,6 +196,7 @@ class ObjectStore:
             f.close()
 
     def load_object(self, segment, object):
+        accessed_segments.add(segment)
         path = os.path.join(self.get_cachedir(), segment, object)
         if not os.access(path, os.R_OK):
             self.extract_segment(segment)