Extend object reference syntax with size assertions.

author Michael Vrable <mvrable@cs.ucsd.edu>

Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)

committer Michael Vrable <mvrable@turin.ucsd.edu>

Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)
author Michael Vrable <mvrable@cs.ucsd.edu>
Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)
diff --git a/cumulus-util b/cumulus-util

index 5425bca..59c5fc7 100755 (executable)
--- a/cumulus-util
+++ b/cumulus-util
@@ -6,9 +6,9 @@ import getpass, os, stat, sys, time
  from optparse import OptionParser
  import lbs
  
  from optparse import OptionParser
  import lbs
  
-# We support up to "LBS Snapshot v0.6" formats, but are also limited by the lbs
+# We support up to "LBS Snapshot v0.8" formats, but are also limited by the lbs
  # module.
  # module.
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 6))
+FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 8))
  
  def check_version(format):
      ver = lbs.parse_metadata_version(format)
  
  def check_version(format):
      ver = lbs.parse_metadata_version(format)
diff --git a/doc/format.txt b/doc/format.txt

index 1511115..2ab2696 100644 (file)
--- a/doc/format.txt
+++ b/doc/format.txt
@@ -1,6 +1,6 @@
                         Backup Format Description
           for Cumulus: Efficient Filesystem Backup to the Cloud
                         Backup Format Description
           for Cumulus: Efficient Filesystem Backup to the Cloud
-                      Version: "LBS Snapshot v0.6"
+                      Version: "LBS Snapshot v0.8"
  
  NOTE: This format specification is intended to be mostly stable, but is
  still subject to change before the 1.0 release.  The code may provide
  
  NOTE: This format specification is intended to be mostly stable, but is
  still subject to change before the 1.0 release.  The code may provide
@@ -129,6 +129,13 @@ abbreviation, the slice syntax
      [<length>]
  is shorthand for
      [0+<length>]
      [<length>]
  is shorthand for
      [0+<length>]
+In place of a traditional slice, the annotation
+    [=<length>]
+may be used.  This selects the same bytes as [<length>], but
+additionally asserts that the referenced object is exactly <length>
+bytes long.  That is, this annotation never changes the bytes
+returned for the object; it only provides information (the object's
+size) about the underlying object store.
  
  Both a checksum and a slice can be used.  In this case, the checksum is
  given first, followed by the slice.  The checksum is computed over the
  
  Both a checksum and a slice can be used.  In this case, the checksum is
  given first, followed by the slice.  The checksum is computed over the
diff --git a/lbs.py b/lbs.py

index 507b9e1..ee4f445 100644 (file)
--- a/lbs.py
+++ b/lbs.py
@@ -13,7 +13,7 @@ import os, re, sha, tarfile, tempfile, thread
  from pysqlite2 import dbapi2 as sqlite3
  
  # The largest supported snapshot format that can be understood.
  from pysqlite2 import dbapi2 as sqlite3
  
  # The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 6)         # LBS Snapshot v0.6
+FORMAT_VERSION = (0, 8)         # LBS Snapshot v0.8
  
  # Maximum number of nested indirect references allowed in a snapshot.
  MAX_RECURSION_DEPTH = 3
  
  # Maximum number of nested indirect references allowed in a snapshot.
  MAX_RECURSION_DEPTH = 3
@@ -143,7 +143,7 @@ class ObjectStore:
          if m:
              return ("zero", None, None, (0, int(m.group(1))))
  
          if m:
              return ("zero", None, None, (0, int(m.group(1))))
  
-        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[((\d+)\+)?(\d+)\])?$", refstr)
+        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
          if not m: return
  
          segment = m.group(1)
          if not m: return
  
          segment = m.group(1)
@@ -155,11 +155,14 @@ class ObjectStore:
              checksum = checksum.lstrip("(").rstrip(")")
  
          if slice is not None:
              checksum = checksum.lstrip("(").rstrip(")")
  
          if slice is not None:
-            if m.group(5) is None:
+            if m.group(9) is not None:
+                # Size-assertion slice
+                slice = (0, int(m.group(9)), True)
+            elif m.group(6) is None:
                  # Abbreviated slice
                  # Abbreviated slice
-                slice = (0, int(m.group(7)))
+                slice = (0, int(m.group(8)), False)
              else:
              else:
-                slice = (int(m.group(6)), int(m.group(7)))
+                slice = (int(m.group(7)), int(m.group(8)), False)
  
          return (segment, object, checksum, slice)
  
  
          return (segment, object, checksum, slice)
  
@@ -231,7 +234,8 @@ class ObjectStore:
                  raise ValueError
  
          if slice is not None:
                  raise ValueError
  
          if slice is not None:
-            (start, length) = slice
+            (start, length, exact) = slice
+            if exact and len(data) != length: raise ValueError
              data = data[start:start+length]
              if len(data) != length: raise IndexError
  
              data = data[start:start+length]
              if len(data) != length: raise IndexError
  
diff --git a/ref.cc b/ref.cc

index bed4daf..7e92b7a 100644 (file)
--- a/ref.cc
+++ b/ref.cc
@@ -97,7 +97,9 @@ string ObjectReference::to_string() const
  
      if (range_valid) {
          char buf[64];
  
      if (range_valid) {
          char buf[64];
-        if (range_start == 0) {
+        if (range_exact) {
+            sprintf(buf, "[=%zu]", range_length);
+        } else if (range_start == 0) {
              sprintf(buf, "[%zu]", range_length);
          } else {
              sprintf(buf, "[%zu+%zu]", range_start, range_length);
              sprintf(buf, "[%zu]", range_length);
          } else {
              sprintf(buf, "[%zu+%zu]", range_start, range_length);
@@ -159,10 +161,16 @@ ObjectReference ObjectReference::parse(const std::string& str)
      }
  
      // Range
      }
  
      // Range
-    bool have_range = false;
+    bool have_range = false, range_exact = false;
      int64_t range1 = 0, range2 = 0;
      if (*t == '[') {
          t++;
      int64_t range1 = 0, range2 = 0;
      if (*t == '[') {
          t++;
+
+        if (*t == '=') {
+            range_exact = true;
+            t++;
+        }
+
          s = t;
          while (*t >= '0' && *t <= '9')
              t++;
          s = t;
          while (*t >= '0' && *t <= '9')
              t++;
@@ -174,6 +182,8 @@ ObjectReference ObjectReference::parse(const std::string& str)
          } else {
              if (*t != '+')
                  return ObjectReference();
          } else {
              if (*t != '+')
                  return ObjectReference();
+            if (range_exact)
+                return ObjectReference();
  
              string val(s, t - s);
              range1 = atoll(val.c_str());
  
              string val(s, t - s);
              range1 = atoll(val.c_str());
@@ -208,7 +218,7 @@ ObjectReference ObjectReference::parse(const std::string& str)
          ref.set_checksum(checksum);
  
      if (have_range)
          ref.set_checksum(checksum);
  
      if (have_range)
-        ref.set_range(range1, range2);
+        ref.set_range(range1, range2, range_exact);
  
      return ref;
  }
  
      return ref;
  }
@@ -238,6 +248,9 @@ bool ObjectReference::merge(ObjectReference ref)
      if (!range_valid || !ref.range_valid)
          return false;
  
      if (!range_valid || !ref.range_valid)
          return false;
  
+    if (range_exact || ref.range_exact)
+        return false;
+
      if (range_start + range_length == ref.range_start) {
          range_length += ref.range_length;
          return true;
      if (range_start + range_length == ref.range_start) {
          range_length += ref.range_length;
          return true;
diff --git a/ref.h b/ref.h

index d1a0e0c..a27b4d6 100644 (file)
--- a/ref.h
+++ b/ref.h
@@ -55,9 +55,12 @@
   * a substring rather than the entire string using a range specifier.  If no
   * range specifier is given, then by default the entire object is used.
   *    <range> ::= <start> "+" <length>
   * a substring rather than the entire string using a range specifier.  If no
   * range specifier is given, then by default the entire object is used.
   *    <range> ::= <start> "+" <length>
+ *                | <length>
+ *                | "=" <length>
   * Both <start> and <length> are decimal values.  If included, the range is
   * enclosed in brackets.  As an abbreviation, if <start> is 0 then the range
   * Both <start> and <length> are decimal values.  If included, the range is
   * enclosed in brackets.  As an abbreviation, if <start> is 0 then the range
- * can be given as just <length> (no "+" needed).
+ * can be given as just <length> (no "+" needed).  The "=<length>" form asserts
+ * that the underlying object is exactly <length> bytes in size.
   *
   * When both a checksum and a range are included, note that the checksum is
   * taken over the entire original object, before the range is taken into
   *
   * When both a checksum and a range are included, note that the checksum is
   * taken over the entire original object, before the range is taken into
@@ -108,9 +111,13 @@ public:
      bool has_range() const { return range_valid; }
      size_t get_range_start() const { return range_start; }
      size_t get_range_length() const { return range_length; }
      bool has_range() const { return range_valid; }
      size_t get_range_start() const { return range_start; }
      size_t get_range_length() const { return range_length; }
-    void clear_range() { range_start = range_length = 0; range_valid = false; }
-    void set_range(size_t start, size_t length)
-        { range_start = start; range_length = length; range_valid = true; }
+    size_t get_range_exact() const { return range_exact; }
+    void clear_range()
+        { range_start = range_length = 0;
+          range_valid = false; range_exact = false; }
+    void set_range(size_t start, size_t length, bool exact = false)
+        { range_start = start; range_length = length;
+          range_valid = true; range_exact = exact; }
  
      bool merge(ObjectReference ref);
  
  
      bool merge(ObjectReference ref);
  
@@ -124,7 +131,7 @@ private:
      RefType type;
      std::string segment, object, checksum;
      size_t range_start, range_length;
      RefType type;
      std::string segment, object, checksum;
      size_t range_start, range_length;
-    bool checksum_valid, range_valid;
+    bool checksum_valid, range_valid, range_exact;
  };
  
  #endif // _LBS_REF_H
  };
  
  #endif // _LBS_REF_H
diff --git a/scandir.cc b/scandir.cc

index e82a918..a2a7a49 100644 (file)
--- a/scandir.cc
+++ b/scandir.cc
@@ -868,7 +868,7 @@ int main(int argc, char *argv[])
      }
      FILE *descriptor = fdopen(descriptor_fd, "w");
  
      }
      FILE *descriptor = fdopen(descriptor_fd, "w");
  
-    fprintf(descriptor, "Format: LBS Snapshot v0.6\n");
+    fprintf(descriptor, "Format: LBS Snapshot v0.8\n");
      fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
      strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
      fprintf(descriptor, "Date: %s\n", desc_buf);
      fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
      strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
      fprintf(descriptor, "Date: %s\n", desc_buf);
author	Michael Vrable <mvrable@cs.ucsd.edu>
	Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)
committer	Michael Vrable <mvrable@turin.ucsd.edu>
	Mon, 30 Jun 2008 21:17:08 +0000 (14:17 -0700)
cumulus-util		patch | blob | history
doc/format.txt		patch | blob | history
lbs.py		patch | blob | history
ref.cc		patch | blob | history
ref.h		patch | blob | history
scandir.cc		patch | blob | history