Object references can now include a size assertion, such as [=1024],
which indicates that the referenced object is exactly 1024 bytes in
length. If a metadata log or statcache file is produced using this
reference form where appropriate, then it should be possible to rebuild
much of the object index in the local database (by looking for files
which are unchanged and computing hashes of blocks from those files where
it is known that an entire object was used, not just a fragment of an
object).
This commit merely adds support for parsing the new references; they are
not yet generated by any code.
from optparse import OptionParser
import lbs
from optparse import OptionParser
import lbs
-# We support up to "LBS Snapshot v0.6" formats, but are also limited by the lbs
+# We support up to "LBS Snapshot v0.8" formats, but are also limited by the lbs
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 6))
+FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 8))
def check_version(format):
ver = lbs.parse_metadata_version(format)
def check_version(format):
ver = lbs.parse_metadata_version(format)
Backup Format Description
for Cumulus: Efficient Filesystem Backup to the Cloud
Backup Format Description
for Cumulus: Efficient Filesystem Backup to the Cloud
- Version: "LBS Snapshot v0.6"
+ Version: "LBS Snapshot v0.8"
NOTE: This format specification is intended to be mostly stable, but is
still subject to change before the 1.0 release. The code may provide
NOTE: This format specification is intended to be mostly stable, but is
still subject to change before the 1.0 release. The code may provide
[<length>]
is shorthand for
[0+<length>]
[<length>]
is shorthand for
[0+<length>]
+In place of a traditional slice, the annotation
+ [=<length>]
+may be used. This is somewhat similar to specifying [<length>], but
+additionally asserts that the referenced object is exactly <length>
+bytes long. That is, this slice syntax selects the entire object, so it
+does not change the bytes returned at all, but it can be used to provide
+information about the underlying object store.
Both a checksum and a slice can be used. In this case, the checksum is
given first, followed by the slice. The checksum is computed over the
Both a checksum and a slice can be used. In this case, the checksum is
given first, followed by the slice. The checksum is computed over the
from pysqlite2 import dbapi2 as sqlite3
# The largest supported snapshot format that can be understood.
from pysqlite2 import dbapi2 as sqlite3
# The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 6) # LBS Snapshot v0.6
+FORMAT_VERSION = (0, 8) # LBS Snapshot v0.8
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
if m:
return ("zero", None, None, (0, int(m.group(1))))
if m:
return ("zero", None, None, (0, int(m.group(1))))
- m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[((\d+)\+)?(\d+)\])?$", refstr)
+ m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
if not m: return
segment = m.group(1)
if not m: return
segment = m.group(1)
checksum = checksum.lstrip("(").rstrip(")")
if slice is not None:
checksum = checksum.lstrip("(").rstrip(")")
if slice is not None:
+ if m.group(9) is not None:
+ # Size-assertion slice
+ slice = (0, int(m.group(9)), True)
+ elif m.group(6) is None:
- slice = (0, int(m.group(7)))
+ slice = (0, int(m.group(8)), False)
- slice = (int(m.group(6)), int(m.group(7)))
+ slice = (int(m.group(7)), int(m.group(8)), False)
return (segment, object, checksum, slice)
return (segment, object, checksum, slice)
raise ValueError
if slice is not None:
raise ValueError
if slice is not None:
- (start, length) = slice
+ (start, length, exact) = slice
+ if exact and len(data) != length: raise ValueError
data = data[start:start+length]
if len(data) != length: raise IndexError
data = data[start:start+length]
if len(data) != length: raise IndexError
if (range_valid) {
char buf[64];
if (range_valid) {
char buf[64];
- if (range_start == 0) {
+ if (range_exact) {
+ sprintf(buf, "[=%zu]", range_length);
+ } else if (range_start == 0) {
sprintf(buf, "[%zu]", range_length);
} else {
sprintf(buf, "[%zu+%zu]", range_start, range_length);
sprintf(buf, "[%zu]", range_length);
} else {
sprintf(buf, "[%zu+%zu]", range_start, range_length);
- bool have_range = false;
+ bool have_range = false, range_exact = false;
int64_t range1 = 0, range2 = 0;
if (*t == '[') {
t++;
int64_t range1 = 0, range2 = 0;
if (*t == '[') {
t++;
+
+ if (*t == '=') {
+ range_exact = true;
+ t++;
+ }
+
s = t;
while (*t >= '0' && *t <= '9')
t++;
s = t;
while (*t >= '0' && *t <= '9')
t++;
} else {
if (*t != '+')
return ObjectReference();
} else {
if (*t != '+')
return ObjectReference();
+ if (range_exact)
+ return ObjectReference();
string val(s, t - s);
range1 = atoll(val.c_str());
string val(s, t - s);
range1 = atoll(val.c_str());
ref.set_checksum(checksum);
if (have_range)
ref.set_checksum(checksum);
if (have_range)
- ref.set_range(range1, range2);
+ ref.set_range(range1, range2, range_exact);
if (!range_valid || !ref.range_valid)
return false;
if (!range_valid || !ref.range_valid)
return false;
+ if (range_exact || ref.range_exact)
+ return false;
+
if (range_start + range_length == ref.range_start) {
range_length += ref.range_length;
return true;
if (range_start + range_length == ref.range_start) {
range_length += ref.range_length;
return true;
* a substring rather than the entire string using a range specifier. If no
* range specifier is given, then by default the entire object is used.
* <range> ::= <start> "+" <length>
* a substring rather than the entire string using a range specifier. If no
* range specifier is given, then by default the entire object is used.
* <range> ::= <start> "+" <length>
+ * | <length>
+ * | "=" <length>
* Both <start> and <length> are decimal values. If included, the range is
* enclosed in brackets. As an abbreviation, if <start> is 0 then the range
* Both <start> and <length> are decimal values. If included, the range is
* enclosed in brackets. As an abbreviation, if <start> is 0 then the range
- * can be given as just <length> (no "+" needed).
+ * can be given as just <length> (no "+" needed). The "=<length>" form asserts
+ * that the underlying object is exactly <length> bytes in size.
*
* When both a checksum and a range are included, note that the checksum is
* taken over the entire original object, before the range is taken into
*
* When both a checksum and a range are included, note that the checksum is
* taken over the entire original object, before the range is taken into
bool has_range() const { return range_valid; }
size_t get_range_start() const { return range_start; }
size_t get_range_length() const { return range_length; }
bool has_range() const { return range_valid; }
size_t get_range_start() const { return range_start; }
size_t get_range_length() const { return range_length; }
- void clear_range() { range_start = range_length = 0; range_valid = false; }
- void set_range(size_t start, size_t length)
- { range_start = start; range_length = length; range_valid = true; }
+ // True when the range was given in the "=<length>" form, i.e. it asserts
+ // that the underlying object is exactly <length> bytes in size.
+ bool get_range_exact() const { return range_exact; }
+ void clear_range()
+ { range_start = range_length = 0;
+ range_valid = false; range_exact = false; }
+ void set_range(size_t start, size_t length, bool exact = false)
+ { range_start = start; range_length = length;
+ range_valid = true; range_exact = exact; }
bool merge(ObjectReference ref);
bool merge(ObjectReference ref);
RefType type;
std::string segment, object, checksum;
size_t range_start, range_length;
RefType type;
std::string segment, object, checksum;
size_t range_start, range_length;
- bool checksum_valid, range_valid;
+ bool checksum_valid, range_valid, range_exact;
}
FILE *descriptor = fdopen(descriptor_fd, "w");
}
FILE *descriptor = fdopen(descriptor_fd, "w");
- fprintf(descriptor, "Format: LBS Snapshot v0.6\n");
+ fprintf(descriptor, "Format: LBS Snapshot v0.8\n");
fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
fprintf(descriptor, "Date: %s\n", desc_buf);
fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
fprintf(descriptor, "Date: %s\n", desc_buf);