While making other format changes, also add in support for explicitly
representing regions of a file that are entirely zero, as can happen with
sparse files. These are represented with an object reference of the form
"zero[0+<length>]". Update the lbs tool to generate and parse these
references, and the utility code to also handle it.
The restore tools do not seek over zero regions when writing out a file, so
the file is not restored as a sparse file, but that support can easily be
added later with no change needed to the format.
# necessary integrity checks (if a checksum is included), and return the object
# data.
sub load_ref {
# necessary integrity checks (if a checksum is included), and return the object
# data.
sub load_ref {
- # First, try to parse the object reference string into constituent pieces.
- # The format is segment/object(checksum)[range]. Both the checksum and
- # range are optional.
+ # Check for special objects before attempting general parsing.
+ if ($ref_str =~ m/^zero\[(\d+)\+(\d+)\]$/) {
+ return "\0" x ($2 + 0);
+ }
+
+ # Try to parse the object reference string into constituent pieces. The
+ # format is segment/object(checksum)[range]. Both the checksum and range
+ # are optional.
if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
die "Malformed object reference: $ref_str";
}
if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
die "Malformed object reference: $ref_str";
}
given first, followed by the slice. The checksum is computed over the
original object contents, before slicing.
given first, followed by the slice. The checksum is computed over the
original object contents, before slicing.
+Special Objects
+---------------
+
+In addition to the standard syntax for objects described above, the
+special name "zero" may be used instead of segment/sequence number.
+This represents an object consisting entirely of zeroes. The zero
+object must have a slice specification appended to indicate the size of
+the object. For example
+ zero[0+1024]
+represents a block consisting of 1024 null bytes. A checksum should not
+be given.
+
FILE METADATA LISTING
=====================
FILE METADATA LISTING
=====================
@staticmethod
def parse_ref(refstr):
@staticmethod
def parse_ref(refstr):
+ m = re.match(r"^zero\[(\d+)\+(\d+)\]$", refstr)
+ if m:
+ return ("zero", None, None, (int(m.group(1)), int(m.group(2))))
+
m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(\d+)\+(\d+)\])?$", refstr)
if not m: return
m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(\d+)\+(\d+)\])?$", refstr)
if not m: return
(segment, object, checksum, slice) = self.parse_ref(refstr)
(segment, object, checksum, slice) = self.parse_ref(refstr)
+ if segment == "zero":
+ return "\0" * slice[1]
+
data = self.load_object(segment, object)
if checksum is not None:
data = self.load_object(segment, object)
if checksum is not None:
sqlite3_stmt *stmt;
bool found = false;
sqlite3_stmt *stmt;
bool found = false;
+ // Special objects (such as the zero object) aren't stored in segments, and
+ // so are always available.
+ if (!ref.is_normal())
+ return true;
+
stmt = Prepare("select count(*) from block_index "
"where segmentid = ? and object = ? and expired is null");
sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment()));
stmt = Prepare("select count(*) from block_index "
"where segmentid = ? and object = ? and expired is null");
sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment()));
int rc;
sqlite3_stmt *stmt;
int rc;
sqlite3_stmt *stmt;
+ if (!ref.is_normal())
+ return;
+
stmt = Prepare("insert or ignore into snapshot_refs "
"select segmentid, object, size from block_index "
"where segmentid = ? and object = ?");
stmt = Prepare("insert or ignore into snapshot_refs "
"select segmentid, object, size from block_index "
"where segmentid = ? and object = ?");
}
ObjectReference::ObjectReference()
}
ObjectReference::ObjectReference()
- : segment(""), object("")
+ : type(REF_NULL), segment(""), object("")
+ clear_checksum();
+ clear_range();
+}
+
+ObjectReference::ObjectReference(RefType t)
+ : type(t), segment(""), object("")
+{
+ clear_checksum();
+ clear_range();
}
ObjectReference::ObjectReference(const std::string& segment, int sequence)
}
ObjectReference::ObjectReference(const std::string& segment, int sequence)
+ : type(REF_NORMAL), segment(segment)
{
char seq_buf[64];
sprintf(seq_buf, "%08x", sequence);
{
char seq_buf[64];
sprintf(seq_buf, "%08x", sequence);
ObjectReference::ObjectReference(const std::string& segment,
const std::string& sequence)
ObjectReference::ObjectReference(const std::string& segment,
const std::string& sequence)
- : segment(segment), object(sequence)
+ : type(REF_NORMAL), segment(segment), object(sequence)
{
clear_checksum();
clear_range();
{
clear_checksum();
clear_range();
string ObjectReference::to_string() const
{
string ObjectReference::to_string() const
{
- if (is_null())
- return "/";
+ if (type == REF_NULL)
+ return "null";
- string result = segment + "/" + object;
+ string result;
+ if (type == REF_ZERO) {
+ result = "zero";
+ } else if (type == REF_NORMAL) {
+ result = segment + "/" + object;
- if (checksum_valid)
- result += "(" + checksum + ")";
+ if (checksum_valid)
+ result += "(" + checksum + ")";
+ }
if (range_valid) {
char buf[64];
if (range_valid) {
char buf[64];
{
const char *s = str.c_str();
const char *t;
{
const char *s = str.c_str();
const char *t;
+ ObjectReference::RefType type = ObjectReference::REF_NORMAL;
+
+ // Special case: explicit zero objects
+ if (strncmp(s, "zero", 4) == 0) {
+ type = ObjectReference::REF_ZERO;
+ s += 4;
+ }
- while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f') || (*t == '-'))
- t++;
- if (*t != '/')
- return ObjectReference();
+ if (type == ObjectReference::REF_NORMAL) {
+ while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f')
+ || (*t == '-'))
+ t++;
+ if (*t != '/')
+ return ObjectReference();
+ }
string segment(s, t - s);
// Object sequence number
string segment(s, t - s);
// Object sequence number
- t++;
- s = t;
- while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f'))
+ if (type == ObjectReference::REF_NORMAL) {
- if (*t != '\0' && *t != '(' && *t != '[')
- return ObjectReference();
+ s = t;
+ while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f'))
+ t++;
+ if (*t != '\0' && *t != '(' && *t != '[')
+ return ObjectReference();
+ }
string object(s, t - s);
// Checksum
string object(s, t - s);
// Checksum
// Range
bool have_range = false;
// Range
bool have_range = false;
- int64_t range1, range2;
+ int64_t range1 = 0, range2 = 0;
if (*t == '[') {
t++;
s = t;
if (*t == '[') {
t++;
s = t;
- ObjectReference ref(segment, object);
+ ObjectReference ref;
+ switch (type) {
+ case ObjectReference::REF_ZERO:
+ ref = ObjectReference(ObjectReference::REF_ZERO);
+ break;
+ case ObjectReference::REF_NORMAL:
+ ref = ObjectReference(segment, object);
+ break;
+ default:
+ return ObjectReference();
+ }
+
if (checksum.size() > 0)
ref.set_checksum(checksum);
if (checksum.size() > 0)
ref.set_checksum(checksum);
* and converted to and from the text representation. */
class ObjectReference {
public:
* and converted to and from the text representation. */
class ObjectReference {
public:
+ enum RefType { REF_NULL, REF_ZERO, REF_NORMAL };
+
+ ObjectReference(RefType t);
ObjectReference(const std::string& segment, int sequence);
ObjectReference(const std::string& segment, const std::string& sequence);
ObjectReference(const std::string& segment, int sequence);
ObjectReference(const std::string& segment, const std::string& sequence);
- bool is_null() const { return segment.size() == 0; }
+ bool is_null() const { return type == REF_NULL; }
+ bool is_normal() const { return type == REF_NORMAL; }
std::string to_string() const;
static ObjectReference parse(const std::string& s);
std::string to_string() const;
static ObjectReference parse(const std::string& s);
bool merge(ObjectReference ref);
private:
bool merge(ObjectReference ref);
private:
std::string segment, object, checksum;
size_t range_start, range_length;
bool checksum_valid, range_valid;
std::string segment, object, checksum;
size_t range_start, range_length;
bool checksum_valid, range_valid;
i != blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
i != blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
- segment_list.insert(ref.get_segment());
+ if (ref.is_normal())
+ segment_list.insert(ref.get_segment());
db->UseObject(ref);
}
size = stat_buf.st_size;
db->UseObject(ref);
}
size = stat_buf.st_size;
hash.process(block_buf, bytes);
hash.process(block_buf, bytes);
+ // Sparse file processing: if we read a block of all zeroes, encode
+ // that explicitly.
+ bool all_zero = true;
+ for (int i = 0; i < bytes; i++) {
+ if (block_buf[i] != 0) {
+ all_zero = false;
+ break;
+ }
+ }
+
// Either find a copy of this block in an already-existing segment,
// or index it so it can be re-used in the future
double block_age = 0.0;
// Either find a copy of this block in an already-existing segment,
// or index it so it can be re-used in the future
double block_age = 0.0;
SHA1Checksum block_hash;
block_hash.process(block_buf, bytes);
string block_csum = block_hash.checksum_str();
SHA1Checksum block_hash;
block_hash.process(block_buf, bytes);
string block_csum = block_hash.checksum_str();
- ObjectReference ref = db->FindObject(block_csum, bytes);
+
+ if (all_zero) {
+ ref = ObjectReference(ObjectReference::REF_ZERO);
+ ref.set_range(0, bytes);
+ } else {
+ ObjectReference ref = db->FindObject(block_csum, bytes);
+ }
// Store a copy of the object if one does not yet exist
// Store a copy of the object if one does not yet exist
- if (ref.get_segment().size() == 0) {
LbsObject *o = new LbsObject;
int object_group;
LbsObject *o = new LbsObject;
int object_group;
}
object_list.push_back(ref.to_string());
}
object_list.push_back(ref.to_string());
- segment_list.insert(ref.get_segment());
+ if (ref.is_normal())
+ segment_list.insert(ref.get_segment());
db->UseObject(ref);
size += bytes;
db->UseObject(ref);
size += bytes;