# necessary integrity checks (if a checksum is included), and return the object
# data.
sub load_ref {
- # First, try to parse the object reference string into constituent pieces.
- # The format is segment/object(checksum)[range]. Both the checksum and
- # range are optional.
my $ref_str = shift;
+ # Check for special objects before attempting general parsing.
+ if ($ref_str =~ m/^zero\[(\d+)\+(\d+)\]$/) {
+ return "\0" x ($2 + 0);
+ }
+
+ # Try to parse the object reference string into constituent pieces. The
+ # format is segment/object(checksum)[range]. Both the checksum and range
+ # are optional.
if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
die "Malformed object reference: $ref_str";
}
given first, followed by the slice. The checksum is computed over the
original object contents, before slicing.
+Special Objects
+---------------
+
+In addition to the standard syntax for objects described above, the
+special name "zero" may be used instead of segment/sequence number.
+This represents an object consisting entirely of zeroes. The zero
+object must have a slice specification appended to indicate the size of
+the object. For example
+ zero[0+1024]
+represents a block consisting of 1024 null bytes. A checksum should not
+be given.
+
FILE METADATA LISTING
=====================
@staticmethod
def parse_ref(refstr):
+ m = re.match(r"^zero\[(\d+)\+(\d+)\]$", refstr)
+ if m:
+ return ("zero", None, None, (int(m.group(1)), int(m.group(2))))
+
m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(\d+)\+(\d+)\])?$", refstr)
if not m: return
(segment, object, checksum, slice) = self.parse_ref(refstr)
+ if segment == "zero":
+ return "\0" * slice[1]
+
data = self.load_object(segment, object)
if checksum is not None:
sqlite3_stmt *stmt;
bool found = false;
+ // Special objects (such as the zero object) aren't stored in segments, and
+ // so are always available.
+ if (!ref.is_normal())
+ return true;
+
stmt = Prepare("select count(*) from block_index "
"where segmentid = ? and object = ? and expired is null");
sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment()));
int rc;
sqlite3_stmt *stmt;
+ if (!ref.is_normal())
+ return;
+
stmt = Prepare("insert or ignore into snapshot_refs "
"select segmentid, object, size from block_index "
"where segmentid = ? and object = ?");
}
ObjectReference::ObjectReference()
- : segment(""), object("")
+ : type(REF_NULL), segment(""), object("")
{
+ clear_checksum();
+ clear_range();
+}
+
+ObjectReference::ObjectReference(RefType t)
+ : type(t), segment(""), object("")
+{
+ clear_checksum();
+ clear_range();
}
ObjectReference::ObjectReference(const std::string& segment, int sequence)
- : segment(segment)
+ : type(REF_NORMAL), segment(segment)
{
char seq_buf[64];
sprintf(seq_buf, "%08x", sequence);
ObjectReference::ObjectReference(const std::string& segment,
const std::string& sequence)
- : segment(segment), object(sequence)
+ : type(REF_NORMAL), segment(segment), object(sequence)
{
clear_checksum();
clear_range();
string ObjectReference::to_string() const
{
- if (is_null())
- return "/";
+ if (type == REF_NULL)
+ return "null";
- string result = segment + "/" + object;
+ string result;
+ if (type == REF_ZERO) {
+ result = "zero";
+ } else if (type == REF_NORMAL) {
+ result = segment + "/" + object;
- if (checksum_valid)
- result += "(" + checksum + ")";
+ if (checksum_valid)
+ result += "(" + checksum + ")";
+ }
if (range_valid) {
char buf[64];
{
const char *s = str.c_str();
const char *t;
+ ObjectReference::RefType type = ObjectReference::REF_NORMAL;
+
+ // Special case: explicit zero objects
+ if (strncmp(s, "zero", 4) == 0) {
+ type = ObjectReference::REF_ZERO;
+ s += 4;
+ }
// Segment
t = s;
- while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f') || (*t == '-'))
- t++;
- if (*t != '/')
- return ObjectReference();
+ if (type == ObjectReference::REF_NORMAL) {
+ while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f')
+ || (*t == '-'))
+ t++;
+ if (*t != '/')
+ return ObjectReference();
+ }
string segment(s, t - s);
// Object sequence number
- t++;
- s = t;
- while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f'))
+ if (type == ObjectReference::REF_NORMAL) {
t++;
- if (*t != '\0' && *t != '(' && *t != '[')
- return ObjectReference();
+ s = t;
+ while ((*t >= '0' && *t <= '9') || (*t >= 'a' && *t <= 'f'))
+ t++;
+ if (*t != '\0' && *t != '(' && *t != '[')
+ return ObjectReference();
+ }
string object(s, t - s);
// Checksum
// Range
bool have_range = false;
- int64_t range1, range2;
+ int64_t range1 = 0, range2 = 0;
if (*t == '[') {
t++;
s = t;
have_range = true;
}
- ObjectReference ref(segment, object);
+ ObjectReference ref;
+ switch (type) {
+ case ObjectReference::REF_ZERO:
+ ref = ObjectReference(ObjectReference::REF_ZERO);
+ break;
+ case ObjectReference::REF_NORMAL:
+ ref = ObjectReference(segment, object);
+ break;
+ default:
+ return ObjectReference();
+ }
+
if (checksum.size() > 0)
ref.set_checksum(checksum);
* and converted to and from the text representation. */
class ObjectReference {
public:
+ enum RefType { REF_NULL, REF_ZERO, REF_NORMAL };
+
ObjectReference();
+ ObjectReference(RefType t);
ObjectReference(const std::string& segment, int sequence);
ObjectReference(const std::string& segment, const std::string& sequence);
- bool is_null() const { return segment.size() == 0; }
+ bool is_null() const { return type == REF_NULL; }
+ bool is_normal() const { return type == REF_NORMAL; }
std::string to_string() const;
static ObjectReference parse(const std::string& s);
bool merge(ObjectReference ref);
private:
+ RefType type;
std::string segment, object, checksum;
size_t range_start, range_length;
bool checksum_valid, range_valid;
i != blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
- segment_list.insert(ref.get_segment());
+ if (ref.is_normal())
+ segment_list.insert(ref.get_segment());
db->UseObject(ref);
}
size = stat_buf.st_size;
hash.process(block_buf, bytes);
+ // Sparse file processing: if we read a block of all zeroes, encode
+ // that explicitly.
+ bool all_zero = true;
+ for (int i = 0; i < bytes; i++) {
+ if (block_buf[i] != 0) {
+ all_zero = false;
+ break;
+ }
+ }
+
// Either find a copy of this block in an already-existing segment,
// or index it so it can be re-used in the future
double block_age = 0.0;
+ ObjectReference ref;
+
SHA1Checksum block_hash;
block_hash.process(block_buf, bytes);
string block_csum = block_hash.checksum_str();
- ObjectReference ref = db->FindObject(block_csum, bytes);
+
+ if (all_zero) {
+ ref = ObjectReference(ObjectReference::REF_ZERO);
+ ref.set_range(0, bytes);
+ } else {
+ ObjectReference ref = db->FindObject(block_csum, bytes);
+ }
// Store a copy of the object if one does not yet exist
- if (ref.get_segment().size() == 0) {
+ if (ref.is_null()) {
LbsObject *o = new LbsObject;
int object_group;
}
object_list.push_back(ref.to_string());
- segment_list.insert(ref.get_segment());
+ if (ref.is_normal())
+ segment_list.insert(ref.get_segment());
db->UseObject(ref);
size += bytes;