- /* The index data consists of a sequence of pointers to the data blocks
- * that actually comprise the file data. This level of indirection is used
- * so that the same data block can be used in multiple files, or multiple
- * versions of the same file. */
- SHA1Checksum hash;
- while (true) {
- size_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
- if (bytes == 0)
- break;
-
- hash.process(block_buf, bytes);
-
- // Either find a copy of this block in an already-existing segment, or
- // index it so it can be re-used in the future
- SHA1Checksum block_hash;
- block_hash.process(block_buf, bytes);
- string block_csum = block_hash.checksum_str();
- ObjectReference ref = db->FindObject(block_csum, bytes);
-
- // Store a copy of the object if one does not yet exist
- if (ref.get_segment().size() == 0) {
- LbsObject *o = new LbsObject;
- o->set_group("data");
- o->set_data(block_buf, bytes);
- o->write(tss);
- ref = o->get_ref();
- db->StoreObject(ref, block_csum, bytes);
- delete o;
+ /* If the file is new or changed, we must read in the contents a block at a
+ * time. */
+ if (!cached) {
+ SHA1Checksum hash;
+ Subfile subfile(db);
+ subfile.load_old_blocks(old_blocks);
+
+ while (true) {
+ ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
+ if (bytes == 0)
+ break;
+ if (bytes < 0) {
+ fprintf(stderr, "Backup contents for %s may be incorrect\n",
+ path.c_str());
+ break;
+ }
+
+ hash.process(block_buf, bytes);
+
+ // Sparse file processing: if we read a block of all zeroes, encode
+ // that explicitly.
+ bool all_zero = true;
+ for (int i = 0; i < bytes; i++) {
+ if (block_buf[i] != 0) {
+ all_zero = false;
+ break;
+ }
+ }
+
+ // Either find a copy of this block in an already-existing segment,
+ // or index it so it can be re-used in the future
+ double block_age = 0.0;
+ ObjectReference ref;
+
+ SHA1Checksum block_hash;
+ block_hash.process(block_buf, bytes);
+ string block_csum = block_hash.checksum_str();
+
+ if (all_zero) {
+ ref = ObjectReference(ObjectReference::REF_ZERO);
+ ref.set_range(0, bytes);
+ } else {
+ ref = db->FindObject(block_csum, bytes);
+ }
+
+ list<ObjectReference> refs;
+
+ // Store a copy of the object if one does not yet exist
+ if (ref.is_null()) {
+ LbsObject *o = new LbsObject;
+ int object_group;
+
+ /* We might still have seen this checksum before, if the object
+ * was stored at some time in the past, but we have decided to
+ * clean the segment the object was originally stored in
+ * (FindObject will not return such objects). When rewriting
+ * the object contents, put it in a separate group, so that old
+ * objects get grouped together. The hope is that these old
+ * objects will continue to be used in the future, and we
+ * obtain segments which will continue to be well-utilized.
+ * Additionally, keep track of the age of the data by looking
+ * up the age of the block which was expired and using that
+ * instead of the current time. */
+ if (db->IsOldObject(block_csum, bytes,
+ &block_age, &object_group)) {
+ if (object_group == 0) {
+ o->set_group("data");
+ } else {
+ char group[32];
+ sprintf(group, "compacted-%d", object_group);
+ o->set_group(group);
+ }
+ if (status == NULL)
+ status = "partial";
+ } else {
+ o->set_group("data");
+ status = "new";
+ }
+
+ subfile.analyze_new_block(block_buf, bytes);
+ refs = subfile.create_incremental(tss, o, block_age);
+ } else {
+ if (flag_rebuild_statcache && ref.is_normal()) {
+ subfile.analyze_new_block(block_buf, bytes);
+ subfile.store_analyzed_signatures(ref);
+ }
+ refs.push_back(ref);
+ }
+
+ while (!refs.empty()) {
+ ref = refs.front(); refs.pop_front();
+ object_list.push_back(ref.to_string());
+ if (ref.is_normal())
+ add_segment(ref.get_segment());
+ db->UseObject(ref);
+ }
+ size += bytes;
+
+ if (status == NULL)
+ status = "old";