static SegmentStore *segment_store;
static OutputStream *info_dump = NULL;
+static TarSegmentStore *tss = NULL;
+
static SegmentPartitioner *index_segment, *data_segment;
/* Buffer for holding a single block of data read from a file. */
/* Read the contents of a file (specified by an open file descriptor) and copy
* the data to the store. */
-void dumpfile(int fd, dictionary &file_info)
+void dumpfile(int fd, dictionary &file_info, ostream &metadata)
{
struct stat stat_buf;
fstat(fd, &stat_buf);
int64_t size = 0;
+ string segment_list = "data:";
if ((stat_buf.st_mode & S_IFMT) != S_IFREG) {
printf("file is no longer a regular file!\n");
index_data->write_uuid(block_segment_uuid);
index_data->write_u32(block_object_id);
+ // tarstore processing
+ string blockid = tss->write_object(block_buf, bytes, "data");
+ segment_list += " " + blockid;
+
size += bytes;
}
file_info["sha1"] = string((const char *)hash.checksum(),
hash.checksum_size());
file_info["data"] = encode_objref(segment_uuid, object_id);
+
+ metadata << segment_list << "\n";
}
void scanfile(const string& path, ostream &metadata)
fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
file_info["size"] = encode_u64(stat_buf.st_size);
- dumpfile(fd, file_info);
+ dumpfile(fd, file_info, metadata);
close(fd);
break;
{
block_buf = new char[LBS_BLOCK_SIZE];
+ tss = new TarSegmentStore(".");
segment_store = new SegmentStore(".");
SegmentWriter *sw = segment_store->new_segment();
info_dump = sw->new_object(NULL, "ROOT");
fprintf(stderr, "IOException: %s\n", e.getError().c_str());
}
- Tarfile *t = new Tarfile("tarstore.tar", uuid);
const char testdata[] = "Test string.";
- t->write_object(0, testdata, strlen(testdata));
- t->write_object(1, testdata, strlen(testdata));
- t->write_object(2, testdata, strlen(testdata));
+ tss->write_object(testdata, strlen(testdata));
+ tss->write_object(testdata, strlen(testdata));
+ tss->write_object(testdata, strlen(testdata));
const string md = metadata.str();
- t->write_object(3, md.data(), md.size());
+ string root = tss->write_object(md.data(), md.size(), "root");
+
+ fprintf(stderr, "Metadata root is at %s\n", root.c_str());
- delete t;
+ tss->sync();
+ delete tss;
delete index_segment;
delete data_segment;
#include <string.h>
#include <arpa/inet.h>
+#include <string.h>
+
+using std::string;
+
/* SWAP does an endian swap on architectures that are little-endian,
as SHA1 needs some data in a big-endian form. */
#define SWAP(n) htonl(n)
sha1_finish_ctx(&ctx, resbuf);
return (const uint8_t *)resbuf;
}
+
+/* Finish the hash computation and return the digest as text: the literal
+ * prefix "sha-1:" followed by the 20 digest bytes written as 40 lowercase
+ * hexadecimal digits.
+ *
+ * NOTE(review): this hands the live context to sha1_finish_ctx; whether the
+ * object may still be fed data or queried again afterwards depends on that
+ * routine -- confirm before calling this more than once. */
+string SHA1Checksum::checksum_str()
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    uint8_t digest[20];
+    string text = "sha-1:";
+
+    sha1_finish_ctx(&ctx, digest);
+
+    // Emit the high nibble, then the low nibble, of every digest byte.
+    for (int pos = 0; pos < 20; pos++) {
+        text += hex_digits[digest[pos] >> 4];
+        text += hex_digits[digest[pos] & 0x0f];
+    }
+
+    return text;
+}
# include <stdio.h>
# include <stdint.h>
+#include <string>
+
typedef uint32_t md5_uint32;
/* Structure to save state of computation between the single steps. */
void process(const void *data, size_t len);
const uint8_t *checksum();
size_t checksum_size() const { return 20; }
+ std::string checksum_str();
};
#endif
#include <uuid/uuid.h>
#include <string>
+#include <iostream>
#include "tarstore.h"
Tarfile::~Tarfile()
{
+ string checksum_list = checksums.str();
+ internal_write_object(segment_name + "/checksums",
+ checksum_list.data(), checksum_list.size());
tar_append_eof(t);
if (tar_close(t) != 0)
+/* Store one object in this segment's TAR file and remember its checksum.
+ *
+ *   id    object number; it becomes the entry name "<segment_name>/<id>",
+ *         with the id written as eight zero-padded lowercase hex digits
+ *   data  start of the object's bytes
+ *   len   number of bytes at data
+ *
+ * A line "<id> <checksum>" is appended to the checksums stream for every
+ * object written. */
void Tarfile::write_object(int id, const char *data, size_t len)
{
- memset(&t->th_buf, 0, sizeof(struct tar_header));
-
char buf[64];
sprintf(buf, "%08x", id);
string path = segment_name + "/" + buf;
printf("path: %s\n", path.c_str());
+ // The TAR header and data blocks are produced by the shared helper.
+ internal_write_object(path, data, len);
+
+ // Compute a checksum for the data block, which will be stored at the end
+ // of the TAR file.
+ SHA1Checksum hash;
+ hash.process(data, len);
+ // Formats the same id into buf again; buf already holds this text.
+ sprintf(buf, "%08x", id);
+ checksums << buf << " " << hash.checksum_str() << "\n";
+}
+
+void Tarfile::internal_write_object(const string &path,
+ const char *data, size_t len)
+{
+ memset(&t->th_buf, 0, sizeof(struct tar_header));
+
th_set_type(t, S_IFREG | 0600);
th_set_user(t, 0);
th_set_group(t, 0);
if (tar_block_write(t, block) == -1)
throw IOException("Error writing final tar block");
}
+
+/* Write one object into the segment that belongs to `group`, creating that
+ * segment (a new TAR file named "<uuid>.tar" inside the store directory) the
+ * first time the group is seen.
+ *
+ *   data, len  the bytes of the object to store
+ *   group      placement key; every distinct value gets its own segment
+ *
+ * Returns the object's name, "<segment uuid>/<id>", where the id is the
+ * number of objects previously written to that segment, printed as eight
+ * zero-padded lowercase hex digits.  Exceptions raised while creating the
+ * segment or writing the object propagate to the caller; the object count is
+ * only advanced after a successful write. */
+string TarSegmentStore::write_object(const char *data, size_t len, const
+                                     std::string &group)
+{
+    struct segment_info *segment;
+
+    // Look the group up once; the iterator says whether a segment already
+    // exists for it.
+    std::map<std::string, struct segment_info *>::iterator pos
+        = segments.find(group);
+    if (pos != segments.end()) {
+        segment = pos->second;
+    } else {
+        uint8_t uuid[16];
+        char uuid_buf[40];
+        uuid_generate(uuid);
+        uuid_unparse_lower(uuid, uuid_buf);
+
+        segment = new segment_info;
+        segment->file = NULL;
+        segment->count = 0;
+
+        // Every step below can throw (memory allocation at the very least).
+        // Release the half-built record in that case instead of leaking it.
+        try {
+            segment->name = uuid_buf;
+            string filename = path + "/" + segment->name + ".tar";
+            segment->file = new Tarfile(filename, segment->name);
+            segments[group] = segment;
+        } catch (...) {
+            delete segment->file;   // still NULL unless the Tarfile was built
+            delete segment;
+            throw;
+        }
+    }
+
+    // Format the id for the returned name; the Tarfile derives the entry name
+    // from the numeric id itself.
+    int id = segment->count;
+    char id_buf[64];
+    snprintf(id_buf, sizeof(id_buf), "%08x", id);
+
+    segment->file->write_object(id, data, len);
+    segment->count++;
+
+    return segment->name + "/" + id_buf;
+}
+
+/* Close every segment currently tracked by the store.  For each group a
+ * message is printed to stderr, the segment's Tarfile is destroyed, the group
+ * is removed from the table and the bookkeeping record is freed.  The table
+ * is empty when this returns. */
+void TarSegmentStore::sync()
+{
+    while (!segments.empty()) {
+        std::map<std::string, struct segment_info *>::iterator head
+            = segments.begin();
+        struct segment_info *info = head->second;
+
+        fprintf(stderr, "Closing segment group %s (%s)\n",
+                head->first.c_str(), info->name.c_str());
+
+        // Destroy the archive first, then drop the table entry, then the
+        // record itself.
+        delete info->file;
+        segments.erase(head);
+        delete info;
+    }
+}
#include <libtar.h>
#include <string>
+#include <iostream>
+#include <map>
+#include <sstream>
#include "store.h"
+/* A simple wrapper around a single TAR file to represent a segment. Objects
+ * may only be written out all at once, since the tar header must be written
+ * first; incremental writing is not supported. */
class Tarfile {
public:
+ // path is the TAR file's location and segment the name used as the
+ // directory prefix of every entry (presumably the file is opened here --
+ // the constructor body is not shown).
Tarfile(const std::string &path, const std::string &segment);
- virtual ~Tarfile();
+ // Writes the accumulated checksum list as "<segment>/checksums", appends
+ // the TAR end-of-file marker and closes the archive.
+ ~Tarfile();
+ // Stores data[0..len) as entry "<segment>/<id>" (id as eight hex digits)
+ // and records its SHA-1 checksum.
void write_object(int id, const char *data, size_t len);
private:
+ // Writes a single TAR entry with the given full path; used for both
+ // numbered objects and the checksums entry.
+ void internal_write_object(const std::string &path,
+ const char *data, size_t len);
+
+ // Prefix for entry names inside the archive.
std::string segment_name;
+ // One "<id> <checksum>" line per object written so far.
+ std::ostringstream checksums;
+ // libtar handle for the archive.
TAR *t;
};
+/* A set of TAR-file segments kept in one directory.  Objects are routed to a
+ * segment by group name; a segment is created, under a freshly generated
+ * UUID, the first time its group is written to.  The store owns the segment
+ * records and their Tarfile objects and releases them in sync(). */
+class TarSegmentStore {
+public:
+    // New segments will be stored in the given directory.
+    TarSegmentStore(const std::string &path) : path(path) { }
+    ~TarSegmentStore() { sync(); }
+
+    // Writes an object to a segment in the store, and returns the name
+    // (segment/object) to refer to it.  The optional parameter group can be
+    // used to control object placement; objects with different group
+    // parameters are kept in separate segments.
+    std::string write_object(const char *data, size_t len,
+                             const std::string &group = "");
+
+    // Ensure all segments have been fully written.  Every open segment is
+    // closed and forgotten; a later write_object() starts a new segment.
+    void sync();
+
+private:
+    struct segment_info {
+        Tarfile *file;
+        std::string name;       // UUID
+        int count;              // Objects written to this segment
+    };
+
+    // The table below holds owning raw pointers that sync() deletes, so a
+    // copied store would delete them a second time.  Copying is therefore
+    // forbidden: these are declared but intentionally never defined.
+    TarSegmentStore(const TarSegmentStore &);
+    TarSegmentStore &operator=(const TarSegmentStore &);
+
+    std::string path;           // Directory in which segments are created
+    std::map<std::string, struct segment_info *> segments;  // Keyed by group
+};
+
#endif // _LBS_TARSTORE_H