X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=store.cc;h=8f2c8b328c2d7cacea3ca4c3683993532d1a7980;hb=def20364a3596d7b1fa4a07f3d3ee056cfff2d1e;hp=26797b319ed635341d1deeedf6bd161110644590;hpb=ac33ae99de4a6aa9cfaca2f7fd6746758264758e;p=cumulus.git

diff --git a/store.cc b/store.cc
index 26797b3..8f2c8b3 100644
--- a/store.cc
+++ b/store.cc
@@ -16,7 +16,9 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include <algorithm>
 #include <list>
+#include <map>
 #include <set>
 #include <string>
 #include <iostream>
@@ -24,11 +26,15 @@
 #include "store.h"
 #include "ref.h"
 
+using std::max;
 using std::list;
+using std::map;
 using std::set;
 using std::string;
 
-static char *const filter_program[] = {"bzip2", "-c", NULL};
+/* Default filter program is bzip2 */
+const char *filter_program = "bzip2 -c";
+const char *filter_extension = ".bz2";
 
 static void cloexec(int fd)
 {
@@ -51,7 +57,7 @@ Tarfile::Tarfile(const string &path, const string &segment)
     filter_fd = spawn_filter(real_fd);
 
     if (tar_fdopen(&t, filter_fd, (char *)path.c_str(), NULL,
-                   O_WRONLY | O_CREAT, 0600, TAR_VERBOSE | TAR_GNU) == -1)
+                   O_WRONLY | O_CREAT, 0666, TAR_VERBOSE | TAR_GNU) == -1)
         throw IOException("Error opening Tarfile");
 }
@@ -110,7 +116,7 @@ int Tarfile::spawn_filter(int fd_out)
         close(fd_out);
 
         /* Exec the filter program. */
-        execvp(filter_program[0], filter_program);
+        execlp("/bin/sh", "/bin/sh", "-c", filter_program, NULL);
 
         /* Should not reach here except for error cases. */
         fprintf(stderr, "Could not exec filter: %m\n");
@@ -169,24 +175,29 @@ void Tarfile::internal_write_object(const string &path,
 }
 
 /* Estimate the size based on the size of the actual output file on disk.
- * However, the filter may not have written all data yet, and in the event that
- * it is buffering data to a large extent, also use */
+ * However, it might be the case that the filter program is buffering all its
+ * data, and might not write a single byte until we have closed our end of
+ * the pipe; if we held off closing until we saw data written, we would
+ * deadlock.  So, arbitrarily pick an upper bound on the compression ratio
+ * that the filter will achieve (128:1), and return a size estimate which is
+ * the larger of a) bytes actually seen written to disk, and b) input
+ * bytes/128. */
 size_t Tarfile::size_estimate()
 {
     struct stat statbuf;
 
-    if (fstat(real_fd, &statbuf) == 0) {
-        size_t disk_size = statbuf.st_size;
-
-        if (disk_size >= size / 128)
-            return disk_size;
-    }
+    if (fstat(real_fd, &statbuf) == 0)
+        return max((int64_t)statbuf.st_size, (int64_t)(size / 128));
 
+    /* Couldn't stat the file on disk, so just return the actual number of
+     * bytes, before compression. */
     return size;
 }
 
 static const size_t SEGMENT_SIZE = 4 * 1024 * 1024;
 
+static map<string, int64_t> group_sizes;
+
 ObjectReference TarSegmentStore::write_object(const char *data, size_t len,
                                               const std::string &group)
 {
@@ -199,7 +210,8 @@
     segment->name = generate_uuid();
 
-    string filename = path + "/" + segment->name + ".tar.bz2";
+    string filename = path + "/" + segment->name + ".tar";
+    filename += filter_extension;
     segment->file = new Tarfile(filename, segment->name);
     segment->count = 0;
@@ -216,6 +228,8 @@
     segment->file->write_object(id, data, len);
     segment->count++;
 
+    group_sizes[group] += len;
+
     ObjectReference ref(segment->name, id_buf);
 
     // If this segment meets or exceeds the size target, close it so that
@@ -232,6 +246,15 @@ void TarSegmentStore::sync()
         close_segment(segments.begin()->first);
 }
 
+void TarSegmentStore::dump_stats()
+{
+    printf("Data written:\n");
+    for (map<string, int64_t>::iterator i = group_sizes.begin();
+         i != group_sizes.end(); ++i) {
+        printf("    %s: %lld\n", i->first.c_str(), i->second);
+    }
+}
+
 void TarSegmentStore::close_segment(const string &group)
 {
     struct segment_info *segment = segments[group];
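
The spawn_filter() hunk above is the heart of this change: the filter becomes an
arbitrary shell command string handed to /bin/sh -c, instead of a fixed argv
array passed to execvp.  The following standalone sketch shows the same
pipe/fork/exec pattern in isolation; the helper name run_filter and the demo
output path are made up for illustration and do not appear in store.cc.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Launch `command` via the shell with its stdout connected to out_fd.
 * Returns a descriptor into which the caller writes uncompressed data. */
static int run_filter(const char *command, int out_fd)
{
    int fds[2];
    if (pipe(fds) < 0) {
        perror("pipe");
        exit(1);
    }

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        exit(1);
    }

    if (pid == 0) {
        /* Child: the pipe's read end becomes stdin, out_fd becomes stdout. */
        dup2(fds[0], 0);
        dup2(out_fd, 1);
        close(fds[0]);
        close(fds[1]);
        close(out_fd);
        execlp("/bin/sh", "/bin/sh", "-c", command, (char *)NULL);
        fprintf(stderr, "Could not exec filter: %s\n", strerror(errno));
        _exit(1);
    }

    /* Parent: keep only the write end of the pipe. */
    close(fds[0]);
    return fds[1];
}

int main()
{
    /* Compress a short message to a file through the default filter. */
    int out = creat("/tmp/demo.bz2", 0666);
    int in = run_filter("bzip2 -c", out);
    const char msg[] = "hello, filter\n";
    write(in, msg, sizeof(msg) - 1);
    close(in);          /* EOF on stdin lets the filter flush and exit. */
    close(out);
    wait(NULL);         /* Reap the child. */
    return 0;
}

Closing the write end is what allows the filter to see EOF, flush its output,
and exit; that is exactly the deadlock the new size_estimate() comment is
guarding against, since a buffering filter may emit nothing until then.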
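On the size_estimate() hunk itself: the patch caps the assumed compression
ratio at 128:1 so that the estimate never reads as (near) zero while the
filter is buffering.  A small worked example of that max() computation, with
byte counts invented for illustration:

#include <stdint.h>
#include <stdio.h>
#include <algorithm>

int main()
{
    int64_t bytes_in = 1024 * 1024;   /* uncompressed bytes fed to the filter */
    int64_t on_disk  = 2048;          /* what fstat() currently reports */

    /* Estimate = max(bytes on disk, input / 128): never smaller than what
     * a 128:1 compression ratio would imply. */
    int64_t estimate = std::max(on_disk, bytes_in / 128);
    printf("estimate = %lld bytes\n", (long long)estimate);  /* prints 8192 */
    return 0;
}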