#include <fcntl.h>
#include <time.h>

#include <algorithm>
#include <list>
#include <set>
#include <string>

#include "store.h"
#include "ref.h"

using std::list;
using std::max;
using std::set;
using std::string;
/* Default filter program is bzip2.  The command is a single shell string
 * (executed via "/bin/sh -c", see the exec call below) so it may carry
 * arguments; filter_extension is the suffix appended to segment filenames
 * produced through this filter. */
const char *filter_program = "bzip2 -c";
const char *filter_extension = ".bz2";
/* NOTE(review): this hunk appears to contain collapsed/unmerged diff
 * residue.  The function is named cloexec(int fd) but the visible body
 * never uses the fd parameter: it closes fd_out (presumably the write end
 * of a pipe to the filter child -- confirm against the full file) and then
 * execs the filter program, which looks like it belongs to a separate
 * filter-spawning helper.  Verify against the complete source before
 * applying; code bytes left untouched. */
static void cloexec(int fd)
{
close(fd_out);
/* Exec the filter program. */
- execvp(filter_program[0], filter_program);
+ execlp("/bin/sh", "/bin/sh", "-c", filter_program, NULL);
/* Should not reach here except for error cases. */
fprintf(stderr, "Could not exec filter: %m\n");
}
/* Estimate the size based on the size of the actual output file on disk.
- * However, the filter may not have written all data yet, and in the event that
- * it is buffering data to a large extent, also use */
+ * However, it might be the case that the filter program is buffering all its
+ * data, and might potentially not write a single byte until we have closed
+ * our end of the pipe. If we don't do so until we see data written, we have
+ * a problem. So, arbitrarily pick an upper bound on the compression ratio
+ * that the filter will achieve (128:1), and return a size estimate which is
+ * the larger of a) bytes actually seen written to disk, and b) input
+ * bytes/128. */
size_t Tarfile::size_estimate()
{
struct stat statbuf;
- if (fstat(real_fd, &statbuf) == 0) {
- size_t disk_size = statbuf.st_size;
-
- if (disk_size >= size / 128)
- return disk_size;
- }
+ if (fstat(real_fd, &statbuf) == 0)
+ return max((int64_t)statbuf.st_size, (int64_t)(size / 128));
+ /* Couldn't stat the file on disk, so just return the actual number of
+ * bytes, before compression. */
return size;
}
segment->name = generate_uuid();
- string filename = path + "/" + segment->name + ".tar.bz2";
+ string filename = path + "/" + segment->name + ".tar";
+ filename += filter_extension;
segment->file = new Tarfile(filename, segment->name);
segment->count = 0;