From: Michael Vrable
Date: Thu, 17 May 2007 05:41:43 +0000 (-0700)
Subject: Bugfix in size estimates for filtered tarfile outputs.
X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=57890fff9773a368f241a25c89931a5d67fccb4e

Bugfix in size estimates for filtered tarfile outputs.
---

diff --git a/store.cc b/store.cc
index 26797b3..57ee11b 100644
--- a/store.cc
+++ b/store.cc
@@ -16,6 +16,7 @@
 #include
 #include
+#include <algorithm>
 #include
 #include
 #include
@@ -24,6 +25,7 @@
 #include "store.h"
 #include "ref.h"
+using std::max;
 using std::list;
 using std::set;
 using std::string;
@@ -169,19 +171,22 @@ void Tarfile::internal_write_object(const string &path,
 }
 /* Estimate the size based on the size of the actual output file on disk.
- * However, the filter may not have written all data yet, and in the event that
- * it is buffering data to a large extent, also use */
+ * However, it might be the case that the filter program is buffering all its
+ * data, and might potentially not write a single byte until we have closed
+ * our end of the pipe. If we don't do so until we see data written, we have
+ * a problem. So, arbitrarily pick an upper bound on the compression ratio
+ * that the filter will achieve (128:1), and return a size estimate which is
+ * the larger of a) bytes actually seen written to disk, and b) input
+ * bytes/128. */
 size_t Tarfile::size_estimate()
 {
     struct stat statbuf;
-    if (fstat(real_fd, &statbuf) == 0) {
-        size_t disk_size = statbuf.st_size;
-
-        if (disk_size >= size / 128)
-            return disk_size;
-    }
+    if (fstat(real_fd, &statbuf) == 0)
+        return max((int64_t)statbuf.st_size, (int64_t)(size / 128));
+    /* Couldn't stat the file on disk, so just return the actual number of
+     * bytes, before compression. */
     return size;
 }