Append checksums to segments to allow some verification.
author Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 29 Dec 2006 19:49:20 +0000 (11:49 -0800)
committer Michael Vrable <mvrable@beleg.ucsd.edu>
Fri, 29 Dec 2006 19:49:20 +0000 (11:49 -0800)
sha1.cc
sha1.h
store.cc
store.h

diff --git a/sha1.cc b/sha1.cc
index 1fc4ad7..7a9c0fb 100644 (file)
--- a/sha1.cc
+++ b/sha1.cc
@@ -339,7 +339,7 @@ SHA1Checksum::~SHA1Checksum()
 {
 }
 
-void SHA1Checksum::process(void *data, size_t len)
+void SHA1Checksum::process(const void *data, size_t len)
 {
     sha1_process_bytes(data, len, &ctx);
 }
diff --git a/sha1.h b/sha1.h
index 597f39a..b4d4305 100644 (file)
--- a/sha1.h
+++ b/sha1.h
@@ -85,7 +85,7 @@ public:
     SHA1Checksum();
     ~SHA1Checksum();
 
-    void process(void *data, size_t len);
+    void process(const void *data, size_t len);
     const uint8_t *checksum();
     size_t checksum_size() const { return 20; }
 };
diff --git a/store.cc b/store.cc
index 1686798..52304c2 100644 (file)
--- a/store.cc
+++ b/store.cc
@@ -144,6 +144,23 @@ void WrapperOutputStream::write_internal(const void *data, size_t len)
     real.write(data, len);
 }
 
+/* Provide checksumming of a data stream. */
+ChecksumOutputStream::ChecksumOutputStream(OutputStream &o)
+    : real(o)
+{
+}
+
+void ChecksumOutputStream::write_internal(const void *data, size_t len)
+{
+    real.write(data, len);
+    csum.process(data, len);
+}
+
+const uint8_t *ChecksumOutputStream::finish_and_checksum()
+{
+    return csum.checksum();
+}
+
 /* Utility functions, for encoding data types to strings. */
 string encode_u16(uint16_t val)
 {
@@ -167,10 +184,14 @@ string encode_u64(uint64_t val)
 }
 
 SegmentWriter::SegmentWriter(OutputStream *output, struct uuid u)
-    : out(output),
+    : raw_out(output),
       id(u),
       object_stream(NULL)
 {
+    /* All output data will be checksummed except the very last few bytes,
+     * which are the checksum itself. */
+    out = new ChecksumOutputStream(*raw_out);
+
     /* Write out the segment header first. */
     static const char signature[] = "LBSSEG0\n";
     out->write(signature, strlen(signature));
@@ -198,9 +219,15 @@ SegmentWriter::~SegmentWriter()
     out->write_s64(index_offset);
     out->write_u32(objects.size());
 
+    /* Finally, append a checksum to the end of the file, so that its integrity
+     * (against accidental, not malicious, corruption) can be verified. */
+    const uint8_t *csum = out->finish_and_checksum();
+    raw_out->write(csum, out->checksum_size());
+
     /* The SegmentWriter takes ownership of the OutputStream it is writing to,
      * and destroys it automatically when done with the segment. */
     delete out;
+    delete raw_out;
 }
 
 OutputStream *SegmentWriter::new_object()
diff --git a/store.h b/store.h
index 0cd2c4b..0bc2171 100644 (file)
--- a/store.h
+++ b/store.h
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <vector>
 
+#include "sha1.h"
+
 /* In memory datatype to represent key/value pairs of information, such as file
  * metadata.  Currently implemented as map<string, string>. */
 typedef std::map<std::string, std::string> dictionary;
@@ -112,6 +114,26 @@ private:
     OutputStream &real;
 };
 
+/* Like WrapperOutputStream, but additionally computes a checksum of data as it
+ * is written. */
+class ChecksumOutputStream : public OutputStream {
+public:
+    explicit ChecksumOutputStream(OutputStream &o);
+    virtual ~ChecksumOutputStream() { }
+
+    /* Once a checksum is computed, no further data should be written to the
+     * stream. */
+    const uint8_t *finish_and_checksum();
+    size_t checksum_size() const { return csum.checksum_size(); }
+
+protected:
+    virtual void write_internal(const void *data, size_t len);
+
+private:
+    OutputStream &real;
+    SHA1Checksum csum;
+};
+
 /* Simple wrappers that encode integers using a StringOutputStream and return
  * the encoded result. */
 std::string encode_u16(uint16_t val);
@@ -146,7 +168,8 @@ public:
 private:
     typedef std::vector<std::pair<int64_t, int64_t> > object_table;
 
-    OutputStream *out;
+    ChecksumOutputStream *out;  // Output stream with checksumming enabled
+    OutputStream *raw_out;      // Raw output stream, without checksumming
     struct uuid id;
 
     int64_t object_start_offset;