Create a third_party directory for files copied from other projects.

author Michael Vrable <vrable@cs.hmc.edu>

Thu, 30 Aug 2012 04:28:16 +0000 (21:28 -0700)

committer Michael Vrable <vrable@cs.hmc.edu>

Sat, 12 Jan 2013 03:48:36 +0000 (19:48 -0800)
author Michael Vrable <vrable@cs.hmc.edu>
Thu, 30 Aug 2012 04:28:16 +0000 (21:28 -0700)
committer Michael Vrable <vrable@cs.hmc.edu>
Sat, 12 Jan 2013 03:48:36 +0000 (19:48 -0800)
diff --git a/Makefile b/Makefile

index 162d9f9..e707305 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -5,8 +5,9 @@ CXXFLAGS=-O -Wall -Wextra -D_FILE_OFFSET_BITS=64 $(DEBUG) \
          -DCUMULUS_VERSION=$(shell cat version)
  LDFLAGS=$(DEBUG) $(shell pkg-config --libs $(PACKAGES))
  
-SRCS=chunk.cc exclude.cc localdb.cc main.cc metadata.cc ref.cc remote.cc \
-     sha1.cc store.cc subfile.cc util.cc
+THIRD_PARTY_SRCS=chunk.cc sha1.cc
+SRCS=exclude.cc localdb.cc main.cc metadata.cc ref.cc remote.cc \
+     store.cc subfile.cc util.cc $(addprefix third_party/,$(THIRD_PARTY_SRCS))
  OBJS=$(SRCS:.cc=.o)
  
  cumulus : $(OBJS)
diff --git a/chunk.cc b/chunk.cc

deleted file mode 100644 (file)

index 170030f..0000000
--- a/chunk.cc
+++ /dev/null
@@ -1,317 +0,0 @@
-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2008  The Regents of the University of California
- * Written by Michael Vrable <mvrable@cs.ucsd.edu>
- *
- * Much of the code in this file is taken from LBFS, which is
- * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Compute incremental backups at a sub-file level by chopping files up into
- * blocks in a content-sensitive manner (using Rabin fingerprints).  This code
- * is largely taken from LBFS, primarily the files:
- *   liblbfs/fingerprint.C  (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
- *   liblbfs/rabinpoly.h  (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp)
- *   liblbfs/rabinpoly.C  (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
- *   async/msb.h  (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp)
- *   async/msb.C  (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp)
- * but adapted and slimmed down to fit within Cumulus. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <string>
-
-#include "chunk.h"
-
-using std::string;
-
-// Functions/data only needed internally go in a separate namespace.  Public
-// interfaces (at the end of the file) are in the global namespace.
-namespace {
-
-#define FINGERPRINT_PT  0xbfe6b8a5bf378d83LL
-#define BREAKMARK_VALUE 0x78
-#define MIN_CHUNK_SIZE  2048
-#define MAX_CHUNK_SIZE  65535
-#define TARGET_CHUNK_SIZE  4096
-
-#define SFS_DEV_RANDOM "/dev/random"
-
-#define INT64(n) n##LL
-#define MSB64 INT64(0x8000000000000000)
-
-template<class R> inline R
-implicit_cast (R r)
-{
-  return r;
-}
-
-/* Highest bit set in a byte */
-static const char bytemsb[0x100] = {
-  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
-  5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
-  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-  7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-};
-
-/* Find last set (most significant bit) */
-static inline u_int fls32 (uint32_t) __attribute__ ((const));
-static inline u_int
-fls32 (u_int32_t v)
-{
-  if (v & 0xffff0000) {
-    if (v & 0xff000000)
-      return 24 + bytemsb[v>>24];
-    else
-      return 16 + bytemsb[v>>16];
-  }
-  if (v & 0x0000ff00)
-    return 8 + bytemsb[v>>8];
-  else
-    return bytemsb[v];
-}
-
-static inline u_int fls64 (u_int64_t) __attribute__ ((const));
-static inline u_int
-fls64 (u_int64_t v)
-{
-  u_int32_t h;
-  if ((h = v >> 32))
-    return 32 + fls32 (h);
-  else
-    return fls32 ((u_int32_t) v);
-}
-
-static uint64_t
-polymod (uint64_t nh, uint64_t nl, uint64_t d)
-{
-  assert (d);
-  int k = fls64 (d) - 1;
-  d <<= 63 - k;
-
-  if (nh) {
-    if (nh & MSB64)
-      nh ^= d;
-    for (int i = 62; i >= 0; i--)
-      if (nh & INT64 (1) << i) {
-        nh ^= d >> (63 - i);
-        nl ^= d << (i + 1);
-      }
-  }
-  for (int i = 63; i >= k; i--)
-    if (nl & INT64 (1) << i)
-      nl ^= d >> (63 - i);
-  return nl;
-}
-
-static void
-polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y)
-{
-  uint64_t ph = 0, pl = 0;
-  if (x & 1)
-    pl = y;
-  for (int i = 1; i < 64; i++)
-    if (x & (INT64 (1) << i)) {
-      ph ^= y >> (64 - i);
-      pl ^= y << i;
-    }
-  if (php)
-    *php = ph;
-  if (plp)
-    *plp = pl;
-}
-
-static uint64_t
-polymmult (uint64_t x, uint64_t y, uint64_t d)
-{
-  uint64_t h, l;
-  polymult (&h, &l, x, y);
-  return polymod (h, l, d);
-}
-
-#if 0
-static uint64_t
-polygcd (uint64_t x, uint64_t y)
-{
-  for (;;) {
-    if (!y)
-      return x;
-    x = polymod (0, x, y);
-    if (!x)
-      return y;
-    y = polymod (0, y, x);
-  }
-}
-
-static bool
-polyirreducible (uint64_t f)
-{
-  uint64_t u = 2;
-  int m = (fls64 (f) - 1) >> 1;
-  for (int i = 0; i < m; i++) {
-    u = polymmult (u, u, f);
-    if (polygcd (f, u ^ 2) != 1)
-      return false;
-  }
-  return true;
-}
-
-static uint64_t
-polygen (u_int degree)
-{
-  assert (degree > 0 && degree < 64);
-  uint64_t msb = INT64 (1) << degree;
-  uint64_t mask = msb - 1;
-  uint64_t f;
-  int rfd = open (SFS_DEV_RANDOM, O_RDONLY);
-  if (rfd < 0) {
-    fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM);
-    exit(1);
-  }
-  do {
-    if (read (rfd, &f, sizeof (f)) != implicit_cast<ssize_t> (sizeof (f))) {
-      fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM);
-      exit(1);
-    }
-    f = (f & mask) | msb;
-  } while (!polyirreducible (f));
-  close (rfd);
-  return f;
-}
-#endif
-
-class rabinpoly {
-  int shift;
-  uint64_t T[256];              // Lookup table for mod
-  void calcT ();
-public:
-  const uint64_t poly;          // Actual polynomial
-
-  explicit rabinpoly (uint64_t poly);
-  uint64_t append8 (uint64_t p, uint8_t m) const
-    { return ((p << 8) | m) ^ T[p >> shift]; }
-};
-
-void
-rabinpoly::calcT ()
-{
-  assert (poly >= 0x100);
-  int xshift = fls64 (poly) - 1;
-  shift = xshift - 8;
-  uint64_t T1 = polymod (0, INT64 (1) << xshift, poly);
-  for (int j = 0; j < 256; j++)
-    T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift);
-}
-
-rabinpoly::rabinpoly (uint64_t p)
-  : poly (p)
-{
-  calcT ();
-}
-
-class window : public rabinpoly {
-public:
-  enum {size = 48};
-  //enum {size = 24};
-private:
-  uint64_t fingerprint;
-  int bufpos;
-  uint64_t U[256];
-  uint8_t buf[size];
-
-public:
-  window (uint64_t poly);
-  uint64_t slide8 (uint8_t m) {
-    if (++bufpos >= size)
-      bufpos = 0;
-    uint8_t om = buf[bufpos];
-    buf[bufpos] = m;
-    return fingerprint = append8 (fingerprint ^ U[om], m);
-  }
-  void reset () {
-    fingerprint = 0;
-    bzero (buf, sizeof (buf));
-  }
-};
-
-window::window (uint64_t poly)
-  : rabinpoly (poly), fingerprint (0), bufpos (-1)
-{
-  uint64_t sizeshift = 1;
-  for (int i = 1; i < size; i++)
-    sizeshift = append8 (sizeshift, 0);
-  for (int i = 0; i < 256; i++)
-    U[i] = polymmult (i, sizeshift, poly);
-  bzero (buf, sizeof (buf));
-}
-
-} // end anonymous namespace
-
-/* Public interface to this module. */
-int chunk_compute_max_num_breaks(size_t buflen)
-{
-    return (buflen / MIN_CHUNK_SIZE) + 1;
-}
-
-int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints)
-{
-    size_t start, pos;
-    window w(FINGERPRINT_PT);
-
-    int i = 0;
-    start = 0;
-    for (pos = 0; pos < len; pos++) {
-        uint64_t sig = w.slide8(buf[pos]);
-        size_t block_len = pos - start + 1;
-        if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE
-             && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) {
-            breakpoints[i] = pos;
-            start = pos + 1;
-            i++;
-            w.reset();
-        }
-    }
-
-    if (start < len) {
-        breakpoints[i] = len - 1;
-        i++;
-    }
-
-    return i;
-}
-
-string chunk_algorithm_name()
-{
-    char buf[64];
-    sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE);
-    return buf;
-}
diff --git a/chunk.h b/chunk.h

deleted file mode 100644 (file)

index 6cc7392..0000000
--- a/chunk.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2008  The Regents of the University of California
- * Written by Michael Vrable <mvrable@cs.ucsd.edu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Compute incremental backups at a sub-file level by chopping files up into
- * blocks in a content-sensitive manner (using Rabin fingerprints). */
-
-#ifndef _LBS_CHUNK_H
-#define _LBS_CHUNK_H
-
-#include <stdint.h>
-#include <string>
-
-/* Block breakpoints can only be computed for a single block of memory, all
- * loaded at once.  compute_breaks will, given a block of memory, compute the
- * offsets at which successive blocks should end.  These will be stored into
- * the provided memory at breakpoints.  The maximum possible number of blocks
- * (given the block size constaints) can be computed by compute_max_num_breaks
- * so that the breakpoints array can be properly sized.  The actual number of
- * blocks is returned by the compute_breaks function. */
-int chunk_compute_max_num_breaks(size_t buflen);
-int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints);
-std::string chunk_algorithm_name();
-
-#endif // _LBS_CHUNK_H
diff --git a/main.cc b/main.cc

index a8e345f..297e805 100644 (file)
--- a/main.cc
+++ b/main.cc
@@ -53,9 +53,9 @@
  #include "metadata.h"
  #include "remote.h"
  #include "store.h"
-#include "sha1.h"
  #include "subfile.h"
  #include "util.h"
+#include "third_party/sha1.h"
  
  using std::list;
  using std::map;
diff --git a/sha1.cc b/sha1.cc

deleted file mode 100644 (file)

index 7111f94..0000000
--- a/sha1.cc
+++ /dev/null
@@ -1,394 +0,0 @@
-/* sha1.cc - Functions to compute SHA1 message digest of data streams
- * according to the NIST specification FIPS-180-1.
- * part of Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
- * Copyright (C) 2006-2007  The Regents of the University of California
- * Written by Scott G. Miller
- * Additional Credits:
- *    Robert Klep <robert@ilse.nl>  -- Expansion function fix
- * Modifications by Michael Vrable <mvrable@cs.ucsd.edu> to integrate into
- * Cumulus.
- *
- * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "sha1.h"
-
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>
-#include <arpa/inet.h>
-
-#include <string.h>
-
-using std::string;
-
-/* SWAP does an endian swap on architectures that are little-endian,
-   as SHA1 needs some data in a big-endian form.  */
-#define SWAP(n) htonl(n)
-
-#define BLOCKSIZE 4096
-#if BLOCKSIZE % 64 != 0
-# error "invalid BLOCKSIZE"
-#endif
-
-/* This array contains the bytes used to pad the buffer to the next
-   64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
-static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
-
-
-/*
-  Takes a pointer to a 160 bit block of data (five 32 bit ints) and
-  intializes it to the start constants of the SHA1 algorithm.  This
-  must be called before using hash in the call to sha1_hash.
-*/
-void
-sha1_init_ctx (struct sha1_ctx *ctx)
-{
-  ctx->A = 0x67452301;
-  ctx->B = 0xefcdab89;
-  ctx->C = 0x98badcfe;
-  ctx->D = 0x10325476;
-  ctx->E = 0xc3d2e1f0;
-
-  ctx->total[0] = ctx->total[1] = 0;
-  ctx->buflen = 0;
-}
-
-/* Put result from CTX in first 20 bytes following RESBUF.  The result
-   must be in little endian byte order.
-
-   IMPORTANT: On some systems it is required that RESBUF is correctly
-   aligned for a 32 bits value.  */
-void *
-sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
-{
-  ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
-  ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
-  ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
-  ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
-  ((md5_uint32 *) resbuf)[4] = SWAP (ctx->E);
-
-  return resbuf;
-}
-
-/* Process the remaining bytes in the internal buffer and the usual
-   prolog according to the standard and write the result to RESBUF.
-
-   IMPORTANT: On some systems it is required that RESBUF is correctly
-   aligned for a 32 bits value.  */
-void *
-sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
-{
-  /* Take yet unprocessed bytes into account.  */
-  md5_uint32 bytes = ctx->buflen;
-  size_t pad;
-
-  /* Now count remaining bytes.  */
-  ctx->total[0] += bytes;
-  if (ctx->total[0] < bytes)
-    ++ctx->total[1];
-
-  pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
-  memcpy (&ctx->buffer[bytes], fillbuf, pad);
-
-  /* Put the 64-bit file length in *bits* at the end of the buffer.  */
-  *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3);
-  *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) |
-                                                   (ctx->total[0] >> 29));
-
-  /* Process last bytes.  */
-  sha1_process_block (ctx->buffer, bytes + pad + 8, ctx);
-
-  return sha1_read_ctx (ctx, resbuf);
-}
-
-void
-sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
-{
-  /* When we already have some bits in our internal buffer concatenate
-     both inputs first.  */
-  if (ctx->buflen != 0)
-    {
-      size_t left_over = ctx->buflen;
-      size_t add = 128 - left_over > len ? len : 128 - left_over;
-
-      memcpy (&ctx->buffer[left_over], buffer, add);
-      ctx->buflen += add;
-
-      if (ctx->buflen > 64)
-       {
-         sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
-
-         ctx->buflen &= 63;
-         /* The regions in the following copy operation cannot overlap.  */
-         memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63],
-                 ctx->buflen);
-       }
-
-      buffer = (const char *) buffer + add;
-      len -= add;
-    }
-
-  /* Process available complete blocks.  */
-  if (len >= 64)
-    {
-#if !_STRING_ARCH_unaligned
-# define alignof(type) offsetof (struct { char c; type x; }, x)
-# define UNALIGNED_P(p) (((size_t) p) % alignof (md5_uint32) != 0)
-      if (UNALIGNED_P (buffer))
-       while (len > 64)
-         {
-           sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
-           buffer = (const char *) buffer + 64;
-           len -= 64;
-         }
-      else
-#endif
-       {
-         sha1_process_block (buffer, len & ~63, ctx);
-         buffer = (const char *) buffer + (len & ~63);
-         len &= 63;
-       }
-    }
-
-  /* Move remaining bytes in internal buffer.  */
-  if (len > 0)
-    {
-      size_t left_over = ctx->buflen;
-
-      memcpy (&ctx->buffer[left_over], buffer, len);
-      left_over += len;
-      if (left_over >= 64)
-       {
-         sha1_process_block (ctx->buffer, 64, ctx);
-         left_over -= 64;
-         memcpy (ctx->buffer, &ctx->buffer[64], left_over);
-       }
-      ctx->buflen = left_over;
-    }
-}
-
-/* --- Code below is the primary difference between md5.c and sha1.c --- */
-
-/* SHA1 round constants */
-#define K1 0x5a827999L
-#define K2 0x6ed9eba1L
-#define K3 0x8f1bbcdcL
-#define K4 0xca62c1d6L
-
-/* Round functions.  Note that F2 is the same as F4.  */
-#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
-#define F2(B,C,D) (B ^ C ^ D)
-#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
-#define F4(B,C,D) (B ^ C ^ D)
-
-/* Process LEN bytes of BUFFER, accumulating context into CTX.
-   It is assumed that LEN % 64 == 0.
-   Most of this code comes from GnuPG's cipher/sha1.c.  */
-
-void
-sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
-{
-  const md5_uint32 *words = (const md5_uint32 *)buffer;
-  size_t nwords = len / sizeof (md5_uint32);
-  const md5_uint32 *endp = words + nwords;
-  md5_uint32 x[16];
-  md5_uint32 a = ctx->A;
-  md5_uint32 b = ctx->B;
-  md5_uint32 c = ctx->C;
-  md5_uint32 d = ctx->D;
-  md5_uint32 e = ctx->E;
-
-  /* First increment the byte count.  RFC 1321 specifies the possible
-     length of the file up to 2^64 bits.  Here we only compute the
-     number of bytes.  Do a double word increment.  */
-  ctx->total[0] += len;
-  if (ctx->total[0] < len)
-    ++ctx->total[1];
-
-#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-
-#define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
-                   ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
-              , (x[I&0x0f] = rol(tm, 1)) )
-
-#define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
-                                     + F( B, C, D )  \
-                                     + K             \
-                                     + M;            \
-                                B = rol( B, 30 );    \
-                              } while(0)
-
-  while (words < endp)
-    {
-      md5_uint32 tm;
-      int t;
-      for (t = 0; t < 16; t++)
-       {
-         x[t] = SWAP (*words);
-         words++;
-       }
-
-      R( a, b, c, d, e, F1, K1, x[ 0] );
-      R( e, a, b, c, d, F1, K1, x[ 1] );
-      R( d, e, a, b, c, F1, K1, x[ 2] );
-      R( c, d, e, a, b, F1, K1, x[ 3] );
-      R( b, c, d, e, a, F1, K1, x[ 4] );
-      R( a, b, c, d, e, F1, K1, x[ 5] );
-      R( e, a, b, c, d, F1, K1, x[ 6] );
-      R( d, e, a, b, c, F1, K1, x[ 7] );
-      R( c, d, e, a, b, F1, K1, x[ 8] );
-      R( b, c, d, e, a, F1, K1, x[ 9] );
-      R( a, b, c, d, e, F1, K1, x[10] );
-      R( e, a, b, c, d, F1, K1, x[11] );
-      R( d, e, a, b, c, F1, K1, x[12] );
-      R( c, d, e, a, b, F1, K1, x[13] );
-      R( b, c, d, e, a, F1, K1, x[14] );
-      R( a, b, c, d, e, F1, K1, x[15] );
-      R( e, a, b, c, d, F1, K1, M(16) );
-      R( d, e, a, b, c, F1, K1, M(17) );
-      R( c, d, e, a, b, F1, K1, M(18) );
-      R( b, c, d, e, a, F1, K1, M(19) );
-      R( a, b, c, d, e, F2, K2, M(20) );
-      R( e, a, b, c, d, F2, K2, M(21) );
-      R( d, e, a, b, c, F2, K2, M(22) );
-      R( c, d, e, a, b, F2, K2, M(23) );
-      R( b, c, d, e, a, F2, K2, M(24) );
-      R( a, b, c, d, e, F2, K2, M(25) );
-      R( e, a, b, c, d, F2, K2, M(26) );
-      R( d, e, a, b, c, F2, K2, M(27) );
-      R( c, d, e, a, b, F2, K2, M(28) );
-      R( b, c, d, e, a, F2, K2, M(29) );
-      R( a, b, c, d, e, F2, K2, M(30) );
-      R( e, a, b, c, d, F2, K2, M(31) );
-      R( d, e, a, b, c, F2, K2, M(32) );
-      R( c, d, e, a, b, F2, K2, M(33) );
-      R( b, c, d, e, a, F2, K2, M(34) );
-      R( a, b, c, d, e, F2, K2, M(35) );
-      R( e, a, b, c, d, F2, K2, M(36) );
-      R( d, e, a, b, c, F2, K2, M(37) );
-      R( c, d, e, a, b, F2, K2, M(38) );
-      R( b, c, d, e, a, F2, K2, M(39) );
-      R( a, b, c, d, e, F3, K3, M(40) );
-      R( e, a, b, c, d, F3, K3, M(41) );
-      R( d, e, a, b, c, F3, K3, M(42) );
-      R( c, d, e, a, b, F3, K3, M(43) );
-      R( b, c, d, e, a, F3, K3, M(44) );
-      R( a, b, c, d, e, F3, K3, M(45) );
-      R( e, a, b, c, d, F3, K3, M(46) );
-      R( d, e, a, b, c, F3, K3, M(47) );
-      R( c, d, e, a, b, F3, K3, M(48) );
-      R( b, c, d, e, a, F3, K3, M(49) );
-      R( a, b, c, d, e, F3, K3, M(50) );
-      R( e, a, b, c, d, F3, K3, M(51) );
-      R( d, e, a, b, c, F3, K3, M(52) );
-      R( c, d, e, a, b, F3, K3, M(53) );
-      R( b, c, d, e, a, F3, K3, M(54) );
-      R( a, b, c, d, e, F3, K3, M(55) );
-      R( e, a, b, c, d, F3, K3, M(56) );
-      R( d, e, a, b, c, F3, K3, M(57) );
-      R( c, d, e, a, b, F3, K3, M(58) );
-      R( b, c, d, e, a, F3, K3, M(59) );
-      R( a, b, c, d, e, F4, K4, M(60) );
-      R( e, a, b, c, d, F4, K4, M(61) );
-      R( d, e, a, b, c, F4, K4, M(62) );
-      R( c, d, e, a, b, F4, K4, M(63) );
-      R( b, c, d, e, a, F4, K4, M(64) );
-      R( a, b, c, d, e, F4, K4, M(65) );
-      R( e, a, b, c, d, F4, K4, M(66) );
-      R( d, e, a, b, c, F4, K4, M(67) );
-      R( c, d, e, a, b, F4, K4, M(68) );
-      R( b, c, d, e, a, F4, K4, M(69) );
-      R( a, b, c, d, e, F4, K4, M(70) );
-      R( e, a, b, c, d, F4, K4, M(71) );
-      R( d, e, a, b, c, F4, K4, M(72) );
-      R( c, d, e, a, b, F4, K4, M(73) );
-      R( b, c, d, e, a, F4, K4, M(74) );
-      R( a, b, c, d, e, F4, K4, M(75) );
-      R( e, a, b, c, d, F4, K4, M(76) );
-      R( d, e, a, b, c, F4, K4, M(77) );
-      R( c, d, e, a, b, F4, K4, M(78) );
-      R( b, c, d, e, a, F4, K4, M(79) );
-
-      a = ctx->A += a;
-      b = ctx->B += b;
-      c = ctx->C += c;
-      d = ctx->D += d;
-      e = ctx->E += e;
-    }
-}
-
-/* ---- Object-Oriented Wrapper */
-SHA1Checksum::SHA1Checksum()
-{
-    sha1_init_ctx(&ctx);
-}
-
-SHA1Checksum::~SHA1Checksum()
-{
-}
-
-void SHA1Checksum::process(const void *data, size_t len)
-{
-    sha1_process_bytes(data, len, &ctx);
-}
-
-bool SHA1Checksum::process_file(const char *filename)
-{
-    FILE *f = fopen(filename, "rb");
-    if (f == NULL)
-        return false;
-
-    while (!feof(f)) {
-        char buf[4096];
-        size_t bytes = fread(buf, 1, sizeof(buf), f);
-
-        if (ferror(f)) {
-            fclose(f);
-            return false;
-        }
-
-        process(buf, bytes);
-    }
-
-    fclose(f);
-    return true;
-}
-
-const uint8_t *SHA1Checksum::checksum()
-{
-    sha1_finish_ctx(&ctx, resbuf);
-    return (const uint8_t *)resbuf;
-}
-
-string SHA1Checksum::checksum_str()
-{
-    uint8_t resbuf[20];
-    char hexbuf[4];
-    string result = "sha1=";
-
-    sha1_finish_ctx(&ctx, resbuf);
-
-    for (int i = 0; i < 20; i++) {
-        sprintf(hexbuf, "%02x", resbuf[i]);
-        result += hexbuf;
-    }
-
-    return result;
-}
diff --git a/sha1.h b/sha1.h

deleted file mode 100644 (file)

index 1eb8d8d..0000000
--- a/sha1.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Declarations of functions and data types used for SHA1 sum library
- * functions.
- * part of Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
- * Copyright (C) 2006-2007  The Regents of the University of California
- *
- * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5).
- * Modifications by Michael Vrable <mvrable@cs.ucsd.edu> to integrate into
- * Cumulus.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef SHA1_H
-# define SHA1_H 1
-
-# include <stdio.h>
-# include <stdint.h>
-
-#include <string>
-
-typedef uint32_t md5_uint32;
-
-/* Structure to save state of computation between the single steps.  */
-struct sha1_ctx
-{
-  md5_uint32 A;
-  md5_uint32 B;
-  md5_uint32 C;
-  md5_uint32 D;
-  md5_uint32 E;
-
-  md5_uint32 total[2];
-  md5_uint32 buflen;
-  char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
-};
-
-
-/* Initialize structure containing state of computation. */
-extern void sha1_init_ctx (struct sha1_ctx *ctx);
-
-/* Starting with the result of former calls of this function (or the
-   initialization function update the context for the next LEN bytes
-   starting at BUFFER.
-   It is necessary that LEN is a multiple of 64!!! */
-extern void sha1_process_block (const void *buffer, size_t len,
-                               struct sha1_ctx *ctx);
-
-/* Starting with the result of former calls of this function (or the
-   initialization function update the context for the next LEN bytes
-   starting at BUFFER.
-   It is NOT required that LEN is a multiple of 64.  */
-extern void sha1_process_bytes (const void *buffer, size_t len,
-                               struct sha1_ctx *ctx);
-
-/* Process the remaining bytes in the buffer and put result from CTX
-   in first 20 bytes following RESBUF.  The result is always in little
-   endian byte order, so that a byte-wise output yields to the wanted
-   ASCII representation of the message digest.
-
-   IMPORTANT: On some systems it is required that RESBUF be correctly
-   aligned for a 32 bits value.  */
-extern void *sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf);
-
-
-/* Put result from CTX in first 20 bytes following RESBUF.  The result is
-   always in little endian byte order, so that a byte-wise output yields
-   to the wanted ASCII representation of the message digest.
-
-   IMPORTANT: On some systems it is required that RESBUF is correctly
-   aligned for a 32 bits value.  */
-extern void *sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf);
-
-/* An object-oriented wrapper around checksumming functionality. */
-class SHA1Checksum {
-private:
-    struct sha1_ctx ctx;
-    char resbuf[20] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
-
-public:
-    SHA1Checksum();
-    ~SHA1Checksum();
-
-    void process(const void *data, size_t len);
-    bool process_file(const char *filename);
-    const uint8_t *checksum();
-    size_t checksum_size() const { return 20; }
-    std::string checksum_str();
-};
-
-#endif
diff --git a/store.h b/store.h

index aa98e30..aa41665 100644 (file)
--- a/store.h
+++ b/store.h
@@ -36,8 +36,8 @@
  
  #include "localdb.h"
  #include "remote.h"
-#include "sha1.h"
  #include "ref.h"
+#include "third_party/sha1.h"
  
  class LbsObject;
  
diff --git a/subfile.cc b/subfile.cc

index 4f8d0fe..eee3bdf 100644 (file)
--- a/subfile.cc
+++ b/subfile.cc
@@ -28,8 +28,8 @@
  #include <arpa/inet.h>
  
  #include "subfile.h"
-#include "chunk.h"
-#include "sha1.h"
+#include "third_party/chunk.h"
+#include "third_party/sha1.h"
  
  using std::list;
  using std::map;
diff --git a/subfile.h b/subfile.h

index 3127456..360836d 100644 (file)
--- a/subfile.h
+++ b/subfile.h
@@ -31,10 +31,10 @@
  #include <string>
  #include <vector>
  
-#include "chunk.h"
  #include "localdb.h"
  #include "ref.h"
  #include "store.h"
+#include "third_party/chunk.h"
  
  class Subfile {
  public:
diff --git a/third_party/chunk.cc b/third_party/chunk.cc

new file mode 100644 (file)

index 0000000..170030f
--- /dev/null
+++ b/third_party/chunk.cc
@@ -0,0 +1,317 @@
+/* Cumulus: Smart Filesystem Backup to Dumb Servers
+ *
+ * Copyright (C) 2006-2008  The Regents of the University of California
+ * Written by Michael Vrable <mvrable@cs.ucsd.edu>
+ *
+ * Much of the code in this file is taken from LBFS, which is
+ * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Compute incremental backups at a sub-file level by chopping files up into
+ * blocks in a content-sensitive manner (using Rabin fingerprints).  This code
+ * is largely taken from LBFS, primarily the files:
+ *   liblbfs/fingerprint.C  (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
+ *   liblbfs/rabinpoly.h  (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp)
+ *   liblbfs/rabinpoly.C  (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
+ *   async/msb.h  (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp)
+ *   async/msb.C  (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp)
+ * but adapted and slimmed down to fit within Cumulus. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <string>
+
+#include "chunk.h"
+
+using std::string;
+
+// Functions/data only needed internally go in a separate namespace.  Public
+// interfaces (at the end of the file) are in the global namespace.
+namespace {
+
+#define FINGERPRINT_PT  0xbfe6b8a5bf378d83LL
+#define BREAKMARK_VALUE 0x78
+#define MIN_CHUNK_SIZE  2048
+#define MAX_CHUNK_SIZE  65535
+#define TARGET_CHUNK_SIZE  4096
+
+#define SFS_DEV_RANDOM "/dev/random"
+
+#define INT64(n) n##LL
+#define MSB64 INT64(0x8000000000000000)
+
+template<class R> inline R
+implicit_cast (R r)
+{
+  return r;
+}
+
+/* Highest bit set in a byte */
+static const char bytemsb[0x100] = {
+  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};
+
+/* Find last set (most significant bit) */
+static inline u_int fls32 (uint32_t) __attribute__ ((const));
+static inline u_int
+fls32 (u_int32_t v)
+{
+  if (v & 0xffff0000) {
+    if (v & 0xff000000)
+      return 24 + bytemsb[v>>24];
+    else
+      return 16 + bytemsb[v>>16];
+  }
+  if (v & 0x0000ff00)
+    return 8 + bytemsb[v>>8];
+  else
+    return bytemsb[v];
+}
+
+static inline u_int fls64 (u_int64_t) __attribute__ ((const));
+static inline u_int
+fls64 (u_int64_t v)
+{
+  u_int32_t h;
+  if ((h = v >> 32))
+    return 32 + fls32 (h);
+  else
+    return fls32 ((u_int32_t) v);
+}
+
+static uint64_t
+polymod (uint64_t nh, uint64_t nl, uint64_t d)
+{
+  assert (d);
+  int k = fls64 (d) - 1;
+  d <<= 63 - k;
+
+  if (nh) {
+    if (nh & MSB64)
+      nh ^= d;
+    for (int i = 62; i >= 0; i--)
+      if (nh & INT64 (1) << i) {
+        nh ^= d >> (63 - i);
+        nl ^= d << (i + 1);
+      }
+  }
+  for (int i = 63; i >= k; i--)
+    if (nl & INT64 (1) << i)
+      nl ^= d >> (63 - i);
+  return nl;
+}
+
+static void
+polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y)
+{
+  uint64_t ph = 0, pl = 0;
+  if (x & 1)
+    pl = y;
+  for (int i = 1; i < 64; i++)
+    if (x & (INT64 (1) << i)) {
+      ph ^= y >> (64 - i);
+      pl ^= y << i;
+    }
+  if (php)
+    *php = ph;
+  if (plp)
+    *plp = pl;
+}
+
+static uint64_t
+polymmult (uint64_t x, uint64_t y, uint64_t d)
+{
+  uint64_t h, l;
+  polymult (&h, &l, x, y);
+  return polymod (h, l, d);
+}
+
+#if 0
+static uint64_t
+polygcd (uint64_t x, uint64_t y)
+{
+  for (;;) {
+    if (!y)
+      return x;
+    x = polymod (0, x, y);
+    if (!x)
+      return y;
+    y = polymod (0, y, x);
+  }
+}
+
+static bool
+polyirreducible (uint64_t f)
+{
+  uint64_t u = 2;
+  int m = (fls64 (f) - 1) >> 1;
+  for (int i = 0; i < m; i++) {
+    u = polymmult (u, u, f);
+    if (polygcd (f, u ^ 2) != 1)
+      return false;
+  }
+  return true;
+}
+
+static uint64_t
+polygen (u_int degree)
+{
+  assert (degree > 0 && degree < 64);
+  uint64_t msb = INT64 (1) << degree;
+  uint64_t mask = msb - 1;
+  uint64_t f;
+  int rfd = open (SFS_DEV_RANDOM, O_RDONLY);
+  if (rfd < 0) {
+    fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM);
+    exit(1);
+  }
+  do {
+    if (read (rfd, &f, sizeof (f)) != implicit_cast<ssize_t> (sizeof (f))) {
+      fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM);
+      exit(1);
+    }
+    f = (f & mask) | msb;
+  } while (!polyirreducible (f));
+  close (rfd);
+  return f;
+}
+#endif
+
+class rabinpoly {
+  int shift;
+  uint64_t T[256];              // Lookup table for mod
+  void calcT ();
+public:
+  const uint64_t poly;          // Actual polynomial
+
+  explicit rabinpoly (uint64_t poly);
+  uint64_t append8 (uint64_t p, uint8_t m) const
+    { return ((p << 8) | m) ^ T[p >> shift]; }
+};
+
+void
+rabinpoly::calcT ()
+{
+  assert (poly >= 0x100);
+  int xshift = fls64 (poly) - 1;
+  shift = xshift - 8;
+  uint64_t T1 = polymod (0, INT64 (1) << xshift, poly);
+  for (int j = 0; j < 256; j++)
+    T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift);
+}
+
+rabinpoly::rabinpoly (uint64_t p)
+  : poly (p)
+{
+  calcT ();
+}
+
+class window : public rabinpoly {
+public:
+  enum {size = 48};
+  //enum {size = 24};
+private:
+  uint64_t fingerprint;
+  int bufpos;
+  uint64_t U[256];
+  uint8_t buf[size];
+
+public:
+  window (uint64_t poly);
+  uint64_t slide8 (uint8_t m) {
+    if (++bufpos >= size)
+      bufpos = 0;
+    uint8_t om = buf[bufpos];
+    buf[bufpos] = m;
+    return fingerprint = append8 (fingerprint ^ U[om], m);
+  }
+  void reset () {
+    fingerprint = 0;
+    bzero (buf, sizeof (buf));
+  }
+};
+
+window::window (uint64_t poly)
+  : rabinpoly (poly), fingerprint (0), bufpos (-1)
+{
+  uint64_t sizeshift = 1;
+  for (int i = 1; i < size; i++)
+    sizeshift = append8 (sizeshift, 0);
+  for (int i = 0; i < 256; i++)
+    U[i] = polymmult (i, sizeshift, poly);
+  bzero (buf, sizeof (buf));
+}
+
+} // end anonymous namespace
+
+/* Public interface to this module. */
+int chunk_compute_max_num_breaks(size_t buflen)
+{
+    return (buflen / MIN_CHUNK_SIZE) + 1;
+}
+
+int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints)
+{
+    size_t start, pos;
+    window w(FINGERPRINT_PT);
+
+    int i = 0;
+    start = 0;
+    for (pos = 0; pos < len; pos++) {
+        uint64_t sig = w.slide8(buf[pos]);
+        size_t block_len = pos - start + 1;
+        if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE
+             && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) {
+            breakpoints[i] = pos;
+            start = pos + 1;
+            i++;
+            w.reset();
+        }
+    }
+
+    if (start < len) {
+        breakpoints[i] = len - 1;
+        i++;
+    }
+
+    return i;
+}
+
+string chunk_algorithm_name()
+{
+    char buf[64];
+    sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE);
+    return buf;
+}
diff --git a/third_party/chunk.h b/third_party/chunk.h

new file mode 100644 (file)

index 0000000..6cc7392
--- /dev/null
+++ b/third_party/chunk.h
@@ -0,0 +1,41 @@
+/* Cumulus: Smart Filesystem Backup to Dumb Servers
+ *
+ * Copyright (C) 2006-2008  The Regents of the University of California
+ * Written by Michael Vrable <mvrable@cs.ucsd.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Compute incremental backups at a sub-file level by chopping files up into
+ * blocks in a content-sensitive manner (using Rabin fingerprints). */
+
+#ifndef _LBS_CHUNK_H
+#define _LBS_CHUNK_H
+
+#include <stdint.h>
+#include <string>
+
+/* Block breakpoints can only be computed for a single block of memory, all
+ * loaded at once.  compute_breaks will, given a block of memory, compute the
+ * offsets at which successive blocks should end.  These will be stored into
+ * the provided memory at breakpoints.  The maximum possible number of blocks
+ * (given the block size constaints) can be computed by compute_max_num_breaks
+ * so that the breakpoints array can be properly sized.  The actual number of
+ * blocks is returned by the compute_breaks function. */
+int chunk_compute_max_num_breaks(size_t buflen);
+int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints);
+std::string chunk_algorithm_name();
+
+#endif // _LBS_CHUNK_H
diff --git a/third_party/sha1.cc b/third_party/sha1.cc

new file mode 100644 (file)

index 0000000..7111f94
--- /dev/null
+++ b/third_party/sha1.cc
@@ -0,0 +1,394 @@
+/* sha1.cc - Functions to compute SHA1 message digest of data streams
+ * according to the NIST specification FIPS-180-1.
+ * part of Cumulus: Smart Filesystem Backup to Dumb Servers
+ *
+ * Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+ * Copyright (C) 2006-2007  The Regents of the University of California
+ * Written by Scott G. Miller
+ * Additional Credits:
+ *    Robert Klep <robert@ilse.nl>  -- Expansion function fix
+ * Modifications by Michael Vrable <mvrable@cs.ucsd.edu> to integrate into
+ * Cumulus.
+ *
+ * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "sha1.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <arpa/inet.h>
+
+#include <string.h>
+
+using std::string;
+
+/* SWAP does an endian swap on architectures that are little-endian,
+   as SHA1 needs some data in a big-endian form.  */
+#define SWAP(n) htonl(n)
+
+#define BLOCKSIZE 4096
+#if BLOCKSIZE % 64 != 0
+# error "invalid BLOCKSIZE"
+#endif
+
+/* This array contains the bytes used to pad the buffer to the next
+   64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
+static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
+
+
+/*
+  Takes a pointer to a 160 bit block of data (five 32 bit ints) and
+  intializes it to the start constants of the SHA1 algorithm.  This
+  must be called before using hash in the call to sha1_hash.
+*/
+void
+sha1_init_ctx (struct sha1_ctx *ctx)
+{
+  ctx->A = 0x67452301;
+  ctx->B = 0xefcdab89;
+  ctx->C = 0x98badcfe;
+  ctx->D = 0x10325476;
+  ctx->E = 0xc3d2e1f0;
+
+  ctx->total[0] = ctx->total[1] = 0;
+  ctx->buflen = 0;
+}
+
+/* Put result from CTX in first 20 bytes following RESBUF.  The result
+   must be in little endian byte order.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+void *
+sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
+{
+  ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
+  ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
+  ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
+  ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
+  ((md5_uint32 *) resbuf)[4] = SWAP (ctx->E);
+
+  return resbuf;
+}
+
+/* Process the remaining bytes in the internal buffer and the usual
+   prolog according to the standard and write the result to RESBUF.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+void *
+sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
+{
+  /* Take yet unprocessed bytes into account.  */
+  md5_uint32 bytes = ctx->buflen;
+  size_t pad;
+
+  /* Now count remaining bytes.  */
+  ctx->total[0] += bytes;
+  if (ctx->total[0] < bytes)
+    ++ctx->total[1];
+
+  pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
+  memcpy (&ctx->buffer[bytes], fillbuf, pad);
+
+  /* Put the 64-bit file length in *bits* at the end of the buffer.  */
+  *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3);
+  *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) |
+                                                   (ctx->total[0] >> 29));
+
+  /* Process last bytes.  */
+  sha1_process_block (ctx->buffer, bytes + pad + 8, ctx);
+
+  return sha1_read_ctx (ctx, resbuf);
+}
+
+void
+sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
+{
+  /* When we already have some bits in our internal buffer concatenate
+     both inputs first.  */
+  if (ctx->buflen != 0)
+    {
+      size_t left_over = ctx->buflen;
+      size_t add = 128 - left_over > len ? len : 128 - left_over;
+
+      memcpy (&ctx->buffer[left_over], buffer, add);
+      ctx->buflen += add;
+
+      if (ctx->buflen > 64)
+       {
+         sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
+
+         ctx->buflen &= 63;
+         /* The regions in the following copy operation cannot overlap.  */
+         memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63],
+                 ctx->buflen);
+       }
+
+      buffer = (const char *) buffer + add;
+      len -= add;
+    }
+
+  /* Process available complete blocks.  */
+  if (len >= 64)
+    {
+#if !_STRING_ARCH_unaligned
+# define alignof(type) offsetof (struct { char c; type x; }, x)
+# define UNALIGNED_P(p) (((size_t) p) % alignof (md5_uint32) != 0)
+      if (UNALIGNED_P (buffer))
+       while (len > 64)
+         {
+           sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
+           buffer = (const char *) buffer + 64;
+           len -= 64;
+         }
+      else
+#endif
+       {
+         sha1_process_block (buffer, len & ~63, ctx);
+         buffer = (const char *) buffer + (len & ~63);
+         len &= 63;
+       }
+    }
+
+  /* Move remaining bytes in internal buffer.  */
+  if (len > 0)
+    {
+      size_t left_over = ctx->buflen;
+
+      memcpy (&ctx->buffer[left_over], buffer, len);
+      left_over += len;
+      if (left_over >= 64)
+       {
+         sha1_process_block (ctx->buffer, 64, ctx);
+         left_over -= 64;
+         memcpy (ctx->buffer, &ctx->buffer[64], left_over);
+       }
+      ctx->buflen = left_over;
+    }
+}
+
+/* --- Code below is the primary difference between md5.c and sha1.c --- */
+
+/* SHA1 round constants */
+#define K1 0x5a827999L
+#define K2 0x6ed9eba1L
+#define K3 0x8f1bbcdcL
+#define K4 0xca62c1d6L
+
+/* Round functions.  Note that F2 is the same as F4.  */
+#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
+#define F2(B,C,D) (B ^ C ^ D)
+#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
+#define F4(B,C,D) (B ^ C ^ D)
+
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 64 == 0.
+   Most of this code comes from GnuPG's cipher/sha1.c.  */
+
+void
+sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
+{
+  const md5_uint32 *words = (const md5_uint32 *)buffer;
+  size_t nwords = len / sizeof (md5_uint32);
+  const md5_uint32 *endp = words + nwords;
+  md5_uint32 x[16];
+  md5_uint32 a = ctx->A;
+  md5_uint32 b = ctx->B;
+  md5_uint32 c = ctx->C;
+  md5_uint32 d = ctx->D;
+  md5_uint32 e = ctx->E;
+
+  /* First increment the byte count.  RFC 1321 specifies the possible
+     length of the file up to 2^64 bits.  Here we only compute the
+     number of bytes.  Do a double word increment.  */
+  ctx->total[0] += len;
+  if (ctx->total[0] < len)
+    ++ctx->total[1];
+
+#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+#define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
+                   ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
+              , (x[I&0x0f] = rol(tm, 1)) )
+
+#define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
+                                     + F( B, C, D )  \
+                                     + K             \
+                                     + M;            \
+                                B = rol( B, 30 );    \
+                              } while(0)
+
+  while (words < endp)
+    {
+      md5_uint32 tm;
+      int t;
+      for (t = 0; t < 16; t++)
+       {
+         x[t] = SWAP (*words);
+         words++;
+       }
+
+      R( a, b, c, d, e, F1, K1, x[ 0] );
+      R( e, a, b, c, d, F1, K1, x[ 1] );
+      R( d, e, a, b, c, F1, K1, x[ 2] );
+      R( c, d, e, a, b, F1, K1, x[ 3] );
+      R( b, c, d, e, a, F1, K1, x[ 4] );
+      R( a, b, c, d, e, F1, K1, x[ 5] );
+      R( e, a, b, c, d, F1, K1, x[ 6] );
+      R( d, e, a, b, c, F1, K1, x[ 7] );
+      R( c, d, e, a, b, F1, K1, x[ 8] );
+      R( b, c, d, e, a, F1, K1, x[ 9] );
+      R( a, b, c, d, e, F1, K1, x[10] );
+      R( e, a, b, c, d, F1, K1, x[11] );
+      R( d, e, a, b, c, F1, K1, x[12] );
+      R( c, d, e, a, b, F1, K1, x[13] );
+      R( b, c, d, e, a, F1, K1, x[14] );
+      R( a, b, c, d, e, F1, K1, x[15] );
+      R( e, a, b, c, d, F1, K1, M(16) );
+      R( d, e, a, b, c, F1, K1, M(17) );
+      R( c, d, e, a, b, F1, K1, M(18) );
+      R( b, c, d, e, a, F1, K1, M(19) );
+      R( a, b, c, d, e, F2, K2, M(20) );
+      R( e, a, b, c, d, F2, K2, M(21) );
+      R( d, e, a, b, c, F2, K2, M(22) );
+      R( c, d, e, a, b, F2, K2, M(23) );
+      R( b, c, d, e, a, F2, K2, M(24) );
+      R( a, b, c, d, e, F2, K2, M(25) );
+      R( e, a, b, c, d, F2, K2, M(26) );
+      R( d, e, a, b, c, F2, K2, M(27) );
+      R( c, d, e, a, b, F2, K2, M(28) );
+      R( b, c, d, e, a, F2, K2, M(29) );
+      R( a, b, c, d, e, F2, K2, M(30) );
+      R( e, a, b, c, d, F2, K2, M(31) );
+      R( d, e, a, b, c, F2, K2, M(32) );
+      R( c, d, e, a, b, F2, K2, M(33) );
+      R( b, c, d, e, a, F2, K2, M(34) );
+      R( a, b, c, d, e, F2, K2, M(35) );
+      R( e, a, b, c, d, F2, K2, M(36) );
+      R( d, e, a, b, c, F2, K2, M(37) );
+      R( c, d, e, a, b, F2, K2, M(38) );
+      R( b, c, d, e, a, F2, K2, M(39) );
+      R( a, b, c, d, e, F3, K3, M(40) );
+      R( e, a, b, c, d, F3, K3, M(41) );
+      R( d, e, a, b, c, F3, K3, M(42) );
+      R( c, d, e, a, b, F3, K3, M(43) );
+      R( b, c, d, e, a, F3, K3, M(44) );
+      R( a, b, c, d, e, F3, K3, M(45) );
+      R( e, a, b, c, d, F3, K3, M(46) );
+      R( d, e, a, b, c, F3, K3, M(47) );
+      R( c, d, e, a, b, F3, K3, M(48) );
+      R( b, c, d, e, a, F3, K3, M(49) );
+      R( a, b, c, d, e, F3, K3, M(50) );
+      R( e, a, b, c, d, F3, K3, M(51) );
+      R( d, e, a, b, c, F3, K3, M(52) );
+      R( c, d, e, a, b, F3, K3, M(53) );
+      R( b, c, d, e, a, F3, K3, M(54) );
+      R( a, b, c, d, e, F3, K3, M(55) );
+      R( e, a, b, c, d, F3, K3, M(56) );
+      R( d, e, a, b, c, F3, K3, M(57) );
+      R( c, d, e, a, b, F3, K3, M(58) );
+      R( b, c, d, e, a, F3, K3, M(59) );
+      R( a, b, c, d, e, F4, K4, M(60) );
+      R( e, a, b, c, d, F4, K4, M(61) );
+      R( d, e, a, b, c, F4, K4, M(62) );
+      R( c, d, e, a, b, F4, K4, M(63) );
+      R( b, c, d, e, a, F4, K4, M(64) );
+      R( a, b, c, d, e, F4, K4, M(65) );
+      R( e, a, b, c, d, F4, K4, M(66) );
+      R( d, e, a, b, c, F4, K4, M(67) );
+      R( c, d, e, a, b, F4, K4, M(68) );
+      R( b, c, d, e, a, F4, K4, M(69) );
+      R( a, b, c, d, e, F4, K4, M(70) );
+      R( e, a, b, c, d, F4, K4, M(71) );
+      R( d, e, a, b, c, F4, K4, M(72) );
+      R( c, d, e, a, b, F4, K4, M(73) );
+      R( b, c, d, e, a, F4, K4, M(74) );
+      R( a, b, c, d, e, F4, K4, M(75) );
+      R( e, a, b, c, d, F4, K4, M(76) );
+      R( d, e, a, b, c, F4, K4, M(77) );
+      R( c, d, e, a, b, F4, K4, M(78) );
+      R( b, c, d, e, a, F4, K4, M(79) );
+
+      a = ctx->A += a;
+      b = ctx->B += b;
+      c = ctx->C += c;
+      d = ctx->D += d;
+      e = ctx->E += e;
+    }
+}
+
+/* ---- Object-Oriented Wrapper */
+SHA1Checksum::SHA1Checksum()
+{
+    sha1_init_ctx(&ctx);
+}
+
+SHA1Checksum::~SHA1Checksum()
+{
+}
+
+void SHA1Checksum::process(const void *data, size_t len)
+{
+    sha1_process_bytes(data, len, &ctx);
+}
+
+bool SHA1Checksum::process_file(const char *filename)
+{
+    FILE *f = fopen(filename, "rb");
+    if (f == NULL)
+        return false;
+
+    while (!feof(f)) {
+        char buf[4096];
+        size_t bytes = fread(buf, 1, sizeof(buf), f);
+
+        if (ferror(f)) {
+            fclose(f);
+            return false;
+        }
+
+        process(buf, bytes);
+    }
+
+    fclose(f);
+    return true;
+}
+
+const uint8_t *SHA1Checksum::checksum()
+{
+    sha1_finish_ctx(&ctx, resbuf);
+    return (const uint8_t *)resbuf;
+}
+
+string SHA1Checksum::checksum_str()
+{
+    uint8_t resbuf[20];
+    char hexbuf[4];
+    string result = "sha1=";
+
+    sha1_finish_ctx(&ctx, resbuf);
+
+    for (int i = 0; i < 20; i++) {
+        sprintf(hexbuf, "%02x", resbuf[i]);
+        result += hexbuf;
+    }
+
+    return result;
+}
diff --git a/third_party/sha1.h b/third_party/sha1.h

new file mode 100644 (file)

index 0000000..1eb8d8d
--- /dev/null
+++ b/third_party/sha1.h
@@ -0,0 +1,104 @@
+/* Declarations of functions and data types used for SHA1 sum library
+ * functions.
+ * part of Cumulus: Smart Filesystem Backup to Dumb Servers
+ *
+ * Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+ * Copyright (C) 2006-2007  The Regents of the University of California
+ *
+ * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5).
+ * Modifications by Michael Vrable <mvrable@cs.ucsd.edu> to integrate into
+ * Cumulus.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef SHA1_H
+# define SHA1_H 1
+
+# include <stdio.h>
+# include <stdint.h>
+
+#include <string>
+
+typedef uint32_t md5_uint32;
+
+/* Structure to save state of computation between the single steps.  */
+struct sha1_ctx
+{
+  md5_uint32 A;
+  md5_uint32 B;
+  md5_uint32 C;
+  md5_uint32 D;
+  md5_uint32 E;
+
+  md5_uint32 total[2];
+  md5_uint32 buflen;
+  char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
+};
+
+
+/* Initialize structure containing state of computation. */
+extern void sha1_init_ctx (struct sha1_ctx *ctx);
+
+/* Starting with the result of former calls of this function (or the
+   initialization function update the context for the next LEN bytes
+   starting at BUFFER.
+   It is necessary that LEN is a multiple of 64!!! */
+extern void sha1_process_block (const void *buffer, size_t len,
+                               struct sha1_ctx *ctx);
+
+/* Starting with the result of former calls of this function (or the
+   initialization function update the context for the next LEN bytes
+   starting at BUFFER.
+   It is NOT required that LEN is a multiple of 64.  */
+extern void sha1_process_bytes (const void *buffer, size_t len,
+                               struct sha1_ctx *ctx);
+
+/* Process the remaining bytes in the buffer and put result from CTX
+   in first 20 bytes following RESBUF.  The result is always in little
+   endian byte order, so that a byte-wise output yields to the wanted
+   ASCII representation of the message digest.
+
+   IMPORTANT: On some systems it is required that RESBUF be correctly
+   aligned for a 32 bits value.  */
+extern void *sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf);
+
+
+/* Put result from CTX in first 20 bytes following RESBUF.  The result is
+   always in little endian byte order, so that a byte-wise output yields
+   to the wanted ASCII representation of the message digest.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32 bits value.  */
+extern void *sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf);
+
+/* An object-oriented wrapper around checksumming functionality. */
+class SHA1Checksum {
+private:
+    struct sha1_ctx ctx;
+    char resbuf[20] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
+
+public:
+    SHA1Checksum();
+    ~SHA1Checksum();
+
+    void process(const void *data, size_t len);
+    bool process_file(const char *filename);
+    const uint8_t *checksum();
+    size_t checksum_size() const { return 20; }
+    std::string checksum_str();
+};
+
+#endif
author	Michael Vrable <vrable@cs.hmc.edu>
	Thu, 30 Aug 2012 04:28:16 +0000 (21:28 -0700)
committer	Michael Vrable <vrable@cs.hmc.edu>
	Sat, 12 Jan 2013 03:48:36 +0000 (19:48 -0800)
Makefile		patch \| blob \| history
chunk.cc	[deleted file]	patch \| blob \| history
chunk.h	[deleted file]	patch \| blob \| history
main.cc		patch \| blob \| history
sha1.cc	[deleted file]	patch \| blob \| history
sha1.h	[deleted file]	patch \| blob \| history
store.h		patch \| blob \| history
subfile.cc		patch \| blob \| history
subfile.h		patch \| blob \| history
third_party/chunk.cc	[new file with mode: 0644]	patch \| blob
third_party/chunk.h	[new file with mode: 0644]	patch \| blob
third_party/sha1.cc	[new file with mode: 0644]	patch \| blob
third_party/sha1.h	[new file with mode: 0644]	patch \| blob