From: Michael Vrable Date: Thu, 30 Aug 2012 04:28:16 +0000 (-0700) Subject: Create a third_party directory for files copied from other projects. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=35dd99aa3d47805b661fe3126a951710fa7dee11 Create a third_party directory for files copied from other projects. --- diff --git a/Makefile b/Makefile index 162d9f9..e707305 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,9 @@ CXXFLAGS=-O -Wall -Wextra -D_FILE_OFFSET_BITS=64 $(DEBUG) \ -DCUMULUS_VERSION=$(shell cat version) LDFLAGS=$(DEBUG) $(shell pkg-config --libs $(PACKAGES)) -SRCS=chunk.cc exclude.cc localdb.cc main.cc metadata.cc ref.cc remote.cc \ - sha1.cc store.cc subfile.cc util.cc +THIRD_PARTY_SRCS=chunk.cc sha1.cc +SRCS=exclude.cc localdb.cc main.cc metadata.cc ref.cc remote.cc \ + store.cc subfile.cc util.cc $(addprefix third_party/,$(THIRD_PARTY_SRCS)) OBJS=$(SRCS:.cc=.o) cumulus : $(OBJS) diff --git a/chunk.cc b/chunk.cc deleted file mode 100644 index 170030f..0000000 --- a/chunk.cc +++ /dev/null @@ -1,317 +0,0 @@ -/* Cumulus: Smart Filesystem Backup to Dumb Servers - * - * Copyright (C) 2006-2008 The Regents of the University of California - * Written by Michael Vrable - * - * Much of the code in this file is taken from LBFS, which is - * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -/* Compute incremental backups at a sub-file level by chopping files up into - * blocks in a content-sensitive manner (using Rabin fingerprints). This code - * is largely taken from LBFS, primarily the files: - * liblbfs/fingerprint.C (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp) - * liblbfs/rabinpoly.h (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp) - * liblbfs/rabinpoly.C (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp) - * async/msb.h (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp) - * async/msb.C (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp) - * but adapted and slimmed down to fit within Cumulus. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "chunk.h" - -using std::string; - -// Functions/data only needed internally go in a separate namespace. Public -// interfaces (at the end of the file) are in the global namespace. -namespace { - -#define FINGERPRINT_PT 0xbfe6b8a5bf378d83LL -#define BREAKMARK_VALUE 0x78 -#define MIN_CHUNK_SIZE 2048 -#define MAX_CHUNK_SIZE 65535 -#define TARGET_CHUNK_SIZE 4096 - -#define SFS_DEV_RANDOM "/dev/random" - -#define INT64(n) n##LL -#define MSB64 INT64(0x8000000000000000) - -template inline R -implicit_cast (R r) -{ - return r; -} - -/* Highest bit set in a byte */ -static const char bytemsb[0x100] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, -}; - -/* Find last set (most significant bit) */ -static inline u_int fls32 (uint32_t) __attribute__ ((const)); -static inline u_int -fls32 (u_int32_t v) -{ - if (v & 0xffff0000) { - if (v & 0xff000000) - return 24 + bytemsb[v>>24]; - else - return 16 + bytemsb[v>>16]; - } - if (v & 0x0000ff00) - return 8 + bytemsb[v>>8]; - else - return bytemsb[v]; -} - -static inline u_int fls64 (u_int64_t) __attribute__ ((const)); -static inline u_int -fls64 (u_int64_t v) -{ - u_int32_t h; - if ((h = v >> 32)) - return 32 + fls32 (h); - else - return fls32 ((u_int32_t) v); -} - -static uint64_t -polymod (uint64_t nh, uint64_t nl, uint64_t d) -{ - assert (d); - int k = fls64 (d) - 1; - d <<= 63 - k; - - if (nh) { - if (nh & MSB64) - nh ^= d; - for (int i = 62; i >= 0; i--) - if (nh & INT64 (1) << i) { - nh ^= d >> (63 - i); - nl ^= d << (i + 1); - } - } - for (int i = 63; i >= k; i--) - if (nl & INT64 (1) << i) - nl ^= d >> (63 - i); - return nl; -} - -static void -polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y) -{ - uint64_t ph = 0, pl = 0; - if (x & 1) - pl = y; - for (int i = 1; i < 64; i++) - if (x & (INT64 (1) << i)) { - ph ^= y >> (64 - i); - pl ^= y << i; - } - if (php) - *php = ph; - if (plp) - *plp = pl; -} - -static uint64_t -polymmult (uint64_t x, uint64_t y, uint64_t d) -{ - uint64_t h, l; - polymult (&h, &l, x, y); - return polymod (h, l, d); -} - -#if 0 -static uint64_t -polygcd (uint64_t x, uint64_t y) -{ - for (;;) { - if (!y) - return x; - x = polymod (0, x, y); - if (!x) - return y; - y = polymod (0, y, x); - } -} - -static bool -polyirreducible (uint64_t f) -{ - uint64_t u = 2; - int m = (fls64 (f) - 1) >> 1; - for (int i = 0; i < m; i++) { - u = polymmult (u, u, f); - if (polygcd (f, u ^ 2) != 1) - return false; - } - return true; -} - -static uint64_t -polygen (u_int degree) -{ - assert (degree > 0 && degree < 64); - uint64_t msb = INT64 (1) << degree; - uint64_t mask = msb - 1; - uint64_t f; - int rfd = open (SFS_DEV_RANDOM, O_RDONLY); - if (rfd < 0) { - fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM); - exit(1); - } - do { - if (read (rfd, &f, sizeof (f)) != implicit_cast (sizeof (f))) { - fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM); - exit(1); - } - f = (f & mask) | msb; - } while (!polyirreducible (f)); - close (rfd); - return f; -} -#endif - -class rabinpoly { - int shift; - uint64_t T[256]; // Lookup table for mod - void calcT (); -public: - const uint64_t poly; // Actual polynomial - - explicit rabinpoly (uint64_t poly); - uint64_t append8 (uint64_t p, uint8_t m) const - { return ((p << 8) | m) ^ T[p >> shift]; } -}; - -void -rabinpoly::calcT () -{ - assert (poly >= 0x100); - int xshift = fls64 (poly) - 1; - shift = xshift - 8; - uint64_t T1 = polymod (0, INT64 (1) << xshift, poly); - for (int j = 0; j < 256; j++) - T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift); -} - -rabinpoly::rabinpoly (uint64_t p) - : poly (p) -{ - calcT (); -} - -class window : public rabinpoly { -public: - enum {size = 48}; - //enum {size = 24}; -private: - uint64_t fingerprint; - int bufpos; - uint64_t U[256]; - uint8_t buf[size]; - -public: - window (uint64_t poly); - uint64_t slide8 (uint8_t m) { - if (++bufpos >= size) - bufpos = 0; - uint8_t om = buf[bufpos]; - buf[bufpos] = m; - return fingerprint = append8 (fingerprint ^ U[om], m); - } - void reset () { - fingerprint = 0; - bzero (buf, sizeof (buf)); - } -}; - -window::window (uint64_t poly) - : rabinpoly (poly), fingerprint (0), bufpos (-1) -{ - uint64_t sizeshift = 1; - for (int i = 1; i < size; i++) - sizeshift = append8 (sizeshift, 0); - for (int i = 0; i < 256; i++) - U[i] = polymmult (i, sizeshift, poly); - bzero (buf, sizeof (buf)); -} - -} // end anonymous namespace - -/* Public interface to this module. */ -int chunk_compute_max_num_breaks(size_t buflen) -{ - return (buflen / MIN_CHUNK_SIZE) + 1; -} - -int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints) -{ - size_t start, pos; - window w(FINGERPRINT_PT); - - int i = 0; - start = 0; - for (pos = 0; pos < len; pos++) { - uint64_t sig = w.slide8(buf[pos]); - size_t block_len = pos - start + 1; - if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE - && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) { - breakpoints[i] = pos; - start = pos + 1; - i++; - w.reset(); - } - } - - if (start < len) { - breakpoints[i] = len - 1; - i++; - } - - return i; -} - -string chunk_algorithm_name() -{ - char buf[64]; - sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE); - return buf; -} diff --git a/chunk.h b/chunk.h deleted file mode 100644 index 6cc7392..0000000 --- a/chunk.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Cumulus: Smart Filesystem Backup to Dumb Servers - * - * Copyright (C) 2006-2008 The Regents of the University of California - * Written by Michael Vrable - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -/* Compute incremental backups at a sub-file level by chopping files up into - * blocks in a content-sensitive manner (using Rabin fingerprints). */ - -#ifndef _LBS_CHUNK_H -#define _LBS_CHUNK_H - -#include -#include - -/* Block breakpoints can only be computed for a single block of memory, all - * loaded at once. compute_breaks will, given a block of memory, compute the - * offsets at which successive blocks should end. These will be stored into - * the provided memory at breakpoints. The maximum possible number of blocks - * (given the block size constaints) can be computed by compute_max_num_breaks - * so that the breakpoints array can be properly sized. The actual number of - * blocks is returned by the compute_breaks function. */ -int chunk_compute_max_num_breaks(size_t buflen); -int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints); -std::string chunk_algorithm_name(); - -#endif // _LBS_CHUNK_H diff --git a/main.cc b/main.cc index a8e345f..297e805 100644 --- a/main.cc +++ b/main.cc @@ -53,9 +53,9 @@ #include "metadata.h" #include "remote.h" #include "store.h" -#include "sha1.h" #include "subfile.h" #include "util.h" +#include "third_party/sha1.h" using std::list; using std::map; diff --git a/sha1.cc b/sha1.cc deleted file mode 100644 index 7111f94..0000000 --- a/sha1.cc +++ /dev/null @@ -1,394 +0,0 @@ -/* sha1.cc - Functions to compute SHA1 message digest of data streams - * according to the NIST specification FIPS-180-1. - * part of Cumulus: Smart Filesystem Backup to Dumb Servers - * - * Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. - * Copyright (C) 2006-2007 The Regents of the University of California - * Written by Scott G. Miller - * Additional Credits: - * Robert Klep -- Expansion function fix - * Modifications by Michael Vrable to integrate into - * Cumulus. - * - * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5). - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include "sha1.h" - -#include -#include -#include -#include - -#include - -using std::string; - -/* SWAP does an endian swap on architectures that are little-endian, - as SHA1 needs some data in a big-endian form. */ -#define SWAP(n) htonl(n) - -#define BLOCKSIZE 4096 -#if BLOCKSIZE % 64 != 0 -# error "invalid BLOCKSIZE" -#endif - -/* This array contains the bytes used to pad the buffer to the next - 64-byte boundary. (RFC 1321, 3.1: Step 1) */ -static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; - - -/* - Takes a pointer to a 160 bit block of data (five 32 bit ints) and - intializes it to the start constants of the SHA1 algorithm. This - must be called before using hash in the call to sha1_hash. -*/ -void -sha1_init_ctx (struct sha1_ctx *ctx) -{ - ctx->A = 0x67452301; - ctx->B = 0xefcdab89; - ctx->C = 0x98badcfe; - ctx->D = 0x10325476; - ctx->E = 0xc3d2e1f0; - - ctx->total[0] = ctx->total[1] = 0; - ctx->buflen = 0; -} - -/* Put result from CTX in first 20 bytes following RESBUF. The result - must be in little endian byte order. - - IMPORTANT: On some systems it is required that RESBUF is correctly - aligned for a 32 bits value. */ -void * -sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf) -{ - ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A); - ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B); - ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C); - ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D); - ((md5_uint32 *) resbuf)[4] = SWAP (ctx->E); - - return resbuf; -} - -/* Process the remaining bytes in the internal buffer and the usual - prolog according to the standard and write the result to RESBUF. - - IMPORTANT: On some systems it is required that RESBUF is correctly - aligned for a 32 bits value. */ -void * -sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf) -{ - /* Take yet unprocessed bytes into account. */ - md5_uint32 bytes = ctx->buflen; - size_t pad; - - /* Now count remaining bytes. */ - ctx->total[0] += bytes; - if (ctx->total[0] < bytes) - ++ctx->total[1]; - - pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; - memcpy (&ctx->buffer[bytes], fillbuf, pad); - - /* Put the 64-bit file length in *bits* at the end of the buffer. */ - *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3); - *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) | - (ctx->total[0] >> 29)); - - /* Process last bytes. */ - sha1_process_block (ctx->buffer, bytes + pad + 8, ctx); - - return sha1_read_ctx (ctx, resbuf); -} - -void -sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) -{ - /* When we already have some bits in our internal buffer concatenate - both inputs first. */ - if (ctx->buflen != 0) - { - size_t left_over = ctx->buflen; - size_t add = 128 - left_over > len ? len : 128 - left_over; - - memcpy (&ctx->buffer[left_over], buffer, add); - ctx->buflen += add; - - if (ctx->buflen > 64) - { - sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx); - - ctx->buflen &= 63; - /* The regions in the following copy operation cannot overlap. */ - memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63], - ctx->buflen); - } - - buffer = (const char *) buffer + add; - len -= add; - } - - /* Process available complete blocks. */ - if (len >= 64) - { -#if !_STRING_ARCH_unaligned -# define alignof(type) offsetof (struct { char c; type x; }, x) -# define UNALIGNED_P(p) (((size_t) p) % alignof (md5_uint32) != 0) - if (UNALIGNED_P (buffer)) - while (len > 64) - { - sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); - buffer = (const char *) buffer + 64; - len -= 64; - } - else -#endif - { - sha1_process_block (buffer, len & ~63, ctx); - buffer = (const char *) buffer + (len & ~63); - len &= 63; - } - } - - /* Move remaining bytes in internal buffer. */ - if (len > 0) - { - size_t left_over = ctx->buflen; - - memcpy (&ctx->buffer[left_over], buffer, len); - left_over += len; - if (left_over >= 64) - { - sha1_process_block (ctx->buffer, 64, ctx); - left_over -= 64; - memcpy (ctx->buffer, &ctx->buffer[64], left_over); - } - ctx->buflen = left_over; - } -} - -/* --- Code below is the primary difference between md5.c and sha1.c --- */ - -/* SHA1 round constants */ -#define K1 0x5a827999L -#define K2 0x6ed9eba1L -#define K3 0x8f1bbcdcL -#define K4 0xca62c1d6L - -/* Round functions. Note that F2 is the same as F4. */ -#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) ) -#define F2(B,C,D) (B ^ C ^ D) -#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) ) -#define F4(B,C,D) (B ^ C ^ D) - -/* Process LEN bytes of BUFFER, accumulating context into CTX. - It is assumed that LEN % 64 == 0. - Most of this code comes from GnuPG's cipher/sha1.c. */ - -void -sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) -{ - const md5_uint32 *words = (const md5_uint32 *)buffer; - size_t nwords = len / sizeof (md5_uint32); - const md5_uint32 *endp = words + nwords; - md5_uint32 x[16]; - md5_uint32 a = ctx->A; - md5_uint32 b = ctx->B; - md5_uint32 c = ctx->C; - md5_uint32 d = ctx->D; - md5_uint32 e = ctx->E; - - /* First increment the byte count. RFC 1321 specifies the possible - length of the file up to 2^64 bits. Here we only compute the - number of bytes. Do a double word increment. */ - ctx->total[0] += len; - if (ctx->total[0] < len) - ++ctx->total[1]; - -#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) - -#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \ - ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ - , (x[I&0x0f] = rol(tm, 1)) ) - -#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ - + F( B, C, D ) \ - + K \ - + M; \ - B = rol( B, 30 ); \ - } while(0) - - while (words < endp) - { - md5_uint32 tm; - int t; - for (t = 0; t < 16; t++) - { - x[t] = SWAP (*words); - words++; - } - - R( a, b, c, d, e, F1, K1, x[ 0] ); - R( e, a, b, c, d, F1, K1, x[ 1] ); - R( d, e, a, b, c, F1, K1, x[ 2] ); - R( c, d, e, a, b, F1, K1, x[ 3] ); - R( b, c, d, e, a, F1, K1, x[ 4] ); - R( a, b, c, d, e, F1, K1, x[ 5] ); - R( e, a, b, c, d, F1, K1, x[ 6] ); - R( d, e, a, b, c, F1, K1, x[ 7] ); - R( c, d, e, a, b, F1, K1, x[ 8] ); - R( b, c, d, e, a, F1, K1, x[ 9] ); - R( a, b, c, d, e, F1, K1, x[10] ); - R( e, a, b, c, d, F1, K1, x[11] ); - R( d, e, a, b, c, F1, K1, x[12] ); - R( c, d, e, a, b, F1, K1, x[13] ); - R( b, c, d, e, a, F1, K1, x[14] ); - R( a, b, c, d, e, F1, K1, x[15] ); - R( e, a, b, c, d, F1, K1, M(16) ); - R( d, e, a, b, c, F1, K1, M(17) ); - R( c, d, e, a, b, F1, K1, M(18) ); - R( b, c, d, e, a, F1, K1, M(19) ); - R( a, b, c, d, e, F2, K2, M(20) ); - R( e, a, b, c, d, F2, K2, M(21) ); - R( d, e, a, b, c, F2, K2, M(22) ); - R( c, d, e, a, b, F2, K2, M(23) ); - R( b, c, d, e, a, F2, K2, M(24) ); - R( a, b, c, d, e, F2, K2, M(25) ); - R( e, a, b, c, d, F2, K2, M(26) ); - R( d, e, a, b, c, F2, K2, M(27) ); - R( c, d, e, a, b, F2, K2, M(28) ); - R( b, c, d, e, a, F2, K2, M(29) ); - R( a, b, c, d, e, F2, K2, M(30) ); - R( e, a, b, c, d, F2, K2, M(31) ); - R( d, e, a, b, c, F2, K2, M(32) ); - R( c, d, e, a, b, F2, K2, M(33) ); - R( b, c, d, e, a, F2, K2, M(34) ); - R( a, b, c, d, e, F2, K2, M(35) ); - R( e, a, b, c, d, F2, K2, M(36) ); - R( d, e, a, b, c, F2, K2, M(37) ); - R( c, d, e, a, b, F2, K2, M(38) ); - R( b, c, d, e, a, F2, K2, M(39) ); - R( a, b, c, d, e, F3, K3, M(40) ); - R( e, a, b, c, d, F3, K3, M(41) ); - R( d, e, a, b, c, F3, K3, M(42) ); - R( c, d, e, a, b, F3, K3, M(43) ); - R( b, c, d, e, a, F3, K3, M(44) ); - R( a, b, c, d, e, F3, K3, M(45) ); - R( e, a, b, c, d, F3, K3, M(46) ); - R( d, e, a, b, c, F3, K3, M(47) ); - R( c, d, e, a, b, F3, K3, M(48) ); - R( b, c, d, e, a, F3, K3, M(49) ); - R( a, b, c, d, e, F3, K3, M(50) ); - R( e, a, b, c, d, F3, K3, M(51) ); - R( d, e, a, b, c, F3, K3, M(52) ); - R( c, d, e, a, b, F3, K3, M(53) ); - R( b, c, d, e, a, F3, K3, M(54) ); - R( a, b, c, d, e, F3, K3, M(55) ); - R( e, a, b, c, d, F3, K3, M(56) ); - R( d, e, a, b, c, F3, K3, M(57) ); - R( c, d, e, a, b, F3, K3, M(58) ); - R( b, c, d, e, a, F3, K3, M(59) ); - R( a, b, c, d, e, F4, K4, M(60) ); - R( e, a, b, c, d, F4, K4, M(61) ); - R( d, e, a, b, c, F4, K4, M(62) ); - R( c, d, e, a, b, F4, K4, M(63) ); - R( b, c, d, e, a, F4, K4, M(64) ); - R( a, b, c, d, e, F4, K4, M(65) ); - R( e, a, b, c, d, F4, K4, M(66) ); - R( d, e, a, b, c, F4, K4, M(67) ); - R( c, d, e, a, b, F4, K4, M(68) ); - R( b, c, d, e, a, F4, K4, M(69) ); - R( a, b, c, d, e, F4, K4, M(70) ); - R( e, a, b, c, d, F4, K4, M(71) ); - R( d, e, a, b, c, F4, K4, M(72) ); - R( c, d, e, a, b, F4, K4, M(73) ); - R( b, c, d, e, a, F4, K4, M(74) ); - R( a, b, c, d, e, F4, K4, M(75) ); - R( e, a, b, c, d, F4, K4, M(76) ); - R( d, e, a, b, c, F4, K4, M(77) ); - R( c, d, e, a, b, F4, K4, M(78) ); - R( b, c, d, e, a, F4, K4, M(79) ); - - a = ctx->A += a; - b = ctx->B += b; - c = ctx->C += c; - d = ctx->D += d; - e = ctx->E += e; - } -} - -/* ---- Object-Oriented Wrapper */ -SHA1Checksum::SHA1Checksum() -{ - sha1_init_ctx(&ctx); -} - -SHA1Checksum::~SHA1Checksum() -{ -} - -void SHA1Checksum::process(const void *data, size_t len) -{ - sha1_process_bytes(data, len, &ctx); -} - -bool SHA1Checksum::process_file(const char *filename) -{ - FILE *f = fopen(filename, "rb"); - if (f == NULL) - return false; - - while (!feof(f)) { - char buf[4096]; - size_t bytes = fread(buf, 1, sizeof(buf), f); - - if (ferror(f)) { - fclose(f); - return false; - } - - process(buf, bytes); - } - - fclose(f); - return true; -} - -const uint8_t *SHA1Checksum::checksum() -{ - sha1_finish_ctx(&ctx, resbuf); - return (const uint8_t *)resbuf; -} - -string SHA1Checksum::checksum_str() -{ - uint8_t resbuf[20]; - char hexbuf[4]; - string result = "sha1="; - - sha1_finish_ctx(&ctx, resbuf); - - for (int i = 0; i < 20; i++) { - sprintf(hexbuf, "%02x", resbuf[i]); - result += hexbuf; - } - - return result; -} diff --git a/sha1.h b/sha1.h deleted file mode 100644 index 1eb8d8d..0000000 --- a/sha1.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Declarations of functions and data types used for SHA1 sum library - * functions. - * part of Cumulus: Smart Filesystem Backup to Dumb Servers - * - * Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc. - * Copyright (C) 2006-2007 The Regents of the University of California - * - * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5). - * Modifications by Michael Vrable to integrate into - * Cumulus. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef SHA1_H -# define SHA1_H 1 - -# include -# include - -#include - -typedef uint32_t md5_uint32; - -/* Structure to save state of computation between the single steps. */ -struct sha1_ctx -{ - md5_uint32 A; - md5_uint32 B; - md5_uint32 C; - md5_uint32 D; - md5_uint32 E; - - md5_uint32 total[2]; - md5_uint32 buflen; - char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32)))); -}; - - -/* Initialize structure containing state of computation. */ -extern void sha1_init_ctx (struct sha1_ctx *ctx); - -/* Starting with the result of former calls of this function (or the - initialization function update the context for the next LEN bytes - starting at BUFFER. - It is necessary that LEN is a multiple of 64!!! */ -extern void sha1_process_block (const void *buffer, size_t len, - struct sha1_ctx *ctx); - -/* Starting with the result of former calls of this function (or the - initialization function update the context for the next LEN bytes - starting at BUFFER. - It is NOT required that LEN is a multiple of 64. */ -extern void sha1_process_bytes (const void *buffer, size_t len, - struct sha1_ctx *ctx); - -/* Process the remaining bytes in the buffer and put result from CTX - in first 20 bytes following RESBUF. The result is always in little - endian byte order, so that a byte-wise output yields to the wanted - ASCII representation of the message digest. - - IMPORTANT: On some systems it is required that RESBUF be correctly - aligned for a 32 bits value. */ -extern void *sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf); - - -/* Put result from CTX in first 20 bytes following RESBUF. The result is - always in little endian byte order, so that a byte-wise output yields - to the wanted ASCII representation of the message digest. - - IMPORTANT: On some systems it is required that RESBUF is correctly - aligned for a 32 bits value. */ -extern void *sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf); - -/* An object-oriented wrapper around checksumming functionality. */ -class SHA1Checksum { -private: - struct sha1_ctx ctx; - char resbuf[20] __attribute__ ((__aligned__ (__alignof__ (md5_uint32)))); - -public: - SHA1Checksum(); - ~SHA1Checksum(); - - void process(const void *data, size_t len); - bool process_file(const char *filename); - const uint8_t *checksum(); - size_t checksum_size() const { return 20; } - std::string checksum_str(); -}; - -#endif diff --git a/store.h b/store.h index aa98e30..aa41665 100644 --- a/store.h +++ b/store.h @@ -36,8 +36,8 @@ #include "localdb.h" #include "remote.h" -#include "sha1.h" #include "ref.h" +#include "third_party/sha1.h" class LbsObject; diff --git a/subfile.cc b/subfile.cc index 4f8d0fe..eee3bdf 100644 --- a/subfile.cc +++ b/subfile.cc @@ -28,8 +28,8 @@ #include #include "subfile.h" -#include "chunk.h" -#include "sha1.h" +#include "third_party/chunk.h" +#include "third_party/sha1.h" using std::list; using std::map; diff --git a/subfile.h b/subfile.h index 3127456..360836d 100644 --- a/subfile.h +++ b/subfile.h @@ -31,10 +31,10 @@ #include #include -#include "chunk.h" #include "localdb.h" #include "ref.h" #include "store.h" +#include "third_party/chunk.h" class Subfile { public: diff --git a/third_party/chunk.cc b/third_party/chunk.cc new file mode 100644 index 0000000..170030f --- /dev/null +++ b/third_party/chunk.cc @@ -0,0 +1,317 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2006-2008 The Regents of the University of California + * Written by Michael Vrable + * + * Much of the code in this file is taken from LBFS, which is + * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Compute incremental backups at a sub-file level by chopping files up into + * blocks in a content-sensitive manner (using Rabin fingerprints). This code + * is largely taken from LBFS, primarily the files: + * liblbfs/fingerprint.C (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp) + * liblbfs/rabinpoly.h (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp) + * liblbfs/rabinpoly.C (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp) + * async/msb.h (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp) + * async/msb.C (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp) + * but adapted and slimmed down to fit within Cumulus. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "chunk.h" + +using std::string; + +// Functions/data only needed internally go in a separate namespace. Public +// interfaces (at the end of the file) are in the global namespace. +namespace { + +#define FINGERPRINT_PT 0xbfe6b8a5bf378d83LL +#define BREAKMARK_VALUE 0x78 +#define MIN_CHUNK_SIZE 2048 +#define MAX_CHUNK_SIZE 65535 +#define TARGET_CHUNK_SIZE 4096 + +#define SFS_DEV_RANDOM "/dev/random" + +#define INT64(n) n##LL +#define MSB64 INT64(0x8000000000000000) + +template inline R +implicit_cast (R r) +{ + return r; +} + +/* Highest bit set in a byte */ +static const char bytemsb[0x100] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + +/* Find last set (most significant bit) */ +static inline u_int fls32 (uint32_t) __attribute__ ((const)); +static inline u_int +fls32 (u_int32_t v) +{ + if (v & 0xffff0000) { + if (v & 0xff000000) + return 24 + bytemsb[v>>24]; + else + return 16 + bytemsb[v>>16]; + } + if (v & 0x0000ff00) + return 8 + bytemsb[v>>8]; + else + return bytemsb[v]; +} + +static inline u_int fls64 (u_int64_t) __attribute__ ((const)); +static inline u_int +fls64 (u_int64_t v) +{ + u_int32_t h; + if ((h = v >> 32)) + return 32 + fls32 (h); + else + return fls32 ((u_int32_t) v); +} + +static uint64_t +polymod (uint64_t nh, uint64_t nl, uint64_t d) +{ + assert (d); + int k = fls64 (d) - 1; + d <<= 63 - k; + + if (nh) { + if (nh & MSB64) + nh ^= d; + for (int i = 62; i >= 0; i--) + if (nh & INT64 (1) << i) { + nh ^= d >> (63 - i); + nl ^= d << (i + 1); + } + } + for (int i = 63; i >= k; i--) + if (nl & INT64 (1) << i) + nl ^= d >> (63 - i); + return nl; +} + +static void +polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y) +{ + uint64_t ph = 0, pl = 0; + if (x & 1) + pl = y; + for (int i = 1; i < 64; i++) + if (x & (INT64 (1) << i)) { + ph ^= y >> (64 - i); + pl ^= y << i; + } + if (php) + *php = ph; + if (plp) + *plp = pl; +} + +static uint64_t +polymmult (uint64_t x, uint64_t y, uint64_t d) +{ + uint64_t h, l; + polymult (&h, &l, x, y); + return polymod (h, l, d); +} + +#if 0 +static uint64_t +polygcd (uint64_t x, uint64_t y) +{ + for (;;) { + if (!y) + return x; + x = polymod (0, x, y); + if (!x) + return y; + y = polymod (0, y, x); + } +} + +static bool +polyirreducible (uint64_t f) +{ + uint64_t u = 2; + int m = (fls64 (f) - 1) >> 1; + for (int i = 0; i < m; i++) { + u = polymmult (u, u, f); + if (polygcd (f, u ^ 2) != 1) + return false; + } + return true; +} + +static uint64_t +polygen (u_int degree) +{ + assert (degree > 0 && degree < 64); + uint64_t msb = INT64 (1) << degree; + uint64_t mask = msb - 1; + uint64_t f; + int rfd = open (SFS_DEV_RANDOM, O_RDONLY); + if (rfd < 0) { + fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM); + exit(1); + } + do { + if (read (rfd, &f, sizeof (f)) != implicit_cast (sizeof (f))) { + fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM); + exit(1); + } + f = (f & mask) | msb; + } while (!polyirreducible (f)); + close (rfd); + return f; +} +#endif + +class rabinpoly { + int shift; + uint64_t T[256]; // Lookup table for mod + void calcT (); +public: + const uint64_t poly; // Actual polynomial + + explicit rabinpoly (uint64_t poly); + uint64_t append8 (uint64_t p, uint8_t m) const + { return ((p << 8) | m) ^ T[p >> shift]; } +}; + +void +rabinpoly::calcT () +{ + assert (poly >= 0x100); + int xshift = fls64 (poly) - 1; + shift = xshift - 8; + uint64_t T1 = polymod (0, INT64 (1) << xshift, poly); + for (int j = 0; j < 256; j++) + T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift); +} + +rabinpoly::rabinpoly (uint64_t p) + : poly (p) +{ + calcT (); +} + +class window : public rabinpoly { +public: + enum {size = 48}; + //enum {size = 24}; +private: + uint64_t fingerprint; + int bufpos; + uint64_t U[256]; + uint8_t buf[size]; + +public: + window (uint64_t poly); + uint64_t slide8 (uint8_t m) { + if (++bufpos >= size) + bufpos = 0; + uint8_t om = buf[bufpos]; + buf[bufpos] = m; + return fingerprint = append8 (fingerprint ^ U[om], m); + } + void reset () { + fingerprint = 0; + bzero (buf, sizeof (buf)); + } +}; + +window::window (uint64_t poly) + : rabinpoly (poly), fingerprint (0), bufpos (-1) +{ + uint64_t sizeshift = 1; + for (int i = 1; i < size; i++) + sizeshift = append8 (sizeshift, 0); + for (int i = 0; i < 256; i++) + U[i] = polymmult (i, sizeshift, poly); + bzero (buf, sizeof (buf)); +} + +} // end anonymous namespace + +/* Public interface to this module. */ +int chunk_compute_max_num_breaks(size_t buflen) +{ + return (buflen / MIN_CHUNK_SIZE) + 1; +} + +int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints) +{ + size_t start, pos; + window w(FINGERPRINT_PT); + + int i = 0; + start = 0; + for (pos = 0; pos < len; pos++) { + uint64_t sig = w.slide8(buf[pos]); + size_t block_len = pos - start + 1; + if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE + && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) { + breakpoints[i] = pos; + start = pos + 1; + i++; + w.reset(); + } + } + + if (start < len) { + breakpoints[i] = len - 1; + i++; + } + + return i; +} + +string chunk_algorithm_name() +{ + char buf[64]; + sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE); + return buf; +} diff --git a/third_party/chunk.h b/third_party/chunk.h new file mode 100644 index 0000000..6cc7392 --- /dev/null +++ b/third_party/chunk.h @@ -0,0 +1,41 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2006-2008 The Regents of the University of California + * Written by Michael Vrable + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Compute incremental backups at a sub-file level by chopping files up into + * blocks in a content-sensitive manner (using Rabin fingerprints). */ + +#ifndef _LBS_CHUNK_H +#define _LBS_CHUNK_H + +#include +#include + +/* Block breakpoints can only be computed for a single block of memory, all + * loaded at once. compute_breaks will, given a block of memory, compute the + * offsets at which successive blocks should end. These will be stored into + * the provided memory at breakpoints. The maximum possible number of blocks + * (given the block size constaints) can be computed by compute_max_num_breaks + * so that the breakpoints array can be properly sized. The actual number of + * blocks is returned by the compute_breaks function. */ +int chunk_compute_max_num_breaks(size_t buflen); +int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints); +std::string chunk_algorithm_name(); + +#endif // _LBS_CHUNK_H diff --git a/third_party/sha1.cc b/third_party/sha1.cc new file mode 100644 index 0000000..7111f94 --- /dev/null +++ b/third_party/sha1.cc @@ -0,0 +1,394 @@ +/* sha1.cc - Functions to compute SHA1 message digest of data streams + * according to the NIST specification FIPS-180-1. + * part of Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. + * Copyright (C) 2006-2007 The Regents of the University of California + * Written by Scott G. Miller + * Additional Credits: + * Robert Klep -- Expansion function fix + * Modifications by Michael Vrable to integrate into + * Cumulus. + * + * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "sha1.h" + +#include +#include +#include +#include + +#include + +using std::string; + +/* SWAP does an endian swap on architectures that are little-endian, + as SHA1 needs some data in a big-endian form. */ +#define SWAP(n) htonl(n) + +#define BLOCKSIZE 4096 +#if BLOCKSIZE % 64 != 0 +# error "invalid BLOCKSIZE" +#endif + +/* This array contains the bytes used to pad the buffer to the next + 64-byte boundary. (RFC 1321, 3.1: Step 1) */ +static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; + + +/* + Takes a pointer to a 160 bit block of data (five 32 bit ints) and + intializes it to the start constants of the SHA1 algorithm. This + must be called before using hash in the call to sha1_hash. +*/ +void +sha1_init_ctx (struct sha1_ctx *ctx) +{ + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + ctx->E = 0xc3d2e1f0; + + ctx->total[0] = ctx->total[1] = 0; + ctx->buflen = 0; +} + +/* Put result from CTX in first 20 bytes following RESBUF. The result + must be in little endian byte order. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +void * +sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf) +{ + ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A); + ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B); + ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C); + ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D); + ((md5_uint32 *) resbuf)[4] = SWAP (ctx->E); + + return resbuf; +} + +/* Process the remaining bytes in the internal buffer and the usual + prolog according to the standard and write the result to RESBUF. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +void * +sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf) +{ + /* Take yet unprocessed bytes into account. */ + md5_uint32 bytes = ctx->buflen; + size_t pad; + + /* Now count remaining bytes. */ + ctx->total[0] += bytes; + if (ctx->total[0] < bytes) + ++ctx->total[1]; + + pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; + memcpy (&ctx->buffer[bytes], fillbuf, pad); + + /* Put the 64-bit file length in *bits* at the end of the buffer. */ + *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3); + *(md5_uint32 *) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) | + (ctx->total[0] >> 29)); + + /* Process last bytes. */ + sha1_process_block (ctx->buffer, bytes + pad + 8, ctx); + + return sha1_read_ctx (ctx, resbuf); +} + +void +sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) +{ + /* When we already have some bits in our internal buffer concatenate + both inputs first. */ + if (ctx->buflen != 0) + { + size_t left_over = ctx->buflen; + size_t add = 128 - left_over > len ? len : 128 - left_over; + + memcpy (&ctx->buffer[left_over], buffer, add); + ctx->buflen += add; + + if (ctx->buflen > 64) + { + sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx); + + ctx->buflen &= 63; + /* The regions in the following copy operation cannot overlap. */ + memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63], + ctx->buflen); + } + + buffer = (const char *) buffer + add; + len -= add; + } + + /* Process available complete blocks. */ + if (len >= 64) + { +#if !_STRING_ARCH_unaligned +# define alignof(type) offsetof (struct { char c; type x; }, x) +# define UNALIGNED_P(p) (((size_t) p) % alignof (md5_uint32) != 0) + if (UNALIGNED_P (buffer)) + while (len > 64) + { + sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); + buffer = (const char *) buffer + 64; + len -= 64; + } + else +#endif + { + sha1_process_block (buffer, len & ~63, ctx); + buffer = (const char *) buffer + (len & ~63); + len &= 63; + } + } + + /* Move remaining bytes in internal buffer. */ + if (len > 0) + { + size_t left_over = ctx->buflen; + + memcpy (&ctx->buffer[left_over], buffer, len); + left_over += len; + if (left_over >= 64) + { + sha1_process_block (ctx->buffer, 64, ctx); + left_over -= 64; + memcpy (ctx->buffer, &ctx->buffer[64], left_over); + } + ctx->buflen = left_over; + } +} + +/* --- Code below is the primary difference between md5.c and sha1.c --- */ + +/* SHA1 round constants */ +#define K1 0x5a827999L +#define K2 0x6ed9eba1L +#define K3 0x8f1bbcdcL +#define K4 0xca62c1d6L + +/* Round functions. Note that F2 is the same as F4. */ +#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) ) +#define F2(B,C,D) (B ^ C ^ D) +#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) ) +#define F4(B,C,D) (B ^ C ^ D) + +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 64 == 0. + Most of this code comes from GnuPG's cipher/sha1.c. */ + +void +sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) +{ + const md5_uint32 *words = (const md5_uint32 *)buffer; + size_t nwords = len / sizeof (md5_uint32); + const md5_uint32 *endp = words + nwords; + md5_uint32 x[16]; + md5_uint32 a = ctx->A; + md5_uint32 b = ctx->B; + md5_uint32 c = ctx->C; + md5_uint32 d = ctx->D; + md5_uint32 e = ctx->E; + + /* First increment the byte count. RFC 1321 specifies the possible + length of the file up to 2^64 bits. Here we only compute the + number of bytes. Do a double word increment. */ + ctx->total[0] += len; + if (ctx->total[0] < len) + ++ctx->total[1]; + +#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \ + ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ + , (x[I&0x0f] = rol(tm, 1)) ) + +#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ + + F( B, C, D ) \ + + K \ + + M; \ + B = rol( B, 30 ); \ + } while(0) + + while (words < endp) + { + md5_uint32 tm; + int t; + for (t = 0; t < 16; t++) + { + x[t] = SWAP (*words); + words++; + } + + R( a, b, c, d, e, F1, K1, x[ 0] ); + R( e, a, b, c, d, F1, K1, x[ 1] ); + R( d, e, a, b, c, F1, K1, x[ 2] ); + R( c, d, e, a, b, F1, K1, x[ 3] ); + R( b, c, d, e, a, F1, K1, x[ 4] ); + R( a, b, c, d, e, F1, K1, x[ 5] ); + R( e, a, b, c, d, F1, K1, x[ 6] ); + R( d, e, a, b, c, F1, K1, x[ 7] ); + R( c, d, e, a, b, F1, K1, x[ 8] ); + R( b, c, d, e, a, F1, K1, x[ 9] ); + R( a, b, c, d, e, F1, K1, x[10] ); + R( e, a, b, c, d, F1, K1, x[11] ); + R( d, e, a, b, c, F1, K1, x[12] ); + R( c, d, e, a, b, F1, K1, x[13] ); + R( b, c, d, e, a, F1, K1, x[14] ); + R( a, b, c, d, e, F1, K1, x[15] ); + R( e, a, b, c, d, F1, K1, M(16) ); + R( d, e, a, b, c, F1, K1, M(17) ); + R( c, d, e, a, b, F1, K1, M(18) ); + R( b, c, d, e, a, F1, K1, M(19) ); + R( a, b, c, d, e, F2, K2, M(20) ); + R( e, a, b, c, d, F2, K2, M(21) ); + R( d, e, a, b, c, F2, K2, M(22) ); + R( c, d, e, a, b, F2, K2, M(23) ); + R( b, c, d, e, a, F2, K2, M(24) ); + R( a, b, c, d, e, F2, K2, M(25) ); + R( e, a, b, c, d, F2, K2, M(26) ); + R( d, e, a, b, c, F2, K2, M(27) ); + R( c, d, e, a, b, F2, K2, M(28) ); + R( b, c, d, e, a, F2, K2, M(29) ); + R( a, b, c, d, e, F2, K2, M(30) ); + R( e, a, b, c, d, F2, K2, M(31) ); + R( d, e, a, b, c, F2, K2, M(32) ); + R( c, d, e, a, b, F2, K2, M(33) ); + R( b, c, d, e, a, F2, K2, M(34) ); + R( a, b, c, d, e, F2, K2, M(35) ); + R( e, a, b, c, d, F2, K2, M(36) ); + R( d, e, a, b, c, F2, K2, M(37) ); + R( c, d, e, a, b, F2, K2, M(38) ); + R( b, c, d, e, a, F2, K2, M(39) ); + R( a, b, c, d, e, F3, K3, M(40) ); + R( e, a, b, c, d, F3, K3, M(41) ); + R( d, e, a, b, c, F3, K3, M(42) ); + R( c, d, e, a, b, F3, K3, M(43) ); + R( b, c, d, e, a, F3, K3, M(44) ); + R( a, b, c, d, e, F3, K3, M(45) ); + R( e, a, b, c, d, F3, K3, M(46) ); + R( d, e, a, b, c, F3, K3, M(47) ); + R( c, d, e, a, b, F3, K3, M(48) ); + R( b, c, d, e, a, F3, K3, M(49) ); + R( a, b, c, d, e, F3, K3, M(50) ); + R( e, a, b, c, d, F3, K3, M(51) ); + R( d, e, a, b, c, F3, K3, M(52) ); + R( c, d, e, a, b, F3, K3, M(53) ); + R( b, c, d, e, a, F3, K3, M(54) ); + R( a, b, c, d, e, F3, K3, M(55) ); + R( e, a, b, c, d, F3, K3, M(56) ); + R( d, e, a, b, c, F3, K3, M(57) ); + R( c, d, e, a, b, F3, K3, M(58) ); + R( b, c, d, e, a, F3, K3, M(59) ); + R( a, b, c, d, e, F4, K4, M(60) ); + R( e, a, b, c, d, F4, K4, M(61) ); + R( d, e, a, b, c, F4, K4, M(62) ); + R( c, d, e, a, b, F4, K4, M(63) ); + R( b, c, d, e, a, F4, K4, M(64) ); + R( a, b, c, d, e, F4, K4, M(65) ); + R( e, a, b, c, d, F4, K4, M(66) ); + R( d, e, a, b, c, F4, K4, M(67) ); + R( c, d, e, a, b, F4, K4, M(68) ); + R( b, c, d, e, a, F4, K4, M(69) ); + R( a, b, c, d, e, F4, K4, M(70) ); + R( e, a, b, c, d, F4, K4, M(71) ); + R( d, e, a, b, c, F4, K4, M(72) ); + R( c, d, e, a, b, F4, K4, M(73) ); + R( b, c, d, e, a, F4, K4, M(74) ); + R( a, b, c, d, e, F4, K4, M(75) ); + R( e, a, b, c, d, F4, K4, M(76) ); + R( d, e, a, b, c, F4, K4, M(77) ); + R( c, d, e, a, b, F4, K4, M(78) ); + R( b, c, d, e, a, F4, K4, M(79) ); + + a = ctx->A += a; + b = ctx->B += b; + c = ctx->C += c; + d = ctx->D += d; + e = ctx->E += e; + } +} + +/* ---- Object-Oriented Wrapper */ +SHA1Checksum::SHA1Checksum() +{ + sha1_init_ctx(&ctx); +} + +SHA1Checksum::~SHA1Checksum() +{ +} + +void SHA1Checksum::process(const void *data, size_t len) +{ + sha1_process_bytes(data, len, &ctx); +} + +bool SHA1Checksum::process_file(const char *filename) +{ + FILE *f = fopen(filename, "rb"); + if (f == NULL) + return false; + + while (!feof(f)) { + char buf[4096]; + size_t bytes = fread(buf, 1, sizeof(buf), f); + + if (ferror(f)) { + fclose(f); + return false; + } + + process(buf, bytes); + } + + fclose(f); + return true; +} + +const uint8_t *SHA1Checksum::checksum() +{ + sha1_finish_ctx(&ctx, resbuf); + return (const uint8_t *)resbuf; +} + +string SHA1Checksum::checksum_str() +{ + uint8_t resbuf[20]; + char hexbuf[4]; + string result = "sha1="; + + sha1_finish_ctx(&ctx, resbuf); + + for (int i = 0; i < 20; i++) { + sprintf(hexbuf, "%02x", resbuf[i]); + result += hexbuf; + } + + return result; +} diff --git a/third_party/sha1.h b/third_party/sha1.h new file mode 100644 index 0000000..1eb8d8d --- /dev/null +++ b/third_party/sha1.h @@ -0,0 +1,104 @@ +/* Declarations of functions and data types used for SHA1 sum library + * functions. + * part of Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc. + * Copyright (C) 2006-2007 The Regents of the University of California + * + * Original code (in C) is taken from GNU coreutils (Debian package 5.97-5). + * Modifications by Michael Vrable to integrate into + * Cumulus. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef SHA1_H +# define SHA1_H 1 + +# include +# include + +#include + +typedef uint32_t md5_uint32; + +/* Structure to save state of computation between the single steps. */ +struct sha1_ctx +{ + md5_uint32 A; + md5_uint32 B; + md5_uint32 C; + md5_uint32 D; + md5_uint32 E; + + md5_uint32 total[2]; + md5_uint32 buflen; + char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32)))); +}; + + +/* Initialize structure containing state of computation. */ +extern void sha1_init_ctx (struct sha1_ctx *ctx); + +/* Starting with the result of former calls of this function (or the + initialization function update the context for the next LEN bytes + starting at BUFFER. + It is necessary that LEN is a multiple of 64!!! */ +extern void sha1_process_block (const void *buffer, size_t len, + struct sha1_ctx *ctx); + +/* Starting with the result of former calls of this function (or the + initialization function update the context for the next LEN bytes + starting at BUFFER. + It is NOT required that LEN is a multiple of 64. */ +extern void sha1_process_bytes (const void *buffer, size_t len, + struct sha1_ctx *ctx); + +/* Process the remaining bytes in the buffer and put result from CTX + in first 20 bytes following RESBUF. The result is always in little + endian byte order, so that a byte-wise output yields to the wanted + ASCII representation of the message digest. + + IMPORTANT: On some systems it is required that RESBUF be correctly + aligned for a 32 bits value. */ +extern void *sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf); + + +/* Put result from CTX in first 20 bytes following RESBUF. The result is + always in little endian byte order, so that a byte-wise output yields + to the wanted ASCII representation of the message digest. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +extern void *sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf); + +/* An object-oriented wrapper around checksumming functionality. */ +class SHA1Checksum { +private: + struct sha1_ctx ctx; + char resbuf[20] __attribute__ ((__aligned__ (__alignof__ (md5_uint32)))); + +public: + SHA1Checksum(); + ~SHA1Checksum(); + + void process(const void *data, size_t len); + bool process_file(const char *filename); + const uint8_t *checksum(); + size_t checksum_size() const { return 20; } + std::string checksum_str(); +}; + +#endif