1 /* Cumulus: Smart Filesystem Backup to Dumb Servers
3 * Copyright (C) 2006-2008 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
6 * Much of the code in this file is taken from LBFS, which is
7 * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org)
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 /* Compute incremental backups at a sub-file level by chopping files up into
25 * blocks in a content-sensitive manner (using Rabin fingerprints). This code
26 * is largely taken from LBFS, primarily the files:
27 * liblbfs/fingerprint.C (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
28 * liblbfs/rabinpoly.h (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp)
29 * liblbfs/rabinpoly.C (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp)
30 * async/msb.h (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp)
31 * async/msb.C (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp)
32 * but adapted and slimmed down to fit within Cumulus. */
41 #include <sys/types.h>
50 // Functions/data only needed internally go in a separate namespace. Public
51 // interfaces (at the end of the file) are in the global namespace.
// Polynomial handed to the rolling-hash window that chunk_compute_breaks()
// constructs.
54 #define FINGERPRINT_PT 0xbfe6b8a5bf378d83LL
// A position qualifies as a block boundary when the window fingerprint,
// taken modulo TARGET_CHUNK_SIZE, equals this value.
55 #define BREAKMARK_VALUE 0x78
// A fingerprint match only ends a block once the block is at least this many
// bytes long.
56 #define MIN_CHUNK_SIZE 2048
// A block is ended unconditionally once it reaches this many bytes.
57 #define MAX_CHUNK_SIZE 65535
// Modulus applied to the fingerprint in the break test; also the number that
// chunk_algorithm_name() formats into the algorithm name.
58 #define TARGET_CHUNK_SIZE 4096
// Path opened and read when a random polynomial candidate is drawn (polygen).
60 #define SFS_DEV_RANDOM "/dev/random"
// Pastes the LL suffix onto an integer literal.
62 #define INT64(n) n##LL
// 64-bit constant with only bit 63 set.
63 #define MSB64 INT64(0x8000000000000000)
65 template<class R> inline R
71 /* Highest bit set in a byte */
// bytemsb[b] is the 1-based position of the most significant set bit of the
// byte value b (bytemsb[1] == 1, bytemsb[0x80] == 8), and 0 for b == 0.
// fls32 uses it to resolve one byte of its argument at a time.
72 static const char bytemsb[0x100] = {
73 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
74 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
75 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
76 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
77 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
78 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
79 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
80 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
81 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
82 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
83 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
86 /* Find last set (most significant bit) */
// Forward declaration. The GCC `const` attribute declares that the result
// depends only on the argument value.
87 static inline u_int fls32 (uint32_t) __attribute__ ((const));
// Each visible return looks up one byte of v in bytemsb and adds that byte's
// bit offset (24, 16 or 8), giving a 1-based bit position.
// NOTE(review): the conditions that choose between these returns, and the
// return for values below 0x100, are not visible in this excerpt.
93 return 24 + bytemsb[v>>24];
95 return 16 + bytemsb[v>>16];
98 return 8 + bytemsb[v>>8];
// Forward declaration of the 64-bit variant, carrying the same GCC `const`
// attribute as fls32.
103 static inline u_int fls64 (u_int64_t) __attribute__ ((const));
// Result when the upper half decides: fls32 of h, shifted up by 32 positions.
// NOTE(review): h is defined on a line not shown -- presumably the high
// 32 bits of v; confirm.
109 return 32 + fls32 (h);
// Result computed from v truncated to its low 32 bits.
111 return fls32 ((u_int32_t) v);
// polymod (nh, nl, d): only part of the body is visible in this excerpt.
// polymmult passes the two outputs of polymult as (nh, nl) and its own
// modulus argument as d, so (nh, nl) appear to be the high and low 64-bit
// halves of one 128-bit operand -- NOTE(review): confirm against the full body.
115 polymod (uint64_t nh, uint64_t nl, uint64_t d)
// fls64 positions are 1-based, so k is the zero-based index of d's top set bit.
118 int k = fls64 (d) - 1;
// Walk the bits of nh from bit 62 down to bit 0. `<<` binds tighter than `&`,
// so the test is nh & (1 << i).
124 for (int i = 62; i >= 0; i--)
125 if (nh & INT64 (1) << i) {
// Walk the bits of nl from bit 63 down to bit k.
// NOTE(review): INT64 (1) is a signed long long, so the i == 63 iteration
// shifts a 1 into the sign bit; an unsigned literal would avoid relying on that.
130 for (int i = 63; i >= k; i--)
131 if (nl & INT64 (1) << i)
// polymult (php, plp, x, y): builds a result in two 64-bit halves, ph and pl.
// NOTE(review): the per-bit update and the stores through php / plp are not
// visible here -- presumably *php and *plp receive ph and pl; confirm.
137 polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y)
139 uint64_t ph = 0, pl = 0;
// Visits bits 1..63 of x. Bit 0 is outside this loop's range
// (NOTE(review): presumably handled by a line not shown).
142 for (int i = 1; i < 64; i++)
143 if (x & (INT64 (1) << i)) {
// polymmult (x, y, d): multiplies x by y with polymult, receiving the product
// as the pair (h, l), then returns polymod of that pair with modulus d.
// NOTE(review): the declaration of h and l is on a line not shown.
154 polymmult (uint64_t x, uint64_t y, uint64_t d)
157 polymult (&h, &l, x, y);
158 return polymod (h, l, d);
// polygcd (x, y): the visible steps alternately replace x with
// polymod (0, x, y) and y with polymod (0, y, x).
// NOTE(review): the enclosing loop, its exit tests and the return values are
// not visible; this looks like a Euclid-style remainder loop -- confirm.
163 polygcd (uint64_t x, uint64_t y)
168 x = polymod (0, x, y);
171 y = polymod (0, y, x);
// polyirreducible (f): used by polygen as the acceptance test for a candidate.
// NOTE(review): the initial value of u and both return statements are not
// visible in this excerpt.
176 polyirreducible (uint64_t f)
// m is half of the zero-based index of f's top set bit, rounded down.
179 int m = (fls64 (f) - 1) >> 1;
180 for (int i = 0; i < m; i++) {
// Replace u with u * u reduced by f.
181 u = polymmult (u, u, f);
// `^` is bitwise XOR here, not exponentiation: u with bit 1 toggled.
182 if (polygcd (f, u ^ 2) != 1)
// polygen (degree): draws 64-bit candidates from SFS_DEV_RANDOM and keeps
// retrying until polyirreducible accepts one.
// NOTE(review): the error-handling branches and the return are not visible.
189 polygen (u_int degree)
// degree must lie in 1..63 so the shift below stays inside 64 bits.
191 assert (degree > 0 && degree < 64);
// msb is the single bit at position `degree`; mask covers every bit below it.
192 uint64_t msb = INT64 (1) << degree;
193 uint64_t mask = msb - 1;
195 int rfd = open (SFS_DEV_RANDOM, O_RDONLY);
// %m is a glibc printf extension that prints strerror (errno).
197 fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM);
// A read that returns anything other than sizeof (f) bytes is reported as a
// failure.
201 if (read (rfd, &f, sizeof (f)) != implicit_cast<ssize_t> (sizeof (f))) {
202 fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM);
// Keep the low `degree` random bits and force bit `degree` on, clearing
// everything above it.
205 f = (f & mask) | msb;
206 } while (!polyirreducible (f));
// Members of rabinpoly (the class header is not visible in this excerpt).
214 uint64_t T[256]; // Lookup table for mod
217 const uint64_t poly; // Actual polynomial
219 explicit rabinpoly (uint64_t poly);
// append8 (p, m): shifts p left by one byte, places m in the vacated low byte,
// and XORs in the T entry indexed by p >> shift.
// NOTE(review): `shift` is declared on a line not shown.
220 uint64_t append8 (uint64_t p, uint8_t m) const
221 { return ((p << 8) | m) ^ T[p >> shift]; }
// The statements below fill the table T.
// NOTE(review): the header of the function containing them is not visible.
227 assert (poly >= 0x100);
// Zero-based index of poly's top set bit.
228 int xshift = fls64 (poly) - 1;
// T1 is the single bit at position xshift, reduced by poly.
230 uint64_t T1 = polymod (0, INT64 (1) << xshift, poly);
// Entry j combines polymmult (j, T1, poly) with j itself shifted up to xshift.
231 for (int j = 0; j < 256; j++)
232 T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift);
// Out-of-class constructor definition; its initializer list and body are not
// visible in this excerpt.
235 rabinpoly::rabinpoly (uint64_t p)
// window extends rabinpoly with a running fingerprint, a byte buffer buf
// indexed by bufpos, and a table U.
// NOTE(review): the declarations of size, buf, bufpos and U are not visible.
241 class window : public rabinpoly {
246 uint64_t fingerprint;
252 window (uint64_t poly);
// slide8 (m): advances bufpos, reads the byte om currently stored at that
// slot, XORs U[om] into the fingerprint, appends m with append8, and both
// stores and returns the new fingerprint.
// NOTE(review): what happens when bufpos reaches size, and the store of m into
// buf, are on lines not shown -- presumably a wrap to 0 and buf[bufpos] = m.
253 uint64_t slide8 (uint8_t m) {
254 if (++bufpos >= size)
256 uint8_t om = buf[bufpos];
258 return fingerprint = append8 (fingerprint ^ U[om], m);
// Zeroes the whole byte buffer.
// NOTE(review): the member function this line belongs to is not visible.
262 bzero (buf, sizeof (buf));
// Constructor: initializes the rabinpoly base with poly, fills the table U and
// zeroes the byte buffer.
266 window::window (uint64_t poly)
// bufpos starts at -1 because slide8 pre-increments it before indexing buf.
267 : rabinpoly (poly), fingerprint (0), bufpos (-1)
// Start from 1 and apply append8 with a zero byte size - 1 times.
269 uint64_t sizeshift = 1;
270 for (int i = 1; i < size; i++)
271 sizeshift = append8 (sizeshift, 0);
// U[i] is byte value i multiplied by sizeshift and reduced by poly; slide8 XORs
// U[om] into the fingerprint for the byte om it reads back from buf.
272 for (int i = 0; i < 256; i++)
273 U[i] = polymmult (i, sizeshift, poly);
274 bzero (buf, sizeof (buf));
277 } // end anonymous namespace
279 /* Public interface to this module. */
// Returns buflen / MIN_CHUNK_SIZE + 1, converted to int.
// NOTE(review): presumably an upper bound on the entries chunk_compute_breaks
// can write for a buffer of buflen bytes, for sizing its breakpoints array;
// confirm with callers.
280 int chunk_compute_max_num_breaks(size_t buflen)
282 return (buflen / MIN_CHUNK_SIZE) + 1;
// Slides a fingerprint window over buf[0..len) and writes block-ending
// positions into breakpoints.
// NOTE(review): the declarations and updates of start, i and pos, the return
// value, and the guard around the final store are not visible in this excerpt.
285 int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints)
288 window w(FINGERPRINT_PT);
292 for (pos = 0; pos < len; pos++) {
// buf[pos] is a char; it is converted to slide8's uint8_t parameter.
293 uint64_t sig = w.slide8(buf[pos]);
// Length of the current block, counting the byte at pos.
294 size_t block_len = pos - start + 1;
// End the block on a fingerprint match once it is at least MIN_CHUNK_SIZE
// bytes long, or unconditionally once it reaches MAX_CHUNK_SIZE bytes.
295 if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE
296 && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) {
// The recorded value is the index of the block's last byte.
297 breakpoints[i] = pos;
// Records the index of the last byte of buf.
// NOTE(review): len - 1 wraps around when len == 0 unless a condition on a
// line not shown prevents this store; confirm.
305 breakpoints[i] = len - 1;
// Formats the algorithm name as "lbfs-" followed by TARGET_CHUNK_SIZE in
// decimal ("lbfs-4096" with the current definition).
// NOTE(review): buf's declaration and the return statement are not visible.
// sprintf does no bounds checking, so buf must hold at least 10 bytes
// (9 characters plus the terminator); snprintf would enforce that.
312 string chunk_algorithm_name()
315 sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE);