*.o
*.pyc
-cumulus
-Makefile.dep
-version
+/cumulus
+/Makefile.dep
+/version
--- /dev/null
+#!/usr/bin/python
+#
+# FUSE interface to Cumulus, allowing snapshots to be mounted as a virtual
+# filesystem.
+#
+# Copyright (C) 2006-2008 The Regents of the University of California
+# Written by Michael Vrable <mvrable@cs.ucsd.edu>
+#
+# This program can be distributed under the terms of the GNU GPL, either
+# version 2 of the License, or (at your option) any later version. See the
+# file COPYING.
+
+import itertools, os, stat, errno
+import fuse
+from fuse import Fuse
+import cumulus
+import cumulus.metadata
+
+fuse.fuse_python_api = (0, 2)
+
+# TODO: Figure out FUSE option parsing
+lowlevel = cumulus.LowlevelDataStore('/backups/lbs/turin')
+store = cumulus.ObjectStore(lowlevel)
+
+def _printable(ptr):
+ if ptr is None: return None
+ return tuple(x[1] for x in ptr)
+
+def parse_path(path):
+ """Strip leading slashe from path, and split apart into components."""
+ if not path.startswith('/'):
+ return None
+ if path == '/':
+ return []
+ else:
+ return path[1:].split('/')
+
+def load_metadata(path):
+ if type(path) != type([]):
+ path = parse_path(path)
+
+ if path is None or len(path) < 2:
+ return None
+
+ snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+ metadata = cumulus.metadata.Metadata(store, snapshot['Root'])
+ ptr = metadata.search(lambda x: cmp(x, path[1:]))
+ item = metadata._read(ptr)
+ if metadata._get_path(item) != path[1:]:
+ return None
+ return cumulus.MetadataItem(item, store)
+
+class MyStat(fuse.Stat):
+ def __init__(self):
+ self.st_mode = 0
+ self.st_ino = 0
+ self.st_dev = 0
+ self.st_nlink = 0
+ self.st_uid = 0
+ self.st_gid = 0
+ self.st_size = 0
+ self.st_atime = 0
+ self.st_mtime = 0
+ self.st_ctime = 0
+
+class CumulusFS(Fuse):
+ def getattr(self, path):
+ st = MyStat()
+ path = parse_path(path)
+
+ if path is None: return -errno.ENOENT
+ if path == []:
+ # Root directory
+ st.st_mode = stat.S_IFDIR | 0755
+ st.st_nlink = 2
+ return st
+
+ snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+ if len(path) == 1:
+ # Snapshot directory
+ st.st_mode = stat.S_IFDIR | 0755
+ st.st_nlink = 2
+ else:
+ # File contained within a snapshot
+ m = load_metadata(path)
+ if m is None:
+ return -errno.ENOENT
+
+ st.st_nlink = 1
+ st.st_uid = m.items.user[0]
+ st.st_gid = m.items.group[0]
+ st.st_mtime = m.items.mtime
+ st.st_ctime = m.items.ctime
+ st.st_atime = m.items.mtime
+ if m.items.type == 'd':
+ st.st_mode = stat.S_IFDIR | m.items.mode
+ st.st_nlink = 2
+ elif m.items.type == 'l':
+ st.st_mode = stat.S_IFLNK | m.items.mode
+ else:
+ st.st_mode = stat.S_IFREG | m.items.mode
+ st.st_size = m.items.size
+
+ return st
+
+ def _cumulus_readdir(self, metadata, path):
+ # Find pointer to base directory in metadata
+ ptr1 = metadata.search(lambda x: cmp(x, path))
+
+ # Find pointer to end of directory contents
+ def endcmp(p1):
+ def _cmp(p2):
+ if len(p2) > len(p1): p2 = p2[0:len(p1)]
+ if p2 > p1:
+ return 1
+ else:
+ return -1
+ return _cmp
+ ptr2 = metadata.search(endcmp(path), ptr1)
+
+ # Scan through looking for top-level files and directories. Skip over
+ # data for files in subdirectories.
+ while metadata._cmp(ptr1, ptr2) < 0:
+ item = metadata._read(ptr1)
+ m = cumulus.MetadataItem(item, store)
+ if m.items.name == '.':
+ itempath = []
+ else:
+ itempath = m.items.name.split('/')
+ assert itempath[0:len(path)] == path
+
+ if len(itempath) == len(path):
+ ptr1 = metadata._advance(ptr1)
+ continue
+
+ if len(itempath) > len(path) + 1:
+ ptr1 = metadata.search(endcmp(itempath[0:len(path)+1]),
+ ptr1, ptr2)
+ continue
+
+ yield itempath[len(path)]
+ ptr1 = metadata._advance(ptr1)
+
+ def readdir(self, path, offset):
+ if path == '/':
+ for r in itertools.chain(('.', '..'), lowlevel.list_snapshots()):
+ yield fuse.Direntry(r)
+ else:
+ path = parse_path(path)
+ if path is None:
+ return
+ snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+ metadata = cumulus.metadata.Metadata(store, snapshot['Root'])
+ for r in itertools.chain(('.', '..'),
+ self._cumulus_readdir(metadata, path[1:])):
+ yield fuse.Direntry(r)
+
+ def readlink(self, path):
+ m = load_metadata(path)
+ if m is None:
+ return -errno.ENOENT
+ else:
+ return m.items.target
+
+ def open(self, path, flags):
+ m = load_metadata(path)
+ if m is None:
+ return -errno.ENOENT
+ accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
+ if (flags & accmode) != os.O_RDONLY:
+ return -errno.EACCES
+
+ def read(self, path, size, offset):
+ m = load_metadata(path)
+ if m is None:
+ return -errno.ENOENT
+
+ return '\0' * size
+
+def main():
+ usage="""
+cumulus-fuse: Mount cumulus snapshots as a filesystem
+
+""" + Fuse.fusage
+ server = CumulusFS(version="%prog " + fuse.__version__,
+ usage=usage,
+ dash_s_do='setsingle')
+
+ server.parser.add_option(mountopt="root", metavar="PATH", default='/',
+ help="read snapshots from PATH [default: %default]")
+
+ server.parse(errex=1)
+ print server.fuse_args
+ print server.fuse_args.assemble()
+ server.main()
+ store.cleanup()
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+"""Advanced metadata iterator for Cumulus snapshots.
+
+Allow fast random access to Cumulus metadata logs. This requires that the
+underlying metadata log have been generated by a depth-first-search traversal
+of the filesystem, in sorted order.
+"""
+
+import cumulus
+
+class Metadata:
+ def __init__(self, object_store, root):
+ self.store = object_store
+ self.root = root
+
+ def _load(self, ref):
+ """Return the contents of the given object, as a list of text lines."""
+
+ data = self.store.get(ref)
+ return data.splitlines()
+
+ def _read(self, ptr):
+ """Parse the metadata item at the given address and return it."""
+
+ if ptr is None or ptr == []: return {}
+
+ (ref, n) = ptr[-1]
+ lines = self._load(ref)[n:]
+
+ try:
+ return cumulus.parse(lines, lambda l: len(l) == 0).next()
+ except StopIteration:
+ return {}
+
+ def _advance(self, ptr):
+ """Advance the specified metadata pointer to the next metadata item."""
+
+ if ptr == None:
+ return None
+
+ advanced = False
+ ptr = list(ptr)
+ if ptr == []:
+ ptr.append((self.root, 0))
+ advanced = True
+
+ while True:
+ last = ptr.pop(-1)
+ lines = self._load(last[0])
+
+ # Reached the end of the current object? Advance pointer by one
+ # line in the containing object (or return None if reached the very
+ # end of input).
+ if last[1] >= len(lines):
+ if ptr == []: return None
+ last = ptr.pop(-1)
+ ptr.append((last[0], last[1] + 1))
+ advanced = True
+ continue
+
+ # Reached a line with an indirect reference? Recurse.
+ line = lines[last[1]]
+ if line.startswith('@'):
+ ptr.append(last)
+ ptr.append((line[1:], 0))
+ advanced = True
+ continue
+
+ # Skip over blank lines.
+ if line == "":
+ ptr.append((last[0], last[1] + 1))
+ advanced = True
+ continue
+
+ # Skip over the remainder of a metadata stanza, if we started in
+ # the middle of one.
+ if not advanced:
+ ptr.append((last[0], last[1] + 1))
+ continue
+
+ # Done! Found a non-blank line which is not an indirect reference.
+ ptr.append(last)
+ return ptr
+
+ def _cmp(self, ptr1, ptr2):
+ if ptr1 is None and ptr2 is None: return 0
+ if ptr1 is None: return 1
+ if ptr2 is None: return -1
+ return cmp(ptr1, ptr2)
+
+ def _get_path(self, metadata):
+ if metadata is None or 'name' not in metadata:
+ return None
+ path = metadata['name']
+ path = cumulus.MetadataItem.decode_str(path)
+ if path == '.': return []
+ return path.split('/')
+
+ def _midpoint(self, ptr1, ptr2):
+ """Return a pointer to some metadata item in the range [ptr1, ptr2)."""
+
+ if ptr1 == []: ptr1 = self._advance([])
+ if ptr1 is None: return None
+ if self._cmp(ptr1, ptr2) > 0: return None
+
+ #print "ptr1:", ptr1
+ #print "ptr2:", ptr2
+
+ level = 0
+ while True:
+ if level >= len(ptr1): return ptr1
+
+ if ptr2 is not None and level < len(ptr2) and ptr1[level] == ptr2[level]:
+ level += 1
+ continue
+
+ midpoint = ptr1[0:level]
+ lastref = ptr1[level][0]
+ limit = len(self._load(lastref))
+ if ptr2 is not None and level < len(ptr2) \
+ and ptr1[level][0] == ptr2[level][0]:
+ limit = ptr2[level][1]
+ midpoint.append((lastref, (ptr1[level][1] + limit) // 2))
+
+ if self._cmp(midpoint, ptr1) < 0:
+ #print " ...descend"
+ level += 1
+ continue
+
+ #print "Guess midpoint:", midpoint
+ midpoint = self._advance(midpoint)
+ #print " ...advanced to", midpoint
+
+ if self._cmp(midpoint, ptr2) >= 0:
+ #print " ...overshoot, trying again"
+ level += 1
+ continue
+
+ return midpoint
+
+ def search(self, searchfunc, ptr1=[], ptr2=None):
+ """Perform a binary search for name.
+
+ The search is restricted to the interval [ptr1, ptr2). Return either a
+ pointer to the item for name, or if it doesn't exist the pointer to the
+ item which follows where it would have been (assuming the original
+ search interval included that location)."""
+
+ def _printable(ptr):
+ if ptr is None: return None
+ return tuple(x[1] for x in ptr)
+
+ while True:
+ #print _printable(ptr1), "...", _printable(ptr2)
+ if self._cmp(ptr1, ptr2) >= 0:
+ #print " X", _printable(ptr1)
+ return ptr1
+
+ mid = self._midpoint(ptr1, ptr2)
+ midpath = self._get_path(self._read(mid))
+
+ c = searchfunc(midpath)
+ if c == 0:
+ #print " =", _printable(mid), midpath
+ return mid
+ elif c < 0:
+ #print " >", _printable(mid), midpath
+ ptr1 = self._advance(mid)
+ else:
+ #print " <", _printable(mid), midpath
+ ptr2 = mid
+
+if __name__ == '__main__':
+ import cumulus
+
+ store = cumulus.ObjectStore(cumulus.LowlevelDataStore('/backups/lbs/turin'))
+ root = '9e0be287-a74c-4527-aa26-0a10e8f40f7d/00000243(sha1=316e034bb5b86e5ba00b1d54335427d7b742f4f9)'
+
+ metadata = Metadata(store, root)
+ ptr = metadata.search(['home', 'mvrable', 'docs'])
+ print ptr
+ print metadata._read(ptr)
+ store.cleanup()
--- /dev/null
+import os, sys, tempfile
+
+import cumulus.store
+
+type_patterns = cumulus.store.type_patterns
+
+class FileStore(cumulus.store.Store):
+ def __init__(self, prefix):
+ while prefix.endswith("/") and prefix != "/": prefix = prefix[:-1]
+ self.prefix = prefix
+
+ def _get_path(self, type, name):
+ return "%s/%s" % (self.prefix, name)
+
+ def list(self, type):
+ files = os.listdir(self.prefix)
+ return (f for f in files if type_patterns[type].match(f))
+
+ def get(self, type, name):
+ k = self._get_path(type, name)
+ return open(k, 'rb')
+
+ def put(self, type, name, fp):
+ k = self._get_path(type, name)
+ out = open(k, 'wb')
+ buf = fp.read(4096)
+ while len(buf) > 0:
+ out.write(buf)
+ buf = fp.read(4096)
+
+ def delete(self, type, name):
+ k = self._get_path(type, name)
+ os.unlink(k)
+
+ def stat(self, type, name):
+ stat = os.stat(self._get_path(type, name))
+ return {'size': stat.st_size}