Start a proof-of-concept FUSE interface to old snapshots.

author Michael Vrable <mvrable@cs.ucsd.edu>

Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)

committer Michael Vrable <mvrable@turin.ucsd.edu>

Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)
author Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)
diff --git a/.gitignore b/.gitignore

index d0ae7ac..23672b5 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
  *.o
  *.pyc
-cumulus
-Makefile.dep
-version
+/cumulus
+/Makefile.dep
+/version
diff --git a/contrib/cumulus-fuse b/contrib/cumulus-fuse

new file mode 100755 (executable)

index 0000000..8849335
--- /dev/null
+++ b/contrib/cumulus-fuse
@@ -0,0 +1,199 @@
+#!/usr/bin/python
+#
+# FUSE interface to Cumulus, allowing snapshots to be mounted as a virtual
+# filesystem.
+#
+# Copyright (C) 2006-2008  The Regents of the University of California
+# Written by Michael Vrable <mvrable@cs.ucsd.edu>
+#
+# This program can be distributed under the terms of the GNU GPL, either
+# version 2 of the License, or (at your option) any later version.  See the
+# file COPYING.
+
+import itertools, os, stat, errno
+import fuse
+from fuse import Fuse
+import cumulus
+import cumulus.metadata
+
+fuse.fuse_python_api = (0, 2)
+
+# TODO: Figure out FUSE option parsing
+lowlevel = cumulus.LowlevelDataStore('/backups/lbs/turin')
+store = cumulus.ObjectStore(lowlevel)
+
+def _printable(ptr):
+    if ptr is None: return None
+    return tuple(x[1] for x in ptr)
+
+def parse_path(path):
+    """Strip leading slashe from path, and split apart into components."""
+    if not path.startswith('/'):
+        return None
+    if path == '/':
+        return []
+    else:
+        return path[1:].split('/')
+
+def load_metadata(path):
+    if type(path) != type([]):
+        path = parse_path(path)
+
+    if path is None or len(path) < 2:
+        return None
+
+    snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+    metadata = cumulus.metadata.Metadata(store, snapshot['Root'])
+    ptr = metadata.search(lambda x: cmp(x, path[1:]))
+    item = metadata._read(ptr)
+    if metadata._get_path(item) != path[1:]:
+        return None
+    return cumulus.MetadataItem(item, store)
+
+class MyStat(fuse.Stat):
+    def __init__(self):
+        self.st_mode = 0
+        self.st_ino = 0
+        self.st_dev = 0
+        self.st_nlink = 0
+        self.st_uid = 0
+        self.st_gid = 0
+        self.st_size = 0
+        self.st_atime = 0
+        self.st_mtime = 0
+        self.st_ctime = 0
+
+class CumulusFS(Fuse):
+    def getattr(self, path):
+        st = MyStat()
+        path = parse_path(path)
+
+        if path is None: return -errno.ENOENT
+        if path == []:
+            # Root directory
+            st.st_mode = stat.S_IFDIR | 0755
+            st.st_nlink = 2
+            return st
+
+        snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+        if len(path) == 1:
+            # Snapshot directory
+            st.st_mode = stat.S_IFDIR | 0755
+            st.st_nlink = 2
+        else:
+            # File contained within a snapshot
+            m = load_metadata(path)
+            if m is None:
+                return -errno.ENOENT
+
+            st.st_nlink = 1
+            st.st_uid = m.items.user[0]
+            st.st_gid = m.items.group[0]
+            st.st_mtime = m.items.mtime
+            st.st_ctime = m.items.ctime
+            st.st_atime = m.items.mtime
+            if m.items.type == 'd':
+                st.st_mode = stat.S_IFDIR | m.items.mode
+                st.st_nlink = 2
+            elif m.items.type == 'l':
+                st.st_mode = stat.S_IFLNK | m.items.mode
+            else:
+                st.st_mode = stat.S_IFREG | m.items.mode
+                st.st_size = m.items.size
+
+        return st
+
+    def _cumulus_readdir(self, metadata, path):
+        # Find pointer to base directory in metadata
+        ptr1 = metadata.search(lambda x: cmp(x, path))
+
+        # Find pointer to end of directory contents
+        def endcmp(p1):
+            def _cmp(p2):
+                if len(p2) > len(p1): p2 = p2[0:len(p1)]
+                if p2 > p1:
+                    return 1
+                else:
+                    return -1
+            return _cmp
+        ptr2 = metadata.search(endcmp(path), ptr1)
+
+        # Scan through looking for top-level files and directories.  Skip over
+        # data for files in subdirectories.
+        while metadata._cmp(ptr1, ptr2) < 0:
+            item = metadata._read(ptr1)
+            m = cumulus.MetadataItem(item, store)
+            if m.items.name == '.':
+                itempath = []
+            else:
+                itempath = m.items.name.split('/')
+            assert itempath[0:len(path)] == path
+
+            if len(itempath) == len(path):
+                ptr1 = metadata._advance(ptr1)
+                continue
+
+            if len(itempath) > len(path) + 1:
+                ptr1 = metadata.search(endcmp(itempath[0:len(path)+1]),
+                                       ptr1, ptr2)
+                continue
+
+            yield itempath[len(path)]
+            ptr1 = metadata._advance(ptr1)
+
+    def readdir(self, path, offset):
+        if path == '/':
+            for r in itertools.chain(('.', '..'), lowlevel.list_snapshots()):
+                yield fuse.Direntry(r)
+        else:
+            path = parse_path(path)
+            if path is None:
+                return
+            snapshot = cumulus.parse_full(store.load_snapshot(path[0]))
+            metadata = cumulus.metadata.Metadata(store, snapshot['Root'])
+            for r in itertools.chain(('.', '..'),
+                                     self._cumulus_readdir(metadata, path[1:])):
+                yield fuse.Direntry(r)
+
+    def readlink(self, path):
+        m = load_metadata(path)
+        if m is None:
+            return -errno.ENOENT
+        else:
+            return m.items.target
+
+    def open(self, path, flags):
+        m = load_metadata(path)
+        if m is None:
+            return -errno.ENOENT
+        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
+        if (flags & accmode) != os.O_RDONLY:
+            return -errno.EACCES
+
+    def read(self, path, size, offset):
+        m = load_metadata(path)
+        if m is None:
+            return -errno.ENOENT
+
+        return '\0' * size
+
+def main():
+    usage="""
+cumulus-fuse: Mount cumulus snapshots as a filesystem
+
+""" + Fuse.fusage
+    server = CumulusFS(version="%prog " + fuse.__version__,
+                       usage=usage,
+                       dash_s_do='setsingle')
+
+    server.parser.add_option(mountopt="root", metavar="PATH", default='/',
+                             help="read snapshots from PATH [default: %default]")
+
+    server.parse(errex=1)
+    print server.fuse_args
+    print server.fuse_args.assemble()
+    server.main()
+    store.cleanup()
+
+if __name__ == '__main__':
+    main()
diff --git a/python/cumulus/metadata.py b/python/cumulus/metadata.py

new file mode 100644 (file)

index 0000000..e5d5507
--- /dev/null
+++ b/python/cumulus/metadata.py
@@ -0,0 +1,182 @@
+"""Advanced metadata iterator for Cumulus snapshots.
+
+Allow fast random access to Cumulus metadata logs.  This requires that the
+underlying metadata log have been generated by a depth-first-search traversal
+of the filesystem, in sorted order.
+"""
+
+import cumulus
+
+class Metadata:
+    def __init__(self, object_store, root):
+        self.store = object_store
+        self.root = root
+
+    def _load(self, ref):
+        """Return the contents of the given object, as a list of text lines."""
+
+        data = self.store.get(ref)
+        return data.splitlines()
+
+    def _read(self, ptr):
+        """Parse the metadata item at the given address and return it."""
+
+        if ptr is None or ptr == []: return {}
+
+        (ref, n) = ptr[-1]
+        lines = self._load(ref)[n:]
+
+        try:
+            return cumulus.parse(lines, lambda l: len(l) == 0).next()
+        except StopIteration:
+            return {}
+
+    def _advance(self, ptr):
+        """Advance the specified metadata pointer to the next metadata item."""
+
+        if ptr == None:
+            return None
+
+        advanced = False
+        ptr = list(ptr)
+        if ptr == []:
+            ptr.append((self.root, 0))
+            advanced = True
+
+        while True:
+            last = ptr.pop(-1)
+            lines = self._load(last[0])
+
+            # Reached the end of the current object?  Advance pointer by one
+            # line in the containing object (or return None if reached the very
+            # end of input).
+            if last[1] >= len(lines):
+                if ptr == []: return None
+                last = ptr.pop(-1)
+                ptr.append((last[0], last[1] + 1))
+                advanced = True
+                continue
+
+            # Reached a line with an indirect reference?  Recurse.
+            line = lines[last[1]]
+            if line.startswith('@'):
+                ptr.append(last)
+                ptr.append((line[1:], 0))
+                advanced = True
+                continue
+
+            # Skip over blank lines.
+            if line == "":
+                ptr.append((last[0], last[1] + 1))
+                advanced = True
+                continue
+
+            # Skip over the remainder of a metadata stanza, if we started in
+            # the middle of one.
+            if not advanced:
+                ptr.append((last[0], last[1] + 1))
+                continue
+
+            # Done!  Found a non-blank line which is not an indirect reference.
+            ptr.append(last)
+            return ptr
+
+    def _cmp(self, ptr1, ptr2):
+        if ptr1 is None and ptr2 is None: return 0
+        if ptr1 is None: return 1
+        if ptr2 is None: return -1
+        return cmp(ptr1, ptr2)
+
+    def _get_path(self, metadata):
+        if metadata is None or 'name' not in metadata:
+            return None
+        path = metadata['name']
+        path = cumulus.MetadataItem.decode_str(path)
+        if path == '.': return []
+        return path.split('/')
+
+    def _midpoint(self, ptr1, ptr2):
+        """Return a pointer to some metadata item in the range [ptr1, ptr2)."""
+
+        if ptr1 == []: ptr1 = self._advance([])
+        if ptr1 is None: return None
+        if self._cmp(ptr1, ptr2) > 0: return None
+
+        #print "ptr1:", ptr1
+        #print "ptr2:", ptr2
+
+        level = 0
+        while True:
+            if level >= len(ptr1): return ptr1
+
+            if ptr2 is not None and level < len(ptr2) and ptr1[level] == ptr2[level]:
+                level += 1
+                continue
+
+            midpoint = ptr1[0:level]
+            lastref = ptr1[level][0]
+            limit = len(self._load(lastref))
+            if ptr2 is not None and level < len(ptr2) \
+                    and ptr1[level][0] == ptr2[level][0]:
+                limit = ptr2[level][1]
+            midpoint.append((lastref, (ptr1[level][1] + limit) // 2))
+
+            if self._cmp(midpoint, ptr1) < 0:
+                #print "    ...descend"
+                level += 1
+                continue
+
+            #print "Guess midpoint:", midpoint
+            midpoint = self._advance(midpoint)
+            #print "    ...advanced to", midpoint
+
+            if self._cmp(midpoint, ptr2) >= 0:
+                #print "    ...overshoot, trying again"
+                level += 1
+                continue
+
+            return midpoint
+
+    def search(self, searchfunc, ptr1=[], ptr2=None):
+        """Perform a binary search for name.
+
+        The search is restricted to the interval [ptr1, ptr2).  Return either a
+        pointer to the item for name, or if it doesn't exist the pointer to the
+        item which follows where it would have been (assuming the original
+        search interval included that location)."""
+
+        def _printable(ptr):
+            if ptr is None: return None
+            return tuple(x[1] for x in ptr)
+
+        while True:
+            #print _printable(ptr1), "...", _printable(ptr2)
+            if self._cmp(ptr1, ptr2) >= 0:
+                #print "    X", _printable(ptr1)
+                return ptr1
+
+            mid = self._midpoint(ptr1, ptr2)
+            midpath = self._get_path(self._read(mid))
+
+            c = searchfunc(midpath)
+            if c == 0:
+                #print "    =", _printable(mid), midpath
+                return mid
+            elif c < 0:
+                #print "    >", _printable(mid), midpath
+                ptr1 = self._advance(mid)
+            else:
+                #print "    <", _printable(mid), midpath
+                ptr2 = mid
+
+if __name__ == '__main__':
+    import cumulus
+
+    store = cumulus.ObjectStore(cumulus.LowlevelDataStore('/backups/lbs/turin'))
+    root = '9e0be287-a74c-4527-aa26-0a10e8f40f7d/00000243(sha1=316e034bb5b86e5ba00b1d54335427d7b742f4f9)'
+
+    metadata = Metadata(store, root)
+    ptr = metadata.search(['home', 'mvrable', 'docs'])
+    print ptr
+    print metadata._read(ptr)
+    store.cleanup()
diff --git a/python/cumulus/store/file.py b/python/cumulus/store/file.py

new file mode 100644 (file)

index 0000000..6034835
--- /dev/null
+++ b/python/cumulus/store/file.py
@@ -0,0 +1,37 @@
+import os, sys, tempfile
+
+import cumulus.store
+
+type_patterns = cumulus.store.type_patterns
+
+class FileStore(cumulus.store.Store):
+    def __init__(self, prefix):
+        while prefix.endswith("/") and prefix != "/": prefix = prefix[:-1]
+        self.prefix = prefix
+
+    def _get_path(self, type, name):
+        return "%s/%s" % (self.prefix, name)
+
+    def list(self, type):
+        files = os.listdir(self.prefix)
+        return (f for f in files if type_patterns[type].match(f))
+
+    def get(self, type, name):
+        k = self._get_path(type, name)
+        return open(k, 'rb')
+
+    def put(self, type, name, fp):
+        k = self._get_path(type, name)
+        out = open(k, 'wb')
+        buf = fp.read(4096)
+        while len(buf) > 0:
+            out.write(buf)
+            buf = fp.read(4096)
+
+    def delete(self, type, name):
+        k = self._get_path(type, name)
+        os.unlink(k)
+
+    def stat(self, type, name):
+        stat = os.stat(self._get_path(type, name))
+        return {'size': stat.st_size}
author	Michael Vrable <mvrable@cs.ucsd.edu>
	Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)
committer	Michael Vrable <mvrable@turin.ucsd.edu>
	Tue, 12 Aug 2008 22:14:35 +0000 (15:14 -0700)
.gitignore		patch \| blob \| history
contrib/cumulus-fuse	[new file with mode: 0755]	patch \| blob
python/cumulus/metadata.py	[new file with mode: 0644]	patch \| blob
python/cumulus/store/file.py	[new file with mode: 0644]	patch \| blob