1 # Cumulus: Efficient Filesystem Backup to the Cloud
2 # Copyright (C) 2008 The Cumulus Developers
3 # See the AUTHORS file for a list of contributors.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 """Advanced metadata iterator for Cumulus snapshots.
21 Allow fast random access to Cumulus metadata logs. This requires that the
22 underlying metadata log have been generated by a depth-first-search traversal
23 of the filesystem, in sorted order.
29 def __init__(self, object_store, root):
30 self.store = object_store
34 """Return the contents of the given object, as a list of text lines."""
36 data = self.store.get(ref)
37 return data.splitlines()
40 """Parse the metadata item at the given address and return it."""
42 if ptr is None or ptr == []: return {}
45 lines = self._load(ref)[n:]
48 return next(cumulus.parse(lines, lambda l: len(l) == 0))
52 def _advance(self, ptr):
53 """Advance the specified metadata pointer to the next metadata item."""
61 ptr.append((self.root, 0))
66 lines = self._load(last[0])
68 # Reached the end of the current object? Advance pointer by one
69 # line in the containing object (or return None if reached the very
71 if last[1] >= len(lines):
72 if ptr == []: return None
74 ptr.append((last[0], last[1] + 1))
78 # Reached a line with an indirect reference? Recurse.
80 if line.startswith('@'):
82 ptr.append((line[1:], 0))
86 # Skip over blank lines.
88 ptr.append((last[0], last[1] + 1))
92 # Skip over the remainder of a metadata stanza, if we started in
95 ptr.append((last[0], last[1] + 1))
98 # Done! Found a non-blank line which is not an indirect reference.
102 def _cmp(self, ptr1, ptr2):
103 if ptr1 is None and ptr2 is None: return 0
104 if ptr1 is None: return 1
105 if ptr2 is None: return -1
106 return cmp(ptr1, ptr2)
108 def _get_path(self, metadata):
109 if metadata is None or 'name' not in metadata:
111 path = metadata['name']
112 path = cumulus.MetadataItem.decode_str(path)
113 if path == '.': return []
114 return path.split('/')
116 def _midpoint(self, ptr1, ptr2):
117 """Return a pointer to some metadata item in the range [ptr1, ptr2)."""
119 if ptr1 == []: ptr1 = self._advance([])
120 if ptr1 is None: return None
121 if self._cmp(ptr1, ptr2) > 0: return None
128 if level >= len(ptr1): return ptr1
130 if ptr2 is not None and level < len(ptr2) and ptr1[level] == ptr2[level]:
134 midpoint = ptr1[0:level]
135 lastref = ptr1[level][0]
136 limit = len(self._load(lastref))
137 if ptr2 is not None and level < len(ptr2) \
138 and ptr1[level][0] == ptr2[level][0]:
139 limit = ptr2[level][1]
140 midpoint.append((lastref, (ptr1[level][1] + limit) // 2))
142 if self._cmp(midpoint, ptr1) < 0:
147 #print "Guess midpoint:", midpoint
148 midpoint = self._advance(midpoint)
149 #print " ...advanced to", midpoint
151 if self._cmp(midpoint, ptr2) >= 0:
152 #print " ...overshoot, trying again"
158 def search(self, searchfunc, ptr1=[], ptr2=None):
159 """Perform a binary search for name.
161 The search is restricted to the interval [ptr1, ptr2). Return either a
162 pointer to the item for name, or if it doesn't exist the pointer to the
163 item which follows where it would have been (assuming the original
164 search interval included that location)."""
167 if ptr is None: return None
168 return tuple(x[1] for x in ptr)
171 #print _printable(ptr1), "...", _printable(ptr2)
172 if self._cmp(ptr1, ptr2) >= 0:
173 #print " X", _printable(ptr1)
176 mid = self._midpoint(ptr1, ptr2)
177 midpath = self._get_path(self._read(mid))
179 c = searchfunc(midpath)
181 #print " =", _printable(mid), midpath
184 #print " >", _printable(mid), midpath
185 ptr1 = self._advance(mid)
187 #print " <", _printable(mid), midpath
190 if __name__ == '__main__':
193 store = cumulus.ObjectStore(cumulus.LowlevelDataStore('/backups/lbs/turin'))
194 root = '9e0be287-a74c-4527-aa26-0a10e8f40f7d/00000243(sha1=316e034bb5b86e5ba00b1d54335427d7b742f4f9)'
196 metadata = Metadata(store, root)
197 ptr = metadata.search(['home', 'mvrable', 'docs'])
199 print(metadata._read(ptr))