1 """Advanced metadata iterator for Cumulus snapshots.
3 Allow fast random access to Cumulus metadata logs. This requires that the
4 underlying metadata log have been generated by a depth-first-search traversal
5 of the filesystem, in sorted order.
11 def __init__(self, object_store, root):
12 self.store = object_store
16 """Return the contents of the given object, as a list of text lines."""
18 data = self.store.get(ref)
19 return data.splitlines()
22 """Parse the metadata item at the given address and return it."""
24 if ptr is None or ptr == []: return {}
27 lines = self._load(ref)[n:]
30 return cumulus.parse(lines, lambda l: len(l) == 0).next()
34 def _advance(self, ptr):
35 """Advance the specified metadata pointer to the next metadata item."""
43 ptr.append((self.root, 0))
48 lines = self._load(last[0])
50 # Reached the end of the current object? Advance pointer by one
51 # line in the containing object (or return None if reached the very
53 if last[1] >= len(lines):
54 if ptr == []: return None
56 ptr.append((last[0], last[1] + 1))
60 # Reached a line with an indirect reference? Recurse.
62 if line.startswith('@'):
64 ptr.append((line[1:], 0))
68 # Skip over blank lines.
70 ptr.append((last[0], last[1] + 1))
74 # Skip over the remainder of a metadata stanza, if we started in
77 ptr.append((last[0], last[1] + 1))
80 # Done! Found a non-blank line which is not an indirect reference.
84 def _cmp(self, ptr1, ptr2):
85 if ptr1 is None and ptr2 is None: return 0
86 if ptr1 is None: return 1
87 if ptr2 is None: return -1
88 return cmp(ptr1, ptr2)
90 def _get_path(self, metadata):
91 if metadata is None or 'name' not in metadata:
93 path = metadata['name']
94 path = cumulus.MetadataItem.decode_str(path)
95 if path == '.': return []
96 return path.split('/')
98 def _midpoint(self, ptr1, ptr2):
99 """Return a pointer to some metadata item in the range [ptr1, ptr2)."""
101 if ptr1 == []: ptr1 = self._advance([])
102 if ptr1 is None: return None
103 if self._cmp(ptr1, ptr2) > 0: return None
110 if level >= len(ptr1): return ptr1
112 if ptr2 is not None and level < len(ptr2) and ptr1[level] == ptr2[level]:
116 midpoint = ptr1[0:level]
117 lastref = ptr1[level][0]
118 limit = len(self._load(lastref))
119 if ptr2 is not None and level < len(ptr2) \
120 and ptr1[level][0] == ptr2[level][0]:
121 limit = ptr2[level][1]
122 midpoint.append((lastref, (ptr1[level][1] + limit) // 2))
124 if self._cmp(midpoint, ptr1) < 0:
129 #print "Guess midpoint:", midpoint
130 midpoint = self._advance(midpoint)
131 #print " ...advanced to", midpoint
133 if self._cmp(midpoint, ptr2) >= 0:
134 #print " ...overshoot, trying again"
140 def search(self, searchfunc, ptr1=[], ptr2=None):
141 """Perform a binary search for name.
143 The search is restricted to the interval [ptr1, ptr2). Return either a
144 pointer to the item for name, or if it doesn't exist the pointer to the
145 item which follows where it would have been (assuming the original
146 search interval included that location)."""
149 if ptr is None: return None
150 return tuple(x[1] for x in ptr)
153 #print _printable(ptr1), "...", _printable(ptr2)
154 if self._cmp(ptr1, ptr2) >= 0:
155 #print " X", _printable(ptr1)
158 mid = self._midpoint(ptr1, ptr2)
159 midpath = self._get_path(self._read(mid))
161 c = searchfunc(midpath)
163 #print " =", _printable(mid), midpath
166 #print " >", _printable(mid), midpath
167 ptr1 = self._advance(mid)
169 #print " <", _printable(mid), midpath
172 if __name__ == '__main__':
175 store = cumulus.ObjectStore(cumulus.LowlevelDataStore('/backups/lbs/turin'))
176 root = '9e0be287-a74c-4527-aa26-0a10e8f40f7d/00000243(sha1=316e034bb5b86e5ba00b1d54335427d7b742f4f9)'
178 metadata = Metadata(store, root)
179 ptr = metadata.search(['home', 'mvrable', 'docs'])
181 print metadata._read(ptr)