1 # Cumulus: Efficient Filesystem Backup to the Cloud
2 # Copyright (C) 2008 The Cumulus Developers
3 # See the AUTHORS file for a list of contributors.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 """Advanced metadata iterator for Cumulus snapshots.
21 Allow fast random access to Cumulus metadata logs. This requires that the
22 underlying metadata log have been generated by a depth-first-search traversal
23 of the filesystem, in sorted order.
26 from __future__ import division, print_function, unicode_literals
31 def __init__(self, object_store, root):
32 self.store = object_store
36 """Return the contents of the given object, as a list of text lines."""
38 data = self.store.get(ref)
39 return data.splitlines()
42 """Parse the metadata item at the given address and return it."""
44 if ptr is None or ptr == []: return {}
47 lines = self._load(ref)[n:]
50 return next(cumulus.parse(lines, lambda l: len(l) == 0))
54 def _advance(self, ptr):
55 """Advance the specified metadata pointer to the next metadata item."""
63 ptr.append((self.root, 0))
68 lines = self._load(last[0])
70 # Reached the end of the current object? Advance pointer by one
71 # line in the containing object (or return None if reached the very
73 if last[1] >= len(lines):
74 if ptr == []: return None
76 ptr.append((last[0], last[1] + 1))
80 # Reached a line with an indirect reference? Recurse.
82 if line.startswith('@'):
84 ptr.append((line[1:], 0))
88 # Skip over blank lines.
90 ptr.append((last[0], last[1] + 1))
94 # Skip over the remainder of a metadata stanza, if we started in
97 ptr.append((last[0], last[1] + 1))
100 # Done! Found a non-blank line which is not an indirect reference.
104 def _cmp(self, ptr1, ptr2):
105 if ptr1 is None and ptr2 is None: return 0
106 if ptr1 is None: return 1
107 if ptr2 is None: return -1
108 return cmp(ptr1, ptr2)
110 def _get_path(self, metadata):
111 if metadata is None or 'name' not in metadata:
113 path = metadata['name']
114 path = cumulus.MetadataItem.decode_str(path)
115 if path == '.': return []
116 return path.split('/')
118 def _midpoint(self, ptr1, ptr2):
119 """Return a pointer to some metadata item in the range [ptr1, ptr2)."""
121 if ptr1 == []: ptr1 = self._advance([])
122 if ptr1 is None: return None
123 if self._cmp(ptr1, ptr2) > 0: return None
130 if level >= len(ptr1): return ptr1
132 if ptr2 is not None and level < len(ptr2) and ptr1[level] == ptr2[level]:
136 midpoint = ptr1[0:level]
137 lastref = ptr1[level][0]
138 limit = len(self._load(lastref))
139 if ptr2 is not None and level < len(ptr2) \
140 and ptr1[level][0] == ptr2[level][0]:
141 limit = ptr2[level][1]
142 midpoint.append((lastref, (ptr1[level][1] + limit) // 2))
144 if self._cmp(midpoint, ptr1) < 0:
149 #print "Guess midpoint:", midpoint
150 midpoint = self._advance(midpoint)
151 #print " ...advanced to", midpoint
153 if self._cmp(midpoint, ptr2) >= 0:
154 #print " ...overshoot, trying again"
160 def search(self, searchfunc, ptr1=[], ptr2=None):
161 """Perform a binary search for name.
163 The search is restricted to the interval [ptr1, ptr2). Return either a
164 pointer to the item for name, or if it doesn't exist the pointer to the
165 item which follows where it would have been (assuming the original
166 search interval included that location)."""
169 if ptr is None: return None
170 return tuple(x[1] for x in ptr)
173 #print _printable(ptr1), "...", _printable(ptr2)
174 if self._cmp(ptr1, ptr2) >= 0:
175 #print " X", _printable(ptr1)
178 mid = self._midpoint(ptr1, ptr2)
179 midpath = self._get_path(self._read(mid))
181 c = searchfunc(midpath)
183 #print " =", _printable(mid), midpath
186 #print " >", _printable(mid), midpath
187 ptr1 = self._advance(mid)
189 #print " <", _printable(mid), midpath
192 if __name__ == '__main__':
195 store = cumulus.ObjectStore(cumulus.LowlevelDataStore('/backups/lbs/turin'))
196 root = '9e0be287-a74c-4527-aa26-0a10e8f40f7d/00000243(sha1=316e034bb5b86e5ba00b1d54335427d7b742f4f9)'
198 metadata = Metadata(store, root)
199 ptr = metadata.search(['home', 'mvrable', 'docs'])
201 print(metadata._read(ptr))