Update copyright notices to use a central AUTHORS file.

[cumulus.git] / python / cumulus / __init__.py
diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py

index 424e38c..51755c7 100644 (file)
--- a/python/cumulus/__init__.py
+++ b/python/cumulus/__init__.py
@@ -1,7 +1,25 @@
-"""High-level interface for working with LBS archives.
+# Cumulus: Efficient Filesystem Backup to the Cloud
+# Copyright (C) 2008-2009, 2012 The Cumulus Developers
+# See the AUTHORS file for a list of contributors.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""High-level interface for working with Cumulus archives.
  
  This module provides an easy interface for reading from and manipulating
-various parts of an LBS archive:
+various parts of a Cumulus archive:
    - listing the snapshots and segments present
    - reading segment contents
    - parsing snapshot descriptors and snapshot metadata logs
@@ -9,13 +27,13 @@ various parts of an LBS archive:
  """
  
  from __future__ import division
-import os, re, sha, tarfile, tempfile, thread
+import hashlib, os, re, tarfile, tempfile, thread
  from pysqlite2 import dbapi2 as sqlite3
  
  import cumulus.store, cumulus.store.file
  
  # The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 8)         # LBS Snapshot v0.8
+FORMAT_VERSION = (0, 11)        # Cumulus Snapshot v0.11
  
  # Maximum number of nested indirect references allowed in a snapshot.
  MAX_RECURSION_DEPTH = 3
@@ -23,6 +41,15 @@ MAX_RECURSION_DEPTH = 3
  # All segments which have been accessed this session.
  accessed_segments = set()
  
+# Table of filename extensions of filtered (compressed or encrypted) segments,
+# and the corresponding commands used to undo the filter when reading.  These
+# are listed in priority order (entries earlier in the list are tried first).
+SEGMENT_FILTERS = [
+    (".gpg", "cumulus-filter-gpg --decrypt"),
+    (".gz", "gzip -dc"),
+    (".bz2", "bzip2 -dc"),
+]
+
  def uri_decode(s):
      """Decode a URI-encoded (%xx escapes) string."""
      def hex_decode(m): return chr(int(m.group(1), 16))
@@ -47,11 +74,13 @@ class Struct:
          return "<%s %s>" % (self.__class__, self.__dict__)
  
  CHECKSUM_ALGORITHMS = {
-    'sha1': sha.new
+    'sha1': hashlib.sha1,
+    'sha224': hashlib.sha224,
+    'sha256': hashlib.sha256,
  }
  
  class ChecksumCreator:
-    """Compute an LBS checksum for provided data.
+    """Compute a Cumulus checksum for provided data.
  
      The algorithm used is selectable, but currently defaults to sha1.
      """
@@ -110,6 +139,9 @@ class LowlevelDataStore:
                  return (t, filename)
          return (None, filename)
  
+    def scan(self):
+        self.store.scan()
+
      def lowlevel_open(self, filename):
          """Return a file-like object for reading data from the given file."""
  
@@ -186,19 +218,26 @@ class ObjectStore:
  
      def get_segment(self, segment):
          accessed_segments.add(segment)
-        raw = self.store.lowlevel_open(segment + ".tar.gpg")
  
-        (input, output) = os.popen2("lbs-filter-gpg --decrypt")
-        def copy_thread(src, dst):
-            BLOCK_SIZE = 4096
-            while True:
-                block = src.read(BLOCK_SIZE)
-                if len(block) == 0: break
-                dst.write(block)
-            dst.close()
+        for (extension, filter) in SEGMENT_FILTERS:
+            try:
+                raw = self.store.lowlevel_open(segment + ".tar" + extension)
+
+                (input, output) = os.popen2(filter)
+                def copy_thread(src, dst):
+                    BLOCK_SIZE = 4096
+                    while True:
+                        block = src.read(BLOCK_SIZE)
+                        if len(block) == 0: break
+                        dst.write(block)
+                    dst.close()
+
+                thread.start_new_thread(copy_thread, (raw, input))
+                return output
+            except:
+                pass
  
-        thread.start_new_thread(copy_thread, (raw, input))
-        return output
+        raise cumulus.store.NotFoundError
  
      def load_segment(self, segment):
          seg = tarfile.open(segment, 'r|', self.get_segment(segment))
@@ -305,7 +344,7 @@ def parse_full(lines):
  def parse_metadata_version(s):
      """Convert a string with the snapshot version format to a tuple."""
  
-    m = re.match(r"^LBS Snapshot v(\d+(\.\d+)*)$", s)
+    m = re.match(r"^(?:Cumulus|LBS) Snapshot v(\d+(\.\d+)*)$", s)
      if m is None:
          return ()
      else:
@@ -663,7 +702,7 @@ class LocalDatabase:
          """
  
          # The expired column of the block_index table is used when generating a
-        # new LBS snapshot.  A null value indicates that an object may be
+        # new Cumulus snapshot.  A null value indicates that an object may be
          # re-used.  Otherwise, an object must be written into a new segment if
          # needed.  Objects with distinct expired values will be written into
          # distinct segments, to allow for some grouping by age.  The value 0 is