Upgrades to utility code for new formats, and a few more database tweaks.

author Michael Vrable <mvrable@cs.ucsd.edu>

Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)

committer Michael Vrable <mvrable@turin.ucsd.edu>

Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)
author Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)
diff --git a/contrib/restore.pl b/contrib/restore.pl

index ad1f594..0c7ee21 100755 (executable)
--- a/contrib/restore.pl
+++ b/contrib/restore.pl
@@ -243,7 +243,7 @@ sub process_file {
      # Restore the specified file.  How to do so depends upon the file type, so
      # dispatch based on that.
      my $dest = "$DEST_DIR/$filename";
-    if ($type eq '-') {
+    if ($type eq '-' || $type eq 'f') {
          # Regular file
          unpack_file($filename, %info);
      } elsif ($type eq 'd') {
@@ -253,11 +253,12 @@ sub process_file {
          }
      } elsif ($type eq 'l') {
          # Symlink
-        if (!defined($info{contents})) {
+        my $target = $info{target} || $info{contents};
+        if (!defined($target)) {
              die "Symlink $filename has no value specified";
          }
-        my $contents = uri_decode($info{contents});
-        symlink $contents, $dest
+        $target = uri_decode($target);
+        symlink $target, $dest
              or die "Cannot create symlink $filename: $!";
  
          # TODO: We can't properly restore all metadata for symbolic links
diff --git a/contrib/upgrade0.6-localdb.sql b/contrib/upgrade0.6-localdb.sql

index 0733ba1..dbf7789 100644 (file)
--- a/contrib/upgrade0.6-localdb.sql
+++ b/contrib/upgrade0.6-localdb.sql
@@ -4,15 +4,16 @@
  -- This script should be loaded after connecting to the database to be
  -- upgraded.
  
--- Database schema changes: the size column was added to the segments table,
--- and the segments_used table was added.  Rather than upgrade the segments
--- table in-place, we create a new table and then rename it over the old
--- segments table.
+-- Database schema changes: the size and mtime columns were added to the
+-- segments table, and the segments_used table was added.  Rather than upgrade
+-- the segments table in-place, we create a new table and then rename it over
+-- the old segments table.
  create table segments_new (
      segmentid integer primary key,
      segment text unique not null,
      path text,
      checksum text,
+    mtime real,
      size integer
  );
  
@@ -25,11 +26,12 @@ create table segments_used (
  -- Compute the size of each of the segments, if possible, based on our
  -- knowledge of the objects stored in them.
  insert into segments_new
-select segmentid, segment, path, checksum, size
+select segmentid, segment, path, checksum, mtime, size
  from
      (select segmentid, segment, path, checksum from segments)
  left join
-    (select segmentid, sum(size) as size from block_index group by segmentid)
+    (select segmentid, sum(size) as size, max(timestamp) as mtime
+     from block_index group by segmentid)
  using (segmentid);
  
  drop table segments;
@@ -50,3 +52,15 @@ using (segmentid);
  
  -- The snapshot_contents table is obsolete.
  drop table snapshot_contents;
+
+-- Upgrade database views.
+drop view cleaning_order;
+drop view segment_info;
+
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+       utilization
+from segments join
+     (select segmentid, max(utilization) as utilization
+      from segments_used group by segmentid)
+using (segmentid);
diff --git a/lbs-util b/lbs-util

index 5c12438..8789a75 100755 (executable)
--- a/lbs-util
+++ b/lbs-util
@@ -6,9 +6,9 @@ import getpass, os, stat, sys, time
  from optparse import OptionParser
  import lbs
  
-# We support up to "LBS Snapshot v0.2" formats, but are also limited by the lbs
+# We support up to "LBS Snapshot v0.6" formats, but are also limited by the lbs
  # module.
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 2))
+FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 6))
  
  def check_version(format):
      ver = lbs.parse_metadata_version(format)
@@ -138,7 +138,7 @@ def cmd_verify_snapshots(snapshots):
          print "## Root:", d['Root']
          metadata = lbs.iterate_metadata(store, d['Root'])
          for m in metadata:
-            if m.fields['type'] != '-': continue
+            if m.fields['type'] not in ('-', 'f'): continue
              print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
              verifier = lbs.ChecksumVerifier(m.fields['checksum'])
              size = 0
@@ -181,7 +181,7 @@ def cmd_restore_snapshot(args):
              if not os.path.isdir(path):
                  os.makedirs(path)
  
-            if m.items.type == '-':
+            if m.items.type in ('-', 'f'):
                  file = open(destpath, 'wb')
                  verifier = lbs.ChecksumVerifier(m.items.checksum)
                  size = 0
@@ -199,7 +199,12 @@ def cmd_restore_snapshot(args):
                  if filename != '.':
                      os.mkdir(destpath)
              elif m.items.type == 'l':
-                os.symlink(m.items.contents, destpath)
+                try:
+                    target = m.items.target
+                except:
+                    # Old (v0.2 format) name for 'target'
+                    target = m.items.contents
+                os.symlink(target, destpath)
              elif m.items.type == 'p':
                  os.mkfifo(destpath)
              elif m.items.type in ('c', 'b'):
diff --git a/lbs.py b/lbs.py

index 3f647c4..7a391c7 100644 (file)
--- a/lbs.py
+++ b/lbs.py
@@ -13,7 +13,7 @@ import os, re, sha, tarfile, tempfile, thread
  from pysqlite2 import dbapi2 as sqlite3
  
  # The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 2)         # LBS Snapshot v0.2
+FORMAT_VERSION = (0, 6)         # LBS Snapshot v0.6
  
  # Maximum number of nested indirect references allowed in a snapshot.
  MAX_RECURSION_DEPTH = 3
@@ -402,12 +402,14 @@ MetadataItem.field_types = {
      'device': MetadataItem.decode_device,
      'user': MetadataItem.decode_user,
      'group': MetadataItem.decode_user,
+    'ctime': MetadataItem.decode_int,
      'mtime': MetadataItem.decode_int,
      'links': MetadataItem.decode_int,
      'inode': MetadataItem.raw_str,
      'checksum': MetadataItem.decode_str,
      'size': MetadataItem.decode_int,
      'contents': MetadataItem.decode_str,
+    'target': MetadataItem.decode_str,
  }
  
  def iterate_metadata(object_store, root):
@@ -452,17 +454,16 @@ class LocalDatabase:
                         where snapshotid < (select max(snapshotid)
                                             from snapshots)""")
  
-        # Delete entries in the snapshot_contents table which are for
-        # non-existent snapshots.
-        cur.execute("""delete from snapshot_contents
+        # Delete entries in the segments_used table which are for non-existent
+        # snapshots.
+        cur.execute("""delete from segments_used
                         where snapshotid not in
                             (select snapshotid from snapshots)""")
  
          # Find segments which contain no objects used by any current snapshots,
          # and delete them from the segment table.
          cur.execute("""delete from segments where segmentid not in
-                           (select distinct segmentid from snapshot_contents
-                                natural join block_index)""")
+                           (select segmentid from segments_used)""")
  
          # Finally, delete objects contained in non-existent segments.  We can't
          # simply delete unused objects, since we use the set of unused objects
@@ -574,18 +575,10 @@ class LocalDatabase:
  
          cur = self.cursor()
  
-        # First step: Mark all unused-and-expired objects with expired = -1,
-        # which will cause us to mostly ignore these objects when rebalancing.
-        # At the end, we will set these objects to be in group expired = 0.
-        # Mark expired objects which still seem to be in use with expired = 0;
-        # these objects will later have values set to indicate groupings of
-        # objects when repacking.
-        cur.execute("""update block_index set expired = -1
-                       where expired is not null""")
-
+        # Mark all expired objects with expired = 0; these objects will later
+        # have values set to indicate groupings of objects when repacking.
          cur.execute("""update block_index set expired = 0
-                       where expired is not null and blockid in
-                           (select blockid from snapshot_contents)""")
+                       where expired is not null""")
  
          # We will want to aim for at least one full segment for each bucket
          # that we eventually create, but don't know how many bytes that should
@@ -595,7 +588,7 @@ class LocalDatabase:
          # segments, but for now don't worry too much about that.)  If we can't
          # compute an average, it's probably because there are no expired
          # segments, so we have no more work to do.
-        cur.execute("""select avg(size) from segment_info
+        cur.execute("""select avg(size) from segments
                         where segmentid in
                             (select distinct segmentid from block_index
                              where expired is not null)""")
@@ -678,6 +671,5 @@ class LocalDatabase:
          cutoffs.reverse()
          for i in range(len(cutoffs)):
              cur.execute("""update block_index set expired = ?
-                           where round(? - timestamp) > ? and expired >= 0""",
+                           where round(? - timestamp) > ?""",
                          (i, now, cutoffs[i]))
-        cur.execute("update block_index set expired = 0 where expired = -1")
diff --git a/schema.sql b/schema.sql

index a0556c5..e0f16a6 100644 (file)
--- a/schema.sql
+++ b/schema.sql
@@ -17,6 +17,7 @@ create table segments (
      segment text unique not null,
      path text,
      checksum text,
+    mtime real,
      size integer
  );
  
@@ -39,3 +40,12 @@ create table segments_used (
      segmentid integer not null,
      utilization real
  );
+
+-- Overall estimate of segment utilization, for all snapshots combined.
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+       utilization
+from segments join
+     (select segmentid, max(utilization) as utilization
+      from segments_used group by segmentid)
+using (segmentid);
author	Michael Vrable <mvrable@cs.ucsd.edu>
	Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)
committer	Michael Vrable <mvrable@turin.ucsd.edu>
	Fri, 7 Dec 2007 03:16:57 +0000 (19:16 -0800)
contrib/restore.pl		patch | blob | history
contrib/upgrade0.6-localdb.sql		patch | blob | history
lbs-util		patch | blob | history
lbs.py		patch | blob | history
schema.sql		patch | blob | history