1 # Cumulus: Smart Filesystem Backup to Dumb Servers
3 # Copyright (C) 2012 Google Inc.
4 # Written by Michael Vrable <mvrable@cs.ucsd.edu>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Backup retention policies.

Retention policies control how long different backup snapshots should be kept,
for example keeping daily snapshots for short periods of time but retaining
weekly snapshots going back further in time.
"""
# strptime()/strftime() pattern for snapshot timestamp strings, e.g.
# "20120131T235959".  RetentionEngine.parse_timestamp decodes snapshot names
# with this format.
TIMESTAMP_FORMAT = "%Y%m%dT%H%M%S"
# Different classes of backups--such as "daily" or "monthly"--can have
# different retention periods applied.  A single backup snapshot might belong
# to multiple classes (e.g., perhaps be both a "daily" and a "monthly").
37 # Backups are classified using partitioning functions, defined below. For a
38 # "monthly" backup classifier, all backups for a given month should map to the
39 # same partition. Then, we apply the class label to the earliest snapshot in
40 # each partition--so the set of "monthly" backups would consist of all backups
41 # which were the first to run after the start of a month.
43 # A partitioning function must take a datetime instance as input and return a
44 # partition representative as output; timestamps that should be part of the
45 # same partition should map to equal partition representatives. For a
46 # "monthly" classifier, an easy way to do this is to truncate the timestamp to
47 # keep only the month and year, and in general truncating timestamps works
48 # well, but the values are not used in any other way than equality testing so
49 # any type is allowed.
51 # _backup_classes is a registry of useful backup types; it maps a descriptive
52 # name to a partition function which implements it.
# Registry of backup classes: maps a descriptive class name (e.g. "daily") to
# the partitioning function that implements it.
_backup_classes = {}


def add_backup_class(name, partioning_function):
    """Registers a new class of backups for which policies can be applied.

    The new class will be available as name to RetentionEngine.add_policy.
    Registering a name that already exists replaces its partitioning function.

    Args:
        name: Label for the backup class, used as the registry key.
        partioning_function: Callable taking a datetime instance and returning
            a partition representative.  Timestamps that belong to the same
            partition must map to equal representatives; the values are only
            ever compared for equality.

    Predefined backup classes are: "yearly", "monthly", "weekly", "daily", and
    "all".
    """
    _backup_classes[name] = partioning_function


# Predefined classes.  Each partitioning function truncates the timestamp to
# the granularity named by the class.
# "yearly": the date of January 1st of the timestamp's year.
add_backup_class("yearly", lambda t: t.date().replace(day=1, month=1))
# "monthly": the date of the first day of the timestamp's month.
add_backup_class("monthly", lambda t: t.date().replace(day=1))
# "weekly": the (ISO year, ISO week number) pair.
add_backup_class("weekly", lambda t: t.isocalendar()[0:2])
# "daily": the calendar date.
add_backup_class("daily", lambda t: t.date())
# "all": the timestamp itself, so each distinct timestamp is its own partition.
add_backup_class("all", lambda t: t)
class RetentionEngine(object):
    """Class for applying a retention policy to a set of snapshots.

    Allows a retention policy to be set, then matches a sequence of backup
    snapshots to the policy to decide which ones should be kept.

    Typical use: register policies with add_policy(), then pass snapshot
    timestamps to consider_snapshot() in chronological order.
    """

    def __init__(self):
        # By default, policy matching is done on local-time timestamps.
        self.set_utc(False)
        # Maps backup class name -> retention period.
        self._policies = {}
        # Maps backup class name -> (partition, snapshot) for the snapshot
        # that most recently received that class label.
        self._last_snapshots = {}
        # Naive datetime expressed in UTC, matching what parse_timestamp()
        # returns, so the two can be subtracted.
        self._now = datetime.datetime.utcnow()

    def set_utc(self, use_utc=True):
        """Perform policy matching with timestamps in UTC.

        By default, the policy converts timestamps to local time, but calling
        set_utc(True) will select snapshots based on UTC timestamps.
        """
        self._convert_to_localtime = not use_utc

    def set_now(self, timestamp):
        """Sets the "current time" for the purposes of snapshot expiration.

        timestamp should be a datetime object, expressed in UTC.  If set_now()
        is not called, the current time defaults to the time at which the
        RetentionEngine object was instantiated.
        """
        self._now = timestamp

    def add_policy(self, backup_class, retention_period):
        """Adds (or replaces) the retention period for a class of backups.

        Args:
            backup_class: Name of a class registered in _backup_classes; it is
                looked up there when consider_snapshot() runs.
            retention_period: Maximum age for labelled snapshots to be kept.
                It is compared against a difference of two datetime objects,
                so it should be a datetime.timedelta.

        Any snapshot previously remembered for this class is forgotten.
        """
        self._policies[backup_class] = retention_period
        self._last_snapshots[backup_class] = (None, None)

    @staticmethod
    def parse_timestamp(s):
        """Parses a snapshot timestamp string in TIMESTAMP_FORMAT.

        Returns a naive datetime; raises ValueError (from strptime) if s does
        not match the format.
        """
        return datetime.datetime.strptime(s, TIMESTAMP_FORMAT)

    def consider_snapshot(self, snapshot):
        """Compute whether a given snapshot should be expired.

        Successive calls to consider_snapshot() must be for snapshots in
        chronological order.  For each call, consider_snapshot() will return a
        boolean indicating whether the snapshot should be retained (True) or
        not (False).

        A snapshot is labelled with a backup class when it is the first one
        seen in a new partition of that class.  It is retained if, for at
        least one class it was labelled with, its age is less than that
        class's retention period.

        Args:
            snapshot: Snapshot timestamp string in TIMESTAMP_FORMAT, treated
                as UTC.
        """
        timestamp_utc = self.parse_timestamp(snapshot)
        snapshot_age = self._now - timestamp_utc

        # timestamp_policy is the timestamp in the format that will be used for
        # doing policy matching: either in the local timezone or UTC, depending
        # on the setting of set_utc().
        if self._convert_to_localtime:
            unixtime = calendar.timegm(timestamp_utc.timetuple())
            timestamp_policy = datetime.datetime.fromtimestamp(unixtime)
        else:
            timestamp_policy = timestamp_utc

        # Labels are recomputed from scratch for every snapshot.
        self._labels = set()
        retain = False
        for backup_class, retention_period in self._policies.items():
            partition = _backup_classes[backup_class](timestamp_policy)
            if self._last_snapshots[backup_class][0] == partition:
                # An earlier snapshot already represents this partition.
                continue
            self._last_snapshots[backup_class] = (partition, snapshot)
            self._labels.add(backup_class)
            if snapshot_age < retention_period:
                retain = True
        return retain

    def last_labels(self):
        """Return the set of policies that applied to the last snapshot.

        This will fail if consider_snapshot has not yet been called.
        """
        return self._labels

    def last_snapshots(self):
        """Returns the most recent snapshot in each backup class.

        The result is a dict mapping each backup class that has a policy to
        the snapshot that most recently received that class label, or None if
        no snapshot has been labelled with it yet.
        """
        return {k: v[1] for k, v in self._last_snapshots.items()}