1 /* Cumulus: Smart Filesystem Backup to Dumb Servers
3 * Copyright (C) 2012 Google Inc.
4 * Written by Michael Vrable <vrable@cs.hmc.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 /* Include/exclude processing for selecting files to be backed up: mechanisms
22 * for matching filenames against patterns and constructing lists of
23 * include/exclude rules. */
25 #ifndef _CUMULUS_EXCLUDE_H
26 #define _CUMULUS_EXCLUDE_H
28 #include <sys/types.h>
34 /* Base class for objects which should not have implicit copy constructors and
35 * assignment operators. */
/* Copy constructor and copy-assignment operator are declared here only so
 * that the compiler does not generate them implicitly; no definitions appear
 * in this header.  NOTE(review): presumably these sit in a private section so
 * that copying a derived object fails to compile -- confirm against the class
 * header, which is not visible in this view. */
40 noncopyable(const noncopyable&);
41 const noncopyable& operator=(const noncopyable&);
44 /* A pattern which can be matched against file paths while scanning the file
45 * system for backups. The pattern language is described in doc/exclude.rst. */
47 class FilePattern : public noncopyable {
49 /* Constructs a FilePattern with the specified pattern. If patterns are
50 * loaded from a per-directory merge file, basedir should be the path to
51 * the directory where the patterns were loaded (and the pattern will only
52 * match files in or below that directory). basedir should be empty for a
53 * pattern matching starting at the root. */
54 FilePattern(const std::string& pattern, const std::string& basedir);
58 /* Reference counting for freeing FilePattern objects. Newly created
59 * objects have a reference count of 1. If the reference count drops to
60 * zero via unref(), the object is deleted. */
61 void ref() { refcount++; }
62 void unref() { if (--refcount == 0) delete this; }
64 /* Returns the original pattern used to construct the FilePattern object;
65 * this is intended primarily for logging/debugging. */
66 const std::string& pattern() { return orig_pattern; }
68 /* Does this pattern match the specified file? Returns true on a match.
69 * Paths should be specified without any leading slash. A trailing slash
70 * should be included in the path when the object is a directory, to
71 * indicate this (so that directory-only rules can be processed properly).
 * NOTE(review): the result when the pattern failed to compile (valid is
 * false) is not visible from this header -- confirm in the implementation. */
72 bool matches(const std::string& path) const;
75 /* Compiles a Cumulus pattern to a regular expression, returned as a string.
76 * This is used for the underlying matching implementation.
 * NOTE(review): basedir presumably has the same meaning as in the
 * constructor (directory the pattern is relative to, empty for the root),
 * and the regex_t member suggests the output is POSIX regcomp() syntax --
 * confirm both in the implementation. */
77 static std::string pattern_to_regex(const std::string& pattern,
78 const std::string& basedir);
80 /* Quotes any special characters in the input and returns a regular
81 * expression that matches the string pattern literally.
 * NOTE(review): the exact set of characters treated as special is defined
 * in the implementation, not in this header. */
82 static std::string regex_quote(const std::string& pattern);
84 int refcount; // Reference count for lifetime management; changed by ref()/unref().
85 std::string orig_pattern; // Original pattern text, returned by pattern().
87 bool valid; // True if regex is valid and initialized; regex is only meaningful then.
88 regex_t regex; // The pattern, converted to a compiled regular expression.
91 /* A PathFilterList represents a collection of rules for selecting files to be
92 * included or excluded from a backup. Patterns can be appended to the list,
93 * and PathFilterList also supports updating the list via per-directory rule
94 * files (see merge_patterns). */
95 class PathFilterList : public noncopyable {
100 /* Possible pattern types, as described in doc/exclude.rst.
 * NOTE(review): judging by the surrounding API, INCLUDE and EXCLUDE rules
 * decide the result of is_included(), while DIRMERGE rules name the
 * per-directory merge files detected by is_mergefile() -- confirm against
 * doc/exclude.rst. */
101 enum PatternType { INCLUDE, EXCLUDE, DIRMERGE };
103 /* During the backup, a call to save() will store a snapshot of the current
104 * rule set. After any modifications to the filter list, a call to
105 * restore() will change the rules back to those from the time of the
106 * snapshot. Calls to save() and restore() can be nested; the saved
107 * snapshots act as a stack. */
111 /* Append a new pattern of the given type to the end of the list of rules.
 * NOTE(review): pattern and basedir presumably carry the same meaning as
 * the FilePattern constructor arguments -- confirm in the implementation. */
112 void add_pattern(PatternType type, const std::string& pattern,
113 const std::string& basedir);
115 /* Should a specified file be included in the backup, according to the
116 * current rules? The first matching rule applies; if no rule matches the
117 * default is to include the file. is_directory is a boolean indicating
118 * whether the path specifies a directory (so that directory-only rules can
119 * be matched properly). */
120 bool is_included(const std::string& path, bool is_directory) const;
122 /* Does the given file match a dir-merge rule in the current rule set?
 * When this returns true, the caller is expected to pass the file to
 * merge_patterns(). */
123 bool is_mergefile(const std::string& path) const;
125 /* Updates the current rule set from the contents of a per-directory merge
126 * file. If is_mergefile(path) returns true, call merge_patterns with that
127 * same path to the merge file, the base directory containing the merge
128 * file (which is the starting point for matching the new rules), and
129 * the contents of the merge file as an in-memory string. */
130 void merge_patterns(const std::string& path, const std::string& basedir,
131 const std::string& contents);
134 /* A particular set of rules is stored simply as an ordered list of (rule
135 * type, pattern) pairs. The FilePattern objects are held by raw pointer;
136 * their lifetime is managed with reference counts (FilePattern::ref/unref). */
137 typedef std::list<std::pair<PatternType, FilePattern *> > PatternList;
139 /* A stack of patterns, for handling save()/restore() calls. The current
140 * set of rules appears at the head of the list. As an optimization to
141 * better support save()/restore() calls without any modification to the
142 * rules, the stack uses run-length encoding: each item on the stack
143 * consists of a count of how many times a set of rules has been pushed
144 * (pair.first) and a pointer to that set of rules (pair.second). */
145 std::list<std::pair<int, PatternList *> > pattern_stack;
147 /* Parses rules (such as those in a per-directory merge file) and returns a
148 * PatternList. basedir should be the directory where the files were
149 * parsed from (all rules will be matched relative to this directory), and
150 * the contents of the rules file should be read in and passed as rules.
 * NOTE(review): ownership of the returned list is not stated in this
 * header -- confirm who is responsible for freeing it. */
151 static PatternList *parse_rules(const std::string& basedir,
152 const std::string& rules);
154 /* Returns the current set of rules (from the head of pattern_stack).
 * NOTE(review): pattern_stack.front() is dereferenced unconditionally, so
 * this assumes the stack is never empty -- confirm that the constructor
 * seeds it and that restore() never pops the last entry. */
155 const PatternList &patterns() const {
156 return *pattern_stack.front().second;
159 /* Returns a pointer to the current rules, suitable for modification. If
160 * the current head of pattern_stack has a repetition count greater than 1,
161 * an unshared copy of the current rule set is created first.
 * NOTE(review): how the stack entry and its count are adjusted when the
 * copy is made is not visible in this header. */
162 PatternList *mutable_patterns();
165 #endif // _CUMULUS_EXCLUDE_H