/* Cumulus: Efficient Filesystem Backup to the Cloud
 * Copyright (C) 2012 The Cumulus Developers
 * See the AUTHORS file for a list of contributors.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
20 /* Include/exclude processing for selecting files to be backed up: mechanisms
21 * for matching filenames against patterns and constructing lists of
22 * include/exclude rules. */
24 #ifndef _CUMULUS_EXCLUDE_H
25 #define _CUMULUS_EXCLUDE_H
27 #include <sys/types.h>
33 /* Base class for objects which should not have implicit copy constructors and
34 * assignment operators. */
39 noncopyable(const noncopyable&);
40 const noncopyable& operator=(const noncopyable&);
/* A pattern which can be matched against file paths while scanning the file
 * system for backups. The pattern language is described in doc/exclude.rst.
 */
46 class FilePattern : public noncopyable {
48 /* Constructs a FilePattern which the specified pattern. If patterns are
49 * loaded from a per-directory merge file, basedir should be the path to
50 * the directory where the patterns were loaded (and the pattern will only
51 * match files in or below that directory). basedir should be empty for a
52 * pattern matching starting at the root. */
53 FilePattern(const std::string& pattern, const std::string& basedir);
57 /* Reference counting for freeing FilePattern objects. Newly created
58 * objects have a reference count of 1. If the reference count drops to
59 * zero via unref(), the object is deleted. */
60 void ref() { refcount++; }
61 void unref() { if (--refcount == 0) delete this; }
63 /* Returns the original pattern used to construct the FilePattern object;
64 * this is intended primarily for logging/debugging. */
65 const std::string& pattern() { return orig_pattern; }
67 /* Does this pattern match the specified file? Paths should be specified
68 * without any leading slash. A trailing slash should be included in the
69 * path when the object is a directory, to indicate this (so that
70 * directory-only rules can be processed properly). */
71 bool matches(const std::string& path) const;
/* Translates a Cumulus pattern into a regular expression string; the
 * underlying matching implementation is built on the result.
 * NOTE(review): basedir is presumably the same base directory that is passed
 * to the constructor -- confirm against the implementation. */
static std::string pattern_to_regex(const std::string& pattern,
                                    const std::string& basedir);
/* Escapes any special characters in pattern, producing a regular expression
 * that matches pattern as a literal string. */
static std::string regex_quote(const std::string& pattern);
/* Reference count for object lifetime management. */
int refcount;

/* Original pattern, returned by pattern(). */
std::string orig_pattern;

/* True if regex is valid and initialized. */
bool valid;

/* The pattern, converted to a compiled regular expression. */
regex_t regex;
/* A PathFilterList represents a collection of rules for selecting files to be
 * included or excluded from a backup. Patterns can be appended to the list,
 * and PathFilterList also supports updating the list via per-directory rule
 * files. */
94 class PathFilterList : public noncopyable {
/* The possible pattern types; their meaning is described in
 * doc/exclude.rst. */
enum PatternType {
    INCLUDE,
    EXCLUDE,
    DIRMERGE
};
102 /* During the backup, a call to save() will store a snapshot of the current
103 * rule set. After any modifications to the filter list, a call to
104 * restore() will change the rules back to those from the time of the
105 * snapshot. Calls to save() and restore() can be nested; the saved
106 * snapshots act as a stack. */
110 /* Append a new pattern to the end of the list of rules. */
111 void add_pattern(PatternType type, const std::string& pattern,
112 const std::string& basedir);
114 /* Should a specified file be included in the backup, according to the
115 * current rules? The first matching rule applies; if no rule matches the
116 * default is to include the file. is_directory is a boolean indicating
117 * whether the path specifies a directory (so that directory-only rules can
118 * be matched properly. */
119 bool is_included(const std::string& path, bool is_directory) const;
121 /* Does the given file match a dir-merge rule in the current rule set? */
122 bool is_mergefile(const std::string& path) const;
/* Updates the current rule set from the contents of a per-directory merge
 * file. Intended to be called after is_mergefile() has returned true for the
 * file.
 *
 * path:     the path to the merge file (the same path that was given to
 *           is_mergefile()).
 * basedir:  the base directory containing the merge file; this is the
 *           starting point for matching the new rules.
 * contents: the contents of the merge file, as an in-memory string. */
void merge_patterns(const std::string& path,
                    const std::string& basedir,
                    const std::string& contents);
133 /* A particular set of rules is stored simply as an ordered list of (rule
134 * type, pattern) tuples. Lifetime of the FilePattern objects is managed
135 * with reference counts. */
136 typedef std::list<std::pair<PatternType, FilePattern *> > PatternList;
138 /* A stack of patterns, for handling save()/restore() calls. The current
139 * set of rules appears at the head of the list. As an optimization to
140 * better support save()/restore() calls without any modification to the
141 * rules, the stack uses run-length encoding: each item on the stack
142 * consists of a set of rules and a count of how many times those rules
143 * have been pushed. */
144 std::list<std::pair<int, PatternList *> > pattern_stack;
146 /* Parses rules (such as those in a per-directory merge file) and returns a
147 * PatternList. basedir should be the directory where the files were
148 * parsed from (all rules will be matched relative to this directory), and
149 * the contents of the rules file should be read in and passed as rules. */
150 static PatternList *parse_rules(const std::string& basedir,
151 const std::string& rules);
153 /* Returns the current set of rules (from the head of pattern_stack). */
154 const PatternList &patterns() const {
155 return *pattern_stack.front().second;
158 /* Returns a copy of the current rules, suitable for modification. If the
159 * current head of pattern_stack has a repetition greater than 1, an
160 * unshared copy of the current rule set is created. */
161 PatternList *mutable_patterns();
164 #endif // _CUMULUS_EXCLUDE_H