1 /* Cumulus: Smart Filesystem Backup to Dumb Servers
3 * Copyright (C) 2012 Google Inc.
4 * Written by Michael Vrable <vrable@cs.hmc.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 /* Implementation of Cumulus include/exclude rules for selecting files to be
27 #include <sys/types.h>
38 FilePattern::FilePattern(const string& pattern, const string& basedir)
39 : refcount(1), orig_pattern(pattern), valid(false)
41 string pattern_as_regex = pattern_to_regex(pattern, basedir);
42 int status = regcomp(®ex, pattern_as_regex.c_str(),
43 REG_EXTENDED|REG_NOSUB);
46 regerror(status, ®ex, errbuf, sizeof(errbuf));
48 "Pattern %s: failed to compile as regular expression %s: %s\n",
49 pattern.c_str(), pattern_as_regex.c_str(), errbuf);
56 FilePattern::~FilePattern()
62 bool FilePattern::matches(const std::string& path) const
67 return regexec(®ex, path.c_str(), 0, NULL, 0) == 0;
70 string FilePattern::pattern_to_regex(const string& pattern,
71 const string& basedir)
73 /* Matches are always anchored to cover the entire string; we insert
74 * wildcards where needed if we only need to match a suffix of the path. */
77 size_t len = pattern.size();
79 /* Special case: an empty pattern matches all files. */
83 /* For a non-empty basedir, the match must ensure that the file actually
84 * falls within basedir. */
85 if (!basedir.empty() && basedir != ".") {
86 result += regex_quote(basedir) + "/";
89 /* A leading slash indicates a pattern that must match the entire path. If
90 * there is no leading slash, match any number of leading directory
92 if (pattern[0] == '/') {
100 /* Characters that must be quoted in a regular expression that are not
101 * otherwise part of the Cumulus pattern language. */
115 result += pattern[i];
119 /* Any character except a directory separator. */
124 if (i + 1 < len && pattern[i + 1] == '*') {
125 /* Any number of characters, including slashes. */
129 /* Zero or more characters (but no directory separators). */
135 /* A character matched literally that does not require quoting. */
136 result += pattern[i];
142 /* A trailing slash should match only a directory. No trailing slash means
143 * match any type of file. Callers should include a slash at the end of a
144 * path that is a directory; if there is no trailing slash in the pattern
145 * match either a trailing slash or none. */
146 if (pattern[len - 1] != '/') {
155 string FilePattern::regex_quote(const string& pattern)
158 for (size_t i = 0; i < pattern.length(); i++) {
159 switch (pattern[i]) {
160 /* Characters that must be quoted in a regular expression. */
179 result += pattern[i];
186 PathFilterList::PathFilterList()
188 /* Invariant: pattern_stack is always non-empty (except when the destructor
189 * runs). Thus, reading pattern_stack.front() is always safe. */
190 pattern_stack.push_back(make_pair(1, new PatternList));
193 PathFilterList::~PathFilterList()
195 /* Pops all items off the saved rule stack. As an optimization, rather
196 * than repeatedly popping items which have a repeat count, just set the
197 * repeat count to one. */
198 while (!pattern_stack.empty()) {
199 pattern_stack.front().first = 1;
204 /* save() operates lazily: simply increment the repeat count on the rule set at
205 * the head of the list. If modifications are made, mutable_patterns() will
206 * create a copy of the rules. */
207 void PathFilterList::save()
209 pattern_stack.front().first++;
212 void PathFilterList::restore()
214 if (--pattern_stack.front().first == 0) {
215 PatternList *old_patterns = pattern_stack.front().second;
216 pattern_stack.pop_front();
217 for (PatternList::iterator i = old_patterns->begin();
218 i != old_patterns->end(); ++i) {
225 void PathFilterList::add_pattern(PatternType type, const string& pattern,
226 const string& basedir)
228 FilePattern *pat = new FilePattern(pattern, basedir);
229 mutable_patterns()->push_back(make_pair(type, pat));
232 bool PathFilterList::is_included(const std::string& path,
233 bool is_directory) const
237 full_path = path + "/";
242 PatternList::const_iterator i;
243 for (i = patterns().begin(); i != patterns().end(); ++i) {
244 if (i->second->matches(full_path)) {
251 /* Merge rules are ignored for the purposes of selecting
252 * whether a file is included or not. */
258 /* Default is include if no rule matches. */
262 bool PathFilterList::is_mergefile(const std::string& path) const
264 PatternList::const_iterator i;
265 for (i = patterns().begin(); i != patterns().end(); ++i) {
266 if (i->first == DIRMERGE && i->second->matches(path))
272 /* Parses the specified contents of a per-directory rule merge file. The rules
273 * are first parsed into a temporary PatternList, which is then spliced into
274 * the rule set just before the DIRMERGE rule. Thus, if a dir-merge rule
275 * matches multiple times (in successive sub-directories), deeper rules take
276 * precedence over earlier rules. */
277 void PathFilterList::merge_patterns(const string& path,
278 const string& basedir,
279 const string& contents)
281 PatternList *rules = mutable_patterns();
282 PatternList::iterator i;
283 for (PatternList::iterator i = rules->begin(); i != rules->end(); ++i) {
284 /* Try to locate where the rules should be inserted by looking for the
285 * DIRMERGE rule which matches the path to the rule file. */
286 if (i->first == DIRMERGE && i->second->matches(path)) {
287 PatternList *new_rules = parse_rules(basedir, contents);
288 rules->splice(i, *new_rules);
295 PathFilterList::PatternList *PathFilterList::parse_rules(const string& basedir,
298 PatternList *patterns = new PatternList;
299 std::stringstream rules(data, std::stringstream::in);
300 while (!rules.eof()) {
302 std::getline(rules, rule);
303 /* Ignore blank lines and lines starting with "#". */
304 if (rule.empty() || rule[0] == '#')
306 if (rule.length() > 2 && rule[1] == ' ') {
307 if (rule[0] == '+' || rule[0] == '-' || rule[0] == ':') {
308 FilePattern *pat = new FilePattern(rule.substr(2), basedir);
311 patterns->push_back(make_pair(INCLUDE, pat));
314 patterns->push_back(make_pair(EXCLUDE, pat));
317 patterns->push_back(make_pair(DIRMERGE, pat));
324 fprintf(stderr, "Invalid rule: %s\n", rule.c_str());
330 PathFilterList::PatternList *PathFilterList::mutable_patterns()
332 PatternList *old_list = pattern_stack.front().second;
333 if (pattern_stack.front().first == 1)
336 PatternList *new_list = new PatternList;
337 for (PatternList::iterator i = old_list->begin();
338 i != old_list->end(); ++i) {
340 new_list->push_back(*i);
342 pattern_stack.front().first--;
343 pattern_stack.push_front(make_pair(1, new_list));
348 /*****************************************************************************
349 * Unit tests for pattern matching. These are not compiled in by default, but
350 * exclude.cc can be compiled to a standalone binary with -DRUN_TESTS to run
352 *****************************************************************************/
355 /* Tests of pattern matching rules. test_pattern takes a pattern, a base
356 * directory, and a path to match, and prints out whether the rule matches.
357 * expect_match is the expected result; if this doesn't equal the actual result
358 * print a warning message. */
359 static void test_pattern(const string& pattern, const string& basedir,
360 const string& path, bool expect_match)
362 FilePattern pat(pattern, basedir);
363 bool result = pat.matches(path);
364 printf("%3s %c %c %-30s %-30s\n",
365 result == expect_match ? "" : "ERR",
367 expect_match ? '+' : '-',
372 int main(int argc, char *argv[])
374 printf("Act/Exp Pattern Path\n");
375 test_pattern("*.o", "", "a/b/c.txt", false);
376 test_pattern("*.o", "", "a/b/c.o", true);
377 test_pattern("*.git/", "", "repo/project.git/", true);
378 test_pattern("/.cache", "", ".cache", true);
379 test_pattern("/.cache", "", "home/user/.cache", false);
380 test_pattern("/*/.cache", "", "home/user/.cache", false);
381 test_pattern("/*/*/.cache", "", "home/user/.cache", true);
382 test_pattern("/**/.cache", "", "home/user/.cache", true);
383 test_pattern(".cache", "", "home/user/.cache", true);
384 test_pattern("?.o", "", "home/user/a.o", true);
385 test_pattern("?.o", "", "home/user/a/o", false);
386 test_pattern("*.o", "", "a/b/\n.o", true);
387 test_pattern("/**/.cache", "", "home/new\nline/.cache", true);
388 test_pattern("/*/.cache", "home", "home/user/.cache", true);
389 test_pattern(".cache", "home", "home/user/.cache", true);
390 test_pattern("user/.cache", "home", "home/user/.cache", true);
391 test_pattern("user/.cache", "home/user", "home/user/.cache", false);
394 pfl.add_pattern(PathFilterList::DIRMERGE, ".cumulus-filter", "");
396 pfl.merge_patterns("dir/.cumulus-filter", "dir",