immediately read a file containing additional rules and insert those
in the current ruleset **(not yet implemented)**
+and one special rule type:
+
+cachedir-check (``*CACHEDIR_CHECK``)
+ see "Cache Directory Tags" later; this specifies that a check against
+ the Cache Directory Tagging Specification should be performed at this
+ priority in the rules
+
Patterns found in the rules are interpreted as follows:
- Most characters are treated literally and must match exactly.
"``+ /file.txt``" would match ``file.txt`` in the same directory as the
merge file, not at the root.
+Cache Directory Tags
+--------------------
+
+Cumulus supports excluding directory trees marked as cache directories
+according to the Cache Directory Tagging Specification
+(https://bford.info/cachedir/).
+
+This functionality is optional; it is enabled by including a "cache
+directory check" rule along with other filter rules. This rule will
+exclude all files contained within a directory (including in a
+subdirectory) that contains a valid ``CACHEDIR.TAG`` file. Neither the
+directory itself nor its ``CACHEDIR.TAG`` file is excluded, which makes
+it more obvious in the backup that a cache directory was found (even
+though its contents were not dumped).
+
+Normal precedence rules apply; if the cache directory check rule is
+listed after an include rule which also matches files, the earlier
+include rule will take priority (this can be used to force certain cache
+directories to be included, if desired).
+
Example
-------
#include <string>
#include "exclude.h"
+#include "util.h"
using std::make_pair;
using std::pair;
using std::string;
+const char CACHEDIR_TAG_FILE[] = "CACHEDIR.TAG";
+
FilePattern::FilePattern(const string& pattern, const string& basedir)
: refcount(1), orig_pattern(pattern), valid(false)
{
case EXCLUDE:
return false;
case DIRMERGE:
- /* Merge rules are ignored for the purposes of selecting
- * whether a file is included or not. */
+ case CACHEDIR_CHECK:
+ /* Merge rules and markers for a cache directory check are
+ * ignored for the purposes of selecting whether a file is
+ * included or not. */
continue;
}
}
}
}
+/* Updates the rules to skip the contents of a detected cache directory.
+ *
+ * Searches the current rule set for the first CACHEDIR_CHECK marker.  If one
+ * is found, two rules parsed with basedir as their base directory are
+ * spliced in directly ahead of the marker (so they take effect at the
+ * marker's position in the rule order):
+ *   - an include rule for the CACHEDIR.TAG file itself, then
+ *   - an exclude rule covering everything else under the subtree.
+ * The marker itself is left in place.  If the rule set holds no
+ * CACHEDIR_CHECK marker, no rules are added. */
+void PathFilterList::activate_cachedir(const string& basedir)
+{
+    PatternList *rules = mutable_patterns();
+    for (PatternList::iterator i = rules->begin(); i != rules->end(); ++i) {
+        if (i->first != CACHEDIR_CHECK)
+            continue;
+
+        /* Insert new rules (temporarily) for within the cache directory:
+         * do still include the CACHEDIR.TAG file itself, but otherwise
+         * skip all files under this subtree. */
+        string cachedir_rules = string_printf("+ /%s\n- **\n",
+                                              CACHEDIR_TAG_FILE);
+        PatternList *new_rules = parse_rules(basedir, cachedir_rules);
+
+        /* splice() moves the parsed entries in ahead of the marker and
+         * leaves new_rules empty, so only the list shell is deleted. */
+        rules->splice(i, *new_rules);
+        delete new_rules;
+
+        /* Only the first marker is expanded. */
+        return;
+    }
+}
+
PathFilterList::PatternList *PathFilterList::parse_rules(const string& basedir,
const string& data)
{
/* Ignore blank lines and lines starting with "#". */
if (rule.empty() || rule[0] == '#')
continue;
+ if (rule == "*CACHEDIR_CHECK") {
+ patterns->push_back(make_pair(CACHEDIR_CHECK,
+ new FilePattern("", basedir)));
+ continue;
+ }
if (rule.length() > 2 && rule[1] == ' ') {
if (rule[0] == '+' || rule[0] == '-' || rule[0] == ':') {
FilePattern *pat = new FilePattern(rule.substr(2), basedir);
#include <map>
#include <string>
+extern const char CACHEDIR_TAG_FILE[];
+
/* Base class for objects which should not have implicit copy constructors and
* assignment operators. */
class noncopyable {
~PathFilterList();
/* Possible pattern types, as described in doc/exclude.rst. */
- enum PatternType { INCLUDE, EXCLUDE, DIRMERGE };
+ enum PatternType { INCLUDE, EXCLUDE, DIRMERGE, CACHEDIR_CHECK };
/* During the backup, a call to save() will store a snapshot of the current
* rule set. After any modifications to the filter list, a call to
void merge_patterns(const std::string& path, const std::string& basedir,
const std::string& contents);
+ /* Updates the current rule set to indicate that a cache directory has been
+ * detected and files within basedir should be skipped. These rules are
+ * inserted at the location of the CACHEDIR_CHECK rule; if there is no
+ * CACHEDIR_CHECK then this call makes no modifications.
+ *
+ * Only the first CACHEDIR_CHECK rule in the list is used; the new rules
+ * are placed directly ahead of it and the CACHEDIR_CHECK rule itself is
+ * kept. The inserted rules keep the CACHEDIR.TAG file itself included
+ * and exclude everything else beneath basedir. */
+ void activate_cachedir(const std::string& basedir);
+
private:
/* A particular set of rules is stored simply as an ordered list of (rule
* type, pattern) tuples. Lifetime of the FilePattern objects is managed
string(block_buf, bytes));
}
+/* Decides whether the file at path marks its directory as a cache directory
+ * under the Cache Directory Tagging Specification
+ * (https://bford.info/cachedir/).
+ *
+ * A tag file qualifies when all of the following hold:
+ *   1. It is named "CACHEDIR.TAG".  This is NOT verified here; the caller
+ *      is responsible for checking the name.
+ *   2. It is a regular file; symlinks and other file types are rejected.
+ *   3. Its leading bytes equal CACHEDIR_SIGNATURE exactly.  Whatever
+ *      follows the signature is ignored.
+ *
+ * Returns true only when conditions 2 and 3 are satisfied; any failure to
+ * stat, open or read the file yields false. */
+bool is_cachedir_tag_file(const string& path) {
+    static const char CACHEDIR_SIGNATURE[]
+        = "Signature: 8a477f597d28d172789f06886806bc55";
+    /* Signature length, not counting the terminating NUL. */
+    const ssize_t CACHEDIR_SIGLEN = sizeof(CACHEDIR_SIGNATURE) - 1;
+
+    /* lstat (not stat) so that a symlink is reported as a symlink rather
+     * than as whatever it points to. */
+    struct stat info;
+    if (lstat(path.c_str(), &info) < 0
+        || (info.st_mode & S_IFMT) != S_IFREG)
+        return false;
+
+    int fd = safe_open(path, NULL);
+    if (fd < 0)
+        return false;
+
+    /* Only a signature-sized prefix is needed; anything other than a
+     * full-length read cannot match. */
+    const ssize_t nread = file_read(fd, block_buf, CACHEDIR_SIGLEN);
+    close(fd);
+
+    return nread == CACHEDIR_SIGLEN
+        && memcmp(block_buf, CACHEDIR_SIGNATURE, CACHEDIR_SIGLEN) == 0;
+}
+
void scanfile(const string& path)
{
int fd = -1;
}
try_merge_filter(filename, output_path);
}
+ if (*i == CACHEDIR_TAG_FILE && is_cachedir_tag_file(filename)) {
+ if (verbose) {
+ printf("Cache directory found at %s\n",
+ output_path.c_str());
+ }
+ filter_rules.activate_cachedir(output_path);
+ }
}
/* Second pass: recursively scan all items in the directory for backup;
" --exclude=PATTERN exclude files matching PATTERN from snapshot\n"
" --include=PATTERN include files matching PATTERN in snapshot\n"
" --dir-merge=PATTERN parse files matching PATTERN to read additional\n"
+ " --cachedir-check insert a CACHEDIR.TAG check in filter rules\n"
" subtree-specific include/exclude rules during backup\n"
" --localdb=PATH local backup metadata is stored in PATH\n"
" --tmpdir=PATH path for temporarily storing backup files\n"
{"include", 1, 0, 0}, // 11
{"exclude", 1, 0, 0}, // 12
{"dir-merge", 1, 0, 0}, // 13
+ {"cachedir-check", 0, 0, 0}, // 14
// Aliases for short options
{"verbose", 0, 0, 'v'},
{NULL, 0, 0, 0},
case 13: // --dir-merge
filter_rules.add_pattern(PathFilterList::DIRMERGE, optarg, "");
break;
+ case 14: // --cachedir-check
+ filter_rules.add_pattern(PathFilterList::CACHEDIR_CHECK, "",
+ "");
+ break;
default:
fprintf(stderr, "Unhandled long option!\n");
return 1;