-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2008 The Regents of the University of California
- * Written by Michael Vrable <mvrable@cs.ucsd.edu>
+/* Cumulus: Efficient Filesystem Backup to the Cloud
+ * Copyright (C) 2006-2008 The Cumulus Developers
+ * See the AUTHORS file for a list of contributors.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include <iostream>
#include <sstream>
+#include "cumulus.h"
#include "localdb.h"
#include "remote.h"
#include "ref.h"
char padding[12];
};
+class FileFilter {  // Wraps a file descriptor so that writes can be piped through an optional filter program.
+public:
+ // Factory: fd is the file descriptor to wrap and program is the filter command. It is valid for
+ // program to be NULL or empty; if so, no filtering is done. NOTE(review): ownership of the result is not shown here; confirm.
+ static FileFilter *New(int fd, const char *program);
+
+ // Wait for the filter process to terminate.
+ int wait();  // NOTE(review): meaning of the return value is not visible in this header; confirm.
+
+ // Accessors for the file descriptors.
+ int get_raw_fd() const { return fd_raw; }  // the original descriptor
+ int get_wrapped_fd() const { return fd_wrapped; }  // the descriptor whose writes go through the filter
+
+private:
+ FileFilter(int raw, int wrapped, pid_t pid);  // private, so outside code creates instances through New()
+
+ // Launch a process to filter data written to a file descriptor. fd_out is
+ // the file descriptor where the filtered data should be written. program
+ // is the filter program to execute (a single string which will be
+ // interpreted by /bin/sh). The return value is a file descriptor to which
+ // the data to be filtered should be written. The process ID of the filter
+ // process is stored at address filter_pid if non-NULL.
+ static int spawn_filter(int fd_out, const char *program, pid_t *filter_pid);
+
+ // The original file descriptor passed when creating the FileFilter object.
+ int fd_raw;
+
+ // The wrapped file descriptor: writes here are piped through the filter
+ // program.
+ int fd_wrapped;
+
+ // The process ID of the filter process if one was launched, or -1 if
+ // there is no filter program.
+ pid_t pid;
+};
+
/* A simple wrapper around a single TAR file to represent a segment. Objects
* may only be written out all at once, since the tar header must be written
* first; incremental writing is not supported. */
std::string segment_name;
RemoteFile *file;
-
- /* Filter support. */
- int real_fd, filter_fd;
- pid_t filter_pid;
+ scoped_ptr<FileFilter> filter;
// Write data to the tar file
void tar_write(const char *data, size_t size);
// used to control object placement; objects with different group
// parameters are kept in separate segments.
ObjectReference write_object(const char *data, size_t len,
- const std::string &group = "");
+ const std::string &group = "",
+ const std::string &checksum = "",
+ double age = 0.0);
// Ensure all segments have been fully written.
void sync();
std::string group;
std::string name; // UUID
int count; // Objects written to this segment
- int size; // Combined size of objects written
+ int data_size; // Combined size of objects written
std::string basename; // Name of segment without directory
RemoteFile *rf;
};
// Data in an object must be written all at once, and cannot be generated
// incrementally. Data can be an arbitrary block of binary data of any
// size. The pointer to the data need only remain valid until write() is
- // called.
- void set_data(const char *d, size_t len) { data = d; data_len = len; }
+ // called. If checksum is non-NULL then it is assumed to contain a hash
+ // value for the data; this is an optimization for the case where the caller
+ // has already checksummed the data. Otherwise set_data() will compute a
+ // hash of the data itself.
+ void set_data(const char *d, size_t len, const char *checksum);
+
+ // Explicitly sets the age of the data, for later garbage-collection or
+ // repacking purposes. If not set, the age defaults to the current time.
+ // The age is stored in the database as a floating point value, expressing
+ // the time in Julian days.
+ void set_age(double age) { this->age = age; }  // this-> is required: the parameter shadows the member
// Write an object to a segment, thus making it permanent. This function
// can be called at most once.
void write(TarSegmentStore *store);
- // Compute the checksum of an object, and include it in the object
- // reference. This should be called after write(), and the data specified
- // by set_data() must remain valid through the call to checksum().
- void checksum();
-
// An object is assigned a permanent name once it has been written to a
// segment. Until that time, its name cannot be determined.
- std::string get_name() const { return ref.to_string(); }
ObjectReference get_ref() { return ref; }
private:
std::string group;
+ double age;
const char *data;
size_t data_len;
+ std::string checksum;
bool written;
ObjectReference ref;
* included; this adds to it) */
extern const char *filter_extension;
-/* Launch a process to filter data written to a file descriptor. fd_out is the
- * file descriptor where the filtered data should be written. program is the
- * filter program to execute (a single string which will be interpreted by
- * /bin/sh). The return value is a file descriptor to which the data to be
- * filtered should be written. The process ID of the filter process is stored
- * at address filter_pid if non-NULL. */
-int spawn_filter(int fd_out, const char *program, pid_t *filter_pid);
-
#endif // _LBS_STORE_H