From: Michael Vrable Date: Wed, 26 Aug 2009 00:23:15 +0000 (-0700) Subject: Switch to 31-bit directory cookies and a separate hashtable for lookups. X-Git-Url: http://git.vrable.net/?a=commitdiff_plain;ds=sidebyside;h=19f316a185dc3d2a67fc3d3546f3989e45559d7c;p=bluesky.git Switch to 31-bit directory cookies and a separate hashtable for lookups. --- diff --git a/bluesky.h b/bluesky.h index c85fd70..793daa4 100644 --- a/bluesky.h +++ b/bluesky.h @@ -68,17 +68,19 @@ typedef struct { GArray *blocks; /* Directory-specific fields */ - GSequence *dirents; + GSequence *dirents; /* List of entries for READDIR */ + GHashTable *dirhash; /* Hash table by name for LOOKUP */ uint64_t parent_inum; /* inode for ".."; 0 if the root directory */ } BlueSkyInode; /* A directory entry. The name is UTF-8 and is a freshly-allocated string. - * The name is hashed to a 64-bit value, and the directory entries are sorted - * by hash value (the hash value can then be used as a cookie for resuming a - * READDIR call). */ + * Each directory entry is listed in two indices: dirents is indexed by cookie + * and dirhash by name. The cookie is a randomly-assigned 32-bit value, unique + * within the directory, that remains unchanged until the entry is deleted. It + * is used to provide a stable key for restarting a READDIR call. */ typedef struct { gchar *name; - uint64_t hash; + uint32_t cookie; uint64_t inum; } BlueSkyDirent; @@ -103,6 +105,7 @@ typedef struct { BlueSkyFS *bluesky_new_fs(gchar *name); int64_t bluesky_get_current_time(); +void bluesky_inode_update_ctime(BlueSkyInode *inode, gboolean update_mtime); uint64_t bluesky_fs_alloc_inode(BlueSkyFS *fs); BlueSkyInode *bluesky_new_inode(uint64_t inum, BlueSkyFileType type); diff --git a/dir.c b/dir.c index f8aef94..204439e 100644 --- a/dir.c +++ b/dir.c @@ -13,29 +13,6 @@ /* Core filesystem: handling of directories. */ -/* Hash a filename for a directory lookup. The string is hashed to a 64-bit - * value. It is guaranteed that the hash values 0, 1, and 2 are never returned - * (to allow the hash value to be used as an NFS cookie for a READDIR - * operation). TODO: We perhaps should make this a keyed hash, and may want to - * use something cheaper than MD5 to compute. */ -uint64_t bluesky_directory_hash(gchar *name) -{ - GChecksum *csum = g_checksum_new(G_CHECKSUM_MD5); - g_checksum_update(csum, (guchar *)name, -1); - - guint64 hashbytes[2]; - gsize hashsize = sizeof(hashbytes); - g_checksum_get_digest(csum, (guint8 *)hashbytes, &hashsize); - - uint64_t hash = GUINT64_FROM_BE(hashbytes[0]); - if (hash < 3) - hash = 3; - - g_checksum_free(csum); - - return hash; -} - void bluesky_dirent_destroy(gpointer data) { BlueSkyDirent *dirent = (BlueSkyDirent *)data; @@ -46,10 +23,8 @@ void bluesky_dirent_destroy(gpointer data) gint bluesky_dirent_compare(gconstpointer a, gconstpointer b, gpointer unused) { - /* We can't simply subtract the hash values, since they are 64-bit and the - * result could overflow when converted to a gint. */ - uint64_t hash1 = ((const BlueSkyDirent *)a)->hash; - uint64_t hash2 = ((const BlueSkyDirent *)b)->hash; + uint32_t hash1 = ((const BlueSkyDirent *)a)->cookie; + uint32_t hash2 = ((const BlueSkyDirent *)b)->cookie; if (hash1 < hash2) return -1; @@ -65,61 +40,90 @@ gint bluesky_dirent_compare(gconstpointer a, gconstpointer b, uint64_t bluesky_directory_lookup(BlueSkyInode *inode, gchar *name) { g_return_val_if_fail(inode->type == BLUESKY_DIRECTORY, 0); - g_return_val_if_fail(inode->dirents != NULL, 0); + g_return_val_if_fail(inode->dirhash != NULL, 0); - /* First, construct a hash of the file name. Search the directory for a - * match, then check to see if it does really match. */ - uint64_t hash = bluesky_directory_hash(name); - - BlueSkyDirent d = {name, hash, 0}; - GSequenceIter *i = g_sequence_search(inode->dirents, &d, - bluesky_dirent_compare, NULL); - - /* g_sequence_search will return an iterator pointing just beyond the - * desired node if there is a match, so that when inserting the new node - * will go after the match. But we are trying to do a lookup, so back up - * by one position. */ - i = g_sequence_iter_prev(i); - - if (g_sequence_iter_is_end(i)) - return 0; - BlueSkyDirent *dirent = g_sequence_get(i); - if (dirent->hash != hash) + BlueSkyDirent *d = g_hash_table_lookup(inode->dirhash, name); + if (d == NULL) return 0; - if (g_strcmp0(name, dirent->name) != 0) - return 0; - - g_print("Lookup(%s) -> 0x%016llx\n", name, dirent->hash); - return dirent->inum; + else + return d->inum; } /* Insert a new entry into a directory. Should be called with the inode lock * already held. */ gboolean bluesky_directory_insert(BlueSkyInode *dir, gchar *name, uint64_t inum) { - /* First, construct a hash of the file name. Search the directory for a - * match, then check to see if it does really match. */ - uint64_t hash = bluesky_directory_hash(name); + g_return_val_if_fail(dir->type == BLUESKY_DIRECTORY, FALSE); + + /* Check that no entry with the given name already exists. */ + if (g_hash_table_lookup(dir->dirhash, name) != NULL) + return FALSE; BlueSkyDirent *d = g_new(BlueSkyDirent, 1); d->name = g_strdup(name); - d->hash = hash; d->inum = inum; + GSequence *dirents = dir->dirents; + + /* Pick an unused cookie value for the directory at random. Restrict + * ourselves to positive 32-bit values (even if treated as signed), and + * keep the first four slots free. */ + while (1) { + do { + d->cookie = g_random_int() & 0x7fffffff; + } while (d->cookie < 4); + + /* If the directory is empty, we can't possibly have a collision, so + * just go with the first key chosen. */ + if (g_sequence_get_length(dirents) == 0) + break; + + /* Otherwise, try looking up the generated cookie. If we do not find a + * match, we can use this cookie value, otherwise we need to generate a + * new one and try again. Because of the check above for an empty + * directory, we know that the lookup will return something so no need + * to worry about NULL. */ + GSequenceIter *i = g_sequence_search(dir->dirents, d, + bluesky_dirent_compare, NULL); + i = g_sequence_iter_prev(i); + if (((BlueSkyDirent *)g_sequence_get(i))->cookie != d->cookie) + break; + } + + /* Add the directory entry to both indices. */ + g_sequence_insert_sorted(dirents, d, bluesky_dirent_compare, NULL); + g_hash_table_insert(dir->dirhash, d->name, d); + + bluesky_inode_update_ctime(dir, 1); + + return TRUE; +} + +/* Remove an from a directory. Should be called with the inode lock already + * held. */ +gboolean bluesky_directory_remove(BlueSkyInode *dir, gchar *name) +{ + g_return_val_if_fail(dir->type == BLUESKY_DIRECTORY, FALSE); + + BlueSkyDirent *d = g_hash_table_lookup(dir->dirhash, name); + if (d == NULL) { + return FALSE; + } + + g_hash_table_remove(dir->dirhash, name); + GSequenceIter *i = g_sequence_search(dir->dirents, d, bluesky_dirent_compare, NULL); + i = g_sequence_iter_prev(i); - /* If a directory entry already exists, return failure. The caller must - * delete the old entry and try again. TODO: We'll fail on a hash - * collision; we should handle that case. */ - if (!g_sequence_iter_is_end(g_sequence_iter_prev(i))) { - BlueSkyDirent *d2 = g_sequence_get(g_sequence_iter_prev(i)); - if (d2->hash == hash) - return FALSE; - } + /* Assertion check, this ought to succeed */ + g_return_val_if_fail(g_sequence_get(i) == d, FALSE); + + g_sequence_remove(i); + + bluesky_dirent_destroy(d); - g_sequence_insert_before(i, d); - dir->change_count++; + bluesky_inode_update_ctime(dir, 1); return TRUE; } @@ -133,7 +137,7 @@ void bluesky_directory_dump(BlueSkyInode *dir) while (!g_sequence_iter_is_end(i)) { BlueSkyDirent *d = g_sequence_get(i); - g_print(" 0x%016llx [inum=%lld] %s\n", d->hash, d->inum, d->name); + g_print(" 0x%08x [inum=%lld] %s\n", d->cookie, d->inum, d->name); i = g_sequence_iter_next(i); } } diff --git a/inode.c b/inode.c index 95afc0a..aa822b7 100644 --- a/inode.c +++ b/inode.c @@ -24,6 +24,18 @@ int64_t bluesky_get_current_time() return (int64_t)t.tv_sec * 1000000 + t.tv_usec; } +/* Update an inode to indicate that a modification was made. This increases + * the change counter, updates the ctime to the current time, and optionally + * updates the mtime. */ +void bluesky_inode_update_ctime(BlueSkyInode *inode, gboolean update_mtime) +{ + int64_t now = bluesky_get_current_time(); + inode->change_count++; + inode->ctime = now; + if (update_mtime) + inode->mtime = now; +} + /* Unfortunately a glib hash table is only guaranteed to be able to store * 32-bit keys if we use the key directly. If we want 64-bit inode numbers, * we'll have to allocate memory to store the 64-bit inumber, and use a pointer @@ -85,6 +97,7 @@ BlueSkyInode *bluesky_new_inode(uint64_t inum, BlueSkyFileType type) break; case BLUESKY_DIRECTORY: i->dirents = g_sequence_new(bluesky_dirent_destroy); + i->dirhash = g_hash_table_new(g_str_hash, g_str_equal); break; case BLUESKY_BLOCK: case BLUESKY_CHARACTER: diff --git a/main.c b/main.c index 62fcd0c..dc34d50 100644 --- a/main.c +++ b/main.c @@ -36,11 +36,24 @@ int main(int argc, char *argv[]) file->nlink = 1; file->mode = 0755; bluesky_insert_inode(fs, file); - bluesky_directory_insert(root, "demo", file->inum); + bluesky_directory_insert(root, "foo", file->inum); + + file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), BLUESKY_REGULAR); + file->nlink = 1; + file->mode = 0755; + bluesky_insert_inode(fs, file); + bluesky_directory_insert(root, "bar", file->inum); + + file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), BLUESKY_REGULAR); + file->nlink = 1; + file->mode = 0755; + bluesky_insert_inode(fs, file); + bluesky_directory_insert(root, "baz", file->inum); bluesky_directory_dump(root); bluesky_directory_lookup(root, "foo"); - bluesky_directory_lookup(root, "demo"); + bluesky_directory_lookup(root, "bar"); + bluesky_directory_lookup(root, "baz"); return 0; } diff --git a/nfs3/nfs3.c b/nfs3/nfs3.c index 90f62d7..04a77b6 100644 --- a/nfs3/nfs3.c +++ b/nfs3/nfs3.c @@ -389,7 +389,7 @@ nfsproc3_readdir_3_svc(readdir3args *argp, struct svc_req *rqstp) BlueSkyDirent *d = g_sequence_get(i); dirents[count].fileid = d->inum; dirents[count].name = d->name; - dirents[count].cookie = d->hash; + dirents[count].cookie = d->cookie; dirents[count].nextentry = NULL; if (count > 0) dirents[count - 1].nextentry = &dirents[count];