X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=nfs3%2Fnfs3.c;h=80fb432ab0771e6214da3374daec4bcbd226c65b;hb=8ff0fd08d6e1cc97cdb7e94b7cd97dc28c29e674;hp=3f858ac4a1569bb03bb830a0745e8c750b9b4b9e;hpb=53817147ff42b54d2b103216983cbf3e1ffa341f;p=bluesky.git diff --git a/nfs3/nfs3.c b/nfs3/nfs3.c index 3f858ac..80fb432 100644 --- a/nfs3/nfs3.c +++ b/nfs3/nfs3.c @@ -1,3 +1,33 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* * This is sample code generated by rpcgen. * These are only templates and you can use them @@ -9,6 +39,9 @@ extern BlueSkyFS *fs; +#define NFS_BLOCKSIZE 32768 +#define NFS_MAXSIZE (1 << 20) + /* Check that a string is a valid file name. We require that it be valid * UTF-8, that it not be empty, and that it not contain embedded forward * slashes. Also checks that the length of the string is not more than the @@ -27,14 +60,27 @@ gboolean validate_filename(const char *filename) return TRUE; } +/* Arrange for a reference to an inode to be dropped when the RPC request + * completes. */ +void schedule_inode_unref(RPCRequest *req, BlueSkyInode *inode) +{ + struct cleanup_list *c = g_new(struct cleanup_list, 1); + c->func = (void (*)(void *))bluesky_inode_unref; + c->arg = inode; + c->next = req->cleanup; + req->cleanup = c; +} + /* Look up a BlueSkyInode given an NFS filehandle. Returns NULL if the * filehandle is invalid. */ -BlueSkyInode *lookup_fh(nfs_fh3 *fh) +BlueSkyInode *lookup_fh(RPCRequest *req, nfs_fh3 *fh) { BlueSkyInode *inode = NULL; if (fh->data.data_len == 8) { uint64_t inum = GUINT64_FROM_BE(*(uint64_t *)(fh->data.data_val)); inode = bluesky_get_inode(fs, inum); + if (inode != NULL) + schedule_inode_unref(req, inode); } return inode; } @@ -91,8 +137,7 @@ void set_attributes(BlueSkyInode *inode, sattr3 *attributes) break; } - inode->ctime = now; - inode->change_count++; + bluesky_inode_update_ctime(inode, FALSE); } /* Copy inode attributes into NFS response. The BlueSkyInode should be locked @@ -146,10 +191,12 @@ void nfsproc3_getattr_3_svc(nfs_fh3 *argp, RPCRequest *req) getattr3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(argp); + BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { result.status = NFS3_OK; + g_mutex_lock(inode->lock); encode_fattr3(&result.getattr3res_u.attributes, inode); + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } @@ -164,19 +211,21 @@ void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) result.wccstat3_u.wcc.before.present = FALSE; result.wccstat3_u.wcc.after.present = FALSE; - BlueSkyInode *inode = lookup_fh(&argp->object); + BlueSkyInode *inode = lookup_fh(req, &argp->object); if (inode == NULL) { result.status = NFS3ERR_STALE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); encode_pre_wcc(&result.wccstat3_u.wcc, inode); if (argp->guard.check) { if (inode->ctime != decode_nfstime3(&argp->guard.sattrguard3_u.ctime)) { result.status = NFS3ERR_NOT_SYNC; result.wccstat3_u.wcc.after.present = TRUE; encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, inode); + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } @@ -189,6 +238,9 @@ void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) inode); result.status = NFS3_OK; + bluesky_inode_do_sync(inode); + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -197,7 +249,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) lookup3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.lookup3res_u.resfail.present = FALSE; @@ -205,6 +257,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); result.lookup3res_u.resfail.present = TRUE; encode_fattr3(&result.lookup3res_u.resfail.post_op_attr_u.attributes, dir); if (!validate_filename(argp->name)) { @@ -212,6 +265,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) result.status = NFS3ERR_NAMETOOLONG; else result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -220,19 +274,25 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) uint64_t inum = bluesky_directory_lookup(dir, argp->name); if (inum == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + + result.lookup3res_u.resok.dir_attributes.present = TRUE; + encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); + g_mutex_unlock(dir->lock); + BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); + schedule_inode_unref(req, inode); result.status = NFS3_OK; - result.lookup3res_u.resok.dir_attributes.present = TRUE; - encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); result.lookup3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.lookup3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -241,6 +301,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) result.lookup3res_u.resok.object.data.data_len = 8; result.lookup3res_u.resok.object.data.data_val = (char *)&fh_bytes; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -249,7 +310,7 @@ void nfsproc3_access_3_svc(access3args *argp, RPCRequest *req) access3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(&argp->object); + BlueSkyInode *inode = lookup_fh(req, &argp->object); if (inode == NULL) { result.status = NFS3ERR_STALE; result.access3res_u.resfail.present = FALSE; @@ -257,10 +318,12 @@ void nfsproc3_access_3_svc(access3args *argp, RPCRequest *req) return; } + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.access3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.access3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); result.access3res_u.resok.access = argp->access; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -270,8 +333,9 @@ void nfsproc3_readlink_3_svc(nfs_fh3 *argp, RPCRequest *req) readlink3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(argp); + BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { + g_mutex_lock(inode->lock); if (inode->type == BLUESKY_SYMLINK) { result.status = NFS3_OK; result.readlink3res_u.resok.symlink_attributes.present = TRUE; @@ -282,6 +346,7 @@ void nfsproc3_readlink_3_svc(nfs_fh3 *argp, RPCRequest *req) result.readlink3res_u.resfail.present = TRUE; encode_fattr3(&result.readlink3res_u.resfail.post_op_attr_u.attributes, inode); } + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } @@ -293,9 +358,11 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) { read3res result; memset(&result, 0, sizeof(result)); - char buf[32768]; + char buf[NFS_MAXSIZE]; + + bluesky_flushd_invoke_conditional(fs); - BlueSkyInode *inode = lookup_fh(&argp->file); + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.read3res_u.resfail.present = FALSE; @@ -303,11 +370,14 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) return; } + g_mutex_lock(inode->lock); + int count = argp->count; if (argp->offset >= inode->size) { count = 0; result.read3res_u.resok.eof = TRUE; } else { + count = MIN(count, NFS_MAXSIZE); count = MIN(count, inode->size - argp->offset); if (argp->offset + count == inode->size) result.read3res_u.resok.eof = TRUE; @@ -324,6 +394,8 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) result.read3res_u.resok.data.data_val = buf; result.read3res_u.resok.data.data_len = count; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); } @@ -334,7 +406,9 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *inode = lookup_fh(&argp->file); + bluesky_flushd_invoke_conditional(fs); + + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.write3res_u.resfail = wcc; @@ -342,10 +416,28 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) return; } +#if 0 + /* FIXME: Hack to throttle writes when there is too much dirty data still + * to be written out. */ + while (g_atomic_int_get(&fs->cache_dirty) > 4096 + || g_atomic_int_get(&fs->cache_total) > 8192) { + g_print("Too many dirty pages (%d) or total pages (%d); throttling writes...\n", + g_atomic_int_get(&fs->cache_dirty), + g_atomic_int_get(&fs->cache_total)); + struct timespec delay; + delay.tv_sec = 2; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + } +#endif + + g_mutex_lock(inode->lock); + encode_pre_wcc(&wcc, inode); if (inode->type != BLUESKY_REGULAR) { result.status = NFS3ERR_INVAL; result.write3res_u.resfail = wcc; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } @@ -366,7 +458,16 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) encode_fattr3(&wcc.after.post_op_attr_u.attributes, inode); result.write3res_u.resok.file_wcc = wcc; result.write3res_u.resok.count = argp->count; - result.write3res_u.resok.committed = FILE_SYNC; + result.write3res_u.resok.committed = UNSTABLE; + memcpy(result.write3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + if (argp->stable != UNSTABLE) { + bluesky_inode_do_sync(inode); + result.write3res_u.resok.committed = FILE_SYNC; + } + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -378,7 +479,7 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; @@ -386,10 +487,13 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -400,25 +504,28 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_REGULAR); file->nlink = 1; file->mode = 0755; int64_t time = bluesky_get_current_time(); - printf("time: %"PRIi64"\n", time); file->mtime = time; file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -432,6 +539,11 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } @@ -442,7 +554,7 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; @@ -450,10 +562,13 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -464,10 +579,12 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_DIRECTORY); file->nlink = 1; @@ -477,12 +594,14 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); set_attributes(file, &argp->attributes); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -496,6 +615,10 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -506,18 +629,20 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -528,10 +653,12 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_SYMLINK); file->nlink = 1; @@ -542,11 +669,13 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) file->atime = time; file->ntime = time; file->symlink_contents = g_strdup(argp->symlink.symlink_data); + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -560,6 +689,10 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -578,13 +711,15 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) wccstat3 result; memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&result.wccstat3_u.wcc, dir); if (!validate_filename(argp->name) @@ -592,6 +727,7 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) || strcmp(argp->name, "..") == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -605,6 +741,8 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -613,13 +751,15 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) wccstat3 result; memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&result.wccstat3_u.wcc, dir); if (!validate_filename(argp->name) @@ -627,6 +767,7 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) || strcmp(argp->name, "..") == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -635,12 +776,17 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); + schedule_inode_unref(req, inode); if (inode->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -648,6 +794,8 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) printf("Directory not empty: %d entries\n", g_sequence_get_length(inode->dirents)); result.status = NFS3ERR_NOTEMPTY; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -661,6 +809,10 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); + bluesky_inode_do_sync(inode); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -671,20 +823,28 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) wcc_data *wcc1 = &result.rename3res_u.res.fromdir_wcc; wcc_data *wcc2 = &result.rename3res_u.res.todir_wcc; - BlueSkyInode *dir1 = lookup_fh(&argp->from.dir); + BlueSkyInode *dir1 = lookup_fh(req, &argp->from.dir); if (dir1 == NULL) { result.status = NFS3ERR_STALE; async_rpc_send_reply(req, &result); return; } - encode_pre_wcc(wcc1, dir1); - BlueSkyInode *dir2 = lookup_fh(&argp->to.dir); + BlueSkyInode *dir2 = lookup_fh(req, &argp->to.dir); if (dir2 == NULL) { result.status = NFS3ERR_STALE; async_rpc_send_reply(req, &result); return; } + + if (dir1->inum < dir2->inum) { + g_mutex_lock(dir1->lock); + g_mutex_lock(dir2->lock); + } else if (dir1->inum > dir2->inum) { + g_mutex_lock(dir2->lock); + g_mutex_lock(dir1->lock); + } + encode_pre_wcc(wcc1, dir1); encode_pre_wcc(wcc2, dir1); gboolean status = bluesky_rename(dir1, argp->from.name, @@ -700,6 +860,12 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) else result.status = NFS3ERR_PERM; + bluesky_inode_do_sync(dir2); + bluesky_inode_do_sync(dir1); + + g_mutex_unlock(dir1->lock); + if (dir1->inum != dir2->inum) + g_mutex_unlock(dir2->lock); async_rpc_send_reply(req, &result); } @@ -710,26 +876,31 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *inode = lookup_fh(&argp->file); + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.link3res_u.res.linkdir_wcc = wcc; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); - BlueSkyInode *dir = lookup_fh(&argp->link.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->link.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -741,6 +912,8 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -748,11 +921,14 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) if (!bluesky_directory_insert(dir, argp->link.name, inode->inum)) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } inode->nlink++; - bluesky_inode_update_ctime(inode, 0); + bluesky_inode_update_ctime(inode, FALSE); + bluesky_inode_update_ctime(dir, TRUE); result.status = NFS3_OK; wcc.after.present = TRUE; @@ -761,6 +937,10 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) encode_fattr3(&result.link3res_u.res.file_attributes.post_op_attr_u.attributes, inode); result.link3res_u.res.linkdir_wcc = wcc; + bluesky_inode_do_sync(inode); + bluesky_inode_do_sync(dir); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -773,13 +953,14 @@ void nfsproc3_readdir_3_svc(readdir3args *argp, RPCRequest *req) readdir3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.readdir3res_u.resfail.present = FALSE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); result.status = NFS3_OK; result.readdir3res_u.resok.dir_attributes.present = TRUE; @@ -812,6 +993,7 @@ void nfsproc3_readdir_3_svc(readdir3args *argp, RPCRequest *req) result.readdir3res_u.resok.reply.entries = NULL; result.readdir3res_u.resok.reply.eof = g_sequence_iter_is_end(i); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -827,13 +1009,14 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) readdirplus3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.readdirplus3res_u.resfail.present = FALSE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); result.status = NFS3_OK; result.readdirplus3res_u.resok.dir_attributes.present = TRUE; @@ -845,21 +1028,44 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) uint64_t fh_bytes[MAX_READDIR_DIRENTS]; int count = 0; - /* TODO: Handle dircount, maxcount arguments from client. */ - + GSequenceIter *i; BlueSkyDirent start = {NULL, NULL, argp->cookie, 0}; - GSequenceIter *i = g_sequence_search(dir->dirents, &start, - bluesky_dirent_compare, NULL); + /* Perform a prefetch pass on inodes: for all the inodes we think we will + * return information about, try to load each one but don't wait. This + * should let multiple inodes be fetched in parallel, instead of + * sequentially in the loop that follows. */ + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + while (count < MAX_READDIR_DIRENTS + && !g_sequence_iter_is_end(i) + && dircount <= argp->dircount + && dircount + attrcount <= argp->maxcount) + { + BlueSkyDirent *d = g_sequence_get(i); + bluesky_inode_prefetch(fs, d->inum); + dircount += 24 + ((strlen(d->name) + 3) & ~3); + attrcount += 88 + 8 + 8; + i = g_sequence_iter_next(i); + } + + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + count = 0; + dircount = 88 + 16; + attrcount = 0; while (count < MAX_READDIR_DIRENTS && !g_sequence_iter_is_end(i)) { BlueSkyDirent *d = g_sequence_get(i); BlueSkyInode *inode = bluesky_get_inode(fs, d->inum); if (inode != NULL) { + g_mutex_lock(inode->lock); dircount += 24 + ((strlen(d->name) + 3) & ~3); attrcount += 88 + 8 + 8; if (dircount > argp->dircount || dircount + attrcount > argp->maxcount) + { + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); break; + } dirents[count].fileid = d->inum; dirents[count].name = d->name; dirents[count].cookie = d->cookie; @@ -874,6 +1080,8 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) if (count > 0) dirents[count - 1].nextentry = &dirents[count]; count++; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); } i = g_sequence_iter_next(i); } @@ -884,6 +1092,7 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) result.readdirplus3res_u.resok.reply.entries = NULL; result.readdirplus3res_u.resok.reply.eof = g_sequence_iter_is_end(i); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -892,13 +1101,14 @@ void nfsproc3_fsstat_3_svc(nfs_fh3 *argp, RPCRequest *req) fsstat3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(argp); + BlueSkyInode *inode = lookup_fh(req, argp); if (inode == NULL) { result.status = NFS3ERR_STALE; result.fsstat3res_u.resfail.present = FALSE; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.fsstat3res_u.resok.obj_attributes.present = TRUE; @@ -912,6 +1122,7 @@ void nfsproc3_fsstat_3_svc(nfs_fh3 *argp, RPCRequest *req) result.fsstat3res_u.resok.afiles = 0; result.fsstat3res_u.resok.invarsec = 0; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -921,22 +1132,25 @@ void nfsproc3_fsinfo_3_svc(nfs_fh3 *argp, RPCRequest *req) memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.fsinfo3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.fsinfo3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); - result.fsinfo3res_u.resok.rtmax = 32768; - result.fsinfo3res_u.resok.rtpref = 32768; - result.fsinfo3res_u.resok.rtmult = 4096; - result.fsinfo3res_u.resok.wtmax = 32768; - result.fsinfo3res_u.resok.wtpref = 32768; - result.fsinfo3res_u.resok.wtmult = 4096; - result.fsinfo3res_u.resok.dtpref = 4096; + result.fsinfo3res_u.resok.rtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.wtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.dtpref = NFS_BLOCKSIZE; result.fsinfo3res_u.resok.maxfilesize = 0x7fffffffffffffffULL; result.fsinfo3res_u.resok.time_delta.seconds = 0; result.fsinfo3res_u.resok.time_delta.nseconds = 1000; result.fsinfo3res_u.resok.properties = FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); async_rpc_send_reply(req, &result); } @@ -946,6 +1160,7 @@ void nfsproc3_pathconf_3_svc(nfs_fh3 *argp, RPCRequest *req) memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.pathconf3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.pathconf3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -956,6 +1171,8 @@ void nfsproc3_pathconf_3_svc(nfs_fh3 *argp, RPCRequest *req) result.pathconf3res_u.resok.case_insensitive = FALSE; result.pathconf3res_u.resok.case_preserving = TRUE; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); async_rpc_send_reply(req, &result); } @@ -964,7 +1181,26 @@ void nfsproc3_commit_3_svc(commit3args *argp, RPCRequest *req) commit3res result; memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + result.status = NFS3_OK; + + BlueSkyInode *inode = lookup_fh(req, &argp->file); + if (inode == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + g_mutex_lock(inode->lock); + encode_pre_wcc(&result.commit3res_u.resok.file_wcc, inode); + + bluesky_inode_do_sync(inode); + + result.commit3res_u.resok.file_wcc.after.present = TRUE; + encode_fattr3(&result.commit3res_u.resok.file_wcc.after.post_op_attr_u.attributes, inode); + memcpy(result.commit3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); }