X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=nfs3%2Fnfs3.c;h=80fb432ab0771e6214da3374daec4bcbd226c65b;hb=8ff0fd08d6e1cc97cdb7e94b7cd97dc28c29e674;hp=8be68bd5977fef5f2f746a2ca51eb7fcfc6a263a;hpb=3c5c5e9b5a8650e0549712e20e007ba2399656bf;p=bluesky.git diff --git a/nfs3/nfs3.c b/nfs3/nfs3.c index 8be68bd..80fb432 100644 --- a/nfs3/nfs3.c +++ b/nfs3/nfs3.c @@ -1,3 +1,33 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* * This is sample code generated by rpcgen. * These are only templates and you can use them @@ -9,6 +39,9 @@ extern BlueSkyFS *fs; +#define NFS_BLOCKSIZE 32768 +#define NFS_MAXSIZE (1 << 20) + /* Check that a string is a valid file name. We require that it be valid * UTF-8, that it not be empty, and that it not contain embedded forward * slashes. Also checks that the length of the string is not more than the @@ -104,8 +137,7 @@ void set_attributes(BlueSkyInode *inode, sattr3 *attributes) break; } - inode->ctime = now; - inode->change_count++; + bluesky_inode_update_ctime(inode, FALSE); } /* Copy inode attributes into NFS response. The BlueSkyInode should be locked @@ -162,7 +194,9 @@ void nfsproc3_getattr_3_svc(nfs_fh3 *argp, RPCRequest *req) BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { result.status = NFS3_OK; + g_mutex_lock(inode->lock); encode_fattr3(&result.getattr3res_u.attributes, inode); + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } @@ -184,12 +218,14 @@ void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) return; } + g_mutex_lock(inode->lock); encode_pre_wcc(&result.wccstat3_u.wcc, inode); if (argp->guard.check) { if (inode->ctime != decode_nfstime3(&argp->guard.sattrguard3_u.ctime)) { result.status = NFS3ERR_NOT_SYNC; result.wccstat3_u.wcc.after.present = TRUE; encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, inode); + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } @@ -202,6 +238,9 @@ void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) inode); result.status = NFS3_OK; + bluesky_inode_do_sync(inode); + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -218,6 +257,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); result.lookup3res_u.resfail.present = TRUE; encode_fattr3(&result.lookup3res_u.resfail.post_op_attr_u.attributes, dir); if (!validate_filename(argp->name)) { @@ -225,6 +265,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) result.status = NFS3ERR_NAMETOOLONG; else result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -233,20 +274,25 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) uint64_t inum = bluesky_directory_lookup(dir, argp->name); if (inum == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + + result.lookup3res_u.resok.dir_attributes.present = TRUE; + encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); + g_mutex_unlock(dir->lock); + BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); schedule_inode_unref(req, inode); result.status = NFS3_OK; - result.lookup3res_u.resok.dir_attributes.present = TRUE; - encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); result.lookup3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.lookup3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -255,6 +301,7 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) result.lookup3res_u.resok.object.data.data_len = 8; result.lookup3res_u.resok.object.data.data_val = (char *)&fh_bytes; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -271,10 +318,12 @@ void nfsproc3_access_3_svc(access3args *argp, RPCRequest *req) return; } + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.access3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.access3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); result.access3res_u.resok.access = argp->access; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -286,6 +335,7 @@ void nfsproc3_readlink_3_svc(nfs_fh3 *argp, RPCRequest *req) BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { + g_mutex_lock(inode->lock); if (inode->type == BLUESKY_SYMLINK) { result.status = NFS3_OK; result.readlink3res_u.resok.symlink_attributes.present = TRUE; @@ -296,6 +346,7 @@ void nfsproc3_readlink_3_svc(nfs_fh3 *argp, RPCRequest *req) result.readlink3res_u.resfail.present = TRUE; encode_fattr3(&result.readlink3res_u.resfail.post_op_attr_u.attributes, inode); } + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } @@ -307,7 +358,9 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) { read3res result; memset(&result, 0, sizeof(result)); - char buf[32768]; + char buf[NFS_MAXSIZE]; + + bluesky_flushd_invoke_conditional(fs); BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { @@ -317,11 +370,14 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) return; } + g_mutex_lock(inode->lock); + int count = argp->count; if (argp->offset >= inode->size) { count = 0; result.read3res_u.resok.eof = TRUE; } else { + count = MIN(count, NFS_MAXSIZE); count = MIN(count, inode->size - argp->offset); if (argp->offset + count == inode->size) result.read3res_u.resok.eof = TRUE; @@ -338,6 +394,8 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) result.read3res_u.resok.data.data_val = buf; result.read3res_u.resok.data.data_len = count; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); } @@ -348,6 +406,8 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); + bluesky_flushd_invoke_conditional(fs); + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; @@ -356,10 +416,28 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) return; } +#if 0 + /* FIXME: Hack to throttle writes when there is too much dirty data still + * to be written out. */ + while (g_atomic_int_get(&fs->cache_dirty) > 4096 + || g_atomic_int_get(&fs->cache_total) > 8192) { + g_print("Too many dirty pages (%d) or total pages (%d); throttling writes...\n", + g_atomic_int_get(&fs->cache_dirty), + g_atomic_int_get(&fs->cache_total)); + struct timespec delay; + delay.tv_sec = 2; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + } +#endif + + g_mutex_lock(inode->lock); + encode_pre_wcc(&wcc, inode); if (inode->type != BLUESKY_REGULAR) { result.status = NFS3ERR_INVAL; result.write3res_u.resfail = wcc; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } @@ -380,7 +458,16 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) encode_fattr3(&wcc.after.post_op_attr_u.attributes, inode); result.write3res_u.resok.file_wcc = wcc; result.write3res_u.resok.count = argp->count; - result.write3res_u.resok.committed = FILE_SYNC; + result.write3res_u.resok.committed = UNSTABLE; + memcpy(result.write3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + if (argp->stable != UNSTABLE) { + bluesky_inode_do_sync(inode); + result.write3res_u.resok.committed = FILE_SYNC; + } + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -400,10 +487,13 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -414,25 +504,28 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_REGULAR); file->nlink = 1; file->mode = 0755; int64_t time = bluesky_get_current_time(); - printf("time: %"PRIi64"\n", time); file->mtime = time; file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -446,6 +539,11 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } @@ -464,10 +562,13 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -478,10 +579,12 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_DIRECTORY); file->nlink = 1; @@ -491,12 +594,14 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); set_attributes(file, &argp->attributes); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -510,6 +615,10 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -527,11 +636,13 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -542,10 +653,12 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_SYMLINK); file->nlink = 1; @@ -556,11 +669,13 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) file->atime = time; file->ntime = time; file->symlink_contents = g_strdup(argp->symlink.symlink_data); + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -574,6 +689,10 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -599,6 +718,8 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&result.wccstat3_u.wcc, dir); if (!validate_filename(argp->name) @@ -606,6 +727,7 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) || strcmp(argp->name, "..") == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -619,6 +741,8 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -634,6 +758,8 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&result.wccstat3_u.wcc, dir); if (!validate_filename(argp->name) @@ -641,6 +767,7 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) || strcmp(argp->name, "..") == 0) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -649,13 +776,17 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); schedule_inode_unref(req, inode); if (inode->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -663,6 +794,8 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) printf("Directory not empty: %d entries\n", g_sequence_get_length(inode->dirents)); result.status = NFS3ERR_NOTEMPTY; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -676,6 +809,10 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); + bluesky_inode_do_sync(inode); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -692,7 +829,6 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } - encode_pre_wcc(wcc1, dir1); BlueSkyInode *dir2 = lookup_fh(req, &argp->to.dir); if (dir2 == NULL) { @@ -700,6 +836,15 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + + if (dir1->inum < dir2->inum) { + g_mutex_lock(dir1->lock); + g_mutex_lock(dir2->lock); + } else if (dir1->inum > dir2->inum) { + g_mutex_lock(dir2->lock); + g_mutex_lock(dir1->lock); + } + encode_pre_wcc(wcc1, dir1); encode_pre_wcc(wcc2, dir1); gboolean status = bluesky_rename(dir1, argp->from.name, @@ -715,6 +860,12 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) else result.status = NFS3ERR_PERM; + bluesky_inode_do_sync(dir2); + bluesky_inode_do_sync(dir1); + + g_mutex_unlock(dir1->lock); + if (dir1->inum != dir2->inum) + g_mutex_unlock(dir2->lock); async_rpc_send_reply(req, &result); } @@ -732,19 +883,24 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); BlueSkyInode *dir = lookup_fh(req, &argp->link.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -756,6 +912,8 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -763,11 +921,14 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) if (!bluesky_directory_insert(dir, argp->link.name, inode->inum)) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } inode->nlink++; - bluesky_inode_update_ctime(inode, 0); + bluesky_inode_update_ctime(inode, FALSE); + bluesky_inode_update_ctime(dir, TRUE); result.status = NFS3_OK; wcc.after.present = TRUE; @@ -776,6 +937,10 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) encode_fattr3(&result.link3res_u.res.file_attributes.post_op_attr_u.attributes, inode); result.link3res_u.res.linkdir_wcc = wcc; + bluesky_inode_do_sync(inode); + bluesky_inode_do_sync(dir); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -795,6 +960,7 @@ void nfsproc3_readdir_3_svc(readdir3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); result.status = NFS3_OK; result.readdir3res_u.resok.dir_attributes.present = TRUE; @@ -827,6 +993,7 @@ void nfsproc3_readdir_3_svc(readdir3args *argp, RPCRequest *req) result.readdir3res_u.resok.reply.entries = NULL; result.readdir3res_u.resok.reply.eof = g_sequence_iter_is_end(i); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -849,6 +1016,7 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + g_mutex_lock(dir->lock); result.status = NFS3_OK; result.readdirplus3res_u.resok.dir_attributes.present = TRUE; @@ -860,21 +1028,44 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) uint64_t fh_bytes[MAX_READDIR_DIRENTS]; int count = 0; - /* TODO: Handle dircount, maxcount arguments from client. */ - + GSequenceIter *i; BlueSkyDirent start = {NULL, NULL, argp->cookie, 0}; - GSequenceIter *i = g_sequence_search(dir->dirents, &start, - bluesky_dirent_compare, NULL); + /* Perform a prefetch pass on inodes: for all the inodes we think we will + * return information about, try to load each one but don't wait. This + * should let multiple inodes be fetched in parallel, instead of + * sequentially in the loop that follows. */ + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + while (count < MAX_READDIR_DIRENTS + && !g_sequence_iter_is_end(i) + && dircount <= argp->dircount + && dircount + attrcount <= argp->maxcount) + { + BlueSkyDirent *d = g_sequence_get(i); + bluesky_inode_prefetch(fs, d->inum); + dircount += 24 + ((strlen(d->name) + 3) & ~3); + attrcount += 88 + 8 + 8; + i = g_sequence_iter_next(i); + } + + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + count = 0; + dircount = 88 + 16; + attrcount = 0; while (count < MAX_READDIR_DIRENTS && !g_sequence_iter_is_end(i)) { BlueSkyDirent *d = g_sequence_get(i); BlueSkyInode *inode = bluesky_get_inode(fs, d->inum); if (inode != NULL) { + g_mutex_lock(inode->lock); dircount += 24 + ((strlen(d->name) + 3) & ~3); attrcount += 88 + 8 + 8; if (dircount > argp->dircount || dircount + attrcount > argp->maxcount) + { + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); break; + } dirents[count].fileid = d->inum; dirents[count].name = d->name; dirents[count].cookie = d->cookie; @@ -889,6 +1080,7 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) if (count > 0) dirents[count - 1].nextentry = &dirents[count]; count++; + g_mutex_unlock(inode->lock); bluesky_inode_unref(inode); } i = g_sequence_iter_next(i); @@ -900,6 +1092,7 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) result.readdirplus3res_u.resok.reply.entries = NULL; result.readdirplus3res_u.resok.reply.eof = g_sequence_iter_is_end(i); + g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -915,6 +1108,7 @@ void nfsproc3_fsstat_3_svc(nfs_fh3 *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.fsstat3res_u.resok.obj_attributes.present = TRUE; @@ -928,6 +1122,7 @@ void nfsproc3_fsstat_3_svc(nfs_fh3 *argp, RPCRequest *req) result.fsstat3res_u.resok.afiles = 0; result.fsstat3res_u.resok.invarsec = 0; + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -937,23 +1132,25 @@ void nfsproc3_fsinfo_3_svc(nfs_fh3 *argp, RPCRequest *req) memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.fsinfo3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.fsinfo3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); - result.fsinfo3res_u.resok.rtmax = 32768; - result.fsinfo3res_u.resok.rtpref = 32768; - result.fsinfo3res_u.resok.rtmult = 4096; - result.fsinfo3res_u.resok.wtmax = 32768; - result.fsinfo3res_u.resok.wtpref = 32768; - result.fsinfo3res_u.resok.wtmult = 4096; - result.fsinfo3res_u.resok.dtpref = 4096; + result.fsinfo3res_u.resok.rtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.wtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.dtpref = NFS_BLOCKSIZE; result.fsinfo3res_u.resok.maxfilesize = 0x7fffffffffffffffULL; result.fsinfo3res_u.resok.time_delta.seconds = 0; result.fsinfo3res_u.resok.time_delta.nseconds = 1000; result.fsinfo3res_u.resok.properties = FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME; - bluesky_inode_unref(inode); + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); async_rpc_send_reply(req, &result); } @@ -963,6 +1160,7 @@ void nfsproc3_pathconf_3_svc(nfs_fh3 *argp, RPCRequest *req) memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.pathconf3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.pathconf3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -972,8 +1170,9 @@ void nfsproc3_pathconf_3_svc(nfs_fh3 *argp, RPCRequest *req) result.pathconf3res_u.resok.chown_restricted = TRUE; result.pathconf3res_u.resok.case_insensitive = FALSE; result.pathconf3res_u.resok.case_preserving = TRUE; - bluesky_inode_unref(inode); + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); async_rpc_send_reply(req, &result); } @@ -982,7 +1181,26 @@ void nfsproc3_commit_3_svc(commit3args *argp, RPCRequest *req) commit3res result; memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + result.status = NFS3_OK; + + BlueSkyInode *inode = lookup_fh(req, &argp->file); + if (inode == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + g_mutex_lock(inode->lock); + encode_pre_wcc(&result.commit3res_u.resok.file_wcc, inode); + + bluesky_inode_do_sync(inode); + + result.commit3res_u.resok.file_wcc.after.present = TRUE; + encode_fattr3(&result.commit3res_u.resok.file_wcc.after.post_op_attr_u.attributes, inode); + memcpy(result.commit3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); }