X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=nfs3%2Fnfs3.c;h=80fb432ab0771e6214da3374daec4bcbd226c65b;hb=8ff0fd08d6e1cc97cdb7e94b7cd97dc28c29e674;hp=2aa429955f12b327d366ad2fcb4a347ae6c648be;hpb=d5e2f1643a99e57607e86173d832dbfa4f8b51c2;p=bluesky.git diff --git a/nfs3/nfs3.c b/nfs3/nfs3.c index 2aa4299..80fb432 100644 --- a/nfs3/nfs3.c +++ b/nfs3/nfs3.c @@ -1,3 +1,33 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* * This is sample code generated by rpcgen. * These are only templates and you can use them @@ -9,8 +39,8 @@ extern BlueSkyFS *fs; -static int null_int; -static void *null_result = (void *)&null_int; +#define NFS_BLOCKSIZE 32768 +#define NFS_MAXSIZE (1 << 20) /* Check that a string is a valid file name. We require that it be valid * UTF-8, that it not be empty, and that it not contain embedded forward @@ -30,14 +60,27 @@ gboolean validate_filename(const char *filename) return TRUE; } +/* Arrange for a reference to an inode to be dropped when the RPC request + * completes. */ +void schedule_inode_unref(RPCRequest *req, BlueSkyInode *inode) +{ + struct cleanup_list *c = g_new(struct cleanup_list, 1); + c->func = (void (*)(void *))bluesky_inode_unref; + c->arg = inode; + c->next = req->cleanup; + req->cleanup = c; +} + /* Look up a BlueSkyInode given an NFS filehandle. Returns NULL if the * filehandle is invalid. */ -BlueSkyInode *lookup_fh(nfs_fh3 *fh) +BlueSkyInode *lookup_fh(RPCRequest *req, nfs_fh3 *fh) { BlueSkyInode *inode = NULL; if (fh->data.data_len == 8) { uint64_t inum = GUINT64_FROM_BE(*(uint64_t *)(fh->data.data_val)); inode = bluesky_get_inode(fs, inum); + if (inode != NULL) + schedule_inode_unref(req, inode); } return inode; } @@ -94,8 +137,7 @@ void set_attributes(BlueSkyInode *inode, sattr3 *attributes) break; } - inode->ctime = now; - inode->change_count++; + bluesky_inode_update_ctime(inode, FALSE); } /* Copy inode attributes into NFS response. The BlueSkyInode should be locked @@ -119,6 +161,14 @@ void encode_fattr3(struct fattr3 *result, BlueSkyInode *inode) result->mtime.nseconds = (inode->mtime % 1000000) * 1000; result->ctime.seconds = inode->ctime / 1000000; result->ctime.nseconds = (inode->ctime % 1000000) * 1000; + + switch (inode->type) { + case BLUESKY_SYMLINK: + result->size = strlen(inode->symlink_contents); + break; + default: + break; + } } void encode_pre_wcc(struct wcc_data *wcc, BlueSkyInode *inode) @@ -131,48 +181,53 @@ void encode_pre_wcc(struct wcc_data *wcc, BlueSkyInode *inode) wcc->before.pre_op_attr_u.attributes.ctime.nseconds = (inode->ctime % 1000000) * 1000; } -void * -nfsproc3_null_3_svc(void *argp, struct svc_req *rqstp) +void nfsproc3_null_3_svc(void *argp, RPCRequest *req) { - return null_result; + async_rpc_send_reply(req, NULL); } -getattr3res * -nfsproc3_getattr_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) +void nfsproc3_getattr_3_svc(nfs_fh3 *argp, RPCRequest *req) { - static getattr3res result; + getattr3res result; + memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(argp); + BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { result.status = NFS3_OK; + g_mutex_lock(inode->lock); encode_fattr3(&result.getattr3res_u.attributes, inode); + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } - return &result; + async_rpc_send_reply(req, &result); } -wccstat3 * -nfsproc3_setattr_3_svc(setattr3args *argp, struct svc_req *rqstp) +void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) { - static wccstat3 result; + wccstat3 result; + memset(&result, 0, sizeof(result)); result.wccstat3_u.wcc.before.present = FALSE; result.wccstat3_u.wcc.after.present = FALSE; - BlueSkyInode *inode = lookup_fh(&argp->object); + BlueSkyInode *inode = lookup_fh(req, &argp->object); if (inode == NULL) { result.status = NFS3ERR_STALE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(inode->lock); encode_pre_wcc(&result.wccstat3_u.wcc, inode); if (argp->guard.check) { if (inode->ctime != decode_nfstime3(&argp->guard.sattrguard3_u.ctime)) { result.status = NFS3ERR_NOT_SYNC; result.wccstat3_u.wcc.after.present = TRUE; encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, inode); - return &result; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); + return; } } @@ -183,21 +238,26 @@ nfsproc3_setattr_3_svc(setattr3args *argp, struct svc_req *rqstp) inode); result.status = NFS3_OK; - return &result; + bluesky_inode_do_sync(inode); + + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); } -lookup3res * -nfsproc3_lookup_3_svc(diropargs3 *argp, struct svc_req *rqstp) +void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) { - static lookup3res result; + lookup3res result; + memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.lookup3res_u.resfail.present = FALSE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); result.lookup3res_u.resfail.present = TRUE; encode_fattr3(&result.lookup3res_u.resfail.post_op_attr_u.attributes, dir); if (!validate_filename(argp->name)) { @@ -205,63 +265,77 @@ nfsproc3_lookup_3_svc(diropargs3 *argp, struct svc_req *rqstp) result.status = NFS3ERR_NAMETOOLONG; else result.status = NFS3ERR_NOENT; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } /* TODO: Special-case "." and "..". */ uint64_t inum = bluesky_directory_lookup(dir, argp->name); if (inum == 0) { result.status = NFS3ERR_NOENT; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } + + result.lookup3res_u.resok.dir_attributes.present = TRUE; + encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); + g_mutex_unlock(dir->lock); + BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(inode->lock); + schedule_inode_unref(req, inode); result.status = NFS3_OK; - result.lookup3res_u.resok.dir_attributes.present = TRUE; - encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); result.lookup3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.lookup3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); - static uint64_t fh_bytes; + uint64_t fh_bytes; fh_bytes = GUINT64_TO_BE(inum); result.lookup3res_u.resok.object.data.data_len = 8; result.lookup3res_u.resok.object.data.data_val = (char *)&fh_bytes; - return &result; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); } -access3res * -nfsproc3_access_3_svc(access3args *argp, struct svc_req *rqstp) +void nfsproc3_access_3_svc(access3args *argp, RPCRequest *req) { - static access3res result; + access3res result; + memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(&argp->object); + BlueSkyInode *inode = lookup_fh(req, &argp->object); if (inode == NULL) { result.status = NFS3ERR_STALE; result.access3res_u.resfail.present = FALSE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.access3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.access3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); result.access3res_u.resok.access = argp->access; + g_mutex_unlock(inode->lock); - return &result; + async_rpc_send_reply(req, &result); } -readlink3res * -nfsproc3_readlink_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) +void nfsproc3_readlink_3_svc(nfs_fh3 *argp, RPCRequest *req) { - static readlink3res result; + readlink3res result; memset(&result, 0, sizeof(result)); - BlueSkyInode *inode = lookup_fh(argp); + BlueSkyInode *inode = lookup_fh(req, argp); if (inode != NULL) { + g_mutex_lock(inode->lock); if (inode->type == BLUESKY_SYMLINK) { result.status = NFS3_OK; result.readlink3res_u.resok.symlink_attributes.present = TRUE; @@ -272,31 +346,38 @@ nfsproc3_readlink_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) result.readlink3res_u.resfail.present = TRUE; encode_fattr3(&result.readlink3res_u.resfail.post_op_attr_u.attributes, inode); } + g_mutex_unlock(inode->lock); } else { result.status = NFS3ERR_STALE; } - return &result; + async_rpc_send_reply(req, &result); } -read3res * -nfsproc3_read_3_svc(read3args *argp, struct svc_req *rqstp) +void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) { - static read3res result; - static char buf[32768]; + read3res result; + memset(&result, 0, sizeof(result)); + char buf[NFS_MAXSIZE]; - BlueSkyInode *inode = lookup_fh(&argp->file); + bluesky_flushd_invoke_conditional(fs); + + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.read3res_u.resfail.present = FALSE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(inode->lock); + int count = argp->count; if (argp->offset >= inode->size) { count = 0; result.read3res_u.resok.eof = TRUE; } else { + count = MIN(count, NFS_MAXSIZE); count = MIN(count, inode->size - argp->offset); if (argp->offset + count == inode->size) result.read3res_u.resok.eof = TRUE; @@ -313,28 +394,52 @@ nfsproc3_read_3_svc(read3args *argp, struct svc_req *rqstp) result.read3res_u.resok.data.data_val = buf; result.read3res_u.resok.data.data_len = count; - return &result; + g_mutex_unlock(inode->lock); + + async_rpc_send_reply(req, &result); } -write3res * -nfsproc3_write_3_svc(write3args *argp, struct svc_req *rqstp) +void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) { - static write3res result; + write3res result; + memset(&result, 0, sizeof(result)); struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *inode = lookup_fh(&argp->file); + bluesky_flushd_invoke_conditional(fs); + + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.write3res_u.resfail = wcc; - return &result; + async_rpc_send_reply(req, &result); + return; } +#if 0 + /* FIXME: Hack to throttle writes when there is too much dirty data still + * to be written out. */ + while (g_atomic_int_get(&fs->cache_dirty) > 4096 + || g_atomic_int_get(&fs->cache_total) > 8192) { + g_print("Too many dirty pages (%d) or total pages (%d); throttling writes...\n", + g_atomic_int_get(&fs->cache_dirty), + g_atomic_int_get(&fs->cache_total)); + struct timespec delay; + delay.tv_sec = 2; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + } +#endif + + g_mutex_lock(inode->lock); + encode_pre_wcc(&wcc, inode); if (inode->type != BLUESKY_REGULAR) { result.status = NFS3ERR_INVAL; result.write3res_u.resfail = wcc; - return &result; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); + return; } uint64_t lastbyte = argp->offset + argp->count; @@ -349,33 +454,48 @@ nfsproc3_write_3_svc(write3args *argp, struct svc_req *rqstp) argp->data.data_val, argp->count); } + wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, inode); result.write3res_u.resok.file_wcc = wcc; result.write3res_u.resok.count = argp->count; - result.write3res_u.resok.committed = FILE_SYNC; + result.write3res_u.resok.committed = UNSTABLE; + memcpy(result.write3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + if (argp->stable != UNSTABLE) { + bluesky_inode_do_sync(inode); + result.write3res_u.resok.committed = FILE_SYNC; + } + + g_mutex_unlock(inode->lock); - return &result; + async_rpc_send_reply(req, &result); } -diropres3 * -nfsproc3_create_3_svc(create3args *argp, struct svc_req *rqstp) +void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) { - static diropres3 result; + diropres3 result; + memset(&result, 0, sizeof(result)); struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } if (!validate_filename(argp->where.name) @@ -384,24 +504,28 @@ nfsproc3_create_3_svc(create3args *argp, struct svc_req *rqstp) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_REGULAR); file->nlink = 1; file->mode = 0755; int64_t time = bluesky_get_current_time(); - printf("time: %"PRIi64"\n", time); file->mtime = time; file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -409,34 +533,44 @@ nfsproc3_create_3_svc(create3args *argp, struct svc_req *rqstp) encode_fattr3(&result.diropres3_u.resok.obj_attributes.post_op_attr_u.attributes, file); result.diropres3_u.resok.dir_wcc = wcc; - static uint64_t fh_bytes; + uint64_t fh_bytes; fh_bytes = GUINT64_TO_BE(file->inum); result.diropres3_u.resok.obj.present = TRUE; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; - return &result; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); + + async_rpc_send_reply(req, &result); } -diropres3 * -nfsproc3_mkdir_3_svc(mkdir3args *argp, struct svc_req *rqstp) +void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) { - static diropres3 result; + diropres3 result; + memset(&result, 0, sizeof(result)); struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } if (!validate_filename(argp->where.name) @@ -445,9 +579,12 @@ nfsproc3_mkdir_3_svc(mkdir3args *argp, struct svc_req *rqstp) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_DIRECTORY); file->nlink = 1; @@ -457,12 +594,14 @@ nfsproc3_mkdir_3_svc(mkdir3args *argp, struct svc_req *rqstp) file->ctime = time; file->atime = time; file->ntime = time; + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); set_attributes(file, &argp->attributes); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -470,34 +609,42 @@ nfsproc3_mkdir_3_svc(mkdir3args *argp, struct svc_req *rqstp) encode_fattr3(&result.diropres3_u.resok.obj_attributes.post_op_attr_u.attributes, file); result.diropres3_u.resok.dir_wcc = wcc; - static uint64_t fh_bytes; + uint64_t fh_bytes; fh_bytes = GUINT64_TO_BE(file->inum); result.diropres3_u.resok.obj.present = TRUE; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; - return &result; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -diropres3 * -nfsproc3_symlink_3_svc(symlink3args *argp, struct svc_req *rqstp) +void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) { - static diropres3 result; + diropres3 result; + memset(&result, 0, sizeof(result)); struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *dir = lookup_fh(&argp->where.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->where.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.diropres3_u.resfail = wcc; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } if (!validate_filename(argp->where.name) @@ -506,9 +653,12 @@ nfsproc3_symlink_3_svc(symlink3args *argp, struct svc_req *rqstp) { result.status = NFS3ERR_EXIST; result.diropres3_u.resfail = wcc; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_SYMLINK); file->nlink = 1; @@ -519,11 +669,13 @@ nfsproc3_symlink_3_svc(symlink3args *argp, struct svc_req *rqstp) file->atime = time; file->ntime = time; file->symlink_contents = g_strdup(argp->symlink.symlink_data); + g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); - dir->mtime = dir->ctime = bluesky_get_current_time(); - dir->change_count++; + bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -531,39 +683,43 @@ nfsproc3_symlink_3_svc(symlink3args *argp, struct svc_req *rqstp) encode_fattr3(&result.diropres3_u.resok.obj_attributes.post_op_attr_u.attributes, file); result.diropres3_u.resok.dir_wcc = wcc; - static uint64_t fh_bytes; + uint64_t fh_bytes; fh_bytes = GUINT64_TO_BE(file->inum); result.diropres3_u.resok.obj.present = TRUE; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; - return &result; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); + g_mutex_unlock(file->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -diropres3 * -nfsproc3_mknod_3_svc(mknod3args *argp, struct svc_req *rqstp) +void nfsproc3_mknod_3_svc(mknod3args *argp, RPCRequest *req) { - static diropres3 result; + diropres3 result; + memset(&result, 0, sizeof(result)); result.status = NFS3ERR_NOTSUPP; - return &result; + async_rpc_send_reply(req, &result); } -wccstat3 * -nfsproc3_remove_3_svc(diropargs3 *argp, struct svc_req *rqstp) +void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) { - static wccstat3 result; - - result.wccstat3_u.wcc.before.present = FALSE; - result.wccstat3_u.wcc.after.present = FALSE; + wccstat3 result; + memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); + encode_pre_wcc(&result.wccstat3_u.wcc, dir); if (!validate_filename(argp->name) @@ -571,7 +727,9 @@ nfsproc3_remove_3_svc(diropargs3 *argp, struct svc_req *rqstp) || strcmp(argp->name, "..") == 0) { result.status = NFS3ERR_NOENT; - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } /* TODO: Decrement link count, deallocate inode if needed. */ @@ -583,55 +741,168 @@ nfsproc3_remove_3_svc(diropargs3 *argp, struct svc_req *rqstp) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); - return &result; + bluesky_inode_do_sync(dir); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -wccstat3 * -nfsproc3_rmdir_3_svc(diropargs3 *argp, struct svc_req *rqstp) +void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) { - static wccstat3 result; + wccstat3 result; + memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + BlueSkyInode *dir = lookup_fh(req, &argp->dir); + if (dir == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + g_mutex_lock(dir->lock); + + encode_pre_wcc(&result.wccstat3_u.wcc, dir); - return &result; + if (!validate_filename(argp->name) + || strcmp(argp->name, ".") == 0 + || strcmp(argp->name, "..") == 0) + { + result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; + } + + uint64_t inum = bluesky_directory_lookup(dir, argp->name); + BlueSkyInode *inode = bluesky_get_inode(fs, inum); + if (inode == NULL) { + result.status = NFS3ERR_NOENT; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; + } + g_mutex_lock(inode->lock); + schedule_inode_unref(req, inode); + + if (inode->type != BLUESKY_DIRECTORY) { + result.status = NFS3ERR_NOTDIR; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; + } + if (g_sequence_get_length(inode->dirents) > 0) { + printf("Directory not empty: %d entries\n", + g_sequence_get_length(inode->dirents)); + result.status = NFS3ERR_NOTEMPTY; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; + } + + /* TODO: Decrement link count, deallocate inode if needed. */ + + bluesky_directory_remove(dir, argp->name); + + result.status = NFS3_OK; + result.wccstat3_u.wcc.after.present = TRUE; + encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, + dir); + + bluesky_inode_do_sync(dir); + bluesky_inode_do_sync(inode); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -rename3res * -nfsproc3_rename_3_svc(rename3args *argp, struct svc_req *rqstp) +void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) { - static rename3res result; + rename3res result; + memset(&result, 0, sizeof(result)); + wcc_data *wcc1 = &result.rename3res_u.res.fromdir_wcc; + wcc_data *wcc2 = &result.rename3res_u.res.todir_wcc; - result.status = NFS3ERR_NOTSUPP; + BlueSkyInode *dir1 = lookup_fh(req, &argp->from.dir); + if (dir1 == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + BlueSkyInode *dir2 = lookup_fh(req, &argp->to.dir); + if (dir2 == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } - return &result; + if (dir1->inum < dir2->inum) { + g_mutex_lock(dir1->lock); + g_mutex_lock(dir2->lock); + } else if (dir1->inum > dir2->inum) { + g_mutex_lock(dir2->lock); + g_mutex_lock(dir1->lock); + } + encode_pre_wcc(wcc1, dir1); + encode_pre_wcc(wcc2, dir1); + + gboolean status = bluesky_rename(dir1, argp->from.name, + dir2, argp->to.name, + TRUE, TRUE); + + wcc1->after.present = TRUE; + encode_fattr3(&wcc1->after.post_op_attr_u.attributes, dir1); + wcc2->after.present = TRUE; + encode_fattr3(&wcc2->after.post_op_attr_u.attributes, dir2); + if (status) + result.status = NFS3_OK; + else + result.status = NFS3ERR_PERM; + + bluesky_inode_do_sync(dir2); + bluesky_inode_do_sync(dir1); + + g_mutex_unlock(dir1->lock); + if (dir1->inum != dir2->inum) + g_mutex_unlock(dir2->lock); + async_rpc_send_reply(req, &result); } -link3res * -nfsproc3_link_3_svc(link3args *argp, struct svc_req *rqstp) +void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) { - static link3res result; + link3res result; + memset(&result, 0, sizeof(result)); struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); - BlueSkyInode *inode = lookup_fh(&argp->file); + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; result.link3res_u.res.linkdir_wcc = wcc; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(inode->lock); - BlueSkyInode *dir = lookup_fh(&argp->link.dir); + BlueSkyInode *dir = lookup_fh(req, &argp->link.dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.link3res_u.res.linkdir_wcc = wcc; - return &result; + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); encode_pre_wcc(&wcc, dir); if (dir->type != BLUESKY_DIRECTORY) { result.status = NFS3ERR_NOTDIR; result.link3res_u.res.linkdir_wcc = wcc; - return &result; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } if (!validate_filename(argp->link.name) @@ -641,16 +912,23 @@ nfsproc3_link_3_svc(link3args *argp, struct svc_req *rqstp) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; - return &result; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } if (!bluesky_directory_insert(dir, argp->link.name, inode->inum)) { result.status = NFS3ERR_EXIST; result.link3res_u.res.linkdir_wcc = wcc; - return &result; + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); + return; } inode->nlink++; - bluesky_inode_update_ctime(inode, 0); + bluesky_inode_update_ctime(inode, FALSE); + bluesky_inode_update_ctime(dir, TRUE); result.status = NFS3_OK; wcc.after.present = TRUE; @@ -659,23 +937,30 @@ nfsproc3_link_3_svc(link3args *argp, struct svc_req *rqstp) encode_fattr3(&result.link3res_u.res.file_attributes.post_op_attr_u.attributes, inode); result.link3res_u.res.linkdir_wcc = wcc; - return &result; + bluesky_inode_do_sync(inode); + bluesky_inode_do_sync(dir); + g_mutex_unlock(inode->lock); + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } gint bluesky_dirent_compare(gconstpointer a, gconstpointer b, gpointer unused); -readdir3res * -nfsproc3_readdir_3_svc(readdir3args *argp, struct svc_req *rqstp) +#define MAX_READDIR_DIRENTS 64 +void nfsproc3_readdir_3_svc(readdir3args *argp, RPCRequest *req) { - static readdir3res result; + readdir3res result; + memset(&result, 0, sizeof(result)); - BlueSkyInode *dir = lookup_fh(&argp->dir); + BlueSkyInode *dir = lookup_fh(req, &argp->dir); if (dir == NULL) { result.status = NFS3ERR_STALE; result.readdir3res_u.resfail.present = FALSE; - return &result; + async_rpc_send_reply(req, &result); + return; } + g_mutex_lock(dir->lock); result.status = NFS3_OK; result.readdir3res_u.resok.dir_attributes.present = TRUE; @@ -683,8 +968,7 @@ nfsproc3_readdir_3_svc(readdir3args *argp, struct svc_req *rqstp) memset(result.readdir3res_u.resok.cookieverf, 0, sizeof(result.readdir3res_u.resok.cookieverf)); -#define MAX_READDIR_DIRENTS 4 - static entry3 dirents[MAX_READDIR_DIRENTS]; + entry3 dirents[MAX_READDIR_DIRENTS]; int count = 0; BlueSkyDirent start = {NULL, NULL, argp->cookie, 0}; @@ -709,60 +993,174 @@ nfsproc3_readdir_3_svc(readdir3args *argp, struct svc_req *rqstp) result.readdir3res_u.resok.reply.entries = NULL; result.readdir3res_u.resok.reply.eof = g_sequence_iter_is_end(i); - return &result; + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -readdirplus3res * -nfsproc3_readdirplus_3_svc(readdirplus3args *argp, struct svc_req *rqstp) +void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) { - static readdirplus3res result; + /* XDR-encoded sizes: + * post_op_attr: 88 bytes + * base readdirplus3resok: 88 + 16 bytes + * base directory entry: 24 bytes + filename + * attributes/fh3: 88 + 8 + filehandle size + */ + size_t dircount = 88 + 16, attrcount = 0; + readdirplus3res result; + memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + BlueSkyInode *dir = lookup_fh(req, &argp->dir); + if (dir == NULL) { + result.status = NFS3ERR_STALE; + result.readdirplus3res_u.resfail.present = FALSE; + async_rpc_send_reply(req, &result); + return; + } + g_mutex_lock(dir->lock); + + result.status = NFS3_OK; + result.readdirplus3res_u.resok.dir_attributes.present = TRUE; + encode_fattr3(&result.readdirplus3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); + memset(result.readdirplus3res_u.resok.cookieverf, 0, + sizeof(result.readdirplus3res_u.resok.cookieverf)); + + entryplus3 dirents[MAX_READDIR_DIRENTS]; + uint64_t fh_bytes[MAX_READDIR_DIRENTS]; + int count = 0; + + GSequenceIter *i; + BlueSkyDirent start = {NULL, NULL, argp->cookie, 0}; - return &result; + /* Perform a prefetch pass on inodes: for all the inodes we think we will + * return information about, try to load each one but don't wait. This + * should let multiple inodes be fetched in parallel, instead of + * sequentially in the loop that follows. */ + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + while (count < MAX_READDIR_DIRENTS + && !g_sequence_iter_is_end(i) + && dircount <= argp->dircount + && dircount + attrcount <= argp->maxcount) + { + BlueSkyDirent *d = g_sequence_get(i); + bluesky_inode_prefetch(fs, d->inum); + dircount += 24 + ((strlen(d->name) + 3) & ~3); + attrcount += 88 + 8 + 8; + i = g_sequence_iter_next(i); + } + + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + count = 0; + dircount = 88 + 16; + attrcount = 0; + while (count < MAX_READDIR_DIRENTS && !g_sequence_iter_is_end(i)) { + BlueSkyDirent *d = g_sequence_get(i); + BlueSkyInode *inode = bluesky_get_inode(fs, d->inum); + if (inode != NULL) { + g_mutex_lock(inode->lock); + dircount += 24 + ((strlen(d->name) + 3) & ~3); + attrcount += 88 + 8 + 8; + if (dircount > argp->dircount + || dircount + attrcount > argp->maxcount) + { + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); + break; + } + dirents[count].fileid = d->inum; + dirents[count].name = d->name; + dirents[count].cookie = d->cookie; + dirents[count].nextentry = NULL; + dirents[count].name_attributes.present = TRUE; + encode_fattr3(&dirents[count].name_attributes.post_op_attr_u.attributes, inode); + fh_bytes[count] = GUINT64_TO_BE(d->inum); + dirents[count].name_handle.present = TRUE; + dirents[count].name_handle.post_op_fh3_u.handle.data.data_len = 8; + dirents[count].name_handle.post_op_fh3_u.handle.data.data_val + = (char *)&fh_bytes[count]; + if (count > 0) + dirents[count - 1].nextentry = &dirents[count]; + count++; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); + } + i = g_sequence_iter_next(i); + } + + if (count > 0) + result.readdirplus3res_u.resok.reply.entries = &dirents[0]; + else + result.readdirplus3res_u.resok.reply.entries = NULL; + result.readdirplus3res_u.resok.reply.eof = g_sequence_iter_is_end(i); + + g_mutex_unlock(dir->lock); + async_rpc_send_reply(req, &result); } -fsstat3res * -nfsproc3_fsstat_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) +void nfsproc3_fsstat_3_svc(nfs_fh3 *argp, RPCRequest *req) { - static fsstat3res result; + fsstat3res result; + memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + BlueSkyInode *inode = lookup_fh(req, argp); + if (inode == NULL) { + result.status = NFS3ERR_STALE; + result.fsstat3res_u.resfail.present = FALSE; + async_rpc_send_reply(req, &result); + return; + } + g_mutex_lock(inode->lock); - return &result; + result.status = NFS3_OK; + result.fsstat3res_u.resok.obj_attributes.present = TRUE; + encode_fattr3(&result.fsstat3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); + + result.fsstat3res_u.resok.tbytes = (1 << 30); + result.fsstat3res_u.resok.fbytes = (1 << 30); + result.fsstat3res_u.resok.abytes = (1 << 30); + result.fsstat3res_u.resok.tfiles = 0; + result.fsstat3res_u.resok.ffiles = 0; + result.fsstat3res_u.resok.afiles = 0; + result.fsstat3res_u.resok.invarsec = 0; + + g_mutex_unlock(inode->lock); + async_rpc_send_reply(req, &result); } -fsinfo3res * -nfsproc3_fsinfo_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) +void nfsproc3_fsinfo_3_svc(nfs_fh3 *argp, RPCRequest *req) { - static fsinfo3res result; + fsinfo3res result; + memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.fsinfo3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.fsinfo3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); - result.fsinfo3res_u.resok.rtmax = 32768; - result.fsinfo3res_u.resok.rtpref = 32768; - result.fsinfo3res_u.resok.rtmult = 4096; - result.fsinfo3res_u.resok.wtmax = 32768; - result.fsinfo3res_u.resok.wtpref = 32768; - result.fsinfo3res_u.resok.wtmult = 4096; - result.fsinfo3res_u.resok.dtpref = 4096; + result.fsinfo3res_u.resok.rtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.wtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.dtpref = NFS_BLOCKSIZE; result.fsinfo3res_u.resok.maxfilesize = 0x7fffffffffffffffULL; result.fsinfo3res_u.resok.time_delta.seconds = 0; result.fsinfo3res_u.resok.time_delta.nseconds = 1000; result.fsinfo3res_u.resok.properties = FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME; - return &result; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); + async_rpc_send_reply(req, &result); } -pathconf3res * -nfsproc3_pathconf_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) +void nfsproc3_pathconf_3_svc(nfs_fh3 *argp, RPCRequest *req) { - static pathconf3res result; + pathconf3res result; + memset(&result, 0, sizeof(result)); BlueSkyInode *inode = bluesky_get_inode(fs, 1); + g_mutex_lock(inode->lock); result.status = NFS3_OK; result.pathconf3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.pathconf3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -773,15 +1171,36 @@ nfsproc3_pathconf_3_svc(nfs_fh3 *argp, struct svc_req *rqstp) result.pathconf3res_u.resok.case_insensitive = FALSE; result.pathconf3res_u.resok.case_preserving = TRUE; - return &result; + g_mutex_unlock(inode->lock); + bluesky_inode_unref(inode); + async_rpc_send_reply(req, &result); } -commit3res * -nfsproc3_commit_3_svc(commit3args *argp, struct svc_req *rqstp) +void nfsproc3_commit_3_svc(commit3args *argp, RPCRequest *req) { - static commit3res result; + commit3res result; + memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + result.status = NFS3_OK; + + BlueSkyInode *inode = lookup_fh(req, &argp->file); + if (inode == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + g_mutex_lock(inode->lock); + encode_pre_wcc(&result.commit3res_u.resok.file_wcc, inode); + + bluesky_inode_do_sync(inode); + + result.commit3res_u.resok.file_wcc.after.present = TRUE; + encode_fattr3(&result.commit3res_u.resok.file_wcc.after.post_op_attr_u.attributes, inode); + memcpy(result.commit3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + g_mutex_unlock(inode->lock); - return &result; + async_rpc_send_reply(req, &result); }