X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=nfs3%2Fnfs3.c;h=80fb432ab0771e6214da3374daec4bcbd226c65b;hb=HEAD;hp=72ec9cb7a26173e6a0732a428a901d0c0af99597;hpb=50c08ba526a6638e8e3c4eec0503365a2c110a85;p=bluesky.git diff --git a/nfs3/nfs3.c b/nfs3/nfs3.c index 72ec9cb..80fb432 100644 --- a/nfs3/nfs3.c +++ b/nfs3/nfs3.c @@ -1,3 +1,33 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* * This is sample code generated by rpcgen. * These are only templates and you can use them @@ -9,6 +39,9 @@ extern BlueSkyFS *fs; +#define NFS_BLOCKSIZE 32768 +#define NFS_MAXSIZE (1 << 20) + /* Check that a string is a valid file name. We require that it be valid * UTF-8, that it not be empty, and that it not contain embedded forward * slashes. Also checks that the length of the string is not more than the @@ -205,6 +238,8 @@ void nfsproc3_setattr_3_svc(setattr3args *argp, RPCRequest *req) inode); result.status = NFS3_OK; + bluesky_inode_do_sync(inode); + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); } @@ -243,10 +278,14 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } + + result.lookup3res_u.resok.dir_attributes.present = TRUE; + encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); + g_mutex_unlock(dir->lock); + BlueSkyInode *inode = bluesky_get_inode(fs, inum); if (inode == NULL) { result.status = NFS3ERR_NOENT; - g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); return; } @@ -254,8 +293,6 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) schedule_inode_unref(req, inode); result.status = NFS3_OK; - result.lookup3res_u.resok.dir_attributes.present = TRUE; - encode_fattr3(&result.lookup3res_u.resok.dir_attributes.post_op_attr_u.attributes, dir); result.lookup3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.lookup3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); @@ -265,7 +302,6 @@ void nfsproc3_lookup_3_svc(diropargs3 *argp, RPCRequest *req) result.lookup3res_u.resok.object.data.data_val = (char *)&fh_bytes; g_mutex_unlock(inode->lock); - g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -322,7 +358,9 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) { read3res result; memset(&result, 0, sizeof(result)); - char buf[32768]; + char buf[NFS_MAXSIZE]; + + bluesky_flushd_invoke_conditional(fs); BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { @@ -339,6 +377,7 @@ void nfsproc3_read_3_svc(read3args *argp, RPCRequest *req) count = 0; result.read3res_u.resok.eof = TRUE; } else { + count = MIN(count, NFS_MAXSIZE); count = MIN(count, inode->size - argp->offset); if (argp->offset + count == inode->size) result.read3res_u.resok.eof = TRUE; @@ -367,6 +406,8 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) struct wcc_data wcc; memset(&wcc, 0, sizeof(wcc)); + bluesky_flushd_invoke_conditional(fs); + BlueSkyInode *inode = lookup_fh(req, &argp->file); if (inode == NULL) { result.status = NFS3ERR_STALE; @@ -375,6 +416,21 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) return; } +#if 0 + /* FIXME: Hack to throttle writes when there is too much dirty data still + * to be written out. */ + while (g_atomic_int_get(&fs->cache_dirty) > 4096 + || g_atomic_int_get(&fs->cache_total) > 8192) { + g_print("Too many dirty pages (%d) or total pages (%d); throttling writes...\n", + g_atomic_int_get(&fs->cache_dirty), + g_atomic_int_get(&fs->cache_total)); + struct timespec delay; + delay.tv_sec = 2; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + } +#endif + g_mutex_lock(inode->lock); encode_pre_wcc(&wcc, inode); @@ -402,7 +458,14 @@ void nfsproc3_write_3_svc(write3args *argp, RPCRequest *req) encode_fattr3(&wcc.after.post_op_attr_u.attributes, inode); result.write3res_u.resok.file_wcc = wcc; result.write3res_u.resok.count = argp->count; - result.write3res_u.resok.committed = FILE_SYNC; + result.write3res_u.resok.committed = UNSTABLE; + memcpy(result.write3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + if (argp->stable != UNSTABLE) { + bluesky_inode_do_sync(inode); + result.write3res_u.resok.committed = FILE_SYNC; + } g_mutex_unlock(inode->lock); @@ -446,21 +509,23 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_REGULAR); file->nlink = 1; file->mode = 0755; int64_t time = bluesky_get_current_time(); - printf("time: %"PRIi64"\n", time); file->mtime = time; file->ctime = time; file->atime = time; file->ntime = time; g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -474,6 +539,8 @@ void nfsproc3_create_3_svc(create3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); g_mutex_unlock(file->lock); g_mutex_unlock(dir->lock); @@ -517,6 +584,7 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_DIRECTORY); file->nlink = 1; @@ -528,10 +596,12 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) file->ntime = time; g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); set_attributes(file, &argp->attributes); bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -545,6 +615,8 @@ void nfsproc3_mkdir_3_svc(mkdir3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); g_mutex_unlock(file->lock); g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); @@ -586,6 +658,7 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) return; } + g_mutex_lock(fs->lock); BlueSkyInode *file; file = bluesky_new_inode(bluesky_fs_alloc_inode(fs), fs, BLUESKY_SYMLINK); file->nlink = 1; @@ -598,9 +671,11 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) file->symlink_contents = g_strdup(argp->symlink.symlink_data); g_mutex_lock(file->lock); bluesky_insert_inode(fs, file); + g_mutex_unlock(fs->lock); bluesky_directory_insert(dir, argp->where.name, file->inum); bluesky_inode_update_ctime(dir, TRUE); + bluesky_inode_update_ctime(file, FALSE); wcc.after.present = TRUE; encode_fattr3(&wcc.after.post_op_attr_u.attributes, dir); @@ -614,6 +689,8 @@ void nfsproc3_symlink_3_svc(symlink3args *argp, RPCRequest *req) result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_len = 8; result.diropres3_u.resok.obj.post_op_fh3_u.handle.data.data_val = (char *)&fh_bytes; + bluesky_inode_do_sync(file); + bluesky_inode_do_sync(dir); g_mutex_unlock(file->lock); g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); @@ -664,6 +741,7 @@ void nfsproc3_remove_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); } @@ -731,6 +809,8 @@ void nfsproc3_rmdir_3_svc(diropargs3 *argp, RPCRequest *req) encode_fattr3(&result.wccstat3_u.wcc.after.post_op_attr_u.attributes, dir); + bluesky_inode_do_sync(dir); + bluesky_inode_do_sync(inode); g_mutex_unlock(inode->lock); g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); @@ -749,17 +829,22 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) async_rpc_send_reply(req, &result); return; } - g_mutex_lock(dir1->lock); - encode_pre_wcc(wcc1, dir1); BlueSkyInode *dir2 = lookup_fh(req, &argp->to.dir); if (dir2 == NULL) { result.status = NFS3ERR_STALE; - g_mutex_unlock(dir1->lock); async_rpc_send_reply(req, &result); return; } - g_mutex_lock(dir2->lock); + + if (dir1->inum < dir2->inum) { + g_mutex_lock(dir1->lock); + g_mutex_lock(dir2->lock); + } else if (dir1->inum > dir2->inum) { + g_mutex_lock(dir2->lock); + g_mutex_lock(dir1->lock); + } + encode_pre_wcc(wcc1, dir1); encode_pre_wcc(wcc2, dir1); gboolean status = bluesky_rename(dir1, argp->from.name, @@ -775,8 +860,12 @@ void nfsproc3_rename_3_svc(rename3args *argp, RPCRequest *req) else result.status = NFS3ERR_PERM; - g_mutex_unlock(dir2->lock); + bluesky_inode_do_sync(dir2); + bluesky_inode_do_sync(dir1); + g_mutex_unlock(dir1->lock); + if (dir1->inum != dir2->inum) + g_mutex_unlock(dir2->lock); async_rpc_send_reply(req, &result); } @@ -848,6 +937,8 @@ void nfsproc3_link_3_svc(link3args *argp, RPCRequest *req) encode_fattr3(&result.link3res_u.res.file_attributes.post_op_attr_u.attributes, inode); result.link3res_u.res.linkdir_wcc = wcc; + bluesky_inode_do_sync(inode); + bluesky_inode_do_sync(dir); g_mutex_unlock(inode->lock); g_mutex_unlock(dir->lock); async_rpc_send_reply(req, &result); @@ -937,17 +1028,35 @@ void nfsproc3_readdirplus_3_svc(readdirplus3args *argp, RPCRequest *req) uint64_t fh_bytes[MAX_READDIR_DIRENTS]; int count = 0; - /* TODO: Handle dircount, maxcount arguments from client. */ - + GSequenceIter *i; BlueSkyDirent start = {NULL, NULL, argp->cookie, 0}; - GSequenceIter *i = g_sequence_search(dir->dirents, &start, - bluesky_dirent_compare, NULL); + /* Perform a prefetch pass on inodes: for all the inodes we think we will + * return information about, try to load each one but don't wait. This + * should let multiple inodes be fetched in parallel, instead of + * sequentially in the loop that follows. */ + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + while (count < MAX_READDIR_DIRENTS + && !g_sequence_iter_is_end(i) + && dircount <= argp->dircount + && dircount + attrcount <= argp->maxcount) + { + BlueSkyDirent *d = g_sequence_get(i); + bluesky_inode_prefetch(fs, d->inum); + dircount += 24 + ((strlen(d->name) + 3) & ~3); + attrcount += 88 + 8 + 8; + i = g_sequence_iter_next(i); + } + + i = g_sequence_search(dir->dirents, &start, bluesky_dirent_compare, NULL); + count = 0; + dircount = 88 + 16; + attrcount = 0; while (count < MAX_READDIR_DIRENTS && !g_sequence_iter_is_end(i)) { BlueSkyDirent *d = g_sequence_get(i); BlueSkyInode *inode = bluesky_get_inode(fs, d->inum); - g_mutex_lock(inode->lock); if (inode != NULL) { + g_mutex_lock(inode->lock); dircount += 24 + ((strlen(d->name) + 3) & ~3); attrcount += 88 + 8 + 8; if (dircount > argp->dircount @@ -1027,13 +1136,13 @@ void nfsproc3_fsinfo_3_svc(nfs_fh3 *argp, RPCRequest *req) result.status = NFS3_OK; result.fsinfo3res_u.resok.obj_attributes.present = TRUE; encode_fattr3(&result.fsinfo3res_u.resok.obj_attributes.post_op_attr_u.attributes, inode); - result.fsinfo3res_u.resok.rtmax = 32768; - result.fsinfo3res_u.resok.rtpref = 32768; - result.fsinfo3res_u.resok.rtmult = 4096; - result.fsinfo3res_u.resok.wtmax = 32768; - result.fsinfo3res_u.resok.wtpref = 32768; - result.fsinfo3res_u.resok.wtmult = 4096; - result.fsinfo3res_u.resok.dtpref = 4096; + result.fsinfo3res_u.resok.rtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.rtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.wtmax = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtpref = NFS_MAXSIZE; + result.fsinfo3res_u.resok.wtmult = NFS_BLOCKSIZE; + result.fsinfo3res_u.resok.dtpref = NFS_BLOCKSIZE; result.fsinfo3res_u.resok.maxfilesize = 0x7fffffffffffffffULL; result.fsinfo3res_u.resok.time_delta.seconds = 0; result.fsinfo3res_u.resok.time_delta.nseconds = 1000; @@ -1072,7 +1181,26 @@ void nfsproc3_commit_3_svc(commit3args *argp, RPCRequest *req) commit3res result; memset(&result, 0, sizeof(result)); - result.status = NFS3ERR_NOTSUPP; + result.status = NFS3_OK; + + BlueSkyInode *inode = lookup_fh(req, &argp->file); + if (inode == NULL) { + result.status = NFS3ERR_STALE; + async_rpc_send_reply(req, &result); + return; + } + + g_mutex_lock(inode->lock); + encode_pre_wcc(&result.commit3res_u.resok.file_wcc, inode); + + bluesky_inode_do_sync(inode); + + result.commit3res_u.resok.file_wcc.after.present = TRUE; + encode_fattr3(&result.commit3res_u.resok.file_wcc.after.post_op_attr_u.attributes, inode); + memcpy(result.commit3res_u.resok.verf, + nfsd_instance_verf_cookie, NFS3_WRITEVERFSIZE); + + g_mutex_unlock(inode->lock); async_rpc_send_reply(req, &result); }