X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=nfs3%2Frpc.c;h=3c6da98764de453236a79734889a5e889852f32d;hb=8ff0fd08d6e1cc97cdb7e94b7cd97dc28c29e674;hp=7b73840c62349c046ab77d67cee02f65651353c4;hpb=3c5c5e9b5a8650e0549712e20e007ba2399656bf;p=bluesky.git diff --git a/nfs3/rpc.c b/nfs3/rpc.c index 7b73840..3c6da98 100644 --- a/nfs3/rpc.c +++ b/nfs3/rpc.c @@ -3,7 +3,29 @@ * Copyright (C) 2009 The Regents of the University of California * Written by Michael Vrable * - * TODO: Licensing + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* RPC handling: registration, marshalling and unmarshalling of messages. For @@ -18,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +49,9 @@ #include "bluesky.h" extern BlueSkyFS *fs; +static int outstanding_rpcs = 0; +static struct bluesky_stats *rpc_recv_stats, *rpc_send_stats; + /* TCP port number to use for NFS protocol. (Should be 2049.) */ #define NFS_SERVICE_PORT 2051 @@ -111,6 +137,7 @@ struct rpc_reply { static void async_rpc_write(RPCConnection *rpc, const char *buf, gsize len); +static void async_rpc_flush(RPCConnection *rpc); struct rpc_fail_reply { uint32_t xid; @@ -121,73 +148,13 @@ struct rpc_fail_reply { uint32_t accept_stat; }; -/* Routines for XDR-encoding to a growable string. */ -static bool_t xdr_string_putlong(XDR *xdrs, const long *lp) -{ - GString *str = (GString *)xdrs->x_private; - uint32_t data = htonl(*lp); - g_string_set_size(str, str->len + 4); - memcpy(str->str + str->len - 4, &data, 4); - return TRUE; -} - -static bool_t xdr_string_putbytes(XDR *xdrs, const char *addr, u_int len) -{ - GString *str = (GString *)xdrs->x_private; - g_string_set_size(str, str->len + len); - memcpy(str->str + str->len - len, addr, len); - return TRUE; -} - -static u_int xdr_string_getpos(const XDR *xdrs) -{ - GString *str = (GString *)xdrs->x_private; - return str->len; -} - -static bool_t xdr_string_putint32(XDR *xdrs, const int32_t *ip) -{ - GString *str = (GString *)xdrs->x_private; - uint32_t data = htonl(*ip); - g_string_set_size(str, str->len + 4); - memcpy(str->str + str->len - 4, &data, 4); - return TRUE; -} - -static int32_t *xdr_string_inline(XDR *xdrs, u_int len) -{ - GString *str = (GString *)xdrs->x_private; - g_string_set_size(str, str->len + len); - return (int32_t *)(str->str + str->len - len); -} - -static void xdr_string_destroy(XDR *xdrs) -{ -} - -static struct xdr_ops xdr_string_ops = { - .x_putlong = xdr_string_putlong, - .x_putbytes = xdr_string_putbytes, - .x_getpostn = xdr_string_getpos, - .x_putint32 = xdr_string_putint32, - .x_inline = xdr_string_inline, - .x_destroy = xdr_string_destroy, -}; - -static void xdr_string_create(XDR *xdrs, GString *string, enum xdr_op op) -{ - xdrs->x_op = op; - xdrs->x_ops = &xdr_string_ops; - xdrs->x_private = (char *)string; - xdrs->x_base = NULL; - xdrs->x_handy = 0; -} - static void async_rpc_send_failure(RPCRequest *req, enum accept_stat stat) { struct rpc_fail_reply header; + g_atomic_int_add(&outstanding_rpcs, -1); + header.xid = htonl(req->xid); header.type = htonl(1); /* REPLY */ header.stat = htonl(MSG_ACCEPTED); @@ -195,10 +162,16 @@ async_rpc_send_failure(RPCRequest *req, enum accept_stat stat) header.verf_len = 0; header.accept_stat = htonl(stat); + g_mutex_lock(req->connection->send_lock); uint32_t fragment = htonl(sizeof(header) | 0x80000000); - async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment)); + if (!req->connection->udp_transport) + async_rpc_write(req->connection, (const char *)&fragment, + sizeof(fragment)); async_rpc_write(req->connection, (const char *)&header, sizeof(header)); - g_io_channel_flush(req->connection->channel, NULL); + async_rpc_flush(req->connection); + g_mutex_unlock(req->connection->send_lock); + + bluesky_profile_free(req->profile); if (req->args != NULL) { char buf[4]; @@ -207,6 +180,7 @@ async_rpc_send_failure(RPCRequest *req, enum accept_stat stat) if (!req->xdr_args_free(&xdr, req->args)) { fprintf(stderr, "unable to free arguments"); } + g_free(req->args); } if (req->raw_args != NULL) @@ -219,6 +193,15 @@ async_rpc_send_failure(RPCRequest *req, enum accept_stat stat) g_free(c); } + if (req->connection->udp_transport) { + /* For UDP, a connection only exists for the duration of a single + * message. */ + g_mutex_free(req->connection->send_lock); + g_string_free(req->connection->msgbuf, TRUE); + g_string_free(req->connection->sendbuf, TRUE); + g_free(req->connection); + } + g_free(req); } @@ -227,6 +210,9 @@ async_rpc_send_reply(RPCRequest *req, void *result) { bluesky_time_hires time_end; + bluesky_profile_add_event(req->profile, + g_strdup("Start encoding NFS response")); + GString *str = g_string_new(""); XDR xdr_out; xdr_string_create(&xdr_out, str, XDR_ENCODE); @@ -236,6 +222,9 @@ async_rpc_send_reply(RPCRequest *req, void *result) return; } + g_atomic_int_add(&outstanding_rpcs, -1); + bluesky_stats_add(rpc_send_stats, str->len); + struct rpc_reply header; header.xid = htonl(req->xid); header.type = htonl(1); /* REPLY */ @@ -244,19 +233,30 @@ async_rpc_send_reply(RPCRequest *req, void *result) header.verf_len = 0; header.accept_stat = 0; + g_mutex_lock(req->connection->send_lock); gsize msg_size = str->len; uint32_t fragment = htonl((msg_size + sizeof(header)) | 0x80000000); - async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment)); + if (!req->connection->udp_transport) + async_rpc_write(req->connection, (const char *)&fragment, + sizeof(fragment)); async_rpc_write(req->connection, (const char *)&header, sizeof(header)); async_rpc_write(req->connection, str->str, str->len); - g_io_channel_flush(req->connection->channel, NULL); + async_rpc_flush(req->connection); + g_mutex_unlock(req->connection->send_lock); time_end = bluesky_now_hires(); +#if 0 printf("RPC[%"PRIx32"]: time = %"PRId64" ns\n", req->xid, time_end - req->time_start); +#endif + + bluesky_profile_add_event(req->profile, + g_strdup("NFS reply sent")); + bluesky_profile_print(req->profile); /* Clean up. */ + bluesky_profile_free(req->profile); g_string_free(str, TRUE); if (req->args != NULL) { @@ -266,6 +266,7 @@ async_rpc_send_reply(RPCRequest *req, void *result) if (!req->xdr_args_free(&xdr, req->args)) { fprintf(stderr, "unable to free arguments"); } + g_free(req->args); } if (req->raw_args != NULL) @@ -278,9 +279,43 @@ async_rpc_send_reply(RPCRequest *req, void *result) g_free(c); } + if (req->connection->udp_transport) { + /* For UDP, a connection only exists for the duration of a single + * message. */ + g_mutex_free(req->connection->send_lock); + g_string_free(req->connection->msgbuf, TRUE); + g_string_free(req->connection->sendbuf, TRUE); + g_free(req->connection); + } + g_free(req); } +static const char *nfs_proc_names[] = { + [NFSPROC3_NULL] = "NULL", + [NFSPROC3_GETATTR] = "GETATTR", + [NFSPROC3_SETATTR] = "SETATTR", + [NFSPROC3_LOOKUP] = "LOOKUP", + [NFSPROC3_ACCESS] = "ACCESS", + [NFSPROC3_READLINK] = "READLINK", + [NFSPROC3_READ] = "READ", + [NFSPROC3_WRITE] = "WRITE", + [NFSPROC3_CREATE] = "CREATE", + [NFSPROC3_MKDIR] = "MKDIR", + [NFSPROC3_SYMLINK] = "SYMLINK", + [NFSPROC3_MKNOD] = "MKNOD", + [NFSPROC3_REMOVE] = "REMOVE", + [NFSPROC3_RMDIR] = "RMDIR", + [NFSPROC3_RENAME] = "RENAME", + [NFSPROC3_LINK] = "LINK", + [NFSPROC3_READDIR] = "READDIR", + [NFSPROC3_READDIRPLUS] = "READDIRPLUS", + [NFSPROC3_FSSTAT] = "FSSTAT", + [NFSPROC3_FSINFO] = "FSINFO", + [NFSPROC3_PATHCONF] = "PATHCONF", + [NFSPROC3_COMMIT] = "COMMIT", +}; + static void nfs_program_3(RPCRequest *req) { @@ -316,7 +351,15 @@ nfs_program_3(RPCRequest *req) xdrproc_t _xdr_argument, _xdr_result; char *(*local)(char *, RPCRequest *); - printf("Dispatched NFS RPC message type %d\n", req->req_proc); + bluesky_profile_set(req->profile); + + if (req->req_proc < sizeof(nfs_proc_names) / sizeof(const char *)) { + bluesky_profile_add_event( + req->profile, + g_strdup_printf("Dispatching NFS %s request", + nfs_proc_names[req->req_proc]) + ); + } switch (req->req_proc) { case NFSPROC3_NULL: @@ -471,9 +514,6 @@ nfs_program_3(RPCRequest *req) req->xdr_result = _xdr_result; result = (*local)((char *)req->args, req); - bluesky_flushd_invoke(fs); - bluesky_debug_dump(fs); - return; } @@ -483,10 +523,63 @@ nfs_program_3(RPCRequest *req) static GMainContext *main_context; static GMainLoop *main_loop; +static GThreadPool *rpc_thread_pool; + +static volatile int fs_dump_requested = 0; + +static void sig_handler(int sig) +{ + if (sig == SIGUSR1) { + fs_dump_requested = 1; + } +} + +static gboolean async_flushd(gpointer data) +{ +#if 0 + int rpc_count = g_atomic_int_get(&outstanding_rpcs); + if (rpc_count != 0) { + g_print("Currently outstanding RPC requests: %d\n", rpc_count); + } +#endif + + if (fs_dump_requested) { + bluesky_debug_dump(fs); + bluesky_stats_dump_all(); + fs_dump_requested = 0; + } + + bluesky_flushd_invoke(fs); + return TRUE; +} + +static void async_rpc_task(gpointer data, gpointer user_data) +{ + nfs_program_3((RPCRequest *)data); +} + static async_rpc_init() { main_context = g_main_context_new(); main_loop = g_main_loop_new(main_context, FALSE); + + rpc_thread_pool = g_thread_pool_new(async_rpc_task, NULL, + bluesky_max_threads, FALSE, NULL); + + /* Arrange to have the cache writeback code run every five seconds. */ + GSource *source = g_timeout_source_new_seconds(5); + g_source_set_callback(source, async_flushd, NULL, NULL); + g_source_attach(source, main_context); + g_source_unref(source); + + /* Signal USR1 is used to request a debugging dump of filesyste info */ + struct sigaction sa; + sa.sa_handler = sig_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + perror("sigaction"); + } } struct rpc_call_header { @@ -513,6 +606,8 @@ static gboolean async_rpc_dispatch(RPCConnection *rpc) GString *msg = rpc->msgbuf; const char *buf = msg->str; + bluesky_stats_add(rpc_recv_stats, msg->len); + if (msg->len < sizeof(struct rpc_call_header)) { fprintf(stderr, "Short RPC message: only %zd bytes!\n", msg->len); return FALSE; @@ -530,9 +625,12 @@ static gboolean async_rpc_dispatch(RPCConnection *rpc) return FALSE; } + g_atomic_int_add(&outstanding_rpcs, 1); + RPCRequest *req = g_new0(RPCRequest, 1); req->connection = rpc; - req->time_start = time_start; + req->profile = bluesky_profile_new(); + bluesky_profile_add_event(req->profile, g_strdup("Receive NFS request")); req->xid = xid; if (ntohl(header->prog) != NFS_PROGRAM) { @@ -550,25 +648,35 @@ static gboolean async_rpc_dispatch(RPCConnection *rpc) buf += sizeof(struct rpc_call_header); for (i = 0; i < 2; i++) { struct rpc_auth *auth = (struct rpc_auth *)buf; - if (buf - msg->str + sizeof(struct rpc_auth) > msg->len) + if (buf - msg->str + sizeof(struct rpc_auth) > msg->len) { + g_atomic_int_add(&outstanding_rpcs, -1); return FALSE; + } gsize authsize = ntohl(auth->len) + sizeof(struct rpc_auth); - if (authsize > MAX_RPC_MSGSIZE) + if (authsize > MAX_RPC_MSGSIZE) { + g_atomic_int_add(&outstanding_rpcs, -1); return FALSE; + } buf += authsize; } - if (buf - msg->str > msg->len) + if (buf - msg->str > msg->len) { + g_atomic_int_add(&outstanding_rpcs, -1); return FALSE; + } req->raw_args = msg; req->raw_args_header_bytes = buf - msg->str; req->req_proc = ntohl(header->proc); rpc->msgbuf = g_string_new(""); - nfs_program_3(req); + if (bluesky_options.sync_frontends) { + nfs_program_3(req); + } else { + g_thread_pool_push(rpc_thread_pool, req, NULL); + } return TRUE; } @@ -577,6 +685,12 @@ static gboolean async_rpc_dispatch(RPCConnection *rpc) static void async_rpc_write(RPCConnection *rpc, const char *buf, gsize len) { + if (rpc->udp_transport) { + g_string_append_len(rpc->sendbuf, buf, len); + return; + } + + /* Normal TCP path */ while (len > 0) { gsize written = 0; switch (g_io_channel_write_chars(rpc->channel, buf, len, @@ -596,6 +710,19 @@ static void async_rpc_write(RPCConnection *rpc, // g_io_channel_flush(rpc->channel, NULL); } +/* Flush a completed message out to the RPC socket */ +static void async_rpc_flush(RPCConnection *rpc) +{ + if (rpc->udp_transport) { + sendto(g_io_channel_unix_get_fd(rpc->channel), + rpc->sendbuf->str, rpc->sendbuf->len, 0, + (struct sockaddr *)&rpc->peer, sizeof(struct sockaddr_in)); + return; + } else { + g_io_channel_flush(rpc->channel, NULL); + } +} + static gboolean async_rpc_do_read(GIOChannel *channel, GIOCondition condition, gpointer data) @@ -640,6 +767,7 @@ static gboolean async_rpc_do_read(GIOChannel *channel, fprintf(stderr, "Unexpected error or end of file on RPC stream %d!\n", g_io_channel_unix_get_fd(rpc->channel)); g_io_channel_shutdown(rpc->channel, TRUE, NULL); + /* TODO: Clean up connection object. */ return FALSE; } @@ -663,7 +791,7 @@ static gboolean async_rpc_do_read(GIOChannel *channel, /* We were reading in the fragment body. */ rpc->frag_len -= bytes_read; - if (rpc->frag_len = 0x80000000) { + if (rpc->frag_len == 0x80000000) { /* We have a complete message since this was the last fragment and * there are no more bytes in it. Dispatch the message. */ if (!async_rpc_dispatch(rpc)) { @@ -698,6 +826,7 @@ static gboolean async_rpc_do_accept(GIOChannel *channel, rpc->channel = g_io_channel_unix_new(nfd); rpc->msgbuf = g_string_new(""); g_io_channel_set_encoding(rpc->channel, NULL, NULL); + rpc->send_lock = g_mutex_new(); GSource *source = g_io_create_watch(rpc->channel, G_IO_IN); g_source_set_callback(source, (GSourceFunc)async_rpc_do_read, rpc, NULL); @@ -718,6 +847,50 @@ static async_rpc_register_listening(int fd) g_source_unref(source); } +static gboolean async_rpc_do_udp(GIOChannel *channel, + GIOCondition condition, + gpointer data) +{ + char buf[65536]; + + struct sockaddr_in src; + socklen_t addrlen = sizeof(struct sockaddr_in); + ssize_t len = recvfrom(g_io_channel_unix_get_fd(channel), + buf, sizeof(buf), 0, + (struct sockaddr *)&src, &addrlen); + if (len < 0) { + fprintf(stderr, "UDP read error: %m, shutting down UDP\n"); + return FALSE; + } + + g_assert(len < sizeof(buf)); + + RPCConnection *rpc = g_new0(RPCConnection, 1); + rpc->channel = channel; + rpc->msgbuf = g_string_new_len(buf, len); + rpc->send_lock = g_mutex_new(); + rpc->udp_transport = TRUE; + memcpy(&rpc->peer, &src, sizeof(struct sockaddr_in)); + rpc->sendbuf = g_string_new(""); + + /* We have a complete message since this was the last fragment and + * there are no more bytes in it. Dispatch the message. */ + async_rpc_dispatch(rpc); + + return TRUE; +} + +static async_rpc_register_listening_udp(int fd) +{ + GIOChannel *channel = g_io_channel_unix_new(fd); + g_io_channel_set_encoding(channel, NULL, NULL); + GSource *source = g_io_create_watch(channel, G_IO_IN); + g_source_set_callback(source, (GSourceFunc)async_rpc_do_udp, + NULL, NULL); + g_source_attach(source, main_context); + g_source_unref(source); +} + static gpointer async_rpc_run(gpointer data) { g_print("Starting NFS main loop...\n"); @@ -728,6 +901,9 @@ void register_rpc() { SVCXPRT *transp; + rpc_recv_stats = bluesky_stats_new("NFS RPC Messages In"); + rpc_send_stats = bluesky_stats_new("NFS RPC Messages Out"); + async_rpc_init(); /* MOUNT protocol */ @@ -786,5 +962,27 @@ void register_rpc() async_rpc_register_listening(fd); + /* Minimal UDP NFSv3 support */ + fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (fd < 0) { + fprintf(stderr, "Unable to create NFS UDP socket: %m\n"); + exit(1); + } + + addr.sin_family = AF_INET; + addr.sin_port = htons(NFS_SERVICE_PORT); + addr.sin_addr.s_addr = INADDR_ANY; + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + fprintf(stderr, "Unable to bind to NFS UDP address: %m\n"); + exit(1); + } + + if (!pmap_set(NFS_PROGRAM, NFS_V3, IPPROTO_UDP, NFS_SERVICE_PORT)) { + fprintf(stderr, "Could not register NFS UDP RPC service!\n"); + exit(1); + } + + async_rpc_register_listening_udp(fd); + g_thread_create(async_rpc_run, NULL, TRUE, NULL); }