From: Michael Vrable
Date: Tue, 12 Jan 2010 21:44:02 +0000 (-0800)
Subject: Rewrite NFS RPC dispatching.
X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=commitdiff_plain;h=cb5c460ba6a31c27ac0d6803c33e9e6c5a140acd

Rewrite NFS RPC dispatching.

The rpcgen-provided dispatching is inflexible, only allowing for a single
request to be handled at a time. Rewrite it (add our own reading of RPC
packets off a TCP socket, calls to XDR decoding/encoding, etc.) so that we
can eventually support multi-threaded and asynchronous handling of messages.
---

diff --git a/nfs3/rpc.c b/nfs3/rpc.c
index cfd67ec..7376535 100644
--- a/nfs3/rpc.c
+++ b/nfs3/rpc.c
@@ -21,10 +21,33 @@
 #include 
 #include 
 #include 
+#include 
 #include "bluesky.h"
 extern BlueSkyFS *fs;
+/* TCP port number to use for NFS protocol. (Should be 2049.) */
+#define NFS_SERVICE_PORT 2051
+
+/* Maximum size of a single RPC message that we will accept (8 MB). */
+#define MAX_RPC_MSGSIZE (8 << 20)
+
+/* For now, used for NFS only. */
+typedef struct {
+    GIOChannel *channel;
+
+    /* The reassembled message, thus far. */
+    GString *msgbuf;
+
+    /* Remaining number of bytes in this message fragment; 0 if we next expect
+     * another fragment header. */
+    uint32_t frag_len;
+
+    /* If frag_len is zero: the number of bytes of the fragment header that
+     * have been read so far. */
+    int frag_hdr_bytes;
+} RPCConnection;
+
 static void
 mount_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
 {
@@ -93,8 +116,50 @@ mount_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
     return;
 }
+struct rpc_reply {
+    uint32_t xid;
+    uint32_t type;
+    uint32_t stat;
+    uint32_t verf_flavor;
+    uint32_t verf_len;
+    uint32_t accept_stat;
+};
+
+static void async_rpc_write(RPCConnection *rpc,
+                            const char *buf, gsize len);
+
+struct rpc_fail_reply {
+    uint32_t xid;
+    uint32_t type;
+    uint32_t stat;
+    uint32_t verf_flavor;
+    uint32_t verf_len;
+    uint32_t accept_stat;
+};
+
+static void
+async_rpc_send_failure(RPCConnection *rpc, uint32_t xid, enum accept_stat stat)
+{
+    struct rpc_fail_reply header;
+
+    fprintf(stderr, "Sending RPC failure status %d\n", stat);
+
+    header.xid = htonl(xid);
+    header.type = htonl(1);     /* REPLY */
+    header.stat = htonl(MSG_ACCEPTED);
+    header.verf_flavor = 0;
+    header.verf_len = 0;
+    header.accept_stat = htonl(stat);
+
+    uint32_t fragment = htonl(sizeof(header) | 0x80000000);
+    async_rpc_write(rpc, (const char *)&fragment, sizeof(fragment));
+    async_rpc_write(rpc, (const char *)&header, sizeof(header));
+    g_io_channel_flush(rpc->channel, NULL);
+}
+
 static void
-nfs_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
+nfs_program_3(struct svc_req *rqstp, RPCConnection *connection, uint32_t xid,
+              const char *msg_buf, size_t msg_len)
 {
     union {
         nfs_fh3 nfsproc3_getattr_3_arg;
@@ -257,19 +322,56 @@ nfs_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
         break;
     default:
-        svcerr_noproc (transp);
+        async_rpc_send_failure(connection, xid, PROC_UNAVAIL);
         return;
     }
+
+    /* Decode incoming message */
     memset ((char *)&argument, 0, sizeof (argument));
-    if (!svc_getargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
-        svcerr_decode (transp);
+    XDR xdr_in;
+    xdrmem_create(&xdr_in, (char *)msg_buf, msg_len, XDR_DECODE);
+    int i;
+    printf("Call XDR: ");
+    for (i = 0; i < msg_len; i++) {
+        printf("%02x ", (uint8_t)msg_buf[i]);
+    }
+    printf("\n");
+    if (!_xdr_argument(&xdr_in, (caddr_t)&argument)) {
+        async_rpc_send_failure(connection, xid, GARBAGE_ARGS);
+        fprintf(stderr, "RPC decode error!\n");
         return;
     }
+
+    /* Perform the call. */
     result = (*local)((char *)&argument, rqstp);
-    if (result != NULL && !svc_sendreply(transp, (xdrproc_t) _xdr_result, result)) {
-        svcerr_systemerr (transp);
+
+    /* Encode result and send reply. */
+    static char reply_buf[MAX_RPC_MSGSIZE];
+    XDR xdr_out;
+    xdrmem_create(&xdr_out, reply_buf, MAX_RPC_MSGSIZE, XDR_ENCODE);
+    if (result != NULL && !_xdr_result(&xdr_out, result)) {
+        async_rpc_send_failure(connection, xid, SYSTEM_ERR);
     }
-    if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
+
+    struct rpc_reply header;
+    header.xid = htonl(xid);
+    header.type = htonl(1);     /* REPLY */
+    header.stat = htonl(MSG_ACCEPTED);
+    header.verf_flavor = 0;
+    header.verf_len = 0;
+    header.accept_stat = 0;
+
+    gsize msg_size = xdr_out.x_ops->x_getpostn(&xdr_out);
+    printf("Have an RPC reply of size %zd bytes\n", msg_size);
+    uint32_t fragment = htonl((msg_size + sizeof(header)) | 0x80000000);
+    async_rpc_write(connection, (const char *)&fragment, sizeof(fragment));
+    async_rpc_write(connection, (const char *)&header, sizeof(header));
+    async_rpc_write(connection, reply_buf, msg_size);
+    g_io_channel_flush(connection->channel, NULL);
+
+    /* Clean up. */
+    xdr_in.x_op = XDR_FREE;
+    if (!_xdr_argument(&xdr_in, (caddr_t)&argument)) {
         fprintf (stderr, "%s", "unable to free arguments");
         exit (1);
     }
@@ -279,10 +381,261 @@ nfs_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
     return;
 }
+/* Enhanced, asynchronous-friendly RPC layer. This is a replacement for the
+ * built-in sunrpc parsing and dispatch that will allow for processing multiple
+ * requests at the same time. */
+static GMainContext *main_context;
+static GMainLoop *main_loop;
+
+static void async_rpc_init()
+{
+    main_context = g_main_context_new();
+    main_loop = g_main_loop_new(main_context, FALSE);
+}
+
+struct rpc_call_header {
+    uint32_t xid;
+    uint32_t mtype;
+    uint32_t rpcvers;
+    uint32_t prog;
+    uint32_t vers;
+    uint32_t proc;
+};
+
+struct rpc_auth {
+    uint32_t flavor;
+    uint32_t len;
+};
+
+/* Decode an RPC message and process it. Returns a boolean indicating whether
+ * the message could be processed; if false, an unrecoverable error occurred
+ * and the transport should be closed. */
+static gboolean async_rpc_dispatch(RPCConnection *rpc)
+{
+    int i;
+    GString *msg = rpc->msgbuf;
+    const char *buf = msg->str;
+
+    if (msg->len < sizeof(struct rpc_call_header)) {
+        fprintf(stderr, "Short RPC message: only %zd bytes!\n", msg->len);
+        return FALSE;
+    }
+
+    struct rpc_call_header *header = (struct rpc_call_header *)(msg->str);
+    uint32_t xid = ntohl(header->xid);
+
+    if (ntohl(header->mtype) != 0) {
+        /* Not an RPC call */
+        return FALSE;
+    }
+
+    if (ntohl(header->rpcvers) != 2) {
+        return FALSE;
+    } else if (ntohl(header->prog) != NFS_PROGRAM) {
+        async_rpc_send_failure(rpc, xid, PROG_UNAVAIL);
+        return TRUE;
+    } else if (ntohl(header->vers) != NFS_V3) {
+        /* FIXME: Should be PROG_MISMATCH */
+        async_rpc_send_failure(rpc, xid, PROG_UNAVAIL);
+        return FALSE;
+    }
+
+    uint32_t proc = ntohl(header->proc);
+
+    /* Next, skip over authentication headers. */
+    buf += sizeof(struct rpc_call_header);
+    for (i = 0; i < 2; i++) {
+        struct rpc_auth *auth = (struct rpc_auth *)buf;
+        if (buf - msg->str + sizeof(struct rpc_auth) > msg->len)
+            return FALSE;
+
+        gsize authsize = ntohl(auth->len) + sizeof(struct rpc_auth);
+        if (authsize > MAX_RPC_MSGSIZE)
+            return FALSE;
+
+        buf += authsize;
+    }
+
+    if (buf - msg->str > msg->len)
+        return FALSE;
+
+    printf("Dispatching RPC procedure %d...\n", proc);
+
+    struct svc_req req;
+    req.rq_prog = ntohl(header->prog);
+    req.rq_vers = ntohl(header->vers);
+    req.rq_proc = ntohl(header->proc);
+    req.rq_cred.oa_flavor = 0;
+    req.rq_cred.oa_base = NULL;
+    req.rq_cred.oa_length = 0;
+    req.rq_clntcred = NULL;
+    req.rq_xprt = NULL;
+
+    nfs_program_3(&req, rpc, ntohl(header->xid), buf,
+                  (msg->str + msg->len) - buf);
+
+    return TRUE;
+}
+
+/* Write the given data to the RPC socket. */
+static void async_rpc_write(RPCConnection *rpc,
+                            const char *buf, gsize len)
+{
+    while (len > 0) {
+        gsize written = 0;
+        switch (g_io_channel_write_chars(rpc->channel, buf, len,
+                                         &written, NULL)) {
+        case G_IO_STATUS_ERROR:
+        case G_IO_STATUS_EOF:
+        case G_IO_STATUS_AGAIN:
+            fprintf(stderr, "Error writing to socket!\n");
+            return;
+        case G_IO_STATUS_NORMAL:
+            len -= written;
+            buf += written;
+            break;
+        }
+    }
+
+    // g_io_channel_flush(rpc->channel, NULL);
+}
+
+static gboolean async_rpc_do_read(GIOChannel *channel,
+                                  GIOCondition condition,
+                                  gpointer data)
+{
+    RPCConnection *rpc = (RPCConnection *)data;
+
+    gsize bytes_to_read = 0;    /* Number of bytes to attempt to read. */
+
+    /* If we have not yet read in the fragment header, do that first. This is
+     * 4 bytes that indicates the number of bytes in the message to follow
+     * (with the high bit set if this is the last fragment making up the
+     * message). */
+    if (rpc->frag_len == 0) {
+        bytes_to_read = 4 - rpc->frag_hdr_bytes;
+    } else {
+        bytes_to_read = rpc->frag_len & 0x7fffffff;
+    }
+
+    if (bytes_to_read > MAX_RPC_MSGSIZE
+        || rpc->msgbuf->len + bytes_to_read > MAX_RPC_MSGSIZE)
+    {
+        fprintf(stderr, "Excessive fragment size for RPC: %zd bytes\n",
+                bytes_to_read);
+        g_io_channel_shutdown(rpc->channel, TRUE, NULL);
+        return FALSE;
+    }
+
+    gsize bytes_read = 0;
+    g_string_set_size(rpc->msgbuf, rpc->msgbuf->len + bytes_to_read);
+    char *buf = &rpc->msgbuf->str[rpc->msgbuf->len - bytes_to_read];
+    switch (g_io_channel_read_chars(rpc->channel, buf,
+                                    bytes_to_read, &bytes_read, NULL)) {
+    case G_IO_STATUS_NORMAL:
+        break;
+    case G_IO_STATUS_AGAIN:
+        return TRUE;
+    case G_IO_STATUS_EOF:
+        if (bytes_read == bytes_to_read)
+            break;
+        /* else fall through */
+    case G_IO_STATUS_ERROR:
+        fprintf(stderr, "Unexpected error or end of file on RPC stream %d!\n",
+                g_io_channel_unix_get_fd(rpc->channel));
+        g_io_channel_shutdown(rpc->channel, TRUE, NULL);
+        return FALSE;
+    }
+
+    g_assert(bytes_read >= 0 && bytes_read <= bytes_to_read);
+
+    g_string_set_size(rpc->msgbuf,
+                      rpc->msgbuf->len - (bytes_to_read - bytes_read));
+
+    if (rpc->frag_len == 0) {
+        /* Handle reading in the fragment header. If we've read the complete
+         * header, store the fragment size. */
+        rpc->frag_hdr_bytes += bytes_read;
+        if (rpc->frag_hdr_bytes == 4) {
+            memcpy((char *)&rpc->frag_len,
+                   &rpc->msgbuf->str[rpc->msgbuf->len - 4], 4);
+            rpc->frag_len = ntohl(rpc->frag_len);
+            g_string_set_size(rpc->msgbuf, rpc->msgbuf->len - 4);
+            rpc->frag_hdr_bytes = 0;
+            g_print("RPC fragment header: %08x\n", rpc->frag_len);
+        }
+    } else {
+        /* We were reading in the fragment body. */
+        rpc->frag_len -= bytes_read;
+
+        if (rpc->frag_len == 0x80000000) {
+            /* We have a complete message since this was the last fragment and
+             * there are no more bytes in it. Dispatch the message. */
+            g_print("Complete RPC message: %zd bytes\n", rpc->msgbuf->len);
+            if (!async_rpc_dispatch(rpc)) {
+                fprintf(stderr, "Invalid RPC message, closing channel\n");
+                g_io_channel_shutdown(rpc->channel, TRUE, NULL);
+                return FALSE;
+            }
+            rpc->frag_len = 0;
+            g_string_set_size(rpc->msgbuf, 0);
+        }
+    }
+
+    return TRUE;
+}
+
+static gboolean async_rpc_do_accept(GIOChannel *channel,
+                                    GIOCondition condition,
+                                    gpointer data)
+{
+    int fd = g_io_channel_unix_get_fd(channel);
+    struct sockaddr_in addr;
+    socklen_t addrlen = sizeof(addr);
+
+    g_print("Received new connection on fd %d!\n", fd);
+    int nfd = accept(fd, (struct sockaddr *)&addr, &addrlen);
+    if (nfd < 0) {
+        fprintf(stderr, "Error accepting connection: %m\n");
+        return TRUE;
+    }
+
+    RPCConnection *rpc = g_new0(RPCConnection, 1);
+    rpc->channel = g_io_channel_unix_new(nfd);
+    rpc->msgbuf = g_string_new("");
+    g_io_channel_set_encoding(rpc->channel, NULL, NULL);
+    GSource *source = g_io_create_watch(rpc->channel, G_IO_IN);
+    g_source_set_callback(source, (GSourceFunc)async_rpc_do_read,
+                          rpc, NULL);
+    g_source_attach(source, main_context);
+    g_source_unref(source);
+
+    return TRUE;
+}
+
+static void async_rpc_register_listening(int fd)
+{
+    GIOChannel *channel = g_io_channel_unix_new(fd);
+    g_io_channel_set_encoding(channel, NULL, NULL);
+    GSource *source = g_io_create_watch(channel, G_IO_IN);
+    g_source_set_callback(source, (GSourceFunc)async_rpc_do_accept,
+                          NULL, NULL);
+    g_source_attach(source, main_context);
+    g_source_unref(source);
+}
+
+static gpointer async_rpc_run(gpointer data)
+{
+    g_print("Starting NFS main loop...\n");
+    g_main_loop_run(main_loop);
+    return NULL;
+}
+
 void register_rpc()
 {
     SVCXPRT *transp;
+    async_rpc_init();
+
     /* MOUNT protocol */
     pmap_unset (MOUNT_PROGRAM, MOUNT_V3);
@@ -309,23 +662,35 @@ void register_rpc()
     /* NFS protocol (version 3) */
     pmap_unset (NFS_PROGRAM, NFS_V3);
-    transp = svcudp_create(RPC_ANYSOCK);
-    if (transp == NULL) {
-        fprintf (stderr, "%s", "cannot create udp service.");
+    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (fd < 0) {
+        fprintf(stderr, "Unable to create NFS TCP socket: %m\n");
         exit(1);
     }
-    if (!svc_register(transp, NFS_PROGRAM, NFS_V3, nfs_program_3, IPPROTO_UDP)) {
-        fprintf (stderr, "%s", "unable to register (NFS_PROGRAM, NFS_V3, udp).");
+
+    int n = 1;
+    setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n));
+
+    struct sockaddr_in addr;
+    addr.sin_family = AF_INET;
+    addr.sin_port = htons(NFS_SERVICE_PORT);
+    addr.sin_addr.s_addr = INADDR_ANY;
+    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+        fprintf(stderr, "Unable to bind to NFS TCP address: %m\n");
         exit(1);
     }
-    transp = svctcp_create(RPC_ANYSOCK, 0, 0);
-    if (transp == NULL) {
-        fprintf (stderr, "%s", "cannot create tcp service.");
+    if (listen(fd, SOMAXCONN) < 0) {
+        fprintf(stderr, "Unable to listen on NFS TCP socket: %m\n");
        exit(1);
    }
-    if (!svc_register(transp, NFS_PROGRAM, NFS_V3, nfs_program_3, IPPROTO_TCP)) {
-        fprintf (stderr, "%s", "unable to register (NFS_PROGRAM, NFS_V3, tcp).");
+
+    if (!pmap_set(NFS_PROGRAM, NFS_V3, IPPROTO_TCP, NFS_SERVICE_PORT)) {
+        fprintf(stderr, "Could not register NFS RPC service!\n");
         exit(1);
     }
+
+    async_rpc_register_listening(fd);
+
+    g_thread_create(async_rpc_run, NULL, TRUE, NULL);
 }
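
For reference, below is a rough, untested sketch (not part of the patch) of a client
that speaks the framing async_rpc_do_read() expects: a 4-byte record-marking header
carrying the fragment length with the high bit set on the last fragment, followed by
the RPC call header and two empty AUTH_NONE blocks as parsed by async_rpc_dispatch().
The xid and loopback address are arbitrary, 100003/3 are the standard NFS_PROGRAM and
NFS_V3 values, and port 2051 matches the non-standard NFS_SERVICE_PORT defined above.

/* sketch-nfs-null.c: send one NFSPROC3_NULL call to the dispatcher above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
    /* RPC call body: xid, mtype=CALL(0), rpcvers=2, prog=100003 (NFS),
     * vers=3, proc=0 (NULL), then empty AUTH_NONE credential and verifier. */
    uint32_t body[10] = {
        htonl(0x12345678), htonl(0), htonl(2), htonl(100003),
        htonl(3), htonl(0),
        htonl(0), htonl(0),         /* credential: flavor 0, length 0 */
        htonl(0), htonl(0),         /* verifier: flavor 0, length 0 */
    };
    /* Record-marking header: body length, high bit marks the last fragment. */
    uint32_t frag = htonl(sizeof(body) | 0x80000000);

    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(2051);                /* NFS_SERVICE_PORT */
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("connect");
        return 1;
    }

    if (write(fd, &frag, sizeof(frag)) != sizeof(frag)
        || write(fd, body, sizeof(body)) != sizeof(body)) {
        perror("write");
        close(fd);
        return 1;
    }

    /* The reply comes back framed the same way: fragment header, then data. */
    uint32_t reply_frag;
    if (read(fd, &reply_frag, sizeof(reply_frag)) == (ssize_t)sizeof(reply_frag)) {
        uint32_t len = ntohl(reply_frag) & 0x7fffffff;
        char buf[256];
        ssize_t n = read(fd, buf, len < sizeof(buf) ? len : sizeof(buf));
        printf("Reply fragment of %u bytes (read %zd)\n", (unsigned)len, n);
    }
    close(fd);
    return 0;
}

Since procedure 0 carries no arguments or results, the server side encodes nothing
into reply_buf and the reply fragment should consist of just the 24-byte rpc_reply
header built in nfs_program_3().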