1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
9 /* RPC handling: registration, marshalling and unmarshalling of messages. For
10 * now this uses the standard Sun RPC mechanisms in the standard C library.
11 * Later, it might be changed to use something better. Much of this code was
12 * generated with rpcgen from the XDR specifications, but has been hand-edited
15 #include "mount_prot.h"
16 #include "nfs3_prot.h"
19 #include <rpc/pmap_clnt.h>
22 #include <sys/socket.h>
23 #include <netinet/in.h>
24 #include <netinet/ip.h>
29 /* TCP port number to use for NFS protocol. (Should be 2049.) */
30 #define NFS_SERVICE_PORT 2051
32 /* Maximum size of a single RPC message that we will accept (8 MB). */
33 #define MAX_RPC_MSGSIZE (8 << 20)
/* Dispatch routine for the MOUNT version 3 RPC program.  This is the handler
 * handed to svc_register() for both the UDP and the TCP transport.
 *
 *   rqstp   the incoming request; rqstp->rq_proc selects the procedure
 *   transp  the transport the request arrived on and the reply goes out on
 *
 * For the selected procedure it picks an XDR argument decoder, an XDR result
 * encoder and a handler function, decodes the arguments, calls the handler,
 * sends the handler's result (when it is non-NULL) and finally releases the
 * decoded arguments.
 *
 * NOTE(review): only the statements shown here are described; the enclosing
 * braces, some case labels and the return statements are not visible in this
 * listing. */
36 mount_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
/* Decoded-argument storage for the two procedures that take a dirpath. */
39 dirpath mountproc3_mnt_3_arg;
40 dirpath mountproc3_umnt_3_arg;
/* Decoder, encoder and handler chosen by the switch below. */
43 xdrproc_t _xdr_argument, _xdr_result;
44 char *(*local)(char *, struct svc_req *);
46 switch (rqstp->rq_proc) {
/* Null procedure: no arguments, no result. */
48 _xdr_argument = (xdrproc_t) xdr_void;
49 _xdr_result = (xdrproc_t) xdr_void;
50 local = (char *(*)(char *, struct svc_req *)) mountproc3_null_3_svc;
/* Mount: takes a dirpath, returns a mountres3. */
54 _xdr_argument = (xdrproc_t) xdr_dirpath;
55 _xdr_result = (xdrproc_t) xdr_mountres3;
56 local = (char *(*)(char *, struct svc_req *)) mountproc3_mnt_3_svc;
/* Dump: no arguments, returns a mountlist. */
60 _xdr_argument = (xdrproc_t) xdr_void;
61 _xdr_result = (xdrproc_t) xdr_mountlist;
62 local = (char *(*)(char *, struct svc_req *)) mountproc3_dump_3_svc;
/* Unmount: takes a dirpath, no result. */
66 _xdr_argument = (xdrproc_t) xdr_dirpath;
67 _xdr_result = (xdrproc_t) xdr_void;
68 local = (char *(*)(char *, struct svc_req *)) mountproc3_umnt_3_svc;
71 case MOUNTPROC3_UMNTALL:
72 _xdr_argument = (xdrproc_t) xdr_void;
73 _xdr_result = (xdrproc_t) xdr_void;
74 local = (char *(*)(char *, struct svc_req *)) mountproc3_umntall_3_svc;
77 case MOUNTPROC3_EXPORT:
78 _xdr_argument = (xdrproc_t) xdr_void;
79 _xdr_result = (xdrproc_t) xdr_exports;
80 local = (char *(*)(char *, struct svc_req *)) mountproc3_export_3_svc;
/* Reply "procedure unavailable" (presumably the default branch). */
84 svcerr_noproc (transp);
/* Zero the argument storage, then decode the request into it. */
87 memset ((char *)&argument, 0, sizeof (argument));
88 if (!svc_getargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
89 svcerr_decode (transp);
/* Run the handler; a NULL result means no reply is sent from here. */
92 result = (*local)((char *)&argument, rqstp);
93 if (result != NULL && !svc_sendreply(transp, (xdrproc_t) _xdr_result, result)) {
94 svcerr_systemerr (transp);
/* Release anything the decoder allocated for the arguments. */
96 if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
97 fprintf (stderr, "%s", "unable to free arguments");
107 uint32_t verf_flavor;
109 uint32_t accept_stat;
112 static void async_rpc_write(RPCConnection *rpc,
113 const char *buf, gsize len);
/* Header of an accepted-but-failed RPC reply as written to the socket by
 * async_rpc_send_failure(), which fills every field it sets with htonl() or
 * zero.  NOTE(review): only some of the fields are visible in this listing. */
115 struct rpc_fail_reply {
119 uint32_t verf_flavor;
121 uint32_t accept_stat;
/* Send an "accepted, but failed" RPC reply for req and release the request's
 * argument storage.
 *
 *   req   the request being answered; req->connection is written to
 *   stat  the accept_stat value reported to the client
 *
 * NOTE(review): the declarations of buf and xdr and the end of the function
 * are not visible in this listing. */
125 async_rpc_send_failure(RPCRequest *req, enum accept_stat stat)
127 struct rpc_fail_reply header;
129 fprintf(stderr, "Sending RPC failure status %d\n", stat);
/* Fill in the reply header; multi-byte fields go out in network order. */
131 header.xid = htonl(req->xid);
132 header.type = htonl(1); /* REPLY */
133 header.stat = htonl(MSG_ACCEPTED);
134 header.verf_flavor = 0;
136 header.accept_stat = htonl(stat);
/* Fragment word: length of the header with the high ("last fragment") bit
 * set, followed by the header itself; then push it out immediately. */
138 uint32_t fragment = htonl(sizeof(header) | 0x80000000);
139 async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment));
140 async_rpc_write(req->connection, (const char *)&header, sizeof(header));
141 g_io_channel_flush(req->connection->channel, NULL);
/* Run the argument XDR routine in XDR_FREE mode so it releases whatever it
 * allocated while decoding. */
143 if (req->args != NULL) {
146 xdrmem_create(&xdr, buf, sizeof(buf), XDR_FREE);
147 if (!req->xdr_args_free(&xdr, req->args)) {
/* NOTE(review): message has no trailing newline. */
148 fprintf(stderr, "unable to free arguments");
/* Drop the raw request bytes, including the character data. */
152 if (req->raw_args != NULL)
153 g_string_free(req->raw_args, TRUE);
/* Encode result with req->xdr_result and send it to the client as a
 * successful RPC reply, then release the request's argument storage.
 *
 *   req     the request being answered; req->connection is written to
 *   result  handler result to encode; NULL sends a reply with an empty body
 *
 * If encoding fails, a SYSTEM_ERR failure reply is sent instead.
 *
 * NOTE(review): reply_buf is a single function-static buffer, so concurrent
 * calls would share it -- confirm all callers run on one thread.  The
 * declarations of xdr_out, buf and xdr and the end of the function are not
 * visible in this listing. */
159 async_rpc_send_reply(RPCRequest *req, void *result)
161 static char reply_buf[MAX_RPC_MSGSIZE];
/* Serialize the result into reply_buf. */
163 xdrmem_create(&xdr_out, reply_buf, MAX_RPC_MSGSIZE, XDR_ENCODE);
164 if (result != NULL && !req->xdr_result(&xdr_out, result)) {
165 async_rpc_send_failure(req, SYSTEM_ERR);
/* Reply header: accept_stat 0 signals success. */
169 struct rpc_reply header;
170 header.xid = htonl(req->xid);
171 header.type = htonl(1); /* REPLY */
172 header.stat = htonl(MSG_ACCEPTED);
173 header.verf_flavor = 0;
175 header.accept_stat = 0;
/* Number of bytes the encoder produced. */
177 gsize msg_size = xdr_out.x_ops->x_getpostn(&xdr_out);
/* NOTE(review): %zd is a signed conversion but msg_size is a gsize. */
178 printf("Have an RPC reply of size %zd bytes\n", msg_size);
/* Single fragment: total length with the high ("last fragment") bit set,
 * then header, then encoded body. */
179 uint32_t fragment = htonl((msg_size + sizeof(header)) | 0x80000000);
180 async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment));
181 async_rpc_write(req->connection, (const char *)&header, sizeof(header));
182 async_rpc_write(req->connection, reply_buf, msg_size);
183 g_io_channel_flush(req->connection->channel, NULL);
/* Run the argument XDR routine in XDR_FREE mode so it releases whatever it
 * allocated while decoding. */
186 if (req->args != NULL) {
189 xdrmem_create(&xdr, buf, sizeof(buf), XDR_FREE);
190 if (!req->xdr_args_free(&xdr, req->args)) {
/* NOTE(review): message has no trailing newline. */
191 fprintf(stderr, "unable to free arguments");
/* Drop the raw request bytes, including the character data. */
195 if (req->raw_args != NULL)
196 g_string_free(req->raw_args, TRUE);
/* Dispatch one NFS version 3 request.
 *
 *   req  the request; req->req_proc selects the procedure and req->raw_args
 *        holds the raw message, whose first req->raw_args_header_bytes bytes
 *        precede the encoded arguments
 *
 * Chooses the XDR argument decoder, XDR result encoder and handler for the
 * procedure, stores the decoder in req->xdr_args_free and the encoder in
 * req->xdr_result, decodes the arguments into a freshly allocated, zeroed
 * union (req->args) and calls the handler with the arguments and req.
 * An unknown procedure is answered with PROC_UNAVAIL and a decode failure
 * with GARBAGE_ARGS.
 *
 * NOTE(review): the return type, the declarations of result and xdr_in, some
 * case labels and the return statements are not visible in this listing. */
202 nfs_program_3(RPCRequest *req)
/* NOTE(review): connection and xid are not used by any statement shown. */
204 RPCConnection *connection = req->connection;
205 uint32_t xid = req->xid;
/* Encoded arguments: the raw message minus the leading header bytes. */
206 const char *msg_buf = req->raw_args->str + req->raw_args_header_bytes;
207 size_t msg_len = req->raw_args->len - req->raw_args_header_bytes;
/* One member per procedure that takes arguments; req->args is allocated as
 * "union argtype" below, so these are presumably that union's members. */
210 nfs_fh3 nfsproc3_getattr_3_arg;
211 setattr3args nfsproc3_setattr_3_arg;
212 diropargs3 nfsproc3_lookup_3_arg;
213 access3args nfsproc3_access_3_arg;
214 nfs_fh3 nfsproc3_readlink_3_arg;
215 read3args nfsproc3_read_3_arg;
216 write3args nfsproc3_write_3_arg;
217 create3args nfsproc3_create_3_arg;
218 mkdir3args nfsproc3_mkdir_3_arg;
219 symlink3args nfsproc3_symlink_3_arg;
220 mknod3args nfsproc3_mknod_3_arg;
221 diropargs3 nfsproc3_remove_3_arg;
222 diropargs3 nfsproc3_rmdir_3_arg;
223 rename3args nfsproc3_rename_3_arg;
224 link3args nfsproc3_link_3_arg;
225 readdir3args nfsproc3_readdir_3_arg;
226 readdirplus3args nfsproc3_readdirplus_3_arg;
227 nfs_fh3 nfsproc3_fsstat_3_arg;
228 nfs_fh3 nfsproc3_fsinfo_3_arg;
229 nfs_fh3 nfsproc3_pathconf_3_arg;
230 commit3args nfsproc3_commit_3_arg;
/* Decoder, encoder and handler chosen by the switch below.  Handlers take
 * the decoded arguments and the request itself. */
233 xdrproc_t _xdr_argument, _xdr_result;
234 char *(*local)(char *, RPCRequest *);
236 switch (req->req_proc) {
/* Null procedure: no arguments, no result. */
238 _xdr_argument = (xdrproc_t) xdr_void;
239 _xdr_result = (xdrproc_t) xdr_void;
240 local = (char *(*)(char *, RPCRequest *)) nfsproc3_null_3_svc;
243 case NFSPROC3_GETATTR:
244 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
245 _xdr_result = (xdrproc_t) xdr_getattr3res;
246 local = (char *(*)(char *, RPCRequest *)) nfsproc3_getattr_3_svc;
249 case NFSPROC3_SETATTR:
250 _xdr_argument = (xdrproc_t) xdr_setattr3args;
251 _xdr_result = (xdrproc_t) xdr_wccstat3;
252 local = (char *(*)(char *, RPCRequest *)) nfsproc3_setattr_3_svc;
255 case NFSPROC3_LOOKUP:
256 _xdr_argument = (xdrproc_t) xdr_diropargs3;
257 _xdr_result = (xdrproc_t) xdr_lookup3res;
258 local = (char *(*)(char *, RPCRequest *)) nfsproc3_lookup_3_svc;
261 case NFSPROC3_ACCESS:
262 _xdr_argument = (xdrproc_t) xdr_access3args;
263 _xdr_result = (xdrproc_t) xdr_access3res;
264 local = (char *(*)(char *, RPCRequest *)) nfsproc3_access_3_svc;
267 case NFSPROC3_READLINK:
268 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
269 _xdr_result = (xdrproc_t) xdr_readlink3res;
270 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readlink_3_svc;
/* Read. */
274 _xdr_argument = (xdrproc_t) xdr_read3args;
275 _xdr_result = (xdrproc_t) xdr_read3res;
276 local = (char *(*)(char *, RPCRequest *)) nfsproc3_read_3_svc;
/* Write. */
280 _xdr_argument = (xdrproc_t) xdr_write3args;
281 _xdr_result = (xdrproc_t) xdr_write3res;
282 local = (char *(*)(char *, RPCRequest *)) nfsproc3_write_3_svc;
285 case NFSPROC3_CREATE:
286 _xdr_argument = (xdrproc_t) xdr_create3args;
287 _xdr_result = (xdrproc_t) xdr_diropres3;
288 local = (char *(*)(char *, RPCRequest *)) nfsproc3_create_3_svc;
/* Mkdir. */
292 _xdr_argument = (xdrproc_t) xdr_mkdir3args;
293 _xdr_result = (xdrproc_t) xdr_diropres3;
294 local = (char *(*)(char *, RPCRequest *)) nfsproc3_mkdir_3_svc;
297 case NFSPROC3_SYMLINK:
298 _xdr_argument = (xdrproc_t) xdr_symlink3args;
299 _xdr_result = (xdrproc_t) xdr_diropres3;
300 local = (char *(*)(char *, RPCRequest *)) nfsproc3_symlink_3_svc;
/* Mknod. */
304 _xdr_argument = (xdrproc_t) xdr_mknod3args;
305 _xdr_result = (xdrproc_t) xdr_diropres3;
306 local = (char *(*)(char *, RPCRequest *)) nfsproc3_mknod_3_svc;
309 case NFSPROC3_REMOVE:
310 _xdr_argument = (xdrproc_t) xdr_diropargs3;
311 _xdr_result = (xdrproc_t) xdr_wccstat3;
312 local = (char *(*)(char *, RPCRequest *)) nfsproc3_remove_3_svc;
/* Rmdir. */
316 _xdr_argument = (xdrproc_t) xdr_diropargs3;
317 _xdr_result = (xdrproc_t) xdr_wccstat3;
318 local = (char *(*)(char *, RPCRequest *)) nfsproc3_rmdir_3_svc;
321 case NFSPROC3_RENAME:
322 _xdr_argument = (xdrproc_t) xdr_rename3args;
323 _xdr_result = (xdrproc_t) xdr_rename3res;
324 local = (char *(*)(char *, RPCRequest *)) nfsproc3_rename_3_svc;
/* Link. */
328 _xdr_argument = (xdrproc_t) xdr_link3args;
329 _xdr_result = (xdrproc_t) xdr_link3res;
330 local = (char *(*)(char *, RPCRequest *)) nfsproc3_link_3_svc;
333 case NFSPROC3_READDIR:
334 _xdr_argument = (xdrproc_t) xdr_readdir3args;
335 _xdr_result = (xdrproc_t) xdr_readdir3res;
336 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readdir_3_svc;
339 case NFSPROC3_READDIRPLUS:
340 _xdr_argument = (xdrproc_t) xdr_readdirplus3args;
341 _xdr_result = (xdrproc_t) xdr_readdirplus3res;
342 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readdirplus_3_svc;
345 case NFSPROC3_FSSTAT:
346 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
347 _xdr_result = (xdrproc_t) xdr_fsstat3res;
348 local = (char *(*)(char *, RPCRequest *)) nfsproc3_fsstat_3_svc;
351 case NFSPROC3_FSINFO:
352 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
353 _xdr_result = (xdrproc_t) xdr_fsinfo3res;
354 local = (char *(*)(char *, RPCRequest *)) nfsproc3_fsinfo_3_svc;
357 case NFSPROC3_PATHCONF:
358 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
359 _xdr_result = (xdrproc_t) xdr_pathconf3res;
360 local = (char *(*)(char *, RPCRequest *)) nfsproc3_pathconf_3_svc;
363 case NFSPROC3_COMMIT:
364 _xdr_argument = (xdrproc_t) xdr_commit3args;
365 _xdr_result = (xdrproc_t) xdr_commit3res;
366 local = (char *(*)(char *, RPCRequest *)) nfsproc3_commit_3_svc;
/* Unknown procedure number (presumably the default branch). */
370 async_rpc_send_failure(req, PROC_UNAVAIL);
374 /* Decode incoming message */
/* The decoder is remembered so the reply path can later run it in XDR_FREE
 * mode on req->args. */
375 req->xdr_args_free = _xdr_argument;
376 req->args = g_new0(union argtype, 1);
378 xdrmem_create(&xdr_in, (char *)msg_buf, msg_len, XDR_DECODE);
379 if (!_xdr_argument(&xdr_in, req->args)) {
380 async_rpc_send_failure(req, GARBAGE_ARGS);
381 fprintf(stderr, "RPC decode error!\n");
385 /* Perform the call. */
386 req->xdr_result = _xdr_result;
387 result = (*local)((char *)req->args, req);
/* NOTE(review): fs is not declared in the lines shown; presumably a
 * file-scope or external filesystem handle -- confirm. */
389 bluesky_flushd_invoke(fs);
394 /* Enhanced, asynchronous-friendly RPC layer. This is a replacement for the
395 * built-in sunrpc parsing and dispatch that will allow for processing multiple
396 * requests at the same time. */
397 static GMainContext *main_context;
398 static GMainLoop *main_loop;
400 static async_rpc_init()
402 main_context = g_main_context_new();
403 main_loop = g_main_loop_new(main_context, FALSE);
406 struct rpc_call_header {
420 /* Decode an RPC message and process it. Returns a boolean indicating whether
421 * the message could be processed; if false, an unrecoverable error occurred
422 * and the transport should be closed. */
/* Parse the RPC call header of the complete message held in rpc->msgbuf and
 * set up an RPCRequest for it.
 *
 * NOTE(review): the return statements, the declaration of i, the statements
 * that advance buf past each authentication block and the hand-off of the
 * request to the NFS dispatcher are not visible in this listing. */
423 static gboolean async_rpc_dispatch(RPCConnection *rpc)
426 GString *msg = rpc->msgbuf;
427 const char *buf = msg->str;
/* A message shorter than the fixed call header cannot be parsed. */
429 if (msg->len < sizeof(struct rpc_call_header)) {
/* NOTE(review): %zd is a signed conversion but msg->len is a gsize. */
430 fprintf(stderr, "Short RPC message: only %zd bytes!\n", msg->len);
/* NOTE(review): the message bytes are reinterpreted as a struct in place;
 * this assumes suitable alignment and a padding-free layout -- confirm. */
434 struct rpc_call_header *header = (struct rpc_call_header *)(msg->str);
435 uint32_t xid = ntohl(header->xid);
437 if (ntohl(header->mtype) != 0) {
438 /* Not an RPC call */
/* Only RPC protocol version 2 is accepted. */
442 if (ntohl(header->rpcvers) != 2) {
/* Zero-initialized request record tied to this connection. */
446 RPCRequest *req = g_new0(RPCRequest, 1);
447 req->connection = rpc;
/* Anything other than NFS version 3 is answered with PROG_UNAVAIL. */
450 if (ntohl(header->prog) != NFS_PROGRAM) {
451 async_rpc_send_failure(req, PROG_UNAVAIL);
453 } else if (ntohl(header->vers) != NFS_V3) {
454 /* FIXME: Should be PROG_MISMATCH */
455 async_rpc_send_failure(req, PROG_UNAVAIL);
459 uint32_t proc = ntohl(header->proc);
461 /* Next, skip over authentication headers. */
462 buf += sizeof(struct rpc_call_header);
/* Two authentication blocks follow the call header (presumably credential
 * and verifier). */
463 for (i = 0; i < 2; i++) {
464 struct rpc_auth *auth = (struct rpc_auth *)buf;
/* The fixed part of the block must lie inside the message. */
465 if (buf - msg->str + sizeof(struct rpc_auth) > msg->len)
/* Total block size: fixed part plus the length the client supplied. */
468 gsize authsize = ntohl(auth->len) + sizeof(struct rpc_auth);
469 if (authsize > MAX_RPC_MSGSIZE)
/* NOTE(review): compares a signed pointer difference with an unsigned
 * length. */
475 if (buf - msg->str > msg->len)
478 printf("Dispatching RPC procedure %d...\n", proc);
/* Remember how many bytes precede the encoded arguments. */
481 req->raw_args_header_bytes = buf - msg->str;
482 req->req_proc = ntohl(header->proc);
/* Give the connection a fresh, empty buffer for the next message. */
483 rpc->msgbuf = g_string_new("");
490 /* Write the given data to the RPC socket. */
/* Queue len bytes starting at buf for output on the connection's I/O channel
 * using g_io_channel_write_chars().  ERROR, EOF and AGAIN statuses are all
 * reported as a write error on stderr.  The channel is not flushed here (the
 * flush call below is commented out); the reply functions flush explicitly.
 *
 * NOTE(review): any loop around the write and the bookkeeping in the NORMAL
 * case are not visible in this listing. */
491 static void async_rpc_write(RPCConnection *rpc,
492 const char *buf, gsize len)
496 switch (g_io_channel_write_chars(rpc->channel, buf, len,
498 case G_IO_STATUS_ERROR:
499 case G_IO_STATUS_EOF:
500 case G_IO_STATUS_AGAIN:
501 fprintf(stderr, "Error writing to socket!\n");
503 case G_IO_STATUS_NORMAL:
510 // g_io_channel_flush(rpc->channel, NULL);
513 static gboolean async_rpc_do_read(GIOChannel *channel,
514 GIOCondition condition,
517 RPCConnection *rpc = (RPCConnection *)data;
519 gsize bytes_to_read = 0; /* Number of bytes to attempt to read. */
521 /* If we have not yet read in the fragment header, do that first. This is
522 * 4 bytes that indicates the number of bytes in the message to follow
523 * (with the high bit set if this is the last fragment making up the
525 if (rpc->frag_len == 0) {
526 bytes_to_read = 4 - rpc->frag_hdr_bytes;
528 bytes_to_read = rpc->frag_len & 0x7fffffff;
531 if (bytes_to_read > MAX_RPC_MSGSIZE
532 || rpc->msgbuf->len + bytes_to_read > MAX_RPC_MSGSIZE)
534 fprintf(stderr, "Excessive fragment size for RPC: %zd bytes\n",
536 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
540 gsize bytes_read = 0;
541 g_string_set_size(rpc->msgbuf, rpc->msgbuf->len + bytes_to_read);
542 char *buf = &rpc->msgbuf->str[rpc->msgbuf->len - bytes_to_read];
543 switch (g_io_channel_read_chars(rpc->channel, buf,
544 bytes_to_read, &bytes_read, NULL)) {
545 case G_IO_STATUS_NORMAL:
547 case G_IO_STATUS_AGAIN:
549 case G_IO_STATUS_EOF:
550 if (bytes_read == bytes_to_read)
552 /* else fall through */
553 case G_IO_STATUS_ERROR:
554 fprintf(stderr, "Unexpected error or end of file on RPC stream %d!\n",
555 g_io_channel_unix_get_fd(rpc->channel));
556 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
560 g_assert(bytes_read >= 0 && bytes_read <= bytes_to_read);
562 g_string_set_size(rpc->msgbuf,
563 rpc->msgbuf->len - (bytes_to_read - bytes_read));
565 if (rpc->frag_len == 0) {
566 /* Handle reading in the fragment header. If we've read the complete
567 * header, store the fragment size. */
568 rpc->frag_hdr_bytes += bytes_read;
569 if (rpc->frag_hdr_bytes == 4) {
570 memcpy((char *)&rpc->frag_len,
571 &rpc->msgbuf->str[rpc->msgbuf->len - 4], 4);
572 rpc->frag_len = ntohl(rpc->frag_len);
573 g_string_set_size(rpc->msgbuf, rpc->msgbuf->len - 4);
574 rpc->frag_hdr_bytes = 0;
575 g_print("RPC fragment header: %08x\n", rpc->frag_len);
578 /* We were reading in the fragment body. */
579 rpc->frag_len -= bytes_read;
581 if (rpc->frag_len = 0x80000000) {
582 /* We have a complete message since this was the last fragment and
583 * there are no more bytes in it. Dispatch the message. */
584 g_print("Complete RPC message: %zd bytes\n", rpc->msgbuf->len);
585 if (!async_rpc_dispatch(rpc)) {
586 fprintf(stderr, "Invalid RPC message, closing channel\n");
587 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
591 g_string_set_size(rpc->msgbuf, 0);
/* I/O watch callback for the listening socket: accept one pending connection
 * and start watching it for incoming RPC data.
 *
 *   channel    the listening channel; its file descriptor is accept()ed on
 *   condition  not used by any statement shown
 *
 * NOTE(review): the third parameter, the test on nfd and the return
 * statements are not visible in this listing. */
598 static gboolean async_rpc_do_accept(GIOChannel *channel,
599 GIOCondition condition,
602 int fd = g_io_channel_unix_get_fd(channel);
603 struct sockaddr_in addr;
604 socklen_t addrlen = sizeof(addr);
606 g_print("Received new connection on fd %d!\n", fd);
607 int nfd = accept(fd, (struct sockaddr *)&addr, &addrlen);
609 fprintf(stderr, "Error accepting connection: %m\n");
/* Per-connection state: zeroed record, a channel on the new socket and an
 * empty message buffer. */
613 RPCConnection *rpc = g_new0(RPCConnection, 1);
614 rpc->channel = g_io_channel_unix_new(nfd);
615 rpc->msgbuf = g_string_new("");
/* NULL encoding: the channel carries raw bytes, not text. */
616 g_io_channel_set_encoding(rpc->channel, NULL, NULL);
/* Call async_rpc_do_read whenever the socket becomes readable. */
617 GSource *source = g_io_create_watch(rpc->channel, G_IO_IN);
618 g_source_set_callback(source, (GSourceFunc)async_rpc_do_read,
/* Once attached, the context holds its own reference to the source. */
620 g_source_attach(source, main_context);
621 g_source_unref(source);
626 static async_rpc_register_listening(int fd)
628 GIOChannel *channel = g_io_channel_unix_new(fd);
629 g_io_channel_set_encoding(channel, NULL, NULL);
630 GSource *source = g_io_create_watch(channel, G_IO_IN);
631 g_source_set_callback(source, (GSourceFunc)async_rpc_do_accept,
633 g_source_attach(source, main_context);
634 g_source_unref(source);
/* Thread entry point (it is the function handed to g_thread_create() in this
 * file): run the RPC main loop.  data is not used by any statement shown.
 * NOTE(review): the return statement is not visible in this listing. */
637 static gpointer async_rpc_run(gpointer data)
639 g_print("Starting NFS main loop...\n");
/* Blocks until the main loop is quit. */
640 g_main_loop_run(main_loop);
/* Service setup: register the MOUNT v3 program with the standard Sun RPC
 * library over UDP and TCP, then create the NFS v3 listening socket, announce
 * it to the portmapper and hand it to the asynchronous RPC layer.
 *
 * NOTE(review): the enclosing function header, the declarations of transp and
 * n, and the statements that follow each error message are not visible in
 * this listing. */
/* Drop any stale portmapper entry for MOUNT v3. */
650 pmap_unset (MOUNT_PROGRAM, MOUNT_V3);
/* MOUNT over UDP. */
652 transp = svcudp_create(RPC_ANYSOCK);
653 if (transp == NULL) {
654 fprintf(stderr, "%s", "cannot create udp service.");
657 if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3, mount_program_3, IPPROTO_UDP)) {
658 fprintf(stderr, "%s", "unable to register (MOUNT_PROGRAM, MOUNT_V3, udp).");
/* MOUNT over TCP. */
662 transp = svctcp_create(RPC_ANYSOCK, 0, 0);
663 if (transp == NULL) {
664 fprintf(stderr, "%s", "cannot create tcp service.");
667 if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3, mount_program_3, IPPROTO_TCP)) {
668 fprintf(stderr, "%s", "unable to register (MOUNT_PROGRAM, MOUNT_V3, tcp).");
672 /* NFS protocol (version 3) */
673 pmap_unset (NFS_PROGRAM, NFS_V3);
675 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
677 fprintf(stderr, "Unable to create NFS TCP socket: %m\n");
/* NOTE(review): the result of setsockopt() is not checked. */
682 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n));
/* Listen on every local address at NFS_SERVICE_PORT.
 * NOTE(review): addr is not zeroed before its fields are set in the lines
 * shown. */
684 struct sockaddr_in addr;
685 addr.sin_family = AF_INET;
686 addr.sin_port = htons(NFS_SERVICE_PORT);
687 addr.sin_addr.s_addr = INADDR_ANY;
688 if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
689 fprintf(stderr, "Unable to bind to NFS TCP address: %m\n");
693 if (listen(fd, SOMAXCONN) < 0) {
694 fprintf(stderr, "Unable to listen on NFS TCP socket: %m\n");
/* Tell the portmapper which TCP port serves NFS v3. */
698 if (!pmap_set(NFS_PROGRAM, NFS_V3, IPPROTO_TCP, NFS_SERVICE_PORT)) {
699 fprintf(stderr, "Could not register NFS RPC service!\n");
/* Accept connections on the RPC main context ... */
703 async_rpc_register_listening(fd);
/* ... and run that context's main loop in its own joinable thread. */
705 g_thread_create(async_rpc_run, NULL, TRUE, NULL);