1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
9 /* RPC handling: registration, marshalling and unmarshalling of messages. For
10 * now this uses the standard Sun RPC mechanisms in the standard C library.
11 * Later, it might be changed to use something better. Much of this code was
12 * generated with rpcgen from the XDR specifications, but has been hand-edited
15 #include "mount_prot.h"
16 #include "nfs3_prot.h"
19 #include <rpc/pmap_clnt.h>
22 #include <sys/socket.h>
23 #include <netinet/in.h>
24 #include <netinet/ip.h>
29 /* TCP port number to use for NFS protocol. (Should be 2049.) */
30 #define NFS_SERVICE_PORT 2051
32 /* Maximum size of a single RPC message that we will accept (8 MB). */
33 #define MAX_RPC_MSGSIZE (8 << 20)
/* Dispatch routine for the MOUNT version 3 program; this is the handler
 * passed to svc_register() for both the UDP and the TCP transport.
 *
 * rqstp:  the incoming request; rqstp->rq_proc selects the procedure.
 * transp: transport used to decode the arguments and to send the reply or an
 *         error indication.
 *
 * For each procedure the switch picks the XDR routine for the arguments, the
 * XDR routine for the result, and the handler to call.
 * NOTE(review): not every line of this function is visible in this listing
 * (some case labels, the argument/result declarations and the closing braces
 * are missing), so the comments below describe the visible statements only. */
36 mount_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
39 dirpath mountproc3_mnt_3_arg;
40 dirpath mountproc3_umnt_3_arg;
43 xdrproc_t _xdr_argument, _xdr_result;
44 char *(*local)(char *, struct svc_req *);
46 switch (rqstp->rq_proc) {
/* Null procedure: no arguments, no result. */
48 _xdr_argument = (xdrproc_t) xdr_void;
49 _xdr_result = (xdrproc_t) xdr_void;
50 local = (char *(*)(char *, struct svc_req *)) mountproc3_null_3_svc;
/* Mount: takes a dirpath, returns a mountres3. */
54 _xdr_argument = (xdrproc_t) xdr_dirpath;
55 _xdr_result = (xdrproc_t) xdr_mountres3;
56 local = (char *(*)(char *, struct svc_req *)) mountproc3_mnt_3_svc;
/* Dump: no arguments, returns a mountlist. */
60 _xdr_argument = (xdrproc_t) xdr_void;
61 _xdr_result = (xdrproc_t) xdr_mountlist;
62 local = (char *(*)(char *, struct svc_req *)) mountproc3_dump_3_svc;
/* Unmount: takes a dirpath, no result. */
66 _xdr_argument = (xdrproc_t) xdr_dirpath;
67 _xdr_result = (xdrproc_t) xdr_void;
68 local = (char *(*)(char *, struct svc_req *)) mountproc3_umnt_3_svc;
/* Unmount all: no arguments, no result. */
71 case MOUNTPROC3_UMNTALL:
72 _xdr_argument = (xdrproc_t) xdr_void;
73 _xdr_result = (xdrproc_t) xdr_void;
74 local = (char *(*)(char *, struct svc_req *)) mountproc3_umntall_3_svc;
/* Export list: no arguments, returns an exports list. */
77 case MOUNTPROC3_EXPORT:
78 _xdr_argument = (xdrproc_t) xdr_void;
79 _xdr_result = (xdrproc_t) xdr_exports;
80 local = (char *(*)(char *, struct svc_req *)) mountproc3_export_3_svc;
/* Procedure number not handled above (presumably the default: branch, whose
 * label is not visible here): tell the client the procedure does not exist. */
84 svcerr_noproc (transp);
/* Zero the argument storage, then decode the call arguments into it. */
87 memset ((char *)&argument, 0, sizeof (argument));
88 if (!svc_getargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
89 svcerr_decode (transp);
/* Run the selected handler.  A NULL result means no reply is sent; a reply
 * that cannot be sent is reported to the client as a system error. */
92 result = (*local)((char *)&argument, rqstp);
93 if (result != NULL && !svc_sendreply(transp, (xdrproc_t) _xdr_result, result)) {
94 svcerr_systemerr (transp);
/* Release whatever the XDR decoder allocated for the arguments. */
96 if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
97 fprintf (stderr, "%s", "unable to free arguments");
107 uint32_t verf_flavor;
109 uint32_t accept_stat;
/* Forward declaration: the reply-sending helpers below call async_rpc_write()
 * before its definition appears later in this file. */
112 static void async_rpc_write(RPCConnection *rpc,
113 const char *buf, gsize len);
/* On-the-wire header of an RPC reply that reports a failure.  It is filled in
 * by async_rpc_send_failure(), which stores the multi-byte fields through
 * htonl(), and is written to the socket as raw bytes.
 * NOTE(review): only some members are visible in this listing. */
115 struct rpc_fail_reply {
119 uint32_t verf_flavor;
/* Failure status reported to the client (an enum accept_stat value). */
121 uint32_t accept_stat;
/* Send a reply reporting that the call in req failed, then release the
 * request's argument storage.
 *
 * req:  the request being answered; req->xid and req->connection are used.
 * stat: failure status placed in the accept_stat field of the reply.
 *
 * NOTE(review): not every line of this function is visible in this listing;
 * in particular it is not shown here whether req itself is freed. */
125 async_rpc_send_failure(RPCRequest *req, enum accept_stat stat)
127 struct rpc_fail_reply header;
129 fprintf(stderr, "Sending RPC failure status %d\n", stat);
/* Build the reply header in network byte order. */
131 header.xid = htonl(req->xid);
132 header.type = htonl(1); /* REPLY */
133 header.stat = htonl(MSG_ACCEPTED);
134 header.verf_flavor = 0;
136 header.accept_stat = htonl(stat);
/* Record marker: the byte count of the header with the high bit set, which
 * marks this as the last (here: only) fragment of the message. */
138 uint32_t fragment = htonl(sizeof(header) | 0x80000000);
139 async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment));
140 async_rpc_write(req->connection, (const char *)&header, sizeof(header));
141 g_io_channel_flush(req->connection->channel, NULL);
/* If arguments were decoded for this request, run the argument XDR routine
 * in XDR_FREE mode to release what the decoder allocated. */
143 if (req->args != NULL) {
146 xdrmem_create(&xdr, buf, sizeof(buf), XDR_FREE);
147 if (!req->xdr_args_free(&xdr, req->args)) {
148 fprintf(stderr, "unable to free arguments");
/* Release the raw message bytes and the GString that holds them. */
152 if (req->raw_args != NULL)
153 g_string_free(req->raw_args, TRUE);
/* Encode result with req->xdr_result and send it to the client as a
 * successful reply, then release the request's argument storage.
 *
 * req:    the request being answered.
 * result: the value to encode; when NULL, no result body is encoded.
 *
 * If encoding fails, a SYSTEM_ERR failure reply is sent instead.
 * NOTE(review): not every line of this function is visible in this listing. */
159 async_rpc_send_reply(RPCRequest *req, void *result)
/* One static buffer holds the encoded result for every call.
 * NOTE(review): this looks non-reentrant if replies can ever be produced
 * from more than one thread at a time -- confirm. */
161 static char reply_buf[MAX_RPC_MSGSIZE];
163 xdrmem_create(&xdr_out, reply_buf, MAX_RPC_MSGSIZE, XDR_ENCODE);
164 if (result != NULL && !req->xdr_result(&xdr_out, result)) {
165 async_rpc_send_failure(req, SYSTEM_ERR);
/* Build the reply header in network byte order. */
169 struct rpc_reply header;
170 header.xid = htonl(req->xid);
171 header.type = htonl(1); /* REPLY */
172 header.stat = htonl(MSG_ACCEPTED);
173 header.verf_flavor = 0;
/* 0 is presumably the "success" accept status -- confirm against the
 * accept_stat enum. */
175 header.accept_stat = 0;
/* Current position of the encoder, i.e. the size of the encoded result. */
177 gsize msg_size = xdr_out.x_ops->x_getpostn(&xdr_out);
178 printf("Have an RPC reply of size %zd bytes\n", msg_size);
/* Record marker: total byte count (header + result) with the high bit set to
 * mark the last fragment; then the header and the encoded result follow. */
179 uint32_t fragment = htonl((msg_size + sizeof(header)) | 0x80000000);
180 async_rpc_write(req->connection, (const char *)&fragment, sizeof(fragment));
181 async_rpc_write(req->connection, (const char *)&header, sizeof(header));
182 async_rpc_write(req->connection, reply_buf, msg_size);
183 g_io_channel_flush(req->connection->channel, NULL);
/* If arguments were decoded for this request, run the argument XDR routine
 * in XDR_FREE mode to release what the decoder allocated. */
186 if (req->args != NULL) {
189 xdrmem_create(&xdr, buf, sizeof(buf), XDR_FREE);
190 if (!req->xdr_args_free(&xdr, req->args)) {
191 fprintf(stderr, "unable to free arguments");
/* Release the raw message bytes and the GString that holds them. */
195 if (req->raw_args != NULL)
196 g_string_free(req->raw_args, TRUE);
/* Dispatch one NFS version 3 call.
 *
 * req: the request to serve.  req->req_proc selects the procedure and
 *      req->raw_args holds the raw message; the procedure arguments start
 *      req->raw_args_header_bytes into it.
 *
 * The switch picks, per procedure, the XDR routine for the arguments, the XDR
 * routine for the result and the handler.  The arguments are then decoded,
 * the handler is called and its result is passed to async_rpc_send_reply().
 * NOTE(review): not every line of this function is visible in this listing
 * (some case labels, break/return statements and closing braces are
 * missing), so the comments describe the visible statements only. */
202 nfs_program_3(RPCRequest *req)
204 RPCConnection *connection = req->connection;
205 uint32_t xid = req->xid;
/* Start and length of the procedure arguments inside the raw message. */
206 const char *msg_buf = req->raw_args->str + req->raw_args_header_bytes;
207 size_t msg_len = req->raw_args->len - req->raw_args_header_bytes;
/* One member per procedure's argument type (members of the argument union
 * that is allocated below as "union argtype"). */
210 nfs_fh3 nfsproc3_getattr_3_arg;
211 setattr3args nfsproc3_setattr_3_arg;
212 diropargs3 nfsproc3_lookup_3_arg;
213 access3args nfsproc3_access_3_arg;
214 nfs_fh3 nfsproc3_readlink_3_arg;
215 read3args nfsproc3_read_3_arg;
216 write3args nfsproc3_write_3_arg;
217 create3args nfsproc3_create_3_arg;
218 mkdir3args nfsproc3_mkdir_3_arg;
219 symlink3args nfsproc3_symlink_3_arg;
220 mknod3args nfsproc3_mknod_3_arg;
221 diropargs3 nfsproc3_remove_3_arg;
222 diropargs3 nfsproc3_rmdir_3_arg;
223 rename3args nfsproc3_rename_3_arg;
224 link3args nfsproc3_link_3_arg;
225 readdir3args nfsproc3_readdir_3_arg;
226 readdirplus3args nfsproc3_readdirplus_3_arg;
227 nfs_fh3 nfsproc3_fsstat_3_arg;
228 nfs_fh3 nfsproc3_fsinfo_3_arg;
229 nfs_fh3 nfsproc3_pathconf_3_arg;
230 commit3args nfsproc3_commit_3_arg;
233 xdrproc_t _xdr_argument, _xdr_result;
234 char *(*local)(char *, RPCRequest *);
236 switch (req->req_proc) {
/* Null procedure: no arguments, no result. */
238 _xdr_argument = (xdrproc_t) xdr_void;
239 _xdr_result = (xdrproc_t) xdr_void;
240 local = (char *(*)(char *, RPCRequest *)) nfsproc3_null_3_svc;
243 case NFSPROC3_GETATTR:
244 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
245 _xdr_result = (xdrproc_t) xdr_getattr3res;
246 local = (char *(*)(char *, RPCRequest *)) nfsproc3_getattr_3_svc;
249 case NFSPROC3_SETATTR:
250 _xdr_argument = (xdrproc_t) xdr_setattr3args;
251 _xdr_result = (xdrproc_t) xdr_wccstat3;
252 local = (char *(*)(char *, RPCRequest *)) nfsproc3_setattr_3_svc;
255 case NFSPROC3_LOOKUP:
256 _xdr_argument = (xdrproc_t) xdr_diropargs3;
257 _xdr_result = (xdrproc_t) xdr_lookup3res;
258 local = (char *(*)(char *, RPCRequest *)) nfsproc3_lookup_3_svc;
261 case NFSPROC3_ACCESS:
262 _xdr_argument = (xdrproc_t) xdr_access3args;
263 _xdr_result = (xdrproc_t) xdr_access3res;
264 local = (char *(*)(char *, RPCRequest *)) nfsproc3_access_3_svc;
267 case NFSPROC3_READLINK:
268 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
269 _xdr_result = (xdrproc_t) xdr_readlink3res;
270 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readlink_3_svc;
/* Read (case label not visible in this listing). */
274 _xdr_argument = (xdrproc_t) xdr_read3args;
275 _xdr_result = (xdrproc_t) xdr_read3res;
276 local = (char *(*)(char *, RPCRequest *)) nfsproc3_read_3_svc;
/* Write (case label not visible in this listing). */
280 _xdr_argument = (xdrproc_t) xdr_write3args;
281 _xdr_result = (xdrproc_t) xdr_write3res;
282 local = (char *(*)(char *, RPCRequest *)) nfsproc3_write_3_svc;
285 case NFSPROC3_CREATE:
286 _xdr_argument = (xdrproc_t) xdr_create3args;
287 _xdr_result = (xdrproc_t) xdr_diropres3;
288 local = (char *(*)(char *, RPCRequest *)) nfsproc3_create_3_svc;
/* Mkdir (case label not visible in this listing). */
292 _xdr_argument = (xdrproc_t) xdr_mkdir3args;
293 _xdr_result = (xdrproc_t) xdr_diropres3;
294 local = (char *(*)(char *, RPCRequest *)) nfsproc3_mkdir_3_svc;
297 case NFSPROC3_SYMLINK:
298 _xdr_argument = (xdrproc_t) xdr_symlink3args;
299 _xdr_result = (xdrproc_t) xdr_diropres3;
300 local = (char *(*)(char *, RPCRequest *)) nfsproc3_symlink_3_svc;
/* Mknod (case label not visible in this listing). */
304 _xdr_argument = (xdrproc_t) xdr_mknod3args;
305 _xdr_result = (xdrproc_t) xdr_diropres3;
306 local = (char *(*)(char *, RPCRequest *)) nfsproc3_mknod_3_svc;
309 case NFSPROC3_REMOVE:
310 _xdr_argument = (xdrproc_t) xdr_diropargs3;
311 _xdr_result = (xdrproc_t) xdr_wccstat3;
312 local = (char *(*)(char *, RPCRequest *)) nfsproc3_remove_3_svc;
/* Rmdir (case label not visible in this listing). */
316 _xdr_argument = (xdrproc_t) xdr_diropargs3;
317 _xdr_result = (xdrproc_t) xdr_wccstat3;
318 local = (char *(*)(char *, RPCRequest *)) nfsproc3_rmdir_3_svc;
321 case NFSPROC3_RENAME:
322 _xdr_argument = (xdrproc_t) xdr_rename3args;
323 _xdr_result = (xdrproc_t) xdr_rename3res;
324 local = (char *(*)(char *, RPCRequest *)) nfsproc3_rename_3_svc;
/* Link (case label not visible in this listing). */
328 _xdr_argument = (xdrproc_t) xdr_link3args;
329 _xdr_result = (xdrproc_t) xdr_link3res;
330 local = (char *(*)(char *, RPCRequest *)) nfsproc3_link_3_svc;
333 case NFSPROC3_READDIR:
334 _xdr_argument = (xdrproc_t) xdr_readdir3args;
335 _xdr_result = (xdrproc_t) xdr_readdir3res;
336 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readdir_3_svc;
339 case NFSPROC3_READDIRPLUS:
340 _xdr_argument = (xdrproc_t) xdr_readdirplus3args;
341 _xdr_result = (xdrproc_t) xdr_readdirplus3res;
342 local = (char *(*)(char *, RPCRequest *)) nfsproc3_readdirplus_3_svc;
345 case NFSPROC3_FSSTAT:
346 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
347 _xdr_result = (xdrproc_t) xdr_fsstat3res;
348 local = (char *(*)(char *, RPCRequest *)) nfsproc3_fsstat_3_svc;
351 case NFSPROC3_FSINFO:
352 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
353 _xdr_result = (xdrproc_t) xdr_fsinfo3res;
354 local = (char *(*)(char *, RPCRequest *)) nfsproc3_fsinfo_3_svc;
357 case NFSPROC3_PATHCONF:
358 _xdr_argument = (xdrproc_t) xdr_nfs_fh3;
359 _xdr_result = (xdrproc_t) xdr_pathconf3res;
360 local = (char *(*)(char *, RPCRequest *)) nfsproc3_pathconf_3_svc;
363 case NFSPROC3_COMMIT:
364 _xdr_argument = (xdrproc_t) xdr_commit3args;
365 _xdr_result = (xdrproc_t) xdr_commit3res;
366 local = (char *(*)(char *, RPCRequest *)) nfsproc3_commit_3_svc;
/* Procedure number not handled above (presumably the default: branch, whose
 * label is not visible here): reply that the procedure is unavailable. */
370 async_rpc_send_failure(req, PROC_UNAVAIL);
/* The argument XDR routine is remembered on the request so the reply helpers
 * can later run it in XDR_FREE mode; the argument storage is allocated
 * zero-filled. */
374 /* Decode incoming message */
375 req->xdr_args_free = _xdr_argument;
376 req->args = g_new0(union argtype, 1);
378 xdrmem_create(&xdr_in, (char *)msg_buf, msg_len, XDR_DECODE);
379 if (!_xdr_argument(&xdr_in, req->args)) {
380 async_rpc_send_failure(req, GARBAGE_ARGS);
381 fprintf(stderr, "RPC decode error!\n");
385 /* Perform the call. */
386 req->xdr_result = _xdr_result;
387 result = (*local)((char *)req->args, req);
389 /* Encode result and send reply. */
390 async_rpc_send_reply(req, result);
/* NOTE(review): fs is not declared in the visible part of this function;
 * presumably a file-level variable -- confirm. */
392 bluesky_flushd_invoke(fs);
397 /* Enhanced, asynchronous-friendly RPC layer. This is a replacement for the
398 * built-in sunrpc parsing and dispatch that will allow for processing multiple
399 * requests at the same time. */
400 static GMainContext *main_context;
401 static GMainLoop *main_loop;
403 static async_rpc_init()
405 main_context = g_main_context_new();
406 main_loop = g_main_loop_new(main_context, FALSE);
409 struct rpc_call_header {
423 /* Decode an RPC message and process it. Returns a boolean indicating whether
424 * the message could be processed; if false, an unrecoverable error occurred
425 * and the transport should be closed. */
/* rpc: the connection whose msgbuf holds one complete RPC message.
 * NOTE(review): not every line of this function is visible in this listing
 * (return statements and some assignments are missing), so the comments
 * describe the visible statements only. */
426 static gboolean async_rpc_dispatch(RPCConnection *rpc)
429 GString *msg = rpc->msgbuf;
430 const char *buf = msg->str;
/* The message must at least contain the fixed call header. */
432 if (msg->len < sizeof(struct rpc_call_header)) {
433 fprintf(stderr, "Short RPC message: only %zd bytes!\n", msg->len);
/* The header is read in place; its fields are in network byte order. */
437 struct rpc_call_header *header = (struct rpc_call_header *)(msg->str);
438 uint32_t xid = ntohl(header->xid);
440 if (ntohl(header->mtype) != 0) {
441 /* Not an RPC call */
/* Only RPC protocol version 2 is accepted. */
445 if (ntohl(header->rpcvers) != 2) {
/* From here on a request object exists, so problems are reported to the
 * client with a failure reply. */
449 RPCRequest *req = g_new0(RPCRequest, 1);
450 req->connection = rpc;
453 if (ntohl(header->prog) != NFS_PROGRAM) {
454 async_rpc_send_failure(req, PROG_UNAVAIL);
456 } else if (ntohl(header->vers) != NFS_V3) {
457 /* FIXME: Should be PROG_MISMATCH */
458 async_rpc_send_failure(req, PROG_UNAVAIL);
462 uint32_t proc = ntohl(header->proc);
/* Two variable-length authentication blocks follow the call header
 * (presumably the credential and the verifier -- confirm).  Each is bounds-
 * checked against the message length before its length field is trusted. */
464 /* Next, skip over authentication headers. */
465 buf += sizeof(struct rpc_call_header);
466 for (i = 0; i < 2; i++) {
467 struct rpc_auth *auth = (struct rpc_auth *)buf;
468 if (buf - msg->str + sizeof(struct rpc_auth) > msg->len)
471 gsize authsize = ntohl(auth->len) + sizeof(struct rpc_auth);
472 if (authsize > MAX_RPC_MSGSIZE)
478 if (buf - msg->str > msg->len)
481 printf("Dispatching RPC procedure %d...\n", proc);
/* Offset at which the procedure arguments start within the message. */
484 req->raw_args_header_bytes = buf - msg->str;
485 req->req_proc = ntohl(header->proc);
/* The connection gets a fresh, empty buffer for the next message.
 * NOTE(review): presumably the old buffer has been handed to the request as
 * raw_args on a line not visible here -- confirm. */
486 rpc->msgbuf = g_string_new("");
493 /* Write the given data to the RPC socket. */
/* rpc: connection whose channel is written to.
 * buf: bytes to write.
 * len: number of bytes in buf.
 *
 * Any write status other than G_IO_STATUS_NORMAL is logged as an error.
 * NOTE(review): not every line of this function is visible in this listing,
 * so how a short write or an error is handled beyond the log message cannot
 * be stated from here. */
494 static void async_rpc_write(RPCConnection *rpc,
495 const char *buf, gsize len)
499 switch (g_io_channel_write_chars(rpc->channel, buf, len,
501 case G_IO_STATUS_ERROR:
502 case G_IO_STATUS_EOF:
503 case G_IO_STATUS_AGAIN:
504 fprintf(stderr, "Error writing to socket!\n");
506 case G_IO_STATUS_NORMAL:
/* Flushing is left to the callers, which call g_io_channel_flush() after
 * writing a complete reply. */
513 // g_io_channel_flush(rpc->channel, NULL);
516 static gboolean async_rpc_do_read(GIOChannel *channel,
517 GIOCondition condition,
520 RPCConnection *rpc = (RPCConnection *)data;
522 gsize bytes_to_read = 0; /* Number of bytes to attempt to read. */
524 /* If we have not yet read in the fragment header, do that first. This is
525 * 4 bytes that indicates the number of bytes in the message to follow
526 * (with the high bit set if this is the last fragment making up the
528 if (rpc->frag_len == 0) {
529 bytes_to_read = 4 - rpc->frag_hdr_bytes;
531 bytes_to_read = rpc->frag_len & 0x7fffffff;
534 if (bytes_to_read > MAX_RPC_MSGSIZE
535 || rpc->msgbuf->len + bytes_to_read > MAX_RPC_MSGSIZE)
537 fprintf(stderr, "Excessive fragment size for RPC: %zd bytes\n",
539 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
543 gsize bytes_read = 0;
544 g_string_set_size(rpc->msgbuf, rpc->msgbuf->len + bytes_to_read);
545 char *buf = &rpc->msgbuf->str[rpc->msgbuf->len - bytes_to_read];
546 switch (g_io_channel_read_chars(rpc->channel, buf,
547 bytes_to_read, &bytes_read, NULL)) {
548 case G_IO_STATUS_NORMAL:
550 case G_IO_STATUS_AGAIN:
552 case G_IO_STATUS_EOF:
553 if (bytes_read == bytes_to_read)
555 /* else fall through */
556 case G_IO_STATUS_ERROR:
557 fprintf(stderr, "Unexpected error or end of file on RPC stream %d!\n",
558 g_io_channel_unix_get_fd(rpc->channel));
559 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
563 g_assert(bytes_read >= 0 && bytes_read <= bytes_to_read);
565 g_string_set_size(rpc->msgbuf,
566 rpc->msgbuf->len - (bytes_to_read - bytes_read));
568 if (rpc->frag_len == 0) {
569 /* Handle reading in the fragment header. If we've read the complete
570 * header, store the fragment size. */
571 rpc->frag_hdr_bytes += bytes_read;
572 if (rpc->frag_hdr_bytes == 4) {
573 memcpy((char *)&rpc->frag_len,
574 &rpc->msgbuf->str[rpc->msgbuf->len - 4], 4);
575 rpc->frag_len = ntohl(rpc->frag_len);
576 g_string_set_size(rpc->msgbuf, rpc->msgbuf->len - 4);
577 rpc->frag_hdr_bytes = 0;
578 g_print("RPC fragment header: %08x\n", rpc->frag_len);
581 /* We were reading in the fragment body. */
582 rpc->frag_len -= bytes_read;
584 if (rpc->frag_len = 0x80000000) {
585 /* We have a complete message since this was the last fragment and
586 * there are no more bytes in it. Dispatch the message. */
587 g_print("Complete RPC message: %zd bytes\n", rpc->msgbuf->len);
588 if (!async_rpc_dispatch(rpc)) {
589 fprintf(stderr, "Invalid RPC message, closing channel\n");
590 g_io_channel_shutdown(rpc->channel, TRUE, NULL);
594 g_string_set_size(rpc->msgbuf, 0);
/* GLib I/O watch callback for the listening socket: accept one connection and
 * start watching it for incoming RPC data.
 *
 * channel:   channel wrapping the listening socket.
 * condition: the I/O condition that fired (not used in the visible lines).
 *
 * NOTE(review): not every line of this function is visible in this listing
 * (the third parameter, the error check around accept() and the return
 * statements are missing). */
601 static gboolean async_rpc_do_accept(GIOChannel *channel,
602 GIOCondition condition,
605 int fd = g_io_channel_unix_get_fd(channel);
606 struct sockaddr_in addr;
607 socklen_t addrlen = sizeof(addr);
609 g_print("Received new connection on fd %d!\n", fd);
610 int nfd = accept(fd, (struct sockaddr *)&addr, &addrlen);
612 fprintf(stderr, "Error accepting connection: %m\n");
/* Per-connection state: zero-filled, with a channel wrapping the accepted
 * socket and an empty message buffer. */
616 RPCConnection *rpc = g_new0(RPCConnection, 1);
617 rpc->channel = g_io_channel_unix_new(nfd);
618 rpc->msgbuf = g_string_new("");
/* A NULL encoding makes the channel pass bytes through unmodified, which is
 * what a binary protocol needs. */
619 g_io_channel_set_encoding(rpc->channel, NULL, NULL);
/* Call async_rpc_do_read whenever the new connection has data to read. */
620 GSource *source = g_io_create_watch(rpc->channel, G_IO_IN);
621 g_source_set_callback(source, (GSourceFunc)async_rpc_do_read,
623 g_source_attach(source, main_context);
/* Drop the local reference to the source now that it has been attached. */
624 g_source_unref(source);
/* Watch the listening socket fd on main_context so that async_rpc_do_accept
 * is called whenever a connection is waiting to be accepted.
 *
 * fd: a socket that is already listening.
 *
 * NOTE(review): the definition has no return type (implicit int, which C99
 * and later no longer allow) and no return statement is visible; it should
 * most likely be declared void.
 * NOTE(review): the line carrying the remaining g_source_set_callback()
 * arguments is not visible in this listing. */
629 static async_rpc_register_listening(int fd)
631 GIOChannel *channel = g_io_channel_unix_new(fd);
/* A NULL encoding makes the channel pass bytes through unmodified. */
632 g_io_channel_set_encoding(channel, NULL, NULL);
633 GSource *source = g_io_create_watch(channel, G_IO_IN);
634 g_source_set_callback(source, (GSourceFunc)async_rpc_do_accept,
636 g_source_attach(source, main_context);
/* Drop the local reference to the source now that it has been attached. */
637 g_source_unref(source);
/* Thread entry point (it is handed to g_thread_create() in this file): runs
 * the GLib main loop that drives the asynchronous RPC layer.
 *
 * data: not used in the visible lines.
 * NOTE(review): the return statement is not visible in this listing. */
640 static gpointer async_rpc_run(gpointer data)
642 g_print("Starting NFS main loop...\n");
643 g_main_loop_run(main_loop);
/* Body of the routine that sets up the MOUNT and NFS services.
 * NOTE(review): the function's signature, some declarations and the
 * error-exit statements are not visible in this listing, so the comments
 * describe the visible statements only. */
/* MOUNT protocol (version 3): clear any previous portmapper registration,
 * then register mount_program_3 on a UDP and on a TCP transport created by
 * the Sun RPC library. */
653 pmap_unset (MOUNT_PROGRAM, MOUNT_V3);
655 transp = svcudp_create(RPC_ANYSOCK);
656 if (transp == NULL) {
657 fprintf(stderr, "%s", "cannot create udp service.");
660 if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3, mount_program_3, IPPROTO_UDP)) {
661 fprintf(stderr, "%s", "unable to register (MOUNT_PROGRAM, MOUNT_V3, udp).");
665 transp = svctcp_create(RPC_ANYSOCK, 0, 0);
666 if (transp == NULL) {
667 fprintf(stderr, "%s", "cannot create tcp service.");
670 if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3, mount_program_3, IPPROTO_TCP)) {
671 fprintf(stderr, "%s", "unable to register (MOUNT_PROGRAM, MOUNT_V3, tcp).");
/* NFS is not served through the Sun RPC library's transports: a plain TCP
 * socket is created here and handed to the asynchronous RPC layer. */
675 /* NFS protocol (version 3) */
676 pmap_unset (NFS_PROGRAM, NFS_V3);
678 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
680 fprintf(stderr, "Unable to create NFS TCP socket: %m\n");
/* NOTE(review): n is declared on a line not visible here. */
685 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n));
/* Bind to NFS_SERVICE_PORT on all local addresses. */
687 struct sockaddr_in addr;
688 addr.sin_family = AF_INET;
689 addr.sin_port = htons(NFS_SERVICE_PORT);
690 addr.sin_addr.s_addr = INADDR_ANY;
691 if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
692 fprintf(stderr, "Unable to bind to NFS TCP address: %m\n");
696 if (listen(fd, SOMAXCONN) < 0) {
697 fprintf(stderr, "Unable to listen on NFS TCP socket: %m\n");
/* Tell the portmapper that NFS version 3 over TCP lives on this port. */
701 if (!pmap_set(NFS_PROGRAM, NFS_V3, IPPROTO_TCP, NFS_SERVICE_PORT)) {
702 fprintf(stderr, "Could not register NFS RPC service!\n");
/* Accept connections from the GLib main loop, which runs in its own thread
 * (created joinable). */
706 async_rpc_register_listening(fd);
708 g_thread_create(async_rpc_run, NULL, TRUE, NULL);