Release
[nbd.git] / nbd-server.c
blob270eb681b26a4f459c656db23903a5a0950ead08
1 /*
2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
34 * <wouter@debian.org>
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
43 * <wouter@debian.org>
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
51 * <wouter@debian.org>
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
60 #include "lfs.h"
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
63 #if NEED_BSD_SOURCE
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
65 #endif
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
68 #include <assert.h>
69 #include <sys/types.h>
70 #include <sys/socket.h>
71 #include <sys/stat.h>
72 #include <sys/select.h>
73 #include <sys/wait.h>
74 #include <sys/un.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
77 #endif
78 #ifdef HAVE_SYS_UIO_H
79 #include <sys/uio.h>
80 #endif
81 #include <sys/param.h>
82 #include <signal.h>
83 #include <errno.h>
84 #include <libgen.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
87 #include <netdb.h>
88 #include <syslog.h>
89 #include <unistd.h>
90 #include <stdbool.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <fcntl.h>
95 #if HAVE_FALLOC_PH
96 #include <linux/falloc.h>
97 #endif
98 #include <arpa/inet.h>
99 #include <strings.h>
100 #include <dirent.h>
101 #ifdef HAVE_SYS_DIR_H
102 #include <sys/dir.h>
103 #endif
104 #ifdef HAVE_SYS_DIRENT_H
105 #include <sys/dirent.h>
106 #endif
107 #include <getopt.h>
108 #include <pwd.h>
109 #include <grp.h>
110 #include <dirent.h>
111 #include <ctype.h>
112 #include <inttypes.h>
114 #include <glib.h>
116 #if HAVE_OLD_GLIB
117 #include <pthread.h>
118 #endif
120 #include <semaphore.h>
122 /* used in cliserv.h, so must come first */
123 #define MY_NAME "nbd_server"
124 #include "cliserv.h"
125 #include "nbd-debug.h"
126 #include "netdb-compat.h"
127 #include "backend.h"
128 #include "treefiles.h"
130 #ifdef WITH_SDP
131 #include <sdp_inet.h>
132 #endif
134 #if HAVE_FSCTL_SET_ZERO_DATA
135 #include <io.h>
136 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
137 #define NOMINMAX 1
138 #include <windef.h>
139 #include <winbase.h>
140 #include <winioctl.h>
141 #endif
143 /** Default position of the config file */
144 #ifndef SYSCONFDIR
145 #define SYSCONFDIR "/etc"
146 #endif
147 #define CFILE SYSCONFDIR "/nbd-server/config"
149 #if HAVE_GNUTLS
150 #include <gnutls/gnutls.h>
151 #include <gnutls/x509.h>
152 #endif
154 /** Where our config file actually is */
155 gchar* config_file_pos;
157 /** global flags */
158 int glob_flags=0;
160 /* Whether we should avoid forking */
161 int dontfork = 0;
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is the value with every bit set except the sign bit.
 *
 * The value is computed in unsigned arithmetic and then converted:
 * left-shifting a signed 1 into the sign bit is undefined behaviour in C.
 * The expansion is fully parenthesised so it can be used inside any
 * expression.
 */
#define OFFT_MAX ((off_t)((((uintmax_t)1) << (sizeof(off_t) * 8 - 1)) - 1))
168 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
169 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
171 /** Global flags: */
172 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
173 #define F_LIST 2 /**< Allow clients to list the exports on a server */
174 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
175 #define F_DUAL_LISTEN 8 /**< Listen on both TCP and unix socket */
176 // also accepts F_FORCEDTLS (which is 16384)
177 GHashTable *children;
178 char pidfname[256]; /**< name of our PID file */
179 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
181 #define NEG_INIT (1 << 0)
182 #define NEG_OLD (1 << 1)
183 #define NEG_MODERN (1 << 2)
186 * If we want what the system really has set we'd have to read
187 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
189 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
190 #define SPLICE_IN 0
191 #define SPLICE_OUT 1
193 #include <nbdsrv.h>
195 /* Our thread pool */
196 GThreadPool *tpool;
198 /* A work package for the thread pool functions */
199 struct work_package {
200 CLIENT* client;
201 struct nbd_request* req;
202 int pipefd[2];
203 void* data; /**< for read requests */
206 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
207 SIGCHLD handler
208 to mark a child
209 exit */
211 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
212 SIGTERM handler
213 to mark a exit
214 request */
216 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
217 handler to mark a
218 reconfiguration
219 request */
221 GArray* modernsocks; /**< Sockets for the modern handler. Not used
222 if a client was only specified on the
223 command line; only port used if
224 oldstyle is set to false (and then the
225 command-line client isn't used, gna gna).
226 This may be more than one socket on
227 systems that don't support serving IPv4
228 and IPv6 from the same socket (like,
229 e.g., FreeBSD) */
230 GArray* childsocks; /**< parent-side sockets for communication with children */
231 int commsocket; /**< child-side socket for communication with parent */
232 static sem_t file_wait_sem;
234 bool logged_oversized=false; /**< whether we logged oversized requests already */
/**
 * The kinds of value a configuration file parameter can hold.
 */
typedef enum {
	PARAM_INT,	/**< This parameter is an integer (stored as gint) */
	PARAM_INT64,	/**< This parameter is a 64-bit integer (stored as gint64) */
	PARAM_STRING,	/**< This parameter is a string */
	PARAM_BOOL,	/**< This parameter is a boolean */
} PARAM_TYPE;
247 * Configuration file values
249 typedef struct {
250 gchar *paramname; /**< Name of the parameter, as it appears in
251 the config file */
252 gboolean required; /**< Whether this is a required (as opposed to
253 optional) parameter */
254 PARAM_TYPE ptype; /**< Type of the parameter. */
255 gpointer target; /**< Pointer to where the data of this
256 parameter should be written. If ptype is
257 PARAM_BOOL, the data is or'ed rather than
258 overwritten. */
259 gint flagval; /**< Flag mask for this parameter in case ptype
260 is PARAM_BOOL. */
261 } PARAM;
264 * Configuration file values of the "generic" section
266 struct generic_conf {
267 gchar *user; /**< user we run the server as */
268 gchar *group; /**< group we run running as */
269 gchar *modernaddr; /**< address of the modern socket */
270 gchar *modernport; /**< port of the modern socket */
271 gchar *unixsock; /**< file name of the unix domain socket */
272 gchar *certfile; /**< certificate file */
273 gchar *keyfile; /**< key file */
274 gchar *cacertfile; /**< CA certificate file */
275 gchar *tlsprio; /**< TLS priority string */
276 gint flags; /**< global flags */
277 gint threads; /**< maximum number of parallel threads we want to run */
281 * Translate a command name into human readable form
283 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
284 * @return pointer to the command name
286 static inline const char * getcommandname(uint64_t command) {
287 switch (command) {
288 case NBD_CMD_READ:
289 return "NBD_CMD_READ";
290 case NBD_CMD_WRITE:
291 return "NBD_CMD_WRITE";
292 case NBD_CMD_DISC:
293 return "NBD_CMD_DISC";
294 case NBD_CMD_FLUSH:
295 return "NBD_CMD_FLUSH";
296 case NBD_CMD_TRIM:
297 return "NBD_CMD_TRIM";
298 case NBD_CMD_WRITE_ZEROES:
299 return "NBD_CMD_WRITE_ZEROES";
300 default:
301 return "UNKNOWN";
305 #if HAVE_GNUTLS
306 static int writeit_tls(gnutls_session_t s, void *buf, size_t len) {
307 ssize_t res;
308 char *m;
309 while(len > 0) {
310 DEBUG("+");
311 if ((res = gnutls_record_send(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
312 m = g_strdup_printf("issue while sending data: %s", gnutls_strerror(res));
313 err_nonfatal(m);
314 g_free(m);
315 } else if(res < 0) {
316 m = g_strdup_printf("could not send data: %s", gnutls_strerror(res));
317 err_nonfatal(m);
318 g_free(m);
319 return -1;
320 } else {
321 len -= res;
322 buf += res;
325 return 0;
328 static int readit_tls(gnutls_session_t s, void *buf, size_t len) {
329 ssize_t res;
330 char *m;
331 while(len > 0) {
332 DEBUG("*");
333 if((res = gnutls_record_recv(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
334 m = g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res));
335 err_nonfatal(m);
336 g_free(m);
337 } else if(res < 0) {
338 m = g_strdup_printf("could not receive data: %s", gnutls_strerror(res));
339 err_nonfatal(m);
340 g_free(m);
341 return -1;
342 } else {
343 len -= res;
344 buf += res;
347 return 0;
350 static int socket_read_tls(CLIENT* client, void *buf, size_t len) {
351 return readit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
354 static int socket_write_tls(CLIENT* client, void *buf, size_t len) {
355 return writeit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
357 #endif // HAVE_GNUTLS
359 static int socket_read_notls(CLIENT* client, void *buf, size_t len) {
360 return readit(client->net, buf, len);
363 static int socket_write_notls(CLIENT* client, void *buf, size_t len) {
364 return writeit(client->net, buf, len);
367 static void socket_read(CLIENT* client, void *buf, size_t len) {
368 g_assert(client->socket_read != NULL);
369 if(client->socket_read(client, buf, len)<0) {
370 g_assert(client->socket_closed != NULL);
371 client->socket_closed(client);
376 * Consume data from a socket that we don't want
378 * @param c the client to read from
379 * @param len the number of bytes to consume
380 * @param buf a buffer
381 * @param bufsiz the size of the buffer
383 static inline void consume(CLIENT* c, size_t len, void * buf, size_t bufsiz) {
384 size_t curlen;
385 while (len>0) {
386 curlen = (len>bufsiz)?bufsiz:len;
387 socket_read(c, buf, curlen);
388 len -= curlen;
393 * Consume a length field and corresponding payload that we don't want
395 * @param c the client to read from
397 static inline void consume_len(CLIENT* c) {
398 uint32_t len;
399 char buf[1024];
401 socket_read(c, &len, sizeof(len));
402 len = ntohl(len);
403 consume(c, len, buf, sizeof(buf));
406 static void socket_write(CLIENT* client, void *buf, size_t len) {
407 g_assert(client->socket_write != NULL);
408 if(client->socket_write(client, buf, len)<0) {
409 g_assert(client->socket_closed != NULL);
410 client->socket_closed(client);
414 static inline void socket_closed_negotiate(CLIENT* client) {
415 err("Negotiation failed: %m");
419 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
420 * options
422 * @param command the command to be ran. Read from the config file
423 * @param file the file name we're about to export
425 int do_run(gchar* command, gchar* file) {
426 gchar* cmd;
427 int retval=0;
429 if(command && *command) {
430 cmd = g_strdup_printf(command, file);
431 retval=system(cmd);
432 g_free(cmd);
434 return retval;
437 static inline void finalize_client(CLIENT* client) {
438 g_thread_pool_free(tpool, FALSE, TRUE);
439 do_run(client->server->postrun, client->exportname);
440 if(client->transactionlogfd != -1) {
441 close(client->transactionlogfd);
442 client->transactionlogfd = -1;
444 if(client->server->flags & F_COPYONWRITE) {
445 unlink(client->difffilename);
449 static inline void socket_closed_transmission(CLIENT* client) {
450 int saved_errno = errno;
451 finalize_client(client);
452 errno = saved_errno;
453 err("Connection dropped: %m");
456 #ifdef HAVE_SPLICE
458 * Splice data between a pipe and a file descriptor
460 * @param fd_in The fd to splice from.
461 * @param off_in The fd_in offset to splice from.
462 * @param fd_out The fd to splice to.
463 * @param off_out The fd_out offset to splice to.
464 * @param len The length to splice.
466 static inline void spliceit(int fd_in, loff_t *off_in, int fd_out,
467 loff_t *off_out, size_t len)
469 ssize_t ret;
470 while (len > 0) {
471 if ((ret = splice(fd_in, off_in, fd_out, off_out, len,
472 SPLICE_F_MOVE)) <= 0)
473 err("Splice failed: %m");
474 len -= ret;
477 #endif
480 * Print out a message about how to use nbd-server. Split out to a separate
481 * function so that we can call it from multiple places
483 void usage() {
484 printf("This is nbd-server version " VERSION "\n");
485 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
486 "\t-r|--read-only\t\tread only\n"
487 "\t-m|--multi-file\t\tmultiple file\n"
488 "\t-c|--copy-on-write\tcopy on write\n"
489 "\t-C|--config-file\tspecify an alternate configuration file\n"
490 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
491 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
492 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
493 "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
494 "\t-V|--version\toutput the version and exit\n\n"
495 "\tif port is set to 0, stdin is used (for running from inetd).\n"
496 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
497 "\t\taddress of the machine trying to connect\n"
498 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
499 printf("Using configuration file %s\n", CFILE);
500 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT);
503 /* Dumps a config file section of the given SERVER*, and exits. */
504 void dump_section(SERVER* serve, gchar* section_header) {
505 printf("[%s]\n", section_header);
506 printf("\texportname = %s\n", serve->exportname);
507 printf("\tlistenaddr = %s\n", serve->listenaddr);
508 if(serve->flags & F_READONLY) {
509 printf("\treadonly = true\n");
511 if(serve->flags & F_MULTIFILE) {
512 printf("\tmultifile = true\n");
514 if(serve->flags & F_TREEFILES) {
515 printf("\ttreefiles = true\n");
517 if(serve->flags & F_COPYONWRITE) {
518 printf("\tcopyonwrite = true\n");
520 if(serve->expected_size) {
521 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
523 if(serve->authname) {
524 printf("\tauthfile = %s\n", serve->authname);
526 exit(EXIT_SUCCESS);
530 * Parse the command line.
532 * @param argc the argc argument to main()
533 * @param argv the argv argument to main()
535 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) {
536 int i=0;
537 int nonspecial=0;
538 int c;
539 struct option long_options[] = {
540 {"read-only", no_argument, NULL, 'r'},
541 {"multi-file", no_argument, NULL, 'm'},
542 {"copy-on-write", no_argument, NULL, 'c'},
543 {"dont-fork", no_argument, NULL, 'd'},
544 {"authorize-file", required_argument, NULL, 'l'},
545 {"config-file", required_argument, NULL, 'C'},
546 {"pid-file", required_argument, NULL, 'p'},
547 {"output-config", required_argument, NULL, 'o'},
548 {"max-connection", required_argument, NULL, 'M'},
549 {"version", no_argument, NULL, 'V'},
550 {0,0,0,0}
552 SERVER *serve;
553 off_t es;
554 size_t last;
555 char suffix;
556 bool do_output=false;
557 gchar* section_header="";
558 gchar** addr_port;
560 if(argc==1) {
561 return NULL;
563 serve=g_new0(SERVER, 1);
564 serve->authname = g_strdup(default_authname);
565 serve->virtstyle=VIRT_IPLIT;
566 while((c=getopt_long(argc, argv, "-C:cwdl:mo:rp:M:V", long_options, &i))>=0) {
567 switch (c) {
568 case 1:
569 /* non-option argument */
570 switch(nonspecial++) {
571 case 0:
572 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
573 addr_port=g_strsplit(optarg, ":", 2);
575 /* Check for "@" - maybe user using this separator
576 for IPv4 address */
577 if(!addr_port[1]) {
578 g_strfreev(addr_port);
579 addr_port=g_strsplit(optarg, "@", 2);
581 } else {
582 addr_port=g_strsplit(optarg, "@", 2);
585 if(addr_port[1]) {
586 genconf->modernport=g_strdup(addr_port[1]);
587 genconf->modernaddr=g_strdup(addr_port[0]);
588 } else {
589 g_free(genconf->modernaddr);
590 genconf->modernaddr=NULL;
591 genconf->modernport=g_strdup(addr_port[0]);
593 g_strfreev(addr_port);
594 break;
595 case 1:
596 serve->exportname = g_strdup(optarg);
597 if(serve->exportname[0] != '/') {
598 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
599 exit(EXIT_FAILURE);
601 break;
602 case 2:
603 last=strlen(optarg)-1;
604 suffix=optarg[last];
605 if (suffix == 'k' || suffix == 'K' ||
606 suffix == 'm' || suffix == 'M')
607 optarg[last] = '\0';
608 es = (off_t)atoll(optarg);
609 switch (suffix) {
610 case 'm':
611 case 'M': es <<= 10;
612 case 'k':
613 case 'K': es <<= 10;
614 default : break;
616 serve->expected_size = es;
617 break;
619 break;
620 case 'r':
621 serve->flags |= F_READONLY;
622 break;
623 case 'm':
624 serve->flags |= F_MULTIFILE;
625 break;
626 case 'o':
627 do_output = true;
628 section_header = g_strdup(optarg);
629 break;
630 case 'p':
631 strncpy(pidfname, optarg, 256);
632 pidfname[255]='\0';
633 break;
634 case 'c':
635 serve->flags |=F_COPYONWRITE;
636 break;
637 case 'd':
638 dontfork = 1;
639 break;
640 case 'C':
641 g_free(config_file_pos);
642 config_file_pos=g_strdup(optarg);
643 break;
644 case 'l':
645 g_free(serve->authname);
646 serve->authname=g_strdup(optarg);
647 break;
648 case 'M':
649 serve->max_connections = strtol(optarg, NULL, 0);
650 break;
651 case 'V':
652 printf("This is nbd-server version " VERSION "\n");
653 exit(EXIT_SUCCESS);
654 break;
655 default:
656 usage();
657 exit(EXIT_FAILURE);
658 break;
661 /* What's left: the port to export, the name of the to be exported
662 * file, and, optionally, the size of the file, in that order. */
663 if(nonspecial<2) {
664 g_free(serve);
665 serve=NULL;
666 } else {
667 serve->servename = "";
669 if(do_output) {
670 if(!serve) {
671 g_critical("Need a complete configuration on the command line to output a config file section!");
672 exit(EXIT_FAILURE);
674 dump_section(serve, section_header);
676 return serve;
679 /* forward definition of parse_cfile */
680 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
682 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
683 #define NBD_D_TYPE de->d_type
684 #else
685 #define NBD_D_TYPE 0
686 #define DT_UNKNOWN 0
687 #define DT_REG 1
688 #endif
691 * Parse config file snippets in a directory. Uses readdir() and friends
692 * to find files and open them, then passes them on to parse_cfile
693 * with have_global set false
695 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
696 DIR* dirh = opendir(dir);
697 struct dirent* de;
698 gchar* fname;
699 GArray* retval = NULL;
700 GArray* tmp;
701 struct stat stbuf;
703 if(!dirh) {
704 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
705 return NULL;
707 errno=0;
708 while((de = readdir(dirh))) {
709 int saved_errno=errno;
710 fname = g_build_filename(dir, de->d_name, NULL);
711 switch(NBD_D_TYPE) {
712 case DT_UNKNOWN:
713 /* Filesystem doesn't return type of
714 * file through readdir. Run stat() on
715 * the file instead */
716 if(stat(fname, &stbuf)) {
717 perror("stat");
718 goto err_out;
720 if (!S_ISREG(stbuf.st_mode)) {
721 goto next;
723 case DT_REG:
724 /* Skip unless the name ends with '.conf' */
725 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
726 goto next;
728 tmp = parse_cfile(fname, genconf, false, e);
729 errno=saved_errno;
730 if(*e) {
731 goto err_out;
733 if(!retval)
734 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
735 retval = g_array_append_vals(retval, tmp->data, tmp->len);
736 g_array_free(tmp, TRUE);
737 default:
738 break;
740 next:
741 g_free(fname);
743 if(errno) {
744 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
745 err_out:
746 if(retval)
747 g_array_free(retval, TRUE);
748 retval = NULL;
750 if(dirh)
751 closedir(dirh);
752 return retval;
756 * Parse the config file.
758 * @param f the name of the config file
760 * @param genconf a pointer to generic configuration which will get
761 * updated with parsed values. If NULL, then parsed generic
762 * configuration values are safely and silently discarded.
764 * @param e a GError. Error code can be any of the following:
765 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
766 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
767 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
769 * @param expect_generic if true, we expect a configuration file that
770 * contains a [generic] section. If false, we don't.
772 * @return a GArray of SERVER* pointers. If the config file is empty or does not
773 * exist, returns an empty GArray; if the config file contains an
774 * error, returns NULL, and e is set appropriately
776 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
777 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
778 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
779 gchar* cfdir = NULL;
780 SERVER s;
781 gchar *virtstyle=NULL;
782 PARAM lp[] = {
783 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
784 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
785 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
786 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
787 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
788 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
789 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
790 { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 },
791 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
792 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
793 { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES },
794 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
795 { "waitfile", FALSE, PARAM_BOOL, &(s.flags), F_WAIT },
796 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
797 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
798 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
799 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
800 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
801 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
802 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
803 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
804 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
805 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
806 { "force_tls", FALSE, PARAM_BOOL, &(s.flags), F_FORCEDTLS },
807 { "splice", FALSE, PARAM_BOOL, &(s.flags), F_SPLICE},
809 const int lp_size=sizeof(lp)/sizeof(PARAM);
810 struct generic_conf genconftmp;
811 PARAM gp[] = {
812 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
813 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
814 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
815 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
816 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
817 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
818 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
819 { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 },
820 { "duallisten", FALSE, PARAM_BOOL, &(genconftmp.flags), F_DUAL_LISTEN }, // Used to listen on both TCP and unix socket
821 { "max_threads", FALSE, PARAM_INT, &(genconftmp.threads), 0 },
822 { "force_tls", FALSE, PARAM_BOOL, &(genconftmp.flags), F_FORCEDTLS },
823 { "certfile", FALSE, PARAM_STRING, &(genconftmp.certfile), 0 },
824 { "keyfile", FALSE, PARAM_STRING, &(genconftmp.keyfile), 0 },
825 { "cacertfile", FALSE, PARAM_STRING, &(genconftmp.cacertfile), 0 },
826 { "tlsprio", FALSE, PARAM_STRING, &(genconftmp.tlsprio), 0 },
828 PARAM* p=gp;
829 int p_size=sizeof(gp)/sizeof(PARAM);
830 GKeyFile *cfile;
831 GError *err = NULL;
832 const char *err_msg=NULL;
833 GArray *retval=NULL;
834 gchar **groups;
835 gboolean bval;
836 gint ival;
837 gint64 i64val;
838 gchar* sval;
839 gchar* startgroup;
840 gint i;
841 gint j;
843 memset(&genconftmp, 0, sizeof(struct generic_conf));
845 genconftmp.tlsprio = "NORMAL:-VERS-TLS-ALL:+VERS-TLS1.2:%SERVER_PRECEDENCE";
847 if (genconf) {
848 /* Use the passed configuration values as defaults. The
849 * parsing algorithm below updates all parameter targets
850 * found from configuration files. */
851 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
854 cfile = g_key_file_new();
855 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
856 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
857 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
858 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
859 f, err->message);
860 g_key_file_free(cfile);
861 return retval;
863 startgroup = g_key_file_get_start_group(cfile);
864 if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
865 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
866 g_key_file_free(cfile);
867 return NULL;
869 groups = g_key_file_get_groups(cfile, NULL);
870 for(i=0;groups[i];i++) {
871 memset(&s, '\0', sizeof(SERVER));
873 /* After the [generic] group or when we're parsing an include
874 * directory, start parsing exports */
875 if(i==1 || !expect_generic) {
876 p=lp;
877 p_size=lp_size;
879 for(j=0;j<p_size;j++) {
880 assert(p[j].target != NULL);
881 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
882 switch(p[j].ptype) {
883 case PARAM_INT:
884 ival = g_key_file_get_integer(cfile,
885 groups[i],
886 p[j].paramname,
887 &err);
888 if(!err) {
889 *((gint*)p[j].target) = ival;
891 break;
892 case PARAM_INT64:
893 i64val = g_key_file_get_int64(cfile,
894 groups[i],
895 p[j].paramname,
896 &err);
897 if(!err) {
898 *((gint64*)p[j].target) = i64val;
900 break;
901 case PARAM_STRING:
902 sval = g_key_file_get_string(cfile,
903 groups[i],
904 p[j].paramname,
905 &err);
906 if(!err) {
907 *((gchar**)p[j].target) = sval;
909 break;
910 case PARAM_BOOL:
911 bval = g_key_file_get_boolean(cfile,
912 groups[i],
913 p[j].paramname, &err);
914 if(!err) {
915 if(bval) {
916 *((gint*)p[j].target) |= p[j].flagval;
917 } else {
918 *((gint*)p[j].target) &= ~(p[j].flagval);
921 break;
923 if(err) {
924 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
925 if(!p[j].required) {
926 /* Ignore not-found error for optional values */
927 g_clear_error(&err);
928 continue;
929 } else {
930 err_msg = MISSING_REQUIRED_ERROR;
932 } else {
933 err_msg = DEFAULT_ERROR;
935 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
936 g_array_free(retval, TRUE);
937 g_error_free(err);
938 g_key_file_free(cfile);
939 return NULL;
942 if(virtstyle) {
943 if(!strncmp(virtstyle, "none", 4)) {
944 s.virtstyle=VIRT_NONE;
945 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
946 s.virtstyle=VIRT_IPLIT;
947 } else if(!strncmp(virtstyle, "iphash", 6)) {
948 s.virtstyle=VIRT_IPHASH;
949 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
950 s.virtstyle=VIRT_CIDR;
951 if(strlen(virtstyle)<10) {
952 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
953 g_array_free(retval, TRUE);
954 g_key_file_free(cfile);
955 return NULL;
957 s.cidrlen=strtol(virtstyle+8, NULL, 0);
958 } else {
959 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
960 g_array_free(retval, TRUE);
961 g_key_file_free(cfile);
962 return NULL;
964 } else {
965 s.virtstyle=VIRT_IPLIT;
967 if(genconftmp.flags & F_OLDSTYLE) {
968 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
969 g_message("Exiting.");
970 return NULL;
972 #ifndef HAVE_SPLICE
973 if (s.flags & F_SPLICE) {
974 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without splice support, yet group %s uses it", groups[i]);
975 g_array_free(retval, TRUE);
976 g_key_file_free(cfile);
977 return NULL;
979 #endif
980 /* We can't mix copyonwrite and splice. */
981 if ((s.flags & F_COPYONWRITE) && (s.flags & F_SPLICE)) {
982 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_SPLICE,
983 "Cannot mix copyonwrite with splice for an export in group %s",
984 groups[i]);
985 g_array_free(retval, TRUE);
986 g_key_file_free(cfile);
987 return NULL;
989 if ((s.flags & F_COPYONWRITE) && (s.flags & F_WAIT)) {
990 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_WAIT,
991 "Cannot mix copyonwrite with waitfile for an export in group %s",
992 groups[i]);
993 g_array_free(retval, TRUE);
994 g_key_file_free(cfile);
995 return NULL;
997 /* Don't need to free this, it's not our string */
998 virtstyle=NULL;
999 /* Don't append values for the [generic] group */
1000 if(i>0 || !expect_generic) {
1001 s.servename = groups[i];
1003 g_array_append_val(retval, s);
1005 #ifndef WITH_SDP
1006 if(s.flags & F_SDP) {
1007 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
1008 g_array_free(retval, TRUE);
1009 g_key_file_free(cfile);
1010 return NULL;
1012 #endif
1014 g_key_file_free(cfile);
1015 if(cfdir) {
1016 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
1017 if(extra) {
1018 retval = g_array_append_vals(retval, extra->data, extra->len);
1019 i+=extra->len;
1020 g_array_free(extra, TRUE);
1021 } else {
1022 if(*e) {
1023 g_array_free(retval, TRUE);
1024 return NULL;
1028 if(i==1 && expect_generic) {
1029 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
1032 if (genconf) {
1033 /* Return the updated generic configuration through the
1034 * pointer parameter. */
1035 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
1038 return retval;
1042 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1043 * main loop of the root server process. This allows us to separate the signal
1044 * catching from th actual task triggered by SIGCHLD and hence processing in the
1045 * interrupt context is kept as minimial as possible.
1047 * @param s the signal we're handling (must be SIGCHLD, or something
1048 * is severely wrong)
1050 static void sigchld_handler(const int s G_GNUC_UNUSED) {
1051 is_sigchld_caught = 1;
1055 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1057 * @param key the key
1058 * @param value the value corresponding to the above key
1059 * @param user_data a pointer which we always set to 1, so that we know what
1060 * will happen next.
1062 void killchild(gpointer key, gpointer value, gpointer user_data) {
1063 pid_t *pid=value;
1065 kill(*pid, SIGTERM);
1069 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1070 * main loop of the root server process. This allows us to separate the signal
1071 * catching from th actual task triggered by SIGTERM and hence processing in the
1072 * interrupt context is kept as minimial as possible.
1074 * @param s the signal we're handling (must be SIGTERM, or something
1075 * is severely wrong).
1077 static void sigterm_handler(const int s G_GNUC_UNUSED) {
1078 is_sigterm_caught = 1;
1082 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1083 * the main loop of the root server process. This allows us to separate
1084 * the signal catching from th actual task triggered by SIGHUP and hence
1085 * processing in the interrupt context is kept as minimial as possible.
1087 * @param s the signal we're handling (must be SIGHUP, or something
1088 * is severely wrong).
1090 static void sighup_handler(const int s G_GNUC_UNUSED) {
1091 is_sighup_caught = 1;
1094 static void sigusr1_handler(const int s G_GNUC_UNUSED) {
1095 msg(LOG_INFO, "Got SIGUSR1");
1096 sem_post(&file_wait_sem);
1100 * Get the file handle and offset, given an export offset.
1102 * @param client The client we're serving for
1103 * @param a The offset to get corresponding file/offset for
1104 * @param fhandle [out] File descriptor
1105 * @param foffset [out] Offset into fhandle
1106 * @param maxbytes [out] Tells how many bytes can be read/written
1107 * from fhandle starting at foffset (0 if there is no limit)
1108 * @return 0 on success, -1 on failure
1110 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1111 GArray * const export = client->export;
1113 /* Negative offset not allowed */
1114 if(a < 0)
1115 return -1;
1117 /* Open separate file for treefiles */
1118 if (client->server->flags & F_TREEFILES) {
1119 *foffset = a % TREEPAGESIZE;
1120 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
1121 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock);
1122 return 0;
1125 /* Binary search for last file with starting offset <= a */
1126 FILE_INFO fi;
1127 int start = 0;
1128 int end = export->len - 1;
1129 while( start <= end ) {
1130 int mid = (start + end) / 2;
1131 fi = g_array_index(export, FILE_INFO, mid);
1132 if( fi.startoff < a ) {
1133 start = mid + 1;
1134 } else if( fi.startoff > a ) {
1135 end = mid - 1;
1136 } else {
1137 start = end = mid;
1138 break;
1142 /* end should never go negative, since first startoff is 0 and a >= 0 */
1143 assert(end >= 0);
1145 fi = g_array_index(export, FILE_INFO, end);
1146 *fhandle = fi.fhandle;
1147 *foffset = a - fi.startoff;
1148 *maxbytes = 0;
1149 if( end+1 < export->len ) {
1150 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1151 *maxbytes = fi_next.startoff - a;
1154 return 0;
1158 * Write an amount of bytes at a given offset to the right file. This
1159 * abstracts the write-side of the multiple file option.
1161 * @param a The offset where the write should start
1162 * @param buf The buffer to write from
1163 * @param len The length of buf
1164 * @param client The client we're serving for
1165 * @param fua Flag to indicate 'Force Unit Access'
1166 * @return The number of bytes actually written, or -1 in case of an error
1168 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1169 int fhandle;
1170 off_t foffset;
1171 size_t maxbytes;
1172 ssize_t retval;
1174 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1175 return -1;
1176 if(maxbytes && len > maxbytes)
1177 len = maxbytes;
1179 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1181 retval = pwrite(fhandle, buf, len, foffset);
1182 if(client->server->flags & F_SYNC) {
1183 fsync(fhandle);
1184 } else if (fua) {
1186 /* This is where we would do the following
1187 * #ifdef USE_SYNC_FILE_RANGE
1188 * However, we don't, for the reasons set out below
1189 * by Christoph Hellwig <hch@infradead.org>
1191 * [BEGINS]
1192 * fdatasync is equivalent to fsync except that it does not flush
1193 * non-essential metadata (basically just timestamps in practice), but it
1194 * does flush metadata requried to find the data again, e.g. allocation
1195 * information and extent maps. sync_file_range does nothing but flush
1196 * out pagecache content - it means you basically won't get your data
1197 * back in case of a crash if you either:
1199 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1200 * b) are using a sparse file on a filesystem
1201 * c) are using a fallocate-preallocated file on a filesystem
1202 * d) use any file on a COW filesystem like btrfs
1204 * e.g. it only does anything useful for you if you do not have a volatile
1205 * write cache, and either use a raw block device node, or just overwrite
1206 * an already fully allocated (and not preallocated) file on a non-COW
1207 * filesystem.
1208 * [ENDS]
1210 * What we should do is open a second FD with O_DSYNC set, then write to
1211 * that when appropriate. However, with a Linux client, every REQ_FUA
1212 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1213 * problems.
1216 #if 0
1217 sync_file_range(fhandle, foffset, len,
1218 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1219 SYNC_FILE_RANGE_WAIT_AFTER);
1220 #else
1221 fdatasync(fhandle);
1222 #endif
1224 /* close file pointer in case of treefiles */
1225 if (client->server->flags & F_TREEFILES) {
1226 close(fhandle);
1228 return retval;
1232 * Call rawexpwrite repeatedly until all data has been written.
1234 * @param a The offset where the write should start
1235 * @param buf The buffer to write from
1236 * @param len The length of buf
1237 * @param client The client we're serving for
1238 * @param fua Flag to indicate 'Force Unit Access'
1239 * @return 0 on success, nonzero on failure
1241 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1242 ssize_t ret=0;
1244 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1245 a += ret;
1246 buf += ret;
1247 len -= ret;
1249 return (ret < 0 || len != 0);
1253 * Read an amount of bytes at a given offset from the right file. This
1254 * abstracts the read-side of the multiple files option.
1256 * @param a The offset where the read should start
1257 * @param buf A buffer to read into
1258 * @param len The size of buf
1259 * @param client The client we're serving for
1260 * @return The number of bytes actually read, or -1 in case of an
1261 * error.
1263 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1264 int fhandle;
1265 off_t foffset;
1266 size_t maxbytes;
1267 ssize_t retval;
1269 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1270 return -1;
1271 if(maxbytes && len > maxbytes)
1272 len = maxbytes;
1274 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1276 retval = pread(fhandle, buf, len, foffset);
1277 if (client->server->flags & F_TREEFILES) {
1278 close(fhandle);
1280 return retval;
1284 * Call rawexpread repeatedly until all data has been read.
1285 * @return 0 on success, nonzero on failure
1287 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1288 ssize_t ret=0;
1290 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1291 a += ret;
1292 buf += ret;
1293 len -= ret;
1295 return (ret < 0 || len != 0);
1298 #ifdef HAVE_SPLICE
1299 int rawexpsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir,
1300 int fua)
1302 int fhandle;
1303 off_t foffset;
1304 size_t maxbytes;
1305 ssize_t retval;
1307 if (get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1308 return -1;
1309 if (maxbytes && len > maxbytes)
1310 len = maxbytes;
1312 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1313 (dir == SPLICE_IN) ? "from" : "to", fhandle,
1314 (unsigned long long)a, (unsigned)len);
1317 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1318 * it might, so go ahead and use it.
1320 if (dir == SPLICE_IN) {
1321 retval = splice(fhandle, &foffset, pipe, NULL, len,
1322 SPLICE_F_MOVE);
1323 } else {
1324 retval = splice(pipe, NULL, fhandle, &foffset, len,
1325 SPLICE_F_MOVE);
1326 if (client->server->flags & F_SYNC)
1327 fsync(fhandle);
1328 else if (fua)
1329 fdatasync(fhandle);
1331 if (client->server->flags & F_TREEFILES)
1332 close(fhandle);
1333 return retval;
1337 * Splice an amount of bytes from the given offset from/into the right file
1338 * from/into the given pipe.
1339 * @param pipe The pipe we are using for this splice.
1340 * @param a The offset of the file we are operating on.
1341 * @param len The length of the splice.
1342 * @param client The client we're splicing for.
1343 * @param dir The direction we are doing the splice in.
1344 * @param fua Set if this is a write and we need to fua.
1345 * @return 0 on success, nonzero on failure.
1347 int expsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir, int fua)
1349 ssize_t ret;
1351 while (len > 0 &&
1352 (ret = rawexpsplice(pipe, a, len, client, dir, fua)) > 0) {
1353 a += ret;
1354 len -= ret;
1356 return (ret < 0 || len != 0);
1358 #endif /* HAVE_SPLICE */
1361 * Read an amount of bytes at a given offset from the right file. This
1362 * abstracts the read-side of the copyonwrite stuff, and calls
1363 * rawexpread() with the right parameters to do the actual work.
1364 * @param a The offset where the read should start
1365 * @param buf A buffer to read into
1366 * @param len The size of buf
1367 * @param client The client we're going to read for
1368 * @return 0 on success, nonzero on failure
1370 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1371 off_t rdlen, offset;
1372 off_t mapcnt, mapl, maph, pagestart;
1374 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1376 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1377 return(rawexpread_fully(a, buf, len, client));
1379 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1381 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1382 pagestart=mapcnt*DIFFPAGESIZE;
1383 offset=a-pagestart;
1384 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1385 len : (size_t)DIFFPAGESIZE-offset;
1386 if (!(client->server->flags & F_COPYONWRITE))
1387 pthread_rwlock_rdlock(&client->export_lock);
1388 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1389 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1390 (unsigned long)(client->difmap[mapcnt]));
1391 if (pread(client->difffile, buf, rdlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != rdlen) goto fail;
1392 } else { /* the block is not there */
1393 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1394 DEBUG("Page %llu is not here, and waiting for file\n",
1395 (unsigned long long)mapcnt);
1396 goto fail;
1397 } else {
1398 DEBUG("Page %llu is not here, we read the original one\n",
1399 (unsigned long long)mapcnt);
1400 if(rawexpread_fully(a, buf, rdlen, client)) goto fail;
1403 if (!(client->server->flags & F_COPYONWRITE))
1404 pthread_rwlock_unlock(&client->export_lock);
1405 len-=rdlen; a+=rdlen; buf+=rdlen;
1407 return 0;
1408 fail:
1409 if (!(client->server->flags & F_COPYONWRITE))
1410 pthread_rwlock_unlock(&client->export_lock);
1411 return -1;
1415 * Write an amount of bytes at a given offset to the right file. This
1416 * abstracts the write-side of the copyonwrite option, and calls
1417 * rawexpwrite() with the right parameters to do the actual work.
1419 * @param a The offset where the write should start
1420 * @param buf The buffer to write from
1421 * @param len The length of buf
1422 * @param client The client we're going to write for.
1423 * @param fua Flag to indicate 'Force Unit Access'
1424 * @return 0 on success, nonzero on failure
1426 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1427 char pagebuf[DIFFPAGESIZE];
1428 off_t mapcnt,mapl,maph;
1429 off_t wrlen,rdlen;
1430 off_t pagestart;
1431 off_t offset;
1433 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1436 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1437 return(rawexpwrite_fully(a, buf, len, client, fua));
1439 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1441 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1442 pagestart=mapcnt*DIFFPAGESIZE ;
1443 offset=a-pagestart ;
1444 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1445 len : (size_t)DIFFPAGESIZE-offset;
1447 if (!(client->server->flags & F_COPYONWRITE))
1448 pthread_rwlock_rdlock(&client->export_lock);
1449 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1450 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1451 (unsigned long)(client->difmap[mapcnt])) ;
1452 if (pwrite(client->difffile, buf, wrlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != wrlen) goto fail;
1453 } else { /* the block is not there */
1454 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1455 DEBUG("Page %llu is not here, we put it at %lu\n",
1456 (unsigned long long)mapcnt,
1457 (unsigned long)(client->difmap[mapcnt]));
1458 if ((offset != 0) || (wrlen != DIFFPAGESIZE)){
1459 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1460 DEBUG("error: we can write only whole page while waiting for file\n");
1461 goto fail;
1463 rdlen=DIFFPAGESIZE ;
1464 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1465 goto fail;
1467 memcpy(pagebuf+offset,buf,wrlen) ;
1468 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != DIFFPAGESIZE)
1469 goto fail;
1471 if (!(client->server->flags & F_COPYONWRITE))
1472 pthread_rwlock_unlock(&client->export_lock);
1473 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1475 if (client->server->flags & F_SYNC) {
1476 fsync(client->difffile);
1477 } else if (fua) {
1478 /* open question: would it be cheaper to do multiple sync_file_ranges?
1479 as we iterate through the above?
1481 fdatasync(client->difffile);
1483 return 0;
1484 fail:
1485 if (!(client->server->flags & F_COPYONWRITE))
1486 pthread_rwlock_unlock(&client->export_lock);
1487 return -1;
1493 * Write an amount of zeroes at a given offset to the right file.
1494 * This routine could be optimised by not calling expwrite. However,
1495 * this is by far the simplest way to do it.
1497 * @param req the request
1498 * @param client The client we're going to write for.
1499 * @return 0 on success, nonzero on failure
1501 int expwrite_zeroes(struct nbd_request* req, CLIENT* client, int fua) {
1502 off_t a = req->from;
1503 size_t len = req->len;
1504 size_t maxsize = 64LL*1024LL*1024LL;
1505 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1506 char *buf = calloc (1, maxsize);
1507 int ret;
1508 while (len > 0) {
1509 size_t l = len;
1510 if (l > maxsize)
1511 l = maxsize;
1512 ret = expwrite(a, buf, l, client, fua);
1513 if (ret) {
1514 free(buf);
1515 return ret;
1517 len -= l;
1519 free(buf);
1520 return 0;
1524 * Flush data to a client
1526 * @param client The client we're going to write for.
1527 * @return 0 on success, nonzero on failure
1529 int expflush(CLIENT *client) {
1530 gint i;
1532 if (client->server->flags & F_COPYONWRITE) {
1533 return fsync(client->difffile);
1536 if (client->server->flags & F_WAIT) {
1537 return fsync(client->difffile);
1540 if (client->server->flags & F_TREEFILES ) {
1541 // all we can do is force sync the entire filesystem containing the tree
1542 if (client->server->flags & F_READONLY)
1543 return 0;
1544 sync();
1545 return 0;
1548 for (i = 0; i < client->export->len; i++) {
1549 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1550 if (fsync(fi.fhandle) < 0)
1551 return -1;
1554 return 0;
/**
 * Deallocate (zero) a byte range of a file, using whichever mechanism the
 * build has available: fallocate() with FALLOC_FL_PUNCH_HOLE, the
 * FSCTL_SET_ZERO_DATA device control, or nothing at all. The result of
 * the underlying call is not inspected.
 *
 * @param fd the file descriptor to operate on
 * @param off start of the range
 * @param len length of the range
 **/
void punch_hole(int fd, off_t off, off_t len) {
	DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len);
#if HAVE_FALLOC_PH
	/* keep the file size unchanged, only release the blocks */
	fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, off, len);
#elif HAVE_FSCTL_SET_ZERO_DATA
	DWORD bytesret;
	HANDLE w32handle = (HANDLE)_get_osfhandle(fd);
	FILE_ZERO_DATA_INFORMATION zerodata;

	zerodata.FileOffset.QuadPart = off;
	zerodata.BeyondFinalZero.QuadPart = off + len;
	DeviceIoControl(w32handle, FSCTL_SET_ZERO_DATA, &zerodata, sizeof(zerodata), NULL, 0, &bytesret, NULL);
#else
	DEBUG("punching holes not supported on this platform\n");
#endif
}
1573 static void send_reply(CLIENT* client, uint32_t opt, uint32_t reply_type, ssize_t datasize, void* data) {
1574 struct {
1575 uint64_t magic;
1576 uint32_t opt;
1577 uint32_t reply_type;
1578 uint32_t datasize;
1579 } __attribute__ ((packed)) header = {
1580 htonll(0x3e889045565a9LL),
1581 htonl(opt),
1582 htonl(reply_type),
1583 htonl(datasize),
1585 if(datasize < 0) {
1586 datasize = strlen((char*)data);
1587 header.datasize = htonl(datasize);
1589 socket_write(client, &header, sizeof(header));
1590 if(data != NULL) {
1591 socket_write(client, data, datasize);
1596 * Find the name of the file we have to serve. This will use g_strdup_printf
1597 * to put the IP address of the client inside a filename containing
1598 * "%s" (in the form as specified by the "virtstyle" option). That name
1599 * is then written to client->exportname.
1601 * @param net A socket connected to an nbd client
1602 * @param client information about the client. The IP address in human-readable
1603 * format will be written to a new char* buffer, the address of which will be
1604 * stored in client->clientname.
1605 * @return: 0 - OK, -1 - failed.
1607 int set_peername(int net, CLIENT *client) {
1608 struct sockaddr_storage netaddr;
1609 struct sockaddr* addr = (struct sockaddr*)&netaddr;
1610 socklen_t addrinlen = sizeof( struct sockaddr_storage );
1611 struct addrinfo hints;
1612 struct addrinfo *ai = NULL;
1613 char peername[NI_MAXHOST];
1614 char netname[NI_MAXHOST];
1615 char *tmp = NULL;
1616 int i;
1617 int e;
1619 if (getsockname(net, addr, &addrinlen) < 0) {
1620 msg(LOG_INFO, "getsockname failed: %m");
1621 return -1;
1624 if(netaddr.ss_family == AF_UNIX) {
1625 client->clientaddr.ss_family = AF_UNIX;
1626 strcpy(peername, "unix");
1627 } else {
1628 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1629 msg(LOG_INFO, "getpeername failed: %m");
1630 return -1;
1632 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1633 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1634 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1635 return -1;
1638 memset(&hints, '\0', sizeof (hints));
1639 hints.ai_flags = AI_ADDRCONFIG;
1640 e = getaddrinfo(peername, NULL, &hints, &ai);
1642 if(e != 0) {
1643 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
1644 freeaddrinfo(ai);
1645 return -1;
1649 if(strncmp(peername, "::ffff:", 7) == 0) {
1650 memmove(peername, peername+7, strlen(peername));
1653 switch(client->server->virtstyle) {
1654 case VIRT_NONE:
1655 msg(LOG_DEBUG, "virtualization is off");
1656 client->exportname=g_strdup(client->server->exportname);
1657 break;
1658 case VIRT_IPHASH:
1659 msg(LOG_DEBUG, "virtstyle iphash");
1660 for(i=0;i<strlen(peername);i++) {
1661 if(peername[i]=='.') {
1662 peername[i]='/';
1665 case VIRT_IPLIT:
1666 msg(LOG_DEBUG, "virtstyle ipliteral");
1667 client->exportname=g_strdup_printf(client->server->exportname, peername);
1668 break;
1669 case VIRT_CIDR:
1670 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
1671 memcpy(&netaddr, &(client->clientaddr), addrinlen);
1672 int addrbits;
1673 if(client->clientaddr.ss_family == AF_UNIX) {
1674 tmp = g_strdup(peername);
1675 } else {
1676 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
1677 if(ai->ai_family == AF_INET) {
1678 addrbits = 32;
1679 } else if(ai->ai_family == AF_INET6) {
1680 addrbits = 128;
1681 } else {
1682 g_assert_not_reached();
1684 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
1685 for(int i = 0; i < addrbits; i+=8) {
1686 int masklen = client->server->cidrlen - i;
1687 masklen = masklen > 0 ? masklen : 0;
1688 uint8_t mask = getmaskbyte(masklen);
1689 *addrptr &= mask;
1690 addrptr++;
1692 getnameinfo((struct sockaddr *) &netaddr, addrinlen,
1693 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1694 tmp=g_strdup_printf("%s/%s", netname, peername);
1697 if(tmp != NULL) {
1698 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1699 g_free(tmp);
1702 break;
1705 freeaddrinfo(ai);
1706 msg(LOG_INFO, "connect from %s, assigned file is %s",
1707 peername, client->exportname);
1708 client->clientname=g_strdup(peername);
1709 return 0;
1712 int commit_diff(CLIENT* client, bool lock, int fhandle){
1713 int dirtycount = 0;
1714 int pagecount = client->exportsize/DIFFPAGESIZE;
1715 off_t offset;
1716 char* buf = malloc(sizeof(char)*DIFFPAGESIZE);
1718 for (int i=0; i<pagecount; i++){
1719 offset = DIFFPAGESIZE*i;
1720 if (lock)
1721 pthread_rwlock_wrlock(&client->export_lock);
1722 if (client->difmap[i] != (u32)-1){
1723 dirtycount += 1;
1724 DEBUG("flushing dirty page %d, offset %ld\n", i, offset);
1725 if (pread(client->difffile, buf, DIFFPAGESIZE, client->difmap[i]*DIFFPAGESIZE) != DIFFPAGESIZE) {
1726 msg(LOG_WARNING, "could not read while committing diff: %m");
1727 if(lock) {
1728 pthread_rwlock_unlock(&client->export_lock);
1730 break;
1732 if (pwrite(fhandle, buf, DIFFPAGESIZE, offset) != DIFFPAGESIZE) {
1733 msg(LOG_WARNING, "could not write while committing diff: %m");
1734 if (lock) {
1735 pthread_rwlock_unlock(&client->export_lock);
1737 break;
1739 client->difmap[i] = (u32)-1;
1741 if (lock)
1742 pthread_rwlock_unlock(&client->export_lock);
1745 free(buf);
1746 return dirtycount;
1749 void* wait_file(void *void_ptr) {
1750 CLIENT* client = (CLIENT *)void_ptr;
1751 FILE_INFO fi;
1752 GArray* export;
1753 mode_t mode = O_RDWR;
1754 int dirtycount;
1756 fi.fhandle = -1;
1757 fi.startoff = 0;
1759 while (fi.fhandle < 1){
1760 sem_wait(&file_wait_sem);
1761 msg(LOG_INFO, "checking for file %s", client->server->exportname);
1762 fi.fhandle = open(client->server->exportname, mode);
1765 msg(LOG_INFO, "File %s appeared, fd %d", client->server->exportname, fi.fhandle);
1767 // first time there may be lot of data so we lock only per page
1768 do {
1769 dirtycount = commit_diff(client, true, fi.fhandle);
1770 } while (dirtycount > 0);
1772 //last time we lock export for the whole time until we switch write destination
1773 pthread_rwlock_wrlock(&client->export_lock);
1774 do {
1775 dirtycount = commit_diff(client, false, fi.fhandle);
1776 } while (dirtycount > 0);
1778 export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1779 g_array_append_val(export, fi);
1781 client->export = export;
1782 pthread_rwlock_unlock(&client->export_lock);
1783 msg(LOG_INFO, "Waiting for file ended, switching to exported file %s", client->server->exportname);
1785 return NULL;
1789 * Set up client export array, which is an array of FILE_INFO.
1790 * Also, split a single exportfile into multiple ones, if that was asked.
1791 * @param client information on the client which we want to setup export for
1793 bool setupexport(CLIENT* client) {
1794 int i = 0;
1795 off_t laststartoff = 0, lastsize = 0;
1796 int multifile = (client->server->flags & F_MULTIFILE);
1797 int treefile = (client->server->flags & F_TREEFILES);
1798 int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1799 int cancreate = (client->server->expected_size) && !multifile;
1801 if (treefile || (client->server->flags & F_WAIT)) {
1802 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
1803 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
1805 if(client->server->flags & F_WAIT){
1806 pthread_t wait_file_thread;
1807 if (pthread_create(&wait_file_thread, NULL, wait_file, client)){
1808 DEBUG("failed to create wait_file thread");
1809 return false;
1813 } else {
1814 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1816 /* If multi-file, open as many files as we can.
1817 * If not, open exactly one file.
1818 * Calculate file sizes as we go to get total size. */
1819 for(i=0; ; i++) {
1820 FILE_INFO fi;
1821 gchar *tmpname;
1822 gchar* error_string;
1824 if (i)
1825 cancreate = 0;
1826 /* if expected_size is specified, and this is the first file, we can create the file */
1827 mode_t mode = (client->server->flags & F_READONLY) ?
1828 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1830 if (temporary) {
1831 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1832 DEBUG( "Opening %s\n", tmpname );
1833 fi.fhandle = mkstemp(tmpname);
1834 } else {
1835 if(multifile) {
1836 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1837 } else {
1838 tmpname=g_strdup(client->exportname);
1840 DEBUG( "Opening %s\n", tmpname );
1841 fi.fhandle = open(tmpname, mode, 0600);
1842 if(fi.fhandle == -1 && mode == O_RDWR) {
1843 /* Try again because maybe media was read-only */
1844 fi.fhandle = open(tmpname, O_RDONLY);
1845 if(fi.fhandle != -1) {
1846 /* Opening the base file in copyonwrite mode is
1847 * okay */
1848 if(!(client->server->flags & F_COPYONWRITE)) {
1849 client->server->flags |= F_AUTOREADONLY;
1850 client->server->flags |= F_READONLY;
1855 if(fi.fhandle == -1) {
1856 if(multifile && i>0)
1857 break;
1858 error_string=g_strdup_printf(
1859 "Could not open exported file %s: %%m",
1860 tmpname);
1861 err_nonfatal(error_string);
1862 return false;
1865 if (temporary) {
1866 unlink(tmpname); /* File will stick around whilst FD open */
1869 fi.startoff = laststartoff + lastsize;
1870 g_array_append_val(client->export, fi);
1871 g_free(tmpname);
1873 /* Starting offset and size of this file will be used to
1874 * calculate starting offset of next file */
1875 laststartoff = fi.startoff;
1876 lastsize = size_autodetect(fi.fhandle);
1878 /* If we created the file, it will be length zero */
1879 if (!lastsize && cancreate) {
1880 assert(!multifile);
1881 if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1882 err_nonfatal("Could not expand file: %m");
1883 return false;
1885 lastsize = client->server->expected_size;
1886 break; /* don't look for any more files */
1889 if(!multifile || temporary)
1890 break;
1893 /* Set export size to total calculated size */
1894 client->exportsize = laststartoff + lastsize;
1896 /* Export size may be overridden */
1897 if(client->server->expected_size) {
1898 /* desired size must be <= total calculated size */
1899 if(client->server->expected_size > client->exportsize) {
1900 err_nonfatal("Size of exported file is too big\n");
1901 return false;
1904 client->exportsize = client->server->expected_size;
1908 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1909 if(multifile) {
1910 msg(LOG_INFO, "Total number of files: %d", i);
1912 if(treefile) {
1913 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
1915 return true;
1918 bool copyonwrite_prepare(CLIENT* client) {
1919 off_t i;
1920 gchar* dir;
1921 gchar* export_base;
1922 if (client->server->cowdir != NULL) {
1923 dir = g_strdup(client->server->cowdir);
1924 } else {
1925 dir = g_strdup(dirname(client->exportname));
1927 export_base = g_strdup(basename(client->exportname));
1928 client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
1929 (int)getpid());
1930 g_free(dir);
1931 g_free(export_base);
1932 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1933 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1934 if (client->difffile<0) {
1935 err("Could not create diff file (%m)");
1936 return false;
1938 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) {
1939 err("Could not allocate memory");
1940 return false;
1942 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1;
1944 return true;
1947 void send_export_info(CLIENT* client, SERVER* server, bool maybe_zeroes) {
1948 uint64_t size_host = htonll((u64)(client->exportsize));
1949 uint16_t flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_WRITE_ZEROES;
1951 socket_write(client, &size_host, 8);
1952 if (server->flags & F_READONLY)
1953 flags |= NBD_FLAG_READ_ONLY;
1954 if (server->flags & F_FLUSH)
1955 flags |= NBD_FLAG_SEND_FLUSH;
1956 if (server->flags & F_FUA)
1957 flags |= NBD_FLAG_SEND_FUA;
1958 if (server->flags & F_ROTATIONAL)
1959 flags |= NBD_FLAG_ROTATIONAL;
1960 if (server->flags & F_TRIM)
1961 flags |= NBD_FLAG_SEND_TRIM;
1962 if (!(server->flags & F_COPYONWRITE))
1963 flags |= NBD_FLAG_CAN_MULTI_CONN;
1964 flags = htons(flags);
1965 socket_write(client, &flags, sizeof(flags));
1966 if (!(glob_flags & F_NO_ZEROES) && maybe_zeroes) {
1967 char zeros[128];
1968 memset(zeros, '\0', sizeof(zeros));
1969 socket_write(client, zeros, 124);
1974 * Commit to exporting the chosen export
1976 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
1977 * a number of things (verify whether the client is allowed access, try
1978 * to open files, etc etc) before we're ready to actually serve the
1979 * export.
1981 * This function does all those things.
1983 * @param client the CLIENT structure with .server and .net members set
1984 * up correctly
1985 * @return true if the client is allowed access to the export, false
1986 * otherwise
1988 static bool commit_client(CLIENT* client, SERVER* server) {
1989 char acl;
1990 uint32_t len;
1992 client->server = server;
1993 client->exportsize = OFFT_MAX;
1994 client->transactionlogfd = -1;
1995 if(pthread_mutex_init(&(client->lock), NULL)) {
1996 msg(LOG_ERR, "Unable to initialize mutex");
1997 return false;
1999 if (pthread_rwlock_init(&client->export_lock, NULL)){
2000 msg(LOG_ERR, "Unable to initialize write lock");
2001 return false;
2003 /* Check whether we exceeded the maximum number of allowed
2004 * clients already */
2005 if(dontfork) {
2006 acl = 'Y';
2007 } else {
2008 len = strlen(client->server->servename);
2009 writeit(commsocket, &len, sizeof len);
2010 writeit(commsocket, client->server->servename, len);
2011 readit(commsocket, &acl, 1);
2012 close(commsocket);
2014 switch(acl) {
2015 case 'N':
2016 msg(LOG_ERR, "Connection not allowed (too many clients)");
2017 return false;
2018 case 'X':
2019 msg(LOG_ERR, "Connection not allowed (unknown by parent?!?)");
2020 return false;
2023 /* Check whether the client is listed in the authfile */
2024 if (set_peername(client->net, client)) {
2025 msg(LOG_ERR, "Failed to set peername");
2026 return false;
2029 if (!authorized_client(client)) {
2030 msg(LOG_INFO, "Client '%s' is not authorized to access",
2031 client->clientname);
2032 return false;
2035 /* Set up the transactionlog, if we need one */
2036 if (client->server->transactionlog && (client->transactionlogfd == -1)) {
2037 if((client->transactionlogfd =
2038 open(client->server->transactionlog,
2039 O_WRONLY | O_CREAT,
2040 S_IRUSR | S_IWUSR)) ==
2041 -1) {
2042 msg(LOG_INFO, "Could not open transactionlog %s, moving on without it",
2043 client->server->transactionlog);
2047 /* Run any pre scripts that we may need */
2048 if (do_run(client->server->prerun, client->exportname)) {
2049 msg(LOG_INFO, "Client '%s' not allowed access by prerun script",
2050 client->clientname);
2051 return false;
2053 client->socket_closed = socket_closed_transmission;
2054 if(!setupexport(client)) {
2055 return false;
2058 if (client->server->flags & F_COPYONWRITE) {
2059 if(!copyonwrite_prepare(client)) {
2060 return false;
2064 if (client->server->flags & F_WAIT) {
2065 if(!copyonwrite_prepare(client)) {
2066 return false;
2070 setmysockopt(client->net);
2072 return true;
2075 static CLIENT* handle_export_name(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2076 uint32_t namelen;
2077 char* name;
2078 int i;
2080 socket_read(client, &namelen, sizeof(namelen));
2081 namelen = ntohl(namelen);
2082 if(namelen > 0) {
2083 name = malloc(namelen+1);
2084 name[namelen]=0;
2085 socket_read(client, name, namelen);
2086 } else {
2087 name = strdup("");
2089 for(i=0; i<servers->len; i++) {
2090 SERVER* serve = &(g_array_index(servers, SERVER, i));
2091 // hide exports that are TLS-only if we haven't negotiated TLS
2092 // yet
2093 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2094 continue;
2096 if(!strcmp(serve->servename, name)) {
2097 client->clientfeats = cflags;
2098 free(name);
2099 if(!commit_client(client, serve)) {
2100 return NULL;
2102 send_export_info(client, serve, true);
2103 return client;
2106 free(name);
2107 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2110 static void handle_list(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2111 uint32_t len;
2112 int i;
2113 char buf[1024];
2114 char *ptr = buf + sizeof(len);
2116 socket_read(client, &len, sizeof(len));
2117 len = ntohl(len);
2118 if(len) {
2119 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2121 if(!(glob_flags & F_LIST)) {
2122 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Listing of exports denied by server configuration");
2123 err_nonfatal("Client tried disallowed list option");
2124 return;
2126 for(i=0; i<servers->len; i++) {
2127 SERVER* serve = &(g_array_index(servers, SERVER, i));
2128 // Hide TLS-only exports if we haven't negotiated TLS yet
2129 if(!client->tls_session && (serve->flags & F_FORCEDTLS)) {
2130 continue;
2132 len = htonl(strlen(serve->servename));
2133 memcpy(buf, &len, sizeof(len));
2134 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len));
2135 send_reply(client, opt, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
2137 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2140 #if HAVE_GNUTLS
2141 static int verify_cert(gnutls_session_t session) {
2142 int ret;
2143 unsigned int status, cert_list_size;
2144 const gnutls_datum_t *cert_list;
2145 gnutls_x509_crt_t cert;
2146 time_t now = time(NULL);
2148 ret = gnutls_certificate_verify_peers2(session, &status);
2149 if(ret < 0 || status != 0 || gnutls_certificate_type_get(session) !=
2150 GNUTLS_CRT_X509) {
2151 goto err;
2154 if(gnutls_x509_crt_init(&cert) < 0) {
2155 goto err;
2158 cert_list = gnutls_certificate_get_peers(session, &cert_list_size);
2159 if(cert_list == NULL) {
2160 goto err;
2162 if(gnutls_x509_crt_import(cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) {
2163 goto err;
2165 if(gnutls_x509_crt_get_activation_time(cert) > now) {
2166 goto err;
2168 if(gnutls_x509_crt_get_expiration_time(cert) < now) {
2169 goto err;
2171 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2172 msg(LOG_INFO, "client certificate verification successful");
2173 return 0;
2174 err:
2175 msg(LOG_ERR, "E: client certificate verification failed");
2176 return GNUTLS_E_CERTIFICATE_ERROR;
2179 CLIENT* handle_starttls(CLIENT* client, int opt, GArray* servers, uint32_t cflags, struct generic_conf *genconf) {
2180 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2181 gnutls_certificate_credentials_t x509_cred;
2182 CLIENT* retval = client;
2183 gnutls_priority_t priority_cache;
2184 gnutls_session_t *session = g_new0(gnutls_session_t, 1);
2185 int ret;
2186 int len;
2188 socket_read(client, &len, sizeof(len));
2189 if(G_UNLIKELY(len != 0)) {
2190 char buf[1024*1024];
2191 consume(client, len, buf, sizeof(buf));
2192 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Sending a STARTTLS command with data is invalid");
2193 return NULL;
2196 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2198 check_rv(gnutls_certificate_allocate_credentials(&x509_cred));
2199 gnutls_certificate_set_verify_function(x509_cred, verify_cert);
2200 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred, genconf->cacertfile, GNUTLS_X509_FMT_PEM));
2201 check_rv(gnutls_certificate_set_x509_key_file(x509_cred, genconf->certfile, genconf->keyfile, GNUTLS_X509_FMT_PEM));
2202 check_rv(gnutls_priority_init(&priority_cache, genconf->tlsprio, NULL));
2203 check_rv(gnutls_init(session, GNUTLS_SERVER));
2204 check_rv(gnutls_priority_set(*session, priority_cache));
2205 check_rv(gnutls_credentials_set(*session, GNUTLS_CRD_CERTIFICATE, x509_cred));
2207 gnutls_certificate_server_set_request(*session, GNUTLS_CERT_REQUEST);
2208 #if GNUTLS_VERSION_NUMBER >= 0x030109
2209 gnutls_transport_set_int(*session, client->net);
2210 #else
2211 gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) (intptr_t) client->net);
2212 #endif
2213 do {
2214 ret = gnutls_handshake(*session);
2215 } while(ret < 0 && gnutls_error_is_fatal(ret) == 0);
2217 if (ret < 0) {
2218 err_nonfatal(gnutls_strerror(ret));
2219 gnutls_bye(*session, GNUTLS_SHUT_RDWR);
2220 gnutls_deinit(*session);
2221 g_free(session);
2222 return NULL;
2224 client->tls_session = session;
2225 client->socket_read = socket_read_tls;
2226 client->socket_write = socket_write_tls;
2227 #undef check_rv
2228 exit:
2229 if(retval == NULL && session != NULL) {
2230 g_free(session);
2232 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2233 if(retval != NULL && retval->server != NULL) {
2234 retval->server = NULL;
2236 return retval;
2238 #endif
2241 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
2243 * XXX this matches the proposal I sent out, rather than the officially
2244 * documented version of this command. Need to bring the two in sync
2245 * one way or the other.
2247 static bool handle_info(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2248 uint32_t namelen, len;
2249 char *name;
2250 int i;
2251 SERVER *server = NULL;
2252 uint16_t n_requests;
2253 uint16_t request;
2254 char buf[1024];
2255 bool sent_export = false;
2256 uint32_t reptype = NBD_REP_ERR_UNKNOWN;
2257 char *msg = "Export unknown";
2259 socket_read(client, &len, sizeof(len));
2260 len = htonl(len);
2261 socket_read(client, &namelen, sizeof(namelen));
2262 namelen = htonl(namelen);
2263 if(namelen > (len - 6)) {
2264 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2265 socket_read(client, buf, len - sizeof(namelen));
2267 if(namelen > 0) {
2268 name = malloc(namelen + 1);
2269 name[namelen] = 0;
2270 socket_read(client, name, namelen);
2271 } else {
2272 name = strdup("");
2274 for(i=0; i<servers->len; i++) {
2275 SERVER *serve = &(g_array_index(servers, SERVER, i));
2276 if (!strcmp(serve->servename, name)) {
2277 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2278 reptype = NBD_REP_ERR_TLS_REQD;
2279 msg = "TLS is required for that export";
2280 continue;
2282 server = serve;
2285 free(name);
2286 socket_read(client, &n_requests, sizeof(n_requests));
2287 n_requests = ntohs(n_requests);
2288 if(!server) {
2289 consume(client, n_requests * sizeof(request), buf,
2290 sizeof(buf));
2291 send_reply(client, opt, reptype, -1, msg);
2292 return false;
2294 if (opt == NBD_OPT_GO) {
2295 client->clientfeats = cflags;
2296 if(!commit_client(client, server)) {
2297 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Access denied by server configuration");
2298 return false;
2301 for(i=0; i<n_requests; i++) {
2302 socket_read(client, &request, sizeof(request));
2303 switch(ntohs(request)) {
2304 case NBD_INFO_EXPORT:
2305 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2306 socket_write(client, &request, 2);
2307 send_export_info(client, server, false);
2308 sent_export = true;
2309 break;
2310 default:
2311 // ignore all other options for now.
2312 break;
2315 if(!sent_export) {
2316 request = htons(NBD_INFO_EXPORT);
2317 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2318 socket_write(client, &request, 2);
2319 send_export_info(client, server, false);
2321 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2323 return true;
2327 * Do the initial negotiation.
2329 * @param net The socket we're doing the negotiation over.
2330 * @param servers The array of known servers.
2331 * @param genconf the global options (needed for accessing TLS config data)
2333 CLIENT* negotiate(int net, GArray* servers, struct generic_conf *genconf) {
2334 uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
2335 uint64_t magic;
2336 uint32_t cflags = 0;
2337 uint32_t opt;
2338 CLIENT* client = g_new0(CLIENT, 1);
2339 client->net = net;
2340 client->socket_read = socket_read_notls;
2341 client->socket_write = socket_write_notls;
2342 client->socket_closed = socket_closed_negotiate;
2344 assert(servers != NULL);
2345 socket_write(client, INIT_PASSWD, 8);
2346 magic = htonll(opts_magic);
2347 socket_write(client, &magic, sizeof(magic));
2349 smallflags = htons(smallflags);
2350 socket_write(client, &smallflags, sizeof(uint16_t));
2351 socket_read(client, &cflags, sizeof(cflags));
2352 cflags = htonl(cflags);
2353 if (cflags & NBD_FLAG_C_NO_ZEROES) {
2354 glob_flags |= F_NO_ZEROES;
2356 do {
2357 socket_read(client, &magic, sizeof(magic));
2358 magic = ntohll(magic);
2359 if(magic != opts_magic) {
2360 err_nonfatal("Negotiation failed/5a: magic mismatch");
2361 goto handler_err;
2363 socket_read(client, &opt, sizeof(opt));
2364 opt = ntohl(opt);
2365 if(client->tls_session == NULL
2366 && glob_flags & F_FORCEDTLS
2367 && opt != NBD_OPT_STARTTLS) {
2368 if(opt == NBD_OPT_EXPORT_NAME) {
2369 // can't send an error message for EXPORT_NAME,
2370 // so must do hard close
2371 goto handler_err;
2373 if(opt == NBD_OPT_ABORT) {
2374 // handled below
2375 break;
2377 consume_len(client);
2378 send_reply(client, opt, NBD_REP_ERR_TLS_REQD, -1, "TLS is required on this server");
2379 continue;
2381 switch(opt) {
2382 case NBD_OPT_EXPORT_NAME:
2383 // NBD_OPT_EXPORT_NAME must be the last
2384 // selected option, so return from here
2385 // if that is chosen.
2386 if(handle_export_name(client, opt, servers, cflags) != NULL) {
2387 return client;
2388 } else {
2389 goto handler_err;
2391 break;
2392 case NBD_OPT_LIST:
2393 handle_list(client, opt, servers, cflags);
2394 break;
2395 case NBD_OPT_ABORT:
2396 // handled below
2397 break;
2398 case NBD_OPT_STARTTLS:
2399 #if !HAVE_GNUTLS
2400 consume_len(client);
2401 send_reply(client, opt, NBD_REP_ERR_PLATFORM, -1, "This nbd-server was compiled without TLS support");
2402 #else
2403 if(client->tls_session != NULL) {
2404 consume_len(client);
2405 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
2406 continue;
2408 if(genconf->keyfile == NULL) {
2409 consume_len(client);
2410 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "TLS not allowed on this server");
2411 continue;
2413 if(handle_starttls(client, opt, servers, cflags, genconf) == NULL) {
2414 // can't recover from failed TLS negotiation.
2415 goto handler_err;
2417 #endif
2418 break;
2419 case NBD_OPT_GO:
2420 case NBD_OPT_INFO:
2421 if(handle_info(client, opt, servers, cflags) && opt == NBD_OPT_GO) {
2422 return client;
2424 break;
2425 default:
2426 consume_len(client);
2427 send_reply(client, opt, NBD_REP_ERR_UNSUP, -1, "The given option is unknown to this server implementation");
2428 break;
2430 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
2431 if(opt == NBD_OPT_ABORT) {
2432 err_nonfatal("Session terminated by client");
2433 goto handler_err;
2435 err_nonfatal("Weird things happened: reached end of negotiation without success");
2436 handler_err:
2437 g_free(client);
2438 return NULL;
/**
 * Translate a local errno value into the error number put on the wire.
 *
 * @param errcode an errno value
 * @return the wire error number, already converted with htonl(); any
 * value without a dedicated mapping is reported as 22 (EINVAL)
 **/
static int nbd_errno(int errcode) {
	uint32_t wire;

	switch (errcode) {
	case EPERM:
		wire = 1;
		break;
	case EIO:
		wire = 5;
		break;
	case ENOMEM:
		wire = 12;
		break;
	case EFBIG:
	case ENOSPC:
#ifdef EDQUOT
	case EDQUOT:
#endif
		wire = 28; // ENOSPC
		break;
	case EINVAL:
	default:
		wire = 22; // EINVAL
		break;
	}
	return htonl(wire);
}
2462 static void package_dispose(struct work_package* package) {
2463 if (package->pipefd[0] > 0)
2464 close(package->pipefd[0]);
2465 if (package->pipefd[1] > 0)
2466 close(package->pipefd[1]);
2467 g_free(package->data);
2468 g_free(package->req);
2469 g_free(package);
2472 static int mkpipe(int pipefd[2], size_t len)
2474 if (len > MAX_PIPE_SIZE)
2475 return -1;
2476 if (pipe(pipefd))
2477 return -1;
2479 #ifdef HAVE_SPLICE
2480 if (fcntl(pipefd[1], F_SETPIPE_SZ, MAX_PIPE_SIZE) < MAX_PIPE_SIZE) {
2481 close(pipefd[0]);
2482 close(pipefd[1]);
2483 pipefd[0] = -1;
2484 pipefd[1] = -1;
2485 return -1;
2487 #endif
2489 return 0;
2492 struct work_package* package_create(CLIENT* client, struct nbd_request* req) {
2493 struct work_package* rv = calloc(sizeof (struct work_package), 1);
2495 rv->req = req;
2496 rv->client = client;
2497 rv->data = NULL;
2498 rv->pipefd[0] = -1;
2499 rv->pipefd[1] = -1;
2501 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2502 if (client->server->flags & F_SPLICE) {
2503 if (mkpipe(rv->pipefd, req->len))
2504 rv->data = malloc(req->len);
2505 } else {
2506 rv->data = malloc(req->len);
2510 return rv;
2513 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) {
2514 rep->magic = htonl(NBD_REPLY_MAGIC);
2515 rep->error = 0;
2516 memcpy(&(rep->handle), &(req->handle), sizeof(req->handle));
#ifdef HAVE_SPLICE
/**
 * Try to serve a read request through a pipe and splice.
 *
 * @param client the client that sent the request
 * @param req the read request
 * @return 0 if the reply and the data were sent; -1 if splicing could not
 * be used (TLS in use, no pipe, or expsplice() failed), in which case
 * nothing has been sent to the client
 **/
static int handle_splice_read(CLIENT *client, struct nbd_request *req)
{
	struct nbd_reply reply;
	int ends[2];

	// splice doesn't work with TLS
	if (client->tls_session != NULL) {
		return -1;
	}
	if (mkpipe(ends, req->len) != 0) {
		return -1;
	}

	/* Fill the pipe from the export first ... */
	if (expsplice(ends[1], req->from, req->len, client, SPLICE_IN, 0)) {
		close(ends[1]);
		close(ends[0]);
		return -1;
	}

	DEBUG("handling read request (splice)\n");
	setup_reply(&reply, req);

	/* ... then send header and payload under the client lock. */
	pthread_mutex_lock(&(client->lock));
	writeit(client->net, &reply, sizeof(reply));
	spliceit(ends[0], NULL, client->net, NULL, req->len);
	pthread_mutex_unlock(&(client->lock));

	close(ends[0]);
	close(ends[1]);
	return 0;
}
#endif
2550 static void handle_normal_read(CLIENT *client, struct nbd_request *req)
2552 struct nbd_reply rep;
2553 void* buf = malloc(req->len);
2554 if(!buf) {
2555 err("Could not allocate memory for request");
2557 DEBUG("handling read request\n");
2558 setup_reply(&rep, req);
2559 if(expread(req->from, buf, req->len, client)) {
2560 DEBUG("Read failed: %m");
2561 rep.error = nbd_errno(errno);
2563 pthread_mutex_lock(&(client->lock));
2564 socket_write(client, &rep, sizeof rep);
2565 if(!rep.error) {
2566 socket_write(client, buf, req->len);
2568 pthread_mutex_unlock(&(client->lock));
2569 free(buf);
2572 static void handle_read(CLIENT* client, struct nbd_request* req)
2574 #ifdef HAVE_SPLICE
2576 * If we have splice set we want to try that first, and if that fails
2577 * for whatever reason we fall through to ye olde read.
2579 if (client->server->flags & F_SPLICE)
2580 if (!handle_splice_read(client, req))
2581 return;
2582 #endif
2583 handle_normal_read(client, req);
2586 static void handle_write(struct work_package *pkg)
2588 CLIENT *client = pkg->client;
2589 struct nbd_request *req = pkg->req;
2590 struct nbd_reply rep;
2591 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2593 DEBUG("handling write request\n");
2594 setup_reply(&rep, req);
2596 #ifdef HAVE_SPLICE
2597 if (!pkg->data) {
2598 if (expsplice(pkg->pipefd[0], req->from, req->len, client,
2599 SPLICE_OUT, fua)) {
2600 DEBUG("Splice failed: %m");
2601 rep.error = nbd_errno(errno);
2603 } else
2604 #endif
2606 if(expwrite(req->from, pkg->data, req->len, client, fua)) {
2607 DEBUG("Write failed: %m");
2608 rep.error = nbd_errno(errno);
2611 pthread_mutex_lock(&(client->lock));
2612 socket_write(client, &rep, sizeof rep);
2613 pthread_mutex_unlock(&(client->lock));
2616 static void handle_flush(CLIENT* client, struct nbd_request* req) {
2617 struct nbd_reply rep;
2618 DEBUG("handling flush request\n");
2619 setup_reply(&rep, req);
2620 if(expflush(client)) {
2621 DEBUG("Flush failed: %m");
2622 rep.error = nbd_errno(errno);
2624 pthread_mutex_lock(&(client->lock));
2625 socket_write(client, &rep, sizeof rep);
2626 pthread_mutex_unlock(&(client->lock));
2629 static void handle_trim(CLIENT* client, struct nbd_request* req) {
2630 struct nbd_reply rep;
2631 DEBUG("handling trim request\n");
2632 setup_reply(&rep, req);
2633 if(exptrim(req, client)) {
2634 DEBUG("Trim failed: %m");
2635 rep.error = nbd_errno(errno);
2637 pthread_mutex_lock(&(client->lock));
2638 socket_write(client, &rep, sizeof rep);
2639 pthread_mutex_unlock(&(client->lock));
2642 static void handle_write_zeroes(CLIENT* client, struct nbd_request* req) {
2643 struct nbd_reply rep;
2644 DEBUG("handling write_zeroes request\n");
2645 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2646 setup_reply(&rep, req);
2647 if(expwrite_zeroes(req, client, fua)) {
2648 DEBUG("Write_zeroes failed: %m");
2649 rep.error = nbd_errno(errno);
2651 // For now, don't trim
2652 // TODO: handle this far more efficiently with reference to the
2653 // actual backing driver
2654 pthread_mutex_lock(&(client->lock));
2655 socket_write(client, &rep, sizeof rep);
2656 pthread_mutex_unlock(&(client->lock));
2660 static bool bad_write(CLIENT* client, struct nbd_request* req) {
2661 if ((client->server->flags & F_READONLY) ||
2662 (client->server->flags & F_AUTOREADONLY)) {
2663 DEBUG("[WRITE to READONLY!]");
2664 return true;
2666 return false;
2669 static bool bad_range(CLIENT* client, struct nbd_request* req) {
2670 if(req->from > client->exportsize ||
2671 req->from + req->len > client->exportsize) {
2672 DEBUG("[out of bounds!]");
2673 return true;
2675 return false;
2678 static void handle_request(gpointer data, gpointer user_data) {
2679 struct work_package* package = (struct work_package*) data;
2680 uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND;
2681 uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND;
2682 struct nbd_reply rep;
2683 int err = EINVAL;
2685 if(flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
2686 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type);
2687 goto error;
2690 switch(type) {
2691 case NBD_CMD_READ:
2692 if (bad_range(package->client, package->req)) {
2693 goto error;
2695 handle_read(package->client, package->req);
2696 break;
2697 case NBD_CMD_WRITE:
2698 if (bad_write(package->client, package->req)) {
2699 err = EPERM;
2700 goto error;
2702 if (bad_range(package->client, package->req)) {
2703 err = ENOSPC;
2704 goto error;
2706 handle_write(package);
2707 break;
2708 case NBD_CMD_FLUSH:
2709 handle_flush(package->client, package->req);
2710 break;
2711 case NBD_CMD_TRIM:
2712 if (bad_write(package->client, package->req)) {
2713 err = EPERM;
2714 goto error;
2716 if (bad_range(package->client, package->req)) {
2717 goto error;
2719 handle_trim(package->client, package->req);
2720 break;
2721 case NBD_CMD_WRITE_ZEROES:
2722 if (bad_write(package->client, package->req)) {
2723 err = EPERM;
2724 goto error;
2726 if (bad_range(package->client, package->req)) {
2727 err = ENOSPC;
2728 goto error;
2730 handle_write_zeroes(package->client, package->req);
2731 break;
2732 default:
2733 msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type);
2734 goto error;
2736 goto end;
2737 error:
2738 setup_reply(&rep, package->req);
2739 rep.error = nbd_errno(err);
2740 pthread_mutex_lock(&(package->client->lock));
2741 socket_write(package->client, &rep, sizeof rep);
2742 pthread_mutex_unlock(&(package->client->lock));
2743 end:
2744 package_dispose(package);
2747 static int mainloop_threaded(CLIENT* client) {
2748 struct nbd_request* req;
2749 struct work_package* pkg;
2751 DEBUG("Entering request loop\n");
2752 while(1) {
2753 req = calloc(sizeof (struct nbd_request), 1);
2755 socket_read(client, req, sizeof(struct nbd_request));
2756 if(client->transactionlogfd != -1) {
2757 writeit(client->transactionlogfd, req, sizeof(struct nbd_request));
2760 req->from = ntohll(req->from);
2761 req->type = ntohl(req->type);
2762 req->len = ntohl(req->len);
2764 if(req->magic != htonl(NBD_REQUEST_MAGIC))
2765 err("Protocol error: not enough magic.");
2767 pkg = package_create(client, req);
2769 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2770 #ifdef HAVE_SPLICE
2771 if ((client->server->flags & F_SPLICE) &&
2772 (req->len <= MAX_PIPE_SIZE && pkg->pipefd[1] > 0) &&
2773 (client->tls_session == NULL))
2774 spliceit(client->net, NULL, pkg->pipefd[1],
2775 NULL, req->len);
2776 else
2777 #endif
2778 socket_read(client, pkg->data, req->len);
2780 if(req->type == NBD_CMD_DISC) {
2781 finalize_client(client);
2782 return 0;
2784 g_thread_pool_push(tpool, pkg, NULL);
2789 * Destroy a pid_t*
2790 * @param data a pointer to pid_t which should be freed
2792 void destroy_pid_t(gpointer data) {
2793 g_free(data);
2796 static pid_t
2797 spawn_child(int* socket)
2799 pid_t pid;
2800 sigset_t newset;
2801 sigset_t oldset;
2802 int sockets[2];
2804 sigemptyset(&newset);
2805 sigaddset(&newset, SIGCHLD);
2806 sigaddset(&newset, SIGTERM);
2807 sigprocmask(SIG_BLOCK, &newset, &oldset);
2808 socketpair(AF_UNIX, SOCK_STREAM, 0, sockets);
2809 pid = fork();
2810 if (pid < 0) {
2811 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
2812 close(sockets[0]);
2813 close(sockets[1]);
2814 goto out;
2816 if (pid > 0) { /* Parent */
2817 pid_t *pidp;
2819 pidp = g_malloc(sizeof(pid_t));
2820 *pidp = pid;
2821 *socket = sockets[1];
2822 close(sockets[0]);
2823 g_hash_table_insert(children, pidp, pidp);
2824 goto out;
2826 /* Child */
2827 *socket = sockets[0];
2828 close(sockets[1]);
2829 /* Child's signal disposition is reset to default. */
2830 signal(SIGCHLD, SIG_DFL);
2831 signal(SIGTERM, SIG_DFL);
2832 signal(SIGHUP, SIG_DFL);
2833 sigemptyset(&oldset);
2834 out:
2835 sigprocmask(SIG_SETMASK, &oldset, NULL);
2836 return pid;
/**
 * Accept one pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative value
 * if accept() failed (the failure has been logged)
 **/
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peer_len = sizeof(peer);
	const int fd = accept(sock, (struct sockaddr *) &peer, &peer_len);

	if (fd < 0) {
		err_nonfatal("Failed to accept socket connection: %m");
	}

	return fd;
}
2854 static void
2855 handle_modern_connection(GArray *const servers, const int sock, struct generic_conf *genconf)
2857 int net;
2858 pid_t pid;
2859 CLIENT *client = NULL;
2860 int sock_flags_old;
2861 int sock_flags_new;
2863 net = socket_accept(sock);
2864 if (net < 0)
2865 return;
2867 if (!dontfork) {
2868 pid = spawn_child(&commsocket);
2869 if (pid) {
2870 if (pid > 0) {
2871 msg(LOG_INFO, "Spawned a child process");
2872 g_array_append_val(childsocks, commsocket);
2874 if (pid < 0)
2875 msg(LOG_ERR, "Failed to spawn a child process");
2876 close(net);
2877 return;
2879 /* Child just continues. */
2882 sock_flags_old = fcntl(net, F_GETFL, 0);
2883 if (sock_flags_old == -1) {
2884 msg(LOG_ERR, "Failed to get socket flags");
2885 goto handler_err;
2888 sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2889 if (sock_flags_new != sock_flags_old &&
2890 fcntl(net, F_SETFL, sock_flags_new) == -1) {
2891 msg(LOG_ERR, "Failed to set socket to blocking mode");
2892 goto handler_err;
2895 client = negotiate(net, servers, genconf);
2896 if (!client) {
2897 msg(LOG_ERR, "Modern initial negotiation failed");
2898 goto handler_err;
2901 if (!dontfork) {
2902 int i;
2904 /* Free all root server resources here, because we are
2905 * currently in the child process serving one specific
2906 * connection. These are not simply needed anymore. */
2907 g_hash_table_destroy(children);
2908 children = NULL;
2909 for (i = 0; i < modernsocks->len; i++) {
2910 close(g_array_index(modernsocks, int, i));
2912 g_array_free(modernsocks, TRUE);
2914 /* Now that we are in the child process after a
2915 * succesful negotiation, we do not need the list of
2916 * servers anymore, get rid of it.*/
2917 /* FALSE does not free the
2918 actual data. This is required,
2919 because the client has a
2920 direct reference into that
2921 data, and otherwise we get a
2922 segfault... */
2923 g_array_free(servers, FALSE);
2926 msg(LOG_INFO, "Starting to serve");
2927 mainloop_threaded(client);
2928 exit(EXIT_SUCCESS);
2930 handler_err:
2931 close(net);
2932 g_free(client);
2934 if (!dontfork) {
2935 exit(EXIT_FAILURE);
2939 static int handle_childname(GArray* servers, int socket)
2941 uint32_t len;
2942 char *buf;
2943 int i, r, rt = 0;
2945 while(rt < sizeof(len)) {
2946 switch((r = read(socket, &len, sizeof len))) {
2947 case 0:
2948 return -1;
2949 case -1:
2950 err_nonfatal("Error reading from acl socket: %m");
2951 return -1;
2952 default:
2953 rt += r;
2954 break;
2957 buf = g_malloc0(len + 1);
2958 buf[len] = 0;
2959 readit(socket, buf, len);
2960 for(i=0; i<servers->len; i++) {
2961 SERVER* srv = &g_array_index(servers, SERVER, i);
2962 if(strcmp(srv->servename, buf) == 0) {
2963 if(srv->max_connections == 0 || srv->max_connections > srv->numclients) {
2964 writeit(socket, "Y", 1);
2965 srv->numclients++;
2966 } else {
2967 writeit(socket, "N", 1);
2969 goto exit;
2972 writeit(socket, "X", 1);
2973 exit:
2974 g_free(buf);
2975 return 0;
2979 * Return the index of the server whose servename matches the given
2980 * name.
2982 * @param servename a string to match
2983 * @param servers an array of servers
2984 * @return the first index of the server whose servename matches the
2985 * given name or -1 if one cannot be found
2987 static int get_index_by_servename(const gchar *const servename,
2988 const GArray *const servers) {
2989 int i;
2991 for (i = 0; i < servers->len; ++i) {
2992 const SERVER server = g_array_index(servers, SERVER, i);
2994 if (strcmp(servename, server.servename) == 0)
2995 return i;
2998 return -1;
3002 * Parse configuration files and add servers to the array if they don't
3003 * already exist there. The existence is tested by comparing
3004 * servenames. A server is appended to the array only if its servename
3005 * is unique among all other servers.
3007 * @param servers an array of servers
3008 * @return the number of new servers appended to the array, or -1 in
3009 * case of an error
3011 static int append_new_servers(GArray *const servers, GError **const gerror) {
3012 int i;
3013 GArray *new_servers;
3014 const int old_len = servers->len;
3015 int retval = -1;
3016 struct generic_conf genconf;
3018 new_servers = parse_cfile(config_file_pos, &genconf, true, gerror);
3019 g_thread_pool_set_max_threads(tpool, genconf.threads, NULL);
3020 if (!new_servers)
3021 goto out;
3023 for (i = 0; i < new_servers->len; ++i) {
3024 SERVER new_server = g_array_index(new_servers, SERVER, i);
3026 if (new_server.servename
3027 && -1 == get_index_by_servename(new_server.servename,
3028 servers)) {
3029 g_array_append_val(servers, new_server);
3033 retval = servers->len - old_len;
3034 out:
3035 g_array_free(new_servers, TRUE);
3037 return retval;
3040 void serveloop(GArray* servers, struct generic_conf *genconf) G_GNUC_NORETURN;
3042 * Loop through the available servers, and serve them. Never returns.
3044 void serveloop(GArray* servers, struct generic_conf *genconf) {
3045 int i;
3046 int mmax, max;
3047 fd_set mset;
3048 fd_set rset;
3049 sigset_t blocking_mask;
3050 sigset_t original_mask;
3053 * Set up the master fd_set. The set of descriptors we need
3054 * to select() for never changes anyway and it buys us a *lot*
3055 * of time to only build this once. However, if we ever choose
3056 * to not fork() for clients anymore, we may have to revisit
3057 * this.
3059 mmax=0;
3060 FD_ZERO(&mset);
3061 for(i=0;i<modernsocks->len;i++) {
3062 int sock = g_array_index(modernsocks, int, i);
3063 FD_SET(sock, &mset);
3064 mmax=sock>mmax?sock:mmax;
3067 /* Construct a signal mask which is used to make signal testing and
3068 * receiving an atomic operation to ensure no signal is received between
3069 * tests and blocking pselect(). */
3070 if (sigemptyset(&blocking_mask) == -1)
3071 err("failed to initialize blocking_mask: %m");
3073 if (sigaddset(&blocking_mask, SIGCHLD) == -1)
3074 err("failed to add SIGCHLD to blocking_mask: %m");
3076 if (sigaddset(&blocking_mask, SIGHUP) == -1)
3077 err("failed to add SIGHUP to blocking_mask: %m");
3079 if (sigaddset(&blocking_mask, SIGTERM) == -1)
3080 err("failed to add SIGTERM to blocking_mask: %m");
3082 if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
3083 err("failed to block signals: %m");
3085 for(;;) {
3086 if (is_sigterm_caught) {
3087 is_sigterm_caught = 0;
3089 g_hash_table_foreach(children, killchild, NULL);
3090 unlink(pidfname);
3092 exit(EXIT_SUCCESS);
3095 if (is_sigchld_caught) {
3096 int status;
3097 int* i;
3098 pid_t pid;
3100 is_sigchld_caught = 0;
3102 while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
3103 if (WIFEXITED(status)) {
3104 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
3106 i = g_hash_table_lookup(children, &pid);
3107 if (!i) {
3108 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
3109 } else {
3110 DEBUG("Removing %d from the list of children", pid);
3111 g_hash_table_remove(children, &pid);
3116 /* SIGHUP causes the root server process to reconfigure
3117 * itself and add new export servers for each newly
3118 * found export configuration group, i.e. spawn new
3119 * server processes for each previously non-existent
3120 * export. This does not alter old runtime configuration
3121 * but just appends new exports. */
3122 if (is_sighup_caught) {
3123 int n;
3124 GError *gerror = NULL;
3126 msg(LOG_INFO, "reconfiguration request received");
3127 is_sighup_caught = 0; /* Reset to allow catching
3128 * it again. */
3130 n = append_new_servers(servers, &gerror);
3131 if (n == -1)
3132 msg(LOG_ERR, "failed to append new servers: %s",
3133 gerror->message);
3135 for (i = servers->len - n; i < servers->len; ++i) {
3136 const SERVER server = g_array_index(servers,
3137 SERVER, i);
3139 msg(LOG_INFO, "reconfigured new server: %s",
3140 server.servename);
3144 memcpy(&rset, &mset, sizeof(fd_set));
3145 max=mmax;
3146 for(i=0;i<childsocks->len;i++) {
3147 int sock = g_array_index(childsocks, int, i);
3148 FD_SET(sock, &rset);
3149 max=sock>max?sock:max;
3152 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
3153 DEBUG("accept, ");
3154 for(i=0; i < modernsocks->len; i++) {
3155 int sock = g_array_index(modernsocks, int, i);
3156 if(!FD_ISSET(sock, &rset)) {
3157 continue;
3160 handle_modern_connection(servers, sock, genconf);
3162 for(i=0; i < childsocks->len; i++) {
3163 int sock = g_array_index(childsocks, int, i);
3165 if(FD_ISSET(sock, &rset)) {
3166 if(handle_childname(servers, sock) < 0) {
3167 close(sock);
3168 g_array_remove_index(childsocks, i);
3177 * Set server socket options.
3179 * @param socket a socket descriptor of the server
3181 * @param gerror a pointer to an error object pointer used for reporting
3182 * errors. On error, if gerror is not NULL, *gerror is set and -1
3183 * is returned.
3185 * @return 0 on success, -1 on error
3187 int dosockopts(const int socket, GError **const gerror) {
3188 #ifndef sun
3189 int yes=1;
3190 #else
3191 char yes='1';
3192 #endif /* sun */
3193 struct linger l;
3195 /* lose the pesky "Address already in use" error message */
3196 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
3197 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
3198 "failed to set socket option SO_REUSEADDR: %s",
3199 strerror(errno));
3200 return -1;
3202 l.l_onoff = 1;
3203 l.l_linger = 10;
3204 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
3205 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
3206 "failed to set socket option SO_LINGER: %s",
3207 strerror(errno));
3208 return -1;
3210 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
3211 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
3212 "failed to set socket option SO_KEEPALIVE: %s",
3213 strerror(errno));
3214 return -1;
3217 return 0;
3220 int open_unix(const gchar *const sockname, GError **const gerror) {
3221 struct sockaddr_un sa;
3222 int sock=-1;
3223 int retval=-1;
3225 memset(&sa, 0, sizeof(struct sockaddr_un));
3226 sa.sun_family = AF_UNIX;
3227 strncpy(sa.sun_path, sockname, sizeof sa.sun_path);
3228 sa.sun_path[sizeof(sa.sun_path)-1] = '\0';
3229 sock = socket(AF_UNIX, SOCK_STREAM, 0);
3230 if(sock < 0) {
3231 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3232 "failed to open a unix socket: "
3233 "failed to create socket: %s",
3234 strerror(errno));
3235 goto out;
3237 if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
3238 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3239 "failed to open a unix socket: "
3240 "failed to bind to address %s: %s",
3241 sockname, strerror(errno));
3242 goto out;
3244 if(listen(sock, 10)<0) {
3245 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3246 "failed to open a unix socket: "
3247 "failed to start listening: %s",
3248 strerror(errno));
3249 goto out;
3251 retval=0;
3252 g_array_append_val(modernsocks, sock);
3253 out:
3254 if(retval<0 && sock >= 0) {
3255 close(sock);
3258 return retval;
3261 int open_modern(const gchar *const addr, const gchar *const port,
3262 GError **const gerror) {
3263 struct addrinfo hints;
3264 struct addrinfo* ai = NULL;
3265 struct addrinfo* ai_bak = NULL;
3266 struct sock_flags;
3267 int e;
3268 int retval = -1;
3269 int sock = -1;
3270 gchar** addrs;
3271 gchar const* l_addr = addr;
3273 if(!addr || strlen(addr) == 0) {
3274 l_addr = "::, 0.0.0.0";
3277 addrs = g_strsplit_set(l_addr, ", \t", -1);
3279 for(int i=0; addrs[i]!=NULL; i++) {
3280 if(addrs[i][0] == '\0') {
3281 continue;
3283 memset(&hints, '\0', sizeof(hints));
3284 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
3285 hints.ai_socktype = SOCK_STREAM;
3286 hints.ai_family = AF_UNSPEC;
3287 hints.ai_protocol = IPPROTO_TCP;
3288 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai);
3289 ai_bak = ai;
3290 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) {
3291 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
3292 "failed to open a modern socket: "
3293 "failed to get address info: %s",
3294 gai_strerror(e));
3295 goto out;
3298 while(ai != NULL) {
3299 sock = -1;
3301 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
3302 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3303 "failed to open a modern socket: "
3304 "failed to create a socket: %s",
3305 strerror(errno));
3306 goto out;
3309 if (dosockopts(sock, gerror) == -1) {
3310 g_prefix_error(gerror, "failed to open a modern socket: ");
3311 goto out;
3314 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
3316 * Some systems will return multiple entries for the
3317 * same address when we ask it for something
3318 * AF_UNSPEC, even though the first entry will
3319 * listen to both protocols. Other systems will
3320 * return multiple entries too, but we actually
3321 * do need to open both.
3323 * Handle this by ignoring EADDRINUSE if we've
3324 * already got at least one socket open
3326 if(errno == EADDRINUSE && modernsocks->len > 0) {
3327 goto next;
3329 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3330 "failed to open a modern socket: "
3331 "failed to bind an address to a socket: %s",
3332 strerror(errno));
3333 goto out;
3336 if(listen(sock, 10) <0) {
3337 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3338 "failed to open a modern socket: "
3339 "failed to start listening on a socket: %s",
3340 strerror(errno));
3341 goto out;
3343 g_array_append_val(modernsocks, sock);
3344 next:
3345 ai = ai->ai_next;
3347 if(ai_bak) {
3348 freeaddrinfo(ai_bak);
3349 ai_bak=NULL;
3353 retval = 0;
3354 out:
3356 if (retval == -1 && sock >= 0) {
3357 close(sock);
3359 if(ai_bak)
3360 freeaddrinfo(ai_bak);
3362 return retval;
3366 * Connect our servers.
3368 void setup_servers(GArray *const servers, const gchar *const modernaddr,
3369 const gchar *const modernport, const gchar* unixsock,
3370 const gint flags ) {
3371 struct sigaction sa;
3373 if(unixsock != NULL) {
3374 GError* gerror = NULL;
3375 if(open_unix(unixsock, &gerror) == -1) {
3376 msg(LOG_ERR, "failed to setup servers: %s",
3377 gerror->message);
3378 g_clear_error(&gerror);
3379 exit(EXIT_FAILURE);
3382 if (((flags & F_DUAL_LISTEN) != 0) || (unixsock == NULL)) {
3383 GError *gerror = NULL;
3384 if (open_modern(modernaddr, modernport, &gerror) == -1) {
3385 msg(LOG_ERR, "failed to setup servers: %s",
3386 gerror->message);
3387 g_clear_error(&gerror);
3388 exit(EXIT_FAILURE);
3391 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
3393 sa.sa_handler = sigchld_handler;
3394 sigemptyset(&sa.sa_mask);
3395 sigaddset(&sa.sa_mask, SIGTERM);
3396 sa.sa_flags = SA_RESTART;
3397 if(sigaction(SIGCHLD, &sa, NULL) == -1)
3398 err("sigaction: %m");
3400 sa.sa_handler = sigterm_handler;
3401 sigemptyset(&sa.sa_mask);
3402 sigaddset(&sa.sa_mask, SIGCHLD);
3403 sa.sa_flags = SA_RESTART;
3404 if(sigaction(SIGTERM, &sa, NULL) == -1)
3405 err("sigaction: %m");
3407 sa.sa_handler = sighup_handler;
3408 sigemptyset(&sa.sa_mask);
3409 sa.sa_flags = SA_RESTART;
3410 if(sigaction(SIGHUP, &sa, NULL) == -1)
3411 err("sigaction: %m");
3413 sa.sa_handler = sigusr1_handler;
3414 sigemptyset(&sa.sa_mask);
3415 sa.sa_flags = SA_RESTART;
3416 if(sigaction(SIGUSR1, &sa, NULL) == -1)
3417 err("sigaction: %m");
3421 * Go daemon (unless we specified at compile time that we didn't want this)
3422 * @param serve the first server of our configuration. If its port is zero,
3423 * then do not daemonize, because we're doing inetd then. This parameter
3424 * is only used to create a PID file of the form
3425 * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
3427 #if !defined(NODAEMON)
3428 void daemonize() {
3429 FILE*pidf;
3431 if(daemon(0,0)<0) {
3432 err("daemon");
3434 if(!*pidfname) {
3435 strncpy(pidfname, "/var/run/nbd-server.pid", 255);
3437 pidf=fopen(pidfname, "w");
3438 if(pidf) {
3439 fprintf(pidf,"%d\n", (int)getpid());
3440 fclose(pidf);
3441 } else {
3442 perror("fopen");
3443 fprintf(stderr, "Not fatal; continuing");
3446 #else
3447 #define daemonize(serve)
3448 #endif /* !defined(NODAEMON) */
/*
 * Everything beyond this point (in the file) is run in non-daemon mode.
 * The stuff above daemonize() isn't.
 */
3456 * Set up user-ID and/or group-ID
3458 void dousers(const gchar *const username, const gchar *const groupname) {
3459 struct passwd *pw;
3460 struct group *gr;
3461 gchar* str;
3462 if (groupname) {
3463 gr = getgrnam(groupname);
3464 if(!gr) {
3465 str = g_strdup_printf("Invalid group name: %s", groupname);
3466 err(str);
3468 if(setgid(gr->gr_gid)<0) {
3469 err("Could not set GID: %m");
3472 if (username) {
3473 pw = getpwnam(username);
3474 if(!pw) {
3475 str = g_strdup_printf("Invalid user name: %s", username);
3476 err(str);
3478 setgroups(0, NULL);
3479 if(setuid(pw->pw_uid)<0) {
3480 err("Could not set UID: %m");
3485 #ifndef ISSERVER
3486 void glib_message_syslog_redirect(const gchar *log_domain,
3487 GLogLevelFlags log_level,
3488 const gchar *message,
3489 gpointer user_data)
3491 int level=LOG_DEBUG;
3493 switch( log_level )
3495 case G_LOG_FLAG_FATAL:
3496 case G_LOG_LEVEL_CRITICAL:
3497 case G_LOG_LEVEL_ERROR:
3498 level=LOG_ERR;
3499 break;
3500 case G_LOG_LEVEL_WARNING:
3501 level=LOG_WARNING;
3502 break;
3503 case G_LOG_LEVEL_MESSAGE:
3504 case G_LOG_LEVEL_INFO:
3505 level=LOG_INFO;
3506 break;
3507 case G_LOG_LEVEL_DEBUG:
3508 level=LOG_DEBUG;
3509 break;
3510 default:
3511 level=LOG_ERR;
3513 syslog(level, "%s", message);
3515 #endif
3518 * Main entry point...
/*
 * Program entry point: parses the command line and the configuration
 * file, daemonizes unless dontfork is set, opens the listening sockets
 * via setup_servers(), switches group/user via dousers(), and then
 * either serves a single client on file descriptor 0 (when the
 * configured port is the string "0") or enters serveloop().
 */
3520 int main(int argc, char *argv[]) {
3521 SERVER *serve;
3522 GArray *servers;
3523 GError *gerr=NULL;
3524 struct generic_conf genconf;
3526 memset(&genconf, 0, sizeof(struct generic_conf));
/* Refuse to run if struct nbd_request is not exactly 28 bytes. */
3528 if (sizeof( struct nbd_request )!=28) {
3529 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
3530 exit(EXIT_FAILURE) ;
/* Global arrays of int socket descriptors, filled in later. */
3533 modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
3534 childsocks = g_array_new(FALSE, FALSE, sizeof(int));
3536 logging(MY_NAME);
3537 config_file_pos = g_strdup(CFILE);
3538 serve=cmdline(argc, argv, &genconf);
/* NOTE(review): threads is assigned after cmdline() was given &genconf,
 * so anything cmdline() may have stored in genconf.threads is
 * overwritten here -- confirm this is intended. */
3540 genconf.threads = 4;
3541 servers = parse_cfile(config_file_pos, &genconf, true, &gerr);
3543 /* Update global variables with parsed values. This will be
3544 * removed once we get rid of global configuration variables. */
3545 glob_flags |= genconf.flags;
/* NOTE(review): servers is handed to the append before the !servers
 * test below -- confirm parse_cfile() cannot return NULL while an
 * export was given on the command line. */
3547 if(serve) {
3548 g_array_append_val(servers, *serve);
/* With no exports at all, warn about the parse error unless it is
 * NBDS_ERR_CFILE_NOTFOUND. */
3551 if(!servers || !servers->len) {
3552 if(gerr && !(gerr->domain == NBDS_ERR
3553 && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) {
3554 g_warning("Could not parse config file: %s",
3555 gerr ? gerr->message : "Unknown error");
3558 if(serve) {
3559 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
/* Nothing to export: exit with failure. */
3562 if((!serve) && (!servers||!servers->len)) {
3563 if(gerr)
3564 g_message("No configured exports; quitting.");
3565 exit(EXIT_FAILURE);
3567 if (!dontfork)
3568 daemonize();
3569 #if HAVE_OLD_GLIB
3570 g_thread_init(NULL);
3571 #endif
/* Thread pool running handle_request with at most genconf.threads threads. */
3572 tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL);
/* Sockets are opened before dousers() changes the group/user ID. */
3574 setup_servers(servers, genconf.modernaddr, genconf.modernport,
3575 genconf.unixsock, genconf.flags);
3576 dousers(genconf.user, genconf.group);
3578 #if HAVE_GNUTLS
3579 gnutls_global_init();
3580 static gnutls_dh_params_t dh_params;
3581 gnutls_dh_params_init(&dh_params);
3582 gnutls_dh_params_generate2(dh_params,
3583 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH,
3584 // Renamed in GnuTLS 3.3
3585 #if GNUTLS_VERSION_NUMBER >= 0x030300
3586 GNUTLS_SEC_PARAM_MEDIUM
3587 #else
3588 GNUTLS_SEC_PARAM_NORMAL
3589 #endif
3591 #endif
/* A configured port of "0" selects the single-client path: negotiate
 * on file descriptor 0 and serve only that client. When ISSERVER is
 * not defined, err() is called first with "inetd mode requires syslog". */
3593 if((genconf.modernport != NULL) && strcmp(genconf.modernport, "0")==0) {
3594 #ifndef ISSERVER
3595 err("inetd mode requires syslog");
3596 #endif
3597 CLIENT* client = negotiate(0, servers, &genconf);
3598 if(!client) {
3599 exit(EXIT_FAILURE);
3601 mainloop_threaded(client);
3602 return 0;
/* Otherwise hand control to the accept loop. */
3605 serveloop(servers, &genconf);