correct shell variable assignment for libnl flags in configure(.ac)
[nbd.git] / nbd-server.c
blobb0720ea140369ca0b05f9bc94d2c7f2527a6eb31
1 /*
2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
34 * <wouter@debian.org>
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
43 * <wouter@debian.org>
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
51 * <wouter@debian.org>
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
60 #include "lfs.h"
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
63 #if NEED_BSD_SOURCE
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
65 #endif
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
68 #include <assert.h>
69 #include <sys/types.h>
70 #include <sys/socket.h>
71 #include <sys/stat.h>
72 #include <sys/select.h>
73 #include <sys/wait.h>
74 #include <sys/un.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
77 #endif
78 #ifdef HAVE_SYS_UIO_H
79 #include <sys/uio.h>
80 #endif
81 #include <sys/param.h>
82 #include <signal.h>
83 #include <errno.h>
84 #include <libgen.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
87 #include <netdb.h>
88 #include <syslog.h>
89 #include <unistd.h>
90 #include <stdbool.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <fcntl.h>
95 #if HAVE_FALLOC_PH
96 #include <linux/falloc.h>
97 #endif
98 #include <arpa/inet.h>
99 #include <strings.h>
100 #include <dirent.h>
101 #ifdef HAVE_SYS_DIR_H
102 #include <sys/dir.h>
103 #endif
104 #ifdef HAVE_SYS_DIRENT_H
105 #include <sys/dirent.h>
106 #endif
107 #include <getopt.h>
108 #include <pwd.h>
109 #include <grp.h>
110 #include <dirent.h>
111 #include <ctype.h>
112 #include <inttypes.h>
114 #include <glib.h>
116 #if HAVE_OLD_GLIB
117 #include <pthread.h>
118 #endif
120 #include <semaphore.h>
122 /* used in cliserv.h, so must come first */
123 #define MY_NAME "nbd_server"
124 #include "cliserv.h"
125 #include "nbd-debug.h"
126 #include "netdb-compat.h"
127 #include "backend.h"
128 #include "treefiles.h"
130 #ifdef WITH_SDP
131 #include <sdp_inet.h>
132 #endif
134 #if HAVE_FSCTL_SET_ZERO_DATA
135 #include <io.h>
136 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
137 #define NOMINMAX 1
138 #include <windef.h>
139 #include <winbase.h>
140 #include <winioctl.h>
141 #endif
143 /** Default position of the config file */
144 #ifndef SYSCONFDIR
145 #define SYSCONFDIR "/etc"
146 #endif
147 #define CFILE SYSCONFDIR "/nbd-server/config"
149 #if HAVE_GNUTLS
150 #include <gnutls/gnutls.h>
151 #include <gnutls/x509.h>
152 #endif
154 /** Where our config file actually is */
155 gchar* config_file_pos;
157 /** global flags */
158 int glob_flags=0;
160 /* Whether we should avoid forking */
161 int dontfork = 0;
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) one.
 *
 * The value is assembled with unsigned arithmetic and only then converted
 * to off_t: shifting a signed 1 into the sign bit is undefined behaviour.
 */
#define OFFT_MAX ((off_t)((((uint64_t)1) << (sizeof(off_t)*8-1)) - 1))
168 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
169 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
171 /** Global flags: */
172 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
173 #define F_LIST 2 /**< Allow clients to list the exports on a server */
174 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
175 // also accepts F_FORCEDTLS (which is 16384)
176 GHashTable *children;
177 char pidfname[256]; /**< name of our PID file */
178 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
180 #define NEG_INIT (1 << 0)
181 #define NEG_OLD (1 << 1)
182 #define NEG_MODERN (1 << 2)
185 * If we want what the system really has set we'd have to read
186 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
188 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
189 #define SPLICE_IN 0
190 #define SPLICE_OUT 1
192 #include <nbdsrv.h>
194 /* Our thread pool */
195 GThreadPool *tpool;
197 /* A work package for the thread pool functions */
198 struct work_package {
199 CLIENT* client;
200 struct nbd_request* req;
201 int pipefd[2];
202 void* data; /**< for read requests */
205 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
206 SIGCHLD handler
207 to mark a child
208 exit */
210 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
211 SIGTERM handler
212 to mark a exit
213 request */
215 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
216 handler to mark a
217 reconfiguration
218 request */
220 GArray* modernsocks; /**< Sockets for the modern handler. Not used
221 if a client was only specified on the
222 command line; only port used if
223 oldstyle is set to false (and then the
224 command-line client isn't used, gna gna).
225 This may be more than one socket on
226 systems that don't support serving IPv4
227 and IPv6 from the same socket (like,
228 e.g., FreeBSD) */
229 GArray* childsocks; /**< parent-side sockets for communication with children */
230 int commsocket; /**< child-side socket for communication with parent */
231 static sem_t file_wait_sem;
233 bool logged_oversized=false; /**< whether we logged oversized requests already */
/**
 * Type of configuration file values
 */
typedef enum {
	PARAM_INT,		/**< This parameter is an integer (read as a gint) */
	PARAM_INT64,		/**< This parameter is a 64-bit integer (read as a gint64) */
	PARAM_STRING,		/**< This parameter is a string */
	PARAM_BOOL,		/**< This parameter is a boolean */
} PARAM_TYPE;
246 * Configuration file values
248 typedef struct {
249 gchar *paramname; /**< Name of the parameter, as it appears in
250 the config file */
251 gboolean required; /**< Whether this is a required (as opposed to
252 optional) parameter */
253 PARAM_TYPE ptype; /**< Type of the parameter. */
254 gpointer target; /**< Pointer to where the data of this
255 parameter should be written. If ptype is
256 PARAM_BOOL, the data is or'ed rather than
257 overwritten. */
258 gint flagval; /**< Flag mask for this parameter in case ptype
259 is PARAM_BOOL. */
260 } PARAM;
263 * Configuration file values of the "generic" section
265 struct generic_conf {
266 gchar *user; /**< user we run the server as */
267 gchar *group; /**< group we run running as */
268 gchar *modernaddr; /**< address of the modern socket */
269 gchar *modernport; /**< port of the modern socket */
270 gchar *unixsock; /**< file name of the unix domain socket */
271 gchar *certfile; /**< certificate file */
272 gchar *keyfile; /**< key file */
273 gchar *cacertfile; /**< CA certificate file */
274 gchar *tlsprio; /**< TLS priority string */
275 gint flags; /**< global flags */
276 gint threads; /**< maximum number of parallel threads we want to run */
280 * Translate a command name into human readable form
282 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
283 * @return pointer to the command name
285 static inline const char * getcommandname(uint64_t command) {
286 switch (command) {
287 case NBD_CMD_READ:
288 return "NBD_CMD_READ";
289 case NBD_CMD_WRITE:
290 return "NBD_CMD_WRITE";
291 case NBD_CMD_DISC:
292 return "NBD_CMD_DISC";
293 case NBD_CMD_FLUSH:
294 return "NBD_CMD_FLUSH";
295 case NBD_CMD_TRIM:
296 return "NBD_CMD_TRIM";
297 case NBD_CMD_WRITE_ZEROES:
298 return "NBD_CMD_WRITE_ZEROES";
299 default:
300 return "UNKNOWN";
304 #if HAVE_GNUTLS
305 static int writeit_tls(gnutls_session_t s, void *buf, size_t len) {
306 ssize_t res;
307 char *m;
308 while(len > 0) {
309 DEBUG("+");
310 if ((res = gnutls_record_send(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
311 m = g_strdup_printf("issue while sending data: %s", gnutls_strerror(res));
312 err_nonfatal(m);
313 g_free(m);
314 } else if(res < 0) {
315 m = g_strdup_printf("could not send data: %s", gnutls_strerror(res));
316 err_nonfatal(m);
317 g_free(m);
318 return -1;
319 } else {
320 len -= res;
321 buf += res;
324 return 0;
327 static int readit_tls(gnutls_session_t s, void *buf, size_t len) {
328 ssize_t res;
329 char *m;
330 while(len > 0) {
331 DEBUG("*");
332 if((res = gnutls_record_recv(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
333 m = g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res));
334 err_nonfatal(m);
335 g_free(m);
336 } else if(res < 0) {
337 m = g_strdup_printf("could not receive data: %s", gnutls_strerror(res));
338 err_nonfatal(m);
339 g_free(m);
340 return -1;
341 } else {
342 len -= res;
343 buf += res;
346 return 0;
349 static int socket_read_tls(CLIENT* client, void *buf, size_t len) {
350 return readit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
353 static int socket_write_tls(CLIENT* client, void *buf, size_t len) {
354 return writeit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
356 #endif // HAVE_GNUTLS
358 static int socket_read_notls(CLIENT* client, void *buf, size_t len) {
359 return readit(client->net, buf, len);
362 static int socket_write_notls(CLIENT* client, void *buf, size_t len) {
363 return writeit(client->net, buf, len);
366 static void socket_read(CLIENT* client, void *buf, size_t len) {
367 g_assert(client->socket_read != NULL);
368 if(client->socket_read(client, buf, len)<0) {
369 g_assert(client->socket_closed != NULL);
370 client->socket_closed(client);
375 * Consume data from a socket that we don't want
377 * @param c the client to read from
378 * @param len the number of bytes to consume
379 * @param buf a buffer
380 * @param bufsiz the size of the buffer
382 static inline void consume(CLIENT* c, size_t len, void * buf, size_t bufsiz) {
383 size_t curlen;
384 while (len>0) {
385 curlen = (len>bufsiz)?bufsiz:len;
386 socket_read(c, buf, curlen);
387 len -= curlen;
392 * Consume a length field and corresponding payload that we don't want
394 * @param c the client to read from
396 static inline void consume_len(CLIENT* c) {
397 uint32_t len;
398 char buf[1024];
400 socket_read(c, &len, sizeof(len));
401 len = ntohl(len);
402 consume(c, len, buf, sizeof(buf));
405 static void socket_write(CLIENT* client, void *buf, size_t len) {
406 g_assert(client->socket_write != NULL);
407 if(client->socket_write(client, buf, len)<0) {
408 g_assert(client->socket_closed != NULL);
409 client->socket_closed(client);
413 static inline void socket_closed_negotiate(CLIENT* client) {
414 err("Negotiation failed: %m");
418 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
419 * options
421 * @param command the command to be ran. Read from the config file
422 * @param file the file name we're about to export
424 int do_run(gchar* command, gchar* file) {
425 gchar* cmd;
426 int retval=0;
428 if(command && *command) {
429 cmd = g_strdup_printf(command, file);
430 retval=system(cmd);
431 g_free(cmd);
433 return retval;
436 static inline void finalize_client(CLIENT* client) {
437 g_thread_pool_free(tpool, FALSE, TRUE);
438 do_run(client->server->postrun, client->exportname);
439 if(client->transactionlogfd != -1) {
440 close(client->transactionlogfd);
441 client->transactionlogfd = -1;
443 if(client->server->flags & F_COPYONWRITE) {
444 unlink(client->difffilename);
448 static inline void socket_closed_transmission(CLIENT* client) {
449 int saved_errno = errno;
450 finalize_client(client);
451 errno = saved_errno;
452 err("Connection dropped: %m");
455 #ifdef HAVE_SPLICE
457 * Splice data between a pipe and a file descriptor
459 * @param fd_in The fd to splice from.
460 * @param off_in The fd_in offset to splice from.
461 * @param fd_out The fd to splice to.
462 * @param off_out The fd_out offset to splice to.
463 * @param len The length to splice.
465 static inline void spliceit(int fd_in, loff_t *off_in, int fd_out,
466 loff_t *off_out, size_t len)
468 ssize_t ret;
469 while (len > 0) {
470 if ((ret = splice(fd_in, off_in, fd_out, off_out, len,
471 SPLICE_F_MOVE)) <= 0)
472 err("Splice failed: %m");
473 len -= ret;
476 #endif
479 * Print out a message about how to use nbd-server. Split out to a separate
480 * function so that we can call it from multiple places
482 void usage() {
483 printf("This is nbd-server version " VERSION "\n");
484 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
485 "\t-r|--read-only\t\tread only\n"
486 "\t-m|--multi-file\t\tmultiple file\n"
487 "\t-c|--copy-on-write\tcopy on write\n"
488 "\t-C|--config-file\tspecify an alternate configuration file\n"
489 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
490 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
491 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
492 "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
493 "\t-V|--version\toutput the version and exit\n\n"
494 "\tif port is set to 0, stdin is used (for running from inetd).\n"
495 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
496 "\t\taddress of the machine trying to connect\n"
497 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
498 printf("Using configuration file %s\n", CFILE);
499 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT);
502 /* Dumps a config file section of the given SERVER*, and exits. */
503 void dump_section(SERVER* serve, gchar* section_header) {
504 printf("[%s]\n", section_header);
505 printf("\texportname = %s\n", serve->exportname);
506 printf("\tlistenaddr = %s\n", serve->listenaddr);
507 if(serve->flags & F_READONLY) {
508 printf("\treadonly = true\n");
510 if(serve->flags & F_MULTIFILE) {
511 printf("\tmultifile = true\n");
513 if(serve->flags & F_TREEFILES) {
514 printf("\ttreefiles = true\n");
516 if(serve->flags & F_COPYONWRITE) {
517 printf("\tcopyonwrite = true\n");
519 if(serve->expected_size) {
520 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
522 if(serve->authname) {
523 printf("\tauthfile = %s\n", serve->authname);
525 exit(EXIT_SUCCESS);
529 * Parse the command line.
531 * @param argc the argc argument to main()
532 * @param argv the argv argument to main()
534 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) {
535 int i=0;
536 int nonspecial=0;
537 int c;
538 struct option long_options[] = {
539 {"read-only", no_argument, NULL, 'r'},
540 {"multi-file", no_argument, NULL, 'm'},
541 {"copy-on-write", no_argument, NULL, 'c'},
542 {"dont-fork", no_argument, NULL, 'd'},
543 {"authorize-file", required_argument, NULL, 'l'},
544 {"config-file", required_argument, NULL, 'C'},
545 {"pid-file", required_argument, NULL, 'p'},
546 {"output-config", required_argument, NULL, 'o'},
547 {"max-connection", required_argument, NULL, 'M'},
548 {"version", no_argument, NULL, 'V'},
549 {0,0,0,0}
551 SERVER *serve;
552 off_t es;
553 size_t last;
554 char suffix;
555 bool do_output=false;
556 gchar* section_header="";
557 gchar** addr_port;
559 if(argc==1) {
560 return NULL;
562 serve=g_new0(SERVER, 1);
563 serve->authname = g_strdup(default_authname);
564 serve->virtstyle=VIRT_IPLIT;
565 while((c=getopt_long(argc, argv, "-C:cwdl:mo:rp:M:V", long_options, &i))>=0) {
566 switch (c) {
567 case 1:
568 /* non-option argument */
569 switch(nonspecial++) {
570 case 0:
571 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
572 addr_port=g_strsplit(optarg, ":", 2);
574 /* Check for "@" - maybe user using this separator
575 for IPv4 address */
576 if(!addr_port[1]) {
577 g_strfreev(addr_port);
578 addr_port=g_strsplit(optarg, "@", 2);
580 } else {
581 addr_port=g_strsplit(optarg, "@", 2);
584 if(addr_port[1]) {
585 genconf->modernport=g_strdup(addr_port[1]);
586 genconf->modernaddr=g_strdup(addr_port[0]);
587 } else {
588 g_free(genconf->modernaddr);
589 genconf->modernaddr=NULL;
590 genconf->modernport=g_strdup(addr_port[0]);
592 g_strfreev(addr_port);
593 break;
594 case 1:
595 serve->exportname = g_strdup(optarg);
596 if(serve->exportname[0] != '/') {
597 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
598 exit(EXIT_FAILURE);
600 break;
601 case 2:
602 last=strlen(optarg)-1;
603 suffix=optarg[last];
604 if (suffix == 'k' || suffix == 'K' ||
605 suffix == 'm' || suffix == 'M')
606 optarg[last] = '\0';
607 es = (off_t)atoll(optarg);
608 switch (suffix) {
609 case 'm':
610 case 'M': es <<= 10;
611 case 'k':
612 case 'K': es <<= 10;
613 default : break;
615 serve->expected_size = es;
616 break;
618 break;
619 case 'r':
620 serve->flags |= F_READONLY;
621 break;
622 case 'm':
623 serve->flags |= F_MULTIFILE;
624 break;
625 case 'o':
626 do_output = true;
627 section_header = g_strdup(optarg);
628 break;
629 case 'p':
630 strncpy(pidfname, optarg, 256);
631 pidfname[255]='\0';
632 break;
633 case 'c':
634 serve->flags |=F_COPYONWRITE;
635 break;
636 case 'd':
637 dontfork = 1;
638 break;
639 case 'C':
640 g_free(config_file_pos);
641 config_file_pos=g_strdup(optarg);
642 break;
643 case 'l':
644 g_free(serve->authname);
645 serve->authname=g_strdup(optarg);
646 break;
647 case 'M':
648 serve->max_connections = strtol(optarg, NULL, 0);
649 break;
650 case 'V':
651 printf("This is nbd-server version " VERSION "\n");
652 exit(EXIT_SUCCESS);
653 break;
654 default:
655 usage();
656 exit(EXIT_FAILURE);
657 break;
660 /* What's left: the port to export, the name of the to be exported
661 * file, and, optionally, the size of the file, in that order. */
662 if(nonspecial<2) {
663 g_free(serve);
664 serve=NULL;
665 } else {
666 serve->servename = "";
668 if(do_output) {
669 if(!serve) {
670 g_critical("Need a complete configuration on the command line to output a config file section!");
671 exit(EXIT_FAILURE);
673 dump_section(serve, section_header);
675 return serve;
678 /* forward definition of parse_cfile */
679 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
681 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
682 #define NBD_D_TYPE de->d_type
683 #else
684 #define NBD_D_TYPE 0
685 #define DT_UNKNOWN 0
686 #define DT_REG 1
687 #endif
690 * Parse config file snippets in a directory. Uses readdir() and friends
691 * to find files and open them, then passes them on to parse_cfile
692 * with have_global set false
694 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
695 DIR* dirh = opendir(dir);
696 struct dirent* de;
697 gchar* fname;
698 GArray* retval = NULL;
699 GArray* tmp;
700 struct stat stbuf;
702 if(!dirh) {
703 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
704 return NULL;
706 errno=0;
707 while((de = readdir(dirh))) {
708 int saved_errno=errno;
709 fname = g_build_filename(dir, de->d_name, NULL);
710 switch(NBD_D_TYPE) {
711 case DT_UNKNOWN:
712 /* Filesystem doesn't return type of
713 * file through readdir. Run stat() on
714 * the file instead */
715 if(stat(fname, &stbuf)) {
716 perror("stat");
717 goto err_out;
719 if (!S_ISREG(stbuf.st_mode)) {
720 goto next;
722 case DT_REG:
723 /* Skip unless the name ends with '.conf' */
724 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
725 goto next;
727 tmp = parse_cfile(fname, genconf, false, e);
728 errno=saved_errno;
729 if(*e) {
730 goto err_out;
732 if(!retval)
733 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
734 retval = g_array_append_vals(retval, tmp->data, tmp->len);
735 g_array_free(tmp, TRUE);
736 default:
737 break;
739 next:
740 g_free(fname);
742 if(errno) {
743 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
744 err_out:
745 if(retval)
746 g_array_free(retval, TRUE);
747 retval = NULL;
749 if(dirh)
750 closedir(dirh);
751 return retval;
755 * Parse the config file.
757 * @param f the name of the config file
759 * @param genconf a pointer to generic configuration which will get
760 * updated with parsed values. If NULL, then parsed generic
761 * configuration values are safely and silently discarded.
763 * @param e a GError. Error code can be any of the following:
764 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
765 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
766 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
768 * @param expect_generic if true, we expect a configuration file that
769 * contains a [generic] section. If false, we don't.
771 * @return a GArray of SERVER* pointers. If the config file is empty or does not
772 * exist, returns an empty GArray; if the config file contains an
773 * error, returns NULL, and e is set appropriately
775 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
776 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
777 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
778 gchar* cfdir = NULL;
779 SERVER s;
780 gchar *virtstyle=NULL;
781 PARAM lp[] = {
782 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
783 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
784 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
785 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
786 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
787 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
788 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
789 { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 },
790 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
791 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
792 { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES },
793 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
794 { "waitfile", FALSE, PARAM_BOOL, &(s.flags), F_WAIT },
795 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
796 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
797 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
798 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
799 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
800 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
801 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
802 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
803 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
804 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
805 { "force_tls", FALSE, PARAM_BOOL, &(s.flags), F_FORCEDTLS },
806 { "splice", FALSE, PARAM_BOOL, &(s.flags), F_SPLICE},
808 const int lp_size=sizeof(lp)/sizeof(PARAM);
809 struct generic_conf genconftmp;
810 PARAM gp[] = {
811 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
812 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
813 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
814 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
815 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
816 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
817 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
818 { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 },
819 { "max_threads", FALSE, PARAM_INT, &(genconftmp.threads), 0 },
820 { "force_tls", FALSE, PARAM_BOOL, &(genconftmp.flags), F_FORCEDTLS },
821 { "certfile", FALSE, PARAM_STRING, &(genconftmp.certfile), 0 },
822 { "keyfile", FALSE, PARAM_STRING, &(genconftmp.keyfile), 0 },
823 { "cacertfile", FALSE, PARAM_STRING, &(genconftmp.cacertfile), 0 },
824 { "tlsprio", FALSE, PARAM_STRING, &(genconftmp.tlsprio), 0 },
826 PARAM* p=gp;
827 int p_size=sizeof(gp)/sizeof(PARAM);
828 GKeyFile *cfile;
829 GError *err = NULL;
830 const char *err_msg=NULL;
831 GArray *retval=NULL;
832 gchar **groups;
833 gboolean bval;
834 gint ival;
835 gint64 i64val;
836 gchar* sval;
837 gchar* startgroup;
838 gint i;
839 gint j;
841 memset(&genconftmp, 0, sizeof(struct generic_conf));
843 genconftmp.tlsprio = "NORMAL:-VERS-TLS-ALL:+VERS-TLS1.2:%SERVER_PRECEDENCE";
845 if (genconf) {
846 /* Use the passed configuration values as defaults. The
847 * parsing algorithm below updates all parameter targets
848 * found from configuration files. */
849 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
852 cfile = g_key_file_new();
853 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
854 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
855 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
856 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
857 f, err->message);
858 g_key_file_free(cfile);
859 return retval;
861 startgroup = g_key_file_get_start_group(cfile);
862 if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
863 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
864 g_key_file_free(cfile);
865 return NULL;
867 groups = g_key_file_get_groups(cfile, NULL);
868 for(i=0;groups[i];i++) {
869 memset(&s, '\0', sizeof(SERVER));
871 /* After the [generic] group or when we're parsing an include
872 * directory, start parsing exports */
873 if(i==1 || !expect_generic) {
874 p=lp;
875 p_size=lp_size;
877 for(j=0;j<p_size;j++) {
878 assert(p[j].target != NULL);
879 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
880 switch(p[j].ptype) {
881 case PARAM_INT:
882 ival = g_key_file_get_integer(cfile,
883 groups[i],
884 p[j].paramname,
885 &err);
886 if(!err) {
887 *((gint*)p[j].target) = ival;
889 break;
890 case PARAM_INT64:
891 i64val = g_key_file_get_int64(cfile,
892 groups[i],
893 p[j].paramname,
894 &err);
895 if(!err) {
896 *((gint64*)p[j].target) = i64val;
898 break;
899 case PARAM_STRING:
900 sval = g_key_file_get_string(cfile,
901 groups[i],
902 p[j].paramname,
903 &err);
904 if(!err) {
905 *((gchar**)p[j].target) = sval;
907 break;
908 case PARAM_BOOL:
909 bval = g_key_file_get_boolean(cfile,
910 groups[i],
911 p[j].paramname, &err);
912 if(!err) {
913 if(bval) {
914 *((gint*)p[j].target) |= p[j].flagval;
915 } else {
916 *((gint*)p[j].target) &= ~(p[j].flagval);
919 break;
921 if(err) {
922 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
923 if(!p[j].required) {
924 /* Ignore not-found error for optional values */
925 g_clear_error(&err);
926 continue;
927 } else {
928 err_msg = MISSING_REQUIRED_ERROR;
930 } else {
931 err_msg = DEFAULT_ERROR;
933 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
934 g_array_free(retval, TRUE);
935 g_error_free(err);
936 g_key_file_free(cfile);
937 return NULL;
940 if(virtstyle) {
941 if(!strncmp(virtstyle, "none", 4)) {
942 s.virtstyle=VIRT_NONE;
943 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
944 s.virtstyle=VIRT_IPLIT;
945 } else if(!strncmp(virtstyle, "iphash", 6)) {
946 s.virtstyle=VIRT_IPHASH;
947 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
948 s.virtstyle=VIRT_CIDR;
949 if(strlen(virtstyle)<10) {
950 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
951 g_array_free(retval, TRUE);
952 g_key_file_free(cfile);
953 return NULL;
955 s.cidrlen=strtol(virtstyle+8, NULL, 0);
956 } else {
957 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
958 g_array_free(retval, TRUE);
959 g_key_file_free(cfile);
960 return NULL;
962 } else {
963 s.virtstyle=VIRT_IPLIT;
965 if(genconftmp.flags & F_OLDSTYLE) {
966 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
967 g_message("Exiting.");
968 return NULL;
970 #ifndef HAVE_SPLICE
971 if (s.flags & F_SPLICE) {
972 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without splice support, yet group %s uses it", groups[i]);
973 g_array_free(retval, TRUE);
974 g_key_file_free(cfile);
975 return NULL;
977 #endif
978 /* We can't mix copyonwrite and splice. */
979 if ((s.flags & F_COPYONWRITE) && (s.flags & F_SPLICE)) {
980 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_SPLICE,
981 "Cannot mix copyonwrite with splice for an export in group %s",
982 groups[i]);
983 g_array_free(retval, TRUE);
984 g_key_file_free(cfile);
985 return NULL;
987 if ((s.flags & F_COPYONWRITE) && (s.flags & F_WAIT)) {
988 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_WAIT,
989 "Cannot mix copyonwrite with waitfile for an export in group %s",
990 groups[i]);
991 g_array_free(retval, TRUE);
992 g_key_file_free(cfile);
993 return NULL;
995 /* Don't need to free this, it's not our string */
996 virtstyle=NULL;
997 /* Don't append values for the [generic] group */
998 if(i>0 || !expect_generic) {
999 s.servename = groups[i];
1001 g_array_append_val(retval, s);
1003 #ifndef WITH_SDP
1004 if(s.flags & F_SDP) {
1005 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
1006 g_array_free(retval, TRUE);
1007 g_key_file_free(cfile);
1008 return NULL;
1010 #endif
1012 g_key_file_free(cfile);
1013 if(cfdir) {
1014 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
1015 if(extra) {
1016 retval = g_array_append_vals(retval, extra->data, extra->len);
1017 i+=extra->len;
1018 g_array_free(extra, TRUE);
1019 } else {
1020 if(*e) {
1021 g_array_free(retval, TRUE);
1022 return NULL;
1026 if(i==1 && expect_generic) {
1027 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
1030 if (genconf) {
1031 /* Return the updated generic configuration through the
1032 * pointer parameter. */
1033 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
1036 return retval;
1040 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1041 * main loop of the root server process. This allows us to separate the signal
1042 * catching from th actual task triggered by SIGCHLD and hence processing in the
1043 * interrupt context is kept as minimial as possible.
1045 * @param s the signal we're handling (must be SIGCHLD, or something
1046 * is severely wrong)
1048 static void sigchld_handler(const int s G_GNUC_UNUSED) {
1049 is_sigchld_caught = 1;
1053 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1055 * @param key the key
1056 * @param value the value corresponding to the above key
1057 * @param user_data a pointer which we always set to 1, so that we know what
1058 * will happen next.
1060 void killchild(gpointer key, gpointer value, gpointer user_data) {
1061 pid_t *pid=value;
1063 kill(*pid, SIGTERM);
1067 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1068 * main loop of the root server process. This allows us to separate the signal
1069 * catching from th actual task triggered by SIGTERM and hence processing in the
1070 * interrupt context is kept as minimial as possible.
1072 * @param s the signal we're handling (must be SIGTERM, or something
1073 * is severely wrong).
1075 static void sigterm_handler(const int s G_GNUC_UNUSED) {
1076 is_sigterm_caught = 1;
1080 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1081 * the main loop of the root server process. This allows us to separate
1082 * the signal catching from th actual task triggered by SIGHUP and hence
1083 * processing in the interrupt context is kept as minimial as possible.
1085 * @param s the signal we're handling (must be SIGHUP, or something
1086 * is severely wrong).
1088 static void sighup_handler(const int s G_GNUC_UNUSED) {
1089 is_sighup_caught = 1;
1092 static void sigusr1_handler(const int s G_GNUC_UNUSED) {
1093 msg(LOG_INFO, "Got SIGUSR1");
1094 sem_post(&file_wait_sem);
1098 * Get the file handle and offset, given an export offset.
1100 * @param client The client we're serving for
1101 * @param a The offset to get corresponding file/offset for
1102 * @param fhandle [out] File descriptor
1103 * @param foffset [out] Offset into fhandle
1104 * @param maxbytes [out] Tells how many bytes can be read/written
1105 * from fhandle starting at foffset (0 if there is no limit)
1106 * @return 0 on success, -1 on failure
1108 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1109 GArray * const export = client->export;
1111 /* Negative offset not allowed */
1112 if(a < 0)
1113 return -1;
1115 /* Open separate file for treefiles */
1116 if (client->server->flags & F_TREEFILES) {
1117 *foffset = a % TREEPAGESIZE;
1118 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
1119 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock);
1120 return 0;
1123 /* Binary search for last file with starting offset <= a */
1124 FILE_INFO fi;
1125 int start = 0;
1126 int end = export->len - 1;
1127 while( start <= end ) {
1128 int mid = (start + end) / 2;
1129 fi = g_array_index(export, FILE_INFO, mid);
1130 if( fi.startoff < a ) {
1131 start = mid + 1;
1132 } else if( fi.startoff > a ) {
1133 end = mid - 1;
1134 } else {
1135 start = end = mid;
1136 break;
1140 /* end should never go negative, since first startoff is 0 and a >= 0 */
1141 assert(end >= 0);
1143 fi = g_array_index(export, FILE_INFO, end);
1144 *fhandle = fi.fhandle;
1145 *foffset = a - fi.startoff;
1146 *maxbytes = 0;
1147 if( end+1 < export->len ) {
1148 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1149 *maxbytes = fi_next.startoff - a;
1152 return 0;
1156 * Write an amount of bytes at a given offset to the right file. This
1157 * abstracts the write-side of the multiple file option.
1159 * @param a The offset where the write should start
1160 * @param buf The buffer to write from
1161 * @param len The length of buf
1162 * @param client The client we're serving for
1163 * @param fua Flag to indicate 'Force Unit Access'
1164 * @return The number of bytes actually written, or -1 in case of an error
1166 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1167 int fhandle;
1168 off_t foffset;
1169 size_t maxbytes;
1170 ssize_t retval;
1172 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1173 return -1;
1174 if(maxbytes && len > maxbytes)
1175 len = maxbytes;
1177 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1179 retval = pwrite(fhandle, buf, len, foffset);
1180 if(client->server->flags & F_SYNC) {
1181 fsync(fhandle);
1182 } else if (fua) {
1184 /* This is where we would do the following
1185 * #ifdef USE_SYNC_FILE_RANGE
1186 * However, we don't, for the reasons set out below
1187 * by Christoph Hellwig <hch@infradead.org>
1189 * [BEGINS]
1190 * fdatasync is equivalent to fsync except that it does not flush
1191 * non-essential metadata (basically just timestamps in practice), but it
1192 * does flush metadata requried to find the data again, e.g. allocation
1193 * information and extent maps. sync_file_range does nothing but flush
1194 * out pagecache content - it means you basically won't get your data
1195 * back in case of a crash if you either:
1197 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1198 * b) are using a sparse file on a filesystem
1199 * c) are using a fallocate-preallocated file on a filesystem
1200 * d) use any file on a COW filesystem like btrfs
1202 * e.g. it only does anything useful for you if you do not have a volatile
1203 * write cache, and either use a raw block device node, or just overwrite
1204 * an already fully allocated (and not preallocated) file on a non-COW
1205 * filesystem.
1206 * [ENDS]
1208 * What we should do is open a second FD with O_DSYNC set, then write to
1209 * that when appropriate. However, with a Linux client, every REQ_FUA
1210 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1211 * problems.
1214 #if 0
1215 sync_file_range(fhandle, foffset, len,
1216 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1217 SYNC_FILE_RANGE_WAIT_AFTER);
1218 #else
1219 fdatasync(fhandle);
1220 #endif
1222 /* close file pointer in case of treefiles */
1223 if (client->server->flags & F_TREEFILES) {
1224 close(fhandle);
1226 return retval;
1230 * Call rawexpwrite repeatedly until all data has been written.
1232 * @param a The offset where the write should start
1233 * @param buf The buffer to write from
1234 * @param len The length of buf
1235 * @param client The client we're serving for
1236 * @param fua Flag to indicate 'Force Unit Access'
1237 * @return 0 on success, nonzero on failure
1239 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1240 ssize_t ret=0;
1242 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1243 a += ret;
1244 buf += ret;
1245 len -= ret;
1247 return (ret < 0 || len != 0);
1251 * Read an amount of bytes at a given offset from the right file. This
1252 * abstracts the read-side of the multiple files option.
1254 * @param a The offset where the read should start
1255 * @param buf A buffer to read into
1256 * @param len The size of buf
1257 * @param client The client we're serving for
1258 * @return The number of bytes actually read, or -1 in case of an
1259 * error.
1261 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1262 int fhandle;
1263 off_t foffset;
1264 size_t maxbytes;
1265 ssize_t retval;
1267 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1268 return -1;
1269 if(maxbytes && len > maxbytes)
1270 len = maxbytes;
1272 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1274 retval = pread(fhandle, buf, len, foffset);
1275 if (client->server->flags & F_TREEFILES) {
1276 close(fhandle);
1278 return retval;
1282 * Call rawexpread repeatedly until all data has been read.
1283 * @return 0 on success, nonzero on failure
1285 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1286 ssize_t ret=0;
1288 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1289 a += ret;
1290 buf += ret;
1291 len -= ret;
1293 return (ret < 0 || len != 0);
1296 #ifdef HAVE_SPLICE
1297 int rawexpsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir,
1298 int fua)
1300 int fhandle;
1301 off_t foffset;
1302 size_t maxbytes;
1303 ssize_t retval;
1305 if (get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1306 return -1;
1307 if (maxbytes && len > maxbytes)
1308 len = maxbytes;
1310 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1311 (dir == SPLICE_IN) ? "from" : "to", fhandle,
1312 (unsigned long long)a, (unsigned)len);
1315 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1316 * it might, so go ahead and use it.
1318 if (dir == SPLICE_IN) {
1319 retval = splice(fhandle, &foffset, pipe, NULL, len,
1320 SPLICE_F_MOVE);
1321 } else {
1322 retval = splice(pipe, NULL, fhandle, &foffset, len,
1323 SPLICE_F_MOVE);
1324 if (client->server->flags & F_SYNC)
1325 fsync(fhandle);
1326 else if (fua)
1327 fdatasync(fhandle);
1329 if (client->server->flags & F_TREEFILES)
1330 close(fhandle);
1331 return retval;
1335 * Splice an amount of bytes from the given offset from/into the right file
1336 * from/into the given pipe.
1337 * @param pipe The pipe we are using for this splice.
1338 * @param a The offset of the file we are operating on.
1339 * @param len The length of the splice.
1340 * @param client The client we're splicing for.
1341 * @param dir The direction we are doing the splice in.
1342 * @param fua Set if this is a write and we need to fua.
1343 * @return 0 on success, nonzero on failure.
1345 int expsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir, int fua)
1347 ssize_t ret;
1349 while (len > 0 &&
1350 (ret = rawexpsplice(pipe, a, len, client, dir, fua)) > 0) {
1351 a += ret;
1352 len -= ret;
1354 return (ret < 0 || len != 0);
1356 #endif /* HAVE_SPLICE */
1359 * Read an amount of bytes at a given offset from the right file. This
1360 * abstracts the read-side of the copyonwrite stuff, and calls
1361 * rawexpread() with the right parameters to do the actual work.
1362 * @param a The offset where the read should start
1363 * @param buf A buffer to read into
1364 * @param len The size of buf
1365 * @param client The client we're going to read for
1366 * @return 0 on success, nonzero on failure
1368 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1369 off_t rdlen, offset;
1370 off_t mapcnt, mapl, maph, pagestart;
1372 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1374 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1375 return(rawexpread_fully(a, buf, len, client));
1377 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1379 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1380 pagestart=mapcnt*DIFFPAGESIZE;
1381 offset=a-pagestart;
1382 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1383 len : (size_t)DIFFPAGESIZE-offset;
1384 if (!(client->server->flags & F_COPYONWRITE))
1385 pthread_rwlock_rdlock(&client->export_lock);
1386 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1387 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1388 (unsigned long)(client->difmap[mapcnt]));
1389 if (pread(client->difffile, buf, rdlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != rdlen) goto fail;
1390 } else { /* the block is not there */
1391 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1392 DEBUG("Page %llu is not here, and waiting for file\n",
1393 (unsigned long long)mapcnt);
1394 goto fail;
1395 } else {
1396 DEBUG("Page %llu is not here, we read the original one\n",
1397 (unsigned long long)mapcnt);
1398 if(rawexpread_fully(a, buf, rdlen, client)) goto fail;
1401 if (!(client->server->flags & F_COPYONWRITE))
1402 pthread_rwlock_unlock(&client->export_lock);
1403 len-=rdlen; a+=rdlen; buf+=rdlen;
1405 return 0;
1406 fail:
1407 if (!(client->server->flags & F_COPYONWRITE))
1408 pthread_rwlock_unlock(&client->export_lock);
1409 return -1;
1413 * Write an amount of bytes at a given offset to the right file. This
1414 * abstracts the write-side of the copyonwrite option, and calls
1415 * rawexpwrite() with the right parameters to do the actual work.
1417 * @param a The offset where the write should start
1418 * @param buf The buffer to write from
1419 * @param len The length of buf
1420 * @param client The client we're going to write for.
1421 * @param fua Flag to indicate 'Force Unit Access'
1422 * @return 0 on success, nonzero on failure
1424 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1425 char pagebuf[DIFFPAGESIZE];
1426 off_t mapcnt,mapl,maph;
1427 off_t wrlen,rdlen;
1428 off_t pagestart;
1429 off_t offset;
1431 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1434 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1435 return(rawexpwrite_fully(a, buf, len, client, fua));
1437 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1439 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1440 pagestart=mapcnt*DIFFPAGESIZE ;
1441 offset=a-pagestart ;
1442 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1443 len : (size_t)DIFFPAGESIZE-offset;
1445 if (!(client->server->flags & F_COPYONWRITE))
1446 pthread_rwlock_rdlock(&client->export_lock);
1447 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1448 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1449 (unsigned long)(client->difmap[mapcnt])) ;
1450 if (pwrite(client->difffile, buf, wrlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != wrlen) goto fail;
1451 } else { /* the block is not there */
1452 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1453 DEBUG("Page %llu is not here, we put it at %lu\n",
1454 (unsigned long long)mapcnt,
1455 (unsigned long)(client->difmap[mapcnt]));
1456 if ((offset != 0) || (wrlen != DIFFPAGESIZE)){
1457 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1458 DEBUG("error: we can write only whole page while waiting for file\n");
1459 goto fail;
1461 rdlen=DIFFPAGESIZE ;
1462 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1463 goto fail;
1465 memcpy(pagebuf+offset,buf,wrlen) ;
1466 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != DIFFPAGESIZE)
1467 goto fail;
1469 if (!(client->server->flags & F_COPYONWRITE))
1470 pthread_rwlock_unlock(&client->export_lock);
1471 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1473 if (client->server->flags & F_SYNC) {
1474 fsync(client->difffile);
1475 } else if (fua) {
1476 /* open question: would it be cheaper to do multiple sync_file_ranges?
1477 as we iterate through the above?
1479 fdatasync(client->difffile);
1481 return 0;
1482 fail:
1483 if (!(client->server->flags & F_COPYONWRITE))
1484 pthread_rwlock_unlock(&client->export_lock);
1485 return -1;
1491 * Write an amount of zeroes at a given offset to the right file.
1492 * This routine could be optimised by not calling expwrite. However,
1493 * this is by far the simplest way to do it.
1495 * @param req the request
1496 * @param client The client we're going to write for.
1497 * @return 0 on success, nonzero on failure
1499 int expwrite_zeroes(struct nbd_request* req, CLIENT* client, int fua) {
1500 off_t a = req->from;
1501 size_t len = req->len;
1502 size_t maxsize = 64LL*1024LL*1024LL;
1503 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1504 char *buf = calloc (1, maxsize);
1505 int ret;
1506 while (len > 0) {
1507 size_t l = len;
1508 if (l > maxsize)
1509 l = maxsize;
1510 ret = expwrite(a, buf, l, client, fua);
1511 if (ret) {
1512 free(buf);
1513 return ret;
1515 len -= l;
1517 free(buf);
1518 return 0;
1522 * Flush data to a client
1524 * @param client The client we're going to write for.
1525 * @return 0 on success, nonzero on failure
1527 int expflush(CLIENT *client) {
1528 gint i;
1530 if (client->server->flags & F_COPYONWRITE) {
1531 return fsync(client->difffile);
1534 if (client->server->flags & F_WAIT) {
1535 return fsync(client->difffile);
1538 if (client->server->flags & F_TREEFILES ) {
1539 // all we can do is force sync the entire filesystem containing the tree
1540 if (client->server->flags & F_READONLY)
1541 return 0;
1542 sync();
1543 return 0;
1546 for (i = 0; i < client->export->len; i++) {
1547 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1548 if (fsync(fi.fhandle) < 0)
1549 return -1;
1552 return 0;
/**
 * Deallocate a byte range of a file, where the platform offers a way to
 * do so. Failures of the underlying call are not reported.
 *
 * @param fd the file descriptor to operate on
 * @param off start of the range
 * @param len length of the range
 **/
void punch_hole(int fd, off_t off, off_t len)
{
	DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len);
#if HAVE_FALLOC_PH
	/* keep the file size; only release the range */
	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
#elif HAVE_FSCTL_SET_ZERO_DATA
	FILE_ZERO_DATA_INFORMATION range;
	DWORD returned;
	HANDLE handle = (HANDLE)_get_osfhandle(fd);

	range.FileOffset.QuadPart = off;
	range.BeyondFinalZero.QuadPart = off + len;
	DeviceIoControl(handle, FSCTL_SET_ZERO_DATA, &range, sizeof(range), NULL, 0, &returned, NULL);
#else
	DEBUG("punching holes not supported on this platform\n");
#endif
}
1571 static void send_reply(CLIENT* client, uint32_t opt, uint32_t reply_type, ssize_t datasize, void* data) {
1572 struct {
1573 uint64_t magic;
1574 uint32_t opt;
1575 uint32_t reply_type;
1576 uint32_t datasize;
1577 } __attribute__ ((packed)) header = {
1578 htonll(0x3e889045565a9LL),
1579 htonl(opt),
1580 htonl(reply_type),
1581 htonl(datasize),
1583 if(datasize < 0) {
1584 datasize = strlen((char*)data);
1585 header.datasize = htonl(datasize);
1587 socket_write(client, &header, sizeof(header));
1588 if(data != NULL) {
1589 socket_write(client, data, datasize);
1594 * Find the name of the file we have to serve. This will use g_strdup_printf
1595 * to put the IP address of the client inside a filename containing
1596 * "%s" (in the form as specified by the "virtstyle" option). That name
1597 * is then written to client->exportname.
1599 * @param net A socket connected to an nbd client
1600 * @param client information about the client. The IP address in human-readable
1601 * format will be written to a new char* buffer, the address of which will be
1602 * stored in client->clientname.
1603 * @return: 0 - OK, -1 - failed.
1605 int set_peername(int net, CLIENT *client) {
1606 struct sockaddr_storage netaddr;
1607 struct sockaddr* addr = (struct sockaddr*)&netaddr;
1608 socklen_t addrinlen = sizeof( struct sockaddr_storage );
1609 struct addrinfo hints;
1610 struct addrinfo *ai = NULL;
1611 char peername[NI_MAXHOST];
1612 char netname[NI_MAXHOST];
1613 char *tmp = NULL;
1614 int i;
1615 int e;
1617 if (getsockname(net, addr, &addrinlen) < 0) {
1618 msg(LOG_INFO, "getsockname failed: %m");
1619 return -1;
1622 if(netaddr.ss_family == AF_UNIX) {
1623 client->clientaddr.ss_family = AF_UNIX;
1624 strcpy(peername, "unix");
1625 } else {
1626 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1627 msg(LOG_INFO, "getpeername failed: %m");
1628 return -1;
1630 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1631 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1632 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1633 return -1;
1636 memset(&hints, '\0', sizeof (hints));
1637 hints.ai_flags = AI_ADDRCONFIG;
1638 e = getaddrinfo(peername, NULL, &hints, &ai);
1640 if(e != 0) {
1641 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
1642 freeaddrinfo(ai);
1643 return -1;
1647 if(strncmp(peername, "::ffff:", 7) == 0) {
1648 memmove(peername, peername+7, strlen(peername));
1651 switch(client->server->virtstyle) {
1652 case VIRT_NONE:
1653 msg(LOG_DEBUG, "virtualization is off");
1654 client->exportname=g_strdup(client->server->exportname);
1655 break;
1656 case VIRT_IPHASH:
1657 msg(LOG_DEBUG, "virtstyle iphash");
1658 for(i=0;i<strlen(peername);i++) {
1659 if(peername[i]=='.') {
1660 peername[i]='/';
1663 case VIRT_IPLIT:
1664 msg(LOG_DEBUG, "virtstyle ipliteral");
1665 client->exportname=g_strdup_printf(client->server->exportname, peername);
1666 break;
1667 case VIRT_CIDR:
1668 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
1669 memcpy(&netaddr, &(client->clientaddr), addrinlen);
1670 int addrbits;
1671 if(client->clientaddr.ss_family == AF_UNIX) {
1672 tmp = g_strdup(peername);
1673 } else {
1674 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
1675 if(ai->ai_family == AF_INET) {
1676 addrbits = 32;
1677 } else if(ai->ai_family == AF_INET6) {
1678 addrbits = 128;
1679 } else {
1680 g_assert_not_reached();
1682 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
1683 for(int i = 0; i < addrbits; i+=8) {
1684 int masklen = client->server->cidrlen - i;
1685 masklen = masklen > 0 ? masklen : 0;
1686 uint8_t mask = getmaskbyte(masklen);
1687 *addrptr &= mask;
1688 addrptr++;
1690 getnameinfo((struct sockaddr *) &netaddr, addrinlen,
1691 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1692 tmp=g_strdup_printf("%s/%s", netname, peername);
1695 if(tmp != NULL) {
1696 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1697 g_free(tmp);
1700 break;
1703 freeaddrinfo(ai);
1704 msg(LOG_INFO, "connect from %s, assigned file is %s",
1705 peername, client->exportname);
1706 client->clientname=g_strdup(peername);
1707 return 0;
1710 int commit_diff(CLIENT* client, bool lock, int fhandle){
1711 int dirtycount = 0;
1712 int pagecount = client->exportsize/DIFFPAGESIZE;
1713 off_t offset;
1714 char* buf = malloc(sizeof(char)*DIFFPAGESIZE);
1716 for (int i=0; i<pagecount; i++){
1717 offset = DIFFPAGESIZE*i;
1718 if (lock)
1719 pthread_rwlock_wrlock(&client->export_lock);
1720 if (client->difmap[i] != (u32)-1){
1721 dirtycount += 1;
1722 DEBUG("flushing dirty page %d, offset %ld\n", i, offset);
1723 if (pread(client->difffile, buf, DIFFPAGESIZE, client->difmap[i]*DIFFPAGESIZE) != DIFFPAGESIZE) {
1724 msg(LOG_WARNING, "could not read while committing diff: %m");
1725 if(lock) {
1726 pthread_rwlock_unlock(&client->export_lock);
1728 break;
1730 if (pwrite(fhandle, buf, DIFFPAGESIZE, offset) != DIFFPAGESIZE) {
1731 msg(LOG_WARNING, "could not write while committing diff: %m");
1732 if (lock) {
1733 pthread_rwlock_unlock(&client->export_lock);
1735 break;
1737 client->difmap[i] = (u32)-1;
1739 if (lock)
1740 pthread_rwlock_unlock(&client->export_lock);
1743 free(buf);
1744 return dirtycount;
1747 void* wait_file(void *void_ptr) {
1748 CLIENT* client = (CLIENT *)void_ptr;
1749 FILE_INFO fi;
1750 GArray* export;
1751 mode_t mode = O_RDWR;
1752 int dirtycount;
1754 fi.fhandle = -1;
1755 fi.startoff = 0;
1757 while (fi.fhandle < 1){
1758 sem_wait(&file_wait_sem);
1759 msg(LOG_INFO, "checking for file %s", client->server->exportname);
1760 fi.fhandle = open(client->server->exportname, mode);
1763 msg(LOG_INFO, "File %s appeared, fd %d", client->server->exportname, fi.fhandle);
1765 // first time there may be lot of data so we lock only per page
1766 do {
1767 dirtycount = commit_diff(client, true, fi.fhandle);
1768 } while (dirtycount > 0);
1770 //last time we lock export for the whole time until we switch write destination
1771 pthread_rwlock_wrlock(&client->export_lock);
1772 do {
1773 dirtycount = commit_diff(client, false, fi.fhandle);
1774 } while (dirtycount > 0);
1776 export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1777 g_array_append_val(export, fi);
1779 client->export = export;
1780 pthread_rwlock_unlock(&client->export_lock);
1781 msg(LOG_INFO, "Waiting for file ended, switching to exported file %s", client->server->exportname);
1783 return NULL;
1787 * Set up client export array, which is an array of FILE_INFO.
1788 * Also, split a single exportfile into multiple ones, if that was asked.
1789 * @param client information on the client which we want to setup export for
1791 bool setupexport(CLIENT* client) {
1792 int i = 0;
1793 off_t laststartoff = 0, lastsize = 0;
1794 int multifile = (client->server->flags & F_MULTIFILE);
1795 int treefile = (client->server->flags & F_TREEFILES);
1796 int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1797 int cancreate = (client->server->expected_size) && !multifile;
1799 if (treefile || (client->server->flags & F_WAIT)) {
1800 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
1801 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
1803 if(client->server->flags & F_WAIT){
1804 pthread_t wait_file_thread;
1805 if (pthread_create(&wait_file_thread, NULL, wait_file, client)){
1806 DEBUG("failed to create wait_file thread");
1807 return false;
1811 } else {
1812 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1814 /* If multi-file, open as many files as we can.
1815 * If not, open exactly one file.
1816 * Calculate file sizes as we go to get total size. */
1817 for(i=0; ; i++) {
1818 FILE_INFO fi;
1819 gchar *tmpname;
1820 gchar* error_string;
1822 if (i)
1823 cancreate = 0;
1824 /* if expected_size is specified, and this is the first file, we can create the file */
1825 mode_t mode = (client->server->flags & F_READONLY) ?
1826 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1828 if (temporary) {
1829 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1830 DEBUG( "Opening %s\n", tmpname );
1831 fi.fhandle = mkstemp(tmpname);
1832 } else {
1833 if(multifile) {
1834 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1835 } else {
1836 tmpname=g_strdup(client->exportname);
1838 DEBUG( "Opening %s\n", tmpname );
1839 fi.fhandle = open(tmpname, mode, 0600);
1840 if(fi.fhandle == -1 && mode == O_RDWR) {
1841 /* Try again because maybe media was read-only */
1842 fi.fhandle = open(tmpname, O_RDONLY);
1843 if(fi.fhandle != -1) {
1844 /* Opening the base file in copyonwrite mode is
1845 * okay */
1846 if(!(client->server->flags & F_COPYONWRITE)) {
1847 client->server->flags |= F_AUTOREADONLY;
1848 client->server->flags |= F_READONLY;
1853 if(fi.fhandle == -1) {
1854 if(multifile && i>0)
1855 break;
1856 error_string=g_strdup_printf(
1857 "Could not open exported file %s: %%m",
1858 tmpname);
1859 err_nonfatal(error_string);
1860 return false;
1863 if (temporary) {
1864 unlink(tmpname); /* File will stick around whilst FD open */
1867 fi.startoff = laststartoff + lastsize;
1868 g_array_append_val(client->export, fi);
1869 g_free(tmpname);
1871 /* Starting offset and size of this file will be used to
1872 * calculate starting offset of next file */
1873 laststartoff = fi.startoff;
1874 lastsize = size_autodetect(fi.fhandle);
1876 /* If we created the file, it will be length zero */
1877 if (!lastsize && cancreate) {
1878 assert(!multifile);
1879 if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1880 err_nonfatal("Could not expand file: %m");
1881 return false;
1883 lastsize = client->server->expected_size;
1884 break; /* don't look for any more files */
1887 if(!multifile || temporary)
1888 break;
1891 /* Set export size to total calculated size */
1892 client->exportsize = laststartoff + lastsize;
1894 /* Export size may be overridden */
1895 if(client->server->expected_size) {
1896 /* desired size must be <= total calculated size */
1897 if(client->server->expected_size > client->exportsize) {
1898 err_nonfatal("Size of exported file is too big\n");
1899 return false;
1902 client->exportsize = client->server->expected_size;
1906 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1907 if(multifile) {
1908 msg(LOG_INFO, "Total number of files: %d", i);
1910 if(treefile) {
1911 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
1913 return true;
1916 bool copyonwrite_prepare(CLIENT* client) {
1917 off_t i;
1918 gchar* dir;
1919 gchar* export_base;
1920 if (client->server->cowdir != NULL) {
1921 dir = g_strdup(client->server->cowdir);
1922 } else {
1923 dir = g_strdup(dirname(client->exportname));
1925 export_base = g_strdup(basename(client->exportname));
1926 client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
1927 (int)getpid());
1928 g_free(dir);
1929 g_free(export_base);
1930 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1931 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1932 if (client->difffile<0) {
1933 err("Could not create diff file (%m)");
1934 return false;
1936 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) {
1937 err("Could not allocate memory");
1938 return false;
1940 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1;
1942 return true;
1945 void send_export_info(CLIENT* client, bool maybe_zeroes) {
1946 uint64_t size_host = htonll((u64)(client->exportsize));
1947 uint16_t flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_WRITE_ZEROES;
1949 socket_write(client, &size_host, 8);
1950 if (client->server->flags & F_READONLY)
1951 flags |= NBD_FLAG_READ_ONLY;
1952 if (client->server->flags & F_FLUSH)
1953 flags |= NBD_FLAG_SEND_FLUSH;
1954 if (client->server->flags & F_FUA)
1955 flags |= NBD_FLAG_SEND_FUA;
1956 if (client->server->flags & F_ROTATIONAL)
1957 flags |= NBD_FLAG_ROTATIONAL;
1958 if (client->server->flags & F_TRIM)
1959 flags |= NBD_FLAG_SEND_TRIM;
1960 if (!(client->server->flags & F_COPYONWRITE))
1961 flags |= NBD_FLAG_CAN_MULTI_CONN;
1962 flags = htons(flags);
1963 socket_write(client, &flags, sizeof(flags));
1964 if (!(glob_flags & F_NO_ZEROES) && maybe_zeroes) {
1965 char zeros[128];
1966 memset(zeros, '\0', sizeof(zeros));
1967 socket_write(client, zeros, 124);
1972 * Commit to exporting the chosen export
1974 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
1975 * a number of things (verify whether the client is allowed access, try
1976 * to open files, etc etc) before we're ready to actually serve the
1977 * export.
1979 * This function does all those things.
1981 * @param client the CLIENT structure with .server and .net members set
1982 * up correctly
1983 * @return true if the client is allowed access to the export, false
1984 * otherwise
1986 static bool commit_client(CLIENT* client, SERVER* server) {
1987 char acl;
1988 uint32_t len;
1990 client->server = server;
1991 client->exportsize = OFFT_MAX;
1992 client->transactionlogfd = -1;
1993 if(pthread_mutex_init(&(client->lock), NULL)) {
1994 msg(LOG_ERR, "Unable to initialize mutex");
1995 return false;
1997 if (pthread_rwlock_init(&client->export_lock, NULL)){
1998 msg(LOG_ERR, "Unable to initialize write lock");
1999 return false;
2001 /* Check whether we exceeded the maximum number of allowed
2002 * clients already */
2003 if(dontfork) {
2004 acl = 'Y';
2005 } else {
2006 len = strlen(client->server->servename);
2007 writeit(commsocket, &len, sizeof len);
2008 writeit(commsocket, client->server->servename, len);
2009 readit(commsocket, &acl, 1);
2010 close(commsocket);
2012 switch(acl) {
2013 case 'N':
2014 msg(LOG_ERR, "Connection not allowed (too many clients)");
2015 return false;
2016 case 'X':
2017 msg(LOG_ERR, "Connection not allowed (unknown by parent?!?)");
2018 return false;
2021 /* Check whether the client is listed in the authfile */
2022 if (set_peername(client->net, client)) {
2023 msg(LOG_ERR, "Failed to set peername");
2024 return false;
2027 if (!authorized_client(client)) {
2028 msg(LOG_INFO, "Client '%s' is not authorized to access",
2029 client->clientname);
2030 return false;
2033 /* Set up the transactionlog, if we need one */
2034 if (client->server->transactionlog && (client->transactionlogfd == -1)) {
2035 if((client->transactionlogfd =
2036 open(client->server->transactionlog,
2037 O_WRONLY | O_CREAT,
2038 S_IRUSR | S_IWUSR)) ==
2039 -1) {
2040 msg(LOG_INFO, "Could not open transactionlog %s, moving on without it",
2041 client->server->transactionlog);
2045 /* Run any pre scripts that we may need */
2046 if (do_run(client->server->prerun, client->exportname)) {
2047 msg(LOG_INFO, "Client '%s' not allowed access by prerun script",
2048 client->clientname);
2049 return false;
2051 client->socket_closed = socket_closed_transmission;
2052 if(!setupexport(client)) {
2053 return false;
2056 if (client->server->flags & F_COPYONWRITE) {
2057 if(!copyonwrite_prepare(client)) {
2058 return false;
2062 if (client->server->flags & F_WAIT) {
2063 if(!copyonwrite_prepare(client)) {
2064 return false;
2068 setmysockopt(client->net);
2070 return true;
2073 static CLIENT* handle_export_name(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2074 uint32_t namelen;
2075 char* name;
2076 int i;
2078 socket_read(client, &namelen, sizeof(namelen));
2079 namelen = ntohl(namelen);
2080 if(namelen > 0) {
2081 name = malloc(namelen+1);
2082 name[namelen]=0;
2083 socket_read(client, name, namelen);
2084 } else {
2085 name = strdup("");
2087 for(i=0; i<servers->len; i++) {
2088 SERVER* serve = &(g_array_index(servers, SERVER, i));
2089 // hide exports that are TLS-only if we haven't negotiated TLS
2090 // yet
2091 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2092 continue;
2094 if(!strcmp(serve->servename, name)) {
2095 client->clientfeats = cflags;
2096 free(name);
2097 if(!commit_client(client, serve)) {
2098 return NULL;
2100 send_export_info(client, true);
2101 return client;
2104 free(name);
2105 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2108 static void handle_list(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2109 uint32_t len;
2110 int i;
2111 char buf[1024];
2112 char *ptr = buf + sizeof(len);
2114 socket_read(client, &len, sizeof(len));
2115 len = ntohl(len);
2116 if(len) {
2117 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2119 if(!(glob_flags & F_LIST)) {
2120 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Listing of exports denied by server configuration");
2121 err_nonfatal("Client tried disallowed list option");
2122 return;
2124 for(i=0; i<servers->len; i++) {
2125 SERVER* serve = &(g_array_index(servers, SERVER, i));
2126 // Hide TLS-only exports if we haven't negotiated TLS yet
2127 if(!client->tls_session && (serve->flags & F_FORCEDTLS)) {
2128 continue;
2130 len = htonl(strlen(serve->servename));
2131 memcpy(buf, &len, sizeof(len));
2132 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len));
2133 send_reply(client, opt, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
2135 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2138 #if HAVE_GNUTLS
2139 static int verify_cert(gnutls_session_t session) {
2140 int ret;
2141 unsigned int status, cert_list_size;
2142 const gnutls_datum_t *cert_list;
2143 gnutls_x509_crt_t cert;
2144 time_t now = time(NULL);
2146 ret = gnutls_certificate_verify_peers2(session, &status);
2147 if(ret < 0 || status != 0 || gnutls_certificate_type_get(session) !=
2148 GNUTLS_CRT_X509) {
2149 goto err;
2152 if(gnutls_x509_crt_init(&cert) < 0) {
2153 goto err;
2156 cert_list = gnutls_certificate_get_peers(session, &cert_list_size);
2157 if(cert_list == NULL) {
2158 goto err;
2160 if(gnutls_x509_crt_import(cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) {
2161 goto err;
2163 if(gnutls_x509_crt_get_activation_time(cert) > now) {
2164 goto err;
2166 if(gnutls_x509_crt_get_expiration_time(cert) < now) {
2167 goto err;
2169 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2170 msg(LOG_INFO, "client certificate verification successful");
2171 return 0;
2172 err:
2173 msg(LOG_ERR, "E: client certificate verification failed");
2174 return GNUTLS_E_CERTIFICATE_ERROR;
2177 CLIENT* handle_starttls(CLIENT* client, int opt, GArray* servers, uint32_t cflags, struct generic_conf *genconf) {
2178 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2179 gnutls_certificate_credentials_t x509_cred;
2180 CLIENT* retval = client;
2181 gnutls_priority_t priority_cache;
2182 gnutls_session_t *session = g_new0(gnutls_session_t, 1);
2183 int ret;
2184 int len;
2186 socket_read(client, &len, sizeof(len));
2187 if(G_UNLIKELY(len != 0)) {
2188 char buf[1024*1024];
2189 consume(client, len, buf, sizeof(buf));
2190 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Sending a STARTTLS command with data is invalid");
2191 return NULL;
2194 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2196 check_rv(gnutls_certificate_allocate_credentials(&x509_cred));
2197 gnutls_certificate_set_verify_function(x509_cred, verify_cert);
2198 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred, genconf->cacertfile, GNUTLS_X509_FMT_PEM));
2199 check_rv(gnutls_certificate_set_x509_key_file(x509_cred, genconf->certfile, genconf->keyfile, GNUTLS_X509_FMT_PEM));
2200 check_rv(gnutls_priority_init(&priority_cache, genconf->tlsprio, NULL));
2201 check_rv(gnutls_init(session, GNUTLS_SERVER));
2202 check_rv(gnutls_priority_set(*session, priority_cache));
2203 check_rv(gnutls_credentials_set(*session, GNUTLS_CRD_CERTIFICATE, x509_cred));
2205 gnutls_certificate_server_set_request(*session, GNUTLS_CERT_REQUEST);
2206 #if GNUTLS_VERSION_NUMBER >= 0x030109
2207 gnutls_transport_set_int(*session, client->net);
2208 #else
2209 gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) (intptr_t) client->net);
2210 #endif
2211 do {
2212 ret = gnutls_handshake(*session);
2213 } while(ret < 0 && gnutls_error_is_fatal(ret) == 0);
2215 if (ret < 0) {
2216 err_nonfatal(gnutls_strerror(ret));
2217 gnutls_bye(*session, GNUTLS_SHUT_RDWR);
2218 gnutls_deinit(*session);
2219 g_free(session);
2220 return NULL;
2222 client->tls_session = session;
2223 client->socket_read = socket_read_tls;
2224 client->socket_write = socket_write_tls;
2225 #undef check_rv
2226 exit:
2227 if(retval == NULL && session != NULL) {
2228 g_free(session);
2230 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2231 if(retval != NULL && retval->server != NULL) {
2232 retval->server = NULL;
2234 return retval;
2236 #endif
2239 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
2241 * XXX this matches the proposal I sent out, rather than the officially
2242 * documented version of this command. Need to bring the two in sync
2243 * one way or the other.
2245 static bool handle_info(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2246 uint32_t namelen, len;
2247 char *name;
2248 int i;
2249 SERVER *server = NULL;
2250 uint16_t n_requests;
2251 uint16_t request;
2252 char buf[1024];
2253 bool sent_export = false;
2254 uint32_t reptype = NBD_REP_ERR_UNKNOWN;
2255 char *msg = "Export unknown";
2257 socket_read(client, &len, sizeof(len));
2258 len = htonl(len);
2259 socket_read(client, &namelen, sizeof(namelen));
2260 namelen = htonl(namelen);
2261 if(namelen > (len - 6)) {
2262 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2263 socket_read(client, buf, len - sizeof(namelen));
2265 if(namelen > 0) {
2266 name = malloc(namelen + 1);
2267 name[namelen] = 0;
2268 socket_read(client, name, namelen);
2269 } else {
2270 name = strdup("");
2272 for(i=0; i<servers->len; i++) {
2273 SERVER *serve = &(g_array_index(servers, SERVER, i));
2274 if (!strcmp(serve->servename, name)) {
2275 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2276 reptype = NBD_REP_ERR_TLS_REQD;
2277 msg = "TLS is required for that export";
2278 continue;
2280 server = serve;
2283 free(name);
2284 socket_read(client, &n_requests, sizeof(n_requests));
2285 n_requests = ntohs(n_requests);
2286 if(!server) {
2287 consume(client, n_requests * sizeof(request), buf,
2288 sizeof(buf));
2289 send_reply(client, opt, reptype, -1, msg);
2290 return false;
2292 if (opt == NBD_OPT_GO) {
2293 client->clientfeats = cflags;
2294 if(!commit_client(client, server)) {
2295 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Access denied by server configuration");
2296 return false;
2299 for(i=0; i<n_requests; i++) {
2300 socket_read(client, &request, sizeof(request));
2301 switch(ntohs(request)) {
2302 case NBD_INFO_EXPORT:
2303 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2304 socket_write(client, &request, 2);
2305 send_export_info(client, false);
2306 sent_export = true;
2307 break;
2308 default:
2309 // ignore all other options for now.
2310 break;
2313 if(!sent_export) {
2314 request = htons(NBD_INFO_EXPORT);
2315 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2316 socket_write(client, &request, 2);
2317 send_export_info(client, false);
2319 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2321 return true;
2325 * Do the initial negotiation.
2327 * @param net The socket we're doing the negotiation over.
2328 * @param servers The array of known servers.
2329 * @param genconf the global options (needed for accessing TLS config data)
2331 CLIENT* negotiate(int net, GArray* servers, struct generic_conf *genconf) {
2332 uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
2333 uint64_t magic;
2334 uint32_t cflags = 0;
2335 uint32_t opt;
2336 CLIENT* client = g_new0(CLIENT, 1);
2337 client->net = net;
2338 client->socket_read = socket_read_notls;
2339 client->socket_write = socket_write_notls;
2340 client->socket_closed = socket_closed_negotiate;
2342 assert(servers != NULL);
2343 socket_write(client, INIT_PASSWD, 8);
2344 magic = htonll(opts_magic);
2345 socket_write(client, &magic, sizeof(magic));
2347 smallflags = htons(smallflags);
2348 socket_write(client, &smallflags, sizeof(uint16_t));
2349 socket_read(client, &cflags, sizeof(cflags));
2350 cflags = htonl(cflags);
2351 if (cflags & NBD_FLAG_C_NO_ZEROES) {
2352 glob_flags |= F_NO_ZEROES;
2354 do {
2355 socket_read(client, &magic, sizeof(magic));
2356 magic = ntohll(magic);
2357 if(magic != opts_magic) {
2358 err_nonfatal("Negotiation failed/5a: magic mismatch");
2359 goto handler_err;
2361 socket_read(client, &opt, sizeof(opt));
2362 opt = ntohl(opt);
2363 if(client->tls_session == NULL
2364 && glob_flags & F_FORCEDTLS
2365 && opt != NBD_OPT_STARTTLS) {
2366 if(opt == NBD_OPT_EXPORT_NAME) {
2367 // can't send an error message for EXPORT_NAME,
2368 // so must do hard close
2369 goto handler_err;
2371 if(opt == NBD_OPT_ABORT) {
2372 // handled below
2373 break;
2375 consume_len(client);
2376 send_reply(client, opt, NBD_REP_ERR_TLS_REQD, -1, "TLS is required on this server");
2377 continue;
2379 switch(opt) {
2380 case NBD_OPT_EXPORT_NAME:
2381 // NBD_OPT_EXPORT_NAME must be the last
2382 // selected option, so return from here
2383 // if that is chosen.
2384 if(handle_export_name(client, opt, servers, cflags) != NULL) {
2385 return client;
2386 } else {
2387 goto handler_err;
2389 break;
2390 case NBD_OPT_LIST:
2391 handle_list(client, opt, servers, cflags);
2392 break;
2393 case NBD_OPT_ABORT:
2394 // handled below
2395 break;
2396 case NBD_OPT_STARTTLS:
2397 #if !HAVE_GNUTLS
2398 consume_len(client);
2399 send_reply(client, opt, NBD_REP_ERR_PLATFORM, -1, "This nbd-server was compiled without TLS support");
2400 #else
2401 if(client->tls_session != NULL) {
2402 consume_len(client);
2403 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
2404 continue;
2406 if(genconf->keyfile == NULL) {
2407 consume_len(client);
2408 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "TLS not allowed on this server");
2409 continue;
2411 if(handle_starttls(client, opt, servers, cflags, genconf) == NULL) {
2412 // can't recover from failed TLS negotiation.
2413 goto handler_err;
2415 #endif
2416 break;
2417 case NBD_OPT_GO:
2418 case NBD_OPT_INFO:
2419 if(handle_info(client, opt, servers, cflags) && opt == NBD_OPT_GO) {
2420 return client;
2422 break;
2423 default:
2424 consume_len(client);
2425 send_reply(client, opt, NBD_REP_ERR_UNSUP, -1, "The given option is unknown to this server implementation");
2426 break;
2428 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
2429 if(opt == NBD_OPT_ABORT) {
2430 err_nonfatal("Session terminated by client");
2431 goto handler_err;
2433 err_nonfatal("Weird things happened: reached end of negotiation without success");
2434 handler_err:
2435 g_free(client);
2436 return NULL;
/**
 * Map a local errno value to the error number sent in an NBD reply.
 *
 * @param errcode an errno value
 * @return the matching NBD error number, already passed through
 *         htonl(); errno values without a specific mapping are
 *         reported as 22 (EINVAL)
 */
static int nbd_errno(int errcode) {
	uint32_t wire_code;

	switch (errcode) {
	case EPERM:
		wire_code = 1;
		break;
	case EIO:
		wire_code = 5;
		break;
	case ENOMEM:
		wire_code = 12;
		break;
	case EFBIG:
	case ENOSPC:
#ifdef EDQUOT
	case EDQUOT:
#endif
		wire_code = 28; // ENOSPC
		break;
	case EINVAL:
	default:
		wire_code = 22; // EINVAL
		break;
	}
	return htonl(wire_code);
}
2460 static void package_dispose(struct work_package* package) {
2461 if (package->pipefd[0] > 0)
2462 close(package->pipefd[0]);
2463 if (package->pipefd[1] > 0)
2464 close(package->pipefd[1]);
2465 g_free(package->data);
2466 g_free(package->req);
2467 g_free(package);
2470 static int mkpipe(int pipefd[2], size_t len)
2472 if (len > MAX_PIPE_SIZE)
2473 return -1;
2474 if (pipe(pipefd))
2475 return -1;
2477 #ifdef HAVE_SPLICE
2478 if (fcntl(pipefd[1], F_SETPIPE_SZ, MAX_PIPE_SIZE) < MAX_PIPE_SIZE) {
2479 close(pipefd[0]);
2480 close(pipefd[1]);
2481 pipefd[0] = -1;
2482 pipefd[1] = -1;
2483 return -1;
2485 #endif
2487 return 0;
2490 struct work_package* package_create(CLIENT* client, struct nbd_request* req) {
2491 struct work_package* rv = calloc(sizeof (struct work_package), 1);
2493 rv->req = req;
2494 rv->client = client;
2495 rv->data = NULL;
2496 rv->pipefd[0] = -1;
2497 rv->pipefd[1] = -1;
2499 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2500 if (client->server->flags & F_SPLICE) {
2501 if (mkpipe(rv->pipefd, req->len))
2502 rv->data = malloc(req->len);
2503 } else {
2504 rv->data = malloc(req->len);
2508 return rv;
2511 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) {
2512 rep->magic = htonl(NBD_REPLY_MAGIC);
2513 rep->error = 0;
2514 memcpy(&(rep->handle), &(req->handle), sizeof(req->handle));
2517 #ifdef HAVE_SPLICE
2518 static int handle_splice_read(CLIENT *client, struct nbd_request *req)
2520 struct nbd_reply rep;
2521 int pipefd[2];
2523 // splice doesn't work with TLS
2524 if (client->tls_session != NULL)
2525 return -1;
2527 if (mkpipe(pipefd, req->len))
2528 return -1;
2530 if (expsplice(pipefd[1], req->from, req->len, client, SPLICE_IN, 0)) {
2531 close(pipefd[1]);
2532 close(pipefd[0]);
2533 return -1;
2536 DEBUG("handling read request (splice)\n");
2537 setup_reply(&rep, req);
2538 pthread_mutex_lock(&(client->lock));
2539 writeit(client->net, &rep, sizeof(rep));
2540 spliceit(pipefd[0], NULL, client->net, NULL, req->len);
2541 pthread_mutex_unlock(&(client->lock));
2542 close(pipefd[0]);
2543 close(pipefd[1]);
2544 return 0;
2546 #endif
2548 static void handle_normal_read(CLIENT *client, struct nbd_request *req)
2550 struct nbd_reply rep;
2551 void* buf = malloc(req->len);
2552 if(!buf) {
2553 err("Could not allocate memory for request");
2555 DEBUG("handling read request\n");
2556 setup_reply(&rep, req);
2557 if(expread(req->from, buf, req->len, client)) {
2558 DEBUG("Read failed: %m");
2559 rep.error = nbd_errno(errno);
2561 pthread_mutex_lock(&(client->lock));
2562 socket_write(client, &rep, sizeof rep);
2563 if(!rep.error) {
2564 socket_write(client, buf, req->len);
2566 pthread_mutex_unlock(&(client->lock));
2567 free(buf);
2570 static void handle_read(CLIENT* client, struct nbd_request* req)
2572 #ifdef HAVE_SPLICE
2574 * If we have splice set we want to try that first, and if that fails
2575 * for whatever reason we fall through to ye olde read.
2577 if (client->server->flags & F_SPLICE)
2578 if (!handle_splice_read(client, req))
2579 return;
2580 #endif
2581 handle_normal_read(client, req);
2584 static void handle_write(struct work_package *pkg)
2586 CLIENT *client = pkg->client;
2587 struct nbd_request *req = pkg->req;
2588 struct nbd_reply rep;
2589 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2591 DEBUG("handling write request\n");
2592 setup_reply(&rep, req);
2594 #ifdef HAVE_SPLICE
2595 if (!pkg->data) {
2596 if (expsplice(pkg->pipefd[0], req->from, req->len, client,
2597 SPLICE_OUT, fua)) {
2598 DEBUG("Splice failed: %M");
2599 rep.error = nbd_errno(errno);
2601 } else
2602 #endif
2604 if(expwrite(req->from, pkg->data, req->len, client, fua)) {
2605 DEBUG("Write failed: %m");
2606 rep.error = nbd_errno(errno);
2609 pthread_mutex_lock(&(client->lock));
2610 socket_write(client, &rep, sizeof rep);
2611 pthread_mutex_unlock(&(client->lock));
2614 static void handle_flush(CLIENT* client, struct nbd_request* req) {
2615 struct nbd_reply rep;
2616 DEBUG("handling flush request\n");
2617 setup_reply(&rep, req);
2618 if(expflush(client)) {
2619 DEBUG("Flush failed: %m");
2620 rep.error = nbd_errno(errno);
2622 pthread_mutex_lock(&(client->lock));
2623 socket_write(client, &rep, sizeof rep);
2624 pthread_mutex_unlock(&(client->lock));
2627 static void handle_trim(CLIENT* client, struct nbd_request* req) {
2628 struct nbd_reply rep;
2629 DEBUG("handling trim request\n");
2630 setup_reply(&rep, req);
2631 if(exptrim(req, client)) {
2632 DEBUG("Trim failed: %m");
2633 rep.error = nbd_errno(errno);
2635 pthread_mutex_lock(&(client->lock));
2636 socket_write(client, &rep, sizeof rep);
2637 pthread_mutex_unlock(&(client->lock));
2640 static void handle_write_zeroes(CLIENT* client, struct nbd_request* req) {
2641 struct nbd_reply rep;
2642 DEBUG("handling write_zeroes request\n");
2643 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2644 setup_reply(&rep, req);
2645 if(expwrite_zeroes(req, client, fua)) {
2646 DEBUG("Write_zeroes failed: %m");
2647 rep.error = nbd_errno(errno);
2649 // For now, don't trim
2650 // TODO: handle this far more efficiently with reference to the
2651 // actual backing driver
2652 pthread_mutex_lock(&(client->lock));
2653 socket_write(client, &rep, sizeof rep);
2654 pthread_mutex_unlock(&(client->lock));
2658 static bool bad_write(CLIENT* client, struct nbd_request* req) {
2659 if ((client->server->flags & F_READONLY) ||
2660 (client->server->flags & F_AUTOREADONLY)) {
2661 DEBUG("[WRITE to READONLY!]");
2662 return true;
2664 return false;
2667 static bool bad_range(CLIENT* client, struct nbd_request* req) {
2668 if(req->from > client->exportsize ||
2669 req->from + req->len > client->exportsize) {
2670 DEBUG("[out of bounds!]");
2671 return true;
2673 return false;
2676 static void handle_request(gpointer data, gpointer user_data) {
2677 struct work_package* package = (struct work_package*) data;
2678 uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND;
2679 uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND;
2680 struct nbd_reply rep;
2681 int err = EINVAL;
2683 if(flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
2684 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type);
2685 goto error;
2688 switch(type) {
2689 case NBD_CMD_READ:
2690 if (bad_range(package->client, package->req)) {
2691 goto error;
2693 handle_read(package->client, package->req);
2694 break;
2695 case NBD_CMD_WRITE:
2696 if (bad_write(package->client, package->req)) {
2697 err = EPERM;
2698 goto error;
2700 if (bad_range(package->client, package->req)) {
2701 err = ENOSPC;
2702 goto error;
2704 handle_write(package);
2705 break;
2706 case NBD_CMD_FLUSH:
2707 handle_flush(package->client, package->req);
2708 break;
2709 case NBD_CMD_TRIM:
2710 if (bad_write(package->client, package->req)) {
2711 err = EPERM;
2712 goto error;
2714 if (bad_range(package->client, package->req)) {
2715 goto error;
2717 handle_trim(package->client, package->req);
2718 break;
2719 case NBD_CMD_WRITE_ZEROES:
2720 if (bad_write(package->client, package->req)) {
2721 err = EPERM;
2722 goto error;
2724 if (bad_range(package->client, package->req)) {
2725 err = ENOSPC;
2726 goto error;
2728 handle_write_zeroes(package->client, package->req);
2729 break;
2730 default:
2731 msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type);
2732 goto error;
2734 goto end;
2735 error:
2736 setup_reply(&rep, package->req);
2737 rep.error = nbd_errno(err);
2738 pthread_mutex_lock(&(package->client->lock));
2739 socket_write(package->client, &rep, sizeof rep);
2740 pthread_mutex_unlock(&(package->client->lock));
2741 end:
2742 package_dispose(package);
2745 static int mainloop_threaded(CLIENT* client) {
2746 struct nbd_request* req;
2747 struct work_package* pkg;
2749 DEBUG("Entering request loop\n");
2750 while(1) {
2751 req = calloc(sizeof (struct nbd_request), 1);
2753 socket_read(client, req, sizeof(struct nbd_request));
2754 if(client->transactionlogfd != -1) {
2755 writeit(client->transactionlogfd, req, sizeof(struct nbd_request));
2758 req->from = ntohll(req->from);
2759 req->type = ntohl(req->type);
2760 req->len = ntohl(req->len);
2762 if(req->magic != htonl(NBD_REQUEST_MAGIC))
2763 err("Protocol error: not enough magic.");
2765 pkg = package_create(client, req);
2767 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2768 #ifdef HAVE_SPLICE
2769 if ((client->server->flags & F_SPLICE) &&
2770 (req->len <= MAX_PIPE_SIZE && pkg->pipefd[1] > 0) &&
2771 (client->tls_session == NULL))
2772 spliceit(client->net, NULL, pkg->pipefd[1],
2773 NULL, req->len);
2774 else
2775 #endif
2776 socket_read(client, pkg->data, req->len);
2778 if(req->type == NBD_CMD_DISC) {
2779 finalize_client(client);
2780 return 0;
2782 g_thread_pool_push(tpool, pkg, NULL);
2787 * Destroy a pid_t*
2788 * @param data a pointer to pid_t which should be freed
2790 void destroy_pid_t(gpointer data) {
2791 g_free(data);
2794 static pid_t
2795 spawn_child(int* socket)
2797 pid_t pid;
2798 sigset_t newset;
2799 sigset_t oldset;
2800 int sockets[2];
2802 sigemptyset(&newset);
2803 sigaddset(&newset, SIGCHLD);
2804 sigaddset(&newset, SIGTERM);
2805 sigprocmask(SIG_BLOCK, &newset, &oldset);
2806 socketpair(AF_UNIX, SOCK_STREAM, 0, sockets);
2807 pid = fork();
2808 if (pid < 0) {
2809 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
2810 close(sockets[0]);
2811 close(sockets[1]);
2812 goto out;
2814 if (pid > 0) { /* Parent */
2815 pid_t *pidp;
2817 pidp = g_malloc(sizeof(pid_t));
2818 *pidp = pid;
2819 *socket = sockets[1];
2820 close(sockets[0]);
2821 g_hash_table_insert(children, pidp, pidp);
2822 goto out;
2824 /* Child */
2825 *socket = sockets[0];
2826 close(sockets[1]);
2827 /* Child's signal disposition is reset to default. */
2828 signal(SIGCHLD, SIG_DFL);
2829 signal(SIGTERM, SIG_DFL);
2830 signal(SIGHUP, SIG_DFL);
2831 sigemptyset(&oldset);
2832 out:
2833 sigprocmask(SIG_SETMASK, &oldset, NULL);
2834 return pid;
/**
 * Accept a pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative
 *         value if accept() failed (the failure is logged)
 */
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peer_len = sizeof(peer);
	const int net = accept(sock, (struct sockaddr *) &peer, &peer_len);

	if (net < 0) {
		err_nonfatal("Failed to accept socket connection: %m");
	}

	return net;
}
2852 static void
2853 handle_modern_connection(GArray *const servers, const int sock, struct generic_conf *genconf)
2855 int net;
2856 pid_t pid;
2857 CLIENT *client = NULL;
2858 int sock_flags_old;
2859 int sock_flags_new;
2861 net = socket_accept(sock);
2862 if (net < 0)
2863 return;
2865 if (!dontfork) {
2866 pid = spawn_child(&commsocket);
2867 if (pid) {
2868 if (pid > 0) {
2869 msg(LOG_INFO, "Spawned a child process");
2870 g_array_append_val(childsocks, commsocket);
2872 if (pid < 0)
2873 msg(LOG_ERR, "Failed to spawn a child process");
2874 close(net);
2875 return;
2877 /* Child just continues. */
2880 sock_flags_old = fcntl(net, F_GETFL, 0);
2881 if (sock_flags_old == -1) {
2882 msg(LOG_ERR, "Failed to get socket flags");
2883 goto handler_err;
2886 sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2887 if (sock_flags_new != sock_flags_old &&
2888 fcntl(net, F_SETFL, sock_flags_new) == -1) {
2889 msg(LOG_ERR, "Failed to set socket to blocking mode");
2890 goto handler_err;
2893 client = negotiate(net, servers, genconf);
2894 if (!client) {
2895 msg(LOG_ERR, "Modern initial negotiation failed");
2896 goto handler_err;
2899 if (!dontfork) {
2900 int i;
2902 /* Free all root server resources here, because we are
2903 * currently in the child process serving one specific
2904 * connection. These are not simply needed anymore. */
2905 g_hash_table_destroy(children);
2906 children = NULL;
2907 for (i = 0; i < modernsocks->len; i++) {
2908 close(g_array_index(modernsocks, int, i));
2910 g_array_free(modernsocks, TRUE);
2912 /* Now that we are in the child process after a
2913 * succesful negotiation, we do not need the list of
2914 * servers anymore, get rid of it.*/
2915 /* FALSE does not free the
2916 actual data. This is required,
2917 because the client has a
2918 direct reference into that
2919 data, and otherwise we get a
2920 segfault... */
2921 g_array_free(servers, FALSE);
2924 msg(LOG_INFO, "Starting to serve");
2925 mainloop_threaded(client);
2926 exit(EXIT_SUCCESS);
2928 handler_err:
2929 close(net);
2930 g_free(client);
2932 if (!dontfork) {
2933 exit(EXIT_FAILURE);
2937 static int handle_childname(GArray* servers, int socket)
2939 uint32_t len;
2940 char *buf;
2941 int i, r, rt = 0;
2943 while(rt < sizeof(len)) {
2944 switch((r = read(socket, &len, sizeof len))) {
2945 case 0:
2946 return -1;
2947 case -1:
2948 err_nonfatal("Error reading from acl socket: %m");
2949 return -1;
2950 default:
2951 rt += r;
2952 break;
2955 buf = g_malloc0(len + 1);
2956 buf[len] = 0;
2957 readit(socket, buf, len);
2958 for(i=0; i<servers->len; i++) {
2959 SERVER* srv = &g_array_index(servers, SERVER, i);
2960 if(strcmp(srv->servename, buf) == 0) {
2961 if(srv->max_connections == 0 || srv->max_connections > srv->numclients) {
2962 writeit(socket, "Y", 1);
2963 srv->numclients++;
2964 } else {
2965 writeit(socket, "N", 1);
2967 goto exit;
2970 writeit(socket, "X", 1);
2971 exit:
2972 g_free(buf);
2973 return 0;
2977 * Return the index of the server whose servename matches the given
2978 * name.
2980 * @param servename a string to match
2981 * @param servers an array of servers
2982 * @return the first index of the server whose servename matches the
2983 * given name or -1 if one cannot be found
2985 static int get_index_by_servename(const gchar *const servename,
2986 const GArray *const servers) {
2987 int i;
2989 for (i = 0; i < servers->len; ++i) {
2990 const SERVER server = g_array_index(servers, SERVER, i);
2992 if (strcmp(servename, server.servename) == 0)
2993 return i;
2996 return -1;
3000 * Parse configuration files and add servers to the array if they don't
3001 * already exist there. The existence is tested by comparing
3002 * servenames. A server is appended to the array only if its servename
3003 * is unique among all other servers.
3005 * @param servers an array of servers
3006 * @return the number of new servers appended to the array, or -1 in
3007 * case of an error
3009 static int append_new_servers(GArray *const servers, GError **const gerror) {
3010 int i;
3011 GArray *new_servers;
3012 const int old_len = servers->len;
3013 int retval = -1;
3014 struct generic_conf genconf;
3016 new_servers = parse_cfile(config_file_pos, &genconf, true, gerror);
3017 g_thread_pool_set_max_threads(tpool, genconf.threads, NULL);
3018 if (!new_servers)
3019 goto out;
3021 for (i = 0; i < new_servers->len; ++i) {
3022 SERVER new_server = g_array_index(new_servers, SERVER, i);
3024 if (new_server.servename
3025 && -1 == get_index_by_servename(new_server.servename,
3026 servers)) {
3027 g_array_append_val(servers, new_server);
3031 retval = servers->len - old_len;
3032 out:
3033 g_array_free(new_servers, TRUE);
3035 return retval;
3038 void serveloop(GArray* servers, struct generic_conf *genconf) G_GNUC_NORETURN;
3040 * Loop through the available servers, and serve them. Never returns.
3042 void serveloop(GArray* servers, struct generic_conf *genconf) {
3043 int i;
3044 int mmax, max;
3045 fd_set mset;
3046 fd_set rset;
3047 sigset_t blocking_mask;
3048 sigset_t original_mask;
3051 * Set up the master fd_set. The set of descriptors we need
3052 * to select() for never changes anyway and it buys us a *lot*
3053 * of time to only build this once. However, if we ever choose
3054 * to not fork() for clients anymore, we may have to revisit
3055 * this.
3057 mmax=0;
3058 FD_ZERO(&mset);
3059 for(i=0;i<modernsocks->len;i++) {
3060 int sock = g_array_index(modernsocks, int, i);
3061 FD_SET(sock, &mset);
3062 mmax=sock>mmax?sock:mmax;
3065 /* Construct a signal mask which is used to make signal testing and
3066 * receiving an atomic operation to ensure no signal is received between
3067 * tests and blocking pselect(). */
3068 if (sigemptyset(&blocking_mask) == -1)
3069 err("failed to initialize blocking_mask: %m");
3071 if (sigaddset(&blocking_mask, SIGCHLD) == -1)
3072 err("failed to add SIGCHLD to blocking_mask: %m");
3074 if (sigaddset(&blocking_mask, SIGHUP) == -1)
3075 err("failed to add SIGHUP to blocking_mask: %m");
3077 if (sigaddset(&blocking_mask, SIGTERM) == -1)
3078 err("failed to add SIGTERM to blocking_mask: %m");
3080 if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
3081 err("failed to block signals: %m");
3083 for(;;) {
3084 if (is_sigterm_caught) {
3085 is_sigterm_caught = 0;
3087 g_hash_table_foreach(children, killchild, NULL);
3088 unlink(pidfname);
3090 exit(EXIT_SUCCESS);
3093 if (is_sigchld_caught) {
3094 int status;
3095 int* i;
3096 pid_t pid;
3098 is_sigchld_caught = 0;
3100 while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
3101 if (WIFEXITED(status)) {
3102 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
3104 i = g_hash_table_lookup(children, &pid);
3105 if (!i) {
3106 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
3107 } else {
3108 DEBUG("Removing %d from the list of children", pid);
3109 g_hash_table_remove(children, &pid);
3114 /* SIGHUP causes the root server process to reconfigure
3115 * itself and add new export servers for each newly
3116 * found export configuration group, i.e. spawn new
3117 * server processes for each previously non-existent
3118 * export. This does not alter old runtime configuration
3119 * but just appends new exports. */
3120 if (is_sighup_caught) {
3121 int n;
3122 GError *gerror = NULL;
3124 msg(LOG_INFO, "reconfiguration request received");
3125 is_sighup_caught = 0; /* Reset to allow catching
3126 * it again. */
3128 n = append_new_servers(servers, &gerror);
3129 if (n == -1)
3130 msg(LOG_ERR, "failed to append new servers: %s",
3131 gerror->message);
3133 for (i = servers->len - n; i < servers->len; ++i) {
3134 const SERVER server = g_array_index(servers,
3135 SERVER, i);
3137 msg(LOG_INFO, "reconfigured new server: %s",
3138 server.servename);
3142 memcpy(&rset, &mset, sizeof(fd_set));
3143 max=mmax;
3144 for(i=0;i<childsocks->len;i++) {
3145 int sock = g_array_index(childsocks, int, i);
3146 FD_SET(sock, &rset);
3147 max=sock>max?sock:max;
3150 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
3151 DEBUG("accept, ");
3152 for(i=0; i < modernsocks->len; i++) {
3153 int sock = g_array_index(modernsocks, int, i);
3154 if(!FD_ISSET(sock, &rset)) {
3155 continue;
3158 handle_modern_connection(servers, sock, genconf);
3160 for(i=0; i < childsocks->len; i++) {
3161 int sock = g_array_index(childsocks, int, i);
3163 if(FD_ISSET(sock, &rset)) {
3164 if(handle_childname(servers, sock) < 0) {
3165 close(sock);
3166 g_array_remove_index(childsocks, i);
3175 * Set server socket options.
3177 * @param socket a socket descriptor of the server
3179 * @param gerror a pointer to an error object pointer used for reporting
3180 * errors. On error, if gerror is not NULL, *gerror is set and -1
3181 * is returned.
3183 * @return 0 on success, -1 on error
3185 int dosockopts(const int socket, GError **const gerror) {
3186 #ifndef sun
3187 int yes=1;
3188 #else
3189 char yes='1';
3190 #endif /* sun */
3191 struct linger l;
3193 /* lose the pesky "Address already in use" error message */
3194 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
3195 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
3196 "failed to set socket option SO_REUSEADDR: %s",
3197 strerror(errno));
3198 return -1;
3200 l.l_onoff = 1;
3201 l.l_linger = 10;
3202 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
3203 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
3204 "failed to set socket option SO_LINGER: %s",
3205 strerror(errno));
3206 return -1;
3208 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
3209 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
3210 "failed to set socket option SO_KEEPALIVE: %s",
3211 strerror(errno));
3212 return -1;
3215 return 0;
3218 int open_unix(const gchar *const sockname, GError **const gerror) {
3219 struct sockaddr_un sa;
3220 int sock=-1;
3221 int retval=-1;
3223 memset(&sa, 0, sizeof(struct sockaddr_un));
3224 sa.sun_family = AF_UNIX;
3225 strncpy(sa.sun_path, sockname, sizeof sa.sun_path);
3226 sa.sun_path[sizeof(sa.sun_path)-1] = '\0';
3227 sock = socket(AF_UNIX, SOCK_STREAM, 0);
3228 if(sock < 0) {
3229 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3230 "failed to open a unix socket: "
3231 "failed to create socket: %s",
3232 strerror(errno));
3233 goto out;
3235 if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
3236 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3237 "failed to open a unix socket: "
3238 "failed to bind to address %s: %s",
3239 sockname, strerror(errno));
3240 goto out;
3242 if(listen(sock, 10)<0) {
3243 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3244 "failed to open a unix socket: "
3245 "failed to start listening: %s",
3246 strerror(errno));
3247 goto out;
3249 retval=0;
3250 g_array_append_val(modernsocks, sock);
3251 out:
3252 if(retval<0 && sock >= 0) {
3253 close(sock);
3256 return retval;
3259 int open_modern(const gchar *const addr, const gchar *const port,
3260 GError **const gerror) {
3261 struct addrinfo hints;
3262 struct addrinfo* ai = NULL;
3263 struct addrinfo* ai_bak = NULL;
3264 struct sock_flags;
3265 int e;
3266 int retval = -1;
3267 int sock = -1;
3268 gchar** addrs;
3269 gchar const* l_addr = addr;
3271 if(!addr || strlen(addr) == 0) {
3272 l_addr = "::, 0.0.0.0";
3275 addrs = g_strsplit_set(l_addr, ", \t", -1);
3277 for(int i=0; addrs[i]!=NULL; i++) {
3278 if(addrs[i][0] == '\0') {
3279 continue;
3281 memset(&hints, '\0', sizeof(hints));
3282 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
3283 hints.ai_socktype = SOCK_STREAM;
3284 hints.ai_family = AF_UNSPEC;
3285 hints.ai_protocol = IPPROTO_TCP;
3286 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai);
3287 ai_bak = ai;
3288 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) {
3289 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
3290 "failed to open a modern socket: "
3291 "failed to get address info: %s",
3292 gai_strerror(e));
3293 goto out;
3296 while(ai != NULL) {
3297 sock = -1;
3299 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
3300 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3301 "failed to open a modern socket: "
3302 "failed to create a socket: %s",
3303 strerror(errno));
3304 goto out;
3307 if (dosockopts(sock, gerror) == -1) {
3308 g_prefix_error(gerror, "failed to open a modern socket: ");
3309 goto out;
3312 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
3314 * Some systems will return multiple entries for the
3315 * same address when we ask it for something
3316 * AF_UNSPEC, even though the first entry will
3317 * listen to both protocols. Other systems will
3318 * return multiple entries too, but we actually
3319 * do need to open both.
3321 * Handle this by ignoring EADDRINUSE if we've
3322 * already got at least one socket open
3324 if(errno == EADDRINUSE && modernsocks->len > 0) {
3325 goto next;
3327 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3328 "failed to open a modern socket: "
3329 "failed to bind an address to a socket: %s",
3330 strerror(errno));
3331 goto out;
3334 if(listen(sock, 10) <0) {
3335 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3336 "failed to open a modern socket: "
3337 "failed to start listening on a socket: %s",
3338 strerror(errno));
3339 goto out;
3341 g_array_append_val(modernsocks, sock);
3342 next:
3343 ai = ai->ai_next;
3345 if(ai_bak) {
3346 freeaddrinfo(ai_bak);
3347 ai_bak=NULL;
3351 retval = 0;
3352 out:
3354 if (retval == -1 && sock >= 0) {
3355 close(sock);
3357 if(ai_bak)
3358 freeaddrinfo(ai_bak);
3360 return retval;
3364 * Connect our servers.
3366 void setup_servers(GArray *const servers, const gchar *const modernaddr,
3367 const gchar *const modernport, const gchar* unixsock) {
3368 struct sigaction sa;
3370 GError *gerror = NULL;
3371 if (open_modern(modernaddr, modernport, &gerror) == -1) {
3372 msg(LOG_ERR, "failed to setup servers: %s",
3373 gerror->message);
3374 g_clear_error(&gerror);
3375 exit(EXIT_FAILURE);
3377 if(unixsock != NULL) {
3378 GError* gerror = NULL;
3379 if(open_unix(unixsock, &gerror) == -1) {
3380 msg(LOG_ERR, "failed to setup servers: %s",
3381 gerror->message);
3382 g_clear_error(&gerror);
3383 exit(EXIT_FAILURE);
3386 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
3388 sa.sa_handler = sigchld_handler;
3389 sigemptyset(&sa.sa_mask);
3390 sigaddset(&sa.sa_mask, SIGTERM);
3391 sa.sa_flags = SA_RESTART;
3392 if(sigaction(SIGCHLD, &sa, NULL) == -1)
3393 err("sigaction: %m");
3395 sa.sa_handler = sigterm_handler;
3396 sigemptyset(&sa.sa_mask);
3397 sigaddset(&sa.sa_mask, SIGCHLD);
3398 sa.sa_flags = SA_RESTART;
3399 if(sigaction(SIGTERM, &sa, NULL) == -1)
3400 err("sigaction: %m");
3402 sa.sa_handler = sighup_handler;
3403 sigemptyset(&sa.sa_mask);
3404 sa.sa_flags = SA_RESTART;
3405 if(sigaction(SIGHUP, &sa, NULL) == -1)
3406 err("sigaction: %m");
3408 sa.sa_handler = sigusr1_handler;
3409 sigemptyset(&sa.sa_mask);
3410 sa.sa_flags = SA_RESTART;
3411 if(sigaction(SIGUSR1, &sa, NULL) == -1)
3412 err("sigaction: %m");
3416 * Go daemon (unless we specified at compile time that we didn't want this)
3417 * @param serve the first server of our configuration. If its port is zero,
3418 * then do not daemonize, because we're doing inetd then. This parameter
3419 * is only used to create a PID file of the form
3420 * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
3422 #if !defined(NODAEMON)
3423 void daemonize() {
3424 FILE*pidf;
3426 if(daemon(0,0)<0) {
3427 err("daemon");
3429 if(!*pidfname) {
3430 strncpy(pidfname, "/var/run/nbd-server.pid", 255);
3432 pidf=fopen(pidfname, "w");
3433 if(pidf) {
3434 fprintf(pidf,"%d\n", (int)getpid());
3435 fclose(pidf);
3436 } else {
3437 perror("fopen");
3438 fprintf(stderr, "Not fatal; continuing");
3441 #else
3442 #define daemonize(serve)
3443 #endif /* !defined(NODAEMON) */
/*
 * Everything beyond this point (in the file) is run in non-daemon mode.
 * The stuff above daemonize() isn't.
 */
3451 * Set up user-ID and/or group-ID
3453 void dousers(const gchar *const username, const gchar *const groupname) {
3454 struct passwd *pw;
3455 struct group *gr;
3456 gchar* str;
3457 if (groupname) {
3458 gr = getgrnam(groupname);
3459 if(!gr) {
3460 str = g_strdup_printf("Invalid group name: %s", groupname);
3461 err(str);
3463 if(setgid(gr->gr_gid)<0) {
3464 err("Could not set GID: %m");
3467 if (username) {
3468 pw = getpwnam(username);
3469 if(!pw) {
3470 str = g_strdup_printf("Invalid user name: %s", username);
3471 err(str);
3473 setgroups(0, NULL);
3474 if(setuid(pw->pw_uid)<0) {
3475 err("Could not set UID: %m");
3480 #ifndef ISSERVER
3481 void glib_message_syslog_redirect(const gchar *log_domain,
3482 GLogLevelFlags log_level,
3483 const gchar *message,
3484 gpointer user_data)
3486 int level=LOG_DEBUG;
3488 switch( log_level )
3490 case G_LOG_FLAG_FATAL:
3491 case G_LOG_LEVEL_CRITICAL:
3492 case G_LOG_LEVEL_ERROR:
3493 level=LOG_ERR;
3494 break;
3495 case G_LOG_LEVEL_WARNING:
3496 level=LOG_WARNING;
3497 break;
3498 case G_LOG_LEVEL_MESSAGE:
3499 case G_LOG_LEVEL_INFO:
3500 level=LOG_INFO;
3501 break;
3502 case G_LOG_LEVEL_DEBUG:
3503 level=LOG_DEBUG;
3504 break;
3505 default:
3506 level=LOG_ERR;
3508 syslog(level, "%s", message);
3510 #endif
3513 * Main entry point...
3515 int main(int argc, char *argv[]) {
3516 SERVER *serve;
3517 GArray *servers;
3518 GError *gerr=NULL;
3519 struct generic_conf genconf;
3521 memset(&genconf, 0, sizeof(struct generic_conf));
3523 if (sizeof( struct nbd_request )!=28) {
3524 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
3525 exit(EXIT_FAILURE) ;
3528 modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
3529 childsocks = g_array_new(FALSE, FALSE, sizeof(int));
3531 logging(MY_NAME);
3532 config_file_pos = g_strdup(CFILE);
3533 serve=cmdline(argc, argv, &genconf);
3535 genconf.threads = 4;
3536 servers = parse_cfile(config_file_pos, &genconf, true, &gerr);
3538 /* Update global variables with parsed values. This will be
3539 * removed once we get rid of global configuration variables. */
3540 glob_flags |= genconf.flags;
3542 if(serve) {
3543 g_array_append_val(servers, *serve);
3546 if(!servers || !servers->len) {
3547 if(gerr && !(gerr->domain == NBDS_ERR
3548 && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) {
3549 g_warning("Could not parse config file: %s",
3550 gerr ? gerr->message : "Unknown error");
3553 if(serve) {
3554 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
3557 if((!serve) && (!servers||!servers->len)) {
3558 if(gerr)
3559 g_message("No configured exports; quitting.");
3560 exit(EXIT_FAILURE);
3562 if (!dontfork)
3563 daemonize();
3564 #if HAVE_OLD_GLIB
3565 g_thread_init(NULL);
3566 #endif
3567 tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL);
3569 setup_servers(servers, genconf.modernaddr, genconf.modernport,
3570 genconf.unixsock);
3571 dousers(genconf.user, genconf.group);
3573 #if HAVE_GNUTLS
3574 gnutls_global_init();
3575 static gnutls_dh_params_t dh_params;
3576 gnutls_dh_params_init(&dh_params);
3577 gnutls_dh_params_generate2(dh_params,
3578 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH,
3579 // Renamed in GnuTLS 3.3
3580 #if GNUTLS_VERSION_NUMBER >= 0x030300
3581 GNUTLS_SEC_PARAM_MEDIUM
3582 #else
3583 GNUTLS_SEC_PARAM_NORMAL
3584 #endif
3586 #endif
3588 if((genconf.modernport != NULL) && strcmp(genconf.modernport, "0")==0) {
3589 #ifndef ISSERVER
3590 err("inetd mode requires syslog");
3591 #endif
3592 CLIENT* client = negotiate(0, servers, &genconf);
3593 if(!client) {
3594 exit(EXIT_FAILURE);
3596 mainloop_threaded(client);
3597 return 0;
3600 serveloop(servers, &genconf);