Remove unused variables
[nbd.git] / nbd-server.c
blobd861441500d361ffc00a58de5c97e5486e438546
1 /*
2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
34 * <wouter@debian.org>
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
43 * <wouter@debian.org>
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
51 * <wouter@debian.org>
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
60 #include "lfs.h"
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
63 #if NEED_BSD_SOURCE
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
65 #endif
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
68 #include <assert.h>
69 #include <sys/types.h>
70 #include <sys/socket.h>
71 #include <sys/stat.h>
72 #include <sys/select.h>
73 #include <sys/wait.h>
74 #include <sys/un.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
77 #endif
78 #ifdef HAVE_SYS_UIO_H
79 #include <sys/uio.h>
80 #endif
81 #include <sys/param.h>
82 #include <signal.h>
83 #include <errno.h>
84 #include <libgen.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
87 #include <netdb.h>
88 #include <syslog.h>
89 #include <unistd.h>
90 #include <stdbool.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <fcntl.h>
95 #if HAVE_FALLOC_PH
96 #include <linux/falloc.h>
97 #endif
98 #include <arpa/inet.h>
99 #include <strings.h>
100 #include <dirent.h>
101 #ifdef HAVE_SYS_DIR_H
102 #include <sys/dir.h>
103 #endif
104 #ifdef HAVE_SYS_DIRENT_H
105 #include <sys/dirent.h>
106 #endif
107 #include <getopt.h>
108 #include <pwd.h>
109 #include <grp.h>
110 #include <dirent.h>
111 #include <ctype.h>
112 #include <inttypes.h>
114 #include <glib.h>
116 #if HAVE_OLD_GLIB
117 #include <pthread.h>
118 #endif
120 #include <semaphore.h>
122 /* used in cliserv.h, so must come first */
123 #define MY_NAME "nbd_server"
124 #include "cliserv.h"
125 #include "nbd-debug.h"
126 #include "netdb-compat.h"
127 #include "backend.h"
128 #include "treefiles.h"
130 #ifdef WITH_SDP
131 #include <sdp_inet.h>
132 #endif
134 #if HAVE_FSCTL_SET_ZERO_DATA
135 #include <io.h>
136 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
137 #define NOMINMAX 1
138 #include <windef.h>
139 #include <winbase.h>
140 #include <winioctl.h>
141 #endif
143 /** Default position of the config file */
144 #ifndef SYSCONFDIR
145 #define SYSCONFDIR "/etc"
146 #endif
147 #define CFILE SYSCONFDIR "/nbd-server/config"
149 #if HAVE_GNUTLS
150 #include <gnutls/gnutls.h>
151 #include <gnutls/x509.h>
152 #endif
154 /** Where our config file actually is */
155 gchar* config_file_pos;
157 /** global flags */
158 int glob_flags=0;
160 /* Whether we should avoid forking */
161 int dontfork = 0;
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is the value with all bits set except the sign bit.
 *
 * The value is computed in an unsigned type: shifting a 1 into the sign
 * bit of a signed type is undefined behaviour. The expansion is fully
 * parenthesized so it can be used safely inside larger expressions.
 */
#define OFFT_MAX ((off_t)((((uintmax_t)1) << (sizeof(off_t)*8-1)) - 1))
168 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
169 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
171 /** Global flags: */
172 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
173 #define F_LIST 2 /**< Allow clients to list the exports on a server */
174 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
175 // also accepts F_FORCEDTLS (which is 16384)
176 GHashTable *children;
177 char pidfname[256]; /**< name of our PID file */
178 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
180 #define NEG_INIT (1 << 0)
181 #define NEG_OLD (1 << 1)
182 #define NEG_MODERN (1 << 2)
185 * If we want what the system really has set we'd have to read
186 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
188 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
189 #define SPLICE_IN 0
190 #define SPLICE_OUT 1
192 #include <nbdsrv.h>
194 /* Our thread pool */
195 GThreadPool *tpool;
197 /* A work package for the thread pool functions */
198 struct work_package {
199 CLIENT* client;
200 struct nbd_request* req;
201 int pipefd[2];
202 void* data; /**< for read requests */
205 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
206 SIGCHLD handler
207 to mark a child
208 exit */
210 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
211 SIGTERM handler
212 to mark an exit
213 request */
215 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
216 handler to mark a
217 reconfiguration
218 request */
220 GArray* modernsocks; /**< Sockets for the modern handler. Not used
221 if a client was only specified on the
222 command line; only port used if
223 oldstyle is set to false (and then the
224 command-line client isn't used, gna gna).
225 This may be more than one socket on
226 systems that don't support serving IPv4
227 and IPv6 from the same socket (like,
228 e.g., FreeBSD) */
229 GArray* childsocks; /**< parent-side sockets for communication with children */
230 int commsocket; /**< child-side socket for communication with parent */
231 static sem_t file_wait_sem;
233 bool logged_oversized=false; /**< whether we logged oversized requests already */
/**
 * Type of configuration file values
 */
typedef enum {
	PARAM_INT,	/**< This parameter is an integer (gint) */
	PARAM_INT64,	/**< This parameter is a 64-bit integer (gint64) */
	PARAM_STRING,	/**< This parameter is a string */
	PARAM_BOOL,	/**< This parameter is a boolean, stored as a flag bit */
} PARAM_TYPE;
246 * Configuration file values
248 typedef struct {
249 gchar *paramname; /**< Name of the parameter, as it appears in
250 the config file */
251 gboolean required; /**< Whether this is a required (as opposed to
252 optional) parameter */
253 PARAM_TYPE ptype; /**< Type of the parameter. */
254 gpointer target; /**< Pointer to where the data of this
255 parameter should be written. If ptype is
256 PARAM_BOOL, the data is or'ed rather than
257 overwritten. */
258 gint flagval; /**< Flag mask for this parameter in case ptype
259 is PARAM_BOOL. */
260 } PARAM;
263 * Configuration file values of the "generic" section
265 struct generic_conf {
266 gchar *user; /**< user we run the server as */
267 gchar *group; /**< group we run running as */
268 gchar *modernaddr; /**< address of the modern socket */
269 gchar *modernport; /**< port of the modern socket */
270 gchar *unixsock; /**< file name of the unix domain socket */
271 gchar *certfile; /**< certificate file */
272 gchar *keyfile; /**< key file */
273 gchar *cacertfile; /**< CA certificate file */
274 gchar *tlsprio; /**< TLS priority string */
275 gint flags; /**< global flags */
276 gint threads; /**< maximum number of parallel threads we want to run */
280 * Translate a command name into human readable form
282 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
283 * @return pointer to the command name
285 static inline const char * getcommandname(uint64_t command) {
286 switch (command) {
287 case NBD_CMD_READ:
288 return "NBD_CMD_READ";
289 case NBD_CMD_WRITE:
290 return "NBD_CMD_WRITE";
291 case NBD_CMD_DISC:
292 return "NBD_CMD_DISC";
293 case NBD_CMD_FLUSH:
294 return "NBD_CMD_FLUSH";
295 case NBD_CMD_TRIM:
296 return "NBD_CMD_TRIM";
297 case NBD_CMD_WRITE_ZEROES:
298 return "NBD_CMD_WRITE_ZEROES";
299 default:
300 return "UNKNOWN";
304 #if HAVE_GNUTLS
305 static int writeit_tls(gnutls_session_t s, void *buf, size_t len) {
306 ssize_t res;
307 char *m;
308 while(len > 0) {
309 DEBUG("+");
310 if ((res = gnutls_record_send(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
311 m = g_strdup_printf("issue while sending data: %s", gnutls_strerror(res));
312 err_nonfatal(m);
313 g_free(m);
314 } else if(res < 0) {
315 m = g_strdup_printf("could not send data: %s", gnutls_strerror(res));
316 err_nonfatal(m);
317 g_free(m);
318 return -1;
319 } else {
320 len -= res;
321 buf += res;
324 return 0;
327 static int readit_tls(gnutls_session_t s, void *buf, size_t len) {
328 ssize_t res;
329 char *m;
330 while(len > 0) {
331 DEBUG("*");
332 if((res = gnutls_record_recv(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
333 m = g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res));
334 err_nonfatal(m);
335 g_free(m);
336 } else if(res < 0) {
337 m = g_strdup_printf("could not receive data: %s", gnutls_strerror(res));
338 err_nonfatal(m);
339 g_free(m);
340 return -1;
341 } else {
342 len -= res;
343 buf += res;
346 return 0;
349 static int socket_read_tls(CLIENT* client, void *buf, size_t len) {
350 return readit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
353 static int socket_write_tls(CLIENT* client, void *buf, size_t len) {
354 return writeit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
356 #endif // HAVE_GNUTLS
358 static int socket_read_notls(CLIENT* client, void *buf, size_t len) {
359 return readit(client->net, buf, len);
362 static int socket_write_notls(CLIENT* client, void *buf, size_t len) {
363 return writeit(client->net, buf, len);
366 static void socket_read(CLIENT* client, void *buf, size_t len) {
367 g_assert(client->socket_read != NULL);
368 if(client->socket_read(client, buf, len)<0) {
369 g_assert(client->socket_closed != NULL);
370 client->socket_closed(client);
375 * Consume data from a socket that we don't want
377 * @param c the client to read from
378 * @param len the number of bytes to consume
379 * @param buf a buffer
380 * @param bufsiz the size of the buffer
382 static inline void consume(CLIENT* c, size_t len, void * buf, size_t bufsiz) {
383 size_t curlen;
384 while (len>0) {
385 curlen = (len>bufsiz)?bufsiz:len;
386 socket_read(c, buf, curlen);
387 len -= curlen;
392 * Consume a length field and corresponding payload that we don't want
394 * @param c the client to read from
396 static inline void consume_len(CLIENT* c) {
397 uint32_t len;
398 char buf[1024];
400 socket_read(c, &len, sizeof(len));
401 len = ntohl(len);
402 consume(c, len, buf, sizeof(buf));
405 static void socket_write(CLIENT* client, void *buf, size_t len) {
406 g_assert(client->socket_write != NULL);
407 if(client->socket_write(client, buf, len)<0) {
408 g_assert(client->socket_closed != NULL);
409 client->socket_closed(client);
413 static inline void socket_closed_negotiate(CLIENT* client) {
414 err("Negotiation failed: %m");
418 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
419 * options
421 * @param command the command to be ran. Read from the config file
422 * @param file the file name we're about to export
424 int do_run(gchar* command, gchar* file) {
425 gchar* cmd;
426 int retval=0;
428 if(command && *command) {
429 cmd = g_strdup_printf(command, file);
430 retval=system(cmd);
431 g_free(cmd);
433 return retval;
436 static inline void finalize_client(CLIENT* client) {
437 g_thread_pool_free(tpool, FALSE, TRUE);
438 do_run(client->server->postrun, client->exportname);
439 if(client->transactionlogfd != -1) {
440 close(client->transactionlogfd);
441 client->transactionlogfd = -1;
443 if(client->server->flags & F_COPYONWRITE) {
444 unlink(client->difffilename);
448 static inline void socket_closed_transmission(CLIENT* client) {
449 int saved_errno = errno;
450 finalize_client(client);
451 errno = saved_errno;
452 err("Connection dropped: %m");
455 #ifdef HAVE_SPLICE
457 * Splice data between a pipe and a file descriptor
459 * @param fd_in The fd to splice from.
460 * @param off_in The fd_in offset to splice from.
461 * @param fd_out The fd to splice to.
462 * @param off_out The fd_out offset to splice to.
463 * @param len The length to splice.
465 static inline void spliceit(int fd_in, loff_t *off_in, int fd_out,
466 loff_t *off_out, size_t len)
468 ssize_t ret;
469 while (len > 0) {
470 if ((ret = splice(fd_in, off_in, fd_out, off_out, len,
471 SPLICE_F_MOVE)) <= 0)
472 err("Splice failed: %m");
473 len -= ret;
476 #endif
479 * Print out a message about how to use nbd-server. Split out to a separate
480 * function so that we can call it from multiple places
482 void usage() {
483 printf("This is nbd-server version " VERSION "\n");
484 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
485 "\t-r|--read-only\t\tread only\n"
486 "\t-m|--multi-file\t\tmultiple file\n"
487 "\t-c|--copy-on-write\tcopy on write\n"
488 "\t-C|--config-file\tspecify an alternate configuration file\n"
489 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
490 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
491 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
492 "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
493 "\t-V|--version\toutput the version and exit\n\n"
494 "\tif port is set to 0, stdin is used (for running from inetd).\n"
495 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
496 "\t\taddress of the machine trying to connect\n"
497 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
498 printf("Using configuration file %s\n", CFILE);
499 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT);
502 /* Dumps a config file section of the given SERVER*, and exits. */
503 void dump_section(SERVER* serve, gchar* section_header) {
504 printf("[%s]\n", section_header);
505 printf("\texportname = %s\n", serve->exportname);
506 printf("\tlistenaddr = %s\n", serve->listenaddr);
507 if(serve->flags & F_READONLY) {
508 printf("\treadonly = true\n");
510 if(serve->flags & F_MULTIFILE) {
511 printf("\tmultifile = true\n");
513 if(serve->flags & F_TREEFILES) {
514 printf("\ttreefiles = true\n");
516 if(serve->flags & F_COPYONWRITE) {
517 printf("\tcopyonwrite = true\n");
519 if(serve->expected_size) {
520 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
522 if(serve->authname) {
523 printf("\tauthfile = %s\n", serve->authname);
525 exit(EXIT_SUCCESS);
529 * Parse the command line.
531 * @param argc the argc argument to main()
532 * @param argv the argv argument to main()
534 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) {
535 int i=0;
536 int nonspecial=0;
537 int c;
538 struct option long_options[] = {
539 {"read-only", no_argument, NULL, 'r'},
540 {"multi-file", no_argument, NULL, 'm'},
541 {"copy-on-write", no_argument, NULL, 'c'},
542 {"dont-fork", no_argument, NULL, 'd'},
543 {"authorize-file", required_argument, NULL, 'l'},
544 {"config-file", required_argument, NULL, 'C'},
545 {"pid-file", required_argument, NULL, 'p'},
546 {"output-config", required_argument, NULL, 'o'},
547 {"max-connection", required_argument, NULL, 'M'},
548 {"version", no_argument, NULL, 'V'},
549 {0,0,0,0}
551 SERVER *serve;
552 off_t es;
553 size_t last;
554 char suffix;
555 gboolean do_output=FALSE;
556 gchar* section_header="";
557 gchar** addr_port;
559 if(argc==1) {
560 return NULL;
562 serve=g_new0(SERVER, 1);
563 serve->authname = g_strdup(default_authname);
564 serve->virtstyle=VIRT_IPLIT;
565 while((c=getopt_long(argc, argv, "-C:cwdl:mo:rp:M:V", long_options, &i))>=0) {
566 switch (c) {
567 case 1:
568 /* non-option argument */
569 switch(nonspecial++) {
570 case 0:
571 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
572 addr_port=g_strsplit(optarg, ":", 2);
574 /* Check for "@" - maybe user using this separator
575 for IPv4 address */
576 if(!addr_port[1]) {
577 g_strfreev(addr_port);
578 addr_port=g_strsplit(optarg, "@", 2);
580 } else {
581 addr_port=g_strsplit(optarg, "@", 2);
584 if(addr_port[1]) {
585 genconf->modernport=g_strdup(addr_port[1]);
586 genconf->modernaddr=g_strdup(addr_port[0]);
587 } else {
588 g_free(genconf->modernaddr);
589 genconf->modernaddr=NULL;
590 genconf->modernport=g_strdup(addr_port[0]);
592 g_strfreev(addr_port);
593 break;
594 case 1:
595 serve->exportname = g_strdup(optarg);
596 if(serve->exportname[0] != '/') {
597 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
598 exit(EXIT_FAILURE);
600 break;
601 case 2:
602 last=strlen(optarg)-1;
603 suffix=optarg[last];
604 if (suffix == 'k' || suffix == 'K' ||
605 suffix == 'm' || suffix == 'M')
606 optarg[last] = '\0';
607 es = (off_t)atoll(optarg);
608 switch (suffix) {
609 case 'm':
610 case 'M': es <<= 10;
611 case 'k':
612 case 'K': es <<= 10;
613 default : break;
615 serve->expected_size = es;
616 break;
618 break;
619 case 'r':
620 serve->flags |= F_READONLY;
621 break;
622 case 'm':
623 serve->flags |= F_MULTIFILE;
624 break;
625 case 'o':
626 do_output = TRUE;
627 section_header = g_strdup(optarg);
628 break;
629 case 'p':
630 strncpy(pidfname, optarg, 256);
631 pidfname[255]='\0';
632 break;
633 case 'c':
634 serve->flags |=F_COPYONWRITE;
635 break;
636 case 'd':
637 dontfork = 1;
638 break;
639 case 'C':
640 g_free(config_file_pos);
641 config_file_pos=g_strdup(optarg);
642 break;
643 case 'l':
644 g_free(serve->authname);
645 serve->authname=g_strdup(optarg);
646 break;
647 case 'M':
648 serve->max_connections = strtol(optarg, NULL, 0);
649 break;
650 case 'V':
651 printf("This is nbd-server version " VERSION "\n");
652 exit(EXIT_SUCCESS);
653 break;
654 default:
655 usage();
656 exit(EXIT_FAILURE);
657 break;
660 /* What's left: the port to export, the name of the to be exported
661 * file, and, optionally, the size of the file, in that order. */
662 if(nonspecial<2) {
663 g_free(serve);
664 serve=NULL;
665 } else {
666 serve->servename = "";
668 if(do_output) {
669 if(!serve) {
670 g_critical("Need a complete configuration on the command line to output a config file section!");
671 exit(EXIT_FAILURE);
673 dump_section(serve, section_header);
675 return serve;
678 /* forward definition of parse_cfile */
679 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
681 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
682 #define NBD_D_TYPE de->d_type
683 #else
684 #define NBD_D_TYPE 0
685 #define DT_UNKNOWN 0
686 #define DT_REG 1
687 #endif
690 * Parse config file snippets in a directory. Uses readdir() and friends
691 * to find files and open them, then passes them on to parse_cfile
692 * with have_global set false
694 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
695 DIR* dirh = opendir(dir);
696 struct dirent* de;
697 gchar* fname;
698 GArray* retval = NULL;
699 GArray* tmp;
700 struct stat stbuf;
702 if(!dirh) {
703 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
704 return NULL;
706 errno=0;
707 while((de = readdir(dirh))) {
708 int saved_errno=errno;
709 fname = g_build_filename(dir, de->d_name, NULL);
710 switch(NBD_D_TYPE) {
711 case DT_UNKNOWN:
712 /* Filesystem doesn't return type of
713 * file through readdir. Run stat() on
714 * the file instead */
715 if(stat(fname, &stbuf)) {
716 perror("stat");
717 goto err_out;
719 if (!S_ISREG(stbuf.st_mode)) {
720 goto next;
722 case DT_REG:
723 /* Skip unless the name ends with '.conf' */
724 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
725 goto next;
727 tmp = parse_cfile(fname, genconf, false, e);
728 errno=saved_errno;
729 if(*e) {
730 goto err_out;
732 if(!retval)
733 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
734 retval = g_array_append_vals(retval, tmp->data, tmp->len);
735 g_array_free(tmp, TRUE);
736 default:
737 break;
739 next:
740 g_free(fname);
742 if(errno) {
743 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
744 err_out:
745 if(retval)
746 g_array_free(retval, TRUE);
747 retval = NULL;
749 if(dirh)
750 closedir(dirh);
751 return retval;
755 * Parse the config file.
757 * @param f the name of the config file
759 * @param genconf a pointer to generic configuration which will get
760 * updated with parsed values. If NULL, then parsed generic
761 * configuration values are safely and silently discarded.
763 * @param e a GError. Error code can be any of the following:
764 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
765 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
766 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
768 * @param expect_generic if true, we expect a configuration file that
769 * contains a [generic] section. If false, we don't.
771 * @return a GArray of SERVER* pointers. If the config file is empty or does not
772 * exist, returns an empty GArray; if the config file contains an
773 * error, returns NULL, and e is set appropriately
775 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
776 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
777 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
778 gchar* cfdir = NULL;
779 SERVER s;
780 gchar *virtstyle=NULL;
781 PARAM lp[] = {
782 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
783 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
784 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
785 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
786 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
787 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
788 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
789 { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 },
790 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
791 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
792 { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES },
793 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
794 { "waitfile", FALSE, PARAM_BOOL, &(s.flags), F_WAIT },
795 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
796 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
797 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
798 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
799 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
800 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
801 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
802 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
803 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
804 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
805 { "force_tls", FALSE, PARAM_BOOL, &(s.flags), F_FORCEDTLS },
806 { "splice", FALSE, PARAM_BOOL, &(s.flags), F_SPLICE},
808 const int lp_size=sizeof(lp)/sizeof(PARAM);
809 struct generic_conf genconftmp;
810 PARAM gp[] = {
811 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
812 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
813 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
814 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
815 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
816 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
817 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
818 { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 },
819 { "max_threads", FALSE, PARAM_INT, &(genconftmp.threads), 0 },
820 { "force_tls", FALSE, PARAM_BOOL, &(genconftmp.flags), F_FORCEDTLS },
821 { "certfile", FALSE, PARAM_STRING, &(genconftmp.certfile), 0 },
822 { "keyfile", FALSE, PARAM_STRING, &(genconftmp.keyfile), 0 },
823 { "cacertfile", FALSE, PARAM_STRING, &(genconftmp.cacertfile), 0 },
824 { "tlsprio", FALSE, PARAM_STRING, &(genconftmp.tlsprio), 0 },
826 PARAM* p=gp;
827 int p_size=sizeof(gp)/sizeof(PARAM);
828 GKeyFile *cfile;
829 GError *err = NULL;
830 const char *err_msg=NULL;
831 GArray *retval=NULL;
832 gchar **groups;
833 gboolean bval;
834 gint ival;
835 gint64 i64val;
836 gchar* sval;
837 gchar* startgroup;
838 gint i;
839 gint j;
841 memset(&genconftmp, 0, sizeof(struct generic_conf));
843 genconftmp.tlsprio = "NORMAL:-VERS-TLS-ALL:+VERS-TLS1.2:%SERVER_PRECEDENCE";
845 if (genconf) {
846 /* Use the passed configuration values as defaults. The
847 * parsing algorithm below updates all parameter targets
848 * found from configuration files. */
849 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
852 cfile = g_key_file_new();
853 retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
854 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
855 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
856 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
857 f, err->message);
858 g_key_file_free(cfile);
859 return retval;
861 startgroup = g_key_file_get_start_group(cfile);
862 if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
863 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
864 g_key_file_free(cfile);
865 return NULL;
867 groups = g_key_file_get_groups(cfile, NULL);
868 for(i=0;groups[i];i++) {
869 memset(&s, '\0', sizeof(SERVER));
871 /* After the [generic] group or when we're parsing an include
872 * directory, start parsing exports */
873 if(i==1 || !expect_generic) {
874 p=lp;
875 p_size=lp_size;
877 for(j=0;j<p_size;j++) {
878 assert(p[j].target != NULL);
879 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
880 switch(p[j].ptype) {
881 case PARAM_INT:
882 ival = g_key_file_get_integer(cfile,
883 groups[i],
884 p[j].paramname,
885 &err);
886 if(!err) {
887 *((gint*)p[j].target) = ival;
889 break;
890 case PARAM_INT64:
891 i64val = g_key_file_get_int64(cfile,
892 groups[i],
893 p[j].paramname,
894 &err);
895 if(!err) {
896 *((gint64*)p[j].target) = i64val;
898 break;
899 case PARAM_STRING:
900 sval = g_key_file_get_string(cfile,
901 groups[i],
902 p[j].paramname,
903 &err);
904 if(!err) {
905 *((gchar**)p[j].target) = sval;
907 break;
908 case PARAM_BOOL:
909 bval = g_key_file_get_boolean(cfile,
910 groups[i],
911 p[j].paramname, &err);
912 if(!err) {
913 if(bval) {
914 *((gint*)p[j].target) |= p[j].flagval;
915 } else {
916 *((gint*)p[j].target) &= ~(p[j].flagval);
919 break;
921 if(err) {
922 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
923 if(!p[j].required) {
924 /* Ignore not-found error for optional values */
925 g_clear_error(&err);
926 continue;
927 } else {
928 err_msg = MISSING_REQUIRED_ERROR;
930 } else {
931 err_msg = DEFAULT_ERROR;
933 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
934 g_array_free(retval, TRUE);
935 g_error_free(err);
936 g_key_file_free(cfile);
937 return NULL;
940 if(virtstyle) {
941 if(!strncmp(virtstyle, "none", 4)) {
942 s.virtstyle=VIRT_NONE;
943 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
944 s.virtstyle=VIRT_IPLIT;
945 } else if(!strncmp(virtstyle, "iphash", 6)) {
946 s.virtstyle=VIRT_IPHASH;
947 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
948 s.virtstyle=VIRT_CIDR;
949 if(strlen(virtstyle)<10) {
950 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
951 g_array_free(retval, TRUE);
952 g_key_file_free(cfile);
953 return NULL;
955 s.cidrlen=strtol(virtstyle+8, NULL, 0);
956 } else {
957 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
958 g_array_free(retval, TRUE);
959 g_key_file_free(cfile);
960 return NULL;
962 } else {
963 s.virtstyle=VIRT_IPLIT;
965 if(genconftmp.flags & F_OLDSTYLE) {
966 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
967 g_message("Exiting.");
968 return NULL;
970 #ifndef HAVE_SPLICE
971 if (s.flags & F_SPLICE) {
972 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without splice support, yet group %s uses it", groups[i]);
973 g_array_free(retval, TRUE);
974 g_key_file_free(cfile);
975 return NULL;
977 #endif
978 /* We can't mix copyonwrite and splice. */
979 if ((s.flags & F_COPYONWRITE) && (s.flags & F_SPLICE)) {
980 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_SPLICE,
981 "Cannot mix copyonwrite with splice for an export in group %s",
982 groups[i]);
983 g_array_free(retval, TRUE);
984 g_key_file_free(cfile);
985 return NULL;
987 if ((s.flags & F_COPYONWRITE) && (s.flags & F_WAIT)) {
988 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_WAIT,
989 "Cannot mix copyonwrite with waitfile for an export in group %s",
990 groups[i]);
991 g_array_free(retval, TRUE);
992 g_key_file_free(cfile);
993 return NULL;
995 /* Don't need to free this, it's not our string */
996 virtstyle=NULL;
997 /* Don't append values for the [generic] group */
998 if(i>0 || !expect_generic) {
999 s.servename = groups[i];
1001 g_array_append_val(retval, s);
1003 #ifndef WITH_SDP
1004 if(s.flags & F_SDP) {
1005 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
1006 g_array_free(retval, TRUE);
1007 g_key_file_free(cfile);
1008 return NULL;
1010 #endif
1012 g_key_file_free(cfile);
1013 if(cfdir) {
1014 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
1015 if(extra) {
1016 retval = g_array_append_vals(retval, extra->data, extra->len);
1017 i+=extra->len;
1018 g_array_free(extra, TRUE);
1019 } else {
1020 if(*e) {
1021 g_array_free(retval, TRUE);
1022 return NULL;
1026 if(i==1 && expect_generic) {
1027 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
1030 if (genconf) {
1031 /* Return the updated generic configuration through the
1032 * pointer parameter. */
1033 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
1036 return retval;
1040 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1041 * main loop of the root server process. This allows us to separate the signal
1042 * catching from th actual task triggered by SIGCHLD and hence processing in the
1043 * interrupt context is kept as minimial as possible.
1045 * @param s the signal we're handling (must be SIGCHLD, or something
1046 * is severely wrong)
1048 static void sigchld_handler(const int s G_GNUC_UNUSED) {
1049 is_sigchld_caught = 1;
1053 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1055 * @param key the key
1056 * @param value the value corresponding to the above key
1057 * @param user_data a pointer which we always set to 1, so that we know what
1058 * will happen next.
1060 void killchild(gpointer key, gpointer value, gpointer user_data) {
1061 pid_t *pid=value;
1063 kill(*pid, SIGTERM);
1067 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1068 * main loop of the root server process. This allows us to separate the signal
1069 * catching from th actual task triggered by SIGTERM and hence processing in the
1070 * interrupt context is kept as minimial as possible.
1072 * @param s the signal we're handling (must be SIGTERM, or something
1073 * is severely wrong).
1075 static void sigterm_handler(const int s G_GNUC_UNUSED) {
1076 is_sigterm_caught = 1;
1080 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1081 * the main loop of the root server process. This allows us to separate
1082 * the signal catching from th actual task triggered by SIGHUP and hence
1083 * processing in the interrupt context is kept as minimial as possible.
1085 * @param s the signal we're handling (must be SIGHUP, or something
1086 * is severely wrong).
1088 static void sighup_handler(const int s G_GNUC_UNUSED) {
1089 is_sighup_caught = 1;
1092 static void sigusr1_handler(const int s G_GNUC_UNUSED) {
1093 msg(LOG_INFO, "Got SIGUSR1");
1094 sem_post(&file_wait_sem);
1098 * Get the file handle and offset, given an export offset.
1100 * @param client The client we're serving for
1101 * @param a The offset to get corresponding file/offset for
1102 * @param fhandle [out] File descriptor
1103 * @param foffset [out] Offset into fhandle
1104 * @param maxbytes [out] Tells how many bytes can be read/written
1105 * from fhandle starting at foffset (0 if there is no limit)
1106 * @return 0 on success, -1 on failure
1108 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1110 GArray * const export = client->export;
1112 /* Negative offset not allowed */
1113 if(a < 0)
1114 return -1;
1116 /* Open separate file for treefiles */
1117 if (client->server->flags & F_TREEFILES) {
1118 *foffset = a % TREEPAGESIZE;
1119 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
1120 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock);
1121 return 0;
1124 /* Binary search for last file with starting offset <= a */
1125 FILE_INFO fi;
1126 int start = 0;
1127 int end = export->len - 1;
1128 while( start <= end ) {
1129 int mid = (start + end) / 2;
1130 fi = g_array_index(export, FILE_INFO, mid);
1131 if( fi.startoff < a ) {
1132 start = mid + 1;
1133 } else if( fi.startoff > a ) {
1134 end = mid - 1;
1135 } else {
1136 start = end = mid;
1137 break;
1141 /* end should never go negative, since first startoff is 0 and a >= 0 */
1142 assert(end >= 0);
1144 fi = g_array_index(export, FILE_INFO, end);
1145 *fhandle = fi.fhandle;
1146 *foffset = a - fi.startoff;
1147 *maxbytes = 0;
1148 if( end+1 < export->len ) {
1149 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1150 *maxbytes = fi_next.startoff - a;
1153 return 0;
1157 * Write an amount of bytes at a given offset to the right file. This
1158 * abstracts the write-side of the multiple file option.
1160 * @param a The offset where the write should start
1161 * @param buf The buffer to write from
1162 * @param len The length of buf
1163 * @param client The client we're serving for
1164 * @param fua Flag to indicate 'Force Unit Access'
1165 * @return The number of bytes actually written, or -1 in case of an error
1167 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1168 int fhandle;
1169 off_t foffset;
1170 size_t maxbytes;
1171 ssize_t retval;
1173 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1174 return -1;
1175 if(maxbytes && len > maxbytes)
1176 len = maxbytes;
1178 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1180 retval = pwrite(fhandle, buf, len, foffset);
1181 if(client->server->flags & F_SYNC) {
1182 fsync(fhandle);
1183 } else if (fua) {
1185 /* This is where we would do the following
1186 * #ifdef USE_SYNC_FILE_RANGE
1187 * However, we don't, for the reasons set out below
1188 * by Christoph Hellwig <hch@infradead.org>
1190 * [BEGINS]
1191 * fdatasync is equivalent to fsync except that it does not flush
1192 * non-essential metadata (basically just timestamps in practice), but it
1193 * does flush metadata requried to find the data again, e.g. allocation
1194 * information and extent maps. sync_file_range does nothing but flush
1195 * out pagecache content - it means you basically won't get your data
1196 * back in case of a crash if you either:
1198 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1199 * b) are using a sparse file on a filesystem
1200 * c) are using a fallocate-preallocated file on a filesystem
1201 * d) use any file on a COW filesystem like btrfs
1203 * e.g. it only does anything useful for you if you do not have a volatile
1204 * write cache, and either use a raw block device node, or just overwrite
1205 * an already fully allocated (and not preallocated) file on a non-COW
1206 * filesystem.
1207 * [ENDS]
1209 * What we should do is open a second FD with O_DSYNC set, then write to
1210 * that when appropriate. However, with a Linux client, every REQ_FUA
1211 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1212 * problems.
1215 #if 0
1216 sync_file_range(fhandle, foffset, len,
1217 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1218 SYNC_FILE_RANGE_WAIT_AFTER);
1219 #else
1220 fdatasync(fhandle);
1221 #endif
1223 /* close file pointer in case of treefiles */
1224 if (client->server->flags & F_TREEFILES) {
1225 close(fhandle);
1227 return retval;
1231 * Call rawexpwrite repeatedly until all data has been written.
1233 * @param a The offset where the write should start
1234 * @param buf The buffer to write from
1235 * @param len The length of buf
1236 * @param client The client we're serving for
1237 * @param fua Flag to indicate 'Force Unit Access'
1238 * @return 0 on success, nonzero on failure
1240 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1241 ssize_t ret=0;
1243 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1244 a += ret;
1245 buf += ret;
1246 len -= ret;
1248 return (ret < 0 || len != 0);
1252 * Read an amount of bytes at a given offset from the right file. This
1253 * abstracts the read-side of the multiple files option.
1255 * @param a The offset where the read should start
1256 * @param buf A buffer to read into
1257 * @param len The size of buf
1258 * @param client The client we're serving for
1259 * @return The number of bytes actually read, or -1 in case of an
1260 * error.
1262 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1263 int fhandle;
1264 off_t foffset;
1265 size_t maxbytes;
1266 ssize_t retval;
1268 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1269 return -1;
1270 if(maxbytes && len > maxbytes)
1271 len = maxbytes;
1273 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1275 retval = pread(fhandle, buf, len, foffset);
1276 if (client->server->flags & F_TREEFILES) {
1277 close(fhandle);
1279 return retval;
1283 * Call rawexpread repeatedly until all data has been read.
1284 * @return 0 on success, nonzero on failure
1286 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1287 ssize_t ret=0;
1289 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1290 a += ret;
1291 buf += ret;
1292 len -= ret;
1294 return (ret < 0 || len != 0);
1297 #ifdef HAVE_SPLICE
1298 int rawexpsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir,
1299 int fua)
1301 int fhandle;
1302 off_t foffset;
1303 size_t maxbytes;
1304 ssize_t retval;
1306 if (get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1307 return -1;
1308 if (maxbytes && len > maxbytes)
1309 len = maxbytes;
1311 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1312 (dir == SPLICE_IN) ? "from" : "to", fhandle,
1313 (unsigned long long)a, (unsigned)len);
1316 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1317 * it might, so go ahead and use it.
1319 if (dir == SPLICE_IN) {
1320 retval = splice(fhandle, &foffset, pipe, NULL, len,
1321 SPLICE_F_MOVE);
1322 } else {
1323 retval = splice(pipe, NULL, fhandle, &foffset, len,
1324 SPLICE_F_MOVE);
1325 if (client->server->flags & F_SYNC)
1326 fsync(fhandle);
1327 else if (fua)
1328 fdatasync(fhandle);
1330 if (client->server->flags & F_TREEFILES)
1331 close(fhandle);
1332 return retval;
1336 * Splice an amount of bytes from the given offset from/into the right file
1337 * from/into the given pipe.
1338 * @param pipe The pipe we are using for this splice.
1339 * @param a The offset of the file we are operating on.
1340 * @param len The length of the splice.
1341 * @param client The client we're splicing for.
1342 * @param dir The direction we are doing the splice in.
1343 * @param fua Set if this is a write and we need to fua.
1344 * @return 0 on success, nonzero on failure.
1346 int expsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir, int fua)
1348 ssize_t ret;
1350 while (len > 0 &&
1351 (ret = rawexpsplice(pipe, a, len, client, dir, fua)) > 0) {
1352 a += ret;
1353 len -= ret;
1355 return (ret < 0 || len != 0);
1357 #endif /* HAVE_SPLICE */
1360 * Read an amount of bytes at a given offset from the right file. This
1361 * abstracts the read-side of the copyonwrite stuff, and calls
1362 * rawexpread() with the right parameters to do the actual work.
1363 * @param a The offset where the read should start
1364 * @param buf A buffer to read into
1365 * @param len The size of buf
1366 * @param client The client we're going to read for
1367 * @return 0 on success, nonzero on failure
1369 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1370 off_t rdlen, offset;
1371 off_t mapcnt, mapl, maph, pagestart;
1373 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1375 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1376 return(rawexpread_fully(a, buf, len, client));
1378 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1380 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1381 pagestart=mapcnt*DIFFPAGESIZE;
1382 offset=a-pagestart;
1383 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1384 len : (size_t)DIFFPAGESIZE-offset;
1385 if (!(client->server->flags & F_COPYONWRITE))
1386 pthread_rwlock_rdlock(&client->export_lock);
1387 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1388 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1389 (unsigned long)(client->difmap[mapcnt]));
1390 if (pread(client->difffile, buf, rdlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != rdlen) goto fail;
1391 } else { /* the block is not there */
1392 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1393 DEBUG("Page %llu is not here, and waiting for file\n",
1394 (unsigned long long)mapcnt);
1395 goto fail;
1396 } else {
1397 DEBUG("Page %llu is not here, we read the original one\n",
1398 (unsigned long long)mapcnt);
1399 if(rawexpread_fully(a, buf, rdlen, client)) goto fail;
1402 if (!(client->server->flags & F_COPYONWRITE))
1403 pthread_rwlock_unlock(&client->export_lock);
1404 len-=rdlen; a+=rdlen; buf+=rdlen;
1406 return 0;
1407 fail:
1408 if (!(client->server->flags & F_COPYONWRITE))
1409 pthread_rwlock_unlock(&client->export_lock);
1410 return -1;
1414 * Write an amount of bytes at a given offset to the right file. This
1415 * abstracts the write-side of the copyonwrite option, and calls
1416 * rawexpwrite() with the right parameters to do the actual work.
1418 * @param a The offset where the write should start
1419 * @param buf The buffer to write from
1420 * @param len The length of buf
1421 * @param client The client we're going to write for.
1422 * @param fua Flag to indicate 'Force Unit Access'
1423 * @return 0 on success, nonzero on failure
1425 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1426 char pagebuf[DIFFPAGESIZE];
1427 off_t mapcnt,mapl,maph;
1428 off_t wrlen,rdlen;
1429 off_t pagestart;
1430 off_t offset;
1432 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1435 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1436 return(rawexpwrite_fully(a, buf, len, client, fua));
1438 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1440 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1441 pagestart=mapcnt*DIFFPAGESIZE ;
1442 offset=a-pagestart ;
1443 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1444 len : (size_t)DIFFPAGESIZE-offset;
1446 if (!(client->server->flags & F_COPYONWRITE))
1447 pthread_rwlock_rdlock(&client->export_lock);
1448 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1449 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1450 (unsigned long)(client->difmap[mapcnt])) ;
1451 if (pwrite(client->difffile, buf, wrlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != wrlen) goto fail;
1452 } else { /* the block is not there */
1453 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1454 DEBUG("Page %llu is not here, we put it at %lu\n",
1455 (unsigned long long)mapcnt,
1456 (unsigned long)(client->difmap[mapcnt]));
1457 if ((offset != 0) || (wrlen != DIFFPAGESIZE)){
1458 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1459 DEBUG("error: we can write only whole page while waiting for file\n");
1460 goto fail;
1462 rdlen=DIFFPAGESIZE ;
1463 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1464 goto fail;
1466 memcpy(pagebuf+offset,buf,wrlen) ;
1467 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != DIFFPAGESIZE)
1468 goto fail;
1470 if (!(client->server->flags & F_COPYONWRITE))
1471 pthread_rwlock_unlock(&client->export_lock);
1472 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1474 if (client->server->flags & F_SYNC) {
1475 fsync(client->difffile);
1476 } else if (fua) {
1477 /* open question: would it be cheaper to do multiple sync_file_ranges?
1478 as we iterate through the above?
1480 fdatasync(client->difffile);
1482 return 0;
1483 fail:
1484 if (!(client->server->flags & F_COPYONWRITE))
1485 pthread_rwlock_unlock(&client->export_lock);
1486 return -1;
1492 * Write an amount of zeroes at a given offset to the right file.
1493 * This routine could be optimised by not calling expwrite. However,
1494 * this is by far the simplest way to do it.
1496 * @param req the request
1497 * @param client The client we're going to write for.
1498 * @return 0 on success, nonzero on failure
1500 int expwrite_zeroes(struct nbd_request* req, CLIENT* client, int fua) {
1501 off_t a = req->from;
1502 size_t len = req->len;
1503 size_t maxsize = 64LL*1024LL*1024LL;
1504 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1505 char *buf = calloc (1, maxsize);
1506 int ret;
1507 while (len > 0) {
1508 size_t l = len;
1509 if (l > maxsize)
1510 l = maxsize;
1511 ret = expwrite(a, buf, l, client, fua);
1512 if (ret) {
1513 free(buf);
1514 return ret;
1516 len -= l;
1518 free(buf);
1519 return 0;
1523 * Flush data to a client
1525 * @param client The client we're going to write for.
1526 * @return 0 on success, nonzero on failure
1528 int expflush(CLIENT *client) {
1529 gint i;
1531 if (client->server->flags & F_COPYONWRITE) {
1532 return fsync(client->difffile);
1535 if (client->server->flags & F_WAIT) {
1536 return fsync(client->difffile);
1539 if (client->server->flags & F_TREEFILES ) {
1540 // all we can do is force sync the entire filesystem containing the tree
1541 if (client->server->flags & F_READONLY)
1542 return 0;
1543 sync();
1544 return 0;
1547 for (i = 0; i < client->export->len; i++) {
1548 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1549 if (fsync(fi.fhandle) < 0)
1550 return -1;
1553 return 0;
/**
 * Deallocate a byte range of a file, best effort.
 *
 * Uses fallocate() hole punching when HAVE_FALLOC_PH is set, the
 * FSCTL_SET_ZERO_DATA ioctl when HAVE_FSCTL_SET_ZERO_DATA is set, and only
 * emits a debug message otherwise. The result of the underlying call is
 * not checked.
 *
 * @param fd file descriptor to operate on
 * @param off start of the range, in bytes
 * @param len length of the range, in bytes
 */
void punch_hole(int fd, off_t off, off_t len) {
	DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len);
#if HAVE_FALLOC_PH
	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
#elif HAVE_FSCTL_SET_ZERO_DATA
	FILE_ZERO_DATA_INFORMATION zerodata;
	DWORD bytesret;
	HANDLE w32handle = (HANDLE)_get_osfhandle(fd);

	zerodata.FileOffset.QuadPart = off;
	zerodata.BeyondFinalZero.QuadPart = off + len;
	DeviceIoControl(w32handle, FSCTL_SET_ZERO_DATA, &zerodata, sizeof(zerodata), NULL, 0, &bytesret, NULL);
#else
	DEBUG("punching holes not supported on this platform\n");
#endif
}
1572 static void send_reply(CLIENT* client, uint32_t opt, uint32_t reply_type, ssize_t datasize, void* data) {
1573 struct {
1574 uint64_t magic;
1575 uint32_t opt;
1576 uint32_t reply_type;
1577 uint32_t datasize;
1578 } __attribute__ ((packed)) header = {
1579 htonll(0x3e889045565a9LL),
1580 htonl(opt),
1581 htonl(reply_type),
1582 htonl(datasize),
1584 if(datasize < 0) {
1585 datasize = strlen((char*)data);
1586 header.datasize = htonl(datasize);
1588 socket_write(client, &header, sizeof(header));
1589 if(data != NULL) {
1590 socket_write(client, data, datasize);
1595 * Find the name of the file we have to serve. This will use g_strdup_printf
1596 * to put the IP address of the client inside a filename containing
1597 * "%s" (in the form as specified by the "virtstyle" option). That name
1598 * is then written to client->exportname.
1600 * @param net A socket connected to an nbd client
1601 * @param client information about the client. The IP address in human-readable
1602 * format will be written to a new char* buffer, the address of which will be
1603 * stored in client->clientname.
1604 * @return: 0 - OK, -1 - failed.
1606 int set_peername(int net, CLIENT *client) {
1607 struct sockaddr_storage netaddr;
1608 struct sockaddr* addr = (struct sockaddr*)&netaddr;
1609 socklen_t addrinlen = sizeof( struct sockaddr_storage );
1610 struct addrinfo hints;
1611 struct addrinfo *ai = NULL;
1612 char peername[NI_MAXHOST];
1613 char netname[NI_MAXHOST];
1614 char *tmp = NULL;
1615 int i;
1616 int e;
1618 if (getsockname(net, addr, &addrinlen) < 0) {
1619 msg(LOG_INFO, "getsockname failed: %m");
1620 return -1;
1623 if(netaddr.ss_family == AF_UNIX) {
1624 client->clientaddr.ss_family = AF_UNIX;
1625 strcpy(peername, "unix");
1626 } else {
1627 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1628 msg(LOG_INFO, "getpeername failed: %m");
1629 return -1;
1631 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1632 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1633 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1634 return -1;
1637 memset(&hints, '\0', sizeof (hints));
1638 hints.ai_flags = AI_ADDRCONFIG;
1639 e = getaddrinfo(peername, NULL, &hints, &ai);
1641 if(e != 0) {
1642 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
1643 freeaddrinfo(ai);
1644 return -1;
1648 if(strncmp(peername, "::ffff:", 7) == 0) {
1649 memmove(peername, peername+7, strlen(peername));
1652 switch(client->server->virtstyle) {
1653 case VIRT_NONE:
1654 msg(LOG_DEBUG, "virtualization is off");
1655 client->exportname=g_strdup(client->server->exportname);
1656 break;
1657 case VIRT_IPHASH:
1658 msg(LOG_DEBUG, "virtstyle iphash");
1659 for(i=0;i<strlen(peername);i++) {
1660 if(peername[i]=='.') {
1661 peername[i]='/';
1664 case VIRT_IPLIT:
1665 msg(LOG_DEBUG, "virtstyle ipliteral");
1666 client->exportname=g_strdup_printf(client->server->exportname, peername);
1667 break;
1668 case VIRT_CIDR:
1669 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
1670 memcpy(&netaddr, &(client->clientaddr), addrinlen);
1671 int addrbits;
1672 if(client->clientaddr.ss_family == AF_UNIX) {
1673 tmp = g_strdup(peername);
1674 } else {
1675 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
1676 if(ai->ai_family == AF_INET) {
1677 addrbits = 32;
1678 } else if(ai->ai_family == AF_INET6) {
1679 addrbits = 128;
1680 } else {
1681 g_assert_not_reached();
1683 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
1684 for(int i = 0; i < addrbits; i+=8) {
1685 int masklen = client->server->cidrlen - i;
1686 masklen = masklen > 0 ? masklen : 0;
1687 uint8_t mask = getmaskbyte(masklen);
1688 *addrptr &= mask;
1689 addrptr++;
1691 getnameinfo((struct sockaddr *) &netaddr, addrinlen,
1692 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1693 tmp=g_strdup_printf("%s/%s", netname, peername);
1696 if(tmp != NULL) {
1697 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1698 g_free(tmp);
1701 break;
1704 freeaddrinfo(ai);
1705 msg(LOG_INFO, "connect from %s, assigned file is %s",
1706 peername, client->exportname);
1707 client->clientname=g_strdup(peername);
1708 return 0;
1711 int commit_diff(CLIENT* client, bool lock, int fhandle){
1712 int dirtycount = 0;
1713 int pagecount = client->exportsize/DIFFPAGESIZE;
1714 off_t offset;
1715 char* buf = malloc(sizeof(char)*DIFFPAGESIZE);
1717 for (int i=0; i<pagecount; i++){
1718 offset = DIFFPAGESIZE*i;
1719 if (lock)
1720 pthread_rwlock_wrlock(&client->export_lock);
1721 if (client->difmap[i] != (u32)-1){
1722 dirtycount += 1;
1723 DEBUG("flushing dirty page %d, offset %ld\n", i, offset);
1724 if (pread(client->difffile, buf, DIFFPAGESIZE, client->difmap[i]*DIFFPAGESIZE) != DIFFPAGESIZE) {
1725 msg(LOG_WARNING, "could not read while committing diff: %m");
1726 if(lock) {
1727 pthread_rwlock_unlock(&client->export_lock);
1729 break;
1731 if (pwrite(fhandle, buf, DIFFPAGESIZE, offset) != DIFFPAGESIZE) {
1732 msg(LOG_WARNING, "could not write while committing diff: %m");
1733 if (lock) {
1734 pthread_rwlock_unlock(&client->export_lock);
1736 break;
1738 client->difmap[i] = (u32)-1;
1740 if (lock)
1741 pthread_rwlock_unlock(&client->export_lock);
1744 free(buf);
1745 return dirtycount;
1748 void* wait_file(void *void_ptr) {
1749 CLIENT* client = (CLIENT *)void_ptr;
1750 FILE_INFO fi;
1751 GArray* export;
1752 mode_t mode = O_RDWR;
1753 int dirtycount;
1755 fi.fhandle = -1;
1756 fi.startoff = 0;
1758 while (fi.fhandle < 1){
1759 sem_wait(&file_wait_sem);
1760 msg(LOG_INFO, "checking for file %s", client->server->exportname);
1761 fi.fhandle = open(client->server->exportname, mode);
1764 msg(LOG_INFO, "File %s appeared, fd %d", client->server->exportname, fi.fhandle);
1766 // first time there may be lot of data so we lock only per page
1767 do {
1768 dirtycount = commit_diff(client, true, fi.fhandle);
1769 } while (dirtycount > 0);
1771 //last time we lock export for the whole time until we switch write destination
1772 pthread_rwlock_wrlock(&client->export_lock);
1773 do {
1774 dirtycount = commit_diff(client, false, fi.fhandle);
1775 } while (dirtycount > 0);
1777 export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1778 g_array_append_val(export, fi);
1780 client->export = export;
1781 pthread_rwlock_unlock(&client->export_lock);
1782 msg(LOG_INFO, "Waiting for file ended, switching to exported file %s", client->server->exportname);
1784 return NULL;
1788 * Set up client export array, which is an array of FILE_INFO.
1789 * Also, split a single exportfile into multiple ones, if that was asked.
1790 * @param client information on the client which we want to setup export for
1792 bool setupexport(CLIENT* client) {
1793 int i = 0;
1794 off_t laststartoff = 0, lastsize = 0;
1795 int multifile = (client->server->flags & F_MULTIFILE);
1796 int treefile = (client->server->flags & F_TREEFILES);
1797 int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1798 int cancreate = (client->server->expected_size) && !multifile;
1800 if (treefile || (client->server->flags & F_WAIT)) {
1801 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
1802 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
1804 if(client->server->flags & F_WAIT){
1805 pthread_t wait_file_thread;
1806 if (pthread_create(&wait_file_thread, NULL, wait_file, client)){
1807 DEBUG("failed to create wait_file thread");
1808 return false;
1812 } else {
1813 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1815 /* If multi-file, open as many files as we can.
1816 * If not, open exactly one file.
1817 * Calculate file sizes as we go to get total size. */
1818 for(i=0; ; i++) {
1819 FILE_INFO fi;
1820 gchar *tmpname;
1821 gchar* error_string;
1823 if (i)
1824 cancreate = 0;
1825 /* if expected_size is specified, and this is the first file, we can create the file */
1826 mode_t mode = (client->server->flags & F_READONLY) ?
1827 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1829 if (temporary) {
1830 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1831 DEBUG( "Opening %s\n", tmpname );
1832 fi.fhandle = mkstemp(tmpname);
1833 } else {
1834 if(multifile) {
1835 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1836 } else {
1837 tmpname=g_strdup(client->exportname);
1839 DEBUG( "Opening %s\n", tmpname );
1840 fi.fhandle = open(tmpname, mode, 0600);
1841 if(fi.fhandle == -1 && mode == O_RDWR) {
1842 /* Try again because maybe media was read-only */
1843 fi.fhandle = open(tmpname, O_RDONLY);
1844 if(fi.fhandle != -1) {
1845 /* Opening the base file in copyonwrite mode is
1846 * okay */
1847 if(!(client->server->flags & F_COPYONWRITE)) {
1848 client->server->flags |= F_AUTOREADONLY;
1849 client->server->flags |= F_READONLY;
1854 if(fi.fhandle == -1) {
1855 if(multifile && i>0)
1856 break;
1857 error_string=g_strdup_printf(
1858 "Could not open exported file %s: %%m",
1859 tmpname);
1860 err_nonfatal(error_string);
1861 return false;
1864 if (temporary) {
1865 unlink(tmpname); /* File will stick around whilst FD open */
1868 fi.startoff = laststartoff + lastsize;
1869 g_array_append_val(client->export, fi);
1870 g_free(tmpname);
1872 /* Starting offset and size of this file will be used to
1873 * calculate starting offset of next file */
1874 laststartoff = fi.startoff;
1875 lastsize = size_autodetect(fi.fhandle);
1877 /* If we created the file, it will be length zero */
1878 if (!lastsize && cancreate) {
1879 assert(!multifile);
1880 if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1881 err_nonfatal("Could not expand file: %m");
1882 return false;
1884 lastsize = client->server->expected_size;
1885 break; /* don't look for any more files */
1888 if(!multifile || temporary)
1889 break;
1892 /* Set export size to total calculated size */
1893 client->exportsize = laststartoff + lastsize;
1895 /* Export size may be overridden */
1896 if(client->server->expected_size) {
1897 /* desired size must be <= total calculated size */
1898 if(client->server->expected_size > client->exportsize) {
1899 err_nonfatal("Size of exported file is too big\n");
1900 return false;
1903 client->exportsize = client->server->expected_size;
1907 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1908 if(multifile) {
1909 msg(LOG_INFO, "Total number of files: %d", i);
1911 if(treefile) {
1912 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
1914 return true;
1917 bool copyonwrite_prepare(CLIENT* client) {
1918 off_t i;
1919 gchar* dir;
1920 gchar* export_base;
1921 if (client->server->cowdir != NULL) {
1922 dir = g_strdup(client->server->cowdir);
1923 } else {
1924 dir = g_strdup(dirname(client->exportname));
1926 export_base = g_strdup(basename(client->exportname));
1927 client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
1928 (int)getpid());
1929 g_free(dir);
1930 g_free(export_base);
1931 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1932 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1933 if (client->difffile<0) {
1934 err("Could not create diff file (%m)");
1935 return false;
1937 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) {
1938 err("Could not allocate memory");
1939 return false;
1941 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1;
1943 return true;
1946 void send_export_info(CLIENT* client, bool maybe_zeroes) {
1947 uint64_t size_host = htonll((u64)(client->exportsize));
1948 uint16_t flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_WRITE_ZEROES;
1950 socket_write(client, &size_host, 8);
1951 if (client->server->flags & F_READONLY)
1952 flags |= NBD_FLAG_READ_ONLY;
1953 if (client->server->flags & F_FLUSH)
1954 flags |= NBD_FLAG_SEND_FLUSH;
1955 if (client->server->flags & F_FUA)
1956 flags |= NBD_FLAG_SEND_FUA;
1957 if (client->server->flags & F_ROTATIONAL)
1958 flags |= NBD_FLAG_ROTATIONAL;
1959 if (client->server->flags & F_TRIM)
1960 flags |= NBD_FLAG_SEND_TRIM;
1961 if (!(client->server->flags & F_COPYONWRITE))
1962 flags |= NBD_FLAG_CAN_MULTI_CONN;
1963 flags = htons(flags);
1964 socket_write(client, &flags, sizeof(flags));
1965 if (!(glob_flags & F_NO_ZEROES) && maybe_zeroes) {
1966 char zeros[128];
1967 memset(zeros, '\0', sizeof(zeros));
1968 socket_write(client, zeros, 124);
1973 * Commit to exporting the chosen export
1975 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
1976 * a number of things (verify whether the client is allowed access, try
1977 * to open files, etc etc) before we're ready to actually serve the
1978 * export.
1980 * This function does all those things.
1982 * @param client the CLIENT structure with .server and .net members set
1983 * up correctly
1984 * @return true if the client is allowed access to the export, false
1985 * otherwise
1987 static bool commit_client(CLIENT* client, SERVER* server) {
1988 char acl;
1989 uint32_t len;
1991 client->server = server;
1992 client->exportsize = OFFT_MAX;
1993 client->modern = TRUE;
1994 client->transactionlogfd = -1;
1995 if(pthread_mutex_init(&(client->lock), NULL)) {
1996 msg(LOG_ERR, "Unable to initialize mutex");
1997 return false;
1999 if (pthread_rwlock_init(&client->export_lock, NULL)){
2000 msg(LOG_ERR, "Unable to initialize write lock");
2001 return false;
2003 /* Check whether we exceeded the maximum number of allowed
2004 * clients already */
2005 if(dontfork) {
2006 acl = 'Y';
2007 } else {
2008 len = strlen(client->server->servename);
2009 writeit(commsocket, &len, sizeof len);
2010 writeit(commsocket, client->server->servename, len);
2011 readit(commsocket, &acl, 1);
2012 close(commsocket);
2014 switch(acl) {
2015 case 'N':
2016 msg(LOG_ERR, "Connection not allowed (too many clients)");
2017 return false;
2018 case 'X':
2019 msg(LOG_ERR, "Connection not allowed (unknown by parent?!?)");
2020 return false;
2023 /* Check whether the client is listed in the authfile */
2024 if (set_peername(client->net, client)) {
2025 msg(LOG_ERR, "Failed to set peername");
2026 return false;
2029 if (!authorized_client(client)) {
2030 msg(LOG_INFO, "Client '%s' is not authorized to access",
2031 client->clientname);
2032 return false;
2035 /* Set up the transactionlog, if we need one */
2036 if (client->server->transactionlog && (client->transactionlogfd == -1)) {
2037 if((client->transactionlogfd =
2038 open(client->server->transactionlog,
2039 O_WRONLY | O_CREAT,
2040 S_IRUSR | S_IWUSR)) ==
2041 -1) {
2042 msg(LOG_INFO, "Could not open transactionlog %s, moving on without it",
2043 client->server->transactionlog);
2047 /* Run any pre scripts that we may need */
2048 if (do_run(client->server->prerun, client->exportname)) {
2049 msg(LOG_INFO, "Client '%s' not allowed access by prerun script",
2050 client->clientname);
2051 return false;
2053 client->socket_closed = socket_closed_transmission;
2054 if(!setupexport(client)) {
2055 return false;
2058 if (client->server->flags & F_COPYONWRITE) {
2059 if(!copyonwrite_prepare(client)) {
2060 return false;
2064 if (client->server->flags & F_WAIT) {
2065 if(!copyonwrite_prepare(client)) {
2066 return false;
2070 setmysockopt(client->net);
2072 return true;
2075 static CLIENT* handle_export_name(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2076 uint32_t namelen;
2077 char* name;
2078 int i;
2080 socket_read(client, &namelen, sizeof(namelen));
2081 namelen = ntohl(namelen);
2082 if(namelen > 0) {
2083 name = malloc(namelen+1);
2084 name[namelen]=0;
2085 socket_read(client, name, namelen);
2086 } else {
2087 name = strdup("");
2089 for(i=0; i<servers->len; i++) {
2090 SERVER* serve = &(g_array_index(servers, SERVER, i));
2091 // hide exports that are TLS-only if we haven't negotiated TLS
2092 // yet
2093 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2094 continue;
2096 if(!strcmp(serve->servename, name)) {
2097 client->clientfeats = cflags;
2098 free(name);
2099 if(!commit_client(client, serve)) {
2100 return NULL;
2102 send_export_info(client, true);
2103 return client;
2106 free(name);
2107 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2110 static void handle_list(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2111 uint32_t len;
2112 int i;
2113 char buf[1024];
2114 char *ptr = buf + sizeof(len);
2116 socket_read(client, &len, sizeof(len));
2117 len = ntohl(len);
2118 if(len) {
2119 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2121 if(!(glob_flags & F_LIST)) {
2122 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Listing of exports denied by server configuration");
2123 err_nonfatal("Client tried disallowed list option");
2124 return;
2126 for(i=0; i<servers->len; i++) {
2127 SERVER* serve = &(g_array_index(servers, SERVER, i));
2128 // Hide TLS-only exports if we haven't negotiated TLS yet
2129 if(!client->tls_session && (serve->flags & F_FORCEDTLS)) {
2130 continue;
2132 len = htonl(strlen(serve->servename));
2133 memcpy(buf, &len, sizeof(len));
2134 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len));
2135 send_reply(client, opt, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
2137 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2140 #if HAVE_GNUTLS
2141 static int verify_cert(gnutls_session_t session) {
2142 int ret;
2143 unsigned int status, cert_list_size;
2144 const gnutls_datum_t *cert_list;
2145 gnutls_x509_crt_t cert;
2146 time_t now = time(NULL);
2148 ret = gnutls_certificate_verify_peers2(session, &status);
2149 if(ret < 0 || status != 0 || gnutls_certificate_type_get(session) !=
2150 GNUTLS_CRT_X509) {
2151 goto err;
2154 if(gnutls_x509_crt_init(&cert) < 0) {
2155 goto err;
2158 cert_list = gnutls_certificate_get_peers(session, &cert_list_size);
2159 if(cert_list == NULL) {
2160 goto err;
2162 if(gnutls_x509_crt_import(cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) {
2163 goto err;
2165 if(gnutls_x509_crt_get_activation_time(cert) > now) {
2166 goto err;
2168 if(gnutls_x509_crt_get_expiration_time(cert) < now) {
2169 goto err;
2171 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2172 msg(LOG_INFO, "client certificate verification successful");
2173 return 0;
2174 err:
2175 msg(LOG_ERR, "E: client certificate verification failed");
2176 return GNUTLS_E_CERTIFICATE_ERROR;
2179 CLIENT* handle_starttls(CLIENT* client, int opt, GArray* servers, uint32_t cflags, struct generic_conf *genconf) {
2180 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2181 gnutls_certificate_credentials_t x509_cred;
2182 CLIENT* retval = client;
2183 gnutls_priority_t priority_cache;
2184 gnutls_session_t *session = g_new0(gnutls_session_t, 1);
2185 int ret;
2186 int len;
2188 socket_read(client, &len, sizeof(len));
2189 if(G_UNLIKELY(len != 0)) {
2190 char buf[1024*1024];
2191 consume(client, len, buf, sizeof(buf));
2192 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Sending a STARTTLS command with data is invalid");
2193 return NULL;
2196 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2198 check_rv(gnutls_certificate_allocate_credentials(&x509_cred));
2199 gnutls_certificate_set_verify_function(x509_cred, verify_cert);
2200 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred, genconf->cacertfile, GNUTLS_X509_FMT_PEM));
2201 check_rv(gnutls_certificate_set_x509_key_file(x509_cred, genconf->certfile, genconf->keyfile, GNUTLS_X509_FMT_PEM));
2202 check_rv(gnutls_priority_init(&priority_cache, genconf->tlsprio, NULL));
2203 check_rv(gnutls_init(session, GNUTLS_SERVER));
2204 check_rv(gnutls_priority_set(*session, priority_cache));
2205 check_rv(gnutls_credentials_set(*session, GNUTLS_CRD_CERTIFICATE, x509_cred));
2207 gnutls_certificate_server_set_request(*session, GNUTLS_CERT_REQUEST);
2208 #if GNUTLS_VERSION_NUMBER >= 0x030109
2209 gnutls_transport_set_int(*session, client->net);
2210 #else
2211 gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) (intptr_t) client->net);
2212 #endif
2213 do {
2214 ret = gnutls_handshake(*session);
2215 } while(ret < 0 && gnutls_error_is_fatal(ret) == 0);
2217 if (ret < 0) {
2218 err_nonfatal(gnutls_strerror(ret));
2219 gnutls_deinit(*session);
2220 g_free(session);
2221 return NULL;
2223 client->tls_session = session;
2224 client->socket_read = socket_read_tls;
2225 client->socket_write = socket_write_tls;
2226 #undef check_rv
2227 exit:
2228 if(retval == NULL && session != NULL) {
2229 g_free(session);
2231 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2232 if(retval != NULL && retval->server != NULL) {
2233 retval->server = NULL;
2235 return retval;
2237 #endif
2240 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
2242 * XXX this matches the proposal I sent out, rather than the officially
2243 * documented version of this command. Need to bring the two in sync
2244 * one way or the other.
2246 static bool handle_info(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2247 uint32_t namelen, len;
2248 char *name;
2249 int i;
2250 SERVER *server = NULL;
2251 uint16_t n_requests;
2252 uint16_t request;
2253 char buf[1024];
2254 bool sent_export = false;
2255 uint32_t reptype = NBD_REP_ERR_UNKNOWN;
2256 char *msg = "Export unknown";
2258 socket_read(client, &len, sizeof(len));
2259 len = htonl(len);
2260 socket_read(client, &namelen, sizeof(namelen));
2261 namelen = htonl(namelen);
2262 if(namelen > (len - 6)) {
2263 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2264 socket_read(client, buf, len - sizeof(namelen));
2266 if(namelen > 0) {
2267 name = malloc(namelen + 1);
2268 name[namelen] = 0;
2269 socket_read(client, name, namelen);
2270 } else {
2271 name = strdup("");
2273 for(i=0; i<servers->len; i++) {
2274 SERVER *serve = &(g_array_index(servers, SERVER, i));
2275 if (!strcmp(serve->servename, name)) {
2276 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2277 reptype = NBD_REP_ERR_TLS_REQD;
2278 msg = "TLS is required for that export";
2279 continue;
2281 server = serve;
2284 free(name);
2285 socket_read(client, &n_requests, sizeof(n_requests));
2286 n_requests = ntohs(n_requests);
2287 if(!server) {
2288 consume(client, n_requests * sizeof(request), buf,
2289 sizeof(buf));
2290 send_reply(client, opt, reptype, -1, msg);
2291 return false;
2293 if (opt == NBD_OPT_GO) {
2294 client->clientfeats = cflags;
2295 if(!commit_client(client, server)) {
2296 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Access denied by server configuration");
2297 return false;
2300 for(i=0; i<n_requests; i++) {
2301 socket_read(client, &request, sizeof(request));
2302 switch(ntohs(request)) {
2303 case NBD_INFO_EXPORT:
2304 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2305 socket_write(client, &request, 2);
2306 send_export_info(client, false);
2307 sent_export = true;
2308 break;
2309 default:
2310 // ignore all other options for now.
2311 break;
2314 if(!sent_export) {
2315 request = htons(NBD_INFO_EXPORT);
2316 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2317 socket_write(client, &request, 2);
2318 send_export_info(client, false);
2320 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2322 return true;
2326 * Do the initial negotiation.
2328 * @param net The socket we're doing the negotiation over.
2329 * @param servers The array of known servers.
2330 * @param genconf the global options (needed for accessing TLS config data)
2332 CLIENT* negotiate(int net, GArray* servers, struct generic_conf *genconf) {
2333 uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
2334 uint64_t magic;
2335 uint32_t cflags = 0;
2336 uint32_t opt;
2337 CLIENT* client = g_new0(CLIENT, 1);
2338 client->net = net;
2339 client->socket_read = socket_read_notls;
2340 client->socket_write = socket_write_notls;
2341 client->socket_closed = socket_closed_negotiate;
2343 assert(servers != NULL);
2344 socket_write(client, INIT_PASSWD, 8);
2345 magic = htonll(opts_magic);
2346 socket_write(client, &magic, sizeof(magic));
2348 smallflags = htons(smallflags);
2349 socket_write(client, &smallflags, sizeof(uint16_t));
2350 socket_read(client, &cflags, sizeof(cflags));
2351 cflags = htonl(cflags);
2352 if (cflags & NBD_FLAG_C_NO_ZEROES) {
2353 glob_flags |= F_NO_ZEROES;
2355 do {
2356 socket_read(client, &magic, sizeof(magic));
2357 magic = ntohll(magic);
2358 if(magic != opts_magic) {
2359 err_nonfatal("Negotiation failed/5a: magic mismatch");
2360 goto handler_err;
2362 socket_read(client, &opt, sizeof(opt));
2363 opt = ntohl(opt);
2364 if(client->tls_session == NULL
2365 && glob_flags & F_FORCEDTLS
2366 && opt != NBD_OPT_STARTTLS) {
2367 if(opt == NBD_OPT_EXPORT_NAME) {
2368 // can't send an error message for EXPORT_NAME,
2369 // so must do hard close
2370 goto handler_err;
2372 if(opt == NBD_OPT_ABORT) {
2373 // handled below
2374 break;
2376 consume_len(client);
2377 send_reply(client, opt, NBD_REP_ERR_TLS_REQD, -1, "TLS is required on this server");
2378 continue;
2380 switch(opt) {
2381 case NBD_OPT_EXPORT_NAME:
2382 // NBD_OPT_EXPORT_NAME must be the last
2383 // selected option, so return from here
2384 // if that is chosen.
2385 if(handle_export_name(client, opt, servers, cflags) != NULL) {
2386 return client;
2387 } else {
2388 goto handler_err;
2390 break;
2391 case NBD_OPT_LIST:
2392 handle_list(client, opt, servers, cflags);
2393 break;
2394 case NBD_OPT_ABORT:
2395 // handled below
2396 break;
2397 case NBD_OPT_STARTTLS:
2398 #if !HAVE_GNUTLS
2399 consume_len(client);
2400 send_reply(client, opt, NBD_REP_ERR_PLATFORM, -1, "This nbd-server was compiled without TLS support");
2401 #else
2402 if(client->tls_session != NULL) {
2403 consume_len(client);
2404 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
2405 continue;
2407 if(genconf->keyfile == NULL) {
2408 consume_len(client);
2409 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "TLS not allowed on this server");
2410 continue;
2412 if(handle_starttls(client, opt, servers, cflags, genconf) == NULL) {
2413 // can't recover from failed TLS negotiation.
2414 goto handler_err;
2416 #endif
2417 break;
2418 case NBD_OPT_GO:
2419 case NBD_OPT_INFO:
2420 if(handle_info(client, opt, servers, cflags) && opt == NBD_OPT_GO) {
2421 return client;
2423 break;
2424 default:
2425 consume_len(client);
2426 send_reply(client, opt, NBD_REP_ERR_UNSUP, -1, "The given option is unknown to this server implementation");
2427 break;
2429 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
2430 if(opt == NBD_OPT_ABORT) {
2431 err_nonfatal("Session terminated by client");
2432 goto handler_err;
2434 err_nonfatal("Weird things happened: reached end of negotiation without success");
2435 handler_err:
2436 g_free(client);
2437 return NULL;
/**
 * Translate a local errno value into the error number sent to the
 * client in a reply.
 *
 * Only a small set of error numbers is sent; everything that is not
 * explicitly mapped is reported as "invalid argument".
 *
 * @param errcode a value of errno
 * @return the corresponding reply error number, already converted
 * with htonl()
 */
static int nbd_errno(int errcode) {
	/* Error numbers as placed in the reply, before byte swapping. */
	enum {
		NBD_WIRE_EPERM = 1,
		NBD_WIRE_EIO = 5,
		NBD_WIRE_ENOMEM = 12,
		NBD_WIRE_EINVAL = 22,
		NBD_WIRE_ENOSPC = 28
	};

	switch (errcode) {
	case EPERM:
		return htonl(NBD_WIRE_EPERM);
	case EIO:
		return htonl(NBD_WIRE_EIO);
	case ENOMEM:
		return htonl(NBD_WIRE_ENOMEM);
	case EINVAL:
		return htonl(NBD_WIRE_EINVAL);
	case EFBIG:
	case ENOSPC:
#ifdef EDQUOT
	case EDQUOT:
#endif
		/* All "no room left" conditions are reported alike. */
		return htonl(NBD_WIRE_ENOSPC);
	default:
		return htonl(NBD_WIRE_EINVAL);
	}
}
2461 static void package_dispose(struct work_package* package) {
2462 if (package->pipefd[0] > 0)
2463 close(package->pipefd[0]);
2464 if (package->pipefd[1] > 0)
2465 close(package->pipefd[1]);
2466 g_free(package->data);
2467 g_free(package->req);
2468 g_free(package);
2471 static int mkpipe(int pipefd[2], size_t len)
2473 if (len > MAX_PIPE_SIZE)
2474 return -1;
2475 if (pipe(pipefd))
2476 return -1;
2478 #ifdef HAVE_SPLICE
2479 if (fcntl(pipefd[1], F_SETPIPE_SZ, MAX_PIPE_SIZE) < MAX_PIPE_SIZE) {
2480 close(pipefd[0]);
2481 close(pipefd[1]);
2482 pipefd[0] = -1;
2483 pipefd[1] = -1;
2484 return -1;
2486 #endif
2488 return 0;
2491 struct work_package* package_create(CLIENT* client, struct nbd_request* req) {
2492 struct work_package* rv = calloc(sizeof (struct work_package), 1);
2494 rv->req = req;
2495 rv->client = client;
2496 rv->data = NULL;
2497 rv->pipefd[0] = -1;
2498 rv->pipefd[1] = -1;
2500 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2501 if (client->server->flags & F_SPLICE) {
2502 if (mkpipe(rv->pipefd, req->len))
2503 rv->data = malloc(req->len);
2504 } else {
2505 rv->data = malloc(req->len);
2509 return rv;
2512 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) {
2513 rep->magic = htonl(NBD_REPLY_MAGIC);
2514 rep->error = 0;
2515 memcpy(&(rep->handle), &(req->handle), sizeof(req->handle));
#ifdef HAVE_SPLICE
/**
 * Serve a read request by splicing the data through a pipe to the
 * client's socket.
 *
 * @param client the client that sent the request
 * @param req the read request
 * @return 0 if the reply was sent, -1 if splicing could not be used
 * (TLS session active, no pipe, or expsplice() failed); nothing has
 * been sent to the client in that case
 */
static int handle_splice_read(CLIENT *client, struct nbd_request *req)
{
	struct nbd_reply reply;
	int pipe_ends[2];

	// splice doesn't work with TLS
	if (client->tls_session != NULL || mkpipe(pipe_ends, req->len)) {
		return -1;
	}

	if (expsplice(pipe_ends[1], req->from, req->len, client, SPLICE_IN, 0)) {
		close(pipe_ends[1]);
		close(pipe_ends[0]);
		return -1;
	}

	DEBUG("handling read request (splice)\n");
	setup_reply(&reply, req);

	/* Header and payload must not interleave with other replies. */
	pthread_mutex_lock(&(client->lock));
	writeit(client->net, &reply, sizeof(reply));
	spliceit(pipe_ends[0], NULL, client->net, NULL, req->len);
	pthread_mutex_unlock(&(client->lock));

	close(pipe_ends[0]);
	close(pipe_ends[1]);
	return 0;
}
#endif
2549 static void handle_normal_read(CLIENT *client, struct nbd_request *req)
2551 struct nbd_reply rep;
2552 void* buf = malloc(req->len);
2553 if(!buf) {
2554 err("Could not allocate memory for request");
2556 DEBUG("handling read request\n");
2557 setup_reply(&rep, req);
2558 if(expread(req->from, buf, req->len, client)) {
2559 DEBUG("Read failed: %m");
2560 rep.error = nbd_errno(errno);
2562 pthread_mutex_lock(&(client->lock));
2563 socket_write(client, &rep, sizeof rep);
2564 if(!rep.error) {
2565 socket_write(client, buf, req->len);
2567 pthread_mutex_unlock(&(client->lock));
2568 free(buf);
2571 static void handle_read(CLIENT* client, struct nbd_request* req)
2573 #ifdef HAVE_SPLICE
2575 * If we have splice set we want to try that first, and if that fails
2576 * for whatever reason we fall through to ye olde read.
2578 if (client->server->flags & F_SPLICE)
2579 if (!handle_splice_read(client, req))
2580 return;
2581 #endif
2582 handle_normal_read(client, req);
2585 static void handle_write(struct work_package *pkg)
2587 CLIENT *client = pkg->client;
2588 struct nbd_request *req = pkg->req;
2589 struct nbd_reply rep;
2590 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2592 DEBUG("handling write request\n");
2593 setup_reply(&rep, req);
2595 if ((client->server->flags & F_READONLY) ||
2596 (client->server->flags & F_AUTOREADONLY)) {
2597 DEBUG("[WRITE to READONLY!]");
2598 rep.error = nbd_errno(EPERM);
2599 #ifdef HAVE_SPLICE
2600 } else if (!pkg->data) {
2601 if (expsplice(pkg->pipefd[0], req->from, req->len, client,
2602 SPLICE_OUT, fua)) {
2603 DEBUG("Splice failed: %M");
2604 rep.error = nbd_errno(errno);
2606 #endif
2607 } else {
2608 if(expwrite(req->from, pkg->data, req->len, client, fua)) {
2609 DEBUG("Write failed: %m");
2610 rep.error = nbd_errno(errno);
2613 pthread_mutex_lock(&(client->lock));
2614 socket_write(client, &rep, sizeof rep);
2615 pthread_mutex_unlock(&(client->lock));
2618 static void handle_flush(CLIENT* client, struct nbd_request* req) {
2619 struct nbd_reply rep;
2620 DEBUG("handling flush request\n");
2621 setup_reply(&rep, req);
2622 if(expflush(client)) {
2623 DEBUG("Flush failed: %m");
2624 rep.error = nbd_errno(errno);
2626 pthread_mutex_lock(&(client->lock));
2627 socket_write(client, &rep, sizeof rep);
2628 pthread_mutex_unlock(&(client->lock));
2631 static void handle_trim(CLIENT* client, struct nbd_request* req) {
2632 struct nbd_reply rep;
2633 DEBUG("handling trim request\n");
2634 setup_reply(&rep, req);
2635 if(exptrim(req, client)) {
2636 DEBUG("Trim failed: %m");
2637 rep.error = nbd_errno(errno);
2639 pthread_mutex_lock(&(client->lock));
2640 socket_write(client, &rep, sizeof rep);
2641 pthread_mutex_unlock(&(client->lock));
2644 static void handle_write_zeroes(CLIENT* client, struct nbd_request* req) {
2645 struct nbd_reply rep;
2646 DEBUG("handling write_zeroes request\n");
2647 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2648 setup_reply(&rep, req);
2649 if ((client->server->flags & F_READONLY) ||
2650 (client->server->flags & F_AUTOREADONLY)) {
2651 DEBUG("[WRITE to READONLY!]");
2652 rep.error = nbd_errno(EPERM);
2653 } else if(expwrite_zeroes(req, client, fua)) {
2654 DEBUG("Write_zeroes failed: %m");
2655 rep.error = nbd_errno(errno);
2657 // For now, don't trim
2658 // TODO: handle this far more efficiently with reference to the
2659 // actual backing driver
2660 pthread_mutex_lock(&(client->lock));
2661 socket_write(client, &rep, sizeof rep);
2662 pthread_mutex_unlock(&(client->lock));
2665 static void handle_request(gpointer data, gpointer user_data) {
2666 struct work_package* package = (struct work_package*) data;
2667 uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND;
2668 uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND;
2669 struct nbd_reply rep;
2671 if(flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
2672 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type);
2673 goto error;
2676 switch(type) {
2677 case NBD_CMD_READ:
2678 handle_read(package->client, package->req);
2679 break;
2680 case NBD_CMD_WRITE:
2681 handle_write(package);
2682 break;
2683 case NBD_CMD_FLUSH:
2684 handle_flush(package->client, package->req);
2685 break;
2686 case NBD_CMD_TRIM:
2687 handle_trim(package->client, package->req);
2688 break;
2689 case NBD_CMD_WRITE_ZEROES:
2690 handle_write_zeroes(package->client, package->req);
2691 break;
2692 default:
2693 msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type);
2694 goto error;
2696 goto end;
2697 error:
2698 setup_reply(&rep, package->req);
2699 rep.error = nbd_errno(EINVAL);
2700 pthread_mutex_lock(&(package->client->lock));
2701 socket_write(package->client, &rep, sizeof rep);
2702 pthread_mutex_unlock(&(package->client->lock));
2703 end:
2704 package_dispose(package);
2707 static int mainloop_threaded(CLIENT* client) {
2708 struct nbd_request* req;
2709 struct work_package* pkg;
2711 DEBUG("Entering request loop\n");
2712 while(1) {
2713 req = calloc(sizeof (struct nbd_request), 1);
2715 socket_read(client, req, sizeof(struct nbd_request));
2716 if(client->transactionlogfd != -1) {
2717 writeit(client->transactionlogfd, req, sizeof(struct nbd_request));
2720 req->from = ntohll(req->from);
2721 req->type = ntohl(req->type);
2722 req->len = ntohl(req->len);
2724 if(req->magic != htonl(NBD_REQUEST_MAGIC))
2725 err("Protocol error: not enough magic.");
2727 pkg = package_create(client, req);
2729 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2730 #ifdef HAVE_SPLICE
2731 if ((client->server->flags & F_SPLICE) &&
2732 (req->len <= MAX_PIPE_SIZE && pkg->pipefd[1] > 0) &&
2733 (client->tls_session == NULL))
2734 spliceit(client->net, NULL, pkg->pipefd[1],
2735 NULL, req->len);
2736 else
2737 #endif
2738 socket_read(client, pkg->data, req->len);
2740 if(req->type == NBD_CMD_DISC) {
2741 finalize_client(client);
2742 return 0;
2744 g_thread_pool_push(tpool, pkg, NULL);
2749 * Destroy a pid_t*
2750 * @param data a pointer to pid_t which should be freed
2752 void destroy_pid_t(gpointer data) {
2753 g_free(data);
2756 static pid_t
2757 spawn_child(int* socket)
2759 pid_t pid;
2760 sigset_t newset;
2761 sigset_t oldset;
2762 int sockets[2];
2764 sigemptyset(&newset);
2765 sigaddset(&newset, SIGCHLD);
2766 sigaddset(&newset, SIGTERM);
2767 sigprocmask(SIG_BLOCK, &newset, &oldset);
2768 socketpair(AF_UNIX, SOCK_STREAM, 0, sockets);
2769 pid = fork();
2770 if (pid < 0) {
2771 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
2772 goto out;
2774 if (pid > 0) { /* Parent */
2775 pid_t *pidp;
2777 pidp = g_malloc(sizeof(pid_t));
2778 *pidp = pid;
2779 *socket = sockets[1];
2780 close(sockets[0]);
2781 g_hash_table_insert(children, pidp, pidp);
2782 goto out;
2784 /* Child */
2785 *socket = sockets[0];
2786 close(sockets[1]);
2787 /* Child's signal disposition is reset to default. */
2788 signal(SIGCHLD, SIG_DFL);
2789 signal(SIGTERM, SIG_DFL);
2790 signal(SIGHUP, SIG_DFL);
2791 sigemptyset(&oldset);
2792 out:
2793 sigprocmask(SIG_SETMASK, &oldset, NULL);
2794 return pid;
/**
 * Accept a pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative
 * value if accept() failed (the failure is logged)
 */
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peerlen = sizeof(peer);
	const int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

	if (conn < 0) {
		err_nonfatal("Failed to accept socket connection: %m");
	}

	return conn;
}
2812 static void
2813 handle_modern_connection(GArray *const servers, const int sock, struct generic_conf *genconf)
2815 int net;
2816 pid_t pid;
2817 CLIENT *client = NULL;
2818 int sock_flags_old;
2819 int sock_flags_new;
2821 net = socket_accept(sock);
2822 if (net < 0)
2823 return;
2825 if (!dontfork) {
2826 pid = spawn_child(&commsocket);
2827 if (pid) {
2828 if (pid > 0) {
2829 msg(LOG_INFO, "Spawned a child process");
2830 g_array_append_val(childsocks, commsocket);
2832 if (pid < 0)
2833 msg(LOG_ERR, "Failed to spawn a child process");
2834 close(net);
2835 return;
2837 /* Child just continues. */
2840 sock_flags_old = fcntl(net, F_GETFL, 0);
2841 if (sock_flags_old == -1) {
2842 msg(LOG_ERR, "Failed to get socket flags");
2843 goto handler_err;
2846 sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2847 if (sock_flags_new != sock_flags_old &&
2848 fcntl(net, F_SETFL, sock_flags_new) == -1) {
2849 msg(LOG_ERR, "Failed to set socket to blocking mode");
2850 goto handler_err;
2853 client = negotiate(net, servers, genconf);
2854 if (!client) {
2855 msg(LOG_ERR, "Modern initial negotiation failed");
2856 goto handler_err;
2859 if (!dontfork) {
2860 int i;
2862 /* Free all root server resources here, because we are
2863 * currently in the child process serving one specific
2864 * connection. These are not simply needed anymore. */
2865 g_hash_table_destroy(children);
2866 children = NULL;
2867 for (i = 0; i < modernsocks->len; i++) {
2868 close(g_array_index(modernsocks, int, i));
2870 g_array_free(modernsocks, TRUE);
2872 /* Now that we are in the child process after a
2873 * succesful negotiation, we do not need the list of
2874 * servers anymore, get rid of it.*/
2875 /* FALSE does not free the
2876 actual data. This is required,
2877 because the client has a
2878 direct reference into that
2879 data, and otherwise we get a
2880 segfault... */
2881 g_array_free(servers, FALSE);
2884 msg(LOG_INFO, "Starting to serve");
2885 mainloop_threaded(client);
2886 exit(EXIT_SUCCESS);
2888 handler_err:
2889 close(net);
2890 g_free(client);
2892 if (!dontfork) {
2893 exit(EXIT_FAILURE);
2897 static int handle_childname(GArray* servers, int socket)
2899 uint32_t len;
2900 char *buf;
2901 int i, r, rt = 0;
2903 while(rt < sizeof(len)) {
2904 switch((r = read(socket, &len, sizeof len))) {
2905 case 0:
2906 return -1;
2907 case -1:
2908 err_nonfatal("Error reading from acl socket: %m");
2909 return -1;
2910 default:
2911 rt += r;
2912 break;
2915 buf = g_malloc0(len);
2916 readit(socket, buf, len);
2917 for(i=0; i<servers->len; i++) {
2918 SERVER* srv = &g_array_index(servers, SERVER, i);
2919 if(strcmp(srv->servename, buf) == 0) {
2920 if(srv->max_connections == 0 || srv->max_connections > srv->numclients) {
2921 writeit(socket, "Y", 1);
2922 srv->numclients++;
2923 } else {
2924 writeit(socket, "N", 1);
2926 goto exit;
2929 writeit(socket, "X", 1);
2930 exit:
2931 g_free(buf);
2932 return 0;
2936 * Return the index of the server whose servename matches the given
2937 * name.
2939 * @param servename a string to match
2940 * @param servers an array of servers
2941 * @return the first index of the server whose servename matches the
2942 * given name or -1 if one cannot be found
2944 static int get_index_by_servename(const gchar *const servename,
2945 const GArray *const servers) {
2946 int i;
2948 for (i = 0; i < servers->len; ++i) {
2949 const SERVER server = g_array_index(servers, SERVER, i);
2951 if (strcmp(servename, server.servename) == 0)
2952 return i;
2955 return -1;
2959 * Parse configuration files and add servers to the array if they don't
2960 * already exist there. The existence is tested by comparing
2961 * servenames. A server is appended to the array only if its servename
2962 * is unique among all other servers.
2964 * @param servers an array of servers
2965 * @return the number of new servers appended to the array, or -1 in
2966 * case of an error
2968 static int append_new_servers(GArray *const servers, GError **const gerror) {
2969 int i;
2970 GArray *new_servers;
2971 const int old_len = servers->len;
2972 int retval = -1;
2973 struct generic_conf genconf;
2975 new_servers = parse_cfile(config_file_pos, &genconf, true, gerror);
2976 g_thread_pool_set_max_threads(tpool, genconf.threads, NULL);
2977 if (!new_servers)
2978 goto out;
2980 for (i = 0; i < new_servers->len; ++i) {
2981 SERVER new_server = g_array_index(new_servers, SERVER, i);
2983 if (new_server.servename
2984 && -1 == get_index_by_servename(new_server.servename,
2985 servers)) {
2986 g_array_append_val(servers, new_server);
2990 retval = servers->len - old_len;
2991 out:
2992 g_array_free(new_servers, TRUE);
2994 return retval;
2997 void serveloop(GArray* servers, struct generic_conf *genconf) G_GNUC_NORETURN;
2999 * Loop through the available servers, and serve them. Never returns.
3001 void serveloop(GArray* servers, struct generic_conf *genconf) {
3002 int i;
3003 int mmax, max;
3004 fd_set mset;
3005 fd_set rset;
3006 sigset_t blocking_mask;
3007 sigset_t original_mask;
3010 * Set up the master fd_set. The set of descriptors we need
3011 * to select() for never changes anyway and it buys us a *lot*
3012 * of time to only build this once. However, if we ever choose
3013 * to not fork() for clients anymore, we may have to revisit
3014 * this.
3016 mmax=0;
3017 FD_ZERO(&mset);
3018 for(i=0;i<modernsocks->len;i++) {
3019 int sock = g_array_index(modernsocks, int, i);
3020 FD_SET(sock, &mset);
3021 mmax=sock>mmax?sock:mmax;
3024 /* Construct a signal mask which is used to make signal testing and
3025 * receiving an atomic operation to ensure no signal is received between
3026 * tests and blocking pselect(). */
3027 if (sigemptyset(&blocking_mask) == -1)
3028 err("failed to initialize blocking_mask: %m");
3030 if (sigaddset(&blocking_mask, SIGCHLD) == -1)
3031 err("failed to add SIGCHLD to blocking_mask: %m");
3033 if (sigaddset(&blocking_mask, SIGHUP) == -1)
3034 err("failed to add SIGHUP to blocking_mask: %m");
3036 if (sigaddset(&blocking_mask, SIGTERM) == -1)
3037 err("failed to add SIGTERM to blocking_mask: %m");
3039 if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
3040 err("failed to block signals: %m");
3042 for(;;) {
3043 if (is_sigterm_caught) {
3044 is_sigterm_caught = 0;
3046 g_hash_table_foreach(children, killchild, NULL);
3047 unlink(pidfname);
3049 exit(EXIT_SUCCESS);
3052 if (is_sigchld_caught) {
3053 int status;
3054 int* i;
3055 pid_t pid;
3057 is_sigchld_caught = 0;
3059 while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
3060 if (WIFEXITED(status)) {
3061 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
3063 i = g_hash_table_lookup(children, &pid);
3064 if (!i) {
3065 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
3066 } else {
3067 DEBUG("Removing %d from the list of children", pid);
3068 g_hash_table_remove(children, &pid);
3073 /* SIGHUP causes the root server process to reconfigure
3074 * itself and add new export servers for each newly
3075 * found export configuration group, i.e. spawn new
3076 * server processes for each previously non-existent
3077 * export. This does not alter old runtime configuration
3078 * but just appends new exports. */
3079 if (is_sighup_caught) {
3080 int n;
3081 GError *gerror = NULL;
3083 msg(LOG_INFO, "reconfiguration request received");
3084 is_sighup_caught = 0; /* Reset to allow catching
3085 * it again. */
3087 n = append_new_servers(servers, &gerror);
3088 if (n == -1)
3089 msg(LOG_ERR, "failed to append new servers: %s",
3090 gerror->message);
3092 for (i = servers->len - n; i < servers->len; ++i) {
3093 const SERVER server = g_array_index(servers,
3094 SERVER, i);
3096 msg(LOG_INFO, "reconfigured new server: %s",
3097 server.servename);
3101 memcpy(&rset, &mset, sizeof(fd_set));
3102 max=mmax;
3103 for(i=0;i<childsocks->len;i++) {
3104 int sock = g_array_index(childsocks, int, i);
3105 FD_SET(sock, &rset);
3106 max=sock>max?sock:max;
3109 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
3110 DEBUG("accept, ");
3111 for(i=0; i < modernsocks->len; i++) {
3112 int sock = g_array_index(modernsocks, int, i);
3113 if(!FD_ISSET(sock, &rset)) {
3114 continue;
3117 handle_modern_connection(servers, sock, genconf);
3119 for(i=0; i < childsocks->len; i++) {
3120 int sock = g_array_index(childsocks, int, i);
3122 if(FD_ISSET(sock, &rset)) {
3123 if(handle_childname(servers, sock) < 0) {
3124 close(sock);
3125 g_array_remove_index(childsocks, i);
3134 * Set server socket options.
3136 * @param socket a socket descriptor of the server
3138 * @param gerror a pointer to an error object pointer used for reporting
3139 * errors. On error, if gerror is not NULL, *gerror is set and -1
3140 * is returned.
3142 * @return 0 on success, -1 on error
3144 int dosockopts(const int socket, GError **const gerror) {
3145 #ifndef sun
3146 int yes=1;
3147 #else
3148 char yes='1';
3149 #endif /* sun */
3150 struct linger l;
3152 /* lose the pesky "Address already in use" error message */
3153 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
3154 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
3155 "failed to set socket option SO_REUSEADDR: %s",
3156 strerror(errno));
3157 return -1;
3159 l.l_onoff = 1;
3160 l.l_linger = 10;
3161 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
3162 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
3163 "failed to set socket option SO_LINGER: %s",
3164 strerror(errno));
3165 return -1;
3167 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
3168 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
3169 "failed to set socket option SO_KEEPALIVE: %s",
3170 strerror(errno));
3171 return -1;
3174 return 0;
3177 int open_unix(const gchar *const sockname, GError **const gerror) {
3178 struct sockaddr_un sa;
3179 int sock=-1;
3180 int retval=-1;
3182 memset(&sa, 0, sizeof(struct sockaddr_un));
3183 sa.sun_family = AF_UNIX;
3184 strncpy(sa.sun_path, sockname, sizeof sa.sun_path);
3185 sa.sun_path[sizeof(sa.sun_path)-1] = '\0';
3186 sock = socket(AF_UNIX, SOCK_STREAM, 0);
3187 if(sock < 0) {
3188 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3189 "failed to open a unix socket: "
3190 "failed to create socket: %s",
3191 strerror(errno));
3192 goto out;
3194 if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
3195 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3196 "failed to open a unix socket: "
3197 "failed to bind to address %s: %s",
3198 sockname, strerror(errno));
3199 goto out;
3201 if(listen(sock, 10)<0) {
3202 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3203 "failed to open a unix socket: "
3204 "failed to start listening: %s",
3205 strerror(errno));
3206 goto out;
3208 retval=0;
3209 g_array_append_val(modernsocks, sock);
3210 out:
3211 if(retval<0 && sock >= 0) {
3212 close(sock);
3215 return retval;
3218 int open_modern(const gchar *const addr, const gchar *const port,
3219 GError **const gerror) {
3220 struct addrinfo hints;
3221 struct addrinfo* ai = NULL;
3222 struct addrinfo* ai_bak = NULL;
3223 struct sock_flags;
3224 int e;
3225 int retval = -1;
3226 int sock = -1;
3227 gchar** addrs;
3228 gchar const* l_addr = addr;
3230 if(!addr || strlen(addr) == 0) {
3231 l_addr = "::, 0.0.0.0";
3234 addrs = g_strsplit_set(l_addr, ", \t", -1);
3236 for(int i=0; addrs[i]!=NULL; i++) {
3237 if(addrs[i][0] == '\0') {
3238 continue;
3240 memset(&hints, '\0', sizeof(hints));
3241 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
3242 hints.ai_socktype = SOCK_STREAM;
3243 hints.ai_family = AF_UNSPEC;
3244 hints.ai_protocol = IPPROTO_TCP;
3245 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai);
3246 ai_bak = ai;
3247 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) {
3248 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
3249 "failed to open a modern socket: "
3250 "failed to get address info: %s",
3251 gai_strerror(e));
3252 goto out;
3255 while(ai != NULL) {
3256 sock = -1;
3258 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
3259 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3260 "failed to open a modern socket: "
3261 "failed to create a socket: %s",
3262 strerror(errno));
3263 goto out;
3266 if (dosockopts(sock, gerror) == -1) {
3267 g_prefix_error(gerror, "failed to open a modern socket: ");
3268 goto out;
3271 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
3273 * Some systems will return multiple entries for the
3274 * same address when we ask it for something
3275 * AF_UNSPEC, even though the first entry will
3276 * listen to both protocols. Other systems will
3277 * return multiple entries too, but we actually
3278 * do need to open both.
3280 * Handle this by ignoring EADDRINUSE if we've
3281 * already got at least one socket open
3283 if(errno == EADDRINUSE && modernsocks->len > 0) {
3284 goto next;
3286 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3287 "failed to open a modern socket: "
3288 "failed to bind an address to a socket: %s",
3289 strerror(errno));
3290 goto out;
3293 if(listen(sock, 10) <0) {
3294 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3295 "failed to open a modern socket: "
3296 "failed to start listening on a socket: %s",
3297 strerror(errno));
3298 goto out;
3300 g_array_append_val(modernsocks, sock);
3301 next:
3302 ai = ai->ai_next;
3304 if(ai_bak) {
3305 freeaddrinfo(ai_bak);
3306 ai_bak=NULL;
3310 retval = 0;
3311 out:
3313 if (retval == -1 && sock >= 0) {
3314 close(sock);
3316 if(ai_bak)
3317 freeaddrinfo(ai_bak);
3319 return retval;
3323 * Connect our servers.
3325 void setup_servers(GArray *const servers, const gchar *const modernaddr,
3326 const gchar *const modernport, const gchar* unixsock) {
3327 struct sigaction sa;
3329 GError *gerror = NULL;
3330 if (open_modern(modernaddr, modernport, &gerror) == -1) {
3331 msg(LOG_ERR, "failed to setup servers: %s",
3332 gerror->message);
3333 g_clear_error(&gerror);
3334 exit(EXIT_FAILURE);
3336 if(unixsock != NULL) {
3337 GError* gerror = NULL;
3338 if(open_unix(unixsock, &gerror) == -1) {
3339 msg(LOG_ERR, "failed to setup servers: %s",
3340 gerror->message);
3341 g_clear_error(&gerror);
3342 exit(EXIT_FAILURE);
3345 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
3347 sa.sa_handler = sigchld_handler;
3348 sigemptyset(&sa.sa_mask);
3349 sigaddset(&sa.sa_mask, SIGTERM);
3350 sa.sa_flags = SA_RESTART;
3351 if(sigaction(SIGCHLD, &sa, NULL) == -1)
3352 err("sigaction: %m");
3354 sa.sa_handler = sigterm_handler;
3355 sigemptyset(&sa.sa_mask);
3356 sigaddset(&sa.sa_mask, SIGCHLD);
3357 sa.sa_flags = SA_RESTART;
3358 if(sigaction(SIGTERM, &sa, NULL) == -1)
3359 err("sigaction: %m");
3361 sa.sa_handler = sighup_handler;
3362 sigemptyset(&sa.sa_mask);
3363 sa.sa_flags = SA_RESTART;
3364 if(sigaction(SIGHUP, &sa, NULL) == -1)
3365 err("sigaction: %m");
3367 sa.sa_handler = sigusr1_handler;
3368 sigemptyset(&sa.sa_mask);
3369 sa.sa_flags = SA_RESTART;
3370 if(sigaction(SIGUSR1, &sa, NULL) == -1)
3371 err("sigaction: %m");
3375 * Go daemon (unless we specified at compile time that we didn't want this)
3376 * @param serve the first server of our configuration. If its port is zero,
3377 * then do not daemonize, because we're doing inetd then. This parameter
3378 * is only used to create a PID file of the form
3379 * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
3381 #if !defined(NODAEMON)
3382 void daemonize() {
3383 FILE*pidf;
3385 if(daemon(0,0)<0) {
3386 err("daemon");
3388 if(!*pidfname) {
3389 strncpy(pidfname, "/var/run/nbd-server.pid", 255);
3391 pidf=fopen(pidfname, "w");
3392 if(pidf) {
3393 fprintf(pidf,"%d\n", (int)getpid());
3394 fclose(pidf);
3395 } else {
3396 perror("fopen");
3397 fprintf(stderr, "Not fatal; continuing");
3400 #else
3401 #define daemonize(serve)
3402 #endif /* !defined(NODAEMON) */
/*
 * Everything beyond this point (in the file) is run in non-daemon mode.
 * The stuff above daemonize() isn't.
 */
3410 * Set up user-ID and/or group-ID
3412 void dousers(const gchar *const username, const gchar *const groupname) {
3413 struct passwd *pw;
3414 struct group *gr;
3415 gchar* str;
3416 if (groupname) {
3417 gr = getgrnam(groupname);
3418 if(!gr) {
3419 str = g_strdup_printf("Invalid group name: %s", groupname);
3420 err(str);
3422 if(setgid(gr->gr_gid)<0) {
3423 err("Could not set GID: %m");
3426 if (username) {
3427 pw = getpwnam(username);
3428 if(!pw) {
3429 str = g_strdup_printf("Invalid user name: %s", username);
3430 err(str);
3432 if(setuid(pw->pw_uid)<0) {
3433 err("Could not set UID: %m");
3438 #ifndef ISSERVER
3439 void glib_message_syslog_redirect(const gchar *log_domain,
3440 GLogLevelFlags log_level,
3441 const gchar *message,
3442 gpointer user_data)
3444 int level=LOG_DEBUG;
3446 switch( log_level )
3448 case G_LOG_FLAG_FATAL:
3449 case G_LOG_LEVEL_CRITICAL:
3450 case G_LOG_LEVEL_ERROR:
3451 level=LOG_ERR;
3452 break;
3453 case G_LOG_LEVEL_WARNING:
3454 level=LOG_WARNING;
3455 break;
3456 case G_LOG_LEVEL_MESSAGE:
3457 case G_LOG_LEVEL_INFO:
3458 level=LOG_INFO;
3459 break;
3460 case G_LOG_LEVEL_DEBUG:
3461 level=LOG_DEBUG;
3462 break;
3463 default:
3464 level=LOG_ERR;
3466 syslog(level, "%s", message);
3468 #endif
3471 * Main entry point...
3473 int main(int argc, char *argv[]) {
3474 SERVER *serve;
3475 GArray *servers;
3476 GError *gerr=NULL;
3477 struct generic_conf genconf;
3479 memset(&genconf, 0, sizeof(struct generic_conf));
3481 if (sizeof( struct nbd_request )!=28) {
3482 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
3483 exit(EXIT_FAILURE) ;
3486 modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
3487 childsocks = g_array_new(FALSE, FALSE, sizeof(int));
3489 logging(MY_NAME);
3490 config_file_pos = g_strdup(CFILE);
3491 serve=cmdline(argc, argv, &genconf);
3493 genconf.threads = 4;
3494 servers = parse_cfile(config_file_pos, &genconf, true, &gerr);
3496 /* Update global variables with parsed values. This will be
3497 * removed once we get rid of global configuration variables. */
3498 glob_flags |= genconf.flags;
3500 if(serve) {
3501 g_array_append_val(servers, *serve);
3503 if(strcmp(genconf.modernport, "0")==0) {
3504 #ifndef ISSERVER
3505 err("inetd mode requires syslog");
3506 #endif
3507 CLIENT* client = g_malloc(sizeof(CLIENT));
3508 client->net = -1;
3509 if(!commit_client(client, serve)) {
3510 exit(EXIT_FAILURE);
3512 mainloop_threaded(client);
3513 return 0;
3517 if(!servers || !servers->len) {
3518 if(gerr && !(gerr->domain == NBDS_ERR
3519 && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) {
3520 g_warning("Could not parse config file: %s",
3521 gerr ? gerr->message : "Unknown error");
3524 if(serve) {
3525 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
3528 if((!serve) && (!servers||!servers->len)) {
3529 if(gerr)
3530 g_message("No configured exports; quitting.");
3531 exit(EXIT_FAILURE);
3533 if (!dontfork)
3534 daemonize();
3535 #if HAVE_OLD_GLIB
3536 g_thread_init(NULL);
3537 #endif
3538 tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL);
3540 setup_servers(servers, genconf.modernaddr, genconf.modernport,
3541 genconf.unixsock);
3542 dousers(genconf.user, genconf.group);
3544 #if HAVE_GNUTLS
3545 gnutls_global_init();
3546 static gnutls_dh_params_t dh_params;
3547 gnutls_dh_params_init(&dh_params);
3548 gnutls_dh_params_generate2(dh_params,
3549 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH,
3550 // Renamed in GnuTLS 3.3
3551 #if GNUTLS_VERSION_NUMBER >= 0x030300
3552 GNUTLS_SEC_PARAM_MEDIUM
3553 #else
3554 GNUTLS_SEC_PARAM_NORMAL
3555 #endif
3557 #endif
3558 serveloop(servers, &genconf);