doc: Define a standard URI syntax for NBD URIs.
[nbd.git] / nbd-server.c
blob0b32bcd257c88e7c2079dcc921f0b2a0d698c685
1 /*
2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
34 * <wouter@debian.org>
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
43 * <wouter@debian.org>
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
51 * <wouter@debian.org>
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
60 #include "lfs.h"
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
63 #if NEED_BSD_SOURCE
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
65 #endif
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
68 #include <assert.h>
69 #include <sys/types.h>
70 #include <sys/socket.h>
71 #include <sys/stat.h>
72 #include <sys/select.h>
73 #include <sys/wait.h>
74 #include <sys/un.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
77 #endif
78 #ifdef HAVE_SYS_UIO_H
79 #include <sys/uio.h>
80 #endif
81 #include <sys/param.h>
82 #include <signal.h>
83 #include <errno.h>
84 #include <libgen.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
87 #include <netdb.h>
88 #include <syslog.h>
89 #include <unistd.h>
90 #include <stdbool.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <string.h>
94 #include <fcntl.h>
95 #if HAVE_FALLOC_PH
96 #include <linux/falloc.h>
97 #endif
98 #if HAVE_BLKDISCARD
99 #include <linux/fs.h>
100 #endif
101 #include <arpa/inet.h>
102 #include <strings.h>
103 #include <dirent.h>
104 #ifdef HAVE_SYS_DIR_H
105 #include <sys/dir.h>
106 #endif
107 #ifdef HAVE_SYS_DIRENT_H
108 #include <sys/dirent.h>
109 #endif
110 #include <getopt.h>
111 #include <pwd.h>
112 #include <grp.h>
113 #include <dirent.h>
114 #include <ctype.h>
115 #include <inttypes.h>
117 #include <glib.h>
119 #if HAVE_OLD_GLIB
120 #include <pthread.h>
121 #endif
123 #include <semaphore.h>
125 /* used in cliserv.h, so must come first */
126 #define MY_NAME "nbd_server"
127 #include "cliserv.h"
128 #include "nbd-debug.h"
129 #include "netdb-compat.h"
130 #include "backend.h"
131 #include "treefiles.h"
133 #ifdef WITH_SDP
134 #include <sdp_inet.h>
135 #endif
137 #if HAVE_FSCTL_SET_ZERO_DATA
138 #include <io.h>
139 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
140 #define NOMINMAX 1
141 #include <windef.h>
142 #include <winbase.h>
143 #include <winioctl.h>
144 #endif
146 /** Default position of the config file */
147 #ifndef SYSCONFDIR
148 #define SYSCONFDIR "/etc"
149 #endif
150 #define CFILE SYSCONFDIR "/nbd-server/config"
152 #if HAVE_GNUTLS
153 #include <gnutls/gnutls.h>
154 #include <gnutls/x509.h>
155 #endif
157 /** Where our config file actually is */
158 gchar* config_file_pos;
160 /** global flags */
161 int glob_flags=0;
163 /* Whether we should avoid forking */
164 int dontfork = 0;
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) one.
 *
 * The value is built in unsigned arithmetic and only then converted to
 * off_t: shifting a 1 into the sign bit of a signed type is undefined
 * behaviour in C. The whole expansion is parenthesised so the macro can be
 * used safely inside larger expressions.
 **/
#define OFFT_MAX ((off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL))
171 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
172 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
174 /** Global flags: */
175 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
176 #define F_LIST 2 /**< Allow clients to list the exports on a server */
177 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
178 #define F_DUAL_LISTEN 8 /**< Listen on both TCP and unix socket */
179 // also accepts F_FORCEDTLS (which is 16384)
180 GHashTable *children;
181 char pidfname[256]; /**< name of our PID file */
182 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
184 #define NEG_INIT (1 << 0)
185 #define NEG_OLD (1 << 1)
186 #define NEG_MODERN (1 << 2)
189 * If we want what the system really has set we'd have to read
190 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
192 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
193 #define SPLICE_IN 0
194 #define SPLICE_OUT 1
196 #include <nbdsrv.h>
198 /* Our thread pool */
199 GThreadPool *tpool;
201 /* A work package for the thread pool functions */
202 struct work_package {
203 CLIENT* client;
204 struct nbd_request* req;
205 int pipefd[2];
206 void* data; /**< for read requests */
209 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
210 SIGCHLD handler
211 to mark a child
212 exit */
214 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
215 SIGTERM handler
216 to mark an exit
217 request */
219 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
220 handler to mark a
221 reconfiguration
222 request */
224 GArray* modernsocks; /**< Sockets for the modern handler. Not used
225 if a client was only specified on the
226 command line; only port used if
227 oldstyle is set to false (and then the
228 command-line client isn't used, gna gna).
229 This may be more than one socket on
230 systems that don't support serving IPv4
231 and IPv6 from the same socket (like,
232 e.g., FreeBSD) */
233 GArray* childsocks; /**< parent-side sockets for communication with children */
234 int commsocket; /**< child-side socket for communication with parent */
235 static sem_t file_wait_sem;
237 bool logged_oversized=false; /**< whether we logged oversized requests already */
/**
 * Type of configuration file values. Selects how a value is read from the
 * key file and how it is stored through PARAM::target.
 **/
typedef enum {
	PARAM_INT,		/**< This parameter is an integer (gint) */
	PARAM_INT64,		/**< This parameter is a 64-bit integer (gint64) */
	PARAM_STRING,		/**< This parameter is a string */
	PARAM_BOOL,		/**< This parameter is a boolean */
} PARAM_TYPE;
250 * Configuration file values
252 typedef struct {
253 gchar *paramname; /**< Name of the parameter, as it appears in
254 the config file */
255 gboolean required; /**< Whether this is a required (as opposed to
256 optional) parameter */
257 PARAM_TYPE ptype; /**< Type of the parameter. */
258 gpointer target; /**< Pointer to where the data of this
259 parameter should be written. If ptype is
260 PARAM_BOOL, the data is or'ed rather than
261 overwritten. */
262 gint flagval; /**< Flag mask for this parameter in case ptype
263 is PARAM_BOOL. */
264 } PARAM;
267 * Configuration file values of the "generic" section
269 struct generic_conf {
270 gchar *user; /**< user we run the server as */
271 gchar *group; /**< group we run running as */
272 gchar *modernaddr; /**< address of the modern socket */
273 gchar *modernport; /**< port of the modern socket */
274 gchar *unixsock; /**< file name of the unix domain socket */
275 gchar *certfile; /**< certificate file */
276 gchar *keyfile; /**< key file */
277 gchar *cacertfile; /**< CA certificate file */
278 gchar *tlsprio; /**< TLS priority string */
279 gint flags; /**< global flags */
280 gint threads; /**< maximum number of parallel threads we want to run */
284 * Translate a command name into human readable form
286 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
287 * @return pointer to the command name
289 static inline const char * getcommandname(uint64_t command) {
290 switch (command) {
291 case NBD_CMD_READ:
292 return "NBD_CMD_READ";
293 case NBD_CMD_WRITE:
294 return "NBD_CMD_WRITE";
295 case NBD_CMD_DISC:
296 return "NBD_CMD_DISC";
297 case NBD_CMD_FLUSH:
298 return "NBD_CMD_FLUSH";
299 case NBD_CMD_TRIM:
300 return "NBD_CMD_TRIM";
301 case NBD_CMD_WRITE_ZEROES:
302 return "NBD_CMD_WRITE_ZEROES";
303 default:
304 return "UNKNOWN";
308 #if HAVE_GNUTLS
309 static int writeit_tls(gnutls_session_t s, void *buf, size_t len) {
310 ssize_t res;
311 char *m;
312 while(len > 0) {
313 DEBUG("+");
314 if ((res = gnutls_record_send(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
315 m = g_strdup_printf("issue while sending data: %s", gnutls_strerror(res));
316 err_nonfatal(m);
317 g_free(m);
318 } else if(res < 0) {
319 m = g_strdup_printf("could not send data: %s", gnutls_strerror(res));
320 err_nonfatal(m);
321 g_free(m);
322 return -1;
323 } else {
324 len -= res;
325 buf += res;
328 return 0;
331 static int readit_tls(gnutls_session_t s, void *buf, size_t len) {
332 ssize_t res;
333 char *m;
334 while(len > 0) {
335 DEBUG("*");
336 if((res = gnutls_record_recv(s, buf, len)) < 0 && !gnutls_error_is_fatal(res)) {
337 m = g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res));
338 err_nonfatal(m);
339 g_free(m);
340 } else if(res < 0) {
341 m = g_strdup_printf("could not receive data: %s", gnutls_strerror(res));
342 err_nonfatal(m);
343 g_free(m);
344 return -1;
345 } else {
346 len -= res;
347 buf += res;
350 return 0;
353 static int socket_read_tls(CLIENT* client, void *buf, size_t len) {
354 return readit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
357 static int socket_write_tls(CLIENT* client, void *buf, size_t len) {
358 return writeit_tls(*((gnutls_session_t*)client->tls_session), buf, len);
360 #endif // HAVE_GNUTLS
362 static int socket_read_notls(CLIENT* client, void *buf, size_t len) {
363 return readit(client->net, buf, len);
366 static int socket_write_notls(CLIENT* client, void *buf, size_t len) {
367 return writeit(client->net, buf, len);
370 static void socket_read(CLIENT* client, void *buf, size_t len) {
371 g_assert(client->socket_read != NULL);
372 if(client->socket_read(client, buf, len)<0) {
373 g_assert(client->socket_closed != NULL);
374 client->socket_closed(client);
379 * Consume data from a socket that we don't want
381 * @param c the client to read from
382 * @param len the number of bytes to consume
383 * @param buf a buffer
384 * @param bufsiz the size of the buffer
386 static inline void consume(CLIENT* c, size_t len, void * buf, size_t bufsiz) {
387 size_t curlen;
388 while (len>0) {
389 curlen = (len>bufsiz)?bufsiz:len;
390 socket_read(c, buf, curlen);
391 len -= curlen;
396 * Consume a length field and corresponding payload that we don't want
398 * @param c the client to read from
400 static inline void consume_len(CLIENT* c) {
401 uint32_t len;
402 char buf[1024];
404 socket_read(c, &len, sizeof(len));
405 len = ntohl(len);
406 consume(c, len, buf, sizeof(buf));
409 static void socket_write(CLIENT* client, void *buf, size_t len) {
410 g_assert(client->socket_write != NULL);
411 if(client->socket_write(client, buf, len)<0) {
412 g_assert(client->socket_closed != NULL);
413 client->socket_closed(client);
417 static inline void socket_closed_negotiate(CLIENT* client) {
418 err("Negotiation failed: %m");
422 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
423 * options
425 * @param command the command to be ran. Read from the config file
426 * @param file the file name we're about to export
428 int do_run(gchar* command, gchar* file) {
429 gchar* cmd;
430 int retval=0;
432 if(command && *command) {
433 cmd = g_strdup_printf(command, file);
434 retval=system(cmd);
435 g_free(cmd);
437 return retval;
440 static inline void finalize_client(CLIENT* client) {
441 g_thread_pool_free(tpool, FALSE, TRUE);
442 do_run(client->server->postrun, client->exportname);
443 if(client->transactionlogfd != -1) {
444 close(client->transactionlogfd);
445 client->transactionlogfd = -1;
447 if(client->server->flags & F_COPYONWRITE) {
448 unlink(client->difffilename);
450 serve_dec_ref(client->server);
453 static inline void socket_closed_transmission(CLIENT* client) {
454 int saved_errno = errno;
455 finalize_client(client);
456 errno = saved_errno;
457 err("Connection dropped: %m");
460 #ifdef HAVE_SPLICE
462 * Splice data between a pipe and a file descriptor
464 * @param fd_in The fd to splice from.
465 * @param off_in The fd_in offset to splice from.
466 * @param fd_out The fd to splice to.
467 * @param off_out The fd_out offset to splice to.
468 * @param len The length to splice.
470 static inline void spliceit(int fd_in, loff_t *off_in, int fd_out,
471 loff_t *off_out, size_t len)
473 ssize_t ret;
474 while (len > 0) {
475 if ((ret = splice(fd_in, off_in, fd_out, off_out, len,
476 SPLICE_F_MOVE)) <= 0)
477 err("Splice failed: %m");
478 len -= ret;
481 #endif
484 * Print out a message about how to use nbd-server. Split out to a separate
485 * function so that we can call it from multiple places
487 void usage() {
488 printf("This is nbd-server version " VERSION "\n");
489 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
490 "\t-r|--read-only\t\tread only\n"
491 "\t-m|--multi-file\t\tmultiple file\n"
492 "\t-c|--copy-on-write\tcopy on write\n"
493 "\t-C|--config-file\tspecify an alternate configuration file\n"
494 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
495 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
496 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
497 "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
498 "\t-V|--version\toutput the version and exit\n\n"
499 "\tif port is set to 0, stdin is used (for running from inetd).\n"
500 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
501 "\t\taddress of the machine trying to connect\n"
502 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
503 printf("Using configuration file %s\n", CFILE);
504 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT);
507 /* Dumps a config file section of the given SERVER*, and exits. */
508 void dump_section(SERVER* serve, gchar* section_header) {
509 printf("[%s]\n", section_header);
510 printf("\texportname = %s\n", serve->exportname);
511 printf("\tlistenaddr = %s\n", serve->listenaddr);
512 if(serve->flags & F_READONLY) {
513 printf("\treadonly = true\n");
515 if(serve->flags & F_MULTIFILE) {
516 printf("\tmultifile = true\n");
518 if(serve->flags & F_TREEFILES) {
519 printf("\ttreefiles = true\n");
521 if(serve->flags & F_COPYONWRITE) {
522 printf("\tcopyonwrite = true\n");
524 if(serve->expected_size) {
525 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
527 if(serve->authname) {
528 printf("\tauthfile = %s\n", serve->authname);
530 exit(EXIT_SUCCESS);
534 * Parse the command line.
536 * @param argc the argc argument to main()
537 * @param argv the argv argument to main()
539 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) {
540 int i=0;
541 int nonspecial=0;
542 int c;
543 struct option long_options[] = {
544 {"read-only", no_argument, NULL, 'r'},
545 {"multi-file", no_argument, NULL, 'm'},
546 {"copy-on-write", no_argument, NULL, 'c'},
547 {"dont-fork", no_argument, NULL, 'd'},
548 {"authorize-file", required_argument, NULL, 'l'},
549 {"config-file", required_argument, NULL, 'C'},
550 {"pid-file", required_argument, NULL, 'p'},
551 {"output-config", required_argument, NULL, 'o'},
552 {"max-connection", required_argument, NULL, 'M'},
553 {"version", no_argument, NULL, 'V'},
554 {0,0,0,0}
556 SERVER *serve;
557 off_t es;
558 size_t last;
559 char suffix;
560 bool do_output=false;
561 gchar* section_header="";
562 gchar** addr_port;
564 if(argc==1) {
565 return NULL;
567 serve=serve_inc_ref((SERVER*)g_new0(SERVER, 1));
568 serve->authname = g_strdup(default_authname);
569 serve->virtstyle=VIRT_IPLIT;
570 while((c=getopt_long(argc, argv, "-C:cwdl:mo:rp:M:V", long_options, &i))>=0) {
571 switch (c) {
572 case 1:
573 /* non-option argument */
574 switch(nonspecial++) {
575 case 0:
576 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
577 addr_port=g_strsplit(optarg, ":", 2);
579 /* Check for "@" - maybe user using this separator
580 for IPv4 address */
581 if(!addr_port[1]) {
582 g_strfreev(addr_port);
583 addr_port=g_strsplit(optarg, "@", 2);
585 } else {
586 addr_port=g_strsplit(optarg, "@", 2);
589 if(addr_port[1]) {
590 genconf->modernport=g_strdup(addr_port[1]);
591 genconf->modernaddr=g_strdup(addr_port[0]);
592 } else {
593 g_free(genconf->modernaddr);
594 genconf->modernaddr=NULL;
595 genconf->modernport=g_strdup(addr_port[0]);
597 g_strfreev(addr_port);
598 break;
599 case 1:
600 serve->exportname = g_strdup(optarg);
601 if(serve->exportname[0] != '/') {
602 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
603 exit(EXIT_FAILURE);
605 break;
606 case 2:
607 last=strlen(optarg)-1;
608 suffix=optarg[last];
609 if (suffix == 'k' || suffix == 'K' ||
610 suffix == 'm' || suffix == 'M')
611 optarg[last] = '\0';
612 es = (off_t)atoll(optarg);
613 switch (suffix) {
614 case 'm':
615 case 'M': es <<= 10;
616 case 'k':
617 case 'K': es <<= 10;
618 default : break;
620 serve->expected_size = es;
621 break;
623 break;
624 case 'r':
625 serve->flags |= F_READONLY;
626 break;
627 case 'm':
628 serve->flags |= F_MULTIFILE;
629 break;
630 case 'o':
631 do_output = true;
632 section_header = g_strdup(optarg);
633 break;
634 case 'p':
635 strncpy(pidfname, optarg, 256);
636 pidfname[255]='\0';
637 break;
638 case 'c':
639 serve->flags |=F_COPYONWRITE;
640 break;
641 case 'd':
642 dontfork = 1;
643 break;
644 case 'C':
645 g_free(config_file_pos);
646 config_file_pos=g_strdup(optarg);
647 break;
648 case 'l':
649 g_free(serve->authname);
650 serve->authname=g_strdup(optarg);
651 break;
652 case 'M':
653 serve->max_connections = strtol(optarg, NULL, 0);
654 break;
655 case 'V':
656 printf("This is nbd-server version " VERSION "\n");
657 exit(EXIT_SUCCESS);
658 break;
659 default:
660 usage();
661 exit(EXIT_FAILURE);
662 break;
665 /* What's left: the port to export, the name of the to be exported
666 * file, and, optionally, the size of the file, in that order. */
667 if(nonspecial<2) {
668 serve=serve_dec_ref(serve);
669 } else {
670 serve->servename = "";
672 if(do_output) {
673 if(!serve) {
674 g_critical("Need a complete configuration on the command line to output a config file section!");
675 exit(EXIT_FAILURE);
677 dump_section(serve, section_header);
679 return serve;
682 /* forward definition of parse_cfile */
683 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
685 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
686 #define NBD_D_TYPE de->d_type
687 #else
688 #define NBD_D_TYPE 0
689 #define DT_UNKNOWN 0
690 #define DT_REG 1
691 #endif
694 * Parse config file snippets in a directory. Uses readdir() and friends
695 * to find files and open them, then passes them on to parse_cfile
696 * with have_global set false
698 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
699 DIR* dirh = opendir(dir);
700 struct dirent* de;
701 gchar* fname;
702 GArray* retval = NULL;
703 GArray* tmp;
704 struct stat stbuf;
706 if(!dirh) {
707 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
708 return NULL;
710 errno=0;
711 while((de = readdir(dirh))) {
712 int saved_errno=errno;
713 fname = g_build_filename(dir, de->d_name, NULL);
714 switch(NBD_D_TYPE) {
715 case DT_UNKNOWN:
716 /* Filesystem doesn't return type of
717 * file through readdir, or struct dirent
718 * doesn't have d_type. Run stat() on the file
719 * instead */
720 if(stat(fname, &stbuf)) {
721 perror("stat");
722 goto err_out;
724 if (!S_ISREG(stbuf.st_mode)) {
725 goto next;
727 case DT_REG:
728 /* Skip unless the name ends with '.conf' */
729 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
730 goto next;
732 tmp = parse_cfile(fname, genconf, false, e);
733 errno=saved_errno;
734 if(*e) {
735 goto err_out;
737 if(!retval)
738 retval = g_array_new(FALSE, TRUE, sizeof(SERVER*));
739 retval = g_array_append_vals(retval, tmp->data, tmp->len);
740 g_array_free(tmp, TRUE);
741 default:
742 break;
744 next:
745 g_free(fname);
747 if(errno) {
748 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
749 err_out:
750 if(retval)
751 g_array_free(retval, TRUE);
752 retval = NULL;
754 if(dirh)
755 closedir(dirh);
756 return retval;
760 * Parse the config file.
762 * @param f the name of the config file
764 * @param genconf a pointer to generic configuration which will get
765 * updated with parsed values. If NULL, then parsed generic
766 * configuration values are safely and silently discarded.
768 * @param e a GError. Error code can be any of the following:
769 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
770 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
771 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
773 * @param expect_generic if true, we expect a configuration file that
774 * contains a [generic] section. If false, we don't.
776 * @return a GArray of SERVER* pointers. If the config file is empty or does not
777 * exist, returns an empty GArray; if the config file contains an
778 * error, returns NULL, and e is set appropriately
780 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
781 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
782 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
783 gchar* cfdir = NULL;
784 SERVER s;
785 gchar *virtstyle=NULL;
786 PARAM lp[] = {
787 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
788 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
789 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
790 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
791 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
792 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
793 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
794 { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 },
795 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
796 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
797 { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES },
798 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
799 { "waitfile", FALSE, PARAM_BOOL, &(s.flags), F_WAIT },
800 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
801 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
802 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
803 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
804 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
805 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
806 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
807 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
808 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
809 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
810 { "force_tls", FALSE, PARAM_BOOL, &(s.flags), F_FORCEDTLS },
811 { "splice", FALSE, PARAM_BOOL, &(s.flags), F_SPLICE},
813 const int lp_size=sizeof(lp)/sizeof(PARAM);
814 struct generic_conf genconftmp;
815 PARAM gp[] = {
816 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
817 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
818 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
819 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
820 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
821 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
822 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
823 { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 },
824 { "duallisten", FALSE, PARAM_BOOL, &(genconftmp.flags), F_DUAL_LISTEN }, // Used to listen on both TCP and unix socket
825 { "max_threads", FALSE, PARAM_INT, &(genconftmp.threads), 0 },
826 { "force_tls", FALSE, PARAM_BOOL, &(genconftmp.flags), F_FORCEDTLS },
827 { "certfile", FALSE, PARAM_STRING, &(genconftmp.certfile), 0 },
828 { "keyfile", FALSE, PARAM_STRING, &(genconftmp.keyfile), 0 },
829 { "cacertfile", FALSE, PARAM_STRING, &(genconftmp.cacertfile), 0 },
830 { "tlsprio", FALSE, PARAM_STRING, &(genconftmp.tlsprio), 0 },
832 PARAM* p=gp;
833 int p_size=sizeof(gp)/sizeof(PARAM);
834 GKeyFile *cfile;
835 GError *err = NULL;
836 const char *err_msg=NULL;
837 GArray *retval=NULL;
838 gchar **groups;
839 gboolean bval;
840 gint ival;
841 gint64 i64val;
842 gchar* sval;
843 gchar* startgroup;
844 gint i;
845 gint j;
847 memset(&genconftmp, 0, sizeof(struct generic_conf));
849 genconftmp.tlsprio = "NORMAL:-VERS-TLS-ALL:+VERS-TLS1.2:%SERVER_PRECEDENCE";
851 if (genconf) {
852 /* Use the passed configuration values as defaults. The
853 * parsing algorithm below updates all parameter targets
854 * found from configuration files. */
855 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
858 cfile = g_key_file_new();
859 retval = g_array_new(FALSE, TRUE, sizeof(SERVER*));
860 if(expect_generic) {
861 g_array_set_clear_func(retval, (GDestroyNotify)serve_dec_ref);
863 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
864 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
865 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
866 f, err->message);
867 g_key_file_free(cfile);
868 return retval;
870 startgroup = g_key_file_get_start_group(cfile);
871 if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
872 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
873 g_key_file_free(cfile);
874 return NULL;
876 groups = g_key_file_get_groups(cfile, NULL);
877 for(i=0;groups[i];i++) {
878 memset(&s, '\0', sizeof(SERVER));
880 /* After the [generic] group or when we're parsing an include
881 * directory, start parsing exports */
882 if(i==1 || !expect_generic) {
883 p=lp;
884 p_size=lp_size;
886 for(j=0;j<p_size;j++) {
887 assert(p[j].target != NULL);
888 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
889 switch(p[j].ptype) {
890 case PARAM_INT:
891 ival = g_key_file_get_integer(cfile,
892 groups[i],
893 p[j].paramname,
894 &err);
895 if(!err) {
896 *((gint*)p[j].target) = ival;
898 break;
899 case PARAM_INT64:
900 i64val = g_key_file_get_int64(cfile,
901 groups[i],
902 p[j].paramname,
903 &err);
904 if(!err) {
905 *((gint64*)p[j].target) = i64val;
907 break;
908 case PARAM_STRING:
909 sval = g_key_file_get_string(cfile,
910 groups[i],
911 p[j].paramname,
912 &err);
913 if(!err) {
914 *((gchar**)p[j].target) = sval;
916 break;
917 case PARAM_BOOL:
918 bval = g_key_file_get_boolean(cfile,
919 groups[i],
920 p[j].paramname, &err);
921 if(!err) {
922 if(bval) {
923 *((gint*)p[j].target) |= p[j].flagval;
924 } else {
925 *((gint*)p[j].target) &= ~(p[j].flagval);
928 break;
930 if(err) {
931 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
932 if(!p[j].required) {
933 /* Ignore not-found error for optional values */
934 g_clear_error(&err);
935 continue;
936 } else {
937 err_msg = MISSING_REQUIRED_ERROR;
939 } else {
940 err_msg = DEFAULT_ERROR;
942 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
943 g_array_free(retval, TRUE);
944 g_error_free(err);
945 g_key_file_free(cfile);
946 return NULL;
949 if(virtstyle) {
950 if(!strncmp(virtstyle, "none", 4)) {
951 s.virtstyle=VIRT_NONE;
952 } else if(!strncmp(virtstyle, "ipliteral", 9)) {
953 s.virtstyle=VIRT_IPLIT;
954 } else if(!strncmp(virtstyle, "iphash", 6)) {
955 s.virtstyle=VIRT_IPHASH;
956 } else if(!strncmp(virtstyle, "cidrhash", 8)) {
957 s.virtstyle=VIRT_CIDR;
958 if(strlen(virtstyle)<10) {
959 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
960 g_array_free(retval, TRUE);
961 g_key_file_free(cfile);
962 return NULL;
964 s.cidrlen=strtol(virtstyle+8, NULL, 0);
965 } else {
966 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
967 g_array_free(retval, TRUE);
968 g_key_file_free(cfile);
969 return NULL;
971 } else {
972 s.virtstyle=VIRT_IPLIT;
974 if(genconftmp.flags & F_OLDSTYLE) {
975 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
976 g_message("Exiting.");
977 return NULL;
979 #ifndef HAVE_SPLICE
980 if (s.flags & F_SPLICE) {
981 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without splice support, yet group %s uses it", groups[i]);
982 g_array_free(retval, TRUE);
983 g_key_file_free(cfile);
984 return NULL;
986 #endif
987 /* We can't mix copyonwrite and splice. */
988 if ((s.flags & F_COPYONWRITE) && (s.flags & F_SPLICE)) {
989 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_SPLICE,
990 "Cannot mix copyonwrite with splice for an export in group %s",
991 groups[i]);
992 g_array_free(retval, TRUE);
993 g_key_file_free(cfile);
994 return NULL;
996 if ((s.flags & F_COPYONWRITE) && (s.flags & F_WAIT)) {
997 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_INVALID_WAIT,
998 "Cannot mix copyonwrite with waitfile for an export in group %s",
999 groups[i]);
1000 g_array_free(retval, TRUE);
1001 g_key_file_free(cfile);
1002 return NULL;
1004 /* Don't need to free this, it's not our string */
1005 virtstyle=NULL;
1006 /* Don't append values for the [generic] group */
1007 if(i>0 || !expect_generic) {
1008 s.servename = groups[i];
1010 SERVER *srv = serve_inc_ref(g_memdup(&s, sizeof(SERVER)));
1011 g_array_append_val(retval, srv);
1013 #ifndef WITH_SDP
1014 if(s.flags & F_SDP) {
1015 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
1016 g_array_free(retval, TRUE);
1017 g_key_file_free(cfile);
1018 return NULL;
1020 #endif
1022 g_key_file_free(cfile);
1023 if(cfdir) {
1024 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
1025 if(extra) {
1026 retval = g_array_append_vals(retval, extra->data, extra->len);
1027 i+=extra->len;
1028 g_array_free(extra, TRUE);
1029 } else {
1030 if(*e) {
1031 g_array_free(retval, TRUE);
1032 return NULL;
1036 if(i==1 && expect_generic) {
1037 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
1040 if (genconf) {
1041 /* Return the updated generic configuration through the
1042 * pointer parameter. */
1043 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
1046 return retval;
1050 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1051 * main loop of the root server process. This allows us to separate the signal
1052 * catching from th actual task triggered by SIGCHLD and hence processing in the
1053 * interrupt context is kept as minimial as possible.
1055 * @param s the signal we're handling (must be SIGCHLD, or something
1056 * is severely wrong)
1058 static void sigchld_handler(const int s G_GNUC_UNUSED) {
1059 is_sigchld_caught = 1;
1063 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1065 * @param key the key
1066 * @param value the value corresponding to the above key
1067 * @param user_data a pointer which we always set to 1, so that we know what
1068 * will happen next.
1070 void killchild(gpointer key, gpointer value, gpointer user_data) {
1071 pid_t *pid=value;
1073 kill(*pid, SIGTERM);
1077 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1078 * main loop of the root server process. This allows us to separate the signal
1079 * catching from th actual task triggered by SIGTERM and hence processing in the
1080 * interrupt context is kept as minimial as possible.
1082 * @param s the signal we're handling (must be SIGTERM, or something
1083 * is severely wrong).
1085 static void sigterm_handler(const int s G_GNUC_UNUSED) {
1086 is_sigterm_caught = 1;
1090 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1091 * the main loop of the root server process. This allows us to separate
1092 * the signal catching from th actual task triggered by SIGHUP and hence
1093 * processing in the interrupt context is kept as minimial as possible.
1095 * @param s the signal we're handling (must be SIGHUP, or something
1096 * is severely wrong).
1098 static void sighup_handler(const int s G_GNUC_UNUSED) {
1099 is_sighup_caught = 1;
1102 static void sigusr1_handler(const int s G_GNUC_UNUSED) {
1103 msg(LOG_INFO, "Got SIGUSR1");
1104 sem_post(&file_wait_sem);
1108 * Get the file handle and offset, given an export offset.
1110 * @param client The client we're serving for
1111 * @param a The offset to get corresponding file/offset for
1112 * @param fhandle [out] File descriptor
1113 * @param foffset [out] Offset into fhandle
1114 * @param maxbytes [out] Tells how many bytes can be read/written
1115 * from fhandle starting at foffset (0 if there is no limit)
1116 * @return 0 on success, -1 on failure
1118 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1119 GArray * const export = client->export;
1121 /* Negative offset not allowed */
1122 if(a < 0)
1123 return -1;
1125 /* Open separate file for treefiles */
1126 if (client->server->flags & F_TREEFILES) {
1127 *foffset = a % TREEPAGESIZE;
1128 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
1129 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock);
1130 return 0;
1133 /* Binary search for last file with starting offset <= a */
1134 FILE_INFO fi;
1135 int start = 0;
1136 int end = export->len - 1;
1137 while( start <= end ) {
1138 int mid = (start + end) / 2;
1139 fi = g_array_index(export, FILE_INFO, mid);
1140 if( fi.startoff < a ) {
1141 start = mid + 1;
1142 } else if( fi.startoff > a ) {
1143 end = mid - 1;
1144 } else {
1145 start = end = mid;
1146 break;
1150 /* end should never go negative, since first startoff is 0 and a >= 0 */
1151 assert(end >= 0);
1153 fi = g_array_index(export, FILE_INFO, end);
1154 *fhandle = fi.fhandle;
1155 *foffset = a - fi.startoff;
1156 *maxbytes = 0;
1157 if( end+1 < export->len ) {
1158 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1159 *maxbytes = fi_next.startoff - a;
1162 return 0;
1166 * Write an amount of bytes at a given offset to the right file. This
1167 * abstracts the write-side of the multiple file option.
1169 * @param a The offset where the write should start
1170 * @param buf The buffer to write from
1171 * @param len The length of buf
1172 * @param client The client we're serving for
1173 * @param fua Flag to indicate 'Force Unit Access'
1174 * @return The number of bytes actually written, or -1 in case of an error
1176 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1177 int fhandle;
1178 off_t foffset;
1179 size_t maxbytes;
1180 ssize_t retval;
1182 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1183 return -1;
1184 if(maxbytes && len > maxbytes)
1185 len = maxbytes;
1187 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1189 retval = pwrite(fhandle, buf, len, foffset);
1190 if(client->server->flags & F_SYNC) {
1191 fsync(fhandle);
1192 } else if (fua) {
1194 /* This is where we would do the following
1195 * #ifdef USE_SYNC_FILE_RANGE
1196 * However, we don't, for the reasons set out below
1197 * by Christoph Hellwig <hch@infradead.org>
1199 * [BEGINS]
1200 * fdatasync is equivalent to fsync except that it does not flush
1201 * non-essential metadata (basically just timestamps in practice), but it
1202 * does flush metadata requried to find the data again, e.g. allocation
1203 * information and extent maps. sync_file_range does nothing but flush
1204 * out pagecache content - it means you basically won't get your data
1205 * back in case of a crash if you either:
1207 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1208 * b) are using a sparse file on a filesystem
1209 * c) are using a fallocate-preallocated file on a filesystem
1210 * d) use any file on a COW filesystem like btrfs
1212 * e.g. it only does anything useful for you if you do not have a volatile
1213 * write cache, and either use a raw block device node, or just overwrite
1214 * an already fully allocated (and not preallocated) file on a non-COW
1215 * filesystem.
1216 * [ENDS]
1218 * What we should do is open a second FD with O_DSYNC set, then write to
1219 * that when appropriate. However, with a Linux client, every REQ_FUA
1220 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1221 * problems.
1224 #if 0
1225 sync_file_range(fhandle, foffset, len,
1226 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1227 SYNC_FILE_RANGE_WAIT_AFTER);
1228 #else
1229 fdatasync(fhandle);
1230 #endif
1232 /* close file pointer in case of treefiles */
1233 if (client->server->flags & F_TREEFILES) {
1234 close(fhandle);
1236 return retval;
1240 * Call rawexpwrite repeatedly until all data has been written.
1242 * @param a The offset where the write should start
1243 * @param buf The buffer to write from
1244 * @param len The length of buf
1245 * @param client The client we're serving for
1246 * @param fua Flag to indicate 'Force Unit Access'
1247 * @return 0 on success, nonzero on failure
1249 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1250 ssize_t ret=0;
1252 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1253 a += ret;
1254 buf += ret;
1255 len -= ret;
1257 return (ret < 0 || len != 0);
1261 * Read an amount of bytes at a given offset from the right file. This
1262 * abstracts the read-side of the multiple files option.
1264 * @param a The offset where the read should start
1265 * @param buf A buffer to read into
1266 * @param len The size of buf
1267 * @param client The client we're serving for
1268 * @return The number of bytes actually read, or -1 in case of an
1269 * error.
1271 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1272 int fhandle;
1273 off_t foffset;
1274 size_t maxbytes;
1275 ssize_t retval;
1277 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1278 return -1;
1279 if(maxbytes && len > maxbytes)
1280 len = maxbytes;
1282 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1284 retval = pread(fhandle, buf, len, foffset);
1285 if (client->server->flags & F_TREEFILES) {
1286 close(fhandle);
1288 return retval;
1292 * Call rawexpread repeatedly until all data has been read.
1293 * @return 0 on success, nonzero on failure
1295 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1296 ssize_t ret=0;
1298 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1299 a += ret;
1300 buf += ret;
1301 len -= ret;
1303 return (ret < 0 || len != 0);
1306 #ifdef HAVE_SPLICE
1307 int rawexpsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir,
1308 int fua)
1310 int fhandle;
1311 off_t foffset;
1312 size_t maxbytes;
1313 ssize_t retval;
1315 if (get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1316 return -1;
1317 if (maxbytes && len > maxbytes)
1318 len = maxbytes;
1320 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1321 (dir == SPLICE_IN) ? "from" : "to", fhandle,
1322 (unsigned long long)a, (unsigned)len);
1325 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1326 * it might, so go ahead and use it.
1328 if (dir == SPLICE_IN) {
1329 retval = splice(fhandle, &foffset, pipe, NULL, len,
1330 SPLICE_F_MOVE);
1331 } else {
1332 retval = splice(pipe, NULL, fhandle, &foffset, len,
1333 SPLICE_F_MOVE);
1334 if (client->server->flags & F_SYNC)
1335 fsync(fhandle);
1336 else if (fua)
1337 fdatasync(fhandle);
1339 if (client->server->flags & F_TREEFILES)
1340 close(fhandle);
1341 return retval;
1345 * Splice an amount of bytes from the given offset from/into the right file
1346 * from/into the given pipe.
1347 * @param pipe The pipe we are using for this splice.
1348 * @param a The offset of the file we are operating on.
1349 * @param len The length of the splice.
1350 * @param client The client we're splicing for.
1351 * @param dir The direction we are doing the splice in.
1352 * @param fua Set if this is a write and we need to fua.
1353 * @return 0 on success, nonzero on failure.
1355 int expsplice(int pipe, off_t a, size_t len, CLIENT *client, int dir, int fua)
1357 ssize_t ret;
1359 while (len > 0 &&
1360 (ret = rawexpsplice(pipe, a, len, client, dir, fua)) > 0) {
1361 a += ret;
1362 len -= ret;
1364 return (ret < 0 || len != 0);
1366 #endif /* HAVE_SPLICE */
1369 * Read an amount of bytes at a given offset from the right file. This
1370 * abstracts the read-side of the copyonwrite stuff, and calls
1371 * rawexpread() with the right parameters to do the actual work.
1372 * @param a The offset where the read should start
1373 * @param buf A buffer to read into
1374 * @param len The size of buf
1375 * @param client The client we're going to read for
1376 * @return 0 on success, nonzero on failure
1378 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1379 off_t rdlen, offset;
1380 off_t mapcnt, mapl, maph, pagestart;
1382 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1384 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1385 return(rawexpread_fully(a, buf, len, client));
1387 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1389 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1390 pagestart=mapcnt*DIFFPAGESIZE;
1391 offset=a-pagestart;
1392 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1393 len : (size_t)DIFFPAGESIZE-offset;
1394 if (!(client->server->flags & F_COPYONWRITE))
1395 pthread_rwlock_rdlock(&client->export_lock);
1396 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1397 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1398 (unsigned long)(client->difmap[mapcnt]));
1399 if (pread(client->difffile, buf, rdlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != rdlen) goto fail;
1400 } else { /* the block is not there */
1401 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1402 DEBUG("Page %llu is not here, and waiting for file\n",
1403 (unsigned long long)mapcnt);
1404 goto fail;
1405 } else {
1406 DEBUG("Page %llu is not here, we read the original one\n",
1407 (unsigned long long)mapcnt);
1408 if(rawexpread_fully(a, buf, rdlen, client)) goto fail;
1411 if (!(client->server->flags & F_COPYONWRITE))
1412 pthread_rwlock_unlock(&client->export_lock);
1413 len-=rdlen; a+=rdlen; buf+=rdlen;
1415 return 0;
1416 fail:
1417 if (!(client->server->flags & F_COPYONWRITE))
1418 pthread_rwlock_unlock(&client->export_lock);
1419 return -1;
1423 * Write an amount of bytes at a given offset to the right file. This
1424 * abstracts the write-side of the copyonwrite option, and calls
1425 * rawexpwrite() with the right parameters to do the actual work.
1427 * @param a The offset where the write should start
1428 * @param buf The buffer to write from
1429 * @param len The length of buf
1430 * @param client The client we're going to write for.
1431 * @param fua Flag to indicate 'Force Unit Access'
1432 * @return 0 on success, nonzero on failure
1434 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1435 char pagebuf[DIFFPAGESIZE];
1436 off_t mapcnt,mapl,maph;
1437 off_t wrlen,rdlen;
1438 off_t pagestart;
1439 off_t offset;
1441 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1444 if (!(client->server->flags & F_COPYONWRITE) && !((client->server->flags & F_WAIT) && (client->export == NULL)))
1445 return(rawexpwrite_fully(a, buf, len, client, fua));
1447 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1449 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1450 pagestart=mapcnt*DIFFPAGESIZE ;
1451 offset=a-pagestart ;
1452 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1453 len : (size_t)DIFFPAGESIZE-offset;
1455 if (!(client->server->flags & F_COPYONWRITE))
1456 pthread_rwlock_rdlock(&client->export_lock);
1457 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1458 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1459 (unsigned long)(client->difmap[mapcnt])) ;
1460 if (pwrite(client->difffile, buf, wrlen, client->difmap[mapcnt]*DIFFPAGESIZE+offset) != wrlen) goto fail;
1461 } else { /* the block is not there */
1462 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1463 DEBUG("Page %llu is not here, we put it at %lu\n",
1464 (unsigned long long)mapcnt,
1465 (unsigned long)(client->difmap[mapcnt]));
1466 if ((offset != 0) || (wrlen != DIFFPAGESIZE)){
1467 if ((client->server->flags & F_WAIT) && (client->export == NULL)){
1468 DEBUG("error: we can write only whole page while waiting for file\n");
1469 goto fail;
1471 rdlen=DIFFPAGESIZE ;
1472 if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1473 goto fail;
1475 memcpy(pagebuf+offset,buf,wrlen) ;
1476 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != DIFFPAGESIZE)
1477 goto fail;
1479 if (!(client->server->flags & F_COPYONWRITE))
1480 pthread_rwlock_unlock(&client->export_lock);
1481 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1483 if (client->server->flags & F_SYNC) {
1484 fsync(client->difffile);
1485 } else if (fua) {
1486 /* open question: would it be cheaper to do multiple sync_file_ranges?
1487 as we iterate through the above?
1489 fdatasync(client->difffile);
1491 return 0;
1492 fail:
1493 if (!(client->server->flags & F_COPYONWRITE))
1494 pthread_rwlock_unlock(&client->export_lock);
1495 return -1;
1501 * Write an amount of zeroes at a given offset to the right file.
1502 * This routine could be optimised by not calling expwrite. However,
1503 * this is by far the simplest way to do it.
1505 * @param req the request
1506 * @param client The client we're going to write for.
1507 * @return 0 on success, nonzero on failure
1509 int expwrite_zeroes(struct nbd_request* req, CLIENT* client, int fua) {
1510 off_t a = req->from;
1511 size_t len = req->len;
1512 size_t maxsize = 64LL*1024LL*1024LL;
1513 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1514 char *buf = calloc (1, maxsize);
1515 int ret;
1516 while (len > 0) {
1517 size_t l = len;
1518 if (l > maxsize)
1519 l = maxsize;
1520 ret = expwrite(a, buf, l, client, fua);
1521 if (ret) {
1522 free(buf);
1523 return ret;
1525 len -= l;
1527 free(buf);
1528 return 0;
1532 * Flush data to a client
1534 * @param client The client we're going to write for.
1535 * @return 0 on success, nonzero on failure
1537 int expflush(CLIENT *client) {
1538 gint i;
1540 if (client->server->flags & F_COPYONWRITE) {
1541 return fsync(client->difffile);
1544 if (client->server->flags & F_WAIT) {
1545 return fsync(client->difffile);
1548 if (client->server->flags & F_TREEFILES ) {
1549 // all we can do is force sync the entire filesystem containing the tree
1550 if (client->server->flags & F_READONLY)
1551 return 0;
1552 sync();
1553 return 0;
1556 for (i = 0; i < client->export->len; i++) {
1557 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1558 if (fsync(fi.fhandle) < 0)
1559 return -1;
1562 return 0;
/**
 * Try to deallocate a byte range of a file or device, using whichever
 * mechanism was detected at build time. Each available mechanism is tried
 * in turn (retrying on EINTR) and the function returns as soon as one
 * succeeds. Failure is only reported through DEBUG output.
 *
 * @param fd descriptor to punch the hole in
 * @param off start of the range
 * @param len length of the range
 */
void punch_hole(int fd, off_t off, off_t len) {
	DEBUG("Request to punch a hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len);
	errno = 0;
#if HAVE_FALLOC_PH
	/* fallocate -- files, Linux */
	for (;;) {
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len) == 0) {
			return;
		}
		if (errno != EINTR) {
			break;
		}
	}
#endif
#if HAVE_BLKDISCARD
	/* ioctl(BLKDISCARD) -- block devices, Linux */
	uint64_t range[2] = {off, len};
	for (;;) {
		if (ioctl(fd, BLKDISCARD, range) == 0) {
			return;
		}
		if (errno != EINTR) {
			break;
		}
	}
#endif
#if HAVE_FSCTL_SET_ZERO_DATA
	/* Windows */
	FILE_ZERO_DATA_INFORMATION zero_info;
	zero_info.FileOffset.QuadPart = off;
	zero_info.BeyondFinalZero.QuadPart = off + len;
	HANDLE os_handle = (HANDLE)_get_osfhandle(fd);
	DWORD returned;
	DeviceIoControl(os_handle, FSCTL_SET_ZERO_DATA, &zero_info, sizeof(zero_info), NULL, 0, &returned, NULL);
	return;
#endif
	/* errno is still 0 here when no mechanism was compiled in */
	if (errno != 0) {
		DEBUG("punching holes failed: %s", strerror(errno));
	} else {
		DEBUG("punching holes not supported on this platform\n");
	}
}
1600 static void send_reply(CLIENT* client, uint32_t opt, uint32_t reply_type, ssize_t datasize, void* data) {
1601 struct {
1602 uint64_t magic;
1603 uint32_t opt;
1604 uint32_t reply_type;
1605 uint32_t datasize;
1606 } __attribute__ ((packed)) header = {
1607 htonll(0x3e889045565a9LL),
1608 htonl(opt),
1609 htonl(reply_type),
1610 htonl(datasize),
1612 if(datasize < 0) {
1613 datasize = strlen((char*)data);
1614 header.datasize = htonl(datasize);
1616 socket_write(client, &header, sizeof(header));
1617 if(data != NULL) {
1618 socket_write(client, data, datasize);
1623 * Find the name of the file we have to serve. This will use g_strdup_printf
1624 * to put the IP address of the client inside a filename containing
1625 * "%s" (in the form as specified by the "virtstyle" option). That name
1626 * is then written to client->exportname.
1628 * @param net A socket connected to an nbd client
1629 * @param client information about the client. The IP address in human-readable
1630 * format will be written to a new char* buffer, the address of which will be
1631 * stored in client->clientname.
1632 * @return: 0 - OK, -1 - failed.
1634 int set_peername(int net, CLIENT *client) {
1635 struct sockaddr_storage netaddr;
1636 struct sockaddr* addr = (struct sockaddr*)&netaddr;
1637 socklen_t addrinlen = sizeof( struct sockaddr_storage );
1638 struct addrinfo hints;
1639 struct addrinfo *ai = NULL;
1640 char peername[NI_MAXHOST];
1641 char netname[NI_MAXHOST];
1642 char *tmp = NULL;
1643 int i;
1644 int e;
1646 if (getsockname(net, addr, &addrinlen) < 0) {
1647 msg(LOG_INFO, "getsockname failed: %m");
1648 return -1;
1651 if(netaddr.ss_family == AF_UNIX) {
1652 client->clientaddr.ss_family = AF_UNIX;
1653 strcpy(peername, "unix");
1654 } else {
1655 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1656 msg(LOG_INFO, "getpeername failed: %m");
1657 return -1;
1659 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1660 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1661 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1662 return -1;
1665 memset(&hints, '\0', sizeof (hints));
1666 hints.ai_flags = AI_ADDRCONFIG;
1667 e = getaddrinfo(peername, NULL, &hints, &ai);
1669 if(e != 0) {
1670 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
1671 freeaddrinfo(ai);
1672 return -1;
1676 if(strncmp(peername, "::ffff:", 7) == 0) {
1677 memmove(peername, peername+7, strlen(peername));
1680 switch(client->server->virtstyle) {
1681 case VIRT_NONE:
1682 msg(LOG_DEBUG, "virtualization is off");
1683 client->exportname=g_strdup(client->server->exportname);
1684 break;
1685 case VIRT_IPHASH:
1686 msg(LOG_DEBUG, "virtstyle iphash");
1687 for(i=0;i<strlen(peername);i++) {
1688 if(peername[i]=='.') {
1689 peername[i]='/';
1692 case VIRT_IPLIT:
1693 msg(LOG_DEBUG, "virtstyle ipliteral");
1694 client->exportname=g_strdup_printf(client->server->exportname, peername);
1695 break;
1696 case VIRT_CIDR:
1697 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
1698 memcpy(&netaddr, &(client->clientaddr), addrinlen);
1699 int addrbits;
1700 if(client->clientaddr.ss_family == AF_UNIX) {
1701 tmp = g_strdup(peername);
1702 } else {
1703 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
1704 if(ai->ai_family == AF_INET) {
1705 addrbits = 32;
1706 } else if(ai->ai_family == AF_INET6) {
1707 addrbits = 128;
1708 } else {
1709 g_assert_not_reached();
1711 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
1712 for(int i = 0; i < addrbits; i+=8) {
1713 int masklen = client->server->cidrlen - i;
1714 masklen = masklen > 0 ? masklen : 0;
1715 uint8_t mask = getmaskbyte(masklen);
1716 *addrptr &= mask;
1717 addrptr++;
1719 getnameinfo((struct sockaddr *) &netaddr, addrinlen,
1720 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1721 tmp=g_strdup_printf("%s/%s", netname, peername);
1724 if(tmp != NULL) {
1725 client->exportname=g_strdup_printf(client->server->exportname, tmp);
1726 g_free(tmp);
1729 break;
1732 if(ai) {
1733 freeaddrinfo(ai);
1735 msg(LOG_INFO, "connect from %s, assigned file is %s",
1736 peername, client->exportname);
1737 client->clientname=g_strdup(peername);
1738 return 0;
1741 int commit_diff(CLIENT* client, bool lock, int fhandle){
1742 int dirtycount = 0;
1743 int pagecount = client->exportsize/DIFFPAGESIZE;
1744 off_t offset;
1745 char* buf = malloc(sizeof(char)*DIFFPAGESIZE);
1747 for (int i=0; i<pagecount; i++){
1748 offset = DIFFPAGESIZE*i;
1749 if (lock)
1750 pthread_rwlock_wrlock(&client->export_lock);
1751 if (client->difmap[i] != (u32)-1){
1752 dirtycount += 1;
1753 DEBUG("flushing dirty page %d, offset %ld\n", i, offset);
1754 if (pread(client->difffile, buf, DIFFPAGESIZE, client->difmap[i]*DIFFPAGESIZE) != DIFFPAGESIZE) {
1755 msg(LOG_WARNING, "could not read while committing diff: %m");
1756 if(lock) {
1757 pthread_rwlock_unlock(&client->export_lock);
1759 break;
1761 if (pwrite(fhandle, buf, DIFFPAGESIZE, offset) != DIFFPAGESIZE) {
1762 msg(LOG_WARNING, "could not write while committing diff: %m");
1763 if (lock) {
1764 pthread_rwlock_unlock(&client->export_lock);
1766 break;
1768 client->difmap[i] = (u32)-1;
1770 if (lock)
1771 pthread_rwlock_unlock(&client->export_lock);
1774 free(buf);
1775 return dirtycount;
1778 void* wait_file(void *void_ptr) {
1779 CLIENT* client = (CLIENT *)void_ptr;
1780 FILE_INFO fi;
1781 GArray* export;
1782 mode_t mode = O_RDWR;
1783 int dirtycount;
1785 fi.fhandle = -1;
1786 fi.startoff = 0;
1788 while (fi.fhandle < 1){
1789 sem_wait(&file_wait_sem);
1790 msg(LOG_INFO, "checking for file %s", client->server->exportname);
1791 fi.fhandle = open(client->server->exportname, mode);
1794 msg(LOG_INFO, "File %s appeared, fd %d", client->server->exportname, fi.fhandle);
1796 // first time there may be lot of data so we lock only per page
1797 do {
1798 dirtycount = commit_diff(client, true, fi.fhandle);
1799 } while (dirtycount > 0);
1801 //last time we lock export for the whole time until we switch write destination
1802 pthread_rwlock_wrlock(&client->export_lock);
1803 do {
1804 dirtycount = commit_diff(client, false, fi.fhandle);
1805 } while (dirtycount > 0);
1807 export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1808 g_array_append_val(export, fi);
1810 client->export = export;
1811 pthread_rwlock_unlock(&client->export_lock);
1812 msg(LOG_INFO, "Waiting for file ended, switching to exported file %s", client->server->exportname);
1814 return NULL;
1818 * Set up client export array, which is an array of FILE_INFO.
1819 * Also, split a single exportfile into multiple ones, if that was asked.
1820 * @param client information on the client which we want to setup export for
1822 bool setupexport(CLIENT* client) {
1823 int i = 0;
1824 off_t laststartoff = 0, lastsize = 0;
1825 int multifile = (client->server->flags & F_MULTIFILE);
1826 int treefile = (client->server->flags & F_TREEFILES);
1827 int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1828 int cancreate = (client->server->expected_size) && !multifile;
1830 if (treefile || (client->server->flags & F_WAIT)) {
1831 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
1832 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
1834 if(client->server->flags & F_WAIT){
1835 pthread_t wait_file_thread;
1836 if (pthread_create(&wait_file_thread, NULL, wait_file, client)){
1837 DEBUG("failed to create wait_file thread");
1838 return false;
1842 } else {
1843 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1845 /* If multi-file, open as many files as we can.
1846 * If not, open exactly one file.
1847 * Calculate file sizes as we go to get total size. */
1848 for(i=0; ; i++) {
1849 FILE_INFO fi;
1850 gchar *tmpname;
1851 gchar* error_string;
1853 if (i)
1854 cancreate = 0;
1855 /* if expected_size is specified, and this is the first file, we can create the file */
1856 mode_t mode = (client->server->flags & F_READONLY) ?
1857 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1859 if (temporary) {
1860 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1861 DEBUG( "Opening %s\n", tmpname );
1862 fi.fhandle = mkstemp(tmpname);
1863 } else {
1864 if(multifile) {
1865 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1866 } else {
1867 tmpname=g_strdup(client->exportname);
1869 DEBUG( "Opening %s\n", tmpname );
1870 fi.fhandle = open(tmpname, mode, 0600);
1871 if(fi.fhandle == -1 && mode == O_RDWR) {
1872 /* Try again because maybe media was read-only */
1873 fi.fhandle = open(tmpname, O_RDONLY);
1874 if(fi.fhandle != -1) {
1875 /* Opening the base file in copyonwrite mode is
1876 * okay */
1877 if(!(client->server->flags & F_COPYONWRITE)) {
1878 client->server->flags |= F_AUTOREADONLY;
1879 client->server->flags |= F_READONLY;
1884 if(fi.fhandle == -1) {
1885 if(multifile && i>0)
1886 break;
1887 error_string=g_strdup_printf(
1888 "Could not open exported file %s: %%m",
1889 tmpname);
1890 err_nonfatal(error_string);
1891 return false;
1894 if (temporary) {
1895 unlink(tmpname); /* File will stick around whilst FD open */
1898 fi.startoff = laststartoff + lastsize;
1899 g_array_append_val(client->export, fi);
1900 g_free(tmpname);
1902 /* Starting offset and size of this file will be used to
1903 * calculate starting offset of next file */
1904 laststartoff = fi.startoff;
1905 lastsize = size_autodetect(fi.fhandle);
1907 /* If we created the file, it will be length zero */
1908 if (!lastsize && cancreate) {
1909 assert(!multifile);
1910 if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1911 err_nonfatal("Could not expand file: %m");
1912 return false;
1914 lastsize = client->server->expected_size;
1915 break; /* don't look for any more files */
1918 if(!multifile || temporary)
1919 break;
1922 /* Set export size to total calculated size */
1923 client->exportsize = laststartoff + lastsize;
1925 /* Export size may be overridden */
1926 if(client->server->expected_size) {
1927 /* desired size must be <= total calculated size */
1928 if(client->server->expected_size > client->exportsize) {
1929 err_nonfatal("Size of exported file is too big\n");
1930 return false;
1933 client->exportsize = client->server->expected_size;
1937 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1938 if(multifile) {
1939 msg(LOG_INFO, "Total number of files: %d", i);
1941 if(treefile) {
1942 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
1944 return true;
1947 bool copyonwrite_prepare(CLIENT* client) {
1948 off_t i;
1949 gchar* dir;
1950 gchar* export_base;
1951 if (client->server->cowdir != NULL) {
1952 dir = g_strdup(client->server->cowdir);
1953 } else {
1954 dir = g_strdup(dirname(client->exportname));
1956 export_base = g_strdup(basename(client->exportname));
1957 client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
1958 (int)getpid());
1959 g_free(dir);
1960 g_free(export_base);
1961 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1962 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1963 if (client->difffile<0) {
1964 err("Could not create diff file (%m)");
1965 return false;
1967 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) {
1968 err("Could not allocate memory");
1969 return false;
1971 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1;
1973 return true;
1976 void send_export_info(CLIENT* client, SERVER* server, bool maybe_zeroes) {
1977 uint64_t size_host = htonll((u64)(client->exportsize));
1978 uint16_t flags = NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_WRITE_ZEROES;
1980 socket_write(client, &size_host, 8);
1981 if (server->flags & F_READONLY)
1982 flags |= NBD_FLAG_READ_ONLY;
1983 if (server->flags & F_FLUSH)
1984 flags |= NBD_FLAG_SEND_FLUSH;
1985 if (server->flags & F_FUA)
1986 flags |= NBD_FLAG_SEND_FUA;
1987 if (server->flags & F_ROTATIONAL)
1988 flags |= NBD_FLAG_ROTATIONAL;
1989 if (server->flags & F_TRIM)
1990 flags |= NBD_FLAG_SEND_TRIM;
1991 if (!(server->flags & F_COPYONWRITE))
1992 flags |= NBD_FLAG_CAN_MULTI_CONN;
1993 flags = htons(flags);
1994 socket_write(client, &flags, sizeof(flags));
1995 if (!(glob_flags & F_NO_ZEROES) && maybe_zeroes) {
1996 char zeros[128];
1997 memset(zeros, '\0', sizeof(zeros));
1998 socket_write(client, zeros, 124);
2003 * Commit to exporting the chosen export
2005 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
2006 * a number of things (verify whether the client is allowed access, try
2007 * to open files, etc etc) before we're ready to actually serve the
2008 * export.
2010 * This function does all those things.
2012 * @param client the CLIENT structure with .server and .net members set
2013 * up correctly
2014 * @return true if the client is allowed access to the export, false
2015 * otherwise
2017 static bool commit_client(CLIENT* client, SERVER* server) {
2018 char acl;
2019 uint32_t len;
2021 client->server = serve_inc_ref(server);
2022 client->exportsize = OFFT_MAX;
2023 client->transactionlogfd = -1;
2024 if(pthread_mutex_init(&(client->lock), NULL)) {
2025 msg(LOG_ERR, "Unable to initialize mutex");
2026 return false;
2028 if (pthread_rwlock_init(&client->export_lock, NULL)){
2029 msg(LOG_ERR, "Unable to initialize write lock");
2030 return false;
2032 /* Check whether we exceeded the maximum number of allowed
2033 * clients already */
2034 if(dontfork) {
2035 acl = 'Y';
2036 } else {
2037 len = strlen(client->server->servename);
2038 writeit(commsocket, &len, sizeof len);
2039 writeit(commsocket, client->server->servename, len);
2040 readit(commsocket, &acl, 1);
2041 close(commsocket);
2043 switch(acl) {
2044 case 'N':
2045 msg(LOG_ERR, "Connection not allowed (too many clients)");
2046 return false;
2047 case 'X':
2048 msg(LOG_ERR, "Connection not allowed (unknown by parent?!?)");
2049 return false;
2052 /* Check whether the client is listed in the authfile */
2053 if (set_peername(client->net, client)) {
2054 msg(LOG_ERR, "Failed to set peername");
2055 return false;
2058 if (!authorized_client(client)) {
2059 msg(LOG_INFO, "Client '%s' is not authorized to access",
2060 client->clientname);
2061 return false;
2064 /* Set up the transactionlog, if we need one */
2065 if (client->server->transactionlog && (client->transactionlogfd == -1)) {
2066 if((client->transactionlogfd =
2067 open(client->server->transactionlog,
2068 O_WRONLY | O_CREAT,
2069 S_IRUSR | S_IWUSR)) ==
2070 -1) {
2071 msg(LOG_INFO, "Could not open transactionlog %s, moving on without it",
2072 client->server->transactionlog);
2076 /* Run any pre scripts that we may need */
2077 if (do_run(client->server->prerun, client->exportname)) {
2078 msg(LOG_INFO, "Client '%s' not allowed access by prerun script",
2079 client->clientname);
2080 return false;
2082 client->socket_closed = socket_closed_transmission;
2083 if(!setupexport(client)) {
2084 return false;
2087 if (client->server->flags & F_COPYONWRITE) {
2088 if(!copyonwrite_prepare(client)) {
2089 return false;
2093 if (client->server->flags & F_WAIT) {
2094 if(!copyonwrite_prepare(client)) {
2095 return false;
2099 setmysockopt(client->net);
2101 return true;
2104 static CLIENT* handle_export_name(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2105 uint32_t namelen;
2106 char* name;
2107 int i;
2109 socket_read(client, &namelen, sizeof(namelen));
2110 namelen = ntohl(namelen);
2111 if(namelen > 0) {
2112 name = malloc(namelen+1);
2113 name[namelen]=0;
2114 socket_read(client, name, namelen);
2115 } else {
2116 name = strdup("");
2118 for(i=0; i<servers->len; i++) {
2119 SERVER* serve = (g_array_index(servers, SERVER*, i));
2120 // hide exports that are TLS-only if we haven't negotiated TLS
2121 // yet
2122 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2123 continue;
2125 if(!strcmp(serve->servename, name)) {
2126 client->clientfeats = cflags;
2127 free(name);
2128 if(!commit_client(client, serve)) {
2129 return NULL;
2131 send_export_info(client, serve, true);
2132 return client;
2135 free(name);
2136 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2139 static void handle_list(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2140 uint32_t len;
2141 int i;
2142 char buf[1024];
2143 char *ptr = buf + sizeof(len);
2145 socket_read(client, &len, sizeof(len));
2146 len = ntohl(len);
2147 if(len) {
2148 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2150 if(!(glob_flags & F_LIST)) {
2151 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Listing of exports denied by server configuration");
2152 err_nonfatal("Client tried disallowed list option");
2153 return;
2155 for(i=0; i<servers->len; i++) {
2156 SERVER* serve = (g_array_index(servers, SERVER*, i));
2157 // Hide TLS-only exports if we haven't negotiated TLS yet
2158 if(!client->tls_session && (serve->flags & F_FORCEDTLS)) {
2159 continue;
2161 len = htonl(strlen(serve->servename));
2162 memcpy(buf, &len, sizeof(len));
2163 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len));
2164 send_reply(client, opt, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
2166 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2169 #if HAVE_GNUTLS
2170 static int verify_cert(gnutls_session_t session) {
2171 int ret;
2172 unsigned int status, cert_list_size;
2173 const gnutls_datum_t *cert_list;
2174 gnutls_x509_crt_t cert;
2175 time_t now = time(NULL);
2177 ret = gnutls_certificate_verify_peers2(session, &status);
2178 if(ret < 0 || status != 0 || gnutls_certificate_type_get(session) !=
2179 GNUTLS_CRT_X509) {
2180 goto err;
2183 if(gnutls_x509_crt_init(&cert) < 0) {
2184 goto err;
2187 cert_list = gnutls_certificate_get_peers(session, &cert_list_size);
2188 if(cert_list == NULL) {
2189 goto err;
2191 if(gnutls_x509_crt_import(cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) {
2192 goto err;
2194 if(gnutls_x509_crt_get_activation_time(cert) > now) {
2195 goto err;
2197 if(gnutls_x509_crt_get_expiration_time(cert) < now) {
2198 goto err;
2200 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2201 msg(LOG_INFO, "client certificate verification successful");
2202 return 0;
2203 err:
2204 msg(LOG_ERR, "E: client certificate verification failed");
2205 return GNUTLS_E_CERTIFICATE_ERROR;
2208 CLIENT* handle_starttls(CLIENT* client, int opt, GArray* servers, uint32_t cflags, struct generic_conf *genconf) {
2209 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2210 gnutls_certificate_credentials_t x509_cred;
2211 CLIENT* retval = client;
2212 gnutls_priority_t priority_cache;
2213 gnutls_session_t *session = g_new0(gnutls_session_t, 1);
2214 int ret;
2215 int len;
2217 socket_read(client, &len, sizeof(len));
2218 if(G_UNLIKELY(len != 0)) {
2219 char buf[1024*1024];
2220 consume(client, len, buf, sizeof(buf));
2221 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Sending a STARTTLS command with data is invalid");
2222 return NULL;
2225 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2227 check_rv(gnutls_certificate_allocate_credentials(&x509_cred));
2228 gnutls_certificate_set_verify_function(x509_cred, verify_cert);
2229 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred, genconf->cacertfile, GNUTLS_X509_FMT_PEM));
2230 check_rv(gnutls_certificate_set_x509_key_file(x509_cred, genconf->certfile, genconf->keyfile, GNUTLS_X509_FMT_PEM));
2231 check_rv(gnutls_priority_init(&priority_cache, genconf->tlsprio, NULL));
2232 check_rv(gnutls_init(session, GNUTLS_SERVER));
2233 check_rv(gnutls_priority_set(*session, priority_cache));
2234 check_rv(gnutls_credentials_set(*session, GNUTLS_CRD_CERTIFICATE, x509_cred));
2236 gnutls_certificate_server_set_request(*session, GNUTLS_CERT_REQUEST);
2237 #if GNUTLS_VERSION_NUMBER >= 0x030109
2238 gnutls_transport_set_int(*session, client->net);
2239 #else
2240 gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) (intptr_t) client->net);
2241 #endif
2242 do {
2243 ret = gnutls_handshake(*session);
2244 } while(ret < 0 && gnutls_error_is_fatal(ret) == 0);
2246 if (ret < 0) {
2247 err_nonfatal(gnutls_strerror(ret));
2248 gnutls_bye(*session, GNUTLS_SHUT_RDWR);
2249 gnutls_deinit(*session);
2250 g_free(session);
2251 return NULL;
2253 client->tls_session = session;
2254 client->socket_read = socket_read_tls;
2255 client->socket_write = socket_write_tls;
2256 #undef check_rv
2257 exit:
2258 if(retval == NULL && session != NULL) {
2259 g_free(session);
2261 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2262 if(retval != NULL && retval->server != NULL) {
2263 retval->server = NULL;
2265 return retval;
2267 #endif
2270 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
2272 * XXX this matches the proposal I sent out, rather than the officially
2273 * documented version of this command. Need to bring the two in sync
2274 * one way or the other.
2276 static bool handle_info(CLIENT* client, uint32_t opt, GArray* servers, uint32_t cflags) {
2277 uint32_t namelen, len;
2278 char *name;
2279 int i;
2280 SERVER *server = NULL;
2281 uint16_t n_requests;
2282 uint16_t request;
2283 char buf[1024];
2284 bool sent_export = false;
2285 uint32_t reptype = NBD_REP_ERR_UNKNOWN;
2286 char *msg = "Export unknown";
2288 socket_read(client, &len, sizeof(len));
2289 len = htonl(len);
2290 socket_read(client, &namelen, sizeof(namelen));
2291 namelen = htonl(namelen);
2292 if(namelen > (len - 6)) {
2293 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2294 socket_read(client, buf, len - sizeof(namelen));
2296 if(namelen > 0) {
2297 name = malloc(namelen + 1);
2298 name[namelen] = 0;
2299 socket_read(client, name, namelen);
2300 } else {
2301 name = strdup("");
2303 for(i=0; i<servers->len; i++) {
2304 SERVER *serve = (g_array_index(servers, SERVER*, i));
2305 if (!strcmp(serve->servename, name)) {
2306 if ((serve->flags & F_FORCEDTLS) && !client->tls_session) {
2307 reptype = NBD_REP_ERR_TLS_REQD;
2308 msg = "TLS is required for that export";
2309 continue;
2311 server = serve;
2314 free(name);
2315 socket_read(client, &n_requests, sizeof(n_requests));
2316 n_requests = ntohs(n_requests);
2317 if(!server) {
2318 consume(client, n_requests * sizeof(request), buf,
2319 sizeof(buf));
2320 send_reply(client, opt, reptype, -1, msg);
2321 return false;
2323 if (opt == NBD_OPT_GO) {
2324 client->clientfeats = cflags;
2325 if(!commit_client(client, server)) {
2326 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "Access denied by server configuration");
2327 return false;
2330 for(i=0; i<n_requests; i++) {
2331 socket_read(client, &request, sizeof(request));
2332 switch(ntohs(request)) {
2333 case NBD_INFO_EXPORT:
2334 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2335 socket_write(client, &request, 2);
2336 send_export_info(client, server, false);
2337 sent_export = true;
2338 break;
2339 default:
2340 // ignore all other options for now.
2341 break;
2344 if(!sent_export) {
2345 request = htons(NBD_INFO_EXPORT);
2346 send_reply(client, opt, NBD_REP_INFO, 12, NULL);
2347 socket_write(client, &request, 2);
2348 send_export_info(client, server, false);
2350 send_reply(client, opt, NBD_REP_ACK, 0, NULL);
2352 return true;
2356 * Do the initial negotiation.
2358 * @param net The socket we're doing the negotiation over.
2359 * @param servers The array of known servers.
2360 * @param genconf the global options (needed for accessing TLS config data)
2362 CLIENT* negotiate(int net, GArray* servers, struct generic_conf *genconf) {
2363 uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
2364 uint64_t magic;
2365 uint32_t cflags = 0;
2366 uint32_t opt;
2367 CLIENT* client = g_new0(CLIENT, 1);
2368 client->net = net;
2369 client->socket_read = socket_read_notls;
2370 client->socket_write = socket_write_notls;
2371 client->socket_closed = socket_closed_negotiate;
2373 assert(servers != NULL);
2374 socket_write(client, INIT_PASSWD, 8);
2375 magic = htonll(opts_magic);
2376 socket_write(client, &magic, sizeof(magic));
2378 smallflags = htons(smallflags);
2379 socket_write(client, &smallflags, sizeof(uint16_t));
2380 socket_read(client, &cflags, sizeof(cflags));
2381 cflags = htonl(cflags);
2382 if (cflags & NBD_FLAG_C_NO_ZEROES) {
2383 glob_flags |= F_NO_ZEROES;
2385 do {
2386 socket_read(client, &magic, sizeof(magic));
2387 magic = ntohll(magic);
2388 if(magic != opts_magic) {
2389 err_nonfatal("Negotiation failed/5a: magic mismatch");
2390 goto handler_err;
2392 socket_read(client, &opt, sizeof(opt));
2393 opt = ntohl(opt);
2394 if(client->tls_session == NULL
2395 && glob_flags & F_FORCEDTLS
2396 && opt != NBD_OPT_STARTTLS) {
2397 if(opt == NBD_OPT_EXPORT_NAME) {
2398 // can't send an error message for EXPORT_NAME,
2399 // so must do hard close
2400 goto handler_err;
2402 if(opt == NBD_OPT_ABORT) {
2403 // handled below
2404 break;
2406 consume_len(client);
2407 send_reply(client, opt, NBD_REP_ERR_TLS_REQD, -1, "TLS is required on this server");
2408 continue;
2410 switch(opt) {
2411 case NBD_OPT_EXPORT_NAME:
2412 // NBD_OPT_EXPORT_NAME must be the last
2413 // selected option, so return from here
2414 // if that is chosen.
2415 if(handle_export_name(client, opt, servers, cflags) != NULL) {
2416 return client;
2417 } else {
2418 goto handler_err;
2420 break;
2421 case NBD_OPT_LIST:
2422 handle_list(client, opt, servers, cflags);
2423 break;
2424 case NBD_OPT_ABORT:
2425 // handled below
2426 break;
2427 case NBD_OPT_STARTTLS:
2428 #if !HAVE_GNUTLS
2429 consume_len(client);
2430 send_reply(client, opt, NBD_REP_ERR_PLATFORM, -1, "This nbd-server was compiled without TLS support");
2431 #else
2432 if(client->tls_session != NULL) {
2433 consume_len(client);
2434 send_reply(client, opt, NBD_REP_ERR_INVALID, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
2435 continue;
2437 if(genconf->keyfile == NULL) {
2438 consume_len(client);
2439 send_reply(client, opt, NBD_REP_ERR_POLICY, -1, "TLS not allowed on this server");
2440 continue;
2442 if(handle_starttls(client, opt, servers, cflags, genconf) == NULL) {
2443 // can't recover from failed TLS negotiation.
2444 goto handler_err;
2446 #endif
2447 break;
2448 case NBD_OPT_GO:
2449 case NBD_OPT_INFO:
2450 if(handle_info(client, opt, servers, cflags) && opt == NBD_OPT_GO) {
2451 return client;
2453 break;
2454 default:
2455 consume_len(client);
2456 send_reply(client, opt, NBD_REP_ERR_UNSUP, -1, "The given option is unknown to this server implementation");
2457 break;
2459 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
2460 if(opt == NBD_OPT_ABORT) {
2461 err_nonfatal("Session terminated by client");
2462 goto handler_err;
2464 err_nonfatal("Weird things happened: reached end of negotiation without success");
2465 handler_err:
2466 g_free(client);
2467 return NULL;
/**
 * Translate a local errno value into the error number put in a reply.
 *
 * EPERM, EIO, ENOMEM and EINVAL map to 1, 5, 12 and 22; EFBIG, ENOSPC
 * and (where defined) EDQUOT all map to 28; anything else maps to 22.
 *
 * @param errcode a local errno value
 * @return the mapped error number, converted with htonl()
 */
static int nbd_errno(int errcode) {
	uint32_t wire_code;

	switch (errcode) {
	case EPERM:
		wire_code = 1;
		break;
	case EIO:
		wire_code = 5;
		break;
	case ENOMEM:
		wire_code = 12;
		break;
	case EFBIG:
	case ENOSPC:
#ifdef EDQUOT
	case EDQUOT:
#endif
		wire_code = 28; // ENOSPC
		break;
	case EINVAL:
	default:
		wire_code = 22; // EINVAL
		break;
	}
	return htonl(wire_code);
}
2491 static void package_dispose(struct work_package* package) {
2492 if (package->pipefd[0] > 0)
2493 close(package->pipefd[0]);
2494 if (package->pipefd[1] > 0)
2495 close(package->pipefd[1]);
2496 g_free(package->data);
2497 g_free(package->req);
2498 g_free(package);
2501 static int mkpipe(int pipefd[2], size_t len)
2503 if (len > MAX_PIPE_SIZE)
2504 return -1;
2505 if (pipe(pipefd))
2506 return -1;
2508 #ifdef HAVE_SPLICE
2509 if (fcntl(pipefd[1], F_SETPIPE_SZ, MAX_PIPE_SIZE) < MAX_PIPE_SIZE) {
2510 close(pipefd[0]);
2511 close(pipefd[1]);
2512 pipefd[0] = -1;
2513 pipefd[1] = -1;
2514 return -1;
2516 #endif
2518 return 0;
2521 struct work_package* package_create(CLIENT* client, struct nbd_request* req) {
2522 struct work_package* rv = calloc(sizeof (struct work_package), 1);
2524 rv->req = req;
2525 rv->client = client;
2526 rv->data = NULL;
2527 rv->pipefd[0] = -1;
2528 rv->pipefd[1] = -1;
2530 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2531 if (client->server->flags & F_SPLICE) {
2532 if (mkpipe(rv->pipefd, req->len))
2533 rv->data = malloc(req->len);
2534 } else {
2535 rv->data = malloc(req->len);
2539 return rv;
2542 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) {
2543 rep->magic = htonl(NBD_REPLY_MAGIC);
2544 rep->error = 0;
2545 memcpy(&(rep->handle), &(req->handle), sizeof(req->handle));
2548 #ifdef HAVE_SPLICE
2549 static int handle_splice_read(CLIENT *client, struct nbd_request *req)
2551 struct nbd_reply rep;
2552 int pipefd[2];
2554 // splice doesn't work with TLS
2555 if (client->tls_session != NULL)
2556 return -1;
2558 if (mkpipe(pipefd, req->len))
2559 return -1;
2561 if (expsplice(pipefd[1], req->from, req->len, client, SPLICE_IN, 0)) {
2562 close(pipefd[1]);
2563 close(pipefd[0]);
2564 return -1;
2567 DEBUG("handling read request (splice)\n");
2568 setup_reply(&rep, req);
2569 pthread_mutex_lock(&(client->lock));
2570 writeit(client->net, &rep, sizeof(rep));
2571 spliceit(pipefd[0], NULL, client->net, NULL, req->len);
2572 pthread_mutex_unlock(&(client->lock));
2573 close(pipefd[0]);
2574 close(pipefd[1]);
2575 return 0;
2577 #endif
2579 static void handle_normal_read(CLIENT *client, struct nbd_request *req)
2581 struct nbd_reply rep;
2582 void* buf = malloc(req->len);
2583 if(!buf) {
2584 err("Could not allocate memory for request");
2586 DEBUG("handling read request\n");
2587 setup_reply(&rep, req);
2588 if(expread(req->from, buf, req->len, client)) {
2589 DEBUG("Read failed: %m");
2590 rep.error = nbd_errno(errno);
2592 pthread_mutex_lock(&(client->lock));
2593 socket_write(client, &rep, sizeof rep);
2594 if(!rep.error) {
2595 socket_write(client, buf, req->len);
2597 pthread_mutex_unlock(&(client->lock));
2598 free(buf);
2601 static void handle_read(CLIENT* client, struct nbd_request* req)
2603 #ifdef HAVE_SPLICE
2605 * If we have splice set we want to try that first, and if that fails
2606 * for whatever reason we fall through to ye olde read.
2608 if (client->server->flags & F_SPLICE)
2609 if (!handle_splice_read(client, req))
2610 return;
2611 #endif
2612 handle_normal_read(client, req);
2615 static void handle_write(struct work_package *pkg)
2617 CLIENT *client = pkg->client;
2618 struct nbd_request *req = pkg->req;
2619 struct nbd_reply rep;
2620 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2622 DEBUG("handling write request\n");
2623 setup_reply(&rep, req);
2625 #ifdef HAVE_SPLICE
2626 if (!pkg->data) {
2627 if (expsplice(pkg->pipefd[0], req->from, req->len, client,
2628 SPLICE_OUT, fua)) {
2629 DEBUG("Splice failed: %m");
2630 rep.error = nbd_errno(errno);
2632 } else
2633 #endif
2635 if(expwrite(req->from, pkg->data, req->len, client, fua)) {
2636 DEBUG("Write failed: %m");
2637 rep.error = nbd_errno(errno);
2640 pthread_mutex_lock(&(client->lock));
2641 socket_write(client, &rep, sizeof rep);
2642 pthread_mutex_unlock(&(client->lock));
2645 static void handle_flush(CLIENT* client, struct nbd_request* req) {
2646 struct nbd_reply rep;
2647 DEBUG("handling flush request\n");
2648 setup_reply(&rep, req);
2649 if(expflush(client)) {
2650 DEBUG("Flush failed: %m");
2651 rep.error = nbd_errno(errno);
2653 pthread_mutex_lock(&(client->lock));
2654 socket_write(client, &rep, sizeof rep);
2655 pthread_mutex_unlock(&(client->lock));
2658 static void handle_trim(CLIENT* client, struct nbd_request* req) {
2659 struct nbd_reply rep;
2660 DEBUG("handling trim request\n");
2661 setup_reply(&rep, req);
2662 if(exptrim(req, client)) {
2663 DEBUG("Trim failed: %m");
2664 rep.error = nbd_errno(errno);
2666 pthread_mutex_lock(&(client->lock));
2667 socket_write(client, &rep, sizeof rep);
2668 pthread_mutex_unlock(&(client->lock));
2671 static void handle_write_zeroes(CLIENT* client, struct nbd_request* req) {
2672 struct nbd_reply rep;
2673 DEBUG("handling write_zeroes request\n");
2674 int fua = !!(req->type & NBD_CMD_FLAG_FUA);
2675 setup_reply(&rep, req);
2676 if(expwrite_zeroes(req, client, fua)) {
2677 DEBUG("Write_zeroes failed: %m");
2678 rep.error = nbd_errno(errno);
2680 // For now, don't trim
2681 // TODO: handle this far more efficiently with reference to the
2682 // actual backing driver
2683 pthread_mutex_lock(&(client->lock));
2684 socket_write(client, &rep, sizeof rep);
2685 pthread_mutex_unlock(&(client->lock));
2689 static bool bad_write(CLIENT* client, struct nbd_request* req) {
2690 if ((client->server->flags & F_READONLY) ||
2691 (client->server->flags & F_AUTOREADONLY)) {
2692 DEBUG("[WRITE to READONLY!]");
2693 return true;
2695 return false;
2698 static bool bad_range(CLIENT* client, struct nbd_request* req) {
2699 if(req->from > client->exportsize ||
2700 req->from + req->len > client->exportsize) {
2701 DEBUG("[out of bounds!]");
2702 return true;
2704 return false;
2707 static void handle_request(gpointer data, gpointer user_data) {
2708 struct work_package* package = (struct work_package*) data;
2709 uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND;
2710 uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND;
2711 struct nbd_reply rep;
2712 int err = EINVAL;
2714 if(flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
2715 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type);
2716 goto error;
2719 switch(type) {
2720 case NBD_CMD_READ:
2721 if (bad_range(package->client, package->req)) {
2722 goto error;
2724 handle_read(package->client, package->req);
2725 break;
2726 case NBD_CMD_WRITE:
2727 if (bad_write(package->client, package->req)) {
2728 err = EPERM;
2729 goto error;
2731 if (bad_range(package->client, package->req)) {
2732 err = ENOSPC;
2733 goto error;
2735 handle_write(package);
2736 break;
2737 case NBD_CMD_FLUSH:
2738 handle_flush(package->client, package->req);
2739 break;
2740 case NBD_CMD_TRIM:
2741 if (bad_write(package->client, package->req)) {
2742 err = EPERM;
2743 goto error;
2745 if (bad_range(package->client, package->req)) {
2746 goto error;
2748 handle_trim(package->client, package->req);
2749 break;
2750 case NBD_CMD_WRITE_ZEROES:
2751 if (bad_write(package->client, package->req)) {
2752 err = EPERM;
2753 goto error;
2755 if (bad_range(package->client, package->req)) {
2756 err = ENOSPC;
2757 goto error;
2759 handle_write_zeroes(package->client, package->req);
2760 break;
2761 default:
2762 msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type);
2763 goto error;
2765 goto end;
2766 error:
2767 setup_reply(&rep, package->req);
2768 rep.error = nbd_errno(err);
2769 pthread_mutex_lock(&(package->client->lock));
2770 socket_write(package->client, &rep, sizeof rep);
2771 pthread_mutex_unlock(&(package->client->lock));
2772 end:
2773 package_dispose(package);
2776 static int mainloop_threaded(CLIENT* client) {
2777 struct nbd_request* req;
2778 struct work_package* pkg;
2780 DEBUG("Entering request loop\n");
2781 while(1) {
2782 req = calloc(sizeof (struct nbd_request), 1);
2784 socket_read(client, req, sizeof(struct nbd_request));
2785 if(client->transactionlogfd != -1) {
2786 writeit(client->transactionlogfd, req, sizeof(struct nbd_request));
2789 req->from = ntohll(req->from);
2790 req->type = ntohl(req->type);
2791 req->len = ntohl(req->len);
2793 if(req->magic != htonl(NBD_REQUEST_MAGIC))
2794 err("Protocol error: not enough magic.");
2796 pkg = package_create(client, req);
2798 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
2799 #ifdef HAVE_SPLICE
2800 if ((client->server->flags & F_SPLICE) &&
2801 (req->len <= MAX_PIPE_SIZE && pkg->pipefd[1] > 0) &&
2802 (client->tls_session == NULL))
2803 spliceit(client->net, NULL, pkg->pipefd[1],
2804 NULL, req->len);
2805 else
2806 #endif
2807 socket_read(client, pkg->data, req->len);
2809 if(req->type == NBD_CMD_DISC) {
2810 finalize_client(client);
2811 return 0;
2813 g_thread_pool_push(tpool, pkg, NULL);
2818 * Destroy a pid_t*
2819 * @param data a pointer to pid_t which should be freed
2821 void destroy_pid_t(gpointer data) {
2822 g_free(data);
2825 static pid_t
2826 spawn_child(int* socket)
2828 pid_t pid;
2829 sigset_t newset;
2830 sigset_t oldset;
2831 int sockets[2];
2833 sigemptyset(&newset);
2834 sigaddset(&newset, SIGCHLD);
2835 sigaddset(&newset, SIGTERM);
2836 sigprocmask(SIG_BLOCK, &newset, &oldset);
2837 socketpair(AF_UNIX, SOCK_STREAM, 0, sockets);
2838 pid = fork();
2839 if (pid < 0) {
2840 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
2841 close(sockets[0]);
2842 close(sockets[1]);
2843 goto out;
2845 if (pid > 0) { /* Parent */
2846 pid_t *pidp;
2848 pidp = g_malloc(sizeof(pid_t));
2849 *pidp = pid;
2850 *socket = sockets[1];
2851 close(sockets[0]);
2852 g_hash_table_insert(children, pidp, pidp);
2853 goto out;
2855 /* Child */
2856 *socket = sockets[0];
2857 close(sockets[1]);
2858 /* Child's signal disposition is reset to default. */
2859 signal(SIGCHLD, SIG_DFL);
2860 signal(SIGTERM, SIG_DFL);
2861 signal(SIGHUP, SIG_DFL);
2862 sigemptyset(&oldset);
2863 out:
2864 sigprocmask(SIG_SETMASK, &oldset, NULL);
2865 return pid;
/**
 * Accept a pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative
 * value (after logging a non-fatal error) if accept() failed
 */
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peerlen = sizeof(peer);
	int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

	if (conn < 0) {
		err_nonfatal("Failed to accept socket connection: %m");
	}

	return conn;
}
2883 static void
2884 handle_modern_connection(GArray *const servers, const int sock, struct generic_conf *genconf)
2886 int net;
2887 pid_t pid;
2888 CLIENT *client = NULL;
2889 int sock_flags_old;
2890 int sock_flags_new;
2892 net = socket_accept(sock);
2893 if (net < 0)
2894 return;
2896 if (!dontfork) {
2897 pid = spawn_child(&commsocket);
2898 if (pid) {
2899 if (pid > 0) {
2900 msg(LOG_INFO, "Spawned a child process");
2901 g_array_append_val(childsocks, commsocket);
2903 if (pid < 0)
2904 msg(LOG_ERR, "Failed to spawn a child process");
2905 close(net);
2906 return;
2908 /* Child just continues. */
2911 sock_flags_old = fcntl(net, F_GETFL, 0);
2912 if (sock_flags_old == -1) {
2913 msg(LOG_ERR, "Failed to get socket flags");
2914 goto handler_err;
2917 sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2918 if (sock_flags_new != sock_flags_old &&
2919 fcntl(net, F_SETFL, sock_flags_new) == -1) {
2920 msg(LOG_ERR, "Failed to set socket to blocking mode");
2921 goto handler_err;
2924 client = negotiate(net, servers, genconf);
2925 if (!client) {
2926 msg(LOG_ERR, "Modern initial negotiation failed");
2927 goto handler_err;
2930 if (!dontfork) {
2931 int i;
2933 /* Free all root server resources here, because we are
2934 * currently in the child process serving one specific
2935 * connection. These are not simply needed anymore. */
2936 g_hash_table_destroy(children);
2937 children = NULL;
2938 for (i = 0; i < modernsocks->len; i++) {
2939 close(g_array_index(modernsocks, int, i));
2941 g_array_free(modernsocks, TRUE);
2943 /* Now that we are in the child process after a
2944 * succesful negotiation, we do not need the list of
2945 * servers anymore, get rid of it.*/
2946 g_array_free(servers, FALSE);
2949 msg(LOG_INFO, "Starting to serve");
2950 mainloop_threaded(client);
2951 exit(EXIT_SUCCESS);
2953 handler_err:
2954 close(net);
2955 g_free(client);
2957 if (!dontfork) {
2958 exit(EXIT_FAILURE);
2962 static int handle_childname(GArray* servers, int socket)
2964 uint32_t len;
2965 char *buf;
2966 int i, r, rt = 0;
2968 while(rt < sizeof(len)) {
2969 switch((r = read(socket, &len, sizeof len))) {
2970 case 0:
2971 return -1;
2972 case -1:
2973 err_nonfatal("Error reading from acl socket: %m");
2974 return -1;
2975 default:
2976 rt += r;
2977 break;
2980 buf = g_malloc0(len + 1);
2981 readit(socket, buf, len);
2982 buf[len] = 0;
2983 for(i=0; i<servers->len; i++) {
2984 SERVER* srv = g_array_index(servers, SERVER*, i);
2985 if(strcmp(srv->servename, buf) == 0) {
2986 if(srv->max_connections == 0 || srv->max_connections > srv->numclients) {
2987 writeit(socket, "Y", 1);
2988 srv->numclients++;
2989 } else {
2990 writeit(socket, "N", 1);
2992 goto exit;
2995 writeit(socket, "X", 1);
2996 exit:
2997 g_free(buf);
2998 return 0;
3002 * Return the index of the server whose servename matches the given
3003 * name.
3005 * @param servename a string to match
3006 * @param servers an array of servers
3007 * @return the first index of the server whose servename matches the
3008 * given name or -1 if one cannot be found
3010 static int get_index_by_servename(const gchar *const servename,
3011 const GArray *const servers) {
3012 int i;
3014 for (i = 0; i < servers->len; ++i) {
3015 const SERVER* server = g_array_index(servers, SERVER*, i);
3017 if (strcmp(servename, server->servename) == 0)
3018 return i;
3021 return -1;
3025 * Parse configuration files and add servers to the array if they don't
3026 * already exist there. The existence is tested by comparing
3027 * servenames. A server is appended to the array only if its servename
3028 * is unique among all other servers.
3030 * @param servers an array of servers
3031 * @param genconf a pointer to generic configuration
3032 * @return the number of new servers appended to the array, or -1 in
3033 * case of an error
3035 static int append_new_servers(GArray *const servers, struct generic_conf *genconf, GError **const gerror) {
3036 int i;
3037 GArray *new_servers;
3038 const int old_len = servers->len;
3039 int retval = -1;
3041 new_servers = parse_cfile(config_file_pos, genconf, true, gerror);
3042 g_thread_pool_set_max_threads(tpool, genconf->threads, NULL);
3043 if (!new_servers)
3044 goto out;
3046 for (i = 0; i < new_servers->len; ++i) {
3047 SERVER *new_server = g_array_index(new_servers, SERVER*, i);
3049 if (new_server->servename
3050 && -1 == get_index_by_servename(new_server->servename,
3051 servers)) {
3052 g_array_append_val(servers, new_server);
3056 retval = servers->len - old_len;
3057 out:
3058 g_array_free(new_servers, TRUE);
3060 return retval;
3063 void serveloop(GArray* servers, struct generic_conf *genconf) G_GNUC_NORETURN;
3065 * Loop through the available servers, and serve them. Never returns.
3067 void serveloop(GArray* servers, struct generic_conf *genconf) {
3068 int i;
3069 int mmax, max;
3070 fd_set mset;
3071 fd_set rset;
3072 sigset_t blocking_mask;
3073 sigset_t original_mask;
3076 * Set up the master fd_set. The set of descriptors we need
3077 * to select() for never changes anyway and it buys us a *lot*
3078 * of time to only build this once. However, if we ever choose
3079 * to not fork() for clients anymore, we may have to revisit
3080 * this.
3082 mmax=0;
3083 FD_ZERO(&mset);
3084 for(i=0;i<modernsocks->len;i++) {
3085 int sock = g_array_index(modernsocks, int, i);
3086 FD_SET(sock, &mset);
3087 mmax=sock>mmax?sock:mmax;
3090 /* Construct a signal mask which is used to make signal testing and
3091 * receiving an atomic operation to ensure no signal is received between
3092 * tests and blocking pselect(). */
3093 if (sigemptyset(&blocking_mask) == -1)
3094 err("failed to initialize blocking_mask: %m");
3096 if (sigaddset(&blocking_mask, SIGCHLD) == -1)
3097 err("failed to add SIGCHLD to blocking_mask: %m");
3099 if (sigaddset(&blocking_mask, SIGHUP) == -1)
3100 err("failed to add SIGHUP to blocking_mask: %m");
3102 if (sigaddset(&blocking_mask, SIGTERM) == -1)
3103 err("failed to add SIGTERM to blocking_mask: %m");
3105 if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
3106 err("failed to block signals: %m");
3108 for(;;) {
3109 if (is_sigterm_caught) {
3110 is_sigterm_caught = 0;
3112 g_hash_table_foreach(children, killchild, NULL);
3113 unlink(pidfname);
3115 exit(EXIT_SUCCESS);
3118 if (is_sigchld_caught) {
3119 int status;
3120 int* i;
3121 pid_t pid;
3123 is_sigchld_caught = 0;
3125 while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
3126 if (WIFEXITED(status)) {
3127 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
3129 i = g_hash_table_lookup(children, &pid);
3130 if (!i) {
3131 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
3132 } else {
3133 DEBUG("Removing %d from the list of children", pid);
3134 g_hash_table_remove(children, &pid);
3139 /* SIGHUP causes the root server process to reconfigure
3140 * itself and add new export servers for each newly
3141 * found export configuration group, i.e. spawn new
3142 * server processes for each previously non-existent
3143 * export. This does not alter old runtime configuration
3144 * but just appends new exports. */
3145 if (is_sighup_caught) {
3146 int n;
3147 GError *gerror = NULL;
3149 msg(LOG_INFO, "reconfiguration request received");
3150 is_sighup_caught = 0; /* Reset to allow catching
3151 * it again. */
3153 n = append_new_servers(servers, genconf, &gerror);
3154 if (n == -1)
3155 msg(LOG_ERR, "failed to append new servers: %s",
3156 gerror->message);
3158 for (i = servers->len - n; i < servers->len; ++i) {
3159 const SERVER *server = g_array_index(servers,
3160 SERVER*, i);
3162 msg(LOG_INFO, "reconfigured new server: %s",
3163 server->servename);
3167 memcpy(&rset, &mset, sizeof(fd_set));
3168 max=mmax;
3169 for(i=0;i<childsocks->len;i++) {
3170 int sock = g_array_index(childsocks, int, i);
3171 FD_SET(sock, &rset);
3172 max=sock>max?sock:max;
3175 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
3176 DEBUG("accept, ");
3177 for(i=0; i < modernsocks->len; i++) {
3178 int sock = g_array_index(modernsocks, int, i);
3179 if(!FD_ISSET(sock, &rset)) {
3180 continue;
3183 handle_modern_connection(servers, sock, genconf);
3185 for(i=0; i < childsocks->len; i++) {
3186 int sock = g_array_index(childsocks, int, i);
3188 if(FD_ISSET(sock, &rset)) {
3189 if(handle_childname(servers, sock) < 0) {
3190 close(sock);
3191 g_array_remove_index(childsocks, i);
3200 * Set server socket options.
3202 * @param socket a socket descriptor of the server
3204 * @param gerror a pointer to an error object pointer used for reporting
3205 * errors. On error, if gerror is not NULL, *gerror is set and -1
3206 * is returned.
3208 * @return 0 on success, -1 on error
3210 int dosockopts(const int socket, GError **const gerror) {
3211 #ifndef sun
3212 int yes=1;
3213 #else
3214 char yes='1';
3215 #endif /* sun */
3216 struct linger l;
3218 /* lose the pesky "Address already in use" error message */
3219 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
3220 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
3221 "failed to set socket option SO_REUSEADDR: %s",
3222 strerror(errno));
3223 return -1;
3225 l.l_onoff = 1;
3226 l.l_linger = 10;
3227 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
3228 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
3229 "failed to set socket option SO_LINGER: %s",
3230 strerror(errno));
3231 return -1;
3233 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
3234 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
3235 "failed to set socket option SO_KEEPALIVE: %s",
3236 strerror(errno));
3237 return -1;
3240 return 0;
3243 int open_unix(const gchar *const sockname, GError **const gerror) {
3244 struct sockaddr_un sa;
3245 int sock=-1;
3246 int retval=-1;
3248 memset(&sa, 0, sizeof(struct sockaddr_un));
3249 sa.sun_family = AF_UNIX;
3250 strncpy(sa.sun_path, sockname, sizeof sa.sun_path);
3251 sa.sun_path[sizeof(sa.sun_path)-1] = '\0';
3252 sock = socket(AF_UNIX, SOCK_STREAM, 0);
3253 if(sock < 0) {
3254 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3255 "failed to open a unix socket: "
3256 "failed to create socket: %s",
3257 strerror(errno));
3258 goto out;
3260 if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
3261 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3262 "failed to open a unix socket: "
3263 "failed to bind to address %s: %s",
3264 sockname, strerror(errno));
3265 goto out;
3267 if(listen(sock, 10)<0) {
3268 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3269 "failed to open a unix socket: "
3270 "failed to start listening: %s",
3271 strerror(errno));
3272 goto out;
3274 retval=0;
3275 g_array_append_val(modernsocks, sock);
3276 out:
3277 if(retval<0 && sock >= 0) {
3278 close(sock);
3281 return retval;
3284 int open_modern(const gchar *const addr, const gchar *const port,
3285 GError **const gerror) {
3286 struct addrinfo hints;
3287 struct addrinfo* ai = NULL;
3288 struct addrinfo* ai_bak = NULL;
3289 struct sock_flags;
3290 int e;
3291 int retval = -1;
3292 int sock = -1;
3293 gchar** addrs;
3294 gchar const* l_addr = addr;
3296 if(!addr || strlen(addr) == 0) {
3297 l_addr = "::, 0.0.0.0";
3300 addrs = g_strsplit_set(l_addr, ", \t", -1);
3302 for(int i=0; addrs[i]!=NULL; i++) {
3303 if(addrs[i][0] == '\0') {
3304 continue;
3306 memset(&hints, '\0', sizeof(hints));
3307 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
3308 hints.ai_socktype = SOCK_STREAM;
3309 hints.ai_family = AF_UNSPEC;
3310 hints.ai_protocol = IPPROTO_TCP;
3311 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai);
3312 ai_bak = ai;
3313 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) {
3314 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
3315 "failed to open a modern socket: "
3316 "failed to get address info: %s",
3317 gai_strerror(e));
3318 goto out;
3321 while(ai != NULL) {
3322 sock = -1;
3324 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
3325 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
3326 "failed to open a modern socket: "
3327 "failed to create a socket: %s",
3328 strerror(errno));
3329 goto out;
3332 if (dosockopts(sock, gerror) == -1) {
3333 g_prefix_error(gerror, "failed to open a modern socket: ");
3334 goto out;
3337 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
3339 * Some systems will return multiple entries for the
3340 * same address when we ask it for something
3341 * AF_UNSPEC, even though the first entry will
3342 * listen to both protocols. Other systems will
3343 * return multiple entries too, but we actually
3344 * do need to open both.
3346 * Handle this by ignoring EADDRINUSE if we've
3347 * already got at least one socket open
3349 if(errno == EADDRINUSE && modernsocks->len > 0) {
3350 goto next;
3352 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3353 "failed to open a modern socket: "
3354 "failed to bind an address to a socket: %s",
3355 strerror(errno));
3356 goto out;
3359 if(listen(sock, 10) <0) {
3360 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
3361 "failed to open a modern socket: "
3362 "failed to start listening on a socket: %s",
3363 strerror(errno));
3364 goto out;
3366 g_array_append_val(modernsocks, sock);
3367 next:
3368 ai = ai->ai_next;
3370 if(ai_bak) {
3371 freeaddrinfo(ai_bak);
3372 ai_bak=NULL;
3376 retval = 0;
3377 out:
3379 if (retval == -1 && sock >= 0) {
3380 close(sock);
3382 if(ai_bak)
3383 freeaddrinfo(ai_bak);
3385 return retval;
3389 * Connect our servers.
3391 void setup_servers(GArray *const servers, const gchar *const modernaddr,
3392 const gchar *const modernport, const gchar* unixsock,
3393 const gint flags ) {
3394 struct sigaction sa;
3396 if(unixsock != NULL) {
3397 GError* gerror = NULL;
3398 if(open_unix(unixsock, &gerror) == -1) {
3399 msg(LOG_ERR, "failed to setup servers: %s",
3400 gerror->message);
3401 g_clear_error(&gerror);
3402 exit(EXIT_FAILURE);
3405 if (((flags & F_DUAL_LISTEN) != 0) || (unixsock == NULL)) {
3406 GError *gerror = NULL;
3407 if (open_modern(modernaddr, modernport, &gerror) == -1) {
3408 msg(LOG_ERR, "failed to setup servers: %s",
3409 gerror->message);
3410 g_clear_error(&gerror);
3411 exit(EXIT_FAILURE);
3414 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
3416 sa.sa_handler = sigchld_handler;
3417 sigemptyset(&sa.sa_mask);
3418 sigaddset(&sa.sa_mask, SIGTERM);
3419 sa.sa_flags = SA_RESTART;
3420 if(sigaction(SIGCHLD, &sa, NULL) == -1)
3421 err("sigaction: %m");
3423 sa.sa_handler = sigterm_handler;
3424 sigemptyset(&sa.sa_mask);
3425 sigaddset(&sa.sa_mask, SIGCHLD);
3426 sa.sa_flags = SA_RESTART;
3427 if(sigaction(SIGTERM, &sa, NULL) == -1)
3428 err("sigaction: %m");
3430 sa.sa_handler = sighup_handler;
3431 sigemptyset(&sa.sa_mask);
3432 sa.sa_flags = SA_RESTART;
3433 if(sigaction(SIGHUP, &sa, NULL) == -1)
3434 err("sigaction: %m");
3436 sa.sa_handler = sigusr1_handler;
3437 sigemptyset(&sa.sa_mask);
3438 sa.sa_flags = SA_RESTART;
3439 if(sigaction(SIGUSR1, &sa, NULL) == -1)
3440 err("sigaction: %m");
3444 * Go daemon (unless we specified at compile time that we didn't want this)
3445 * @param serve the first server of our configuration. If its port is zero,
3446 * then do not daemonize, because we're doing inetd then. This parameter
3447 * is only used to create a PID file of the form
3448 * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
3450 #if !defined(NODAEMON)
3451 void daemonize() {
3452 FILE*pidf;
3454 if(daemon(0,0)<0) {
3455 err("daemon");
3457 if(!*pidfname) {
3458 strncpy(pidfname, "/var/run/nbd-server.pid", 255);
3460 pidf=fopen(pidfname, "w");
3461 if(pidf) {
3462 fprintf(pidf,"%d\n", (int)getpid());
3463 fclose(pidf);
3464 } else {
3465 perror("fopen");
3466 fprintf(stderr, "Not fatal; continuing");
3469 #else
3470 #define daemonize(serve)
3471 #endif /* !defined(NODAEMON) */
3474 * Everything beyond this point (in the file) is run in non-daemon mode.
3475 * The stuff above daemonize() isn't.
3479 * Set up user-ID and/or group-ID
3481 void dousers(const gchar *const username, const gchar *const groupname) {
3482 struct passwd *pw;
3483 struct group *gr;
3484 gchar* str;
3485 if (groupname) {
3486 gr = getgrnam(groupname);
3487 if(!gr) {
3488 str = g_strdup_printf("Invalid group name: %s", groupname);
3489 err(str);
3491 if(setgid(gr->gr_gid)<0) {
3492 err("Could not set GID: %m");
3495 if (username) {
3496 pw = getpwnam(username);
3497 if(!pw) {
3498 str = g_strdup_printf("Invalid user name: %s", username);
3499 err(str);
3501 setgroups(0, NULL);
3502 if(setuid(pw->pw_uid)<0) {
3503 err("Could not set UID: %m");
3508 #ifndef ISSERVER
3509 void glib_message_syslog_redirect(const gchar *log_domain,
3510 GLogLevelFlags log_level,
3511 const gchar *message,
3512 gpointer user_data)
3514 int level=LOG_DEBUG;
3516 switch( log_level )
3518 case G_LOG_FLAG_FATAL:
3519 case G_LOG_LEVEL_CRITICAL:
3520 case G_LOG_LEVEL_ERROR:
3521 level=LOG_ERR;
3522 break;
3523 case G_LOG_LEVEL_WARNING:
3524 level=LOG_WARNING;
3525 break;
3526 case G_LOG_LEVEL_MESSAGE:
3527 case G_LOG_LEVEL_INFO:
3528 level=LOG_INFO;
3529 break;
3530 case G_LOG_LEVEL_DEBUG:
3531 level=LOG_DEBUG;
3532 break;
3533 default:
3534 level=LOG_ERR;
3536 syslog(level, "%s", message);
3538 #endif
3541 * Main entry point...
3543 int main(int argc, char *argv[]) {
3544 SERVER *serve;
3545 GArray *servers;
3546 GError *gerr=NULL;
3547 struct generic_conf genconf;
3549 memset(&genconf, 0, sizeof(struct generic_conf));
3551 if (sizeof( struct nbd_request )!=28) {
3552 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
3553 exit(EXIT_FAILURE) ;
3556 modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
3557 childsocks = g_array_new(FALSE, FALSE, sizeof(int));
3559 logging(MY_NAME);
3560 config_file_pos = g_strdup(CFILE);
3561 serve=cmdline(argc, argv, &genconf);
3563 genconf.threads = 4;
3564 servers = parse_cfile(config_file_pos, &genconf, true, &gerr);
3566 /* Update global variables with parsed values. This will be
3567 * removed once we get rid of global configuration variables. */
3568 glob_flags |= genconf.flags;
3570 if(serve) {
3571 g_array_append_val(servers, serve);
3574 if(!servers || !servers->len) {
3575 if(gerr && !(gerr->domain == NBDS_ERR
3576 && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) {
3577 g_warning("Could not parse config file: %s",
3578 gerr ? gerr->message : "Unknown error");
3581 if(serve) {
3582 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
3585 if((!serve) && (!servers||!servers->len)) {
3586 if(gerr)
3587 g_message("No configured exports; quitting.");
3588 exit(EXIT_FAILURE);
3590 if (!dontfork)
3591 daemonize();
3592 #if HAVE_OLD_GLIB
3593 g_thread_init(NULL);
3594 #endif
3595 tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL);
3597 setup_servers(servers, genconf.modernaddr, genconf.modernport,
3598 genconf.unixsock, genconf.flags);
3599 dousers(genconf.user, genconf.group);
3601 #if HAVE_GNUTLS
3602 gnutls_global_init();
3603 static gnutls_dh_params_t dh_params;
3604 gnutls_dh_params_init(&dh_params);
3605 gnutls_dh_params_generate2(dh_params,
3606 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH,
3607 // Renamed in GnuTLS 3.3
3608 #if GNUTLS_VERSION_NUMBER >= 0x030300
3609 GNUTLS_SEC_PARAM_MEDIUM
3610 #else
3611 GNUTLS_SEC_PARAM_NORMAL
3612 #endif
3614 #endif
3616 if((genconf.modernport != NULL) && strcmp(genconf.modernport, "0")==0) {
3617 #ifndef ISSERVER
3618 err("inetd mode requires syslog");
3619 #endif
3620 CLIENT* client = negotiate(0, servers, &genconf);
3621 if(!client) {
3622 exit(EXIT_FAILURE);
3624 mainloop_threaded(client);
3625 return 0;
3628 serveloop(servers, &genconf);