2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
69 #include <sys/types.h>
70 #include <sys/socket.h>
72 #include <sys/select.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
81 #include <sys/param.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
96 #include <linux/falloc.h>
98 #include <arpa/inet.h>
101 #ifdef HAVE_SYS_DIR_H
104 #ifdef HAVE_SYS_DIRENT_H
105 #include <sys/dirent.h>
112 #include <inttypes.h>
120 #include <semaphore.h>
122 /* used in cliserv.h, so must come first */
123 #define MY_NAME "nbd_server"
125 #include "nbd-debug.h"
126 #include "netdb-compat.h"
128 #include "treefiles.h"
131 #include <sdp_inet.h>
134 #if HAVE_FSCTL_SET_ZERO_DATA
136 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
140 #include <winioctl.h>
143 /** Default position of the config file */
145 #define SYSCONFDIR "/etc"
147 #define CFILE SYSCONFDIR "/nbd-server/config"
150 #include <gnutls/gnutls.h>
151 #include <gnutls/x509.h>
154 /** Where our config file actually is */
155 gchar
* config_file_pos
;
160 /* Whether we should avoid forking */
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except the leftmost (sign) one.
 *
 * The value is computed in uintmax_t and only then converted to off_t:
 * shifting a signed 1 into the sign bit, as `(off_t)1 << (bits - 1)` does,
 * is undefined behaviour in C. The expansion is fully parenthesized so the
 * macro can be dropped into any larger expression.
 */
#define OFFT_MAX ((off_t)((((uintmax_t)1) << (sizeof(off_t) * 8 - 1)) - 1))
168 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
169 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
172 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
173 #define F_LIST 2 /**< Allow clients to list the exports on a server */
174 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
175 #define F_DUAL_LISTEN 8 /**< Listen on both TCP and unix socket */
176 // also accepts F_FORCEDTLS (which is 16384)
177 GHashTable
*children
;
178 char pidfname
[256]; /**< name of our PID file */
179 char default_authname
[] = SYSCONFDIR
"/nbd-server/allow"; /**< default name of allow file */
181 #define NEG_INIT (1 << 0)
182 #define NEG_OLD (1 << 1)
183 #define NEG_MODERN (1 << 2)
186 * If we want what the system really has set we'd have to read
187 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
189 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
195 /* Our thread pool */
198 /* A work package for the thread pool functions */
199 struct work_package
{
201 struct nbd_request
* req
;
203 void* data
; /**< for read requests */
206 static volatile sig_atomic_t is_sigchld_caught
; /**< Flag set by
211 static volatile sig_atomic_t is_sigterm_caught
; /**< Flag set by
216 static volatile sig_atomic_t is_sighup_caught
; /**< Flag set by SIGHUP
221 GArray
* modernsocks
; /**< Sockets for the modern handler. Not used
222 if a client was only specified on the
223 command line; only port used if
224 oldstyle is set to false (and then the
225 command-line client isn't used, gna gna).
226 This may be more than one socket on
227 systems that don't support serving IPv4
228 and IPv6 from the same socket (like,
230 GArray
* childsocks
; /**< parent-side sockets for communication with children */
231 int commsocket
; /**< child-side socket for communication with parent */
232 static sem_t file_wait_sem
;
234 bool logged_oversized
=false; /**< whether we logged oversized requests already */
237 * Type of configuration file values
240 PARAM_INT
, /**< This parameter is an integer */
241 PARAM_INT64
, /**< This parameter is an integer */
242 PARAM_STRING
, /**< This parameter is a string */
243 PARAM_BOOL
, /**< This parameter is a boolean */
247 * Configuration file values
250 gchar
*paramname
; /**< Name of the parameter, as it appears in
252 gboolean required
; /**< Whether this is a required (as opposed to
253 optional) parameter */
254 PARAM_TYPE ptype
; /**< Type of the parameter. */
255 gpointer target
; /**< Pointer to where the data of this
256 parameter should be written. If ptype is
257 PARAM_BOOL, the data is or'ed rather than
259 gint flagval
; /**< Flag mask for this parameter in case ptype
264 * Configuration file values of the "generic" section
/* Settings of the [generic] config file section; parse_cfile() fills these
 * in through its table of "generic" parameters and copies the result back
 * to the caller. */
266 struct generic_conf
{
267 gchar
*user
; /**< user we run the server as */
268 gchar
*group
; /**< group we run the server as */
269 gchar
*modernaddr
; /**< address of the modern socket */
270 gchar
*modernport
; /**< port of the modern socket */
271 gchar
*unixsock
; /**< file name of the unix domain socket */
272 gchar
*certfile
; /**< certificate file */
273 gchar
*keyfile
; /**< key file */
274 gchar
*cacertfile
; /**< CA certificate file */
275 gchar
*tlsprio
; /**< TLS priority string */
276 gint flags
; /**< global flags (F_OLDSTYLE, F_LIST, F_DUAL_LISTEN, F_FORCEDTLS bits) */
277 gint threads
; /**< maximum number of parallel threads we want to run */
281 * Translate a command name into human readable form
283 * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
284 * @return pointer to the command name
286 static inline const char * getcommandname(uint64_t command
) {
289 return "NBD_CMD_READ";
291 return "NBD_CMD_WRITE";
293 return "NBD_CMD_DISC";
295 return "NBD_CMD_FLUSH";
297 return "NBD_CMD_TRIM";
298 case NBD_CMD_WRITE_ZEROES
:
299 return "NBD_CMD_WRITE_ZEROES";
306 static int writeit_tls(gnutls_session_t s
, void *buf
, size_t len
) {
311 if ((res
= gnutls_record_send(s
, buf
, len
)) < 0 && !gnutls_error_is_fatal(res
)) {
312 m
= g_strdup_printf("issue while sending data: %s", gnutls_strerror(res
));
316 m
= g_strdup_printf("could not send data: %s", gnutls_strerror(res
));
328 static int readit_tls(gnutls_session_t s
, void *buf
, size_t len
) {
333 if((res
= gnutls_record_recv(s
, buf
, len
)) < 0 && !gnutls_error_is_fatal(res
)) {
334 m
= g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res
));
338 m
= g_strdup_printf("could not receive data: %s", gnutls_strerror(res
));
350 static int socket_read_tls(CLIENT
* client
, void *buf
, size_t len
) {
351 return readit_tls(*((gnutls_session_t
*)client
->tls_session
), buf
, len
);
354 static int socket_write_tls(CLIENT
* client
, void *buf
, size_t len
) {
355 return writeit_tls(*((gnutls_session_t
*)client
->tls_session
), buf
, len
);
357 #endif // HAVE_GNUTLS
359 static int socket_read_notls(CLIENT
* client
, void *buf
, size_t len
) {
360 return readit(client
->net
, buf
, len
);
363 static int socket_write_notls(CLIENT
* client
, void *buf
, size_t len
) {
364 return writeit(client
->net
, buf
, len
);
367 static void socket_read(CLIENT
* client
, void *buf
, size_t len
) {
368 g_assert(client
->socket_read
!= NULL
);
369 if(client
->socket_read(client
, buf
, len
)<0) {
370 g_assert(client
->socket_closed
!= NULL
);
371 client
->socket_closed(client
);
376 * Consume data from a socket that we don't want
378 * @param c the client to read from
379 * @param len the number of bytes to consume
380 * @param buf a buffer
381 * @param bufsiz the size of the buffer
383 static inline void consume(CLIENT
* c
, size_t len
, void * buf
, size_t bufsiz
) {
386 curlen
= (len
>bufsiz
)?bufsiz
:len
;
387 socket_read(c
, buf
, curlen
);
393 * Consume a length field and corresponding payload that we don't want
395 * @param c the client to read from
397 static inline void consume_len(CLIENT
* c
) {
401 socket_read(c
, &len
, sizeof(len
));
403 consume(c
, len
, buf
, sizeof(buf
));
406 static void socket_write(CLIENT
* client
, void *buf
, size_t len
) {
407 g_assert(client
->socket_write
!= NULL
);
408 if(client
->socket_write(client
, buf
, len
)<0) {
409 g_assert(client
->socket_closed
!= NULL
);
410 client
->socket_closed(client
);
414 static inline void socket_closed_negotiate(CLIENT
* client
) {
415 err("Negotiation failed: %m");
419 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
422 * @param command the command to be ran. Read from the config file
423 * @param file the file name we're about to export
425 int do_run(gchar
* command
, gchar
* file
) {
429 if(command
&& *command
) {
430 cmd
= g_strdup_printf(command
, file
);
437 static inline void finalize_client(CLIENT
* client
) {
438 g_thread_pool_free(tpool
, FALSE
, TRUE
);
439 do_run(client
->server
->postrun
, client
->exportname
);
440 if(client
->transactionlogfd
!= -1) {
441 close(client
->transactionlogfd
);
442 client
->transactionlogfd
= -1;
444 if(client
->server
->flags
& F_COPYONWRITE
) {
445 unlink(client
->difffilename
);
449 static inline void socket_closed_transmission(CLIENT
* client
) {
450 int saved_errno
= errno
;
451 finalize_client(client
);
453 err("Connection dropped: %m");
458 * Splice data between a pipe and a file descriptor
460 * @param fd_in The fd to splice from.
461 * @param off_in The fd_in offset to splice from.
462 * @param fd_out The fd to splice to.
463 * @param off_out The fd_out offset to splice to.
464 * @param len The length to splice.
466 static inline void spliceit(int fd_in
, loff_t
*off_in
, int fd_out
,
467 loff_t
*off_out
, size_t len
)
471 if ((ret
= splice(fd_in
, off_in
, fd_out
, off_out
, len
,
472 SPLICE_F_MOVE
)) <= 0)
473 err("Splice failed: %m");
480 * Print out a message about how to use nbd-server. Split out to a separate
481 * function so that we can call it from multiple places
484 printf("This is nbd-server version " VERSION
"\n");
485 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
486 "\t-r|--read-only\t\tread only\n"
487 "\t-m|--multi-file\t\tmultiple file\n"
488 "\t-c|--copy-on-write\tcopy on write\n"
489 "\t-C|--config-file\tspecify an alternate configuration file\n"
490 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
491 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
492 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
493 "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
494 "\t-V|--version\toutput the version and exit\n\n"
495 "\tif port is set to 0, stdin is used (for running from inetd).\n"
496 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
497 "\t\taddress of the machine trying to connect\n"
498 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
499 printf("Using configuration file %s\n", CFILE
);
500 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT
);
503 /* Dumps a config file section of the given SERVER*, and exits. */
504 void dump_section(SERVER
* serve
, gchar
* section_header
) {
505 printf("[%s]\n", section_header
);
506 printf("\texportname = %s\n", serve
->exportname
);
507 printf("\tlistenaddr = %s\n", serve
->listenaddr
);
508 if(serve
->flags
& F_READONLY
) {
509 printf("\treadonly = true\n");
511 if(serve
->flags
& F_MULTIFILE
) {
512 printf("\tmultifile = true\n");
514 if(serve
->flags
& F_TREEFILES
) {
515 printf("\ttreefiles = true\n");
517 if(serve
->flags
& F_COPYONWRITE
) {
518 printf("\tcopyonwrite = true\n");
520 if(serve
->expected_size
) {
521 printf("\tfilesize = %lld\n", (long long int)serve
->expected_size
);
523 if(serve
->authname
) {
524 printf("\tauthfile = %s\n", serve
->authname
);
530 * Parse the command line.
532 * @param argc the argc argument to main()
533 * @param argv the argv argument to main()
535 SERVER
* cmdline(int argc
, char *argv
[], struct generic_conf
*genconf
) {
539 struct option long_options
[] = {
540 {"read-only", no_argument
, NULL
, 'r'},
541 {"multi-file", no_argument
, NULL
, 'm'},
542 {"copy-on-write", no_argument
, NULL
, 'c'},
543 {"dont-fork", no_argument
, NULL
, 'd'},
544 {"authorize-file", required_argument
, NULL
, 'l'},
545 {"config-file", required_argument
, NULL
, 'C'},
546 {"pid-file", required_argument
, NULL
, 'p'},
547 {"output-config", required_argument
, NULL
, 'o'},
548 {"max-connection", required_argument
, NULL
, 'M'},
549 {"version", no_argument
, NULL
, 'V'},
556 bool do_output
=false;
557 gchar
* section_header
="";
563 serve
=g_new0(SERVER
, 1);
564 serve
->authname
= g_strdup(default_authname
);
565 serve
->virtstyle
=VIRT_IPLIT
;
566 while((c
=getopt_long(argc
, argv
, "-C:cwdl:mo:rp:M:V", long_options
, &i
))>=0) {
569 /* non-option argument */
570 switch(nonspecial
++) {
572 if(strchr(optarg
, ':') == strrchr(optarg
, ':')) {
573 addr_port
=g_strsplit(optarg
, ":", 2);
575 /* Check for "@" - maybe user using this separator
578 g_strfreev(addr_port
);
579 addr_port
=g_strsplit(optarg
, "@", 2);
582 addr_port
=g_strsplit(optarg
, "@", 2);
586 genconf
->modernport
=g_strdup(addr_port
[1]);
587 genconf
->modernaddr
=g_strdup(addr_port
[0]);
589 g_free(genconf
->modernaddr
);
590 genconf
->modernaddr
=NULL
;
591 genconf
->modernport
=g_strdup(addr_port
[0]);
593 g_strfreev(addr_port
);
596 serve
->exportname
= g_strdup(optarg
);
597 if(serve
->exportname
[0] != '/') {
598 fprintf(stderr
, "E: The to be exported file needs to be an absolute filename!\n");
603 last
=strlen(optarg
)-1;
605 if (suffix
== 'k' || suffix
== 'K' ||
606 suffix
== 'm' || suffix
== 'M')
608 es
= (off_t
)atoll(optarg
);
616 serve
->expected_size
= es
;
621 serve
->flags
|= F_READONLY
;
624 serve
->flags
|= F_MULTIFILE
;
628 section_header
= g_strdup(optarg
);
631 strncpy(pidfname
, optarg
, 256);
635 serve
->flags
|=F_COPYONWRITE
;
641 g_free(config_file_pos
);
642 config_file_pos
=g_strdup(optarg
);
645 g_free(serve
->authname
);
646 serve
->authname
=g_strdup(optarg
);
649 serve
->max_connections
= strtol(optarg
, NULL
, 0);
652 printf("This is nbd-server version " VERSION
"\n");
661 /* What's left: the port to export, the name of the to be exported
662 * file, and, optionally, the size of the file, in that order. */
667 serve
->servename
= "";
671 g_critical("Need a complete configuration on the command line to output a config file section!");
674 dump_section(serve
, section_header
);
679 /* forward definition of parse_cfile */
680 GArray
* parse_cfile(gchar
* f
, struct generic_conf
*genconf
, bool expect_generic
, GError
** e
);
682 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
683 #define NBD_D_TYPE de->d_type
691 * Parse config file snippets in a directory. Uses readdir() and friends
692 * to find files and open them, then passes them on to parse_cfile
693 * with expect_generic set to false
695 GArray
* do_cfile_dir(gchar
* dir
, struct generic_conf
*const genconf
, GError
** e
) {
696 DIR* dirh
= opendir(dir
);
699 GArray
* retval
= NULL
;
704 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_DIR_UNKNOWN
, "Invalid directory specified: %s", strerror(errno
));
708 while((de
= readdir(dirh
))) {
709 int saved_errno
=errno
;
710 fname
= g_build_filename(dir
, de
->d_name
, NULL
);
713 /* Filesystem doesn't return type of
714 * file through readdir. Run stat() on
715 * the file instead */
716 if(stat(fname
, &stbuf
)) {
720 if (!S_ISREG(stbuf
.st_mode
)) {
724 /* Skip unless the name ends with '.conf' */
725 if(strcmp((de
->d_name
+ strlen(de
->d_name
) - 5), ".conf")) {
728 tmp
= parse_cfile(fname
, genconf
, false, e
);
734 retval
= g_array_new(FALSE
, TRUE
, sizeof(SERVER
));
735 retval
= g_array_append_vals(retval
, tmp
->data
, tmp
->len
);
736 g_array_free(tmp
, TRUE
);
744 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_READDIR_ERR
, "Error trying to read directory: %s", strerror(errno
));
747 g_array_free(retval
, TRUE
);
756 * Parse the config file.
758 * @param f the name of the config file
760 * @param genconf a pointer to generic configuration which will get
761 * updated with parsed values. If NULL, then parsed generic
762 * configuration values are safely and silently discarded.
764 * @param e a GError. Error code can be any of the following:
765 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
766 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
767 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
769 * @param expect_generic if true, we expect a configuration file that
770 * contains a [generic] section. If false, we don't.
772 * @return a GArray of SERVER structures. If the config file is empty or does not
773 * exist, returns an empty GArray; if the config file contains an
774 * error, returns NULL, and e is set appropriately
776 GArray
* parse_cfile(gchar
* f
, struct generic_conf
*const genconf
, bool expect_generic
, GError
** e
) {
777 const char* DEFAULT_ERROR
= "Could not parse %s in group %s: %s";
778 const char* MISSING_REQUIRED_ERROR
= "Could not find required value %s in group %s: %s";
781 gchar
*virtstyle
=NULL
;
783 { "exportname", TRUE
, PARAM_STRING
, &(s
.exportname
), 0 },
784 { "authfile", FALSE
, PARAM_STRING
, &(s
.authname
), 0 },
785 { "filesize", FALSE
, PARAM_OFFT
, &(s
.expected_size
), 0 },
786 { "virtstyle", FALSE
, PARAM_STRING
, &(virtstyle
), 0 },
787 { "prerun", FALSE
, PARAM_STRING
, &(s
.prerun
), 0 },
788 { "postrun", FALSE
, PARAM_STRING
, &(s
.postrun
), 0 },
789 { "transactionlog", FALSE
, PARAM_STRING
, &(s
.transactionlog
), 0 },
790 { "cowdir", FALSE
, PARAM_STRING
, &(s
.cowdir
), 0 },
791 { "readonly", FALSE
, PARAM_BOOL
, &(s
.flags
), F_READONLY
},
792 { "multifile", FALSE
, PARAM_BOOL
, &(s
.flags
), F_MULTIFILE
},
793 { "treefiles", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TREEFILES
},
794 { "copyonwrite", FALSE
, PARAM_BOOL
, &(s
.flags
), F_COPYONWRITE
},
795 { "waitfile", FALSE
, PARAM_BOOL
, &(s
.flags
), F_WAIT
},
796 { "sparse_cow", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SPARSE
},
797 { "sdp", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SDP
},
798 { "sync", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SYNC
},
799 { "flush", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FLUSH
},
800 { "fua", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FUA
},
801 { "rotational", FALSE
, PARAM_BOOL
, &(s
.flags
), F_ROTATIONAL
},
802 { "temporary", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TEMPORARY
},
803 { "trim", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TRIM
},
804 { "listenaddr", FALSE
, PARAM_STRING
, &(s
.listenaddr
), 0 },
805 { "maxconnections", FALSE
, PARAM_INT
, &(s
.max_connections
), 0 },
806 { "force_tls", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FORCEDTLS
},
807 { "splice", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SPLICE
},
809 const int lp_size
=sizeof(lp
)/sizeof(PARAM
);
810 struct generic_conf genconftmp
;
812 { "user", FALSE
, PARAM_STRING
, &(genconftmp
.user
), 0 },
813 { "group", FALSE
, PARAM_STRING
, &(genconftmp
.group
), 0 },
814 { "oldstyle", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_OLDSTYLE
}, // only left here so we can issue an appropriate error message when the option is used
815 { "listenaddr", FALSE
, PARAM_STRING
, &(genconftmp
.modernaddr
), 0 },
816 { "port", FALSE
, PARAM_STRING
, &(genconftmp
.modernport
), 0 },
817 { "includedir", FALSE
, PARAM_STRING
, &cfdir
, 0 },
818 { "allowlist", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_LIST
},
819 { "unixsock", FALSE
, PARAM_STRING
, &(genconftmp
.unixsock
), 0 },
820 { "duallisten", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_DUAL_LISTEN
}, // Used to listen on both TCP and unix socket
821 { "max_threads", FALSE
, PARAM_INT
, &(genconftmp
.threads
), 0 },
822 { "force_tls", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_FORCEDTLS
},
823 { "certfile", FALSE
, PARAM_STRING
, &(genconftmp
.certfile
), 0 },
824 { "keyfile", FALSE
, PARAM_STRING
, &(genconftmp
.keyfile
), 0 },
825 { "cacertfile", FALSE
, PARAM_STRING
, &(genconftmp
.cacertfile
), 0 },
826 { "tlsprio", FALSE
, PARAM_STRING
, &(genconftmp
.tlsprio
), 0 },
829 int p_size
=sizeof(gp
)/sizeof(PARAM
);
832 const char *err_msg
=NULL
;
843 memset(&genconftmp
, 0, sizeof(struct generic_conf
));
845 genconftmp
.tlsprio
= "NORMAL:-VERS-TLS-ALL:+VERS-TLS1.2:%SERVER_PRECEDENCE";
848 /* Use the passed configuration values as defaults. The
849 * parsing algorithm below updates all parameter targets
850 * found from configuration files. */
851 memcpy(&genconftmp
, genconf
, sizeof(struct generic_conf
));
854 cfile
= g_key_file_new();
855 retval
= g_array_new(FALSE
, TRUE
, sizeof(SERVER
));
856 if(!g_key_file_load_from_file(cfile
, f
, G_KEY_FILE_KEEP_COMMENTS
|
857 G_KEY_FILE_KEEP_TRANSLATIONS
, &err
)) {
858 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_NOTFOUND
, "Could not open config file %s: %s",
860 g_key_file_free(cfile
);
863 startgroup
= g_key_file_get_start_group(cfile
);
864 if((!startgroup
|| strcmp(startgroup
, "generic")) && expect_generic
) {
865 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_MISSING_GENERIC
, "Config file does not contain the [generic] group!");
866 g_key_file_free(cfile
);
869 groups
= g_key_file_get_groups(cfile
, NULL
);
870 for(i
=0;groups
[i
];i
++) {
871 memset(&s
, '\0', sizeof(SERVER
));
873 /* After the [generic] group or when we're parsing an include
874 * directory, start parsing exports */
875 if(i
==1 || !expect_generic
) {
879 for(j
=0;j
<p_size
;j
++) {
880 assert(p
[j
].target
!= NULL
);
881 assert(p
[j
].ptype
==PARAM_INT
||p
[j
].ptype
==PARAM_STRING
||p
[j
].ptype
==PARAM_BOOL
||p
[j
].ptype
==PARAM_INT64
);
884 ival
= g_key_file_get_integer(cfile
,
889 *((gint
*)p
[j
].target
) = ival
;
893 i64val
= g_key_file_get_int64(cfile
,
898 *((gint64
*)p
[j
].target
) = i64val
;
902 sval
= g_key_file_get_string(cfile
,
907 *((gchar
**)p
[j
].target
) = sval
;
911 bval
= g_key_file_get_boolean(cfile
,
913 p
[j
].paramname
, &err
);
916 *((gint
*)p
[j
].target
) |= p
[j
].flagval
;
918 *((gint
*)p
[j
].target
) &= ~(p
[j
].flagval
);
924 if(err
->code
== G_KEY_FILE_ERROR_KEY_NOT_FOUND
) {
926 /* Ignore not-found error for optional values */
930 err_msg
= MISSING_REQUIRED_ERROR
;
933 err_msg
= DEFAULT_ERROR
;
935 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, err_msg
, p
[j
].paramname
, groups
[i
], err
->message
);
936 g_array_free(retval
, TRUE
);
938 g_key_file_free(cfile
);
943 if(!strncmp(virtstyle
, "none", 4)) {
944 s
.virtstyle
=VIRT_NONE
;
945 } else if(!strncmp(virtstyle
, "ipliteral", 9)) {
946 s
.virtstyle
=VIRT_IPLIT
;
947 } else if(!strncmp(virtstyle
, "iphash", 6)) {
948 s
.virtstyle
=VIRT_IPHASH
;
949 } else if(!strncmp(virtstyle
, "cidrhash", 8)) {
950 s
.virtstyle
=VIRT_CIDR
;
951 if(strlen(virtstyle
)<10) {
952 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle
, groups
[i
]);
953 g_array_free(retval
, TRUE
);
954 g_key_file_free(cfile
);
957 s
.cidrlen
=strtol(virtstyle
+8, NULL
, 0);
959 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, "Invalid value %s for parameter virtstyle in group %s", virtstyle
, groups
[i
]);
960 g_array_free(retval
, TRUE
);
961 g_key_file_free(cfile
);
965 s
.virtstyle
=VIRT_IPLIT
;
967 if(genconftmp
.flags
& F_OLDSTYLE
) {
968 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
969 g_message("Exiting.");
973 if (s
.flags
& F_SPLICE
) {
974 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
, "This nbd-server was built without splice support, yet group %s uses it", groups
[i
]);
975 g_array_free(retval
, TRUE
);
976 g_key_file_free(cfile
);
980 /* We can't mix copyonwrite and splice. */
981 if ((s
.flags
& F_COPYONWRITE
) && (s
.flags
& F_SPLICE
)) {
982 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_INVALID_SPLICE
,
983 "Cannot mix copyonwrite with splice for an export in group %s",
985 g_array_free(retval
, TRUE
);
986 g_key_file_free(cfile
);
989 if ((s
.flags
& F_COPYONWRITE
) && (s
.flags
& F_WAIT
)) {
990 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_INVALID_WAIT
,
991 "Cannot mix copyonwrite with waitfile for an export in group %s",
993 g_array_free(retval
, TRUE
);
994 g_key_file_free(cfile
);
997 /* Don't need to free this, it's not our string */
999 /* Don't append values for the [generic] group */
1000 if(i
>0 || !expect_generic
) {
1001 s
.servename
= groups
[i
];
1003 g_array_append_val(retval
, s
);
1006 if(s
.flags
& F_SDP
) {
1007 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
, "This nbd-server was built without support for SDP, yet group %s uses it", groups
[i
]);
1008 g_array_free(retval
, TRUE
);
1009 g_key_file_free(cfile
);
1014 g_key_file_free(cfile
);
1016 GArray
* extra
= do_cfile_dir(cfdir
, &genconftmp
, e
);
1018 retval
= g_array_append_vals(retval
, extra
->data
, extra
->len
);
1020 g_array_free(extra
, TRUE
);
1023 g_array_free(retval
, TRUE
);
1028 if(i
==1 && expect_generic
) {
1029 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_NO_EXPORTS
, "The config file does not specify any exports");
1033 /* Return the updated generic configuration through the
1034 * pointer parameter. */
1035 memcpy(genconf
, &genconftmp
, sizeof(struct generic_conf
));
1042 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1043 * main loop of the root server process. This allows us to separate the signal
1044 * catching from th actual task triggered by SIGCHLD and hence processing in the
1045 * interrupt context is kept as minimial as possible.
1047 * @param s the signal we're handling (must be SIGCHLD, or something
1048 * is severely wrong)
1050 static void sigchld_handler(const int s G_GNUC_UNUSED
) {
1051 is_sigchld_caught
= 1;
1055 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1057 * @param key the key
1058 * @param value the value corresponding to the above key
1059 * @param user_data a pointer which we always set to 1, so that we know what
1062 void killchild(gpointer key
, gpointer value
, gpointer user_data
) {
1065 kill(*pid
, SIGTERM
);
1069 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1070 * main loop of the root server process. This allows us to separate the signal
1071 * catching from th actual task triggered by SIGTERM and hence processing in the
1072 * interrupt context is kept as minimial as possible.
1074 * @param s the signal we're handling (must be SIGTERM, or something
1075 * is severely wrong).
1077 static void sigterm_handler(const int s G_GNUC_UNUSED
) {
1078 is_sigterm_caught
= 1;
1082 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1083 * the main loop of the root server process. This allows us to separate
1084 * the signal catching from th actual task triggered by SIGHUP and hence
1085 * processing in the interrupt context is kept as minimial as possible.
1087 * @param s the signal we're handling (must be SIGHUP, or something
1088 * is severely wrong).
1090 static void sighup_handler(const int s G_GNUC_UNUSED
) {
1091 is_sighup_caught
= 1;
1094 static void sigusr1_handler(const int s G_GNUC_UNUSED
) {
1095 msg(LOG_INFO
, "Got SIGUSR1");
1096 sem_post(&file_wait_sem
);
1100 * Get the file handle and offset, given an export offset.
1102 * @param client The client we're serving for
1103 * @param a The offset to get corresponding file/offset for
1104 * @param fhandle [out] File descriptor
1105 * @param foffset [out] Offset into fhandle
1106 * @param maxbytes [out] Tells how many bytes can be read/written
1107 * from fhandle starting at foffset (0 if there is no limit)
1108 * @return 0 on success, -1 on failure
1110 int get_filepos(CLIENT
*client
, off_t a
, int* fhandle
, off_t
* foffset
, size_t* maxbytes
) {
1111 GArray
* const export
= client
->export
;
1113 /* Negative offset not allowed */
1117 /* Open separate file for treefiles */
1118 if (client
->server
->flags
& F_TREEFILES
) {
1119 *foffset
= a
% TREEPAGESIZE
;
1120 *maxbytes
= (( 1 + (a
/TREEPAGESIZE
) ) * TREEPAGESIZE
) - a
; // start position of next block
1121 *fhandle
= open_treefile(client
->exportname
, ((client
->server
->flags
& F_READONLY
) ? O_RDONLY
: O_RDWR
), client
->exportsize
,a
, &client
->lock
);
1125 /* Binary search for last file with starting offset <= a */
1128 int end
= export
->len
- 1;
1129 while( start
<= end
) {
1130 int mid
= (start
+ end
) / 2;
1131 fi
= g_array_index(export
, FILE_INFO
, mid
);
1132 if( fi
.startoff
< a
) {
1134 } else if( fi
.startoff
> a
) {
1142 /* end should never go negative, since first startoff is 0 and a >= 0 */
1145 fi
= g_array_index(export
, FILE_INFO
, end
);
1146 *fhandle
= fi
.fhandle
;
1147 *foffset
= a
- fi
.startoff
;
1149 if( end
+1 < export
->len
) {
1150 FILE_INFO fi_next
= g_array_index(export
, FILE_INFO
, end
+1);
1151 *maxbytes
= fi_next
.startoff
- a
;
1158 * Write an amount of bytes at a given offset to the right file. This
1159 * abstracts the write-side of the multiple file option.
1161 * @param a The offset where the write should start
1162 * @param buf The buffer to write from
1163 * @param len The length of buf
1164 * @param client The client we're serving for
1165 * @param fua Flag to indicate 'Force Unit Access'
1166 * @return The number of bytes actually written, or -1 in case of an error
1168 ssize_t
rawexpwrite(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1174 if(get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1176 if(maxbytes
&& len
> maxbytes
)
1179 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle
, (long long unsigned)foffset
, (unsigned int)len
, fua
);
1181 retval
= pwrite(fhandle
, buf
, len
, foffset
);
1182 if(client
->server
->flags
& F_SYNC
) {
1186 /* This is where we would do the following
1187 * #ifdef USE_SYNC_FILE_RANGE
1188 * However, we don't, for the reasons set out below
1189 * by Christoph Hellwig <hch@infradead.org>
1192 * fdatasync is equivalent to fsync except that it does not flush
1193 * non-essential metadata (basically just timestamps in practice), but it
1194 * does flush metadata requried to find the data again, e.g. allocation
1195 * information and extent maps. sync_file_range does nothing but flush
1196 * out pagecache content - it means you basically won't get your data
1197 * back in case of a crash if you either:
1199 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1200 * b) are using a sparse file on a filesystem
1201 * c) are using a fallocate-preallocated file on a filesystem
1202 * d) use any file on a COW filesystem like btrfs
1204 * e.g. it only does anything useful for you if you do not have a volatile
1205 * write cache, and either use a raw block device node, or just overwrite
1206 * an already fully allocated (and not preallocated) file on a non-COW
1210 * What we should do is open a second FD with O_DSYNC set, then write to
1211 * that when appropriate. However, with a Linux client, every REQ_FUA
1212 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1217 sync_file_range(fhandle
, foffset
, len
,
1218 SYNC_FILE_RANGE_WAIT_BEFORE
| SYNC_FILE_RANGE_WRITE
|
1219 SYNC_FILE_RANGE_WAIT_AFTER
);
1224 /* close file pointer in case of treefiles */
1225 if (client
->server
->flags
& F_TREEFILES
) {
1232 * Call rawexpwrite repeatedly until all data has been written.
1234 * @param a The offset where the write should start
1235 * @param buf The buffer to write from
1236 * @param len The length of buf
1237 * @param client The client we're serving for
1238 * @param fua Flag to indicate 'Force Unit Access'
1239 * @return 0 on success, nonzero on failure
1241 int rawexpwrite_fully(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1244 while(len
> 0 && (ret
=rawexpwrite(a
, buf
, len
, client
, fua
)) > 0 ) {
1249 return (ret
< 0 || len
!= 0);
1253 * Read an amount of bytes at a given offset from the right file. This
1254 * abstracts the read-side of the multiple files option.
1256 * @param a The offset where the read should start
1257 * @param buf A buffer to read into
1258 * @param len The size of buf
1259 * @param client The client we're serving for
1260 * @return The number of bytes actually read, or -1 in case of an
1263 ssize_t
rawexpread(off_t a
, char *buf
, size_t len
, CLIENT
*client
) {
1269 if(get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1271 if(maxbytes
&& len
> maxbytes
)
1274 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle
, (long long unsigned int)foffset
, (unsigned int)len
);
1276 retval
= pread(fhandle
, buf
, len
, foffset
);
1277 if (client
->server
->flags
& F_TREEFILES
) {
1284 * Call rawexpread repeatedly until all data has been read.
1285 * @return 0 on success, nonzero on failure
1287 int rawexpread_fully(off_t a
, char *buf
, size_t len
, CLIENT
*client
) {
1290 while(len
> 0 && (ret
=rawexpread(a
, buf
, len
, client
)) > 0 ) {
1295 return (ret
< 0 || len
!= 0);
1299 int rawexpsplice(int pipe
, off_t a
, size_t len
, CLIENT
*client
, int dir
,
1307 if (get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1309 if (maxbytes
&& len
> maxbytes
)
1312 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1313 (dir
== SPLICE_IN
) ? "from" : "to", fhandle
,
1314 (unsigned long long)a
, (unsigned)len
);
1317 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1318 * it might, so go ahead and use it.
1320 if (dir
== SPLICE_IN
) {
1321 retval
= splice(fhandle
, &foffset
, pipe
, NULL
, len
,
1324 retval
= splice(pipe
, NULL
, fhandle
, &foffset
, len
,
1326 if (client
->server
->flags
& F_SYNC
)
1331 if (client
->server
->flags
& F_TREEFILES
)
1337 * Splice an amount of bytes from the given offset from/into the right file
1338 * from/into the given pipe.
1339 * @param pipe The pipe we are using for this splice.
1340 * @param a The offset of the file we are operating on.
1341 * @param len The length of the splice.
1342 * @param client The client we're splicing for.
1343 * @param dir The direction we are doing the splice in.
1344 * @param fua Set if this is a write and we need to fua.
1345 * @return 0 on success, nonzero on failure.
1347 int expsplice(int pipe
, off_t a
, size_t len
, CLIENT
*client
, int dir
, int fua
)
1352 (ret
= rawexpsplice(pipe
, a
, len
, client
, dir
, fua
)) > 0) {
1356 return (ret
< 0 || len
!= 0);
1358 #endif /* HAVE_SPLICE */
1361 * Read an amount of bytes at a given offset from the right file. This
1362 * abstracts the read-side of the copyonwrite stuff, and calls
1363 * rawexpread() with the right parameters to do the actual work.
1364 * @param a The offset where the read should start
1365 * @param buf A buffer to read into
1366 * @param len The size of buf
1367 * @param client The client we're going to read for
1368 * @return 0 on success, nonzero on failure
1370 int expread(off_t a
, char *buf
, size_t len
, CLIENT
*client
) {
1371 off_t rdlen
, offset
;
1372 off_t mapcnt
, mapl
, maph
, pagestart
;
1374 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len
, (unsigned long long)a
);
1376 if (!(client
->server
->flags
& F_COPYONWRITE
) && !((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)))
1377 return(rawexpread_fully(a
, buf
, len
, client
));
1379 mapl
=a
/DIFFPAGESIZE
; maph
=(a
+len
-1)/DIFFPAGESIZE
;
1381 for (mapcnt
=mapl
;mapcnt
<=maph
;mapcnt
++) {
1382 pagestart
=mapcnt
*DIFFPAGESIZE
;
1384 rdlen
=(0<DIFFPAGESIZE
-offset
&& len
<(size_t)(DIFFPAGESIZE
-offset
)) ?
1385 len
: (size_t)DIFFPAGESIZE
-offset
;
1386 if (!(client
->server
->flags
& F_COPYONWRITE
))
1387 pthread_rwlock_rdlock(&client
->export_lock
);
1388 if (client
->difmap
[mapcnt
]!=(u32
)(-1)) { /* the block is already there */
1389 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt
,
1390 (unsigned long)(client
->difmap
[mapcnt
]));
1391 if (pread(client
->difffile
, buf
, rdlen
, client
->difmap
[mapcnt
]*DIFFPAGESIZE
+offset
) != rdlen
) goto fail
;
1392 } else { /* the block is not there */
1393 if ((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)){
1394 DEBUG("Page %llu is not here, and waiting for file\n",
1395 (unsigned long long)mapcnt
);
1398 DEBUG("Page %llu is not here, we read the original one\n",
1399 (unsigned long long)mapcnt
);
1400 if(rawexpread_fully(a
, buf
, rdlen
, client
)) goto fail
;
1403 if (!(client
->server
->flags
& F_COPYONWRITE
))
1404 pthread_rwlock_unlock(&client
->export_lock
);
1405 len
-=rdlen
; a
+=rdlen
; buf
+=rdlen
;
1409 if (!(client
->server
->flags
& F_COPYONWRITE
))
1410 pthread_rwlock_unlock(&client
->export_lock
);
1415 * Write an amount of bytes at a given offset to the right file. This
1416 * abstracts the write-side of the copyonwrite option, and calls
1417 * rawexpwrite() with the right parameters to do the actual work.
1419 * @param a The offset where the write should start
1420 * @param buf The buffer to write from
1421 * @param len The length of buf
1422 * @param client The client we're going to write for.
1423 * @param fua Flag to indicate 'Force Unit Access'
1424 * @return 0 on success, nonzero on failure
1426 int expwrite(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1427 char pagebuf
[DIFFPAGESIZE
];
1428 off_t mapcnt
,mapl
,maph
;
1433 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len
, (unsigned long long)a
);
1436 if (!(client
->server
->flags
& F_COPYONWRITE
) && !((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)))
1437 return(rawexpwrite_fully(a
, buf
, len
, client
, fua
));
1439 mapl
=a
/DIFFPAGESIZE
; maph
=(a
+len
-1)/DIFFPAGESIZE
;
1441 for (mapcnt
=mapl
;mapcnt
<=maph
;mapcnt
++) {
1442 pagestart
=mapcnt
*DIFFPAGESIZE
;
1443 offset
=a
-pagestart
;
1444 wrlen
=(0<DIFFPAGESIZE
-offset
&& len
<(size_t)(DIFFPAGESIZE
-offset
)) ?
1445 len
: (size_t)DIFFPAGESIZE
-offset
;
1447 if (!(client
->server
->flags
& F_COPYONWRITE
))
1448 pthread_rwlock_rdlock(&client
->export_lock
);
1449 if (client
->difmap
[mapcnt
]!=(u32
)(-1)) { /* the block is already there */
1450 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt
,
1451 (unsigned long)(client
->difmap
[mapcnt
])) ;
1452 if (pwrite(client
->difffile
, buf
, wrlen
, client
->difmap
[mapcnt
]*DIFFPAGESIZE
+offset
) != wrlen
) goto fail
;
1453 } else { /* the block is not there */
1454 client
->difmap
[mapcnt
]=(client
->server
->flags
&F_SPARSE
)?mapcnt
:client
->difffilelen
++;
1455 DEBUG("Page %llu is not here, we put it at %lu\n",
1456 (unsigned long long)mapcnt
,
1457 (unsigned long)(client
->difmap
[mapcnt
]));
1458 if ((offset
!= 0) || (wrlen
!= DIFFPAGESIZE
)){
1459 if ((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)){
1460 DEBUG("error: we can write only whole page while waiting for file\n");
1463 rdlen
=DIFFPAGESIZE
;
1464 if (rawexpread_fully(pagestart
, pagebuf
, rdlen
, client
))
1467 memcpy(pagebuf
+offset
,buf
,wrlen
) ;
1468 if (write(client
->difffile
, pagebuf
, DIFFPAGESIZE
) != DIFFPAGESIZE
)
1471 if (!(client
->server
->flags
& F_COPYONWRITE
))
1472 pthread_rwlock_unlock(&client
->export_lock
);
1473 len
-=wrlen
; a
+=wrlen
; buf
+=wrlen
;
1475 if (client
->server
->flags
& F_SYNC
) {
1476 fsync(client
->difffile
);
1478 /* open question: would it be cheaper to do multiple sync_file_ranges?
1479 as we iterate through the above?
1481 fdatasync(client
->difffile
);
1485 if (!(client
->server
->flags
& F_COPYONWRITE
))
1486 pthread_rwlock_unlock(&client
->export_lock
);
1493 * Write an amount of zeroes at a given offset to the right file.
1494 * This routine could be optimised by not calling expwrite. However,
1495 * this is by far the simplest way to do it.
1497 * @param req the request
1498 * @param client The client we're going to write for.
1499 * @return 0 on success, nonzero on failure
1501 int expwrite_zeroes(struct nbd_request
* req
, CLIENT
* client
, int fua
) {
1502 off_t a
= req
->from
;
1503 size_t len
= req
->len
;
1504 size_t maxsize
= 64LL*1024LL*1024LL;
1505 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1506 char *buf
= calloc (1, maxsize
);
1512 ret
= expwrite(a
, buf
, l
, client
, fua
);
1524 * Flush data to a client
1526 * @param client The client we're going to write for.
1527 * @return 0 on success, nonzero on failure
1529 int expflush(CLIENT
*client
) {
1532 if (client
->server
->flags
& F_COPYONWRITE
) {
1533 return fsync(client
->difffile
);
1536 if (client
->server
->flags
& F_WAIT
) {
1537 return fsync(client
->difffile
);
1540 if (client
->server
->flags
& F_TREEFILES
) {
1541 // all we can do is force sync the entire filesystem containing the tree
1542 if (client
->server
->flags
& F_READONLY
)
1548 for (i
= 0; i
< client
->export
->len
; i
++) {
1549 FILE_INFO fi
= g_array_index(client
->export
, FILE_INFO
, i
);
1550 if (fsync(fi
.fhandle
) < 0)
1557 void punch_hole(int fd
, off_t off
, off_t len
) {
1558 DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd
, (unsigned long long)off
, (unsigned long long)len
);
1560 fallocate(fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
, off
, len
);
1561 #elif HAVE_FSCTL_SET_ZERO_DATA
1562 FILE_ZERO_DATA_INFORMATION zerodata
;
1563 zerodata
.FileOffset
.QuadPart
= off
;
1564 zerodata
.BeyondFinalZero
.QuadPart
= off
+ len
;
1565 HANDLE w32handle
= (HANDLE
)_get_osfhandle(fd
);
1567 DeviceIoControl(w32handle
, FSCTL_SET_ZERO_DATA
, &zerodata
, sizeof(zerodata
), NULL
, 0, &bytesret
, NULL
);
1569 DEBUG("punching holes not supported on this platform\n");
1573 static void send_reply(CLIENT
* client
, uint32_t opt
, uint32_t reply_type
, ssize_t datasize
, void* data
) {
1577 uint32_t reply_type
;
1579 } __attribute__ ((packed
)) header
= {
1580 htonll(0x3e889045565a9LL
),
1586 datasize
= strlen((char*)data
);
1587 header
.datasize
= htonl(datasize
);
1589 socket_write(client
, &header
, sizeof(header
));
1591 socket_write(client
, data
, datasize
);
1596 * Find the name of the file we have to serve. This will use g_strdup_printf
1597 * to put the IP address of the client inside a filename containing
1598 * "%s" (in the form as specified by the "virtstyle" option). That name
1599 * is then written to client->exportname.
1601 * @param net A socket connected to an nbd client
1602 * @param client information about the client. The IP address in human-readable
1603 * format will be written to a new char* buffer, the address of which will be
1604 * stored in client->clientname.
1605 * @return: 0 - OK, -1 - failed.
1607 int set_peername(int net
, CLIENT
*client
) {
1608 struct sockaddr_storage netaddr
;
1609 struct sockaddr
* addr
= (struct sockaddr
*)&netaddr
;
1610 socklen_t addrinlen
= sizeof( struct sockaddr_storage
);
1611 struct addrinfo hints
;
1612 struct addrinfo
*ai
= NULL
;
1613 char peername
[NI_MAXHOST
];
1614 char netname
[NI_MAXHOST
];
1619 if (getsockname(net
, addr
, &addrinlen
) < 0) {
1620 msg(LOG_INFO
, "getsockname failed: %m");
1624 if(netaddr
.ss_family
== AF_UNIX
) {
1625 client
->clientaddr
.ss_family
= AF_UNIX
;
1626 strcpy(peername
, "unix");
1628 if (getpeername(net
, (struct sockaddr
*) &(client
->clientaddr
), &addrinlen
) < 0) {
1629 msg(LOG_INFO
, "getpeername failed: %m");
1632 if((e
= getnameinfo((struct sockaddr
*)&(client
->clientaddr
), addrinlen
,
1633 peername
, sizeof (peername
), NULL
, 0, NI_NUMERICHOST
))) {
1634 msg(LOG_INFO
, "getnameinfo failed: %s", gai_strerror(e
));
1638 memset(&hints
, '\0', sizeof (hints
));
1639 hints
.ai_flags
= AI_ADDRCONFIG
;
1640 e
= getaddrinfo(peername
, NULL
, &hints
, &ai
);
1643 msg(LOG_INFO
, "getaddrinfo failed: %s", gai_strerror(e
));
1649 if(strncmp(peername
, "::ffff:", 7) == 0) {
1650 memmove(peername
, peername
+7, strlen(peername
));
1653 switch(client
->server
->virtstyle
) {
1655 msg(LOG_DEBUG
, "virtualization is off");
1656 client
->exportname
=g_strdup(client
->server
->exportname
);
1659 msg(LOG_DEBUG
, "virtstyle iphash");
1660 for(i
=0;i
<strlen(peername
);i
++) {
1661 if(peername
[i
]=='.') {
1666 msg(LOG_DEBUG
, "virtstyle ipliteral");
1667 client
->exportname
=g_strdup_printf(client
->server
->exportname
, peername
);
1670 msg(LOG_DEBUG
, "virtstyle cidr %d", client
->server
->cidrlen
);
1671 memcpy(&netaddr
, &(client
->clientaddr
), addrinlen
);
1673 if(client
->clientaddr
.ss_family
== AF_UNIX
) {
1674 tmp
= g_strdup(peername
);
1676 assert((ai
->ai_family
== AF_INET
) || (ai
->ai_family
== AF_INET6
));
1677 if(ai
->ai_family
== AF_INET
) {
1679 } else if(ai
->ai_family
== AF_INET6
) {
1682 g_assert_not_reached();
1684 uint8_t* addrptr
= (uint8_t*)(((struct sockaddr
*)&netaddr
)->sa_data
);
1685 for(int i
= 0; i
< addrbits
; i
+=8) {
1686 int masklen
= client
->server
->cidrlen
- i
;
1687 masklen
= masklen
> 0 ? masklen
: 0;
1688 uint8_t mask
= getmaskbyte(masklen
);
1692 getnameinfo((struct sockaddr
*) &netaddr
, addrinlen
,
1693 netname
, sizeof (netname
), NULL
, 0, NI_NUMERICHOST
);
1694 tmp
=g_strdup_printf("%s/%s", netname
, peername
);
1698 client
->exportname
=g_strdup_printf(client
->server
->exportname
, tmp
);
1706 msg(LOG_INFO
, "connect from %s, assigned file is %s",
1707 peername
, client
->exportname
);
1708 client
->clientname
=g_strdup(peername
);
1712 int commit_diff(CLIENT
* client
, bool lock
, int fhandle
){
1714 int pagecount
= client
->exportsize
/DIFFPAGESIZE
;
1716 char* buf
= malloc(sizeof(char)*DIFFPAGESIZE
);
1718 for (int i
=0; i
<pagecount
; i
++){
1719 offset
= DIFFPAGESIZE
*i
;
1721 pthread_rwlock_wrlock(&client
->export_lock
);
1722 if (client
->difmap
[i
] != (u32
)-1){
1724 DEBUG("flushing dirty page %d, offset %ld\n", i
, offset
);
1725 if (pread(client
->difffile
, buf
, DIFFPAGESIZE
, client
->difmap
[i
]*DIFFPAGESIZE
) != DIFFPAGESIZE
) {
1726 msg(LOG_WARNING
, "could not read while committing diff: %m");
1728 pthread_rwlock_unlock(&client
->export_lock
);
1732 if (pwrite(fhandle
, buf
, DIFFPAGESIZE
, offset
) != DIFFPAGESIZE
) {
1733 msg(LOG_WARNING
, "could not write while committing diff: %m");
1735 pthread_rwlock_unlock(&client
->export_lock
);
1739 client
->difmap
[i
] = (u32
)-1;
1742 pthread_rwlock_unlock(&client
->export_lock
);
1749 void* wait_file(void *void_ptr
) {
1750 CLIENT
* client
= (CLIENT
*)void_ptr
;
1753 mode_t mode
= O_RDWR
;
1759 while (fi
.fhandle
< 1){
1760 sem_wait(&file_wait_sem
);
1761 msg(LOG_INFO
, "checking for file %s", client
->server
->exportname
);
1762 fi
.fhandle
= open(client
->server
->exportname
, mode
);
1765 msg(LOG_INFO
, "File %s appeared, fd %d", client
->server
->exportname
, fi
.fhandle
);
1767 // first time there may be lot of data so we lock only per page
1769 dirtycount
= commit_diff(client
, true, fi
.fhandle
);
1770 } while (dirtycount
> 0);
1772 //last time we lock export for the whole time until we switch write destination
1773 pthread_rwlock_wrlock(&client
->export_lock
);
1775 dirtycount
= commit_diff(client
, false, fi
.fhandle
);
1776 } while (dirtycount
> 0);
1778 export
= g_array_new(TRUE
, TRUE
, sizeof(FILE_INFO
));
1779 g_array_append_val(export
, fi
);
1781 client
->export
= export
;
1782 pthread_rwlock_unlock(&client
->export_lock
);
1783 msg(LOG_INFO
, "Waiting for file ended, switching to exported file %s", client
->server
->exportname
);
1789 * Set up client export array, which is an array of FILE_INFO.
1790 * Also, split a single exportfile into multiple ones, if that was asked.
1791 * @param client information on the client which we want to setup export for
1793 bool setupexport(CLIENT
* client
) {
1795 off_t laststartoff
= 0, lastsize
= 0;
1796 int multifile
= (client
->server
->flags
& F_MULTIFILE
);
1797 int treefile
= (client
->server
->flags
& F_TREEFILES
);
1798 int temporary
= (client
->server
->flags
& F_TEMPORARY
) && !multifile
;
1799 int cancreate
= (client
->server
->expected_size
) && !multifile
;
1801 if (treefile
|| (client
->server
->flags
& F_WAIT
)) {
1802 client
->export
= NULL
; // this could be thousands of files so we open handles on demand although its slower
1803 client
->exportsize
= client
->server
->expected_size
; // available space is not checked, as it could change during runtime anyway
1805 if(client
->server
->flags
& F_WAIT
){
1806 pthread_t wait_file_thread
;
1807 if (pthread_create(&wait_file_thread
, NULL
, wait_file
, client
)){
1808 DEBUG("failed to create wait_file thread");
1814 client
->export
= g_array_new(TRUE
, TRUE
, sizeof(FILE_INFO
));
1816 /* If multi-file, open as many files as we can.
1817 * If not, open exactly one file.
1818 * Calculate file sizes as we go to get total size. */
1822 gchar
* error_string
;
1826 /* if expected_size is specified, and this is the first file, we can create the file */
1827 mode_t mode
= (client
->server
->flags
& F_READONLY
) ?
1828 O_RDONLY
: (O_RDWR
| (cancreate
?O_CREAT
:0));
1831 tmpname
=g_strdup_printf("%s.%d-XXXXXX", client
->exportname
, i
);
1832 DEBUG( "Opening %s\n", tmpname
);
1833 fi
.fhandle
= mkstemp(tmpname
);
1836 tmpname
=g_strdup_printf("%s.%d", client
->exportname
, i
);
1838 tmpname
=g_strdup(client
->exportname
);
1840 DEBUG( "Opening %s\n", tmpname
);
1841 fi
.fhandle
= open(tmpname
, mode
, 0600);
1842 if(fi
.fhandle
== -1 && mode
== O_RDWR
) {
1843 /* Try again because maybe media was read-only */
1844 fi
.fhandle
= open(tmpname
, O_RDONLY
);
1845 if(fi
.fhandle
!= -1) {
1846 /* Opening the base file in copyonwrite mode is
1848 if(!(client
->server
->flags
& F_COPYONWRITE
)) {
1849 client
->server
->flags
|= F_AUTOREADONLY
;
1850 client
->server
->flags
|= F_READONLY
;
1855 if(fi
.fhandle
== -1) {
1856 if(multifile
&& i
>0)
1858 error_string
=g_strdup_printf(
1859 "Could not open exported file %s: %%m",
1861 err_nonfatal(error_string
);
1866 unlink(tmpname
); /* File will stick around whilst FD open */
1869 fi
.startoff
= laststartoff
+ lastsize
;
1870 g_array_append_val(client
->export
, fi
);
1873 /* Starting offset and size of this file will be used to
1874 * calculate starting offset of next file */
1875 laststartoff
= fi
.startoff
;
1876 lastsize
= size_autodetect(fi
.fhandle
);
1878 /* If we created the file, it will be length zero */
1879 if (!lastsize
&& cancreate
) {
1881 if(ftruncate (fi
.fhandle
, client
->server
->expected_size
)<0) {
1882 err_nonfatal("Could not expand file: %m");
1885 lastsize
= client
->server
->expected_size
;
1886 break; /* don't look for any more files */
1889 if(!multifile
|| temporary
)
1893 /* Set export size to total calculated size */
1894 client
->exportsize
= laststartoff
+ lastsize
;
1896 /* Export size may be overridden */
1897 if(client
->server
->expected_size
) {
1898 /* desired size must be <= total calculated size */
1899 if(client
->server
->expected_size
> client
->exportsize
) {
1900 err_nonfatal("Size of exported file is too big\n");
1904 client
->exportsize
= client
->server
->expected_size
;
1908 msg(LOG_INFO
, "Size of exported file/device is %llu", (unsigned long long)client
->exportsize
);
1910 msg(LOG_INFO
, "Total number of files: %d", i
);
1913 msg(LOG_INFO
, "Total number of (potential) files: %" PRId64
, (client
->exportsize
+TREEPAGESIZE
-1)/TREEPAGESIZE
);
1918 bool copyonwrite_prepare(CLIENT
* client
) {
1922 if (client
->server
->cowdir
!= NULL
) {
1923 dir
= g_strdup(client
->server
->cowdir
);
1925 dir
= g_strdup(dirname(client
->exportname
));
1927 export_base
= g_strdup(basename(client
->exportname
));
1928 client
->difffilename
= g_strdup_printf("%s/%s-%s-%d.diff",dir
,export_base
,client
->clientname
,
1931 g_free(export_base
);
1932 msg(LOG_INFO
, "About to create map and diff file %s", client
->difffilename
) ;
1933 client
->difffile
=open(client
->difffilename
,O_RDWR
| O_CREAT
| O_TRUNC
,0600) ;
1934 if (client
->difffile
<0) {
1935 err("Could not create diff file (%m)");
1938 if ((client
->difmap
=calloc(client
->exportsize
/DIFFPAGESIZE
,sizeof(u32
)))==NULL
) {
1939 err("Could not allocate memory");
1942 for (i
=0;i
<client
->exportsize
/DIFFPAGESIZE
;i
++) client
->difmap
[i
]=(u32
)-1;
1947 void send_export_info(CLIENT
* client
, SERVER
* server
, bool maybe_zeroes
) {
1948 uint64_t size_host
= htonll((u64
)(client
->exportsize
));
1949 uint16_t flags
= NBD_FLAG_HAS_FLAGS
| NBD_FLAG_SEND_WRITE_ZEROES
;
1951 socket_write(client
, &size_host
, 8);
1952 if (server
->flags
& F_READONLY
)
1953 flags
|= NBD_FLAG_READ_ONLY
;
1954 if (server
->flags
& F_FLUSH
)
1955 flags
|= NBD_FLAG_SEND_FLUSH
;
1956 if (server
->flags
& F_FUA
)
1957 flags
|= NBD_FLAG_SEND_FUA
;
1958 if (server
->flags
& F_ROTATIONAL
)
1959 flags
|= NBD_FLAG_ROTATIONAL
;
1960 if (server
->flags
& F_TRIM
)
1961 flags
|= NBD_FLAG_SEND_TRIM
;
1962 if (!(server
->flags
& F_COPYONWRITE
))
1963 flags
|= NBD_FLAG_CAN_MULTI_CONN
;
1964 flags
= htons(flags
);
1965 socket_write(client
, &flags
, sizeof(flags
));
1966 if (!(glob_flags
& F_NO_ZEROES
) && maybe_zeroes
) {
1968 memset(zeros
, '\0', sizeof(zeros
));
1969 socket_write(client
, zeros
, 124);
1974 * Commit to exporting the chosen export
1976 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
1977 * a number of things (verify whether the client is allowed access, try
1978 * to open files, etc etc) before we're ready to actually serve the
1981 * This function does all those things.
1983 * @param client the CLIENT structure with .server and .net members set
1985 * @return true if the client is allowed access to the export, false
1988 static bool commit_client(CLIENT
* client
, SERVER
* server
) {
1992 client
->server
= server
;
1993 client
->exportsize
= OFFT_MAX
;
1994 client
->transactionlogfd
= -1;
1995 if(pthread_mutex_init(&(client
->lock
), NULL
)) {
1996 msg(LOG_ERR
, "Unable to initialize mutex");
1999 if (pthread_rwlock_init(&client
->export_lock
, NULL
)){
2000 msg(LOG_ERR
, "Unable to initialize write lock");
2003 /* Check whether we exceeded the maximum number of allowed
2004 * clients already */
2008 len
= strlen(client
->server
->servename
);
2009 writeit(commsocket
, &len
, sizeof len
);
2010 writeit(commsocket
, client
->server
->servename
, len
);
2011 readit(commsocket
, &acl
, 1);
2016 msg(LOG_ERR
, "Connection not allowed (too many clients)");
2019 msg(LOG_ERR
, "Connection not allowed (unknown by parent?!?)");
2023 /* Check whether the client is listed in the authfile */
2024 if (set_peername(client
->net
, client
)) {
2025 msg(LOG_ERR
, "Failed to set peername");
2029 if (!authorized_client(client
)) {
2030 msg(LOG_INFO
, "Client '%s' is not authorized to access",
2031 client
->clientname
);
2035 /* Set up the transactionlog, if we need one */
2036 if (client
->server
->transactionlog
&& (client
->transactionlogfd
== -1)) {
2037 if((client
->transactionlogfd
=
2038 open(client
->server
->transactionlog
,
2040 S_IRUSR
| S_IWUSR
)) ==
2042 msg(LOG_INFO
, "Could not open transactionlog %s, moving on without it",
2043 client
->server
->transactionlog
);
2047 /* Run any pre scripts that we may need */
2048 if (do_run(client
->server
->prerun
, client
->exportname
)) {
2049 msg(LOG_INFO
, "Client '%s' not allowed access by prerun script",
2050 client
->clientname
);
2053 client
->socket_closed
= socket_closed_transmission
;
2054 if(!setupexport(client
)) {
2058 if (client
->server
->flags
& F_COPYONWRITE
) {
2059 if(!copyonwrite_prepare(client
)) {
2064 if (client
->server
->flags
& F_WAIT
) {
2065 if(!copyonwrite_prepare(client
)) {
2070 setmysockopt(client
->net
);
2075 static CLIENT
* handle_export_name(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2080 socket_read(client
, &namelen
, sizeof(namelen
));
2081 namelen
= ntohl(namelen
);
2083 name
= malloc(namelen
+1);
2085 socket_read(client
, name
, namelen
);
2089 for(i
=0; i
<servers
->len
; i
++) {
2090 SERVER
* serve
= &(g_array_index(servers
, SERVER
, i
));
2091 // hide exports that are TLS-only if we haven't negotiated TLS
2093 if ((serve
->flags
& F_FORCEDTLS
) && !client
->tls_session
) {
2096 if(!strcmp(serve
->servename
, name
)) {
2097 client
->clientfeats
= cflags
;
2099 if(!commit_client(client
, serve
)) {
2102 send_export_info(client
, serve
, true);
2107 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2110 static void handle_list(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2114 char *ptr
= buf
+ sizeof(len
);
2116 socket_read(client
, &len
, sizeof(len
));
2119 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2121 if(!(glob_flags
& F_LIST
)) {
2122 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "Listing of exports denied by server configuration");
2123 err_nonfatal("Client tried disallowed list option");
2126 for(i
=0; i
<servers
->len
; i
++) {
2127 SERVER
* serve
= &(g_array_index(servers
, SERVER
, i
));
2128 // Hide TLS-only exports if we haven't negotiated TLS yet
2129 if(!client
->tls_session
&& (serve
->flags
& F_FORCEDTLS
)) {
2132 len
= htonl(strlen(serve
->servename
));
2133 memcpy(buf
, &len
, sizeof(len
));
2134 strncpy(ptr
, serve
->servename
, sizeof(buf
) - sizeof(len
));
2135 send_reply(client
, opt
, NBD_REP_SERVER
, strlen(serve
->servename
)+sizeof(len
), buf
);
2137 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
2141 static int verify_cert(gnutls_session_t session
) {
2143 unsigned int status
, cert_list_size
;
2144 const gnutls_datum_t
*cert_list
;
2145 gnutls_x509_crt_t cert
;
2146 time_t now
= time(NULL
);
2148 ret
= gnutls_certificate_verify_peers2(session
, &status
);
2149 if(ret
< 0 || status
!= 0 || gnutls_certificate_type_get(session
) !=
2154 if(gnutls_x509_crt_init(&cert
) < 0) {
2158 cert_list
= gnutls_certificate_get_peers(session
, &cert_list_size
);
2159 if(cert_list
== NULL
) {
2162 if(gnutls_x509_crt_import(cert
, &cert_list
[0], GNUTLS_X509_FMT_DER
) < 0) {
2165 if(gnutls_x509_crt_get_activation_time(cert
) > now
) {
2168 if(gnutls_x509_crt_get_expiration_time(cert
) < now
) {
2171 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2172 msg(LOG_INFO
, "client certificate verification successful");
2175 msg(LOG_ERR
, "E: client certificate verification failed");
2176 return GNUTLS_E_CERTIFICATE_ERROR
;
2179 CLIENT
* handle_starttls(CLIENT
* client
, int opt
, GArray
* servers
, uint32_t cflags
, struct generic_conf
*genconf
) {
2180 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2181 gnutls_certificate_credentials_t x509_cred
;
2182 CLIENT
* retval
= client
;
2183 gnutls_priority_t priority_cache
;
2184 gnutls_session_t
*session
= g_new0(gnutls_session_t
, 1);
2188 socket_read(client
, &len
, sizeof(len
));
2189 if(G_UNLIKELY(len
!= 0)) {
2190 char buf
[1024*1024];
2191 consume(client
, len
, buf
, sizeof(buf
));
2192 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "Sending a STARTTLS command with data is invalid");
2196 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
2198 check_rv(gnutls_certificate_allocate_credentials(&x509_cred
));
2199 gnutls_certificate_set_verify_function(x509_cred
, verify_cert
);
2200 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred
, genconf
->cacertfile
, GNUTLS_X509_FMT_PEM
));
2201 check_rv(gnutls_certificate_set_x509_key_file(x509_cred
, genconf
->certfile
, genconf
->keyfile
, GNUTLS_X509_FMT_PEM
));
2202 check_rv(gnutls_priority_init(&priority_cache
, genconf
->tlsprio
, NULL
));
2203 check_rv(gnutls_init(session
, GNUTLS_SERVER
));
2204 check_rv(gnutls_priority_set(*session
, priority_cache
));
2205 check_rv(gnutls_credentials_set(*session
, GNUTLS_CRD_CERTIFICATE
, x509_cred
));
2207 gnutls_certificate_server_set_request(*session
, GNUTLS_CERT_REQUEST
);
2208 #if GNUTLS_VERSION_NUMBER >= 0x030109
2209 gnutls_transport_set_int(*session
, client
->net
);
2211 gnutls_transport_set_ptr(*session
, (gnutls_transport_ptr_t
) (intptr_t) client
->net
);
2214 ret
= gnutls_handshake(*session
);
2215 } while(ret
< 0 && gnutls_error_is_fatal(ret
) == 0);
2218 err_nonfatal(gnutls_strerror(ret
));
2219 gnutls_bye(*session
, GNUTLS_SHUT_RDWR
);
2220 gnutls_deinit(*session
);
2224 client
->tls_session
= session
;
2225 client
->socket_read
= socket_read_tls
;
2226 client
->socket_write
= socket_write_tls
;
2229 if(retval
== NULL
&& session
!= NULL
) {
2232 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2233 if(retval
!= NULL
&& retval
->server
!= NULL
) {
2234 retval
->server
= NULL
;
2241 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
2243 * XXX this matches the proposal I sent out, rather than the officially
2244 * documented version of this command. Need to bring the two in sync
2245 * one way or the other.
2247 static bool handle_info(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2248 uint32_t namelen
, len
;
2251 SERVER
*server
= NULL
;
2252 uint16_t n_requests
;
2255 bool sent_export
= false;
2256 uint32_t reptype
= NBD_REP_ERR_UNKNOWN
;
2257 char *msg
= "Export unknown";
2259 socket_read(client
, &len
, sizeof(len
));
2261 socket_read(client
, &namelen
, sizeof(namelen
));
2262 namelen
= htonl(namelen
);
2263 if(namelen
> (len
- 6)) {
2264 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2265 socket_read(client
, buf
, len
- sizeof(namelen
));
2268 name
= malloc(namelen
+ 1);
2270 socket_read(client
, name
, namelen
);
2274 for(i
=0; i
<servers
->len
; i
++) {
2275 SERVER
*serve
= &(g_array_index(servers
, SERVER
, i
));
2276 if (!strcmp(serve
->servename
, name
)) {
2277 if ((serve
->flags
& F_FORCEDTLS
) && !client
->tls_session
) {
2278 reptype
= NBD_REP_ERR_TLS_REQD
;
2279 msg
= "TLS is required for that export";
2286 socket_read(client
, &n_requests
, sizeof(n_requests
));
2287 n_requests
= ntohs(n_requests
);
2289 consume(client
, n_requests
* sizeof(request
), buf
,
2291 send_reply(client
, opt
, reptype
, -1, msg
);
2294 if (opt
== NBD_OPT_GO
) {
2295 client
->clientfeats
= cflags
;
2296 if(!commit_client(client
, server
)) {
2297 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "Access denied by server configuration");
2301 for(i
=0; i
<n_requests
; i
++) {
2302 socket_read(client
, &request
, sizeof(request
));
2303 switch(ntohs(request
)) {
2304 case NBD_INFO_EXPORT
:
2305 send_reply(client
, opt
, NBD_REP_INFO
, 12, NULL
);
2306 socket_write(client
, &request
, 2);
2307 send_export_info(client
, server
, false);
2311 // ignore all other options for now.
2316 request
= htons(NBD_INFO_EXPORT
);
2317 send_reply(client
, opt
, NBD_REP_INFO
, 12, NULL
);
2318 socket_write(client
, &request
, 2);
2319 send_export_info(client
, server
, false);
2321 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
2327 * Do the initial negotiation.
2329 * @param net The socket we're doing the negotiation over.
2330 * @param servers The array of known servers.
2331 * @param genconf the global options (needed for accessing TLS config data)
2333 CLIENT
* negotiate(int net
, GArray
* servers
, struct generic_conf
*genconf
) {
2334 uint16_t smallflags
= NBD_FLAG_FIXED_NEWSTYLE
| NBD_FLAG_NO_ZEROES
;
2336 uint32_t cflags
= 0;
2338 CLIENT
* client
= g_new0(CLIENT
, 1);
2340 client
->socket_read
= socket_read_notls
;
2341 client
->socket_write
= socket_write_notls
;
2342 client
->socket_closed
= socket_closed_negotiate
;
2344 assert(servers
!= NULL
);
2345 socket_write(client
, INIT_PASSWD
, 8);
2346 magic
= htonll(opts_magic
);
2347 socket_write(client
, &magic
, sizeof(magic
));
2349 smallflags
= htons(smallflags
);
2350 socket_write(client
, &smallflags
, sizeof(uint16_t));
2351 socket_read(client
, &cflags
, sizeof(cflags
));
2352 cflags
= htonl(cflags
);
2353 if (cflags
& NBD_FLAG_C_NO_ZEROES
) {
2354 glob_flags
|= F_NO_ZEROES
;
2357 socket_read(client
, &magic
, sizeof(magic
));
2358 magic
= ntohll(magic
);
2359 if(magic
!= opts_magic
) {
2360 err_nonfatal("Negotiation failed/5a: magic mismatch");
2363 socket_read(client
, &opt
, sizeof(opt
));
2365 if(client
->tls_session
== NULL
2366 && glob_flags
& F_FORCEDTLS
2367 && opt
!= NBD_OPT_STARTTLS
) {
2368 if(opt
== NBD_OPT_EXPORT_NAME
) {
2369 // can't send an error message for EXPORT_NAME,
2370 // so must do hard close
2373 if(opt
== NBD_OPT_ABORT
) {
2377 consume_len(client
);
2378 send_reply(client
, opt
, NBD_REP_ERR_TLS_REQD
, -1, "TLS is required on this server");
2382 case NBD_OPT_EXPORT_NAME
:
2383 // NBD_OPT_EXPORT_NAME must be the last
2384 // selected option, so return from here
2385 // if that is chosen.
2386 if(handle_export_name(client
, opt
, servers
, cflags
) != NULL
) {
2393 handle_list(client
, opt
, servers
, cflags
);
2398 case NBD_OPT_STARTTLS
:
2400 consume_len(client
);
2401 send_reply(client
, opt
, NBD_REP_ERR_PLATFORM
, -1, "This nbd-server was compiled without TLS support");
2403 if(client
->tls_session
!= NULL
) {
2404 consume_len(client
);
2405 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
2408 if(genconf
->keyfile
== NULL
) {
2409 consume_len(client
);
2410 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "TLS not allowed on this server");
2413 if(handle_starttls(client
, opt
, servers
, cflags
, genconf
) == NULL
) {
2414 // can't recover from failed TLS negotiation.
2421 if(handle_info(client
, opt
, servers
, cflags
) && opt
== NBD_OPT_GO
) {
2426 consume_len(client
);
2427 send_reply(client
, opt
, NBD_REP_ERR_UNSUP
, -1, "The given option is unknown to this server implementation");
2430 } while((opt
!= NBD_OPT_EXPORT_NAME
) && (opt
!= NBD_OPT_ABORT
));
2431 if(opt
== NBD_OPT_ABORT
) {
2432 err_nonfatal("Session terminated by client");
2435 err_nonfatal("Weird things happened: reached end of negotiation without success");
2441 static int nbd_errno(int errcode
) {
2456 return htonl(28); // ENOSPC
2458 return htonl(22); // EINVAL
2462 static void package_dispose(struct work_package
* package
) {
2463 if (package
->pipefd
[0] > 0)
2464 close(package
->pipefd
[0]);
2465 if (package
->pipefd
[1] > 0)
2466 close(package
->pipefd
[1]);
2467 g_free(package
->data
);
2468 g_free(package
->req
);
2472 static int mkpipe(int pipefd
[2], size_t len
)
2474 if (len
> MAX_PIPE_SIZE
)
2480 if (fcntl(pipefd
[1], F_SETPIPE_SZ
, MAX_PIPE_SIZE
) < MAX_PIPE_SIZE
) {
2492 struct work_package
* package_create(CLIENT
* client
, struct nbd_request
* req
) {
2493 struct work_package
* rv
= calloc(sizeof (struct work_package
), 1);
2496 rv
->client
= client
;
2501 if((req
->type
& NBD_CMD_MASK_COMMAND
) == NBD_CMD_WRITE
) {
2502 if (client
->server
->flags
& F_SPLICE
) {
2503 if (mkpipe(rv
->pipefd
, req
->len
))
2504 rv
->data
= malloc(req
->len
);
2506 rv
->data
= malloc(req
->len
);
2513 static void setup_reply(struct nbd_reply
* rep
, struct nbd_request
* req
) {
2514 rep
->magic
= htonl(NBD_REPLY_MAGIC
);
2516 memcpy(&(rep
->handle
), &(req
->handle
), sizeof(req
->handle
));
2520 static int handle_splice_read(CLIENT
*client
, struct nbd_request
*req
)
2522 struct nbd_reply rep
;
2525 // splice doesn't work with TLS
2526 if (client
->tls_session
!= NULL
)
2529 if (mkpipe(pipefd
, req
->len
))
2532 if (expsplice(pipefd
[1], req
->from
, req
->len
, client
, SPLICE_IN
, 0)) {
2538 DEBUG("handling read request (splice)\n");
2539 setup_reply(&rep
, req
);
2540 pthread_mutex_lock(&(client
->lock
));
2541 writeit(client
->net
, &rep
, sizeof(rep
));
2542 spliceit(pipefd
[0], NULL
, client
->net
, NULL
, req
->len
);
2543 pthread_mutex_unlock(&(client
->lock
));
2550 static void handle_normal_read(CLIENT
*client
, struct nbd_request
*req
)
2552 struct nbd_reply rep
;
2553 void* buf
= malloc(req
->len
);
2555 err("Could not allocate memory for request");
2557 DEBUG("handling read request\n");
2558 setup_reply(&rep
, req
);
2559 if(expread(req
->from
, buf
, req
->len
, client
)) {
2560 DEBUG("Read failed: %m");
2561 rep
.error
= nbd_errno(errno
);
2563 pthread_mutex_lock(&(client
->lock
));
2564 socket_write(client
, &rep
, sizeof rep
);
2566 socket_write(client
, buf
, req
->len
);
2568 pthread_mutex_unlock(&(client
->lock
));
2572 static void handle_read(CLIENT
* client
, struct nbd_request
* req
)
2576 * If we have splice set we want to try that first, and if that fails
2577 * for whatever reason we fall through to ye olde read.
2579 if (client
->server
->flags
& F_SPLICE
)
2580 if (!handle_splice_read(client
, req
))
2583 handle_normal_read(client
, req
);
2586 static void handle_write(struct work_package
*pkg
)
2588 CLIENT
*client
= pkg
->client
;
2589 struct nbd_request
*req
= pkg
->req
;
2590 struct nbd_reply rep
;
2591 int fua
= !!(req
->type
& NBD_CMD_FLAG_FUA
);
2593 DEBUG("handling write request\n");
2594 setup_reply(&rep
, req
);
2598 if (expsplice(pkg
->pipefd
[0], req
->from
, req
->len
, client
,
2600 DEBUG("Splice failed: %m");
2601 rep
.error
= nbd_errno(errno
);
2606 if(expwrite(req
->from
, pkg
->data
, req
->len
, client
, fua
)) {
2607 DEBUG("Write failed: %m");
2608 rep
.error
= nbd_errno(errno
);
2611 pthread_mutex_lock(&(client
->lock
));
2612 socket_write(client
, &rep
, sizeof rep
);
2613 pthread_mutex_unlock(&(client
->lock
));
2616 static void handle_flush(CLIENT
* client
, struct nbd_request
* req
) {
2617 struct nbd_reply rep
;
2618 DEBUG("handling flush request\n");
2619 setup_reply(&rep
, req
);
2620 if(expflush(client
)) {
2621 DEBUG("Flush failed: %m");
2622 rep
.error
= nbd_errno(errno
);
2624 pthread_mutex_lock(&(client
->lock
));
2625 socket_write(client
, &rep
, sizeof rep
);
2626 pthread_mutex_unlock(&(client
->lock
));
2629 static void handle_trim(CLIENT
* client
, struct nbd_request
* req
) {
2630 struct nbd_reply rep
;
2631 DEBUG("handling trim request\n");
2632 setup_reply(&rep
, req
);
2633 if(exptrim(req
, client
)) {
2634 DEBUG("Trim failed: %m");
2635 rep
.error
= nbd_errno(errno
);
2637 pthread_mutex_lock(&(client
->lock
));
2638 socket_write(client
, &rep
, sizeof rep
);
2639 pthread_mutex_unlock(&(client
->lock
));
2642 static void handle_write_zeroes(CLIENT
* client
, struct nbd_request
* req
) {
2643 struct nbd_reply rep
;
2644 DEBUG("handling write_zeroes request\n");
2645 int fua
= !!(req
->type
& NBD_CMD_FLAG_FUA
);
2646 setup_reply(&rep
, req
);
2647 if(expwrite_zeroes(req
, client
, fua
)) {
2648 DEBUG("Write_zeroes failed: %m");
2649 rep
.error
= nbd_errno(errno
);
2651 // For now, don't trim
2652 // TODO: handle this far more efficiently with reference to the
2653 // actual backing driver
2654 pthread_mutex_lock(&(client
->lock
));
2655 socket_write(client
, &rep
, sizeof rep
);
2656 pthread_mutex_unlock(&(client
->lock
));
2660 static bool bad_write(CLIENT
* client
, struct nbd_request
* req
) {
2661 if ((client
->server
->flags
& F_READONLY
) ||
2662 (client
->server
->flags
& F_AUTOREADONLY
)) {
2663 DEBUG("[WRITE to READONLY!]");
2669 static bool bad_range(CLIENT
* client
, struct nbd_request
* req
) {
2670 if(req
->from
> client
->exportsize
||
2671 req
->from
+ req
->len
> client
->exportsize
) {
2672 DEBUG("[out of bounds!]");
2678 static void handle_request(gpointer data
, gpointer user_data
) {
2679 struct work_package
* package
= (struct work_package
*) data
;
2680 uint32_t type
= package
->req
->type
& NBD_CMD_MASK_COMMAND
;
2681 uint32_t flags
= package
->req
->type
& ~NBD_CMD_MASK_COMMAND
;
2682 struct nbd_reply rep
;
2685 if(flags
& ~(NBD_CMD_FLAG_FUA
| NBD_CMD_FLAG_NO_HOLE
)) {
2686 msg(LOG_ERR
, "E: received invalid flag %d on command %d, ignoring", flags
, type
);
2692 if (bad_range(package
->client
, package
->req
)) {
2695 handle_read(package
->client
, package
->req
);
2698 if (bad_write(package
->client
, package
->req
)) {
2702 if (bad_range(package
->client
, package
->req
)) {
2706 handle_write(package
);
2709 handle_flush(package
->client
, package
->req
);
2712 if (bad_write(package
->client
, package
->req
)) {
2716 if (bad_range(package
->client
, package
->req
)) {
2719 handle_trim(package
->client
, package
->req
);
2721 case NBD_CMD_WRITE_ZEROES
:
2722 if (bad_write(package
->client
, package
->req
)) {
2726 if (bad_range(package
->client
, package
->req
)) {
2730 handle_write_zeroes(package
->client
, package
->req
);
2733 msg(LOG_ERR
, "E: received unknown command %d of type, ignoring", package
->req
->type
);
2738 setup_reply(&rep
, package
->req
);
2739 rep
.error
= nbd_errno(err
);
2740 pthread_mutex_lock(&(package
->client
->lock
));
2741 socket_write(package
->client
, &rep
, sizeof rep
);
2742 pthread_mutex_unlock(&(package
->client
->lock
));
2744 package_dispose(package
);
2747 static int mainloop_threaded(CLIENT
* client
) {
2748 struct nbd_request
* req
;
2749 struct work_package
* pkg
;
2751 DEBUG("Entering request loop\n");
2753 req
= calloc(sizeof (struct nbd_request
), 1);
2755 socket_read(client
, req
, sizeof(struct nbd_request
));
2756 if(client
->transactionlogfd
!= -1) {
2757 writeit(client
->transactionlogfd
, req
, sizeof(struct nbd_request
));
2760 req
->from
= ntohll(req
->from
);
2761 req
->type
= ntohl(req
->type
);
2762 req
->len
= ntohl(req
->len
);
2764 if(req
->magic
!= htonl(NBD_REQUEST_MAGIC
))
2765 err("Protocol error: not enough magic.");
2767 pkg
= package_create(client
, req
);
2769 if((req
->type
& NBD_CMD_MASK_COMMAND
) == NBD_CMD_WRITE
) {
2771 if ((client
->server
->flags
& F_SPLICE
) &&
2772 (req
->len
<= MAX_PIPE_SIZE
&& pkg
->pipefd
[1] > 0) &&
2773 (client
->tls_session
== NULL
))
2774 spliceit(client
->net
, NULL
, pkg
->pipefd
[1],
2778 socket_read(client
, pkg
->data
, req
->len
);
2780 if(req
->type
== NBD_CMD_DISC
) {
2781 finalize_client(client
);
2784 g_thread_pool_push(tpool
, pkg
, NULL
);
2790 * @param data a pointer to pid_t which should be freed
2792 void destroy_pid_t(gpointer data
) {
2797 spawn_child(int* socket
)
2804 sigemptyset(&newset
);
2805 sigaddset(&newset
, SIGCHLD
);
2806 sigaddset(&newset
, SIGTERM
);
2807 sigprocmask(SIG_BLOCK
, &newset
, &oldset
);
2808 socketpair(AF_UNIX
, SOCK_STREAM
, 0, sockets
);
2811 msg(LOG_ERR
, "Could not fork (%s)", strerror(errno
));
2816 if (pid
> 0) { /* Parent */
2819 pidp
= g_malloc(sizeof(pid_t
));
2821 *socket
= sockets
[1];
2823 g_hash_table_insert(children
, pidp
, pidp
);
2827 *socket
= sockets
[0];
2829 /* Child's signal disposition is reset to default. */
2830 signal(SIGCHLD
, SIG_DFL
);
2831 signal(SIGTERM
, SIG_DFL
);
2832 signal(SIGHUP
, SIG_DFL
);
2833 sigemptyset(&oldset
);
2835 sigprocmask(SIG_SETMASK
, &oldset
, NULL
);
2840 socket_accept(const int sock
)
2842 struct sockaddr_storage addrin
;
2843 socklen_t addrinlen
= sizeof(addrin
);
2846 net
= accept(sock
, (struct sockaddr
*) &addrin
, &addrinlen
);
2848 err_nonfatal("Failed to accept socket connection: %m");
2855 handle_modern_connection(GArray
*const servers
, const int sock
, struct generic_conf
*genconf
)
2859 CLIENT
*client
= NULL
;
2863 net
= socket_accept(sock
);
2868 pid
= spawn_child(&commsocket
);
2871 msg(LOG_INFO
, "Spawned a child process");
2872 g_array_append_val(childsocks
, commsocket
);
2875 msg(LOG_ERR
, "Failed to spawn a child process");
2879 /* Child just continues. */
2882 sock_flags_old
= fcntl(net
, F_GETFL
, 0);
2883 if (sock_flags_old
== -1) {
2884 msg(LOG_ERR
, "Failed to get socket flags");
2888 sock_flags_new
= sock_flags_old
& ~O_NONBLOCK
;
2889 if (sock_flags_new
!= sock_flags_old
&&
2890 fcntl(net
, F_SETFL
, sock_flags_new
) == -1) {
2891 msg(LOG_ERR
, "Failed to set socket to blocking mode");
2895 client
= negotiate(net
, servers
, genconf
);
2897 msg(LOG_ERR
, "Modern initial negotiation failed");
2904 /* Free all root server resources here, because we are
2905 * currently in the child process serving one specific
2906 * connection. These are not simply needed anymore. */
2907 g_hash_table_destroy(children
);
2909 for (i
= 0; i
< modernsocks
->len
; i
++) {
2910 close(g_array_index(modernsocks
, int, i
));
2912 g_array_free(modernsocks
, TRUE
);
2914 /* Now that we are in the child process after a
2915 * successful negotiation, we do not need the list of
2916 * servers anymore, get rid of it.*/
2917 /* FALSE does not free the
2918 actual data. This is required,
2919 because the client has a
2920 direct reference into that
2921 data, and otherwise we get a
2923 g_array_free(servers
, FALSE
);
2926 msg(LOG_INFO
, "Starting to serve");
2927 mainloop_threaded(client
);
2939 static int handle_childname(GArray
* servers
, int socket
)
2945 while(rt
< sizeof(len
)) {
2946 switch((r
= read(socket
, &len
, sizeof len
))) {
2950 err_nonfatal("Error reading from acl socket: %m");
2957 buf
= g_malloc0(len
+ 1);
2959 readit(socket
, buf
, len
);
2960 for(i
=0; i
<servers
->len
; i
++) {
2961 SERVER
* srv
= &g_array_index(servers
, SERVER
, i
);
2962 if(strcmp(srv
->servename
, buf
) == 0) {
2963 if(srv
->max_connections
== 0 || srv
->max_connections
> srv
->numclients
) {
2964 writeit(socket
, "Y", 1);
2967 writeit(socket
, "N", 1);
2972 writeit(socket
, "X", 1);
2979 * Return the index of the server whose servename matches the given
2982 * @param servename a string to match
2983 * @param servers an array of servers
2984 * @return the first index of the server whose servename matches the
2985 * given name or -1 if one cannot be found
2987 static int get_index_by_servename(const gchar
*const servename
,
2988 const GArray
*const servers
) {
2991 for (i
= 0; i
< servers
->len
; ++i
) {
2992 const SERVER server
= g_array_index(servers
, SERVER
, i
);
2994 if (strcmp(servename
, server
.servename
) == 0)
3002 * Parse configuration files and add servers to the array if they don't
3003 * already exist there. The existence is tested by comparing
3004 * servenames. A server is appended to the array only if its servename
3005 * is unique among all other servers.
3007 * @param servers an array of servers
3008 * @return the number of new servers appended to the array, or -1 in
3011 static int append_new_servers(GArray
*const servers
, GError
**const gerror
) {
3013 GArray
*new_servers
;
3014 const int old_len
= servers
->len
;
3016 struct generic_conf genconf
;
3018 new_servers
= parse_cfile(config_file_pos
, &genconf
, true, gerror
);
3019 g_thread_pool_set_max_threads(tpool
, genconf
.threads
, NULL
);
3023 for (i
= 0; i
< new_servers
->len
; ++i
) {
3024 SERVER new_server
= g_array_index(new_servers
, SERVER
, i
);
3026 if (new_server
.servename
3027 && -1 == get_index_by_servename(new_server
.servename
,
3029 g_array_append_val(servers
, new_server
);
3033 retval
= servers
->len
- old_len
;
3035 g_array_free(new_servers
, TRUE
);
3040 void serveloop(GArray
* servers
, struct generic_conf
*genconf
) G_GNUC_NORETURN
;
3042 * Loop through the available servers, and serve them. Never returns.
3044 void serveloop(GArray
* servers
, struct generic_conf
*genconf
) {
3049 sigset_t blocking_mask
;
3050 sigset_t original_mask
;
3053 * Set up the master fd_set. The set of descriptors we need
3054 * to select() for never changes anyway and it buys us a *lot*
3055 * of time to only build this once. However, if we ever choose
3056 * to not fork() for clients anymore, we may have to revisit
3061 for(i
=0;i
<modernsocks
->len
;i
++) {
3062 int sock
= g_array_index(modernsocks
, int, i
);
3063 FD_SET(sock
, &mset
);
3064 mmax
=sock
>mmax
?sock
:mmax
;
3067 /* Construct a signal mask which is used to make signal testing and
3068 * receiving an atomic operation to ensure no signal is received between
3069 * tests and blocking pselect(). */
3070 if (sigemptyset(&blocking_mask
) == -1)
3071 err("failed to initialize blocking_mask: %m");
3073 if (sigaddset(&blocking_mask
, SIGCHLD
) == -1)
3074 err("failed to add SIGCHLD to blocking_mask: %m");
3076 if (sigaddset(&blocking_mask
, SIGHUP
) == -1)
3077 err("failed to add SIGHUP to blocking_mask: %m");
3079 if (sigaddset(&blocking_mask
, SIGTERM
) == -1)
3080 err("failed to add SIGTERM to blocking_mask: %m");
3082 if (sigprocmask(SIG_BLOCK
, &blocking_mask
, &original_mask
) == -1)
3083 err("failed to block signals: %m");
3086 if (is_sigterm_caught
) {
3087 is_sigterm_caught
= 0;
3089 g_hash_table_foreach(children
, killchild
, NULL
);
3095 if (is_sigchld_caught
) {
3100 is_sigchld_caught
= 0;
3102 while ((pid
=waitpid(-1, &status
, WNOHANG
)) > 0) {
3103 if (WIFEXITED(status
)) {
3104 msg(LOG_INFO
, "Child exited with %d", WEXITSTATUS(status
));
3106 i
= g_hash_table_lookup(children
, &pid
);
3108 msg(LOG_INFO
, "SIGCHLD received for an unknown child with PID %ld", (long)pid
);
3110 DEBUG("Removing %d from the list of children", pid
);
3111 g_hash_table_remove(children
, &pid
);
3116 /* SIGHUP causes the root server process to reconfigure
3117 * itself and add new export servers for each newly
3118 * found export configuration group, i.e. spawn new
3119 * server processes for each previously non-existent
3120 * export. This does not alter old runtime configuration
3121 * but just appends new exports. */
3122 if (is_sighup_caught
) {
3124 GError
*gerror
= NULL
;
3126 msg(LOG_INFO
, "reconfiguration request received");
3127 is_sighup_caught
= 0; /* Reset to allow catching
3130 n
= append_new_servers(servers
, &gerror
);
3132 msg(LOG_ERR
, "failed to append new servers: %s",
3135 for (i
= servers
->len
- n
; i
< servers
->len
; ++i
) {
3136 const SERVER server
= g_array_index(servers
,
3139 msg(LOG_INFO
, "reconfigured new server: %s",
3144 memcpy(&rset
, &mset
, sizeof(fd_set
));
3146 for(i
=0;i
<childsocks
->len
;i
++) {
3147 int sock
= g_array_index(childsocks
, int, i
);
3148 FD_SET(sock
, &rset
);
3149 max
=sock
>max
?sock
:max
;
3152 if (pselect(max
+ 1, &rset
, NULL
, NULL
, NULL
, &original_mask
) > 0) {
3154 for(i
=0; i
< modernsocks
->len
; i
++) {
3155 int sock
= g_array_index(modernsocks
, int, i
);
3156 if(!FD_ISSET(sock
, &rset
)) {
3160 handle_modern_connection(servers
, sock
, genconf
);
3162 for(i
=0; i
< childsocks
->len
; i
++) {
3163 int sock
= g_array_index(childsocks
, int, i
);
3165 if(FD_ISSET(sock
, &rset
)) {
3166 if(handle_childname(servers
, sock
) < 0) {
3168 g_array_remove_index(childsocks
, i
);
3177 * Set server socket options.
3179 * @param socket a socket descriptor of the server
3181 * @param gerror a pointer to an error object pointer used for reporting
3182 * errors. On error, if gerror is not NULL, *gerror is set and -1
3185 * @return 0 on success, -1 on error
3187 int dosockopts(const int socket
, GError
**const gerror
) {
3195 /* lose the pesky "Address already in use" error message */
3196 if (setsockopt(socket
,SOL_SOCKET
,SO_REUSEADDR
,&yes
,sizeof(int)) == -1) {
3197 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_REUSEADDR
,
3198 "failed to set socket option SO_REUSEADDR: %s",
3204 if (setsockopt(socket
,SOL_SOCKET
,SO_LINGER
,&l
,sizeof(l
)) == -1) {
3205 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_LINGER
,
3206 "failed to set socket option SO_LINGER: %s",
3210 if (setsockopt(socket
,SOL_SOCKET
,SO_KEEPALIVE
,&yes
,sizeof(int)) == -1) {
3211 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_KEEPALIVE
,
3212 "failed to set socket option SO_KEEPALIVE: %s",
3220 int open_unix(const gchar
*const sockname
, GError
**const gerror
) {
3221 struct sockaddr_un sa
;
3225 memset(&sa
, 0, sizeof(struct sockaddr_un
));
3226 sa
.sun_family
= AF_UNIX
;
3227 strncpy(sa
.sun_path
, sockname
, sizeof sa
.sun_path
);
3228 sa
.sun_path
[sizeof(sa
.sun_path
)-1] = '\0';
3229 sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
3231 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SOCKET
,
3232 "failed to open a unix socket: "
3233 "failed to create socket: %s",
3237 if(bind(sock
, (struct sockaddr
*)&sa
, sizeof(struct sockaddr_un
))<0) {
3238 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3239 "failed to open a unix socket: "
3240 "failed to bind to address %s: %s",
3241 sockname
, strerror(errno
));
3244 if(listen(sock
, 10)<0) {
3245 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3246 "failed to open a unix socket: "
3247 "failed to start listening: %s",
3252 g_array_append_val(modernsocks
, sock
);
3254 if(retval
<0 && sock
>= 0) {
3261 int open_modern(const gchar
*const addr
, const gchar
*const port
,
3262 GError
**const gerror
) {
3263 struct addrinfo hints
;
3264 struct addrinfo
* ai
= NULL
;
3265 struct addrinfo
* ai_bak
= NULL
;
3271 gchar
const* l_addr
= addr
;
3273 if(!addr
|| strlen(addr
) == 0) {
3274 l_addr
= "::, 0.0.0.0";
3277 addrs
= g_strsplit_set(l_addr
, ", \t", -1);
3279 for(int i
=0; addrs
[i
]!=NULL
; i
++) {
3280 if(addrs
[i
][0] == '\0') {
3283 memset(&hints
, '\0', sizeof(hints
));
3284 hints
.ai_flags
= AI_PASSIVE
| AI_ADDRCONFIG
;
3285 hints
.ai_socktype
= SOCK_STREAM
;
3286 hints
.ai_family
= AF_UNSPEC
;
3287 hints
.ai_protocol
= IPPROTO_TCP
;
3288 e
= getaddrinfo(addrs
[i
], port
? port
: NBD_DEFAULT_PORT
, &hints
, &ai
);
3290 if(e
!= 0 && addrs
[i
+1] == NULL
&& modernsocks
->len
== 0) {
3291 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_GAI
,
3292 "failed to open a modern socket: "
3293 "failed to get address info: %s",
3301 if((sock
= socket(ai
->ai_family
, ai
->ai_socktype
, ai
->ai_protocol
))<0) {
3302 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SOCKET
,
3303 "failed to open a modern socket: "
3304 "failed to create a socket: %s",
3309 if (dosockopts(sock
, gerror
) == -1) {
3310 g_prefix_error(gerror
, "failed to open a modern socket: ");
3314 if(bind(sock
, ai
->ai_addr
, ai
->ai_addrlen
)) {
3316 * Some systems will return multiple entries for the
3317 * same address when we ask it for something
3318 * AF_UNSPEC, even though the first entry will
3319 * listen to both protocols. Other systems will
3320 * return multiple entries too, but we actually
3321 * do need to open both.
3323 * Handle this by ignoring EADDRINUSE if we've
3324 * already got at least one socket open
3326 if(errno
== EADDRINUSE
&& modernsocks
->len
> 0) {
3329 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3330 "failed to open a modern socket: "
3331 "failed to bind an address to a socket: %s",
3336 if(listen(sock
, 10) <0) {
3337 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3338 "failed to open a modern socket: "
3339 "failed to start listening on a socket: %s",
3343 g_array_append_val(modernsocks
, sock
);
3348 freeaddrinfo(ai_bak
);
3356 if (retval
== -1 && sock
>= 0) {
3360 freeaddrinfo(ai_bak
);
3366 * Connect our servers.
3368 void setup_servers(GArray
*const servers
, const gchar
*const modernaddr
,
3369 const gchar
*const modernport
, const gchar
* unixsock
,
3370 const gint flags
) {
3371 struct sigaction sa
;
3373 if(unixsock
!= NULL
) {
3374 GError
* gerror
= NULL
;
3375 if(open_unix(unixsock
, &gerror
) == -1) {
3376 msg(LOG_ERR
, "failed to setup servers: %s",
3378 g_clear_error(&gerror
);
3382 if (((flags
& F_DUAL_LISTEN
) != 0) || (unixsock
== NULL
)) {
3383 GError
*gerror
= NULL
;
3384 if (open_modern(modernaddr
, modernport
, &gerror
) == -1) {
3385 msg(LOG_ERR
, "failed to setup servers: %s",
3387 g_clear_error(&gerror
);
3391 children
=g_hash_table_new_full(g_int_hash
, g_int_equal
, NULL
, destroy_pid_t
);
3393 sa
.sa_handler
= sigchld_handler
;
3394 sigemptyset(&sa
.sa_mask
);
3395 sigaddset(&sa
.sa_mask
, SIGTERM
);
3396 sa
.sa_flags
= SA_RESTART
;
3397 if(sigaction(SIGCHLD
, &sa
, NULL
) == -1)
3398 err("sigaction: %m");
3400 sa
.sa_handler
= sigterm_handler
;
3401 sigemptyset(&sa
.sa_mask
);
3402 sigaddset(&sa
.sa_mask
, SIGCHLD
);
3403 sa
.sa_flags
= SA_RESTART
;
3404 if(sigaction(SIGTERM
, &sa
, NULL
) == -1)
3405 err("sigaction: %m");
3407 sa
.sa_handler
= sighup_handler
;
3408 sigemptyset(&sa
.sa_mask
);
3409 sa
.sa_flags
= SA_RESTART
;
3410 if(sigaction(SIGHUP
, &sa
, NULL
) == -1)
3411 err("sigaction: %m");
3413 sa
.sa_handler
= sigusr1_handler
;
3414 sigemptyset(&sa
.sa_mask
);
3415 sa
.sa_flags
= SA_RESTART
;
3416 if(sigaction(SIGUSR1
, &sa
, NULL
) == -1)
3417 err("sigaction: %m");
3421 * Go daemon (unless we specified at compile time that we didn't want this)
3422 * @param serve the first server of our configuration. If its port is zero,
3423 * then do not daemonize, because we're doing inetd then. This parameter
3424 * is only used to create a PID file of the form
3425 * /var/run/nbd-server.<port>.pid; it's not modified in any way.
3427 #if !defined(NODAEMON)
3435 strncpy(pidfname
, "/var/run/nbd-server.pid", 255);
3437 pidf
=fopen(pidfname
, "w");
3439 fprintf(pidf
,"%d\n", (int)getpid());
3443 fprintf(stderr
, "Not fatal; continuing");
3447 #define daemonize(serve)
3448 #endif /* !defined(NODAEMON) */
3451 * Everything beyond this point (in the file) is run in non-daemon mode.
3452 * The stuff above daemonize() isn't.
3456 * Set up user-ID and/or group-ID
3458 void dousers(const gchar
*const username
, const gchar
*const groupname
) {
3463 gr
= getgrnam(groupname
);
3465 str
= g_strdup_printf("Invalid group name: %s", groupname
);
3468 if(setgid(gr
->gr_gid
)<0) {
3469 err("Could not set GID: %m");
3473 pw
= getpwnam(username
);
3475 str
= g_strdup_printf("Invalid user name: %s", username
);
3479 if(setuid(pw
->pw_uid
)<0) {
3480 err("Could not set UID: %m");
3486 void glib_message_syslog_redirect(const gchar
*log_domain
,
3487 GLogLevelFlags log_level
,
3488 const gchar
*message
,
3491 int level
=LOG_DEBUG
;
3495 case G_LOG_FLAG_FATAL
:
3496 case G_LOG_LEVEL_CRITICAL
:
3497 case G_LOG_LEVEL_ERROR
:
3500 case G_LOG_LEVEL_WARNING
:
3503 case G_LOG_LEVEL_MESSAGE
:
3504 case G_LOG_LEVEL_INFO
:
3507 case G_LOG_LEVEL_DEBUG
:
3513 syslog(level
, "%s", message
);
3518 * Main entry point...
3520 int main(int argc
, char *argv
[]) {
3524 struct generic_conf genconf
;
3526 memset(&genconf
, 0, sizeof(struct generic_conf
));
3528 if (sizeof( struct nbd_request
)!=28) {
3529 fprintf(stderr
,"Bad size of structure. Alignment problems?\n");
3530 exit(EXIT_FAILURE
) ;
3533 modernsocks
= g_array_new(FALSE
, FALSE
, sizeof(int));
3534 childsocks
= g_array_new(FALSE
, FALSE
, sizeof(int));
3537 config_file_pos
= g_strdup(CFILE
);
3538 serve
=cmdline(argc
, argv
, &genconf
);
3540 genconf
.threads
= 4;
3541 servers
= parse_cfile(config_file_pos
, &genconf
, true, &gerr
);
3543 /* Update global variables with parsed values. This will be
3544 * removed once we get rid of global configuration variables. */
3545 glob_flags
|= genconf
.flags
;
3548 g_array_append_val(servers
, *serve
);
3551 if(!servers
|| !servers
->len
) {
3552 if(gerr
&& !(gerr
->domain
== NBDS_ERR
3553 && gerr
->code
== NBDS_ERR_CFILE_NOTFOUND
)) {
3554 g_warning("Could not parse config file: %s",
3555 gerr
? gerr
->message
: "Unknown error");
3559 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
3562 if((!serve
) && (!servers
||!servers
->len
)) {
3564 g_message("No configured exports; quitting.");
3570 g_thread_init(NULL
);
3572 tpool
= g_thread_pool_new(handle_request
, NULL
, genconf
.threads
, FALSE
, NULL
);
3574 setup_servers(servers
, genconf
.modernaddr
, genconf
.modernport
,
3575 genconf
.unixsock
, genconf
.flags
);
3576 dousers(genconf
.user
, genconf
.group
);
3579 gnutls_global_init();
3580 static gnutls_dh_params_t dh_params
;
3581 gnutls_dh_params_init(&dh_params
);
3582 gnutls_dh_params_generate2(dh_params
,
3583 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH
,
3584 // Renamed in GnuTLS 3.3
3585 #if GNUTLS_VERSION_NUMBER >= 0x030300
3586 GNUTLS_SEC_PARAM_MEDIUM
3588 GNUTLS_SEC_PARAM_NORMAL
3593 if((genconf
.modernport
!= NULL
) && strcmp(genconf
.modernport
, "0")==0) {
3595 err("inetd mode requires syslog");
3597 CLIENT
* client
= negotiate(0, servers
, &genconf
);
3601 mainloop_threaded(client
);
3605 serveloop(servers
, &genconf
);