2 * Network Block Device - server
4 * Copyright 1996-1998 Pavel Machek, distribute under GPL
5 * <pavel@atrey.karlin.mff.cuni.cz>
6 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
9 * Version 1.0 - hopefully 64-bit-clean
10 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13 * type, or don't have 64 bit file offsets by defining FS_32BIT
14 * in compile options for nbd-server *only*. This can be done
15 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16 * original autoconf input file, or I would make it a configure
17 * option.) Ken Yap <ken@nlc.net.au>.
18 * Version 1.6 - fix autodetection of block device size and really make 64 bit
19 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20 * Version 2.0 - Version synchronised with client
21 * Version 2.1 - Reap zombie client processes when they exit. Removed
22 * (uncommented) the _IO magic, it's no longer necessary. Wouter
23 * Verhelst <wouter@debian.org>
24 * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25 * Version 2.3 - Fixed code so that Large File Support works. This
26 * removes the FS_32BIT compile-time directive; define
27 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28 * using FS_32BIT. This will allow you to use files >2GB instead of
29 * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30 * Version 2.4 - Added code to keep track of children, so that we can
31 * properly kill them from initscripts. Add a call to daemon(),
32 * so that processes don't think they have to wait for us, which is
33 * interesting for initscripts as well. Wouter Verhelst
35 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36 * zero after fork()ing, resulting in nbd-server going berserk
37 * when it receives a signal with at least one child open. Wouter
38 * Verhelst <wouter@debian.org>
39 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40 * rectified type of mainloop::size_host (sf.net bugs 814435 and
41 * 817385); close the PID file after writing to it, so that the
42 * daemon can actually be found. Wouter Verhelst
44 * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45 * correctly put in network endianness. Many types were corrected
46 * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47 * Version 2.6 - Some code cleanup.
48 * Version 2.7 - Better build system.
49 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50 * lot more work, but this is a start. Wouter Verhelst
52 * 16/03/2010 - Add IPv6 support.
53 * Kitt Tientanopajai <kitt@kitty.in.th>
54 * Neutron Soutmun <neo.neutron@gmail.com>
55 * Suriya Soutmun <darksolar@gmail.com>
58 /* Includes LFS defines, which defines behaviours of some of the following
59 * headers, so must come before those */
61 #define _DEFAULT_SOURCE
62 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
64 #define _BSD_SOURCE /* to get DT_* macros on some platforms */
66 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
69 #include <sys/types.h>
70 #include <sys/socket.h>
72 #include <sys/select.h>
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
81 #include <sys/param.h>
85 #include <netinet/tcp.h>
86 #include <netinet/in.h>
96 #include <linux/falloc.h>
101 #include <arpa/inet.h>
104 #ifdef HAVE_SYS_DIR_H
107 #ifdef HAVE_SYS_DIRENT_H
108 #include <sys/dirent.h>
115 #include <inttypes.h>
119 /* used in cliserv.h, so must come first */
120 #define MY_NAME "nbd_server"
122 #include "nbd-debug.h"
123 #include "netdb-compat.h"
125 #include "treefiles.h"
126 #include "nbd-helper.h"
129 #include <sdp_inet.h>
132 #if HAVE_FSCTL_SET_ZERO_DATA
134 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
138 #include <winioctl.h>
141 /** Default position of the config file */
143 #define SYSCONFDIR "/etc"
145 #define CFILE SYSCONFDIR "/nbd-server/config"
148 #include <gnutls/gnutls.h>
149 #include <gnutls/x509.h>
152 #ifndef HAVE_G_MEMDUP2
153 /* Our uses of g_memdup2 below are safe from g_memdup's 32-bit overflow */
154 #define g_memdup2 g_memdup
158 * Shorten error handling and regular function return sequences
159 * automatically freeing dynamically allocated resources
161 #define _cleanup_(x) __attribute__((__cleanup__(x)))
162 static inline void g_freep(void *p
) {
165 #define _cleanup_g_free_ _cleanup_(g_freep)
166 #define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \
167 static inline void func##p(type *p) { \
171 DEFINE_TRIVIAL_CLEANUP_FUNC(GKeyFile
*, g_key_file_free
)
172 DEFINE_TRIVIAL_CLEANUP_FUNC(gchar
**, g_strfreev
)
174 /** Where our config file actually is */
175 gchar
* config_file_pos
;
180 /* Whether we should avoid daemonizing the main process */
183 /* Whether we should avoid forking into child processes */
187 * The highest value a variable of type off_t can reach. This is a signed
188 * integer, so set all bits except for the leftmost one.
190 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1))
191 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
192 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
195 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
196 #define F_LIST 2 /**< Allow clients to list the exports on a server */
197 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
198 #define F_DUAL_LISTEN 8 /**< Listen on both TCP and unix socket */
199 // also accepts F_FORCEDTLS (which is 16384)
200 GHashTable
*children
;
201 char pidfname
[256]; /**< name of our PID file */
202 char default_authname
[] = SYSCONFDIR
"/nbd-server/allow"; /**< default name of allow file */
204 #define NEG_INIT (1 << 0)
205 #define NEG_OLD (1 << 1)
206 #define NEG_MODERN (1 << 2)
209 * If we want what the system really has set we'd have to read
210 * /proc/sys/fs/pipe-max-size, but for now 1mb should be enough.
212 #define MAX_PIPE_SIZE (1 * 1024 * 1024)
218 /* Our thread pool */
219 GThreadPool
*tpool
= NULL
;
221 /* A work package for the thread pool functions */
222 struct work_package
{
224 struct nbd_request
* req
;
226 void* data
; /**< for write requests */
229 static volatile sig_atomic_t is_sigchld_caught
; /**< Flag set by
234 static volatile sig_atomic_t is_sigterm_caught
; /**< Flag set by
239 static volatile sig_atomic_t is_sighup_caught
; /**< Flag set by SIGHUP
244 GArray
* modernsocks
; /**< Sockets for the modern handler. Not used
245 if a client was only specified on the
246 command line; only port used if
247 oldstyle is set to false (and then the
248 command-line client isn't used, gna gna).
249 This may be more than one socket on
250 systems that don't support serving IPv4
251 and IPv6 from the same socket (like,
253 GArray
* childsocks
; /**< parent-side sockets for communication with children */
254 int commsocket
; /**< child-side socket for communication with parent */
255 static sem_t file_wait_sem
;
257 bool logged_oversized
=false; /**< whether we logged oversized requests already */
260 * Type of configuration file values
263 PARAM_INT
, /**< This parameter is an integer */
264 PARAM_INT64
, /**< This parameter is an integer */
265 PARAM_STRING
, /**< This parameter is a string */
266 PARAM_BOOL
, /**< This parameter is a boolean */
270 * Configuration file values
273 gchar
*paramname
; /**< Name of the parameter, as it appears in
275 gboolean required
; /**< Whether this is a required (as opposed to
276 optional) parameter */
277 PARAM_TYPE ptype
; /**< Type of the parameter. */
278 gpointer target
; /**< Pointer to where the data of this
279 parameter should be written. If ptype is
280 PARAM_BOOL, the data is or'ed rather than
282 gint flagval
; /**< Flag mask for this parameter in case ptype
287 * Configuration file values of the "generic" section
289 struct generic_conf
{
290 gchar
*user
; /**< user we run the server as ("user" key) */
291 gchar
*group
; /**< group we run the server as ("group" key) */
292 gchar
*modernaddr
; /**< address of the modern socket ("listenaddr" key) */
293 gchar
*modernport
; /**< port of the modern socket ("port" key) */
294 gchar
*unixsock
; /**< file name of the unix domain socket ("unixsock" key) */
295 gchar
*certfile
; /**< certificate file ("certfile" key) */
296 gchar
*keyfile
; /**< key file ("keyfile" key) */
297 gchar
*cacertfile
; /**< CA certificate file ("cacertfile" key) */
298 gchar
*tlsprio
; /**< TLS priority string ("tlsprio" key) */
299 gint flags
; /**< global flags; the config parser sets F_OLDSTYLE, F_LIST,
     F_DUAL_LISTEN and F_FORCEDTLS bits here */
300 gint threads
; /**< maximum number of parallel threads we want to run ("max_threads" key) */
304 static int writeit_tls(gnutls_session_t s
, const void *buf
, size_t len
) {
305 _cleanup_g_free_
char *m
= NULL
;
309 if ((res
= gnutls_record_send(s
, buf
, len
)) < 0 && !gnutls_error_is_fatal(res
)) {
310 m
= g_strdup_printf("issue while sending data: %s", gnutls_strerror(res
));
313 m
= g_strdup_printf("could not send data: %s", gnutls_strerror(res
));
324 static int readit_tls(gnutls_session_t s
, void *buf
, size_t len
) {
325 _cleanup_g_free_
char *m
= NULL
;
329 if((res
= gnutls_record_recv(s
, buf
, len
)) < 0 && !gnutls_error_is_fatal(res
)) {
330 m
= g_strdup_printf("issue while receiving data: %s", gnutls_strerror(res
));
333 m
= g_strdup_printf("could not receive data: %s", gnutls_strerror(res
));
344 static int socket_read_tls(CLIENT
* client
, void *buf
, size_t len
) {
345 return readit_tls(*((gnutls_session_t
*)client
->tls_session
), buf
, len
);
348 static int socket_write_tls(CLIENT
* client
, const void *buf
, size_t len
) {
349 return writeit_tls(*((gnutls_session_t
*)client
->tls_session
), buf
, len
);
351 #endif // HAVE_GNUTLS
353 static int socket_read_notls(CLIENT
* client
, void *buf
, size_t len
) {
354 return readit(client
->net
, buf
, len
);
357 static int socket_write_notls(CLIENT
* client
, const void *buf
, size_t len
) {
358 return writeit(client
->net
, buf
, len
);
361 static void socket_read(CLIENT
* client
, void *buf
, size_t len
) {
362 g_assert(client
->socket_read
!= NULL
);
363 if(client
->socket_read(client
, buf
, len
)<0) {
364 g_assert(client
->socket_closed
!= NULL
);
365 client
->socket_closed(client
);
370 * Consume data from a socket that we don't want
372 * @param c the client to read from
373 * @param len the number of bytes to consume
374 * @param buf a buffer
375 * @param bufsiz the size of the buffer
377 static inline void consume(CLIENT
* c
, size_t len
, void * buf
, size_t bufsiz
) {
380 curlen
= (len
>bufsiz
)?bufsiz
:len
;
381 socket_read(c
, buf
, curlen
);
387 * Consume a length field and corresponding payload that we don't want
389 * @param c the client to read from
391 static inline void consume_len(CLIENT
* c
) {
395 socket_read(c
, &len
, sizeof(len
));
397 consume(c
, len
, buf
, sizeof(buf
));
400 static void socket_write(CLIENT
* client
, const void *buf
, size_t len
) {
401 g_assert(client
->socket_write
!= NULL
);
402 if(client
->socket_write(client
, buf
, len
)<0) {
403 g_assert(client
->socket_closed
!= NULL
);
404 client
->socket_closed(client
);
408 static inline void socket_closed_negotiate(CLIENT
* client
) {
409 err("Negotiation failed: %m");
412 static void cleanup_transactionlog(CLIENT
*client
) {
414 if (client
->transactionlogfd
!= -1) {
415 close(client
->transactionlogfd
);
416 client
->transactionlogfd
= -1;
418 if (client
->logsem
!= SEM_FAILED
) {
419 sem_close(client
->logsem
);
420 client
->logsem
= SEM_FAILED
;
421 sem_unlink(client
->semname
);
425 static void lock_logsem(CLIENT
*client
) {
426 sem_wait(client
->logsem
);
428 static void unlock_logsem(CLIENT
*client
) {
429 sem_post(client
->logsem
);
433 * Run a command. This is used for the ``prerun'' and ``postrun'' config file
436 * @param command the command to be run. Read from the config file
437 * @param file the file name we're about to export
439 int do_run(gchar
* command
, gchar
* file
) {
440 _cleanup_g_free_ gchar
* cmd
= NULL
;
443 if(command
&& *command
) {
444 cmd
= g_strdup_printf(command
, file
);
450 static inline void finalize_client(CLIENT
* client
) {
451 g_thread_pool_free(tpool
, FALSE
, TRUE
);
452 do_run(client
->server
->postrun
, client
->exportname
);
453 if(client
->transactionlogfd
!= -1)
454 cleanup_transactionlog(client
);
456 if(client
->server
->flags
& F_COPYONWRITE
) {
457 unlink(client
->difffilename
);
459 serve_dec_ref(client
->server
);
462 static inline void socket_closed_transmission(CLIENT
* client
) {
463 int saved_errno
= errno
;
464 finalize_client(client
);
466 err("Connection dropped: %m");
471 * Splice data between a pipe and a file descriptor
473 * @param fd_in The fd to splice from.
474 * @param off_in The fd_in offset to splice from.
475 * @param fd_out The fd to splice to.
476 * @param off_out The fd_out offset to splice to.
477 * @param len The length to splice.
479 static inline void spliceit(int fd_in
, loff_t
*off_in
, int fd_out
,
480 loff_t
*off_out
, size_t len
)
484 if ((ret
= splice(fd_in
, off_in
, fd_out
, off_out
, len
,
485 SPLICE_F_MOVE
)) <= 0)
486 err("Splice failed: %m");
493 * Print out a message about how to use nbd-server. Split out to a separate
494 * function so that we can call it from multiple places
497 printf("This is nbd-server version " VERSION
"\n");
498 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V] [-n] [-d]\n"
499 "\t-r|--read-only\t\tread only\n"
500 "\t-m|--multi-file\t\tmultiple file\n"
501 "\t-c|--copy-on-write\tcopy on write\n"
502 "\t-C|--config-file\tspecify an alternate configuration file\n"
503 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
504 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
505 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
506 "\t-M|--max-connection\tspecify the maximum number of opened connections\n"
507 "\t-V|--version\t\toutput the version and exit\n"
508 "\t-n|--nodaemon\t\tdo not daemonize main process\n"
509 "\t-d|--dont-fork\t\tdo not fork (implies --nodaemon)\n\n"
510 "\tif port is set to 0, stdin is used (for running from inetd).\n"
511 "\tif file_to_export contains '%%s', it is substituted with the IP\n"
512 "\t\taddress of the machine trying to connect\n"
513 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
514 printf("Using configuration file %s\n", CFILE
);
515 printf("For help, or when encountering bugs, please contact %s\n", PACKAGE_BUGREPORT
);
518 /* Dumps a config file section of the given SERVER*, and exits. */
519 void dump_section(SERVER
* serve
, gchar
* section_header
) {
520 printf("[%s]\n", section_header
);
521 printf("\texportname = %s\n", serve
->exportname
);
522 printf("\tlistenaddr = %s\n", serve
->listenaddr
);
523 if(serve
->flags
& F_READONLY
) {
524 printf("\treadonly = true\n");
526 if(serve
->flags
& F_MULTIFILE
) {
527 printf("\tmultifile = true\n");
529 if(serve
->flags
& F_TREEFILES
) {
530 printf("\ttreefiles = true\n");
532 if(serve
->flags
& F_COPYONWRITE
) {
533 printf("\tcopyonwrite = true\n");
535 if(serve
->expected_size
) {
536 printf("\tfilesize = %lld\n", (long long int)serve
->expected_size
);
538 if(serve
->authname
) {
539 printf("\tauthfile = %s\n", serve
->authname
);
545 * Parse the command line.
547 * @param argc the argc argument to main()
548 * @param argv the argv argument to main()
550 SERVER
* cmdline(int argc
, char *argv
[], struct generic_conf
*genconf
) {
554 struct option long_options
[] = {
555 {"read-only", no_argument
, NULL
, 'r'},
556 {"multi-file", no_argument
, NULL
, 'm'},
557 {"copy-on-write", no_argument
, NULL
, 'c'},
558 {"nodaemon", no_argument
, NULL
, 'n'},
559 {"dont-fork", no_argument
, NULL
, 'd'},
560 {"authorize-file", required_argument
, NULL
, 'l'},
561 {"config-file", required_argument
, NULL
, 'C'},
562 {"pid-file", required_argument
, NULL
, 'p'},
563 {"output-config", required_argument
, NULL
, 'o'},
564 {"max-connection", required_argument
, NULL
, 'M'},
565 {"version", no_argument
, NULL
, 'V'},
572 bool do_output
=false;
573 gchar
* section_header
="";
579 serve
=serve_inc_ref((SERVER
*)g_new0(SERVER
, 1));
580 serve
->authname
= g_strdup(default_authname
);
581 serve
->virtstyle
=VIRT_IPLIT
;
582 while((c
=getopt_long(argc
, argv
, "-C:cwndl:mo:rp:M:V", long_options
, &i
))>=0) {
585 /* non-option argument */
586 switch(nonspecial
++) {
588 if(strchr(optarg
, ':') == strrchr(optarg
, ':')) {
589 addr_port
=g_strsplit(optarg
, ":", 2);
591 /* Check for "@" - maybe user using this separator
594 g_strfreev(addr_port
);
595 addr_port
=g_strsplit(optarg
, "@", 2);
598 addr_port
=g_strsplit(optarg
, "@", 2);
602 genconf
->modernport
=g_strdup(addr_port
[1]);
603 genconf
->modernaddr
=g_strdup(addr_port
[0]);
605 g_free(genconf
->modernaddr
);
606 genconf
->modernaddr
=NULL
;
607 genconf
->modernport
=g_strdup(addr_port
[0]);
609 g_strfreev(addr_port
);
612 serve
->exportname
= g_strdup(optarg
);
613 if(serve
->exportname
[0] != '/') {
614 fprintf(stderr
, "E: The to be exported file needs to be an absolute filename!\n");
619 last
=strlen(optarg
)-1;
621 if (suffix
== 'k' || suffix
== 'K' ||
622 suffix
== 'm' || suffix
== 'M')
624 es
= (off_t
)atoll(optarg
);
632 serve
->expected_size
= es
;
637 serve
->flags
|= F_READONLY
;
640 serve
->flags
|= F_MULTIFILE
;
644 section_header
= g_strdup(optarg
);
647 strncpy(pidfname
, optarg
, 256);
651 serve
->flags
|=F_COPYONWRITE
;
661 g_free(config_file_pos
);
662 config_file_pos
=g_strdup(optarg
);
665 g_free(serve
->authname
);
666 serve
->authname
=g_strdup(optarg
);
669 serve
->max_connections
= strtol(optarg
, NULL
, 0);
672 printf("This is nbd-server version " VERSION
"\n");
681 /* What's left: the port to export, the name of the to be exported
682 * file, and, optionally, the size of the file, in that order. */
684 serve
=serve_dec_ref(serve
);
686 serve
->servename
= "";
690 g_critical("Need a complete configuration on the command line to output a config file section!");
693 dump_section(serve
, section_header
);
698 /* forward declaration of parse_cfile */
699 GArray
* parse_cfile(gchar
* f
, struct generic_conf
*genconf
, bool expect_generic
, GError
** e
);
701 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
702 #define NBD_D_TYPE de->d_type
710 * Parse config file snippets in a directory. Uses readdir() and friends
711 * to find files and open them, then passes them on to parse_cfile
712 * with expect_generic set to false
714 GArray
* do_cfile_dir(gchar
* dir
, struct generic_conf
*const genconf
, GError
** e
) {
715 DIR* dirh
= opendir(dir
);
718 GArray
* retval
= NULL
;
723 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_DIR_UNKNOWN
, "Invalid directory specified: %s", strerror(errno
));
727 while((de
= readdir(dirh
))) {
728 int saved_errno
=errno
;
729 fname
= g_build_filename(dir
, de
->d_name
, NULL
);
732 /* Filesystem doesn't return type of
733 * file through readdir, or struct dirent
734 * doesn't have d_type. Run stat() on the file
736 if(stat(fname
, &stbuf
)) {
740 if (!S_ISREG(stbuf
.st_mode
)) {
744 /* Skip unless the name ends with '.conf' */
745 if(strcmp((de
->d_name
+ strlen(de
->d_name
) - 5), ".conf")) {
748 tmp
= parse_cfile(fname
, genconf
, false, e
);
754 retval
= g_array_new(FALSE
, TRUE
, sizeof(SERVER
*));
755 retval
= g_array_append_vals(retval
, tmp
->data
, tmp
->len
);
756 g_array_free(tmp
, TRUE
);
764 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_READDIR_ERR
, "Error trying to read directory: %s", strerror(errno
));
767 g_array_free(retval
, TRUE
);
776 * To be called by GArray clearing function.
777 * @param server pointer to server element
779 static void serve_clear_element(SERVER
**server
) {
780 serve_dec_ref(*server
);
784 * Parse the config file.
786 * @param f the name of the config file
788 * @param genconf a pointer to generic configuration which will get
789 * updated with parsed values. If NULL, then parsed generic
790 * configuration values are safely and silently discarded.
792 * @param e a GError. Error code can be any of the following:
793 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
794 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
795 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
797 * @param expect_generic if true, we expect a configuration file that
798 * contains a [generic] section. If false, we don't.
800 * @return a GArray of SERVER* pointers. If the config file is empty or does not
801 * exist, returns an empty GArray; if the config file contains an
802 * error, returns NULL, and e is set appropriately
804 GArray
* parse_cfile(gchar
* f
, struct generic_conf
*const genconf
, bool expect_generic
, GError
** e
) {
805 const char* DEFAULT_ERROR
= "Could not parse %s in group %s: %s";
806 const char* MISSING_REQUIRED_ERROR
= "Could not find required value %s in group %s: %s";
809 gchar
*virtstyle
=NULL
;
811 { "exportname", TRUE
, PARAM_STRING
, &(s
.exportname
), 0 },
812 { "authfile", FALSE
, PARAM_STRING
, &(s
.authname
), 0 },
813 { "filesize", FALSE
, PARAM_OFFT
, &(s
.expected_size
), 0 },
814 { "virtstyle", FALSE
, PARAM_STRING
, &(virtstyle
), 0 },
815 { "prerun", FALSE
, PARAM_STRING
, &(s
.prerun
), 0 },
816 { "postrun", FALSE
, PARAM_STRING
, &(s
.postrun
), 0 },
817 { "transactionlog", FALSE
, PARAM_STRING
, &(s
.transactionlog
), 0 },
818 { "cowdir", FALSE
, PARAM_STRING
, &(s
.cowdir
), 0 },
819 { "readonly", FALSE
, PARAM_BOOL
, &(s
.flags
), F_READONLY
},
820 { "multifile", FALSE
, PARAM_BOOL
, &(s
.flags
), F_MULTIFILE
},
821 { "treefiles", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TREEFILES
},
822 { "copyonwrite", FALSE
, PARAM_BOOL
, &(s
.flags
), F_COPYONWRITE
},
823 { "waitfile", FALSE
, PARAM_BOOL
, &(s
.flags
), F_WAIT
},
824 { "sparse_cow", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SPARSE
},
825 { "sdp", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SDP
},
826 { "sync", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SYNC
},
827 { "flush", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FLUSH
},
828 { "fua", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FUA
},
829 { "rotational", FALSE
, PARAM_BOOL
, &(s
.flags
), F_ROTATIONAL
},
830 { "temporary", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TEMPORARY
},
831 { "trim", FALSE
, PARAM_BOOL
, &(s
.flags
), F_TRIM
},
832 { "datalog", FALSE
, PARAM_BOOL
, &(s
.flags
), F_DATALOG
},
833 { "listenaddr", FALSE
, PARAM_STRING
, &(s
.listenaddr
), 0 },
834 { "maxconnections", FALSE
, PARAM_INT
, &(s
.max_connections
), 0 },
835 { "force_tls", FALSE
, PARAM_BOOL
, &(s
.flags
), F_FORCEDTLS
},
836 { "splice", FALSE
, PARAM_BOOL
, &(s
.flags
), F_SPLICE
},
838 const int lp_size
=sizeof(lp
)/sizeof(PARAM
);
839 struct generic_conf genconftmp
;
841 { "user", FALSE
, PARAM_STRING
, &(genconftmp
.user
), 0 },
842 { "group", FALSE
, PARAM_STRING
, &(genconftmp
.group
), 0 },
843 { "oldstyle", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_OLDSTYLE
}, // only left here so we can issue an appropriate error message when the option is used
844 { "listenaddr", FALSE
, PARAM_STRING
, &(genconftmp
.modernaddr
), 0 },
845 { "port", FALSE
, PARAM_STRING
, &(genconftmp
.modernport
), 0 },
846 { "includedir", FALSE
, PARAM_STRING
, &cfdir
, 0 },
847 { "allowlist", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_LIST
},
848 { "unixsock", FALSE
, PARAM_STRING
, &(genconftmp
.unixsock
), 0 },
849 { "duallisten", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_DUAL_LISTEN
}, // Used to listen on both TCP and unix socket
850 { "max_threads", FALSE
, PARAM_INT
, &(genconftmp
.threads
), 0 },
851 { "force_tls", FALSE
, PARAM_BOOL
, &(genconftmp
.flags
), F_FORCEDTLS
},
852 { "certfile", FALSE
, PARAM_STRING
, &(genconftmp
.certfile
), 0 },
853 { "keyfile", FALSE
, PARAM_STRING
, &(genconftmp
.keyfile
), 0 },
854 { "cacertfile", FALSE
, PARAM_STRING
, &(genconftmp
.cacertfile
), 0 },
855 { "tlsprio", FALSE
, PARAM_STRING
, &(genconftmp
.tlsprio
), 0 },
858 int p_size
=sizeof(gp
)/sizeof(PARAM
);
859 _cleanup_(g_key_file_freep
) GKeyFile
*cfile
= NULL
;
860 g_autoptr(GError
) err
= NULL
;
861 const char *err_msg
=NULL
;
868 _cleanup_g_free_ gchar
* startgroup
= NULL
;
872 memset(&genconftmp
, 0, sizeof(struct generic_conf
));
874 genconftmp
.tlsprio
= "NORMAL:+VERS-TLS-ALL:-VERS-TLS1.0:+VERS-TLS1.1:%SERVER_PRECEDENCE";
877 /* Use the passed configuration values as defaults. The
878 * parsing algorithm below updates all parameter targets
879 * found from configuration files. */
880 memcpy(&genconftmp
, genconf
, sizeof(struct generic_conf
));
883 cfile
= g_key_file_new();
884 retval
= g_array_new(FALSE
, TRUE
, sizeof(SERVER
*));
886 g_array_set_clear_func(retval
, (GDestroyNotify
)serve_clear_element
);
888 if(!g_key_file_load_from_file(cfile
, f
, G_KEY_FILE_KEEP_COMMENTS
|
889 G_KEY_FILE_KEEP_TRANSLATIONS
, &err
)) {
890 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_NOTFOUND
, "Could not open config file %s: %s",
894 startgroup
= g_key_file_get_start_group(cfile
);
895 if((!startgroup
|| strcmp(startgroup
, "generic")) && expect_generic
) {
896 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_MISSING_GENERIC
, "Config file does not contain the [generic] group!");
899 groups
= g_key_file_get_groups(cfile
, NULL
);
900 for(i
=0;groups
[i
];i
++) {
901 memset(&s
, '\0', sizeof(SERVER
));
903 /* After the [generic] group or when we're parsing an include
904 * directory, start parsing exports */
905 if(i
==1 || !expect_generic
) {
909 for(j
=0;j
<p_size
;j
++) {
910 assert(p
[j
].target
!= NULL
);
911 assert(p
[j
].ptype
==PARAM_INT
||p
[j
].ptype
==PARAM_STRING
||p
[j
].ptype
==PARAM_BOOL
||p
[j
].ptype
==PARAM_INT64
);
914 ival
= g_key_file_get_integer(cfile
,
919 *((gint
*)p
[j
].target
) = ival
;
923 i64val
= g_key_file_get_int64(cfile
,
928 *((gint64
*)p
[j
].target
) = i64val
;
932 sval
= g_key_file_get_string(cfile
,
937 *((gchar
**)p
[j
].target
) = sval
;
941 bval
= g_key_file_get_boolean(cfile
,
943 p
[j
].paramname
, &err
);
946 *((gint
*)p
[j
].target
) |= p
[j
].flagval
;
948 *((gint
*)p
[j
].target
) &= ~(p
[j
].flagval
);
954 if(err
->code
== G_KEY_FILE_ERROR_KEY_NOT_FOUND
) {
956 /* Ignore not-found error for optional values */
960 err_msg
= MISSING_REQUIRED_ERROR
;
963 err_msg
= DEFAULT_ERROR
;
965 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, err_msg
, p
[j
].paramname
, groups
[i
], err
->message
);
966 g_array_free(retval
, TRUE
);
971 if(!strncmp(virtstyle
, "none", 4)) {
972 s
.virtstyle
=VIRT_NONE
;
973 } else if(!strncmp(virtstyle
, "ipliteral", 9)) {
974 s
.virtstyle
=VIRT_IPLIT
;
975 } else if(!strncmp(virtstyle
, "iphash", 6)) {
976 s
.virtstyle
=VIRT_IPHASH
;
977 } else if(!strncmp(virtstyle
, "cidrhash", 8)) {
978 s
.virtstyle
=VIRT_CIDR
;
979 if(strlen(virtstyle
)<10) {
980 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle
, groups
[i
]);
981 g_array_free(retval
, TRUE
);
984 s
.cidrlen
=strtol(virtstyle
+8, NULL
, 0);
986 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_INVALID
, "Invalid value %s for parameter virtstyle in group %s", virtstyle
, groups
[i
]);
987 g_array_free(retval
, TRUE
);
991 s
.virtstyle
=VIRT_IPLIT
;
993 if(genconftmp
.flags
& F_OLDSTYLE
) {
994 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
995 g_message("Exiting.");
999 if (s
.flags
& F_SPLICE
) {
1000 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
, "This nbd-server was built without splice support, yet group %s uses it", groups
[i
]);
1001 g_array_free(retval
, TRUE
);
1005 /* We can't mix copyonwrite and splice. */
1006 if ((s
.flags
& F_COPYONWRITE
) && (s
.flags
& F_SPLICE
)) {
1007 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_INVALID_SPLICE
,
1008 "Cannot mix copyonwrite with splice for an export in group %s",
1010 g_array_free(retval
, TRUE
);
1013 if ((s
.flags
& F_COPYONWRITE
) && (s
.flags
& F_WAIT
)) {
1014 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_INVALID_WAIT
,
1015 "Cannot mix copyonwrite with waitfile for an export in group %s",
1017 g_array_free(retval
, TRUE
);
1020 /* We can't mix datalog and splice. */
1021 if ((s
.flags
& F_DATALOG
) && (s
.flags
& F_SPLICE
)) {
1022 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_INVALID_SPLICE
,
1023 "Cannot mix datalog with splice for an export in group %s",
1025 g_array_free(retval
, TRUE
);
1028 /* Don't need to free this, it's not our string */
1030 /* Don't append values for the [generic] group */
1031 if(i
>0 || !expect_generic
) {
1032 s
.servename
= groups
[i
];
1034 SERVER
*srv
= serve_inc_ref(g_memdup2(&s
, sizeof(SERVER
)));
1035 g_array_append_val(retval
, srv
);
1038 if(s
.flags
& F_SDP
) {
1039 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
, "This nbd-server was built without support for SDP, yet group %s uses it", groups
[i
]);
1040 g_array_free(retval
, TRUE
);
1046 GArray
* extra
= do_cfile_dir(cfdir
, &genconftmp
, e
);
1048 retval
= g_array_append_vals(retval
, extra
->data
, extra
->len
);
1050 g_array_free(extra
, TRUE
);
1053 g_array_free(retval
, TRUE
);
1058 if(i
==1 && expect_generic
) {
1059 g_set_error(e
, NBDS_ERR
, NBDS_ERR_CFILE_NO_EXPORTS
, "The config file does not specify any exports");
1063 /* Return the updated generic configuration through the
1064 * pointer parameter. */
1065 memcpy(genconf
, &genconftmp
, sizeof(struct generic_conf
));
1072 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
1073 * main loop of the root server process. This allows us to separate the signal
1074 * catching from th actual task triggered by SIGCHLD and hence processing in the
1075 * interrupt context is kept as minimial as possible.
1077 * @param s the signal we're handling (must be SIGCHLD, or something
1078 * is severely wrong)
1080 static void sigchld_handler(const int s G_GNUC_UNUSED
) {
1081 is_sigchld_caught
= 1;
1085 * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
1087 * @param key the key
1088 * @param value the value corresponding to the above key
1089 * @param user_data a pointer which we always set to 1, so that we know what
1092 void killchild(gpointer key
, gpointer value
, gpointer user_data
) {
1095 kill(*pid
, SIGTERM
);
1099 * Handle SIGTERM by setting atomically a flag which will be evaluated in the
1100 * main loop of the root server process. This allows us to separate the signal
1101 * catching from th actual task triggered by SIGTERM and hence processing in the
1102 * interrupt context is kept as minimial as possible.
1104 * @param s the signal we're handling (must be SIGTERM, or something
1105 * is severely wrong).
1107 static void sigterm_handler(const int s G_GNUC_UNUSED
) {
1108 is_sigterm_caught
= 1;
1112 * Handle SIGHUP by setting atomically a flag which will be evaluated in
1113 * the main loop of the root server process. This allows us to separate
1114 * the signal catching from th actual task triggered by SIGHUP and hence
1115 * processing in the interrupt context is kept as minimial as possible.
1117 * @param s the signal we're handling (must be SIGHUP, or something
1118 * is severely wrong).
1120 static void sighup_handler(const int s G_GNUC_UNUSED
) {
1121 is_sighup_caught
= 1;
1124 static void sigusr1_handler(const int s G_GNUC_UNUSED
) {
1125 msg(LOG_INFO
, "Got SIGUSR1");
1126 sem_post(&file_wait_sem
);
1130 * Get the file handle and offset, given an export offset.
1132 * @param client The client we're serving for
1133 * @param a The offset to get corresponding file/offset for
1134 * @param fhandle [out] File descriptor
1135 * @param foffset [out] Offset into fhandle
1136 * @param maxbytes [out] Tells how many bytes can be read/written
1137 * from fhandle starting at foffset (0 if there is no limit)
1138 * @return 0 on success, -1 on failure
1140 int get_filepos(CLIENT
*client
, off_t a
, int* fhandle
, off_t
* foffset
, size_t* maxbytes
) {
1141 GArray
* const export
= client
->export
;
1143 /* Negative offset not allowed */
1147 /* Open separate file for treefiles */
1148 if (client
->server
->flags
& F_TREEFILES
) {
1149 *foffset
= a
% TREEPAGESIZE
;
1150 *maxbytes
= (( 1 + (a
/TREEPAGESIZE
) ) * TREEPAGESIZE
) - a
; // start position of next block
1151 *fhandle
= open_treefile(client
->exportname
, ((client
->server
->flags
& F_READONLY
) ? O_RDONLY
: O_RDWR
), client
->exportsize
,a
, &client
->lock
);
1155 /* Binary search for last file with starting offset <= a */
1158 int end
= export
->len
- 1;
1159 while( start
<= end
) {
1160 int mid
= (start
+ end
) / 2;
1161 fi
= g_array_index(export
, FILE_INFO
, mid
);
1162 if( fi
.startoff
< a
) {
1164 } else if( fi
.startoff
> a
) {
1172 /* end should never go negative, since first startoff is 0 and a >= 0 */
1175 fi
= g_array_index(export
, FILE_INFO
, end
);
1176 *fhandle
= fi
.fhandle
;
1177 *foffset
= a
- fi
.startoff
;
1179 if( end
+1 < export
->len
) {
1180 FILE_INFO fi_next
= g_array_index(export
, FILE_INFO
, end
+1);
1181 *maxbytes
= fi_next
.startoff
- a
;
1188 * Write an amount of bytes at a given offset to the right file. This
1189 * abstracts the write-side of the multiple file option.
1191 * @param a The offset where the write should start
1192 * @param buf The buffer to write from
1193 * @param len The length of buf
1194 * @param client The client we're serving for
1195 * @param fua Flag to indicate 'Force Unit Access'
1196 * @return The number of bytes actually written, or -1 in case of an error
1198 ssize_t
rawexpwrite(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1204 if(get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1206 if(maxbytes
&& len
> maxbytes
)
1209 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle
, (long long unsigned)foffset
, (unsigned int)len
, fua
);
1211 retval
= pwrite(fhandle
, buf
, len
, foffset
);
1212 if(client
->server
->flags
& F_SYNC
) {
1216 /* This is where we would do the following
1217 * #ifdef USE_SYNC_FILE_RANGE
1218 * However, we don't, for the reasons set out below
1219 * by Christoph Hellwig <hch@infradead.org>
1222 * fdatasync is equivalent to fsync except that it does not flush
1223 * non-essential metadata (basically just timestamps in practice), but it
1224 * does flush metadata required to find the data again, e.g. allocation
1225 * information and extent maps. sync_file_range does nothing but flush
1226 * out pagecache content - it means you basically won't get your data
1227 * back in case of a crash if you either:
1229 * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1230 * b) are using a sparse file on a filesystem
1231 * c) are using a fallocate-preallocated file on a filesystem
1232 * d) use any file on a COW filesystem like btrfs
1234 * e.g. it only does anything useful for you if you do not have a volatile
1235 * write cache, and either use a raw block device node, or just overwrite
1236 * an already fully allocated (and not preallocated) file on a non-COW
1240 * What we should do is open a second FD with O_DSYNC set, then write to
1241 * that when appropriate. However, with a Linux client, every REQ_FUA
1242 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1247 sync_file_range(fhandle
, foffset
, len
,
1248 SYNC_FILE_RANGE_WAIT_BEFORE
| SYNC_FILE_RANGE_WRITE
|
1249 SYNC_FILE_RANGE_WAIT_AFTER
);
1254 /* close file pointer in case of treefiles */
1255 if (client
->server
->flags
& F_TREEFILES
) {
1262 * Call rawexpwrite repeatedly until all data has been written.
1264 * @param a The offset where the write should start
1265 * @param buf The buffer to write from
1266 * @param len The length of buf
1267 * @param client The client we're serving for
1268 * @param fua Flag to indicate 'Force Unit Access'
1269 * @return 0 on success, nonzero on failure
1271 int rawexpwrite_fully(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1274 while(len
> 0 && (ret
=rawexpwrite(a
, buf
, len
, client
, fua
)) > 0 ) {
1279 return (ret
< 0 || len
!= 0);
1282 static void setup_reply(struct nbd_reply
* rep
, struct nbd_request
* req
) {
1283 rep
->magic
= htonl(NBD_REPLY_MAGIC
);
1285 rep
->cookie
= req
->cookie
;
1288 static void log_reply(CLIENT
*client
, struct nbd_reply
*prply
) {
1289 if (client
->transactionlogfd
!= -1) {
1290 lock_logsem(client
);
1291 writeit(client
->transactionlogfd
, prply
, sizeof(*prply
));
1292 unlock_logsem(client
);
1296 static void log_structured_reply(CLIENT
*client
, struct nbd_structured_reply
*prply
) {
1297 if (client
->transactionlogfd
!= -1) {
1298 lock_logsem(client
);
1299 writeit(client
->transactionlogfd
, prply
, sizeof(*prply
));
1300 unlock_logsem(client
);
1304 void send_structured_chunk(CLIENT
*client
, struct nbd_request
*req
, uint16_t flags
, uint16_t type
, uint32_t length
, int bufcount
, void *buf
[], size_t buflen
[]) {
1305 struct nbd_structured_reply rep
;
1306 rep
.magic
= htonl(NBD_STRUCTURED_REPLY_MAGIC
);
1307 rep
.flags
= htons(flags
);
1308 rep
.type
= htons(type
);
1309 rep
.cookie
= req
->cookie
;
1310 rep
.paylen
= htonl(length
);
1311 pthread_mutex_lock(&(client
->lock
));
1312 socket_write(client
, &rep
, sizeof rep
);
1313 for(int i
=0; i
<bufcount
; i
++) {
1314 socket_write(client
, buf
[i
], buflen
[i
]);
1316 pthread_mutex_unlock(&(client
->lock
));
1317 log_structured_reply(client
, &rep
);
1320 void send_structured_chunk_v(CLIENT
*client
, struct nbd_request
*req
, uint16_t flags
, uint16_t type
, uint32_t length
, int bufcount
, ...) {
1321 struct nbd_structured_reply rep
;
1323 rep
.magic
= htonl(NBD_STRUCTURED_REPLY_MAGIC
);
1324 rep
.flags
= htons(flags
);
1325 rep
.type
= htons(type
);
1326 rep
.cookie
= req
->cookie
;
1327 rep
.paylen
= htonl(length
);
1328 va_start(ap
, bufcount
);
1329 pthread_mutex_lock(&(client
->lock
));
1330 socket_write(client
, &rep
, sizeof rep
);
1331 for(int i
=0; i
<bufcount
; i
++) {
1332 void *buf
= va_arg(ap
, void*);
1333 size_t size
= va_arg(ap
, size_t);
1334 socket_write(client
, buf
, size
);
1336 pthread_mutex_unlock(&(client
->lock
));
1337 log_structured_reply(client
, &rep
);
1342 * Find the location to write the data for the next chunk to.
1343 * Assumes checks on memory sizes etc have already been done.
1345 * @param ctx the context we're working with
1346 * (the offset into the request is taken from ctx->current_offset)
1347 * (the length of this chunk is taken from ctx->current_len)
1349 char * find_read_buf(READ_CTX
*ctx
) {
1350 if(!(ctx
->is_structured
) || ctx
->df
) {
1351 return ctx
->buf
+ ctx
->current_offset
;
1353 ctx
->buf
= malloc(ctx
->current_len
);
1355 err("Could not allocate memory for request");
1360 void confirm_read(CLIENT
*client
, READ_CTX
*ctx
, size_t len_read
) {
1361 if(ctx
->is_structured
&& !(ctx
->df
)) {
1362 uint64_t offset
= htonll(ctx
->req
->from
+ (uint64_t)(ctx
->current_offset
));
1363 send_structured_chunk_v(client
, ctx
->req
, 0, NBD_REPLY_TYPE_OFFSET_DATA
, len_read
+ 8, 2, &offset
, sizeof offset
, ctx
->buf
, (size_t)len_read
);
1368 void complete_read(CLIENT
*client
, READ_CTX
*ctx
, uint32_t error
, char *errmsg
, uint16_t msglen
, bool with_offset
, uint64_t err_offset
) {
1370 uint64_t offset
= 0;
1371 if(ctx
->is_structured
) {
1373 uint32_t len
= ctx
->req
->len
;
1374 if(error
!= 0 && with_offset
) {
1377 if(error
== 0 || with_offset
) {
1378 offset
= htonll(ctx
->req
->from
);
1379 send_structured_chunk_v(client
, ctx
->req
, 0, NBD_REPLY_TYPE_OFFSET_DATA
, len
+ 8, 2, &offset
, sizeof offset
, ctx
->buf
, err_offset
);
1384 struct nbd_structured_error_payload pl
;
1392 offset
+= err_offset
;
1393 type
= NBD_REPLY_TYPE_ERROR_OFFSET
;
1395 type
= NBD_REPLY_TYPE_ERROR
;
1398 bufsize
[0] = sizeof pl
;
1399 total_size
= bufsize
[0];
1401 buf
[payloads
] = errmsg
;
1402 bufsize
[payloads
++] = msglen
;
1403 total_size
+= msglen
;
1406 buf
[payloads
] = &offset
;
1407 bufsize
[payloads
++] = sizeof offset
;
1408 total_size
+= sizeof offset
;
1410 send_structured_chunk(client
, ctx
->req
, NBD_REPLY_FLAG_DONE
, type
, total_size
, payloads
, buf
, bufsize
);
1413 send_structured_chunk_v(client
, ctx
->req
, NBD_REPLY_FLAG_DONE
, NBD_REPLY_TYPE_NONE
, 0, 0);
1415 struct nbd_reply rep
;
1416 setup_reply(&rep
, ctx
->req
);
1420 log_reply(client
, &rep
);
1421 pthread_mutex_lock(&(client
->lock
));
1422 socket_write(client
, &rep
, sizeof rep
);
1424 socket_write(client
, ctx
->buf
, ctx
->buflen
);
1426 pthread_mutex_unlock(&(client
->lock
));
1432 * Read an amount of bytes at a given offset from the right file. This
1433 * abstracts the read-side of the multiple files option.
1435 * @param a The offset where the read should start
1436 * @param buf A buffer to read into
1437 * @param len The size of buf
1438 * @param client The client we're serving for
1439 * @return The number of bytes actually read, or -1 in case of an
1442 ssize_t
rawexpread(off_t a
, char *buf
, size_t len
, CLIENT
*client
) {
1448 if(get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1450 if(maxbytes
&& len
> maxbytes
)
1453 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle
, (long long unsigned int)foffset
, (unsigned int)len
);
1455 retval
= pread(fhandle
, buf
, len
, foffset
);
1456 if (client
->server
->flags
& F_TREEFILES
) {
1463 * Call rawexpread repeatedly until all data has been read.
1464 * @return 0 on success, nonzero on failure
1466 int rawexpread_fully(READ_CTX
*ctx
, CLIENT
*client
) {
1471 while(ctx
->current_len
> 0) {
1472 buf
= find_read_buf(ctx
);
1473 if((ret
= rawexpread((off_t
)ctx
->req
->from
+ (off_t
)ctx
->current_offset
, buf
, ctx
->current_len
, client
)) <= 0) {
1476 confirm_read(client
, ctx
, ret
);
1477 ctx
->current_offset
+= ret
;
1478 ctx
->current_len
-= ret
;
1480 return (ret
< 0 || ctx
->current_len
!= 0);
1484 int rawexpsplice(int pipe
, off_t a
, size_t len
, CLIENT
*client
, int dir
,
1492 if (get_filepos(client
, a
, &fhandle
, &foffset
, &maxbytes
))
1494 if (maxbytes
&& len
> maxbytes
)
1497 DEBUG("(SPLICE %s fd %d offset %llu len %u), ",
1498 (dir
== SPLICE_IN
) ? "from" : "to", fhandle
,
1499 (unsigned long long)a
, (unsigned)len
);
1502 * SPLICE_F_MOVE doesn't actually work at the moment, but in the future
1503 * it might, so go ahead and use it.
1505 if (dir
== SPLICE_IN
) {
1506 retval
= splice(fhandle
, &foffset
, pipe
, NULL
, len
,
1509 retval
= splice(pipe
, NULL
, fhandle
, &foffset
, len
,
1511 if (client
->server
->flags
& F_SYNC
)
1516 if (client
->server
->flags
& F_TREEFILES
)
1522 * Splice an amount of bytes from the given offset from/into the right file
1523 * from/into the given pipe.
1524 * @param pipe The pipe we are using for this splice.
1525 * @param a The offset of the file we are operating on.
1526 * @param len The length of the splice.
1527 * @param client The client we're splicing for.
1528 * @param dir The direction we are doing the splice in.
1529 * @param fua Set if this is a write and we need to fua.
1530 * @return 0 on success, nonzero on failure.
1532 int expsplice(int pipe
, off_t a
, size_t len
, CLIENT
*client
, int dir
, int fua
)
1537 (ret
= rawexpsplice(pipe
, a
, len
, client
, dir
, fua
)) > 0) {
1541 return (ret
< 0 || len
!= 0);
1543 #endif /* HAVE_SPLICE */
1546 * Read an amount of bytes at a given offset from the right file. This
1547 * abstracts the read-side of the copyonwrite stuff, and calls
1548 * rawexpread() with the right parameters to do the actual work.
1549 * @param ctx The read context; the start offset (ctx->req->from plus
1550 * ctx->current_offset) and the length (ctx->req->len) are taken from it,
1551 * and the destination buffer is obtained through find_read_buf(ctx)
1552 * @param client The client we're going to read for
1553 * @return 0 on success, nonzero on failure
1555 int expread(READ_CTX
*ctx
, CLIENT
*client
) {
1556 off_t rdlen
, offset
;
1557 off_t mapcnt
, mapl
, maph
, pagestart
;
1558 off_t a
= (off_t
)ctx
->current_offset
+ (off_t
)ctx
->req
->from
;
1559 size_t len
= (size_t) ctx
->req
->len
;
1562 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len
, (unsigned long long)a
);
1564 if (!(client
->server
->flags
& F_COPYONWRITE
) && !((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)))
1565 return(rawexpread_fully(ctx
, client
));
1567 mapl
=a
/DIFFPAGESIZE
; maph
=(a
+len
-1)/DIFFPAGESIZE
;
1569 for (mapcnt
=mapl
;mapcnt
<=maph
;mapcnt
++) {
1570 pagestart
=mapcnt
*DIFFPAGESIZE
;
1572 rdlen
=(0<DIFFPAGESIZE
-offset
&& len
<(size_t)(DIFFPAGESIZE
-offset
)) ?
1573 len
: (size_t)DIFFPAGESIZE
-offset
;
1574 if (!(client
->server
->flags
& F_COPYONWRITE
))
1575 pthread_rwlock_rdlock(&client
->export_lock
);
1576 if (client
->difmap
[mapcnt
]!=(u32
)(-1)) { /* the block is already there */
1577 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt
,
1578 (unsigned long)(client
->difmap
[mapcnt
]));
1579 char *buf
= find_read_buf(ctx
);
1580 if (pread(client
->difffile
, buf
, rdlen
, client
->difmap
[mapcnt
]*DIFFPAGESIZE
+offset
) != rdlen
) {
1583 confirm_read(client
, ctx
, rdlen
);
1584 } else { /* the block is not there */
1585 if ((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)){
1586 DEBUG("Page %llu is not here, and waiting for file\n",
1587 (unsigned long long)mapcnt
);
1590 DEBUG("Page %llu is not here, we read the original one\n",
1591 (unsigned long long)mapcnt
);
1592 ctx
->current_len
= rdlen
;
1593 if(rawexpread_fully(ctx
, client
)) goto fail
;
1596 if (!(client
->server
->flags
& F_COPYONWRITE
))
1597 pthread_rwlock_unlock(&client
->export_lock
);
1598 len
-=rdlen
; a
+=rdlen
;
1603 if (!(client
->server
->flags
& F_COPYONWRITE
))
1604 pthread_rwlock_unlock(&client
->export_lock
);
1611 * Write an amount of bytes at a given offset to the right file. This
1612 * abstracts the write-side of the copyonwrite option, and calls
1613 * rawexpwrite() with the right parameters to do the actual work.
1615 * @param a The offset where the write should start
1616 * @param buf The buffer to write from
1617 * @param len The length of buf
1618 * @param client The client we're going to write for.
1619 * @param fua Flag to indicate 'Force Unit Access'
1620 * @return 0 on success, nonzero on failure
1622 int expwrite(off_t a
, char *buf
, size_t len
, CLIENT
*client
, int fua
) {
1623 char pagebuf
[DIFFPAGESIZE
];
1624 off_t mapcnt
,mapl
,maph
;
1629 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len
, (unsigned long long)a
);
1632 if (!(client
->server
->flags
& F_COPYONWRITE
) && !((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)))
1633 return(rawexpwrite_fully(a
, buf
, len
, client
, fua
));
1635 mapl
=a
/DIFFPAGESIZE
; maph
=(a
+len
-1)/DIFFPAGESIZE
;
1637 for (mapcnt
=mapl
;mapcnt
<=maph
;mapcnt
++) {
1638 pagestart
=mapcnt
*DIFFPAGESIZE
;
1639 offset
=a
-pagestart
;
1640 wrlen
=(0<DIFFPAGESIZE
-offset
&& len
<(size_t)(DIFFPAGESIZE
-offset
)) ?
1641 len
: (size_t)DIFFPAGESIZE
-offset
;
1643 if (!(client
->server
->flags
& F_COPYONWRITE
))
1644 pthread_rwlock_rdlock(&client
->export_lock
);
1645 if (client
->difmap
[mapcnt
]!=(u32
)(-1)) { /* the block is already there */
1646 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt
,
1647 (unsigned long)(client
->difmap
[mapcnt
])) ;
1648 if (pwrite(client
->difffile
, buf
, wrlen
, client
->difmap
[mapcnt
]*DIFFPAGESIZE
+offset
) != wrlen
) goto fail
;
1649 } else { /* the block is not there */
1650 client
->difmap
[mapcnt
]=(client
->server
->flags
&F_SPARSE
)?mapcnt
:client
->difffilelen
++;
1651 DEBUG("Page %llu is not here, we put it at %lu\n",
1652 (unsigned long long)mapcnt
,
1653 (unsigned long)(client
->difmap
[mapcnt
]));
1654 if ((offset
!= 0) || (wrlen
!= DIFFPAGESIZE
)){
1655 if ((client
->server
->flags
& F_WAIT
) && (client
->export
== NULL
)){
1656 DEBUG("error: we can write only whole page while waiting for file\n");
1661 char *ptr
= pagebuf
;
1662 while(rdlen
> 0 && (ret
= rawexpread(pagestart
, ptr
, rdlen
, client
)) > 0) {
1667 if(ret
< 0 ) goto fail
;
1669 memcpy(pagebuf
+offset
,buf
,wrlen
) ;
1670 if (write(client
->difffile
, pagebuf
, DIFFPAGESIZE
) != DIFFPAGESIZE
)
1673 if (!(client
->server
->flags
& F_COPYONWRITE
))
1674 pthread_rwlock_unlock(&client
->export_lock
);
1675 len
-=wrlen
; a
+=wrlen
; buf
+=wrlen
;
1677 if (client
->server
->flags
& F_SYNC
) {
1678 fsync(client
->difffile
);
1680 /* open question: would it be cheaper to do multiple sync_file_ranges?
1681 as we iterate through the above?
1683 fdatasync(client
->difffile
);
1687 if (!(client
->server
->flags
& F_COPYONWRITE
))
1688 pthread_rwlock_unlock(&client
->export_lock
);
1694 * Write an amount of zeroes at a given offset to the right file.
1695 * This routine could be optimised by not calling expwrite. However,
1696 * this is by far the simplest way to do it.
1698 * @param req the request
1699 * @param client The client we're going to write for.
1700 * @return 0 on success, nonzero on failure
1702 int expwrite_zeroes(struct nbd_request
* req
, CLIENT
* client
, int fua
) {
1703 off_t a
= req
->from
;
1704 size_t len
= req
->len
;
1705 size_t maxsize
= 64LL*1024LL*1024LL;
1706 /* use calloc() as sadly MAP_ANON is apparently not POSIX standard */
1707 char *buf
= calloc (1, maxsize
);
1713 ret
= expwrite(a
, buf
, l
, client
, fua
);
1725 * Flush data to a client
1727 * @param client The client we're going to write for.
1728 * @return 0 on success, nonzero on failure
1730 int expflush(CLIENT
*client
) {
1733 if (client
->server
->flags
& F_COPYONWRITE
) {
1734 return fsync(client
->difffile
);
1737 if (client
->server
->flags
& F_WAIT
) {
1738 return fsync(client
->difffile
);
1741 if (client
->server
->flags
& F_TREEFILES
) {
1742 // all we can do is force sync the entire filesystem containing the tree
1743 if (client
->server
->flags
& F_READONLY
)
1749 for (i
= 0; i
< client
->export
->len
; i
++) {
1750 FILE_INFO fi
= g_array_index(client
->export
, FILE_INFO
, i
);
1751 if (fsync(fi
.fhandle
) < 0)
1758 void punch_hole(int fd
, off_t off
, off_t len
) {
1759 DEBUG("Request to punch a hole in fd=%d, starting from %llu, length %llu\n", fd
, (unsigned long long)off
, (unsigned long long)len
);
1761 // fallocate -- files, Linux
1764 if(fallocate(fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
, off
, len
) == 0)
1766 } while(errno
== EINTR
);
1768 // ioctl(BLKDISCARD) -- block devices, Linux
1770 uint64_t range
[2] = {off
, len
};
1772 if(ioctl(fd
, BLKDISCARD
, range
) == 0)
1774 } while(errno
== EINTR
);
1777 #if HAVE_FSCTL_SET_ZERO_DATA
1778 FILE_ZERO_DATA_INFORMATION zerodata
;
1779 zerodata
.FileOffset
.QuadPart
= off
;
1780 zerodata
.BeyondFinalZero
.QuadPart
= off
+ len
;
1781 HANDLE w32handle
= (HANDLE
)_get_osfhandle(fd
);
1783 DeviceIoControl(w32handle
, FSCTL_SET_ZERO_DATA
, &zerodata
, sizeof(zerodata
), NULL
, 0, &bytesret
, NULL
);
1787 DEBUG("punching holes failed: %s", strerror(errno
));
1789 DEBUG("punching holes not supported on this platform\n");
1793 static void send_reply(CLIENT
* client
, uint32_t opt
, uint32_t reply_type
, ssize_t datasize
, const void* data
) {
1797 uint32_t reply_type
;
1799 } __attribute__ ((packed
)) header
= {
1800 htonll(0x3e889045565a9LL
),
1806 datasize
= strlen((char*)data
);
1807 header
.datasize
= htonl(datasize
);
1809 socket_write(client
, &header
, sizeof(header
));
1811 socket_write(client
, data
, datasize
);
1816 * Find the name of the file we have to serve. This will use g_strdup_printf
1817 * to put the IP address of the client inside a filename containing
1818 * "%s" (in the form as specified by the "virtstyle" option). That name
1819 * is then written to client->exportname.
1821 * @param net A socket connected to an nbd client
1822 * @param client information about the client. The IP address in human-readable
1823 * format will be written to a new char* buffer, the address of which will be
1824 * stored in client->clientname.
1825 * @return: 0 - OK, -1 - failed.
1827 int set_peername(int net
, CLIENT
*client
) {
1828 struct sockaddr_storage netaddr
;
1829 struct sockaddr
* addr
= (struct sockaddr
*)&netaddr
;
1830 socklen_t addrinlen
= sizeof( struct sockaddr_storage
);
1831 struct addrinfo hints
;
1832 struct addrinfo
*ai
= NULL
;
1833 char peername
[NI_MAXHOST
];
1834 char netname
[NI_MAXHOST
];
1839 if (getsockname(net
, addr
, &addrinlen
) < 0) {
1840 msg(LOG_INFO
, "getsockname failed: %m");
1844 if(netaddr
.ss_family
== AF_UNIX
) {
1845 client
->clientaddr
.ss_family
= AF_UNIX
;
1846 strcpy(peername
, "unix");
1848 if (getpeername(net
, (struct sockaddr
*) &(client
->clientaddr
), &addrinlen
) < 0) {
1849 msg(LOG_INFO
, "getpeername failed: %m");
1852 if((e
= getnameinfo((struct sockaddr
*)&(client
->clientaddr
), addrinlen
,
1853 peername
, sizeof (peername
), NULL
, 0, NI_NUMERICHOST
))) {
1854 msg(LOG_INFO
, "getnameinfo failed: %s", gai_strerror(e
));
1858 memset(&hints
, '\0', sizeof (hints
));
1859 hints
.ai_flags
= AI_ADDRCONFIG
;
1860 e
= getaddrinfo(peername
, NULL
, &hints
, &ai
);
1863 msg(LOG_INFO
, "getaddrinfo failed: %s", gai_strerror(e
));
1869 if(strncmp(peername
, "::ffff:", 7) == 0) {
1870 memmove(peername
, peername
+7, strlen(peername
));
1873 switch(client
->server
->virtstyle
) {
1875 msg(LOG_DEBUG
, "virtualization is off");
1876 client
->exportname
=g_strdup(client
->server
->exportname
);
1879 msg(LOG_DEBUG
, "virtstyle iphash");
1880 for(i
=0;i
<strlen(peername
);i
++) {
1881 if(peername
[i
]=='.') {
1887 msg(LOG_DEBUG
, "virtstyle ipliteral");
1888 client
->exportname
=g_strdup_printf(client
->server
->exportname
, peername
);
1891 msg(LOG_DEBUG
, "virtstyle cidr %d", client
->server
->cidrlen
);
1892 memcpy(&netaddr
, &(client
->clientaddr
), addrinlen
);
1894 if(client
->clientaddr
.ss_family
== AF_UNIX
) {
1895 tmp
= g_strdup(peername
);
1897 assert((ai
->ai_family
== AF_INET
) || (ai
->ai_family
== AF_INET6
));
1898 if(ai
->ai_family
== AF_INET
) {
1900 } else if(ai
->ai_family
== AF_INET6
) {
1903 g_assert_not_reached();
1905 uint8_t* addrptr
= (uint8_t*)(((struct sockaddr
*)&netaddr
)->sa_data
);
1906 for(int i
= 0; i
< addrbits
; i
+=8) {
1907 int masklen
= client
->server
->cidrlen
- i
;
1908 masklen
= masklen
> 0 ? masklen
: 0;
1909 uint8_t mask
= getmaskbyte(masklen
);
1913 getnameinfo((struct sockaddr
*) &netaddr
, addrinlen
,
1914 netname
, sizeof (netname
), NULL
, 0, NI_NUMERICHOST
);
1915 tmp
=g_strdup_printf("%s/%s", netname
, peername
);
1919 client
->exportname
=g_strdup_printf(client
->server
->exportname
, tmp
);
1929 msg(LOG_INFO
, "connect from %s, assigned file is %s",
1930 peername
, client
->exportname
);
1931 client
->clientname
=g_strdup(peername
);
1935 int commit_diff(CLIENT
* client
, bool lock
, int fhandle
){
1937 int pagecount
= client
->exportsize
/DIFFPAGESIZE
;
1939 char* buf
= malloc(sizeof(char)*DIFFPAGESIZE
);
1941 for (int i
=0; i
<pagecount
; i
++){
1942 offset
= DIFFPAGESIZE
*i
;
1944 pthread_rwlock_wrlock(&client
->export_lock
);
1945 if (client
->difmap
[i
] != (u32
)-1){
1947 DEBUG("flushing dirty page %d, offset %ld\n", i
, offset
);
1948 if (pread(client
->difffile
, buf
, DIFFPAGESIZE
, client
->difmap
[i
]*DIFFPAGESIZE
) != DIFFPAGESIZE
) {
1949 msg(LOG_WARNING
, "could not read while committing diff: %m");
1951 pthread_rwlock_unlock(&client
->export_lock
);
1955 if (pwrite(fhandle
, buf
, DIFFPAGESIZE
, offset
) != DIFFPAGESIZE
) {
1956 msg(LOG_WARNING
, "could not write while committing diff: %m");
1958 pthread_rwlock_unlock(&client
->export_lock
);
1962 client
->difmap
[i
] = (u32
)-1;
1965 pthread_rwlock_unlock(&client
->export_lock
);
1972 void* wait_file(void *void_ptr
) {
1973 CLIENT
* client
= (CLIENT
*)void_ptr
;
1976 mode_t mode
= O_RDWR
;
1982 while (fi
.fhandle
< 1){
1983 sem_wait(&file_wait_sem
);
1984 msg(LOG_INFO
, "checking for file %s", client
->server
->exportname
);
1985 fi
.fhandle
= open(client
->server
->exportname
, mode
);
1988 msg(LOG_INFO
, "File %s appeared, fd %d", client
->server
->exportname
, fi
.fhandle
);
1990 // first time there may be lot of data so we lock only per page
1992 dirtycount
= commit_diff(client
, true, fi
.fhandle
);
1993 } while (dirtycount
> 0);
1995 //last time we lock export for the whole time until we switch write destination
1996 pthread_rwlock_wrlock(&client
->export_lock
);
1998 dirtycount
= commit_diff(client
, false, fi
.fhandle
);
1999 } while (dirtycount
> 0);
2001 export
= g_array_new(TRUE
, TRUE
, sizeof(FILE_INFO
));
2002 g_array_append_val(export
, fi
);
2004 client
->export
= export
;
2005 pthread_rwlock_unlock(&client
->export_lock
);
2006 msg(LOG_INFO
, "Waiting for file ended, switching to exported file %s", client
->server
->exportname
);
2012 * Set up client export array, which is an array of FILE_INFO.
2013 * Also, split a single exportfile into multiple ones, if that was asked.
2014 * @param client information on the client which we want to setup export for
2016 bool setupexport(CLIENT
* client
) {
2018 off_t laststartoff
= 0, lastsize
= 0;
2019 int multifile
= (client
->server
->flags
& F_MULTIFILE
);
2020 int treefile
= (client
->server
->flags
& F_TREEFILES
);
2021 int temporary
= (client
->server
->flags
& F_TEMPORARY
) && !multifile
;
2022 int cancreate
= (client
->server
->expected_size
) && !multifile
;
2024 if (treefile
|| (client
->server
->flags
& F_WAIT
)) {
2025 client
->export
= NULL
; // this could be thousands of files so we open handles on demand although its slower
2026 client
->exportsize
= client
->server
->expected_size
; // available space is not checked, as it could change during runtime anyway
2028 if(client
->server
->flags
& F_WAIT
){
2029 pthread_t wait_file_thread
;
2030 if (pthread_create(&wait_file_thread
, NULL
, wait_file
, client
)){
2031 DEBUG("failed to create wait_file thread");
2037 client
->export
= g_array_new(TRUE
, TRUE
, sizeof(FILE_INFO
));
2039 /* If multi-file, open as many files as we can.
2040 * If not, open exactly one file.
2041 * Calculate file sizes as we go to get total size. */
2044 _cleanup_g_free_ gchar
*tmpname
= NULL
;
2045 _cleanup_g_free_ gchar
* error_string
= NULL
;
2049 /* if expected_size is specified, and this is the first file, we can create the file */
2050 mode_t mode
= (client
->server
->flags
& F_READONLY
) ?
2051 O_RDONLY
: (O_RDWR
| (cancreate
?O_CREAT
:0));
2054 tmpname
=g_strdup_printf("%s.%d-XXXXXX", client
->exportname
, i
);
2055 DEBUG( "Opening %s\n", tmpname
);
2056 fi
.fhandle
= mkstemp(tmpname
);
2059 tmpname
=g_strdup_printf("%s.%d", client
->exportname
, i
);
2061 tmpname
=g_strdup(client
->exportname
);
2063 DEBUG( "Opening %s\n", tmpname
);
2064 fi
.fhandle
= open(tmpname
, mode
, 0600);
2065 if(fi
.fhandle
== -1 && mode
== O_RDWR
) {
2066 /* Try again because maybe media was read-only */
2067 fi
.fhandle
= open(tmpname
, O_RDONLY
);
2068 if(fi
.fhandle
!= -1) {
2069 /* Opening the base file in copyonwrite mode is
2071 if(!(client
->server
->flags
& F_COPYONWRITE
)) {
2072 client
->server
->flags
|= F_AUTOREADONLY
;
2073 client
->server
->flags
|= F_READONLY
;
2078 if(fi
.fhandle
== -1) {
2079 if(multifile
&& i
>0)
2081 error_string
=g_strdup_printf(
2082 "Could not open exported file %s: %%m",
2084 err_nonfatal(error_string
);
2089 unlink(tmpname
); /* File will stick around whilst FD open */
2092 fi
.startoff
= laststartoff
+ lastsize
;
2093 g_array_append_val(client
->export
, fi
);
2095 /* Starting offset and size of this file will be used to
2096 * calculate starting offset of next file */
2097 laststartoff
= fi
.startoff
;
2098 lastsize
= size_autodetect(fi
.fhandle
);
2100 /* If we created the file, it will be length zero */
2101 if (!lastsize
&& cancreate
) {
2103 if(ftruncate (fi
.fhandle
, client
->server
->expected_size
)<0) {
2104 err_nonfatal("Could not expand file: %m");
2107 lastsize
= client
->server
->expected_size
;
2108 break; /* don't look for any more files */
2111 if(!multifile
|| temporary
)
2115 /* Set export size to total calculated size */
2116 client
->exportsize
= laststartoff
+ lastsize
;
2118 /* Export size may be overridden */
2119 if(client
->server
->expected_size
) {
2120 /* desired size must be <= total calculated size */
2121 if(client
->server
->expected_size
> client
->exportsize
) {
2122 err_nonfatal("Size of exported file is too big\n");
2126 client
->exportsize
= client
->server
->expected_size
;
2130 msg(LOG_INFO
, "Size of exported file/device is %llu", (unsigned long long)client
->exportsize
);
2132 msg(LOG_INFO
, "Total number of files: %d", i
);
2135 msg(LOG_INFO
, "Total number of (potential) files: %" PRId64
, (client
->exportsize
+TREEPAGESIZE
-1)/TREEPAGESIZE
);
2140 bool copyonwrite_prepare(CLIENT
* client
) {
2142 _cleanup_g_free_ gchar
* dir
= NULL
;
2143 _cleanup_g_free_ gchar
* export_base
= NULL
;
2144 if (client
->server
->cowdir
!= NULL
) {
2145 dir
= g_strdup(client
->server
->cowdir
);
2147 dir
= g_strdup(dirname(client
->exportname
));
2149 export_base
= g_strdup(basename(client
->exportname
));
2150 client
->difffilename
= g_strdup_printf("%s/%s-%s-%d.diff",dir
,export_base
,client
->clientname
,
2152 msg(LOG_INFO
, "About to create map and diff file %s", client
->difffilename
) ;
2153 client
->difffile
=open(client
->difffilename
,O_RDWR
| O_CREAT
| O_TRUNC
,0600) ;
2154 if (client
->difffile
<0) {
2155 err("Could not create diff file (%m)");
2158 if ((client
->difmap
=calloc(client
->exportsize
/DIFFPAGESIZE
,sizeof(u32
)))==NULL
) {
2159 err("Could not allocate memory");
2162 for (i
=0;i
<client
->exportsize
/DIFFPAGESIZE
;i
++) client
->difmap
[i
]=(u32
)-1;
2167 void send_export_info(CLIENT
* client
, SERVER
* server
, bool maybe_zeroes
) {
2168 uint64_t size_host
= htonll((u64
)(client
->exportsize
));
2169 uint16_t flags
= NBD_FLAG_HAS_FLAGS
| NBD_FLAG_SEND_WRITE_ZEROES
;
2171 socket_write(client
, &size_host
, 8);
2172 if (server
->flags
& F_READONLY
)
2173 flags
|= NBD_FLAG_READ_ONLY
;
2174 if (server
->flags
& F_FLUSH
)
2175 flags
|= NBD_FLAG_SEND_FLUSH
;
2176 if (server
->flags
& F_FUA
)
2177 flags
|= NBD_FLAG_SEND_FUA
;
2178 if (server
->flags
& F_ROTATIONAL
)
2179 flags
|= NBD_FLAG_ROTATIONAL
;
2180 if (server
->flags
& F_TRIM
)
2181 flags
|= NBD_FLAG_SEND_TRIM
;
2182 if (!(server
->flags
& F_COPYONWRITE
))
2183 flags
|= NBD_FLAG_CAN_MULTI_CONN
;
2184 if (client
->clientflags
& F_STRUCTURED
)
2185 flags
|= NBD_FLAG_SEND_DF
;
2186 flags
= htons(flags
);
2187 socket_write(client
, &flags
, sizeof(flags
));
2188 if (!(glob_flags
& F_NO_ZEROES
) && maybe_zeroes
) {
2190 memset(zeros
, '\0', sizeof(zeros
));
2191 socket_write(client
, zeros
, 124);
2196 * Setup the transaction log
2198 * The function does all things required for the transaction log:
2199 * - Create a new log file.
2200 * - allocate the posix semaphore for synchronization.
2201 * - Report if a log file already exists.
2202 * - If needed add a header to the log.
2204 * If something goes wrong, logging is disabled.
2206 * @param client the CLIENT structure with .server and .net members set
/*
 * Open the transaction log configured for client->server and prepare the
 * named semaphore that guards writes to it.
 *
 * Every failure path visible here logs at LOG_INFO, closes the descriptor
 * and resets client->transactionlogfd to -1.
 * NOTE(review): the declarations of `ret` and `fdinfo`, the open() flags and
 * the conditions guarding each failure path are not visible in this view.
 */
2209 static void setup_transactionlog(CLIENT
*client
) {
2213 /* 1) create the file */
2214 if((client
->transactionlogfd
=
2215 open(client
->server
->transactionlog
,
2217 S_IRUSR
| S_IWUSR
)) ==
2219 msg(LOG_INFO
, "Could not open transactionlog %s, moving on without it",
2220 client
->server
->transactionlog
);
2224 /* 2) If needed, write flags */
/* With F_DATALOG set, a pseudo-request is written to the log first: tracelog
 * magic, type NBD_TRACELOG_SET_DATALOG, a magic `from` value and len = TRUE,
 * all converted to network byte order. */
2225 if (client
->server
->flags
& F_DATALOG
) {
2226 struct nbd_request req
;
2229 req
.magic
= htonl(NBD_TRACELOG_MAGIC
);
2230 req
.type
= htonl(NBD_TRACELOG_SET_DATALOG
);
2232 req
.from
= htonll(NBD_TRACELOG_FROM_MAGIC
);
2233 req
.len
= htonl(TRUE
);
2235 ret
= writeit(client
->transactionlogfd
, &req
, sizeof(struct nbd_request
));
2237 msg(LOG_INFO
, "Could not write to transactionlog %s, moving on without it",
2238 client
->server
->transactionlog
);
2239 close(client
->transactionlogfd
);
2240 client
->transactionlogfd
= -1;
2245 /* 3) Allocate the semaphore used for locking */
2246 ret
= fstat(client
->transactionlogfd
, &fdinfo
);
2248 msg(LOG_INFO
, "Could not stat transactionlog %s, moving on without it",
2249 client
->server
->transactionlog
);
2250 close(client
->transactionlogfd
);
2251 client
->transactionlogfd
= -1;
/* The semaphore name is derived from the log file's device and inode numbers,
 * so the name depends on the file, not on the path used to open it. */
2254 snprintf(client
->semname
, sizeof(client
->semname
), "/nbd-server-%llx-%llx",
2255 (unsigned long long)fdinfo
.st_dev
,
2256 (unsigned long long)fdinfo
.st_ino
);
/* Created if absent (O_CREAT), mode 0600, initial value 1. */
2257 client
->logsem
= sem_open(client
->semname
, O_CREAT
, 0600, 1);
2258 if (client
->logsem
== SEM_FAILED
) {
2259 msg(LOG_INFO
, "Could not allocate semaphore for transactionlog %s, moving on without it",
2260 client
->server
->transactionlog
);
2261 close(client
->transactionlogfd
);
2262 client
->transactionlogfd
= -1;
/**
 * Commit to exporting the chosen export
 *
 * When a client sends NBD_OPT_EXPORT_NAME or NBD_OPT_GO, we need to do
 * a number of things (verify whether the client is allowed access, try
 * to open files, etc etc) before we're ready to actually serve the
 * export.
 *
 * This function does all those things.
 *
 * @param client the CLIENT structure with the .net member set
 * @param server the export to bind to the client; a reference to it is
 * stored in client->server
 *
 * @return true if the client is allowed access to the export, false
 * otherwise
 */
/*
 * Bind `client` to `server` and perform the pre-serving steps visible below:
 * take a reference on the server, initialise the per-client mutex and rwlock,
 * ask the parent process over `commsocket` whether the connection is allowed,
 * resolve and authorise the peer, set up the transaction log, run the prerun
 * script, open the export and apply socket options.
 *
 * NOTE(review): the statements executed when a step fails (and the final
 * return) are not visible in this view.
 */
2281 static bool commit_client(CLIENT
* client
, SERVER
* server
) {
2285 client
->server
= serve_inc_ref(server
);
2286 client
->exportsize
= OFFT_MAX
;
2287 client
->transactionlogfd
= -1;
2288 if(pthread_mutex_init(&(client
->lock
), NULL
)) {
2289 msg(LOG_ERR
, "Unable to initialize mutex");
2292 if (pthread_rwlock_init(&client
->export_lock
, NULL
)){
2293 msg(LOG_ERR
, "Unable to initialize write lock");
2296 /* Check whether we exceeded the maximum number of allowed
2297 * clients already */
/* The export name is sent to the parent as a length followed by the bytes;
 * the parent answers with a single byte that is read into `acl`. */
2301 len
= strlen(client
->server
->servename
);
2302 writeit(commsocket
, &len
, sizeof len
);
2303 writeit(commsocket
, client
->server
->servename
, len
);
2304 readit(commsocket
, &acl
, 1);
2309 msg(LOG_ERR
, "Connection not allowed (too many clients)");
2312 msg(LOG_ERR
, "Connection not allowed (unknown by parent?!?)");
2316 /* Check whether the client is listed in the authfile */
2317 if (set_peername(client
->net
, client
)) {
2318 msg(LOG_ERR
, "Failed to set peername");
2322 if (!authorized_client(client
)) {
2323 msg(LOG_INFO
, "Client '%s' is not authorized to access",
2324 client
->clientname
);
2328 /* Set up the transactionlog, if we need one */
2329 if (client
->server
->transactionlog
&& (client
->transactionlogfd
== -1))
2330 setup_transactionlog(client
);
2332 /* Run any pre scripts that we may need */
2333 if (do_run(client
->server
->prerun
, client
->exportname
)) {
2334 msg(LOG_INFO
, "Client '%s' not allowed access by prerun script",
2335 client
->clientname
);
/* From here on a closed socket is handled by the transmission-phase callback. */
2338 client
->socket_closed
= socket_closed_transmission
;
2339 if(!setupexport(client
)) {
2343 if (client
->server
->flags
& F_COPYONWRITE
) {
2344 if(!copyonwrite_prepare(client
)) {
/* NOTE(review): the F_WAIT branch calls copyonwrite_prepare(), the same helper
 * as the F_COPYONWRITE branch above — confirm this is intended. */
2349 if (client
->server
->flags
& F_WAIT
) {
2350 if(!copyonwrite_prepare(client
)) {
2355 setmysockopt(client
->net
);
/*
 * Handle NBD_OPT_EXPORT_NAME: read a 32-bit big-endian name length and the
 * name itself, look the name up in `servers`, and on a match record the
 * client flags, commit the client to that export and send the export info.
 *
 * Exports flagged F_FORCEDTLS are skipped when no TLS session exists, so they
 * look the same as unknown names to the client.
 * NOTE(review): the check of the malloc() result, the NUL-termination of
 * `name`, its release and the return statements are not visible in this view.
 */
2360 static CLIENT
* handle_export_name(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2365 socket_read(client
, &namelen
, sizeof(namelen
));
2366 namelen
= ntohl(namelen
);
/* Names longer than 4096 bytes are rejected before any allocation. */
2367 if(namelen
> 4096) {
2371 name
= malloc(namelen
+1);
2373 socket_read(client
, name
, namelen
);
2377 for(i
=0; i
<servers
->len
; i
++) {
2378 SERVER
* serve
= (g_array_index(servers
, SERVER
*, i
));
2379 // hide exports that are TLS-only if we haven't negotiated TLS
2381 if ((serve
->flags
& F_FORCEDTLS
) && !client
->tls_session
) {
2384 if(!strcmp(serve
->servename
, name
)) {
2385 client
->clientfeats
= cflags
;
2387 if(!commit_client(client
, serve
)) {
2390 send_export_info(client
, serve
, true);
/* Reached when no export was selected in the loop above. */
2395 err("Negotiation failed/8a: Requested export not found, or is TLS-only and client did not negotiate TLS");
2398 static void handle_list(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2402 char *ptr
= buf
+ sizeof(len
);
2404 socket_read(client
, &len
, sizeof(len
));
2407 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "NBD_OPT_LIST with nonzero data length is not a valid request");
2409 if(!(glob_flags
& F_LIST
)) {
2410 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "Listing of exports denied by server configuration");
2411 err_nonfatal("Client tried disallowed list option");
2414 for(i
=0; i
<servers
->len
; i
++) {
2415 SERVER
* serve
= (g_array_index(servers
, SERVER
*, i
));
2416 // Hide TLS-only exports if we haven't negotiated TLS yet
2417 if(!client
->tls_session
&& (serve
->flags
& F_FORCEDTLS
)) {
2420 len
= htonl(strlen(serve
->servename
));
2421 memcpy(buf
, &len
, sizeof(len
));
2422 strncpy(ptr
, serve
->servename
, sizeof(buf
) - sizeof(len
));
2423 send_reply(client
, opt
, NBD_REP_SERVER
, strlen(serve
->servename
)+sizeof(len
), buf
);
2425 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
/*
 * Certificate verification callback used for the TLS handshake.
 *
 * Visible steps: verify the peer with gnutls_certificate_verify_peers2(),
 * check the certificate type, import the first certificate of the peer list
 * (DER format) and compare its activation and expiration times with the
 * current time. The failure path logs and returns
 * GNUTLS_E_CERTIFICATE_ERROR.
 * NOTE(review): the statements taken inside each failing check, the success
 * return value and any gnutls_x509_crt_deinit() call are not visible here.
 */
2429 static int verify_cert(gnutls_session_t session
) {
2431 unsigned int status
, cert_list_size
;
2432 const gnutls_datum_t
*cert_list
;
2433 gnutls_x509_crt_t cert
;
2434 time_t now
= time(NULL
);
2436 ret
= gnutls_certificate_verify_peers2(session
, &status
);
2437 if(ret
< 0 || status
!= 0 || gnutls_certificate_type_get(session
) !=
2442 if(gnutls_x509_crt_init(&cert
) < 0) {
2446 cert_list
= gnutls_certificate_get_peers(session
, &cert_list_size
);
2447 if(cert_list
== NULL
) {
/* Only the first entry of the peer's certificate list is inspected. */
2450 if(gnutls_x509_crt_import(cert
, &cert_list
[0], GNUTLS_X509_FMT_DER
) < 0) {
/* Not yet valid. */
2453 if(gnutls_x509_crt_get_activation_time(cert
) > now
) {
/* Already expired. */
2456 if(gnutls_x509_crt_get_expiration_time(cert
) < now
) {
2459 // TODO: check CRLs and/or OCSP etc. Patches welcome.
2460 msg(LOG_INFO
, "client certificate verification successful");
2463 msg(LOG_ERR
, "E: client certificate verification failed");
2464 return GNUTLS_E_CERTIFICATE_ERROR
;
/*
 * Handle NBD_OPT_STARTTLS: acknowledge the option, build a GnuTLS server
 * session on client->net, run the handshake and, on success, switch the
 * client's read/write callbacks to the TLS variants.
 *
 * `retval` starts as `client`; the check_rv() macro sets it to NULL and jumps
 * to the `exit` label when a GnuTLS call returns a negative value.
 * NOTE(review): the `exit` label, the return statement and the cleanup done
 * when retval == NULL are not visible in this view; neither is any release of
 * `x509_cred` or `priority_cache`.
 */
2467 CLIENT
* handle_starttls(CLIENT
* client
, int opt
, GArray
* servers
, uint32_t cflags
, struct generic_conf
*genconf
) {
2468 #define check_rv(c) if((c)<0) { retval = NULL; goto exit; }
2469 gnutls_certificate_credentials_t x509_cred
;
2470 CLIENT
* retval
= client
;
2471 gnutls_priority_t priority_cache
;
/* The session handle is heap-allocated so it can be stored in the client. */
2472 gnutls_session_t
*session
= g_new0(gnutls_session_t
, 1);
2476 socket_read(client
, &len
, sizeof(len
));
/* STARTTLS takes no payload: drain whatever was sent and refuse. */
2477 if(G_UNLIKELY(len
!= 0)) {
/* NOTE(review): this scratch buffer is 1 MiB of automatic storage. */
2478 char buf
[1024*1024];
2479 consume(client
, len
, buf
, sizeof(buf
));
2480 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "Sending a STARTTLS command with data is invalid");
2484 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
2486 check_rv(gnutls_certificate_allocate_credentials(&x509_cred
));
2487 gnutls_certificate_set_verify_function(x509_cred
, verify_cert
);
2488 check_rv(gnutls_certificate_set_x509_trust_file(x509_cred
, genconf
->cacertfile
, GNUTLS_X509_FMT_PEM
));
2489 check_rv(gnutls_certificate_set_x509_key_file(x509_cred
, genconf
->certfile
, genconf
->keyfile
, GNUTLS_X509_FMT_PEM
));
2490 check_rv(gnutls_priority_init(&priority_cache
, genconf
->tlsprio
, NULL
));
2491 check_rv(gnutls_init(session
, GNUTLS_SERVER
));
2492 check_rv(gnutls_priority_set(*session
, priority_cache
));
2493 check_rv(gnutls_credentials_set(*session
, GNUTLS_CRD_CERTIFICATE
, x509_cred
));
/* Ask the client for a certificate during the handshake. */
2495 gnutls_certificate_server_set_request(*session
, GNUTLS_CERT_REQUEST
);
2496 #if GNUTLS_VERSION_NUMBER >= 0x030109
2497 gnutls_transport_set_int(*session
, client
->net
);
2499 gnutls_transport_set_ptr(*session
, (gnutls_transport_ptr_t
) (intptr_t) client
->net
);
/* The handshake is repeated for as long as it fails with a non-fatal error. */
2502 ret
= gnutls_handshake(*session
);
2503 } while(ret
< 0 && gnutls_error_is_fatal(ret
) == 0);
2506 err_nonfatal(gnutls_strerror(ret
));
2507 gnutls_bye(*session
, GNUTLS_SHUT_RDWR
);
2508 gnutls_deinit(*session
);
/* Success path: all further traffic on this client goes through TLS. */
2512 client
->tls_session
= session
;
2513 client
->socket_read
= socket_read_tls
;
2514 client
->socket_write
= socket_write_tls
;
2517 if(retval
== NULL
&& session
!= NULL
) {
2520 /* export names cannot be chosen before NBD_OPT_STARTTLS and be retained */
2521 if(retval
!= NULL
&& retval
->server
!= NULL
) {
2522 retval
->server
= NULL
;
/**
 * Handle an NBD_OPT_STRUCTURED_REPLY message
 */
/*
 * Handle NBD_OPT_STRUCTURED_REPLY: enable structured replies for this client.
 *
 * A request with a payload is answered with NBD_REP_ERR_INVALID and the
 * payload is drained; a repeated request is answered with
 * NBD_REP_ERR_INVALID; otherwise F_STRUCTURED is set in client->clientflags
 * and NBD_REP_ACK is sent.
 * NOTE(review): the conditions and returns around the error paths, and the
 * declarations of `len` and `buf`, are not visible in this view.
 */
2531 static void handle_structured_reply(CLIENT
*client
, uint32_t opt
, GArray
*servers
, uint32_t cflags
) {
2535 socket_read(client
, &len
, sizeof(len
));
2538 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "NBD_OPT_STRUCTURED_REPLY with nonzero data length is not a valid request");
2540 consume(client
, len
, buf
, sizeof buf
);
/* The option may only be negotiated once per connection. */
2543 if(client
->clientflags
& F_STRUCTURED
) {
2544 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "NBD_OPT_STRUCTURED_REPLY has already been called");
2547 client
->clientflags
|= F_STRUCTURED
;
2548 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
/**
 * Handle an NBD_OPT_INFO or NBD_OPT_GO request.
 */
/*
 * Handle NBD_OPT_INFO and NBD_OPT_GO.
 *
 * Visible flow: read the option length and the export-name length, validate
 * them, read the name, find the matching export (reporting
 * NBD_REP_ERR_TLS_REQD for TLS-only exports without a TLS session), read the
 * number of information requests, for NBD_OPT_GO commit the client to the
 * export, answer each NBD_INFO_EXPORT request, and finish with NBD_REP_ACK.
 *
 * NOTE(review): several declarations (`name`, `i`, `request`, `buf`), the
 * byte-order conversion of `len`, and all return statements are not visible
 * in this view.
 */
2554 static bool handle_info(CLIENT
* client
, uint32_t opt
, GArray
* servers
, uint32_t cflags
) {
2555 uint32_t namelen
, len
;
2558 SERVER
*server
= NULL
;
2559 uint16_t n_requests
;
2562 bool sent_export
= false;
2563 uint32_t reptype
= NBD_REP_ERR_UNKNOWN
;
/* Local named `msg`; other functions in this file call a function of the
 * same name for logging. */
2564 char *msg
= "Export unknown";
2566 socket_read(client
, &len
, sizeof(len
));
2568 socket_read(client
, &namelen
, sizeof(namelen
));
/* htonl() is used here for the network-to-host conversion. */
2569 namelen
= htonl(namelen
);
/* NOTE(review): `len - 6` is unsigned arithmetic and wraps when len < 6 —
 * confirm that len is validated in lines not visible here. */
2570 if(namelen
> (len
- 6)) {
2571 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "An OPT_INFO request cannot be smaller than the length of the name + 6");
2572 consume(client
, len
- sizeof(namelen
), buf
, sizeof(buf
));
2574 if(namelen
> 4096) {
2575 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "The name for this OPT_INFO request is too long");
/* NOTE(review): only the name bytes are drained on this path; draining of the
 * trailing request list is not visible. */
2576 consume(client
, namelen
, buf
, sizeof(buf
));
2579 name
= malloc(namelen
+ 1);
2581 send_reply(client
, opt
, reptype
, -1, "nbd server out of memory");
2585 socket_read(client
, name
, namelen
);
2589 for(i
=0; i
<servers
->len
; i
++) {
2590 SERVER
*serve
= (g_array_index(servers
, SERVER
*, i
));
2591 if (!strcmp(serve
->servename
, name
)) {
2592 if ((serve
->flags
& F_FORCEDTLS
) && !client
->tls_session
) {
2593 reptype
= NBD_REP_ERR_TLS_REQD
;
2594 msg
= "TLS is required for that export";
/* Number of information requests that follow, 16-bit big-endian. */
2601 socket_read(client
, &n_requests
, sizeof(n_requests
));
2602 n_requests
= ntohs(n_requests
);
2604 consume(client
, n_requests
* sizeof(request
), buf
,
2606 send_reply(client
, opt
, reptype
, -1, msg
);
/* NBD_OPT_GO additionally commits the client to the export. */
2609 if (opt
== NBD_OPT_GO
) {
2610 client
->clientfeats
= cflags
;
2611 if(!commit_client(client
, server
)) {
2612 consume(client
, n_requests
* sizeof(request
), buf
,
2614 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "Access denied by server configuration");
2618 for(i
=0; i
<n_requests
; i
++) {
2619 socket_read(client
, &request
, sizeof(request
));
2620 switch(ntohs(request
)) {
2621 case NBD_INFO_EXPORT
:
/* Reply payload is 12 bytes: the 2-byte info type echoed back, followed by
 * what send_export_info() writes. */
2622 send_reply(client
, opt
, NBD_REP_INFO
, 12, NULL
);
2623 socket_write(client
, &request
, 2);
2624 send_export_info(client
, server
, false);
2628 // ignore all other options for now.
2633 request
= htons(NBD_INFO_EXPORT
);
2634 send_reply(client
, opt
, NBD_REP_INFO
, 12, NULL
);
2635 socket_write(client
, &request
, 2);
2636 send_export_info(client
, server
, false);
2638 send_reply(client
, opt
, NBD_REP_ACK
, 0, NULL
);
/**
 * Do the initial negotiation.
 *
 * @param net The socket we're doing the negotiation over.
 * @param servers The array of known servers.
 * @param genconf the global options (needed for accessing TLS config data)
 */
/*
 * Run the newstyle handshake on a freshly accepted connection.
 *
 * A zeroed CLIENT is allocated with the non-TLS read/write callbacks, the
 * greeting (INIT_PASSWD, option magic, handshake flags) is sent, the client
 * flags are read, and options are then processed in a loop that ends on
 * NBD_OPT_EXPORT_NAME or NBD_OPT_ABORT.
 *
 * NOTE(review): the assignment of client->net, the `switch` header, the
 * `break`/`return`/`goto` statements and the cleanup path are not visible in
 * this view.
 */
2650 CLIENT
* negotiate(int net
, GArray
* servers
, struct generic_conf
*genconf
) {
2651 uint16_t smallflags
= NBD_FLAG_FIXED_NEWSTYLE
| NBD_FLAG_NO_ZEROES
;
2653 uint32_t cflags
= 0;
2655 CLIENT
* client
= g_new0(CLIENT
, 1);
2657 client
->socket_read
= socket_read_notls
;
2658 client
->socket_write
= socket_write_notls
;
2659 client
->socket_closed
= socket_closed_negotiate
;
2660 client
->transactionlogfd
= -1;
2661 client
->logsem
= SEM_FAILED
;
2663 assert(servers
!= NULL
);
/* Greeting: 8-byte password, 64-bit option magic, 16-bit handshake flags. */
2664 socket_write(client
, INIT_PASSWD
, 8);
2665 magic
= htonll(opts_magic
);
2666 socket_write(client
, &magic
, sizeof(magic
));
2668 smallflags
= htons(smallflags
);
2669 socket_write(client
, &smallflags
, sizeof(uint16_t));
2670 socket_read(client
, &cflags
, sizeof(cflags
));
/* htonl() is used here for the network-to-host conversion. */
2671 cflags
= htonl(cflags
);
/* The client's choice is recorded in the global glob_flags. */
2672 if (cflags
& NBD_FLAG_C_NO_ZEROES
) {
2673 glob_flags
|= F_NO_ZEROES
;
/* Every option starts with the option magic followed by the option code. */
2676 socket_read(client
, &magic
, sizeof(magic
));
2677 magic
= ntohll(magic
);
2678 if(magic
!= opts_magic
) {
2679 err_nonfatal("Negotiation failed/5a: magic mismatch");
2682 socket_read(client
, &opt
, sizeof(opt
));
/* With TLS enforced globally, anything other than STARTTLS is refused until
 * a TLS session exists. */
2684 if(client
->tls_session
== NULL
2685 && glob_flags
& F_FORCEDTLS
2686 && opt
!= NBD_OPT_STARTTLS
) {
2687 if(opt
== NBD_OPT_EXPORT_NAME
) {
2688 // can't send an error message for EXPORT_NAME,
2689 // so must do hard close
2692 if(opt
== NBD_OPT_ABORT
) {
2696 consume_len(client
);
2697 send_reply(client
, opt
, NBD_REP_ERR_TLS_REQD
, -1, "TLS is required on this server");
2701 case NBD_OPT_EXPORT_NAME
:
2702 // NBD_OPT_EXPORT_NAME must be the last
2703 // selected option, so return from here
2704 // if that is chosen.
2705 if(handle_export_name(client
, opt
, servers
, cflags
) != NULL
) {
2712 handle_list(client
, opt
, servers
, cflags
);
2717 case NBD_OPT_STARTTLS
:
2719 consume_len(client
);
2720 send_reply(client
, opt
, NBD_REP_ERR_PLATFORM
, -1, "This nbd-server was compiled without TLS support");
2722 if(client
->tls_session
!= NULL
) {
2723 consume_len(client
);
2724 send_reply(client
, opt
, NBD_REP_ERR_INVALID
, -1, "Invalid STARTTLS request: TLS has already been negotiated!");
/* A missing key file in the configuration means TLS is not offered. */
2727 if(genconf
->keyfile
== NULL
) {
2728 consume_len(client
);
2729 send_reply(client
, opt
, NBD_REP_ERR_POLICY
, -1, "TLS not allowed on this server");
2732 if(handle_starttls(client
, opt
, servers
, cflags
, genconf
) == NULL
) {
2733 // can't recover from failed TLS negotiation.
2736 // once TLS has been negotiated, any state must be cleared
2737 client
->clientflags
= 0;
2742 if(handle_info(client
, opt
, servers
, cflags
) && opt
== NBD_OPT_GO
) {
2746 case NBD_OPT_STRUCTURED_REPLY
:
2747 handle_structured_reply(client
, opt
, servers
, cflags
);
2750 consume_len(client
);
2751 send_reply(client
, opt
, NBD_REP_ERR_UNSUP
, -1, "The given option is unknown to this server implementation");
2754 } while((opt
!= NBD_OPT_EXPORT_NAME
) && (opt
!= NBD_OPT_ABORT
));
2755 if(opt
== NBD_OPT_ABORT
) {
2756 err_nonfatal("Session terminated by client");
2759 err_nonfatal("Weird things happened: reached end of negotiation without success");
/*
 * Translate an error code into the value placed in an NBD reply's error
 * field. The two branches visible here return 28 (ENOSPC) and 22 (EINVAL),
 * each already converted to network byte order with htonl().
 * NOTE(review): the switch/cases selecting these branches are not visible.
 */
2765 static int nbd_errno(int errcode
) {
2780 return htonl(28); // ENOSPC
2782 return htonl(22); // EINVAL
/*
 * Release the resources held by a work package: close either pipe end whose
 * descriptor is greater than 0, then free the data buffer and the request.
 * NOTE(review): descriptor 0 is never closed by these checks; whether the
 * package structure itself is freed is not visible in this view.
 */
2786 static void package_dispose(struct work_package
* package
) {
2787 if (package
->pipefd
[0] > 0)
2788 close(package
->pipefd
[0]);
2789 if (package
->pipefd
[1] > 0)
2790 close(package
->pipefd
[1]);
2791 g_free(package
->data
);
2792 g_free(package
->req
);
/*
 * Create a pipe intended to carry `len` bytes for the splice path.
 * Visible checks: `len` is compared against MAX_PIPE_SIZE, and the pipe's
 * capacity is requested with fcntl(F_SETPIPE_SZ, MAX_PIPE_SIZE); a result
 * smaller than MAX_PIPE_SIZE is treated as a special case.
 * NOTE(review): the pipe creation call, the action taken in each branch and
 * the return values are not visible in this view.
 */
2796 static int mkpipe(int pipefd
[2], size_t len
)
2798 if (len
> MAX_PIPE_SIZE
)
2804 if (fcntl(pipefd
[1], F_SETPIPE_SZ
, MAX_PIPE_SIZE
) < MAX_PIPE_SIZE
) {
/*
 * Allocate a zeroed work package for `req` on behalf of `client`.
 *
 * For NBD_CMD_WRITE requests a payload area is prepared: with F_SPLICE a pipe
 * is tried first via mkpipe() and a heap buffer of req->len bytes is
 * allocated when mkpipe() returns non-zero; without F_SPLICE the heap buffer
 * is always allocated.
 * NOTE(review): no check of the calloc()/malloc() results is visible here.
 * calloc() is called with (size, count) rather than (count, size); the
 * product is the same.
 */
2816 struct work_package
* package_create(CLIENT
* client
, struct nbd_request
* req
) {
2817 struct work_package
* rv
= calloc(sizeof (struct work_package
), 1);
2820 rv
->client
= client
;
2825 if((req
->type
& NBD_CMD_MASK_COMMAND
) == NBD_CMD_WRITE
) {
2826 if (client
->server
->flags
& F_SPLICE
) {
2827 if (mkpipe(rv
->pipefd
, req
->len
))
2828 rv
->data
= malloc(req
->len
);
2830 rv
->data
= malloc(req
->len
);
/*
 * Try to answer a read request by splicing export data through a pipe
 * straight into the client socket, avoiding a userspace copy.
 *
 * Visible flow: bail out when a TLS session exists, create the pipe, fill it
 * from the export with expsplice(), then — holding client->lock — write the
 * reply header and splice the payload to client->net.
 * NOTE(review): the return values and the pipe cleanup are not visible here.
 */
2838 static int handle_splice_read(CLIENT
*client
, struct nbd_request
*req
)
2840 struct nbd_reply rep
;
2843 // splice doesn't work with TLS
2844 if (client
->tls_session
!= NULL
)
2847 if (mkpipe(pipefd
, req
->len
))
2850 if (expsplice(pipefd
[1], req
->from
, req
->len
, client
, SPLICE_IN
, 0)) {
2856 DEBUG("handling read request (splice)\n");
2857 setup_reply(&rep
, req
);
2858 log_reply(client
, &rep
);
/* Header and payload are written under the same lock so that they reach the
 * socket back to back. */
2859 pthread_mutex_lock(&(client
->lock
));
2860 writeit(client
->net
, &rep
, sizeof(rep
));
2861 spliceit(pipefd
[0], NULL
, client
->net
, NULL
, req
->len
);
2862 pthread_mutex_unlock(&(client
->lock
));
/*
 * Answer a read request through a READ_CTX, in either the simple or the
 * structured reply format depending on F_STRUCTURED in client->clientflags.
 *
 * With structured replies and the "don't fragment" flag, requests larger
 * than 1 MiB are refused with an NBD_EOVERFLOW error chunk. A single buffer
 * of req->len bytes is allocated when the reply must not be fragmented or is
 * not structured. The read itself is done by expread() and the result is
 * handed to complete_read().
 * NOTE(review): the declaration of `error`, the assignment of ctx->df and
 * the returns are not visible in this view.
 */
2869 static void handle_normal_read(CLIENT
*client
, struct nbd_request
*req
)
2871 DEBUG("handling read request\n");
2872 char read_failed
[] = "Read failed";
/* ctx carries the _cleanup_g_free_ attribute macro. */
2873 _cleanup_g_free_ READ_CTX
*ctx
= g_new0(READ_CTX
, 1);
2875 ctx
->current_len
= req
->len
;
2877 char *errmsg
= NULL
;
2878 uint16_t msglen
= 0;
2879 if(client
->clientflags
& F_STRUCTURED
) {
2880 ctx
->is_structured
= 1;
2882 ctx
->is_structured
= 0;
2884 if((req
->type
& NBD_CMD_FLAG_DF
) != 0) {
2887 if(ctx
->is_structured
&& ctx
->df
&& req
->len
> (1 << 20)) {
2888 /* standard requires a minimum of 64KiB; we are more generous
2889 * by allowing up to 1MiB as our largest unfragmented answer */
2890 const char too_long
[] = "Request too long for unfragmented reply";
2891 struct nbd_structured_error_payload pl
;
/* NOTE(review): both fields are stored without a visible byte-order
 * conversion — confirm whether send_structured_chunk_v() converts them.
 * msglen is sizeof the array, so it counts the terminating NUL. */
2892 pl
.error
= NBD_EOVERFLOW
;
2893 pl
.msglen
= sizeof too_long
;
2894 send_structured_chunk_v(client
, req
, NBD_REPLY_FLAG_DONE
, NBD_REPLY_TYPE_ERROR
, 6 + pl
.msglen
, 2, &pl
, sizeof pl
, too_long
, sizeof too_long
);
2897 if(ctx
->df
|| !(ctx
->is_structured
)) {
2898 ctx
->buf
= malloc(req
->len
);
2900 err("Could not allocate memory for request");
2902 ctx
->buflen
= req
->len
;
2904 if(expread(ctx
, client
)) {
2905 DEBUG("Read failed: %m");
2906 error
= nbd_errno(errno
);
2907 errmsg
= read_failed
;
2908 msglen
= sizeof read_failed
;
2910 complete_read(client
, ctx
, error
, errmsg
, msglen
, false, 0);
2913 static void handle_read(CLIENT
* client
, struct nbd_request
* req
)
2917 * If we have splice set we want to try that first, and if that fails
2918 * for whatever reason we fall through to ye olde read.
2920 if (client
->server
->flags
& F_SPLICE
)
2921 if (!handle_splice_read(client
, req
))
2924 handle_normal_read(client
, req
);
/*
 * Execute a write request from a work package and send the reply.
 *
 * Two paths are visible: expsplice() from the package's pipe read end, and
 * expwrite() from the package's data buffer. A failing call stores
 * nbd_errno(errno) in the reply. The reply is logged and written under
 * client->lock.
 * NOTE(review): the condition choosing between the two paths and the
 * remaining expsplice() arguments are not visible in this view.
 */
2927 static void handle_write(struct work_package
*pkg
)
2929 CLIENT
*client
= pkg
->client
;
2930 struct nbd_request
*req
= pkg
->req
;
2931 struct nbd_reply rep
;
/* Normalise the FUA flag to 0/1. */
2932 int fua
= !!(req
->type
& NBD_CMD_FLAG_FUA
);
2934 DEBUG("handling write request\n");
2935 setup_reply(&rep
, req
);
2939 if (expsplice(pkg
->pipefd
[0], req
->from
, req
->len
, client
,
2941 DEBUG("Splice failed: %m");
2942 rep
.error
= nbd_errno(errno
);
2947 if(expwrite(req
->from
, pkg
->data
, req
->len
, client
, fua
)) {
2948 DEBUG("Write failed: %m");
2949 rep
.error
= nbd_errno(errno
);
2952 log_reply(client
, &rep
);
2953 pthread_mutex_lock(&(client
->lock
));
2954 socket_write(client
, &rep
, sizeof rep
);
2955 pthread_mutex_unlock(&(client
->lock
));
2958 static void handle_flush(CLIENT
* client
, struct nbd_request
* req
) {
2959 struct nbd_reply rep
;
2960 DEBUG("handling flush request\n");
2961 setup_reply(&rep
, req
);
2962 if(expflush(client
)) {
2963 DEBUG("Flush failed: %m");
2964 rep
.error
= nbd_errno(errno
);
2966 log_reply(client
, &rep
);
2967 pthread_mutex_lock(&(client
->lock
));
2968 socket_write(client
, &rep
, sizeof rep
);
2969 pthread_mutex_unlock(&(client
->lock
));
2972 static void handle_trim(CLIENT
* client
, struct nbd_request
* req
) {
2973 struct nbd_reply rep
;
2974 DEBUG("handling trim request\n");
2975 setup_reply(&rep
, req
);
2976 if(exptrim(req
, client
)) {
2977 DEBUG("Trim failed: %m");
2978 rep
.error
= nbd_errno(errno
);
2980 log_reply(client
, &rep
);
2981 pthread_mutex_lock(&(client
->lock
));
2982 socket_write(client
, &rep
, sizeof rep
);
2983 pthread_mutex_unlock(&(client
->lock
));
2986 static void handle_write_zeroes(CLIENT
* client
, struct nbd_request
* req
) {
2987 struct nbd_reply rep
;
2988 DEBUG("handling write_zeroes request\n");
2989 int fua
= !!(req
->type
& NBD_CMD_FLAG_FUA
);
2990 setup_reply(&rep
, req
);
2991 if(expwrite_zeroes(req
, client
, fua
)) {
2992 DEBUG("Write_zeroes failed: %m");
2993 rep
.error
= nbd_errno(errno
);
2995 // For now, don't trim
2996 // TODO: handle this far more efficiently with reference to the
2997 // actual backing driver
2998 log_reply(client
, &rep
);
2999 pthread_mutex_lock(&(client
->lock
));
3000 socket_write(client
, &rep
, sizeof rep
);
3001 pthread_mutex_unlock(&(client
->lock
));
3005 static bool bad_write(CLIENT
* client
, struct nbd_request
* req
) {
3006 if ((client
->server
->flags
& F_READONLY
) ||
3007 (client
->server
->flags
& F_AUTOREADONLY
)) {
3008 DEBUG("[WRITE to READONLY!]");
3014 static bool bad_range(CLIENT
* client
, struct nbd_request
* req
) {
3015 if(req
->from
> client
->exportsize
||
3016 req
->from
+ req
->len
> client
->exportsize
) {
3017 DEBUG("[out of bounds!]");
/*
 * Thread-pool worker: dispatch one work package to the handler for its
 * command.
 *
 * The command is the part of req->type selected by NBD_CMD_MASK_COMMAND; the
 * remaining bits are flags, of which only NBD_CMD_FLAG_FUA and
 * NBD_CMD_FLAG_NO_HOLE are accepted. Modifying commands are checked with
 * bad_write() and bad_range() before their handler runs. The trailing block
 * builds and sends an error reply from `err`, and the package is disposed of
 * at the end.
 * NOTE(review): the `switch`/`case` labels for most commands, the declaration
 * and assignments of `err`, and the jumps between the sections are not
 * visible in this view.
 */
3023 static void handle_request(gpointer data
, gpointer user_data
) {
3024 struct work_package
* package
= (struct work_package
*) data
;
3025 uint32_t type
= package
->req
->type
& NBD_CMD_MASK_COMMAND
;
3026 uint32_t flags
= package
->req
->type
& ~NBD_CMD_MASK_COMMAND
;
3027 struct nbd_reply rep
;
3030 if(flags
& ~(NBD_CMD_FLAG_FUA
| NBD_CMD_FLAG_NO_HOLE
)) {
/* `flags` and `type` are uint32_t but are printed with %d. */
3031 msg(LOG_ERR
, "E: received invalid flag %d on command %d, ignoring", flags
, type
);
3037 if (bad_range(package
->client
, package
->req
)) {
3040 handle_read(package
->client
, package
->req
);
3043 if (bad_write(package
->client
, package
->req
)) {
3047 if (bad_range(package
->client
, package
->req
)) {
3051 handle_write(package
);
3054 handle_flush(package
->client
, package
->req
);
3057 if (bad_write(package
->client
, package
->req
)) {
3061 if (bad_range(package
->client
, package
->req
)) {
3064 handle_trim(package
->client
, package
->req
);
3066 case NBD_CMD_WRITE_ZEROES
:
3067 if (bad_write(package
->client
, package
->req
)) {
3071 if (bad_range(package
->client
, package
->req
)) {
3075 handle_write_zeroes(package
->client
, package
->req
);
3078 msg(LOG_ERR
, "E: received unknown command %d of type, ignoring", package
->req
->type
);
/* Error reply: built from `err`, logged, and written under the client lock. */
3083 setup_reply(&rep
, package
->req
);
3084 rep
.error
= nbd_errno(err
);
3085 log_reply(package
->client
, &rep
);
3086 pthread_mutex_lock(&(package
->client
->lock
));
3087 socket_write(package
->client
, &rep
, sizeof rep
);
3088 pthread_mutex_unlock(&(package
->client
->lock
));
3090 package_dispose(package
);
/*
 * Transmission-phase loop: read requests from the client, optionally record
 * them in the transaction log, wrap each one in a work package and push it
 * to the thread pool `tpool`.
 *
 * For write requests the payload is received here, either spliced from the
 * socket into the package's pipe or read into the package's data buffer.
 * NBD_CMD_DISC finalises the client instead of queueing.
 * NOTE(review): the loop construct, the assignments to `write_data`, the
 * lines between the two transaction-log sections and the return are not
 * visible in this view.
 */
3093 static int mainloop_threaded(CLIENT
* client
) {
3094 struct nbd_request
* req
;
3095 struct work_package
* pkg
;
3096 int write_data
= false;
3098 DEBUG("Entering request loop\n");
/* NOTE(review): no check of the calloc() result is visible here. */
3100 req
= calloc(sizeof (struct nbd_request
), 1);
3102 socket_read(client
, req
, sizeof(struct nbd_request
));
/* The request is logged as received, i.e. still in network byte order. */
3104 if(client
->transactionlogfd
!= -1) {
3105 lock_logsem(client
);
3106 writeit(client
->transactionlogfd
, req
, sizeof(struct nbd_request
));
3107 if(((ntohl(req
->type
) & NBD_CMD_MASK_COMMAND
) == NBD_CMD_WRITE
) &&
3108 (client
->server
->flags
& F_DATALOG
) &&
3109 !(client
->server
->flags
& F_SPLICE
)) {
3113 unlock_logsem(client
);
/* from, type and len are converted to host order; magic is left in network
 * order and compared against htonl(NBD_REQUEST_MAGIC) below. */
3117 req
->from
= ntohll(req
->from
);
3118 req
->type
= ntohl(req
->type
);
3119 req
->len
= ntohl(req
->len
);
3121 if(req
->magic
!= htonl(NBD_REQUEST_MAGIC
))
3122 err("Protocol error: not enough magic.");
3124 pkg
= package_create(client
, req
);
3126 if((req
->type
& NBD_CMD_MASK_COMMAND
) == NBD_CMD_WRITE
) {
/* Splice is used only when enabled, the payload fits the pipe, the pipe
 * exists and the connection is not using TLS. */
3128 if ((client
->server
->flags
& F_SPLICE
) &&
3129 (req
->len
<= MAX_PIPE_SIZE
&& pkg
->pipefd
[1] > 0) &&
3130 (client
->tls_session
== NULL
))
3131 spliceit(client
->net
, NULL
, pkg
->pipefd
[1],
3135 socket_read(client
, pkg
->data
, req
->len
);
3138 writeit(client
->transactionlogfd
, pkg
->data
, req
->len
);
3139 unlock_logsem(client
);
3143 if(req
->type
== NBD_CMD_DISC
) {
3144 finalize_client(client
);
3145 package_dispose(pkg
);
3148 g_thread_pool_push(tpool
, pkg
, NULL
);
/**
 * @param data a pointer to pid_t which should be freed
 */
/* Destructor-style callback taking a generic pointer.
 * NOTE(review): the body is not visible in this view. */
3156 void destroy_pid_t(gpointer data
) {
3160 static pid_t
spawn_child(int* socket
) {
3166 sigemptyset(&newset
);
3167 sigaddset(&newset
, SIGCHLD
);
3168 sigaddset(&newset
, SIGTERM
);
3169 sigprocmask(SIG_BLOCK
, &newset
, &oldset
);
3170 socketpair(AF_UNIX
, SOCK_STREAM
, 0, sockets
);
3173 msg(LOG_ERR
, "Could not fork (%s)", strerror(errno
));
3178 if (pid
> 0) { /* Parent */
3181 pidp
= g_malloc(sizeof(pid_t
));
3183 *socket
= sockets
[1];
3185 g_hash_table_insert(children
, pidp
, pidp
);
3189 *socket
= sockets
[0];
3191 /* Child's signal disposition is reset to default. */
3192 signal(SIGCHLD
, SIG_DFL
);
3193 signal(SIGTERM
, SIG_DFL
);
3194 signal(SIGHUP
, SIG_DFL
);
3195 sigemptyset(&oldset
);
3197 sigprocmask(SIG_SETMASK
, &oldset
, NULL
);
/*
 * Accept one connection on the listening socket `sock`. The peer address is
 * received into a sockaddr_storage; a failure is reported with
 * err_nonfatal().
 * NOTE(review): the return type, the failure condition and the returned
 * value are not visible in this view.
 */
3202 socket_accept(const int sock
)
3204 struct sockaddr_storage addrin
;
3205 socklen_t addrinlen
= sizeof(addrin
);
3208 net
= accept(sock
, (struct sockaddr
*) &addrin
, &addrinlen
);
3210 err_nonfatal("Failed to accept socket connection: %m");
/*
 * Accept a connection on `sock`, spawn a child process for it and, in the
 * child, negotiate with the client and serve it.
 *
 * Visible flow: accept; spawn_child(); the parent's end of the communication
 * socket is appended to `childsocks`; a thread pool running handle_request()
 * is created with genconf->threads threads; the accepted socket is switched
 * to blocking mode; negotiate() runs; the listening-side data structures are
 * released; mainloop_threaded() serves the client.
 * NOTE(review): the conditions separating parent and child code, the error
 * labels and the exit path are not visible in this view.
 */
3217 handle_modern_connection(GArray
*const servers
, const int sock
, struct generic_conf
*genconf
)
3221 CLIENT
*client
= NULL
;
3225 net
= socket_accept(sock
);
3230 pid
= spawn_child(&commsocket
);
3233 msg(LOG_INFO
, "Spawned a child process");
3234 g_array_append_val(childsocks
, commsocket
);
3237 msg(LOG_ERR
, "Failed to spawn a child process");
3241 /* Child just continues. */
3243 tpool
= g_thread_pool_new(handle_request
, NULL
, genconf
->threads
, FALSE
, NULL
);
/* O_NONBLOCK is cleared on the accepted socket; F_SETFL is only issued when
 * the flag word actually changes. */
3245 sock_flags_old
= fcntl(net
, F_GETFL
, 0);
3246 if (sock_flags_old
== -1) {
3247 msg(LOG_ERR
, "Failed to get socket flags");
3251 sock_flags_new
= sock_flags_old
& ~O_NONBLOCK
;
3252 if (sock_flags_new
!= sock_flags_old
&&
3253 fcntl(net
, F_SETFL
, sock_flags_new
) == -1) {
3254 msg(LOG_ERR
, "Failed to set socket to blocking mode");
3258 client
= negotiate(net
, servers
, genconf
);
3260 msg(LOG_ERR
, "Modern initial negotiation failed");
3267 /* Free all root server resources here, because we are
3268 * currently in the child process serving one specific
3269 * connection. These are not simply needed anymore. */
3270 g_hash_table_destroy(children
);
3272 for (i
= 0; i
< modernsocks
->len
; i
++) {
3273 close(g_array_index(modernsocks
, int, i
));
3275 g_array_free(modernsocks
, TRUE
);
3277 /* Now that we are in the child process after a
3278 * succesful negotiation, we do not need the list of
3279 * servers anymore, get rid of it.*/
/* The second argument is FALSE, so only the GArray wrapper is freed here. */
3280 g_array_free(servers
, FALSE
);
3283 msg(LOG_INFO
, "Starting to serve");
3284 mainloop_threaded(client
);
/*
 * Parent-side half of the connection-limit check: read a length-prefixed
 * export name from a child's socket and answer with one byte.
 *
 * Replies visible here: "Y" when the export has no connection limit
 * (max_connections == 0) or the limit is above numclients, "N" otherwise,
 * and "X" when no export matches.
 * NOTE(review): the read loop passes &len and sizeof len on each iteration;
 * how partial reads are accumulated into `rt` is not visible in this view,
 * nor are the return values or any update of numclients.
 */
3296 static int handle_childname(GArray
* servers
, int socket
)
3299 _cleanup_g_free_
char *buf
= NULL
;
3302 while(rt
< sizeof(len
)) {
3303 switch((r
= read(socket
, &len
, sizeof len
))) {
3307 err_nonfatal("Error reading from acl socket: %m");
/* Guards the `len + 1` allocation below against wrapping. */
3314 if (len
>= UINT32_MAX
- 1) {
3315 err_nonfatal("Value out of range");
/* Zero-filled and one byte larger than the name, so it stays NUL-terminated. */
3318 buf
= g_malloc0(len
+ 1);
3319 readit(socket
, buf
, len
);
3321 for(i
=0; i
<servers
->len
; i
++) {
3322 SERVER
* srv
= g_array_index(servers
, SERVER
*, i
);
3323 if(strcmp(srv
->servename
, buf
) == 0) {
3324 if(srv
->max_connections
== 0 || srv
->max_connections
> srv
->numclients
) {
3325 writeit(socket
, "Y", 1);
3328 writeit(socket
, "N", 1);
3333 writeit(socket
, "X", 1);
3339 * Return the index of the server whose servename matches the given
3342 * @param servename a string to match
3343 * @param servers an array of servers
3344 * @return the first index of the server whose servename matches the
3345 * given name or -1 if one cannot be found
// Linear scan over `servers` (elements are SERVER pointers), comparing each
// entry's servename to `servename` with strcmp(). Neither argument is
// modified: both the string and the array are const-qualified.
3347 static int get_index_by_servename(const gchar
*const servename
,
3348 const GArray
*const servers
) {
3351 for (i
= 0; i
< servers
->len
; ++i
) {
3352 const SERVER
* server
= g_array_index(servers
, SERVER
*, i
);
// servename is dereferenced by strcmp(), so callers must not pass NULL
// (append_new_servers checks this before calling).
3354 if (strcmp(servename
, server
->servename
) == 0)
3362 * Parse configuration files and add servers to the array if they don't
3363 * already exist there. The existence is tested by comparing
3364 * servenames. A server is appended to the array only if its servename
3365 * is unique among all other servers.
3367 * @param servers an array of servers
3368 * @param genconf a pointer to generic configuration
3369 * @return the number of new servers appended to the array, or -1 in
// Re-read the configuration file named by config_file_pos and append to
// `servers` every parsed server whose servename is non-NULL and not yet
// present in `servers`.
//
// servers: array to extend in place
// genconf: generic configuration handed to parse_cfile(); its threads field
//          is applied to the existing thread pool afterwards
// gerror:  error slot handed to parse_cfile()
3372 static int append_new_servers(GArray
*const servers
, struct generic_conf
*genconf
, GError
**const gerror
) {
3374 GArray
*new_servers
;
// Remember the length before appending so the number of additions can be
// computed at the end.
3375 const int old_len
= servers
->len
;
3378 new_servers
= parse_cfile(config_file_pos
, genconf
, true, gerror
);
// Only adjust the pool size when a pool exists.
3379 if(tpool
) g_thread_pool_set_max_threads(tpool
, genconf
->threads
, NULL
);
3383 for(i
= 0; i
< new_servers
->len
; ++i
) {
3384 SERVER
*new_server
= g_array_index(new_servers
, SERVER
*, i
);
3386 if (new_server
->servename
3387 && -1 == get_index_by_servename(new_server
->servename
,
// serve_inc_ref() is called before the pointer is stored in `servers`;
// presumably this keeps the entry alive after new_servers is released
// below - TODO confirm.
3389 serve_inc_ref(new_server
);
3390 g_array_append_val(servers
, new_server
);
3394 retval
= servers
->len
- old_len
;
3396 g_array_free(new_servers
, TRUE
);
3401 void serveloop(GArray
* servers
, struct generic_conf
*genconf
) G_GNUC_NORETURN
;
3403 * Loop through the available servers, and serve them. Never returns.
// Main loop of the listening process.
// Visible flow: build a master fd_set from modernsocks, block SIGCHLD,
// SIGHUP and SIGTERM, then repeatedly (a) act on the is_sigterm_caught,
// is_sigchld_caught and is_sighup_caught flags, (b) add the childsocks
// descriptors to a copy of the master set, (c) wait in pselect() with the
// original signal mask, and (d) dispatch ready descriptors to
// handle_modern_connection() or handle_childname().
//
// servers: array of configured exports; may grow on SIGHUP
// genconf: generic configuration passed to the handlers
3405 void serveloop(GArray
* servers
, struct generic_conf
*genconf
) {
3410 sigset_t blocking_mask
;
3411 sigset_t original_mask
;
3414 * Set up the master fd_set. The set of descriptors we need
3415 * to select() for never changes anyway and it buys us a *lot*
3416 * of time to only build this once. However, if we ever choose
3417 * to not fork() for clients anymore, we may have to revisit
// mmax tracks the highest listening descriptor seen so far.
3422 for(i
=0;i
<modernsocks
->len
;i
++) {
3423 int sock
= g_array_index(modernsocks
, int, i
);
3424 FD_SET(sock
, &mset
);
3425 mmax
=sock
>mmax
?sock
:mmax
;
3428 /* Construct a signal mask which is used to make signal testing and
3429 * receiving an atomic operation to ensure no signal is received between
3430 * tests and blocking pselect(). */
3431 if (sigemptyset(&blocking_mask
) == -1)
3432 err("failed to initialize blocking_mask: %m");
3434 if (sigaddset(&blocking_mask
, SIGCHLD
) == -1)
3435 err("failed to add SIGCHLD to blocking_mask: %m");
3437 if (sigaddset(&blocking_mask
, SIGHUP
) == -1)
3438 err("failed to add SIGHUP to blocking_mask: %m");
3440 if (sigaddset(&blocking_mask
, SIGTERM
) == -1)
3441 err("failed to add SIGTERM to blocking_mask: %m");
// The previous mask is saved in original_mask and later handed to pselect().
3443 if (sigprocmask(SIG_BLOCK
, &blocking_mask
, &original_mask
) == -1)
3444 err("failed to block signals: %m");
// SIGTERM: killchild is applied to every entry of the children table.
3447 if (is_sigterm_caught
) {
3448 is_sigterm_caught
= 0;
3450 g_hash_table_foreach(children
, killchild
, NULL
);
// SIGCHLD: reap every exited child without blocking (WNOHANG) and drop it
// from the children table.
3456 if (is_sigchld_caught
) {
3461 is_sigchld_caught
= 0;
3463 while ((pid
=waitpid(-1, &status
, WNOHANG
)) > 0) {
3464 if (WIFEXITED(status
)) {
3465 msg(LOG_INFO
, "Child exited with %d", WEXITSTATUS(status
));
// The table is queried with a pointer to the pid value, not the value itself.
3467 i
= g_hash_table_lookup(children
, &pid
);
3469 msg(LOG_INFO
, "SIGCHLD received for an unknown child with PID %ld", (long)pid
);
3471 DEBUG("Removing %d from the list of children", pid
);
3472 g_hash_table_remove(children
, &pid
);
3477 /* SIGHUP causes the root server process to reconfigure
3478 * itself and add new export servers for each newly
3479 * found export configuration group, i.e. spawn new
3480 * server processes for each previously non-existent
3481 * export. This does not alter old runtime configuration
3482 * but just appends new exports. */
3483 if (is_sighup_caught
) {
3485 GError
*gerror
= NULL
;
3487 msg(LOG_INFO
, "reconfiguration request received");
3488 is_sighup_caught
= 0; /* Reset to allow catching
// n is the result of append_new_servers(); the loop below walks the last n
// entries of servers, i.e. the ones just appended.
3491 n
= append_new_servers(servers
, genconf
, &gerror
);
3493 msg(LOG_ERR
, "failed to append new servers: %s",
3496 for (i
= servers
->len
- n
; i
< servers
->len
; ++i
) {
3497 const SERVER
*server
= g_array_index(servers
,
3500 msg(LOG_INFO
, "reconfigured new server: %s",
// Start each wait from a fresh copy of the master set, then add the
// per-child communication sockets, which change at run time.
3505 memcpy(&rset
, &mset
, sizeof(fd_set
));
3507 for(i
=0;i
<childsocks
->len
;i
++) {
3508 int sock
= g_array_index(childsocks
, int, i
);
3509 FD_SET(sock
, &rset
);
3510 max
=sock
>max
?sock
:max
;
// pselect() installs original_mask for the duration of the wait, so the
// three blocked signals can be delivered while waiting here.
3513 if (pselect(max
+ 1, &rset
, NULL
, NULL
, NULL
, &original_mask
) > 0) {
3515 for(i
=0; i
< modernsocks
->len
; i
++) {
3516 int sock
= g_array_index(modernsocks
, int, i
);
3517 if(!FD_ISSET(sock
, &rset
)) {
3521 handle_modern_connection(servers
, sock
, genconf
);
3523 for(i
=0; i
< childsocks
->len
; i
++) {
3524 int sock
= g_array_index(childsocks
, int, i
);
3526 if(FD_ISSET(sock
, &rset
)) {
3527 if(handle_childname(servers
, sock
) < 0) {
// NOTE(review): removing index i shifts the following elements down by one.
// Unless i is adjusted in lines not shown here, the element that moves into
// slot i is skipped for this round - confirm.
3529 g_array_remove_index(childsocks
, i
);
3538 * Set server socket options.
3540 * @param socket a socket descriptor of the server
3542 * @param gerror a pointer to an error object pointer used for reporting
3543 * errors. On error, if gerror is not NULL, *gerror is set and -1
3546 * @return 0 on success, -1 on error
// Apply three SOL_SOCKET options to `socket`, in this order: SO_REUSEADDR,
// SO_LINGER, SO_KEEPALIVE. Each failing setsockopt() call fills `gerror`
// with its own NBDS_ERR code (NBDS_ERR_SO_REUSEADDR, NBDS_ERR_SO_LINGER or
// NBDS_ERR_SO_KEEPALIVE). The declarations of `yes` and `l` and the return
// statements are not part of this excerpt.
3548 int dosockopts(const int socket
, GError
**const gerror
) {
3556 /* lose the pesky "Address already in use" error message */
3557 if (setsockopt(socket
,SOL_SOCKET
,SO_REUSEADDR
,&yes
,sizeof(int)) == -1) {
3558 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_REUSEADDR
,
3559 "failed to set socket option SO_REUSEADDR: %s",
// SO_LINGER takes the structure `l`; its field values are set in lines not
// shown here.
3565 if (setsockopt(socket
,SOL_SOCKET
,SO_LINGER
,&l
,sizeof(l
)) == -1) {
3566 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_LINGER
,
3567 "failed to set socket option SO_LINGER: %s",
3571 if (setsockopt(socket
,SOL_SOCKET
,SO_KEEPALIVE
,&yes
,sizeof(int)) == -1) {
3572 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SO_KEEPALIVE
,
3573 "failed to set socket option SO_KEEPALIVE: %s",
// Create a listening AF_UNIX stream socket bound to the path `sockname` and
// append its descriptor to modernsocks.
//
// sockname: filesystem path for the socket; copied into sun_path and cut to
//           fit if it is longer than that field
// gerror:   receives NBDS_ERR_SOCKET or NBDS_ERR_BIND details on failure
3581 int open_unix(const gchar
*const sockname
, GError
**const gerror
) {
3582 struct sockaddr_un sa
;
3586 memset(&sa
, 0, sizeof(struct sockaddr_un
));
3587 sa
.sun_family
= AF_UNIX
;
// strncpy() does not terminate on truncation, hence the explicit terminator
// written into the last byte on the next statement.
3588 strncpy(sa
.sun_path
, sockname
, sizeof sa
.sun_path
);
3589 sa
.sun_path
[sizeof(sa
.sun_path
)-1] = '\0';
3590 sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
3592 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SOCKET
,
3593 "failed to open a unix socket: "
3594 "failed to create socket: %s",
3598 if(bind(sock
, (struct sockaddr
*)&sa
, sizeof(struct sockaddr_un
))<0) {
3599 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3600 "failed to open a unix socket: "
3601 "failed to bind to address %s: %s",
3602 sockname
, strerror(errno
));
// A listen() failure is reported with the same NBDS_ERR_BIND code as a
// bind() failure; only the message text differs.
3605 if(listen(sock
, 10)<0) {
3606 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3607 "failed to open a unix socket: "
3608 "failed to start listening: %s",
3613 g_array_append_val(modernsocks
, sock
);
// Failure path with an already created socket; the cleanup statements are
// not part of this excerpt (presumably the socket is closed - TODO confirm).
3615 if(retval
<0 && sock
>= 0) {
// Open listening TCP sockets for every address named in `addr` and append
// them to modernsocks.
//
// addr:   list of addresses separated by commas, spaces or tabs; NULL or an
//         empty string selects "::, 0.0.0.0"
// port:   service string for getaddrinfo(); NULL selects NBD_DEFAULT_PORT
// gerror: receives NBDS_ERR_GAI, NBDS_ERR_SOCKET or NBDS_ERR_BIND details
// The loop over the getaddrinfo() results and the return statements are not
// part of this excerpt.
3622 int open_modern(const gchar
*const addr
, const gchar
*const port
,
3623 GError
**const gerror
) {
3624 struct addrinfo hints
;
3625 struct addrinfo
* ai
= NULL
;
3626 struct addrinfo
* ai_bak
= NULL
;
// addrs carries a cleanup annotation naming g_strfreevp, so the split list
// is not freed explicitly in the lines shown here.
3631 _cleanup_(g_strfreevp
) gchar
** addrs
;
3632 gchar
const* l_addr
= addr
;
3634 if(!addr
|| strlen(addr
) == 0) {
3635 l_addr
= "::, 0.0.0.0";
3638 addrs
= g_strsplit_set(l_addr
, ", \t", -1);
3640 for(int i
=0; addrs
[i
]!=NULL
; i
++) {
// Adjacent separators produce empty tokens; this test detects them
// (presumably they are skipped - the branch body is not shown).
3641 if(addrs
[i
][0] == '\0') {
3644 memset(&hints
, '\0', sizeof(hints
));
3645 hints
.ai_flags
= AI_PASSIVE
| AI_ADDRCONFIG
;
3646 hints
.ai_socktype
= SOCK_STREAM
;
3647 hints
.ai_family
= AF_UNSPEC
;
3648 hints
.ai_protocol
= IPPROTO_TCP
;
3649 e
= getaddrinfo(addrs
[i
], port
? port
: NBD_DEFAULT_PORT
, &hints
, &ai
);
// A lookup failure is only turned into an error when this is the last
// address in the list and no listening socket has been opened so far.
3651 if(e
!= 0 && addrs
[i
+1] == NULL
&& modernsocks
->len
== 0) {
3652 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_GAI
,
3653 "failed to open a modern socket: "
3654 "failed to get address info: %s",
3660 sock
= socket(ai
->ai_family
, ai
->ai_socktype
, ai
->ai_protocol
);
3662 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_SOCKET
,
3663 "failed to open a modern socket: "
3664 "failed to create a socket: %s",
// dosockopts() fills gerror itself; only the common prefix is added here.
3669 if (dosockopts(sock
, gerror
) == -1) {
3670 g_prefix_error(gerror
, "failed to open a modern socket: ");
3674 if(bind(sock
, ai
->ai_addr
, ai
->ai_addrlen
)) {
3676 * Some systems will return multiple entries for the
3677 * same address when we ask it for something
3678 * AF_UNSPEC, even though the first entry will
3679 * listen to both protocols. Other systems will
3680 * return multiple entries too, but we actually
3681 * do need to open both.
3683 * Handle this by ignoring EADDRINUSE if we've
3684 * already got at least one socket open
3686 if(errno
== EADDRINUSE
&& modernsocks
->len
> 0) {
3689 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3690 "failed to open a modern socket: "
3691 "failed to bind an address to a socket: %s",
// A listen() failure is reported with the NBDS_ERR_BIND code as well.
3696 if(listen(sock
, 10) <0) {
3697 g_set_error(gerror
, NBDS_ERR
, NBDS_ERR_BIND
,
3698 "failed to open a modern socket: "
3699 "failed to start listening on a socket: %s",
3703 g_array_append_val(modernsocks
, sock
);
// ai_bak is what gets released, presumably a saved head of the result list
// (its assignment is not shown here).
3708 freeaddrinfo(ai_bak
);
3716 if (retval
== -1 && sock
>= 0) {
3720 freeaddrinfo(ai_bak
);
3726 * Connect our servers.
// Open the listening sockets and install the signal handlers of the
// listening process.
//
// servers:    not referenced in the lines shown here
// modernaddr: address list handed to open_modern()
// modernport: port handed to open_modern()
// unixsock:   path handed to open_unix(); NULL means no unix socket
// flags:      F_DUAL_LISTEN requests TCP listening in addition to the unix
//             socket
3728 void setup_servers(GArray
*const servers
, const gchar
*const modernaddr
,
3729 const gchar
*const modernport
, const gchar
* unixsock
,
3730 const gint flags
) {
3731 struct sigaction sa
;
3733 if(unixsock
!= NULL
) {
3734 GError
* gerror
= NULL
;
3735 if(open_unix(unixsock
, &gerror
) == -1) {
3736 msg(LOG_ERR
, "failed to setup servers: %s",
3738 g_clear_error(&gerror
);
// TCP sockets are opened when no unix socket was requested, or when both
// were asked for via F_DUAL_LISTEN.
3742 if (((flags
& F_DUAL_LISTEN
) != 0) || (unixsock
== NULL
)) {
3743 GError
*gerror
= NULL
;
3744 if (open_modern(modernaddr
, modernport
, &gerror
) == -1) {
3745 msg(LOG_ERR
, "failed to setup servers: %s",
3747 g_clear_error(&gerror
);
// Table of child processes: hashed and compared with the GLib int helpers,
// no key destructor, values released through destroy_pid_t.
3751 children
=g_hash_table_new_full(g_int_hash
, g_int_equal
, NULL
, destroy_pid_t
);
// SIGCHLD handler; SIGTERM is held back while it runs.
3753 sa
.sa_handler
= sigchld_handler
;
3754 sigemptyset(&sa
.sa_mask
);
3755 sigaddset(&sa
.sa_mask
, SIGTERM
);
3756 sa
.sa_flags
= SA_RESTART
;
3757 if(sigaction(SIGCHLD
, &sa
, NULL
) == -1)
3758 err("sigaction: %m");
// SIGTERM handler; SIGCHLD is held back while it runs.
3760 sa
.sa_handler
= sigterm_handler
;
3761 sigemptyset(&sa
.sa_mask
);
3762 sigaddset(&sa
.sa_mask
, SIGCHLD
);
3763 sa
.sa_flags
= SA_RESTART
;
3764 if(sigaction(SIGTERM
, &sa
, NULL
) == -1)
3765 err("sigaction: %m");
// SIGHUP and SIGUSR1 handlers run with an empty additional mask.
3767 sa
.sa_handler
= sighup_handler
;
3768 sigemptyset(&sa
.sa_mask
);
3769 sa
.sa_flags
= SA_RESTART
;
3770 if(sigaction(SIGHUP
, &sa
, NULL
) == -1)
3771 err("sigaction: %m");
3773 sa
.sa_handler
= sigusr1_handler
;
3774 sigemptyset(&sa
.sa_mask
);
3775 sa
.sa_flags
= SA_RESTART
;
3776 if(sigaction(SIGUSR1
, &sa
, NULL
) == -1)
3777 err("sigaction: %m");
3781 * Go daemon (unless we specified at compile time that we didn't want this)
// Daemonization code, compiled only when NODAEMON is not defined. The
// function header is not part of this excerpt. Visible steps: two tests for
// a positive child pid (presumably one per fork - TODO confirm), a default
// pid file name, /dev/null handling for descriptors 0, 1 and 2, and writing
// the process id to the pid file.
3783 #if !defined(NODAEMON)
3788 } else if(child
> 0) {
// Default pid file path, copied with a 255 byte limit.
3799 strncpy(pidfname
, "/var/run/nbd-server.pid", 255);
3802 if((newfd
= open("/dev/null", O_RDWR
)) < 0) {
// NOTE(review): dup2(oldfd, newfd) makes its second argument a copy of the
// first, so these calls point newfd at descriptors 0, 1 and 2 in turn rather
// than pointing the standard descriptors at /dev/null. Swapped arguments
// look intended - confirm.
3805 if(dup2(0, newfd
) < 0) {
3808 if(dup2(1, newfd
) < 0) {
3811 if(dup2(2, newfd
) < 0) {
3817 } else if(child
> 0) {
3820 FILE*pidf
=fopen(pidfname
, "w");
3822 fprintf(pidf
,"%d\n", (int)getpid());
// The message text shows that a pid file problem is reported on stderr and
// does not stop the server.
3826 fprintf(stderr
, "Not fatal; continuing");
3831 #endif /* !defined(NODAEMON) */
3834 * Everything beyond this point (in the file) is run in non-daemon mode.
3835 * The stuff above daemonize() isn't.
3839 * Set up user-ID and/or group-ID
// Switch the process to the configured group and user.
// The group is handled first (getgrnam, setgid), then the user (getpwnam,
// setgroups with an empty list, setuid). Each failing set*() call ends in
// err(). The tests that decide whether a name was supplied or found are not
// part of this excerpt.
//
// username:  account name looked up with getpwnam()
// groupname: group name looked up with getgrnam()
3841 void dousers(const gchar
*const username
, const gchar
*const groupname
) {
3846 gr
= getgrnam(groupname
);
3848 str
= g_strdup_printf("Invalid group name: %s", groupname
);
3851 if(setgid(gr
->gr_gid
)<0) {
3852 err("Could not set GID: %m");
3856 pw
= getpwnam(username
);
3858 str
= g_strdup_printf("Invalid user name: %s", username
);
// Supplementary groups are cleared before the uid changes.
3861 if (setgroups(0, NULL
)<0) {
3862 err("Could not set groups: %m");
3864 if(setuid(pw
->pw_uid
)<0) {
3865 err("Could not set UID: %m");
// GLib log handler that forwards `message` to syslog().
// The syslog level starts as LOG_DEBUG and is chosen by a switch over the
// GLib log level; the case labels are grouped as fatal/critical/error,
// warning, message/info and debug. The level assigned in each group is not
// part of this excerpt.
3871 void glib_message_syslog_redirect(const gchar
*log_domain
,
3872 GLogLevelFlags log_level
,
3873 const gchar
*message
,
3876 int level
=LOG_DEBUG
;
3880 case G_LOG_FLAG_FATAL
:
3881 case G_LOG_LEVEL_CRITICAL
:
3882 case G_LOG_LEVEL_ERROR
:
3885 case G_LOG_LEVEL_WARNING
:
3888 case G_LOG_LEVEL_MESSAGE
:
3889 case G_LOG_LEVEL_INFO
:
3892 case G_LOG_LEVEL_DEBUG
:
// The message is passed as an argument to a fixed "%s" format, never as the
// format string itself.
3898 syslog(level
, "%s", message
);
3903 * Main entry point...
// Program entry point.
// Visible flow: sanity-check the size of struct nbd_request, create the
// global socket arrays, parse the command line and the configuration file,
// warn or quit when no exports are configured, open the sockets and install
// signal handlers (setup_servers), switch user and group (dousers), set up
// GnuTLS DH parameters, then either serve a single connection on descriptor
// 0 when the port is "0" or enter serveloop().
3905 int main(int argc
, char *argv
[]) {
3909 struct generic_conf genconf
;
3911 memset(&genconf
, 0, sizeof(struct generic_conf
));
// Refuse to run when struct nbd_request is not exactly 28 bytes.
3913 if (sizeof( struct nbd_request
)!=28) {
3914 fprintf(stderr
,"Bad size of structure. Alignment problems?\n");
3915 exit(EXIT_FAILURE
) ;
3918 modernsocks
= g_array_new(FALSE
, FALSE
, sizeof(int));
3919 childsocks
= g_array_new(FALSE
, FALSE
, sizeof(int));
3922 config_file_pos
= g_strdup(CFILE
);
3923 serve
=cmdline(argc
, argv
, &genconf
);
// Thread count of 4 is set just before the configuration file is parsed
// (presumably a default that parse_cfile may replace - TODO confirm).
3925 genconf
.threads
= 4;
3926 servers
= parse_cfile(config_file_pos
, &genconf
, true, &gerr
);
3928 /* Update global variables with parsed values. This will be
3929 * removed once we get rid of global configuration variables. */
3930 glob_flags
|= genconf
.flags
;
3933 g_array_append_val(servers
, serve
);
// A missing configuration file (NBDS_ERR_CFILE_NOTFOUND) is not reported as
// a parse problem; any other parse error is.
3936 if(!servers
|| !servers
->len
) {
3937 if(gerr
&& !(gerr
->domain
== NBDS_ERR
3938 && gerr
->code
== NBDS_ERR_CFILE_NOTFOUND
)) {
3939 g_warning("Could not parse config file: %s", gerr
->message
);
3943 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
3946 if((!serve
) && (!servers
||!servers
->len
)) {
3948 g_message("No configured exports; quitting.");
// Sockets are opened before dousers() changes the process identity.
3954 setup_servers(servers
, genconf
.modernaddr
, genconf
.modernport
,
3955 genconf
.unixsock
, genconf
.flags
);
3956 dousers(genconf
.user
, genconf
.group
);
// GnuTLS setup; the preprocessor guard around it (if any) is not shown here.
3959 gnutls_global_init();
3960 static gnutls_dh_params_t dh_params
;
3961 gnutls_dh_params_init(&dh_params
);
3962 gnutls_dh_params_generate2(dh_params
,
3963 gnutls_sec_param_to_pk_bits(GNUTLS_PK_DH
,
3964 // Renamed in GnuTLS 3.3
3965 #if GNUTLS_VERSION_NUMBER >= 0x030300
3966 GNUTLS_SEC_PARAM_MEDIUM
3968 GNUTLS_SEC_PARAM_NORMAL
// Port "0" selects inetd mode: negotiation runs directly on descriptor 0.
3973 if((genconf
.modernport
!= NULL
) && strcmp(genconf
.modernport
, "0")==0) {
3975 err("inetd mode requires syslog");
3977 CLIENT
* client
= negotiate(0, servers
, &genconf
);
3981 tpool
= g_thread_pool_new(handle_request
, NULL
, genconf
.threads
, FALSE
, NULL
);
3982 mainloop_threaded(client
);
3986 serveloop(servers
, &genconf
);