1:255.16-alt1
[systemd_ALT.git] / src / storagetm / storagetm.c
blob16d4fb07d4ef874ded943e91dd0695042cede036
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <getopt.h>
4 #include <sys/file.h>
6 #include "af-list.h"
7 #include "alloc-util.h"
8 #include "blockdev-util.h"
9 #include "build.h"
10 #include "daemon-util.h"
11 #include "device-util.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "format-util.h"
15 #include "fs-util.h"
16 #include "id128-util.h"
17 #include "local-addresses.h"
18 #include "loop-util.h"
19 #include "main-func.h"
20 #include "os-util.h"
21 #include "parse-argument.h"
22 #include "path-util.h"
23 #include "plymouth-util.h"
24 #include "pretty-print.h"
25 #include "process-util.h"
26 #include "random-util.h"
27 #include "recurse-dir.h"
28 #include "socket-util.h"
29 #include "terminal-util.h"
30 #include "udev-util.h"
/* Command line state, filled in by parse_argv(). Heap-allocated values are released through the static
 * destructors registered right next to them. */

static char **arg_devices = NULL; /* positional arguments: the nodes/files to expose */
STATIC_DESTRUCTOR_REGISTER(arg_devices, strv_freep);

static char *arg_nqn = NULL; /* --nqn= value, or the default generated in parse_argv() */
STATIC_DESTRUCTOR_REGISTER(arg_nqn, freep);

static int arg_all = 0; /* incremented once per --all/-a on the command line */
39 static int help(void) {
40 _cleanup_free_ char *link = NULL;
41 int r;
43 r = terminal_urlify_man("systemd-storagetm", "8", &link);
44 if (r < 0)
45 return log_oom();
47 printf("%s [OPTIONS...] [DEVICE...]\n"
48 "\n%sExpose a block device or regular file as NVMe-TCP volume.%s\n\n"
49 " -h --help Show this help\n"
50 " --version Show package version\n"
51 " --nqn=STRING Select NQN (NVMe Qualified Name)\n"
52 " -a --all Expose all devices\n"
53 "\nSee the %s for details.\n",
54 program_invocation_short_name,
55 ansi_highlight(),
56 ansi_normal(),
57 link);
59 return 0;
62 static int parse_argv(int argc, char *argv[]) {
64 enum {
65 ARG_NQN = 0x100,
66 ARG_VERSION,
69 static const struct option options[] = {
70 { "help", no_argument, NULL, 'h' },
71 { "version", no_argument, NULL, ARG_VERSION },
72 { "nqn", required_argument, NULL, ARG_NQN },
73 { "all", no_argument, NULL, 'a' },
77 int r, c;
79 assert(argc >= 0);
80 assert(argv);
82 while ((c = getopt_long(argc, argv, "ha", options, NULL)) >= 0)
84 switch (c) {
86 case 'h':
87 return help();
89 case ARG_VERSION:
90 return version();
92 case ARG_NQN:
93 if (!filename_is_valid(optarg))
94 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "NQN invalid: %s", optarg);
96 if (free_and_strdup(&arg_nqn, optarg) < 0)
97 return log_oom();
99 break;
101 case 'a':
102 arg_all++;
103 break;
105 case '?':
106 return -EINVAL;
108 default:
109 assert_not_reached();
112 if (arg_all > 0) {
113 if (argc > optind)
114 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expects no further arguments if --all/-a is specified.");
115 } else {
116 if (optind >= argc)
117 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expecting device name or --all/-a.");
119 for (int i = optind; i < argc; i++)
120 if (!path_is_valid(argv[i]))
121 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid path: %s", argv[i]);
123 arg_devices = strv_copy(argv + optind);
126 if (!arg_nqn) {
127 sd_id128_t id;
129 r = sd_id128_get_machine_app_specific(SD_ID128_MAKE(b4,f9,4e,52,b8,e2,45,db,88,84,6e,2e,c3,f4,ef,18), &id);
130 if (r < 0)
131 return log_error_errno(r, "Failed to get machine ID: %m");
133 /* See NVM Express Base Specification 2.0c, 4.5 "NVMe Qualified Names" */
134 if (asprintf(&arg_nqn, "nqn.2023-10.io.systemd:storagetm." SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
135 return log_oom();
138 return 1;
/* State we keep for one NVMe subsystem we created in configfs. */
typedef struct NvmeSubsystem {
        /* Name of our directory below the subsystems/ dir */
        char *name;

        /* fstat() result of device_fd; this is what the subsystems hashmap is keyed by */
        struct stat device_stat;

        /* The opened backing block device or regular file */
        int device_fd;

        /* The /sys/kernel/config/nvmet/subsystems/ dir, that contains all subsystems */
        int nvme_all_subsystems_fd;

        /* Our private subsystem dir below it. */
        int nvme_our_subsystem_fd;

        /* Copy of the path the backing node was added under */
        char *device;
} NvmeSubsystem;
150 static NvmeSubsystem* nvme_subsystem_free(NvmeSubsystem *s) {
151 if (!s)
152 return NULL;
154 free(s->name);
155 safe_close(s->nvme_all_subsystems_fd);
156 safe_close(s->nvme_our_subsystem_fd);
157 safe_close(s->device_fd);
158 free(s->device);
160 return mfree(s);
163 static int nvme_subsystem_unlink(NvmeSubsystem *s) {
164 int r;
166 assert(s);
168 if (s->nvme_our_subsystem_fd >= 0) {
169 _cleanup_close_ int namespaces_fd = -EBADF;
171 namespaces_fd = openat(s->nvme_our_subsystem_fd, "namespaces", O_CLOEXEC|O_DIRECTORY|O_RDONLY);
172 if (namespaces_fd < 0)
173 log_warning_errno(errno, "Failed to open 'namespaces' directory of subsystem '%s': %m", s->name);
174 else {
175 _cleanup_free_ DirectoryEntries *de = NULL;
177 r = readdir_all(namespaces_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
178 if (r < 0)
179 log_warning_errno(r, "Failed to read 'namespaces' dir of subsystem '%s', ignoring: %m", s->name);
180 else {
181 FOREACH_ARRAY(ee, de->entries, de->n_entries) {
182 _cleanup_free_ char *enable_fn = NULL;
183 const struct dirent *e = *ee;
185 enable_fn = path_join(e->d_name, "enable");
186 if (!enable_fn)
187 return log_oom();
189 r = write_string_file_at(namespaces_fd, enable_fn, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
190 if (r < 0)
191 log_warning_errno(r, "Failed to disable namespace '%s' of NVME subsystem '%s', ignoring: %m", e->d_name, s->name);
193 if (unlinkat(namespaces_fd, e->d_name, AT_REMOVEDIR) < 0 && errno != ENOENT)
194 log_warning_errno(errno, "Failed to remove namespace '%s' of NVME subsystem '%s', ignoring: %m", e->d_name, s->name);
199 s->nvme_our_subsystem_fd = safe_close(s->nvme_our_subsystem_fd);
202 if (s->nvme_all_subsystems_fd >= 0 && s->name) {
203 if (unlinkat(s->nvme_all_subsystems_fd, s->name, AT_REMOVEDIR) < 0 && errno != ENOENT)
204 log_warning_errno(errno, "Failed to remove NVME subsystem '%s', ignoring: %m", s->name);
206 s->nvme_all_subsystems_fd = safe_close(s->nvme_all_subsystems_fd); /* Invalidate the subsystems/ dir fd, to remember we unlinked the thing already */
208 log_info("NVME subsystem '%s' removed.", s->name);
211 return 0;
214 static NvmeSubsystem *nvme_subsystem_destroy(NvmeSubsystem *s) {
215 if (!s)
216 return NULL;
218 (void) nvme_subsystem_unlink(s);
220 return nvme_subsystem_free(s);
223 DEFINE_TRIVIAL_CLEANUP_FUNC(NvmeSubsystem*, nvme_subsystem_destroy);
225 static int nvme_subsystem_write_metadata(int subsystem_fd, sd_device *device) {
226 _cleanup_free_ char *image_id = NULL, *image_version = NULL, *os_id = NULL, *os_version = NULL, *combined_model = NULL, *synthetic_serial = NULL;
227 const char *hwmodel = NULL, *hwserial = NULL, *w;
228 int r;
230 assert(subsystem_fd >= 0);
232 (void) parse_os_release(
233 /* root= */ NULL,
234 "IMAGE_ID", &image_id,
235 "IMAGE_VERSION", &image_version,
236 "ID", &os_id,
237 "VERSION_ID", &os_version);
239 if (device) {
240 (void) device_get_model_string(device, &hwmodel);
241 (void) sd_device_get_property_value(device, "ID_SERIAL_SHORT", &hwserial);
244 w = secure_getenv("SYSTEMD_NVME_MODEL");
245 if (!w) {
246 if (hwmodel && (image_id || os_id)) {
247 if (asprintf(&combined_model, "%s (%s)", hwmodel, image_id ?: os_id) < 0)
248 return log_oom();
249 w = combined_model;
250 } else
251 w = hwmodel ?: image_id ?: os_id;
253 if (w) {
254 _cleanup_free_ char *truncated = strndup(w, 40); /* kernel refuses more than 40 chars (as per nvme spec) */
256 /* The default string stored in 'attr_model' is "Linux" btw. */
257 r = write_string_file_at(subsystem_fd, "attr_model", truncated, WRITE_STRING_FILE_DISABLE_BUFFER);
258 if (r < 0)
259 log_warning_errno(r, "Failed to set model of subsystem to '%s', ignoring: %m", w);
262 w = secure_getenv("SYSTEMD_NVME_FIRMWARE");
263 if (!w)
264 w = image_version ?: os_version;
265 if (w) {
266 _cleanup_free_ char *truncated = strndup(w, 8); /* kernel refuses more than 8 chars (as per nvme spec) */
267 if (!truncated)
268 return log_oom();
270 /* The default string stored in 'attr_firmware' is `uname -r` btw, but truncated to 8 chars. */
271 r = write_string_file_at(subsystem_fd, "attr_firmware", truncated, WRITE_STRING_FILE_DISABLE_BUFFER);
272 if (r < 0)
273 log_warning_errno(r, "Failed to set model of subsystem to '%s', ignoring: %m", truncated);
276 w = secure_getenv("SYSTEMD_NVME_SERIAL");
277 if (!w) {
278 if (hwserial)
279 w = hwserial;
280 else {
281 sd_id128_t mid;
283 r = sd_id128_get_machine_app_specific(SD_ID128_MAKE(39,7f,4d,bf,1e,bf,46,6d,b3,cb,45,b8,0d,49,5b,c1), &mid);
284 if (r < 0)
285 log_warning_errno(r, "Failed to get machine ID, ignoring: %m");
286 else {
287 if (asprintf(&synthetic_serial, SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(mid)) < 0)
288 return log_oom();
289 w = synthetic_serial;
293 if (w) {
294 _cleanup_free_ char *truncated = strndup(w, 20); /* kernel refuses more than 20 chars (as per nvme spec) */
295 if (!truncated)
296 return log_oom();
298 r = write_string_file_at(subsystem_fd, "attr_serial", truncated, WRITE_STRING_FILE_DISABLE_BUFFER);
299 if (r < 0)
300 log_warning_errno(r, "Failed to set serial of subsystem to '%s', ignoring: %m", truncated);
303 return 0;
306 static int nvme_namespace_write_metadata(int namespace_fd, sd_device *device, const char *node) {
307 sd_id128_t id = SD_ID128_NULL;
308 const char *e;
309 int r;
311 assert(namespace_fd >= 0);
313 e = secure_getenv("SYSTEMD_NVME_UUID");
314 if (e) {
315 r = sd_id128_from_string(e, &id);
316 if (r < 0)
317 log_warning_errno(r, "Failed to parse $SYSTEMD_NVME_UUID, ignoring: %s", e);
320 if (sd_id128_is_null(id)) {
321 const char *serial = NULL;
322 sd_id128_t mid = SD_ID128_NULL;
324 /* We combine machine ID and ID_SERIAL and hash a UUID from it */
326 if (device) {
327 (void) sd_device_get_property_value(device, "ID_SERIAL", &serial);
328 if (!serial)
329 (void) sd_device_get_devname(device, &serial);
331 if (!serial)
332 serial = node;
334 r = sd_id128_get_machine(&mid);
335 if (r < 0)
336 log_warning_errno(r, "Failed to get machine ID, ignoring: %m");
338 size_t l = sizeof(mid) + strlen_ptr(serial);
339 _cleanup_free_ void *j = malloc(l + 1);
340 if (!j)
341 return log_oom();
343 strcpy(mempcpy(j, &mid, sizeof(mid)), strempty(serial));
345 id = id128_digest(j, l);
348 r = write_string_file_at(namespace_fd, "device_uuid", SD_ID128_TO_UUID_STRING(id), WRITE_STRING_FILE_DISABLE_BUFFER);
349 if (r < 0)
350 log_warning_errno(r, "Failed to set uuid of namespace to '%s', ignoring: %m", SD_ID128_TO_UUID_STRING(id));
352 return 0;
355 static int nvme_subsystem_add(const char *node, int consumed_fd, sd_device *device, NvmeSubsystem **ret) {
356 _cleanup_(sd_device_unrefp) sd_device *allocated_device = NULL;
357 _cleanup_close_ int fd = consumed_fd; /* always take possession of the fd */
358 int r;
360 assert(node);
361 assert(ret);
363 _cleanup_free_ char *fname = NULL;
364 r = path_extract_filename(node, &fname);
365 if (r < 0)
366 return log_error_errno(r, "Failed to extract file name from path: %s", node);
368 _cleanup_free_ char *j = NULL;
369 j = strjoin(arg_nqn, ".", fname);
370 if (!j)
371 return log_oom();
373 if (fd < 0) {
374 fd = RET_NERRNO(open(node, O_RDONLY|O_CLOEXEC|O_NONBLOCK));
375 if (fd < 0)
376 return log_error_errno(fd, "Failed to open '%s': %m", node);
379 struct stat st;
380 if (fstat(fd, &st) < 0)
381 return log_error_errno(errno, "Failed to fstat '%s': %m", node);
382 if (S_ISBLK(st.st_mode)) {
383 if (!device) {
384 r = sd_device_new_from_devnum(&allocated_device, 'b', st.st_rdev);
385 if (r < 0)
386 return log_error_errno(r, "Failed to get device information for device '%s': %m", node);
388 device = allocated_device;
390 } else {
391 r = stat_verify_regular(&st);
392 if (r < 0)
393 return log_error_errno(r, "Not a block device or regular file, refusing: %s", node);
396 /* Let's lock this device continuously while we are operating on it */
397 r = lock_generic_with_timeout(fd, LOCK_BSD, LOCK_EX, 10 * USEC_PER_SEC);
398 if (r < 0)
399 return log_error_errno(r, "Failed to lock block device: %m");
401 _cleanup_close_ int subsystems_fd = -EBADF;
402 subsystems_fd = RET_NERRNO(open("/sys/kernel/config/nvmet/subsystems", O_DIRECTORY|O_CLOEXEC|O_RDONLY));
403 if (subsystems_fd < 0)
404 return log_error_errno(subsystems_fd, "Failed to open /sys/kernel/config/nvmet/subsystems: %m");
406 _cleanup_close_ int subsystem_fd = -EBADF;
407 subsystem_fd = open_mkdir_at(subsystems_fd, j, O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
408 if (subsystem_fd < 0)
409 return log_error_errno(subsystem_fd, "Failed to create NVME subsystem '%s': %m", j);
411 r = write_string_file_at(subsystem_fd, "attr_allow_any_host", "1", WRITE_STRING_FILE_DISABLE_BUFFER);
412 if (r < 0)
413 return log_error_errno(r, "Failed to set 'attr_allow_any_host' flag: %m");
415 (void) nvme_subsystem_write_metadata(subsystem_fd, device);
417 _cleanup_close_ int namespace_fd = -EBADF;
418 namespace_fd = open_mkdir_at(subsystem_fd, "namespaces/1", O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
419 if (namespace_fd < 0)
420 return log_error_errno(namespace_fd, "Failed to create NVME namespace '1': %m");
422 (void) nvme_namespace_write_metadata(namespace_fd, device, node);
424 /* We use /proc/$PID/fd/$FD rather than /proc/self/fd/$FD, because this string is visible to others
425 * via configfs, and by including the PID it's clear to who the stuff belongs. */
426 r = write_string_file_at(namespace_fd, "device_path", FORMAT_PROC_PID_FD_PATH(0, fd), WRITE_STRING_FILE_DISABLE_BUFFER);
427 if (r < 0)
428 return log_error_errno(r, "Failed to write 'device_path' attribute: %m");
430 r = write_string_file_at(namespace_fd, "enable", "1", WRITE_STRING_FILE_DISABLE_BUFFER);
431 if (r < 0)
432 return log_error_errno(r, "Failed to write 'enable' attribute: %m");
434 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *subsys = NULL;
436 subsys = new(NvmeSubsystem, 1);
437 if (!subsys)
438 return log_oom();
440 *subsys = (NvmeSubsystem) {
441 .name = TAKE_PTR(j),
442 .device_fd = TAKE_FD(fd),
443 .nvme_all_subsystems_fd = TAKE_FD(subsystems_fd),
444 .nvme_our_subsystem_fd = TAKE_FD(subsystem_fd),
445 .device_stat = st,
448 subsys->device = strdup(node);
449 if (!subsys->device)
450 return log_oom();
452 *ret = TAKE_PTR(subsys);
453 return 0;
/* State we keep for one NVMe port we created in configfs. */
typedef struct NvmePort {
        uint16_t portnr; /* used for both the IP and the NVME port number */

        int nvme_port_fd;  /* our own port directory */
        int nvme_ports_fd; /* the /sys/kernel/config/nvmet/ports directory our port directory lives in */

        int ip_family; /* AF_INET or AF_INET6 */
} NvmePort;
465 static NvmePort *nvme_port_free(NvmePort *p) {
466 if (!p)
467 return NULL;
469 safe_close(p->nvme_port_fd);
470 safe_close(p->nvme_ports_fd);
472 return mfree(p);
475 static int nvme_port_unlink(NvmePort *p) {
476 int r, ret = 0;
478 assert(p);
480 if (p->nvme_port_fd >= 0) {
481 _cleanup_close_ int subsystems_dir_fd = -EBADF;
483 subsystems_dir_fd = openat(p->nvme_port_fd, "subsystems", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
484 if (subsystems_dir_fd < 0)
485 log_warning_errno(errno, "Failed to open 'subsystems' dir of port %" PRIu16 ", ignoring: %m", p->portnr);
486 else {
487 _cleanup_free_ DirectoryEntries *de = NULL;
489 r = readdir_all(subsystems_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
490 if (r < 0)
491 log_warning_errno(r, "Failed to read 'subsystems' dir of port %" PRIu16 ", ignoring: %m", p->portnr);
492 else
493 FOREACH_ARRAY(ee, de->entries, de->n_entries) {
494 const struct dirent *e = *ee;
496 if (unlinkat(subsystems_dir_fd, e->d_name, 0) < 0 && errno != ENOENT)
497 log_warning_errno(errno, "Failed to remove 'subsystems' symlink '%s' of port %" PRIu16 ", ignoring: %m", e->d_name, p->portnr);
501 p->nvme_port_fd = safe_close(p->nvme_port_fd);
504 if (p->nvme_ports_fd >= 0) {
505 _cleanup_free_ char *fn = NULL;
506 if (asprintf(&fn, "%" PRIu16, p->portnr) < 0)
507 return log_oom();
509 if (unlinkat(p->nvme_ports_fd, fn, AT_REMOVEDIR) < 0) {
510 if (errno == ENOENT)
511 ret = 0;
512 else
513 ret = log_warning_errno(errno, "Failed to remove port '%" PRIu16 ", ignoring: %m", p->portnr);
514 } else
515 ret = 1;
517 p->nvme_ports_fd = safe_close(p->nvme_ports_fd);
520 return ret;
523 static NvmePort *nvme_port_destroy(NvmePort *p) {
524 if (!p)
525 return NULL;
527 (void) nvme_port_unlink(p);
529 return nvme_port_free(p);
532 DEFINE_TRIVIAL_CLEANUP_FUNC(NvmePort*, nvme_port_destroy);
534 static int nvme_port_add_portnr(
535 int ports_fd,
536 uint16_t portnr,
537 int ip_family,
538 int *ret_fd) {
540 int r;
542 assert(ports_fd >= 0);
543 assert(IN_SET(ip_family, AF_INET, AF_INET6));
544 assert(ret_fd);
546 _cleanup_free_ char *fname = NULL;
547 if (asprintf(&fname, "%" PRIu16, portnr) < 0)
548 return log_oom();
550 _cleanup_close_ int port_fd = -EBADF;
551 port_fd = open_mkdir_at(ports_fd, fname, O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
552 if (port_fd < 0) {
553 if (port_fd != -EEXIST)
554 return log_error_errno(port_fd, "Failed to create port %" PRIu16 ": %m", portnr);
556 *ret_fd = -EBADF;
557 return 0;
560 r = write_string_file_at(port_fd, "addr_adrfam", af_to_ipv4_ipv6(ip_family), WRITE_STRING_FILE_DISABLE_BUFFER);
561 if (r < 0)
562 return log_error_errno(r, "Failed to set address family on NVME port %" PRIu16 ": %m", portnr);
564 r = write_string_file_at(port_fd, "addr_trtype", "tcp", WRITE_STRING_FILE_DISABLE_BUFFER);
565 if (r < 0)
566 return log_error_errno(r, "Failed to set transport type on NVME port %" PRIu16 ": %m", portnr);
568 r = write_string_file_at(port_fd, "addr_trsvcid", fname, WRITE_STRING_FILE_DISABLE_BUFFER);
569 if (r < 0)
570 return log_error_errno(r, "Failed to set IP port on NVME port %" PRIu16 ": %m", portnr);
572 r = write_string_file_at(port_fd, "addr_traddr", ip_family == AF_INET6 ? "::" : "0.0.0.0", WRITE_STRING_FILE_DISABLE_BUFFER);
573 if (r < 0)
574 return log_error_errno(r, "Failed to set IP address on NVME port %" PRIu16 ": %m", portnr);
576 *ret_fd = TAKE_FD(port_fd);
577 return 1;
580 static uint16_t calculate_start_port(const char *name, int ip_family) {
581 struct siphash state;
582 uint16_t nr;
584 assert(name);
585 assert(IN_SET(ip_family, AF_INET, AF_INET6));
587 /* Use some fixed key Lennart pulled from /dev/urandom, so that we are deterministic */
588 siphash24_init(&state, SD_ID128_MAKE(d1,0b,67,b5,e2,b7,4a,91,8d,6b,27,b6,35,c1,9f,d9).bytes);
589 siphash24_compress_string(name, &state);
590 siphash24_compress(&ip_family, sizeof(ip_family), &state);
592 nr = 1024U + siphash24_finalize(&state) % (0xFFFFU - 1024U);
593 SET_FLAG(nr, 1, ip_family == AF_INET6); /* Lowest bit reflects family */
595 return nr;
/* Picks a random port number of 1024 or above, to be used when the preferred port is taken already. */
static uint16_t calculate_next_port(int ip_family) {
        assert(IN_SET(ip_family, AF_INET, AF_INET6));

        uint16_t port = 1024U + random_u64_range(0xFFFFU - 1024U);
        SET_FLAG(port, 1, ip_family == AF_INET6); /* Lowest bit reflects family */

        return port;
}
609 static int nvme_port_add(const char *name, int ip_family, NvmePort **ret) {
610 int r;
612 assert(name);
613 assert(IN_SET(ip_family, AF_INET, AF_INET6));
614 assert(ret);
616 _cleanup_close_ int ports_fd = -EBADF;
617 ports_fd = RET_NERRNO(open("/sys/kernel/config/nvmet/ports", O_DIRECTORY|O_RDONLY|O_CLOEXEC));
618 if (ports_fd < 0)
619 return log_error_errno(ports_fd, "Failed to open /sys/kernel/config/nvmet/ports: %m");
621 _cleanup_close_ int port_fd = -EBADF;
622 uint16_t portnr = calculate_start_port(name, ip_family);
623 for (unsigned attempt = 0;; attempt++) {
624 r = nvme_port_add_portnr(ports_fd, portnr, ip_family, &port_fd);
625 if (r < 0)
626 return r;
627 if (r > 0)
628 break;
630 if (attempt > 16)
631 return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Can't find free NVME port after %u attempts.", attempt);
633 log_debug_errno(port_fd, "NVME port %" PRIu16 " exists already, randomizing port.", portnr);
635 portnr = calculate_next_port(ip_family);
638 _cleanup_(nvme_port_destroyp) NvmePort *p = new(NvmePort, 1);
639 if (!p)
640 return log_oom();
642 *p = (NvmePort) {
643 .portnr = portnr,
644 .nvme_ports_fd = TAKE_FD(ports_fd),
645 .nvme_port_fd = TAKE_FD(port_fd),
646 .ip_family = ip_family,
649 *ret = TAKE_PTR(p);
650 return 0;
653 static int nvme_port_link_subsystem(NvmePort *port, NvmeSubsystem *subsys) {
654 assert(port);
655 assert(subsys);
657 _cleanup_free_ char *target = NULL, *linkname = NULL;
658 target = path_join("/sys/kernel/config/nvmet/subsystems", subsys->name);
659 if (!target)
660 return log_oom();
662 linkname = path_join("subsystems", subsys->name);
663 if (!linkname)
664 return log_oom();
666 if (symlinkat(target, port->nvme_port_fd, linkname) < 0)
667 return log_error_errno(errno, "Failed to link subsystem '%s' to port %" PRIu16 ": %m", subsys->name, port->portnr);
669 return 0;
672 static int nvme_port_unlink_subsystem(NvmePort *port, NvmeSubsystem *subsys) {
673 assert(port);
674 assert(subsys);
676 _cleanup_free_ char *linkname = NULL;
677 linkname = path_join("subsystems", subsys->name);
678 if (!linkname)
679 return log_oom();
681 if (unlinkat(port->nvme_port_fd, linkname, 0) < 0 && errno != ENOENT)
682 return log_error_errno(errno, "Failed to unlink subsystem '%s' to port %" PRIu16 ": %m", subsys->name, port->portnr);
684 return 0;
687 static int nvme_subsystem_report(NvmeSubsystem *subsystem, NvmePort *ipv4, NvmePort *ipv6) {
688 assert(subsystem);
690 _cleanup_free_ struct local_address *addresses = NULL;
691 int n_addresses;
692 n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
693 if (n_addresses < 0)
694 return log_error_errno(n_addresses, "Failed to determine local IP addresses: %m");
696 log_notice("NVMe-TCP: %s %s%s%s (%s)",
697 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
698 emoji_enabled() ? special_glyph(SPECIAL_GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : "",
699 subsystem->name, subsystem->device);
701 FOREACH_ARRAY(a, addresses, n_addresses) {
702 NvmePort *port = a->family == AF_INET ? ipv4 : ipv6;
704 if (!port)
705 continue;
707 log_info(" %s Try for specific device: nvme connect -t tcp -n '%s' -a %s -s %" PRIu16,
708 special_glyph(a >= addresses + (n_addresses - 1) ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH),
709 subsystem->name,
710 IN_ADDR_TO_STRING(a->family, &a->address),
711 port->portnr);
714 return 0;
717 static int plymouth_send_text(const char *text) {
718 _cleanup_free_ char *plymouth_message = NULL;
719 int c, r;
721 assert(text);
723 c = asprintf(&plymouth_message,
724 "M\x02%c%s%c"
725 "A%c", /* pause spinner */
726 (int) strlen(text) + 1, text, '\x00',
727 '\x00');
728 if (c < 0)
729 return log_oom();
731 r = plymouth_send_raw(plymouth_message, c, SOCK_NONBLOCK);
732 if (r < 0)
733 return log_full_errno(ERRNO_IS_NO_PLYMOUTH(r) ? LOG_DEBUG : LOG_WARNING, r,
734 "Failed to communicate with plymouth, ignoring: %m");
736 return 0;
739 static int plymouth_notify_port(NvmePort *port, struct local_address *a) {
740 _cleanup_free_ char *m = NULL;
742 if (!port || !a)
743 return 0;
745 if (asprintf(&m, "nvme connect-all -t tcp -a %s -s %" PRIu16, IN_ADDR_TO_STRING(a->family, &a->address), port->portnr) < 0)
746 return log_oom();
748 return plymouth_send_text(m);
751 static int nvme_port_report(NvmePort *port, bool *plymouth_done) {
752 if (!port)
753 return 0;
755 _cleanup_free_ struct local_address *addresses = NULL;
756 int n_addresses;
757 n_addresses = local_addresses(NULL, 0, port->ip_family, &addresses);
758 if (n_addresses < 0)
759 return log_error_errno(n_addresses, "Failed to determine local IP addresses: %m");
761 log_notice("NVMe-TCP: %s %s%sListening on %s (port %" PRIu16 ")",
762 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
763 emoji_enabled() ? special_glyph(SPECIAL_GLYPH_WORLD) : "", emoji_enabled() ? " " : "",
764 af_to_ipv4_ipv6(port->ip_family),
765 port->portnr);
767 FOREACH_ARRAY(a, addresses, n_addresses)
768 log_info(" %s Try for all devices: nvme connect-all -t tcp -a %s -s %" PRIu16,
769 special_glyph(a >= addresses + (n_addresses - 1) ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH),
770 IN_ADDR_TO_STRING(a->family, &a->address),
771 port->portnr);
773 if (plymouth_done && !*plymouth_done) {
774 (void) plymouth_notify_port(port, n_addresses > 0 ? addresses : NULL);
775 *plymouth_done = n_addresses > 0;
778 return 0;
781 typedef struct Context {
782 Hashmap *subsystems;
783 NvmePort *ipv4_port, *ipv6_port;
785 bool display_refresh_scheduled;
786 } Context;
788 static void device_hash_func(const struct stat *q, struct siphash *state) {
789 assert(q);
791 mode_t m = q->st_mode & S_IFMT;
792 siphash24_compress(&m, sizeof(m), state);
794 if (S_ISBLK(q->st_mode) || S_ISCHR(q->st_mode)) {
795 siphash24_compress(&q->st_rdev, sizeof(q->st_rdev), state);
796 return;
799 return inode_hash_func(q, state);
802 static int device_compare_func(const struct stat *a, const struct stat *b) {
803 int r;
805 assert(a);
806 assert(b);
808 r = CMP(a->st_mode & S_IFMT, b->st_mode & S_IFMT);
809 if (r != 0)
810 return r;
812 if (S_ISBLK(a->st_mode) || S_ISCHR(a->st_mode)) {
813 r = CMP(major(a->st_rdev), major(b->st_rdev));
814 if (r != 0)
815 return r;
817 r = CMP(minor(a->st_rdev), minor(b->st_rdev));
818 if (r != 0)
819 return r;
821 return 0;
824 return inode_compare_func(a, b);
827 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
828 nvme_subsystem_hash_ops,
829 struct stat,
830 device_hash_func,
831 device_compare_func,
832 NvmeSubsystem,
833 nvme_subsystem_destroy);
835 static void context_done(Context *c) {
836 assert(c);
838 c->ipv4_port = nvme_port_destroy(c->ipv4_port);
839 c->ipv6_port = nvme_port_destroy(c->ipv6_port);
841 c->subsystems = hashmap_free(c->subsystems);
844 static void device_track_back(sd_device *d, sd_device **ret) {
845 int r;
847 assert(d);
848 assert(ret);
850 const char *devname = NULL;
851 (void) sd_device_get_devname(d, &devname);
853 _cleanup_(sd_device_unrefp) sd_device *d_originating = NULL;
854 r = block_device_get_originating(d, &d_originating);
855 if (r < 0)
856 log_device_debug_errno(d, r, "Failed to get originating device for '%s', ignoring: %m", strna(devname));
858 sd_device *d_whole = NULL;
859 r = block_device_get_whole_disk(d_originating ?: d, &d_whole); /* does not ref returned device */
860 if (r < 0)
861 log_device_debug_errno(d, r, "Failed to get whole device for '%s', ignoring: %m", strna(devname));
863 *ret = d_whole ? sd_device_ref(d_whole) : d_originating ? TAKE_PTR(d_originating) : sd_device_ref(d);
866 static int device_is_same(sd_device *a, sd_device *b) {
867 dev_t devnum_a, devnum_b;
868 int r;
870 assert(a);
871 assert(b);
873 r = sd_device_get_devnum(a, &devnum_a);
874 if (r < 0)
875 return r;
877 r = sd_device_get_devnum(b, &devnum_b);
878 if (r < 0)
879 return r;
881 return devnum_a == devnum_b;
884 static bool device_is_allowed(sd_device *d) {
885 int r;
887 assert(d);
889 if (arg_all >= 2) /* If --all is specified twice we allow even the root fs to shared */
890 return true;
892 const char *devname;
893 r = sd_device_get_devname(d, &devname);
894 if (r < 0)
895 return log_device_error_errno(d, r, "Failed to get device name: %m");
897 dev_t root_devnum;
898 r = get_block_device("/", &root_devnum);
899 if (r < 0) {
900 log_warning_errno(r, "Failed to get backing device of the root file system: %m");
901 return false; /* Better safe */
903 if (root_devnum == 0) /* Not backed by a block device? */
904 return true;
906 _cleanup_(sd_device_unrefp) sd_device *root_device = NULL;
907 r = sd_device_new_from_devnum(&root_device, 'b', root_devnum);
908 if (r < 0) {
909 log_warning_errno(r, "Failed to get root block device, assuming device '%s' is same as root device: %m", devname);
910 return false;
913 _cleanup_(sd_device_unrefp) sd_device *whole_root_device = NULL;
914 device_track_back(root_device, &whole_root_device);
916 _cleanup_(sd_device_unrefp) sd_device *whole_d = NULL;
917 device_track_back(d, &whole_d);
919 r = device_is_same(whole_root_device, whole_d);
920 if (r < 0) {
921 log_warning_errno(r, "Failed to determine if root device and device '%s' are the same, assuming they are: %m", devname);
922 return false; /* Better safe */
925 return !r;
928 static int device_added(Context *c, sd_device *device) {
929 _cleanup_close_ int fd = -EBADF;
930 int r;
932 assert(c);
933 assert(device);
935 const char *sysname;
936 r = sd_device_get_sysname(device, &sysname);
937 if (r < 0)
938 return log_device_error_errno(device, r, "Failed to get device name: %m");
940 log_device_debug(device, "new block device '%s'", sysname);
942 if (STARTSWITH_SET(sysname, "loop", "zram")) /* Ignore some devices */
943 return 0;
945 const char *devname;
946 r = sd_device_get_devname(device, &devname);
947 if (r < 0)
948 return log_device_error_errno(device, r, "Failed to get device node path: %m");
950 struct stat lookup_key = {
951 .st_mode = S_IFBLK,
954 r = sd_device_get_devnum(device, &lookup_key.st_rdev);
955 if (r < 0)
956 return log_device_error_errno(device, r, "Failed to get major/minor from device: %m");
958 if (hashmap_contains(c->subsystems, &lookup_key)) {
959 log_debug("Device '%s' already seen.", devname);
960 return 0;
963 if (!device_is_allowed(device)) {
964 log_device_debug(device, "Not exposing device '%s', as it is backed by root disk.", devname);
965 return 0;
968 fd = sd_device_open(device, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
969 if (fd < 0) {
970 log_device_warning_errno(device, fd, "Failed to open newly acquired device '%s', ignoring device: %m", devname);
971 return 0;
974 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *s = NULL;
975 r = nvme_subsystem_add(devname, TAKE_FD(fd), device, &s);
976 if (r < 0)
977 return r;
979 if (c->ipv4_port) {
980 r = nvme_port_link_subsystem(c->ipv4_port, s);
981 if (r < 0)
982 return r;
985 if (c->ipv6_port) {
986 r = nvme_port_link_subsystem(c->ipv6_port, s);
987 if (r < 0)
988 return r;
991 r = hashmap_ensure_put(&c->subsystems, &nvme_subsystem_hash_ops, &s->device_stat, s);
992 if (r < 0)
993 return log_error_errno(r, "Failed to add subsystem to hash table: %m");
995 (void) nvme_subsystem_report(s, c->ipv4_port, c->ipv6_port);
997 TAKE_PTR(s);
998 return 1;
1001 static int device_removed(Context *c, sd_device *device) {
1002 int r;
1004 assert(device);
1006 struct stat lookup_key = {
1007 .st_mode = S_IFBLK,
1010 r = sd_device_get_devnum(device, &lookup_key.st_rdev);
1011 if (r < 0)
1012 return log_device_error_errno(device, r, "Failed to get major/minor from device: %m");
1014 NvmeSubsystem *s = hashmap_remove(c->subsystems, &lookup_key);
1015 if (!s)
1016 return 0;
1018 log_device_debug(device, "removed block device '%s'", s->name);
1020 if (c->ipv4_port)
1021 (void) nvme_port_unlink_subsystem(c->ipv4_port, s);
1022 if (c->ipv6_port)
1023 (void) nvme_port_unlink_subsystem(c->ipv6_port, s);
1025 s = nvme_subsystem_destroy(s);
1026 return 1;
1029 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
1030 Context *c = ASSERT_PTR(userdata);
1032 if (device_for_action(device, SD_DEVICE_REMOVE))
1033 device_removed(c, device);
1034 else
1035 device_added(c, device);
1037 return 0;
1040 static int on_display_refresh(sd_event_source *s, uint64_t usec, void *userdata) {
1041 Context *c = ASSERT_PTR(userdata);
1043 assert(s);
1045 c->display_refresh_scheduled = false;
1047 if (isatty(STDERR_FILENO) > 0)
1048 fputs(ANSI_HOME_CLEAR, stderr);
1050 /* If we have both IPv4 and IPv6, we display IPv4 info via Plymouth, since it doesn't have much
1051 * space, and IPv4 is simply shorter (and easy to type off screen) */
1053 bool plymouth_done = false;
1054 (void) nvme_port_report(c->ipv4_port, &plymouth_done);
1055 (void) nvme_port_report(c->ipv6_port, &plymouth_done);
1057 if (!plymouth_done)
1058 (void) plymouth_send_text("Network disconnected.");
1060 NvmeSubsystem *i;
1061 HASHMAP_FOREACH(i, c->subsystems)
1062 (void) nvme_subsystem_report(i, c->ipv4_port, c->ipv6_port);
1064 return 0;
1067 static int on_address_change(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
1068 Context *c = ASSERT_PTR(userdata);
1069 int r, family;
1071 assert(rtnl);
1072 assert(mm);
1074 r = sd_rtnl_message_addr_get_family(mm, &family);
1075 if (r < 0) {
1076 log_warning_errno(r, "Failed to get address family from netlink address message, ignoring: %m");
1077 return 0;
1080 if (!c->display_refresh_scheduled) {
1081 r = sd_event_add_time_relative(
1082 sd_netlink_get_event(rtnl),
1083 /* ret_slot= */ NULL,
1084 CLOCK_MONOTONIC,
1085 750 * USEC_PER_MSEC,
1087 on_display_refresh,
1089 if (r < 0)
1090 log_warning_errno(r, "Failed to schedule display refresh, ignoring: %m");
1091 else
1092 c->display_refresh_scheduled = true;
1095 return 0;
1098 static int run(int argc, char* argv[]) {
1099 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
1100 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
1101 _cleanup_(context_done) Context context = {};
1102 int r;
1104 log_show_color(true);
1105 log_parse_environment();
1106 log_open();
1108 r = parse_argv(argc, argv);
1109 if (r <= 0)
1110 return r;
1112 r = sd_event_new(&event);
1113 if (r < 0)
1114 return log_error_errno(r, "Failed to allocate event loop: %m");
1116 r = sd_event_set_signal_exit(event, true);
1117 if (r < 0)
1118 return log_error_errno(r, "Failed to install exit signal handlers: %m");
1120 STRV_FOREACH(i, arg_devices) {
1121 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *subsys = NULL;
1123 r = nvme_subsystem_add(*i, -EBADF, /* device= */ NULL, &subsys);
1124 if (r < 0)
1125 return r;
1127 r = hashmap_ensure_put(&context.subsystems, &nvme_subsystem_hash_ops, &subsys->device_stat, subsys);
1128 if (r == -EEXIST) {
1129 log_warning_errno(r, "Duplicate device '%s' specified, skipping: %m", *i);
1130 continue;
1132 if (r < 0)
1133 return log_error_errno(r, "Failed to add subsystem to hash table: %m");
1135 TAKE_PTR(subsys);
1138 r = nvme_port_add(arg_nqn, AF_INET, &context.ipv4_port);
1139 if (r < 0)
1140 return r;
1142 bool plymouth_done = false;
1143 nvme_port_report(context.ipv4_port, &plymouth_done);
1145 if (socket_ipv6_is_enabled()) {
1146 r = nvme_port_add(arg_nqn, AF_INET6, &context.ipv6_port);
1147 if (r < 0)
1148 return r;
1150 nvme_port_report(context.ipv6_port, &plymouth_done);
1153 if (!plymouth_done)
1154 (void) plymouth_send_text("Network disconnected.");
1156 NvmeSubsystem *i;
1157 HASHMAP_FOREACH(i, context.subsystems) {
1158 if (context.ipv4_port) {
1159 r = nvme_port_link_subsystem(context.ipv4_port, i);
1160 if (r < 0)
1161 return r;
1164 if (context.ipv6_port) {
1165 r = nvme_port_link_subsystem(context.ipv6_port, i);
1166 if (r < 0)
1167 return r;
1170 (void) nvme_subsystem_report(i, context.ipv4_port, context.ipv6_port);
1173 if (arg_all > 0) {
1174 r = sd_device_monitor_new(&monitor);
1175 if (r < 0)
1176 return log_error_errno(r, "Failed to allocate device monitor: %m");
1178 r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "disk");
1179 if (r < 0)
1180 return log_error_errno(r, "Failed to configure device monitor match: %m");
1182 r = sd_device_monitor_attach_event(monitor, event);
1183 if (r < 0)
1184 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
1186 r = sd_device_monitor_start(monitor, device_monitor_handler, &context);
1187 if (r < 0)
1188 return log_error_errno(r, "Failed to start device monitor: %m");
1190 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
1191 r = sd_device_enumerator_new(&enumerator);
1192 if (r < 0)
1193 return log_error_errno(r, "Failed to allocate enumerator: %m");
1195 r = sd_device_enumerator_add_match_subsystem(enumerator, "block", /* match= */ true);
1196 if (r < 0)
1197 return log_error_errno(r, "Failed to match block devices: %m");
1199 r = sd_device_enumerator_add_match_property(enumerator, "DEVTYPE", "disk");
1200 if (r < 0)
1201 return log_error_errno(r, "Failed to match whole block devices: %m");
1203 r = sd_device_enumerator_add_nomatch_sysname(enumerator, "loop*");
1204 if (r < 0)
1205 return log_error_errno(r, "Failed to exclude loop devices: %m");
1207 FOREACH_DEVICE(enumerator, device)
1208 device_added(&context, device);
1211 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
1212 r = sd_netlink_open(&rtnl);
1213 if (r < 0)
1214 return log_error_errno(r, "Failed to connect to netlink: %m");
1216 r = sd_netlink_attach_event(rtnl, event, SD_EVENT_PRIORITY_NORMAL);
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to attach netlink socket to event loop: %m");
1220 r = sd_netlink_add_match(rtnl, /* ret_slot= */ NULL, RTM_NEWADDR, on_address_change, /* destroy_callback= */ NULL, &context, "storagetm-newaddr");
1221 if (r < 0)
1222 return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR events: %m");
1224 r = sd_netlink_add_match(rtnl, /* ret_slot= */ NULL, RTM_DELADDR, on_address_change, /* destroy_callback= */ NULL, &context, "storagetm-deladdr");
1225 if (r < 0)
1226 return log_error_errno(r, "Failed to subscribe to RTM_DELADDR events: %m");
1228 if (isatty(0) > 0)
1229 log_info("Hit Ctrl-C to exit target mode.");
1231 _unused_ _cleanup_(notify_on_cleanup) const char *notify_message =
1232 notify_start("READY=1\n"
1233 "STATUS=Exposing disks in target mode...",
1234 NOTIFY_STOPPING);
1236 r = sd_event_loop(event);
1237 if (r < 0)
1238 return log_error_errno(r, "Failed to run event loop: %m");
1240 log_info("Exiting target mode.");
1241 return r;
/* Wraps run() into the program's main(). NOTE(review): judging by the macro name, positive return
 * values of run() are presumably treated as failure too — confirm against main-func.h. */
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);