8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / avs / nsctl / nskernd.c
blob1a205979d52ce755fbf956b49ae9f4b39922b16f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/types.h>
28 #include <sys/resource.h>
29 #include <sys/priocntl.h>
30 #include <sys/rtpriocntl.h>
31 #include <sys/tspriocntl.h>
32 #include <sys/wait.h>
33 #include <sys/stat.h>
35 #include <strings.h>
36 #include <thread.h>
37 #include <stdlib.h>
38 #include <signal.h>
39 #include <errno.h>
40 #include <stdio.h>
41 #include <fcntl.h>
42 #include <locale.h>
43 #include <unistd.h>
44 #include <syslog.h>
46 #include <sys/nsctl/cfg.h>
47 #include <sys/nsctl/nsctl.h>
48 #include <sys/nsctl/nsc_ioctl.h>
49 #include <sys/nskernd.h>
50 #include <nsctl.h>
52 #include <sys/mkdev.h>
53 #include <sys/nsctl/sv_efi.h>
55 static const char *rdev = "/dev/nsctl";
58 * Define a minimal user stack size in bytes over and above the
59 * libthread THR_STACK_MIN minimum value.
61 * This stack size needs to be sufficient to run _newlwp() and then
62 * ioctl() down into the kernel.
64 #define NSK_STACK_SIZE 512
67 * LWP scheduling control switches.
69 * allow_pri - set to non-zero to enable priocntl() manipulations of
70 * created LWPs.
71 * allow_rt - set to non-zero to use the RT rather than the TS
72 * scheduling class when manipulating the schduling
73 * parameters for an LWP. Only used if allow_pri is
74 * non-zero.
76 static int allow_pri = 1;
77 static int allow_rt = 0; /* disallow - bad interactions with timeout() */
79 static int nsctl_fd = -1;
80 static int sigterm;
82 static int nthreads; /* number of threads in the kernel */
83 static int exiting; /* shutdown in progress flag */
84 static mutex_t thr_mutex = DEFAULTMUTEX;
85 static mutex_t cfg_mutex = DEFAULTMUTEX;
87 static int cl_nodeid = -1;
89 static int display_msg = 0;
90 static int delay_time = 30;
/*
 * Write the command synopsis to stderr and terminate the process with
 * exit status 255.  Does not return.
 */
static void
usage(void)
{
	(void) fprintf(stderr, gettext("usage: nskernd\n"));

	exit(255);
}
100 static void
101 sighand(int sig)
103 if (sig == SIGTERM) {
104 sigterm++;
110 * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel
113 nthread_inc(void)
115 (void) mutex_lock(&thr_mutex);
116 if (exiting) {
117 /* cannot enter kernel as nskernd is being shutdown - exit */
118 (void) mutex_unlock(&thr_mutex);
119 return (0);
121 nthreads++;
122 (void) mutex_unlock(&thr_mutex);
123 return (1);
127 void
128 nthread_dec(void)
130 (void) mutex_lock(&thr_mutex);
131 nthreads--;
132 (void) mutex_unlock(&thr_mutex);
137 * returns: 1 - can shutdown; 0 - unable to shutdown
140 canshutdown(void)
142 int rc = 1;
143 time_t start_delay;
145 (void) mutex_lock(&thr_mutex);
146 if (nthreads > 0) {
147 if (display_msg) {
148 (void) fprintf(stderr,
149 gettext("nskernd: unable to shutdown: "
150 "%d kernel threads in use\n"), nthreads);
152 start_delay = time(0);
153 while (nthreads > 0 && (time(0) - start_delay) < delay_time) {
154 (void) mutex_unlock(&thr_mutex);
155 (void) sleep(1);
156 (void) mutex_lock(&thr_mutex);
157 (void) fprintf(stderr,
158 gettext("nskernd: delay shutdown: "
159 "%d kernel threads in use\n"), nthreads);
161 if (nthreads > 0) {
162 rc = 0;
163 } else {
164 exiting = 1;
166 } else {
167 /* flag shutdown in progress */
168 exiting = 1;
170 (void) mutex_unlock(&thr_mutex);
172 return (rc);
177 * returns: 1 - shutdown successful; 0 - unable to shutdown
180 shutdown(void)
182 struct nskernd data;
183 int rc;
185 if (nsctl_fd < 0)
186 return (1);
188 bzero(&data, sizeof (data));
189 data.command = NSKERND_STOP;
191 if (!canshutdown()) {
192 return (0);
195 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
196 if (rc < 0) {
197 if (errno != EINTR || !sigterm) {
198 (void) fprintf(stderr,
199 gettext("nskernd: NSKERND_STOP failed\n"));
203 return (1);
208 * First function run by a NSKERND_NEWLWP thread.
210 * Determines if it needs to change the scheduling priority of the LWP,
211 * and then calls back into the kernel.
213 static void *
214 _newlwp(void *arg)
216 struct nskernd nsk;
217 pcparms_t pcparms;
218 pcinfo_t pcinfo;
220 /* copy arguments onto stack and free heap memory */
221 bcopy(arg, &nsk, sizeof (nsk));
222 free(arg);
224 if (nsk.data2 && allow_pri) {
225 /* increase the scheduling priority of this LWP */
227 bzero(&pcinfo, sizeof (pcinfo));
228 (void) strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS");
230 if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) {
231 (void) fprintf(stderr,
232 gettext(
233 "nskernd: priocntl(PC_GETCID) failed: %s\n"),
234 strerror(errno));
235 goto pri_done;
238 bzero(&pcparms, sizeof (pcparms));
239 pcparms.pc_cid = pcinfo.pc_cid;
241 if (allow_rt) {
242 ((rtparms_t *)pcparms.pc_clparms)->rt_pri =
243 (pri_t)0; /* minimum RT priority */
244 ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs =
245 (uint_t)RT_TQDEF;
246 ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs =
247 RT_TQDEF;
248 } else {
249 ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim =
250 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
251 ((tsparms_t *)pcparms.pc_clparms)->ts_upri =
252 ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
255 if (priocntl(P_LWPID, P_MYID,
256 PC_SETPARMS, (char *)&pcparms) < 0) {
257 (void) fprintf(stderr,
258 gettext(
259 "nskernd: priocntl(PC_SETPARMS) failed: %s\n"),
260 strerror(errno));
264 pri_done:
265 if (nthread_inc()) {
266 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
267 nthread_dec();
269 return (NULL);
274 * Start a new thread bound to an LWP.
276 * This is the user level side of nsc_create_process().
278 static void
279 newlwp(struct nskernd *req)
281 struct nskernd *nskp;
282 thread_t tid;
283 int rc;
285 nskp = malloc(sizeof (*nskp));
286 if (!nskp) {
287 #ifdef DEBUG
288 (void) fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"),
289 sizeof (*nskp));
290 #endif
291 req->data1 = (uint64_t)ENOMEM;
292 return;
295 /* copy args for child */
296 bcopy(req, nskp, sizeof (*nskp));
298 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
299 _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid);
301 if (rc != 0) {
302 /* thr_create failed */
303 #ifdef DEBUG
304 (void) fprintf(stderr,
305 gettext("nskernd: thr_create failed: %s\n"),
306 strerror(errno));
307 #endif
308 req->data1 = (uint64_t)errno;
309 free(nskp);
310 } else {
311 /* success - _newlwp() will free nskp */
312 req->data1 = (uint64_t)0;
316 static int
317 log_iibmp_err(char *set, int flags)
319 CFGFILE *cfg;
320 char key[CFG_MAX_KEY];
321 char buf[CFG_MAX_BUF];
322 char newflags[CFG_MAX_BUF];
323 char outbuf[CFG_MAX_BUF];
324 char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp;
325 int setno, found = 0;
326 int setlen;
327 int rc = 0;
328 pid_t pid = -1;
330 if (set && *set) {
331 setlen = strlen(set);
332 } else {
333 return (EINVAL);
336 (void) mutex_lock(&cfg_mutex);
337 cfg = cfg_open("");
338 if (!cfg) {
339 (void) mutex_unlock(&cfg_mutex);
340 return (ENXIO);
343 if (!cfg_lock(cfg, CFG_WRLOCK)) {
345 (void) mutex_unlock(&cfg_mutex);
346 cfg_close(cfg);
348 pid = fork();
350 if (pid == -1) {
351 (void) fprintf(stderr, gettext(
352 "nskernd: Error forking\n"));
353 return (errno);
354 } else if (pid > 0) {
355 (void) fprintf(stdout, gettext(
356 "nskernd: Attempting deferred bitmap error\n"));
357 return (0);
360 (void) mutex_lock(&cfg_mutex);
361 cfg = cfg_open("");
362 if (!cfg) {
363 (void) mutex_unlock(&cfg_mutex);
364 (void) fprintf(stderr, gettext(
365 "nskernd: Failed cfg_open, deferred bitmap\n"));
366 return (ENXIO);
369 /* Sooner or later, this lock will be free */
370 while (!cfg_lock(cfg, CFG_WRLOCK))
371 (void) sleep(2);
374 /* find the proper set number */
375 for (setno = 1; !found; setno++) {
376 (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
377 if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) {
378 break;
381 mst = strtok(buf, " ");
382 shd = strtok(NULL, " ");
383 if (strncmp(shd, set, setlen) == 0) {
384 found = 1;
386 bmp = strtok(NULL, " ");
387 mode = strtok(NULL, " ");
388 ovr = strtok(NULL, " ");
389 cnode = strtok(NULL, " ");
390 opt = strtok(NULL, " ");
391 grp = strtok(NULL, " ");
392 break;
396 if (found) {
397 /* were there flags in the options field already? */
398 (void) snprintf(newflags, CFG_MAX_BUF, "%s=0x%x",
399 NSKERN_II_BMP_OPTION, flags);
400 if (opt && strcmp(opt, "-") != 0) {
401 bzero(newflags, CFG_MAX_BUF);
402 opt = strtok(opt, ";");
403 while (opt) {
404 if (strncmp(opt, NSKERN_II_BMP_OPTION,
405 strlen(NSKERN_II_BMP_OPTION)) != 0) {
406 (void) strcat(newflags, ";");
407 (void) strcat(newflags, opt);
411 (void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
412 (void) snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s",
413 mst, shd, bmp, mode, ovr, cnode, newflags, grp);
414 if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) {
415 (void) printf("Failed to put [%s]\n", outbuf);
416 rc = ENXIO;
417 } else {
418 (void) cfg_commit(cfg);
419 rc = 0;
421 } else {
422 (void) fprintf(stderr, gettext(
423 "nskernd: Failed deferred bitmap [%s]\n"), set);
424 rc = EINVAL;
426 cfg_unlock(cfg);
427 cfg_close(cfg);
428 (void) mutex_unlock(&cfg_mutex);
431 * if we are the fork'ed client, just exit, if parent just return
433 if (pid == 0) {
434 exit(rc);
435 /*NOTREACHED*/
436 } else {
437 return (rc);
442 * First function run by a NSKERND_LOCK thread.
444 * Opens dscfg and locks it,
445 * and then calls back into the kernel.
447 * Incoming:
448 * data1 is the kernel address of the sync structure.
449 * data2 is read(0)/write(1) lock mode.
451 * Returns:
452 * data1 as incoming.
453 * data2 errno.
455 static void *
456 _dolock(void *arg)
458 struct nskernd nsk;
459 CFGFILE *cfg;
460 int locked;
461 int mode;
462 int rc = 0;
464 /* copy arguments onto stack and free heap memory */
465 bcopy(arg, &nsk, sizeof (nsk));
466 free(arg);
468 (void) mutex_lock(&cfg_mutex);
469 cfg = cfg_open("");
470 if (cfg == NULL) {
471 #ifdef DEBUG
472 (void) fprintf(stderr,
473 gettext("nskernd: cfg_open failed: %s\n"),
474 strerror(errno));
475 #endif
476 rc = ENXIO;
479 if (nsk.data2 == 0) {
480 mode = CFG_RDLOCK;
481 } else {
482 mode = CFG_WRLOCK;
485 locked = 0;
486 if (rc == 0) {
487 if (cfg_lock(cfg, mode)) {
488 locked = 1;
489 } else {
490 #ifdef DEBUG
491 (void) fprintf(stderr,
492 gettext("nskernd: cfg_lock failed: %s\n"),
493 strerror(errno));
494 #endif
495 rc = EINVAL;
499 /* return to kernel */
501 nsk.data2 = (uint64_t)rc;
502 if (nthread_inc()) {
503 (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
504 nthread_dec();
507 /* cleanup */
509 if (locked) {
510 cfg_unlock(cfg);
511 locked = 0;
514 if (cfg != NULL) {
515 cfg_close(cfg);
516 cfg = NULL;
518 (void) mutex_unlock(&cfg_mutex);
520 return (NULL);
525 * Inter-node lock thread.
527 * This is the user level side of nsc_rmlock().
529 static void
530 dolock(struct nskernd *req)
532 struct nskernd *nskp;
533 thread_t tid;
534 int rc;
536 /* create a new thread to do the lock and return to kernel */
538 nskp = malloc(sizeof (*nskp));
539 if (!nskp) {
540 #ifdef DEBUG
541 (void) fprintf(stderr,
542 gettext("nskernd:dolock: malloc(%d) failed\n"),
543 sizeof (*nskp));
544 #endif
545 req->data1 = (uint64_t)ENOMEM;
546 return;
549 /* copy args for child */
550 bcopy(req, nskp, sizeof (*nskp));
552 rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
553 _dolock, nskp, THR_BOUND|THR_DETACHED, &tid);
555 if (rc != 0) {
556 /* thr_create failed */
557 #ifdef DEBUG
558 (void) fprintf(stderr,
559 gettext("nskernd: thr_create failed: %s\n"),
560 strerror(errno));
561 #endif
562 req->data1 = (uint64_t)errno;
563 free(nskp);
564 } else {
565 /* success - _dolock() will free nskp */
566 req->data1 = (uint64_t)0;
572 * Convenience code for engineering test of multi-terabyte volumes.
574 * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI
575 * labels. This code allocates a simple efi label structure and ioctls
576 * to extract the size of a zvol. It only handles the minimal EFI ioctl
577 * implementation in zvol.
580 static void
581 zvol_bsize(char *path, uint64_t *size, const int pnum)
583 struct stat64 stb1, stb2;
584 struct dk_minfo dkm;
585 int fd = -1;
586 int rc;
588 if (cl_nodeid || pnum != 0)
589 return;
591 if ((fd = open(path, O_RDONLY)) < 0) {
592 return;
595 if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 ||
596 fstat64(fd, &stb2) != 0 ||
597 !S_ISCHR(stb1.st_mode) ||
598 !S_ISCHR(stb2.st_mode) ||
599 major(stb1.st_rdev) != major(stb2.st_rdev)) {
600 (void) close(fd);
601 return;
604 rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm);
605 if (rc >= 0) {
606 *size = LE_64(dkm.dki_capacity) *
607 (dkm.dki_lbsize) / 512;
610 (void) close(fd);
613 /* ARGSUSED */
614 static void
615 get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path)
617 struct nscioc_bsize bsize;
618 #ifdef DKIOCPARTITION
619 struct partition64 p64;
620 #endif
621 struct dk_cinfo dki_info;
622 struct vtoc vtoc;
623 int fd;
625 *partitionp = -1;
626 *size = (uint64_t)0;
628 dki_info.dki_partition = (ushort_t)-1;
629 bsize.dki_info = (uint64_t)(unsigned long)&dki_info;
630 bsize.vtoc = (uint64_t)(unsigned long)&vtoc;
631 bsize.raw_fd = raw_fd;
632 bsize.efi = 0;
634 fd = open(rdev, O_RDONLY);
635 if (fd < 0)
636 return;
638 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
639 if (dki_info.dki_partition != (ushort_t)-1) {
640 /* assume part# is ok and just the size failed */
641 *partitionp = (int)dki_info.dki_partition;
643 #ifdef DKIOCPARTITION
644 /* see if this is an EFI label */
645 bzero(&p64, sizeof (p64));
646 p64.p_partno = (uint_t)*partitionp;
647 if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) {
648 *size = (uint64_t)p64.p_size;
649 } else {
650 bsize.p64 = (uint64_t)(unsigned long)&p64;
651 bsize.efi = 1;
653 if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
654 /* see if this is a zvol */
655 zvol_bsize(path, size, *partitionp);
656 } else {
657 *size = (uint64_t)p64.p_size;
660 #endif /* DKIOCPARTITION */
663 (void) close(fd);
664 return;
667 (void) close(fd);
669 *partitionp = (int)dki_info.dki_partition;
671 if (vtoc.v_sanity != VTOC_SANE)
672 return;
674 if (vtoc.v_version != V_VERSION && vtoc.v_version != 0)
675 return;
677 if (dki_info.dki_partition > V_NUMPAR)
678 return;
680 *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size;
684 static int
685 iscluster(void)
688 * Find out if we are running in a cluster
690 cl_nodeid = cfg_iscluster();
691 if (cl_nodeid > 0) {
692 return (TRUE);
693 } else if (cl_nodeid == 0) {
694 return (FALSE);
697 (void) fprintf(stderr, "%s\n",
698 gettext("nskernd: unable to ascertain environment"));
699 exit(1);
700 /* NOTREACHED */
704 * Runtime Solaris release checking - build release == runtime release
705 * is always considered success, so only keep entries in the map for
706 * the special cases.
708 static nsc_release_t nskernd_rel_map[] = {
709 /* { "5.10", "5.10" }, */
710 { "5.11", "5.10" },
711 { NULL, NULL }
715 #ifdef lint
716 #define main nskernd_main
717 #endif
718 /* ARGSUSED1 */
720 main(int argc, char *argv[])
722 const char *dir = "/";
723 struct nskernd data;
724 struct rlimit rl;
725 int i, run, rc;
726 int partition;
727 char *reqd;
728 int syncpipe[2];
729 int startup;
731 (void) setlocale(LC_ALL, "");
732 (void) textdomain("nskernd");
734 rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd);
735 if (rc < 0) {
736 (void) fprintf(stderr,
737 gettext("nskernd: unable to determine the current "
738 "Solaris release: %s\n"), strerror(errno));
739 exit(1);
740 } else if (rc == FALSE) {
741 (void) fprintf(stderr,
742 gettext("nskernd: incorrect Solaris release "
743 "(requires %s)\n"), reqd);
744 exit(1);
747 rc = 0;
749 if (argc != 1)
750 usage();
753 * Usage: <progname> [-g] [-d <seconds to delay>]
755 while ((i = getopt(argc, argv, "gd:")) != EOF) {
756 switch (i) {
757 case 'g':
758 display_msg = 1;
759 break;
760 case 'd':
761 delay_time = atoi(optarg);
762 if (delay_time <= 0) {
763 delay_time = 30;
765 break;
766 default:
767 syslog(LOG_ERR,
768 "Usage: nskernd [-g] [-d <seconds to delay>]");
769 exit(1);
770 break;
774 if (chroot(dir) < 0) {
775 (void) fprintf(stderr, gettext("nskernd: chroot failed: %s\n"),
776 strerror(errno));
777 exit(1);
780 if (chdir(dir) < 0) {
781 (void) fprintf(stderr, gettext("nskernd: chdir failed: %s\n"),
782 strerror(errno));
783 exit(1);
787 * Determine if we are in a Sun Cluster or not, before fork'ing
789 (void) iscluster();
792 * create a pipe to synchronise the parent with the
793 * child just before it enters its service loop.
795 if (pipe(syncpipe) < 0) {
796 (void) fprintf(stderr,
797 gettext("nskernd: cannot create pipe: %s\n"),
798 strerror(errno));
799 exit(1);
802 * Fork off a child that becomes the daemon.
805 if ((rc = fork()) > 0) {
806 char c;
807 int n;
808 (void) close(syncpipe[1]);
810 * wait for the close of the pipe.
811 * If we get a char back, indicates good
812 * status from child, so exit 0.
813 * If we get a zero length read, then the
814 * child has failed, so we do too.
816 n = read(syncpipe[0], &c, 1);
817 exit((n <= 0) ? 1 : 0);
818 } else if (rc < 0) {
819 (void) fprintf(stderr, gettext("nskernd: cannot fork: %s\n"),
820 strerror(errno));
821 exit(1);
825 * In child - become daemon.
828 /* use closefrom(3C) from PSARC/2000/193 when possible */
829 for (i = 0; i < syncpipe[1]; i++) {
830 (void) close(i);
832 closefrom(syncpipe[1] + 1);
834 (void) open("/dev/console", O_WRONLY|O_APPEND);
835 (void) dup(0);
836 (void) dup(0);
837 (void) close(0);
839 (void) setpgrp();
842 * Ignore all signals apart from SIGTERM.
845 for (i = 1; i < _sys_nsig; i++)
846 (void) sigset(i, SIG_IGN);
848 (void) sigset(SIGTERM, sighand);
851 * Increase the number of fd's that can be open.
854 rl.rlim_cur = RLIM_INFINITY;
855 rl.rlim_max = RLIM_INFINITY;
856 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
857 (void) fprintf(stderr,
858 gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"),
859 strerror(errno));
860 (void) fprintf(stderr,
861 gettext("nskernd: the maximum number of nsctl open "
862 "devices may be reduced\n"));
866 * Open /dev/nsctl and startup.
869 nsctl_fd = open(rdev, O_RDONLY);
870 if (nsctl_fd < 0) {
871 (void) fprintf(stderr, gettext("nskernd: unable to open %s\n"),
872 rdev);
873 exit(1);
876 bzero(&data, sizeof (data));
878 data.command = NSKERND_START;
879 data.data1 = (uint64_t)cl_nodeid;
880 run = 1;
882 startup = 1;
883 while (run) {
884 rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
885 if (rc < 0) {
886 /* try and do kernel cleanup and exit */
887 if (shutdown()) {
888 run = 0;
889 } else {
890 sigterm = 0;
893 (void) fprintf(stderr,
894 gettext("nskernd: NSCIOC_NSKERND failed: %s\n"),
895 strerror(errno));
896 continue;
897 } else if (sigterm) {
898 /* SIGTERM received - terminate */
899 if (data.command != NSKERND_START &&
900 (data.command != NSKERND_STOP ||
901 data.data1 != (uint64_t)1)) {
902 /* need to do kernel cleanup */
903 if (shutdown()) {
904 run = 0;
905 } else {
906 sigterm = 0;
907 data.command = NSKERND_START;
908 data.data1 = (uint64_t)cl_nodeid;
910 } else {
911 /* just quit */
912 if (canshutdown()) {
913 run = 0;
914 } else {
915 /* cannot shutdown - threads active */
916 sigterm = 0;
917 data.command = NSKERND_START;
918 data.data1 = (uint64_t)cl_nodeid;
921 continue;
923 if (startup) {
924 char c = 0;
925 (void) write(syncpipe[1], &c, 1);
926 (void) close(syncpipe[1]);
927 startup = 0;
929 switch (data.command) {
930 case NSKERND_START: /* (re)start completion */
931 if (rc == 1) {
932 (void) fprintf(stderr,
933 gettext("nskernd: already started\n"));
934 run = 0;
935 } else if (rc == 2) {
936 (void) fprintf(stderr,
937 gettext("nskernd: stopped by kernel\n"));
938 run = 0;
940 data.command = NSKERND_WAIT;
941 break;
943 case NSKERND_STOP: /* kernel telling daemon to stop */
944 if (data.data1 != (uint64_t)1) {
945 (void) shutdown();
946 run = 0;
948 break;
950 case NSKERND_BSIZE:
952 * kernel requesting partsize
953 * data1 - size return
954 * data2 - raw_fd (entry)
955 * - partition number (return)
957 partition = -1;
958 get_bsize(data.data2, &data.data1,
959 &partition, data.char1);
960 data.data2 = (uint64_t)partition;
961 data.command = NSKERND_WAIT;
962 break;
964 case NSKERND_NEWLWP: /* kernel requesting a new LWP */
965 newlwp(&data);
966 data.command = NSKERND_WAIT;
967 break;
969 case NSKERND_LOCK: /* kernel requesting lock */
970 dolock(&data);
971 data.command = NSKERND_WAIT;
972 break;
974 case NSKERND_WAIT: /* kernel retrying wait */
976 * the kernel thread can be woken by the dr config
977 * utilities (ie cfgadm) therefore we just reissue
978 * the wait.
980 break;
982 case NSKERND_IIBITMAP:
983 rc = log_iibmp_err(data.char1, (int)data.data1);
984 data.data1 = (uint64_t)rc;
985 data.command = NSKERND_WAIT;
986 break;
988 default:
989 (void) fprintf(stderr,
990 gettext("nskernd: unknown command %d"),
991 data.command);
992 data.command = NSKERND_WAIT;
993 break;
997 (void) close(nsctl_fd);
999 return (rc);