4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
35 #include <sys/mdesc.h>
36 #include <sys/mdesc_impl.h>
37 #include <sys/debug.h>
39 #include <sys/types.h>
40 #include <sys/utsname.h>
47 * sun4 support for libv12n.
49 * Non-sun4v support is minimal. The v12n_capabilities() function will
50 * only return 0 (not supported, not enabled, no implementation).
52 * For sun4v, v12n_capabilities(), v12n_domain_roles(),
53 * v12n_domain_name() and v12n_domain_uuid() are supported by scanning the
54 * MD from /dev/mdesc for specific properties. For v12n_ctrl_domain() and
55 * v12n_chassis_serialno(), the ldoms agent daemon (ldmad) on the control
56 * domain supplies the required information via the "agent-system" domain
61 static void *v12n_ds_dlhdl
= NULL
;
62 static int (*v12n_ds_send_msg
)(ds_hdl_t
, void *, size_t) = NULL
;
63 static int (*v12n_ds_clnt_reg
)(ds_capability_t
*, ds_ops_t
*);
64 static int (*v12n_ds_unreg_svc
)(char *, boolean_t
);
67 * Defines to support the 'agent-system' domain service.
70 #define LDMA_SYSTEM_NVERS \
71 (sizeof (v12n_ldma_system_vers) / sizeof (ds_ver_t))
72 static ds_ver_t v12n_ldma_system_vers
[] = { { 1, 0} };
74 static ds_capability_t v12n_ldma_cap
= {
75 LDMA_NAME_SYSTEM
, /* svc_id */
76 v12n_ldma_system_vers
, /* vers */
77 LDMA_SYSTEM_NVERS
/* nvers */
80 static void v12n_ldma_register_handler(ds_hdl_t hdl
, ds_cb_arg_t arg
,
81 ds_ver_t
*ver
, ds_domain_hdl_t dhdl
);
82 static void v12n_ldma_data_handler(ds_hdl_t hdl
, ds_cb_arg_t arg
, void *buf
,
85 static ds_ops_t v12n_ldma_ops
= {
86 v12n_ldma_register_handler
, /* ds_reg_cb */
87 NULL
, /* ds_unreg_cb */
88 v12n_ldma_data_handler
, /* ds_data_cb */
92 /* v12n_ldma_cv_state values */
93 #define V12N_LDMA_CVINVALID -1 /* invalid value for cv_state */
94 #define V12N_LDMA_REGWAITING 0 /* waiting for ctrl domain reg */
95 #define V12N_LDMA_REGRECEIVED 1 /* received ctrl domain reg */
96 #define V12N_LDMA_MSGWAITING 2 /* waiting for message response */
97 #define V12N_LDMA_MSGRECEIVED 3 /* received message response */
98 #define V12N_LDMA_MSGERROR 4 /* received a bad message */
100 /* 'agent-system' data used in async registration/data message handlers */
101 static ds_hdl_t v12n_ldma_ctrl_hdl
= DS_INVALID_HDL
;
102 static int v12n_ldma_msgtype
;
103 static char *v12n_ldma_msgstr
;
104 static mutex_t v12n_ldma_lock
= DEFAULTMUTEX
;
105 static cond_t v12n_ldma_cv
= DEFAULTCV
;
106 static int v12n_ldma_cv_state
= V12N_LDMA_CVINVALID
;
107 static mutex_t v12n_ldma_cv_lock
= DEFAULTMUTEX
;
109 /* 'agent-system' timeout values in seconds */
110 static int v12n_ldma_timeout
= 15;
111 static int v12n_ldma_sleeptime
= 1;
114 #define V12N_LDOMS_SUPPORTED (V12N_CAP_SUPPORTED | V12N_CAP_ENABLED | \
117 #define MD_DEVICE "/dev/mdesc"
120 * libv12n routines to support /dev/mdesc.
124 * Wrapper for MD free: need unused size argument.
128 v12n_md_free(void *buf
, size_t n
)
134 * Wrapper for MD init: read MD and invoke md_init_intern.
146 * Open the Machine Description (MD)
148 fd
= open(MD_DEVICE
, O_RDONLY
);
153 if (read(fd
, &mdh
, sizeof (md_header_t
)) != sizeof (md_header_t
))
156 md_size
= sizeof (md_header_t
) + mdh
.node_blk_sz
+ mdh
.name_blk_sz
+
159 if ((buf
= malloc(md_size
)) == NULL
)
162 (void) memcpy(buf
, &mdh
, sizeof (md_header_t
));
163 if (read(fd
, buf
+ sizeof (md_header_t
),
164 md_size
- sizeof (md_header_t
)) != md_size
- sizeof (md_header_t
)) {
168 mdp
= md_init_intern((uint64_t *)((void *)buf
), malloc
, v12n_md_free
);
182 * Wrapper for md_fini. Allow NULL md ptr and free MD buffer.
185 v12n_md_fini(void *md
)
187 md_impl_t
*mdp
= (md_impl_t
*)md
;
196 * See if LDoms domaining is enabled, returns 1 if enabled.
197 * Get the value of the 'domaining-enabled' property under the
198 * 'platform' node. Value of 1 => domaining is enabled.
201 v12n_domaining_enabled()
203 mde_cookie_t
*nodes
, rootnode
;
205 uint64_t prop_val
= 0;
208 if ((mdp
= v12n_md_init()) == NULL
) {
212 nnodes
= md_node_count(mdp
);
213 nodes
= malloc(nnodes
* sizeof (mde_cookie_t
));
219 rootnode
= md_root_node(mdp
);
221 nnodes
= md_scan_dag(mdp
, rootnode
, md_find_name(mdp
, "platform"),
222 md_find_name(mdp
, "fwd"), nodes
);
225 (void) md_get_prop_val(mdp
, nodes
[0], "domaining-enabled",
231 return (prop_val
== 1);
237 struct utsname uinfo
;
242 * Check if this is an LDoms system. When using LDoms each
243 * domain should have a /dev/mdesc device providing access to
244 * the Machine Description (MD) of the domain. If this device
245 * does not exist then this is not an LDoms system.
247 if (uname(&uinfo
) == -1 || strcmp(uinfo
.machine
, "sun4v")) {
249 * Not sun4v -> LDoms not supported
252 } else if (stat(MD_DEVICE
, &st
) == 0) {
254 * sun4v + /dev/mdesc exists -> Check if LDoms enabled
255 * via the 'domaining-enabled' property.
257 cap
= (V12N_CAP_SUPPORTED
| V12N_CAP_IMPL_LDOMS
|
258 (v12n_domaining_enabled() ? V12N_CAP_ENABLED
: 0));
259 } else if (errno
== ENOENT
) {
261 * sun4v + /dev/mdesc does not exist -> LDoms supported
264 cap
= (V12N_CAP_SUPPORTED
| V12N_CAP_IMPL_LDOMS
);
271 * Routines to support v12n_domain_roles.
274 v12n_scan_md_nodes(md_t
*mdp
, char *node_name
, char *node_str_prop
,
277 mde_cookie_t
*nodes
, rootnode
;
281 nnodes
= md_node_count(mdp
);
282 nodes
= malloc(nnodes
* sizeof (mde_cookie_t
));
287 rootnode
= md_root_node(mdp
);
289 nnodes
= md_scan_dag(mdp
, rootnode
, md_find_name(mdp
, node_name
),
290 md_find_name(mdp
, "fwd"), nodes
);
292 if (node_str_prop
== NULL
)
295 for (i
= 0; i
< nnodes
; i
++) {
296 if (md_get_prop_str(mdp
, nodes
[i
], node_str_prop
, &prop_str
))
298 for (j
= 0; props
[j
] != NULL
; j
++) {
299 if (strcmp(prop_str
, props
[j
]) == 0) {
310 * Check if MD has a hypervisor access point, returns 1 if true.
311 * Check the MD for a 'virtual-device-port' node whose 'vldc-svc-name' is
315 v12n_check_hv_access(md_t
*mdp
)
317 static char *hvctl_str
[] = {
322 return (v12n_scan_md_nodes(mdp
, "virtual-device-port", "vldc-svc-name",
327 * Check if MD has a virtual device service (vcc, vsw, vds), returns 1 if true.
328 * Need to check all the MD 'virtual-device' nodes for a 'device-type' property
329 * of 'vcc', 'vsw' or 'vds'.
332 v12n_check_virtual_service(md_t
*mdp
)
334 static char *vdevs
[] = {
341 return (v12n_scan_md_nodes(mdp
, "virtual-device", "device-type",
346 * Check if MD has a physical I/O device node, returns 1 if true.
349 v12n_check_io_service(md_t
*mdp
)
351 return (v12n_scan_md_nodes(mdp
, "iodevice", NULL
, NULL
));
355 * Check if an MD node is a root PCI device, returns 1 if true.
356 * Need to check all the MD 'iodevice' nodes for a 'device-type' property
360 v12n_check_root(md_t
*mdp
)
362 static char *pciex
[] = {
367 return (v12n_scan_md_nodes(mdp
, "iodevice", "device-type", pciex
));
371 * Get the domain roles for the domain.
379 if (v12n_capabilities() != V12N_LDOMS_SUPPORTED
) {
384 if ((mdp
= v12n_md_init()) == NULL
) {
389 if (v12n_check_hv_access(mdp
))
390 roles
|= V12N_ROLE_CONTROL
;
392 if (v12n_check_virtual_service(mdp
))
393 roles
|= V12N_ROLE_SERVICE
;
395 if (v12n_check_io_service(mdp
))
396 roles
|= V12N_ROLE_IO
;
398 if (v12n_check_root(mdp
))
399 roles
|= V12N_ROLE_ROOT
;
407 * Get domain name from MD's virtual domain service node, returns 1 on success.
408 * The domain name is a string property 'vlds-domain-name' under the
409 * 'virtual-device' device node whose name is 'virtual-domain-service'.
412 v12n_get_md_domain_name(md_t
*mdp
, char **vds_dnamep
)
414 mde_cookie_t
*vdev_nodes
, rootnode
;
415 int list_size
, nvdevs
, num_nodes
, i
, rv
;
418 num_nodes
= md_node_count(mdp
);
419 list_size
= num_nodes
* sizeof (mde_cookie_t
);
420 vdev_nodes
= malloc(list_size
);
421 if (vdev_nodes
== NULL
) {
425 rootnode
= md_root_node(mdp
);
427 nvdevs
= md_scan_dag(mdp
, rootnode
, md_find_name(mdp
, "virtual-device"),
428 md_find_name(mdp
, "fwd"), vdev_nodes
);
431 for (i
= 0; i
< nvdevs
; i
++) {
432 if (md_get_prop_str(mdp
, vdev_nodes
[i
], "name", &vldc_name
))
434 if (strcmp(vldc_name
, "virtual-domain-service") == 0) {
435 rv
= (md_get_prop_str(mdp
, vdev_nodes
[i
],
436 "vlds-domain-name", vds_dnamep
) == 0);
445 * String copyout utility.
448 v12n_string_copyout(char *sout
, char *sfrom
, size_t count
)
450 size_t ret
= strlen(sfrom
) + 1;
452 if (sout
!= NULL
&& count
> 0) {
453 count
= MIN(ret
, count
);
454 (void) memcpy(sout
, sfrom
, count
);
460 * Get the domain name of this domain.
463 v12n_domain_name(char *buf
, size_t count
)
469 if (v12n_capabilities() != V12N_LDOMS_SUPPORTED
) {
471 } else if ((mdp
= v12n_md_init()) == NULL
) {
473 } else if (!v12n_get_md_domain_name(mdp
, &ldmname
)) {
476 rv
= v12n_string_copyout(buf
, ldmname
, count
);
484 * Get UUID string from MD, returns 1 on success.
485 * The UUID is a string property 'uuid' under the 'platform' node of the MD.
488 v12n_get_md_uuid_str(md_t
*mdp
, char **uuid_strp
)
490 mde_cookie_t
*plat_nodes
, rootnode
;
491 int list_size
, npnodes
, num_nodes
, rv
;
493 num_nodes
= md_node_count(mdp
);
494 list_size
= num_nodes
* sizeof (mde_cookie_t
);
495 plat_nodes
= malloc(list_size
);
496 if (plat_nodes
== NULL
) {
500 rootnode
= md_root_node(mdp
);
502 npnodes
= md_scan_dag(mdp
, rootnode
, md_find_name(mdp
, "platform"),
503 md_find_name(mdp
, "fwd"), plat_nodes
);
506 rv
= !md_get_prop_str(mdp
, plat_nodes
[0], "uuid", uuid_strp
);
515 * Get the domain UUID.
518 v12n_domain_uuid(uuid_t uuid
)
524 if (v12n_capabilities() != V12N_LDOMS_SUPPORTED
) {
526 } else if ((mdp
= v12n_md_init()) == NULL
) {
528 } else if (!v12n_get_md_uuid_str(mdp
, &uuid_str
)) {
531 rv
= uuid_parse(uuid_str
, uuid
);
540 * Send 'agent-system' request message.
543 v12n_ldma_send_request()
545 ldma_message_header_t ldmamsg
;
547 if (v12n_ds_send_msg
== NULL
|| v12n_ldma_ctrl_hdl
== DS_INVALID_HDL
)
551 ldmamsg
.msg_type
= v12n_ldma_msgtype
;
552 ldmamsg
.msg_info
= 0;
553 return (v12n_ds_send_msg(v12n_ldma_ctrl_hdl
, (char *)&ldmamsg
,
558 * 'agent-system' registration handler.
559 * If we get a registration from the control domain (domain 0), then send
560 * the requested message. Otherwise, ignore the registration.
564 v12n_ldma_register_handler(ds_hdl_t hdl
, ds_cb_arg_t arg
, ds_ver_t
*ver
,
565 ds_domain_hdl_t dhdl
)
568 /* got registration from control domain */
570 (void) mutex_lock(&v12n_ldma_cv_lock
);
571 if (v12n_ldma_cv_state
== V12N_LDMA_REGWAITING
) {
572 v12n_ldma_ctrl_hdl
= hdl
;
573 v12n_ldma_cv_state
= V12N_LDMA_REGRECEIVED
;
574 (void) cond_signal(&v12n_ldma_cv
);
576 (void) mutex_unlock(&v12n_ldma_cv_lock
);
581 * 'agent-system' data handler.
585 v12n_ldma_data_handler(ds_hdl_t hdl
, ds_cb_arg_t arg
, void *buf
,
589 ldma_message_header_t
*ldmp
;
591 int cv_state
= V12N_LDMA_MSGERROR
;
594 * Ignore any message not from the control domain.
596 if (v12n_ldma_ctrl_hdl
!= hdl
)
600 * Ignore any unexpected message.
602 if (buflen
< LDMA_MESSAGE_HEADER_SIZE
)
606 * Ignore message with unexpected msgnum.
608 ldmp
= (ldma_message_header_t
*)buf
;
609 if (ldmp
->msg_num
!= 0)
612 switch (ldmp
->msg_type
) {
614 case LDMA_MSG_RESULT
:
615 if (ldmp
->msg_info
== 0 ||
616 ldmp
->msg_info
> LDMA_MESSAGE_DLEN(buflen
)) {
617 cv_state
= V12N_LDMA_MSGERROR
;
620 data
= LDMA_HDR2DATA(buf
);
622 /* ensure that data ends with a '\0' */
623 data
[ldmp
->msg_info
- 1] = '\0';
624 switch (v12n_ldma_msgtype
) {
626 case LDMA_MSGSYS_GET_SYSINFO
:
628 * Control domain nodename is second string in the
629 * message. Make sure there is enough data in the msg
630 * to have a second string.
633 if (LDMA_MESSAGE_DLEN(buflen
) <= n
+ 3) {
634 cv_state
= V12N_LDMA_MSGERROR
;
638 if ((v12n_ldma_msgstr
= strdup(data
)) == NULL
)
639 cv_state
= V12N_LDMA_MSGERROR
;
641 cv_state
= V12N_LDMA_MSGRECEIVED
;
644 case LDMA_MSGSYS_GET_CHASSISNO
:
645 if ((v12n_ldma_msgstr
= strdup(data
)) == NULL
)
646 cv_state
= V12N_LDMA_MSGERROR
;
648 cv_state
= V12N_LDMA_MSGRECEIVED
;
652 /* v12n_ldma_msgtype must be valid */
658 cv_state
= V12N_LDMA_MSGERROR
;
662 /* unexpected message, ignored */
666 (void) mutex_lock(&v12n_ldma_cv_lock
);
667 v12n_ldma_cv_state
= cv_state
;
668 (void) cond_signal(&v12n_ldma_cv
);
669 (void) mutex_unlock(&v12n_ldma_cv_lock
);
674 * libds doesn't exist on non-sun4v, dynamically load it and get the
675 * function pointers to the needed lib functions.
678 v12n_libds_init(void)
680 if (v12n_ds_dlhdl
!= NULL
) {
681 if (v12n_ds_clnt_reg
== NULL
|| v12n_ds_send_msg
== NULL
||
682 v12n_ds_unreg_svc
== NULL
)
687 if ((v12n_ds_dlhdl
= dlopen("libds.so.1",
688 RTLD_NOW
| RTLD_GLOBAL
)) == NULL
)
691 if ((v12n_ds_clnt_reg
= (int (*)(ds_capability_t
*, ds_ops_t
*))
692 dlsym(v12n_ds_dlhdl
, "ds_clnt_reg")) == NULL
)
695 if ((v12n_ds_send_msg
= (int (*)(ds_hdl_t
, void *, size_t))
696 dlsym(v12n_ds_dlhdl
, "ds_send_msg")) == NULL
)
699 if ((v12n_ds_unreg_svc
= (int (*)(char *, boolean_t
))
700 dlsym(v12n_ds_dlhdl
, "ds_unreg_svc")) == NULL
)
707 * Initiate and wait for an ldmad 'agent-system' domain service.
708 * Dynamically load libds, register the client 'agent-system' service
709 * and wait for a specified amount of time for the 'agent-system'
710 * service on the control domain to respond to the request.
713 v12n_get_ldma_system_msg(int msgtype
, char **strp
)
720 * Ensure that there's only one thread trying to do a
721 * 'agent-system' client registration/message at a time.
723 (void) mutex_lock(&v12n_ldma_lock
);
724 if ((err
= v12n_libds_init()) != 0) {
725 (void) mutex_unlock(&v12n_ldma_lock
);
729 v12n_ldma_msgtype
= msgtype
;
730 v12n_ldma_msgstr
= NULL
;
732 /* initialize v12n_ldma_cv_state variable before registering service */
733 (void) mutex_lock(&v12n_ldma_cv_lock
);
734 v12n_ldma_cv_state
= V12N_LDMA_REGWAITING
;
735 (void) mutex_unlock(&v12n_ldma_cv_lock
);
738 * Other instances may be trying to load the "agent-system" service.
739 * If a collision happens (EALREADY error), wait and try again.
741 for (tout
= 0; tout
< v12n_ldma_timeout
; tout
+= v12n_ldma_sleeptime
) {
742 if ((err
= v12n_ds_clnt_reg(&v12n_ldma_cap
,
743 &v12n_ldma_ops
)) == 0)
745 if (err
!= EALREADY
) {
748 (void) sleep(v12n_ldma_sleeptime
);
751 if (tout
>= v12n_ldma_timeout
) {
757 * Wait for control domain registration.
759 timeout
.tv_sec
= v12n_ldma_timeout
;
762 (void) mutex_lock(&v12n_ldma_cv_lock
);
763 while (v12n_ldma_cv_state
== V12N_LDMA_REGWAITING
) {
764 if ((err
= cond_reltimedwait(&v12n_ldma_cv
,
765 &v12n_ldma_cv_lock
, &timeout
)) != EINTR
)
770 * Check for timeout or an error.
772 if (v12n_ldma_cv_state
!= V12N_LDMA_REGRECEIVED
) {
775 (void) mutex_unlock(&v12n_ldma_cv_lock
);
780 * Received a registration request, send the request message.
782 v12n_ldma_cv_state
= V12N_LDMA_MSGWAITING
;
783 if ((err
= v12n_ldma_send_request()) != 0) {
784 (void) mutex_unlock(&v12n_ldma_cv_lock
);
788 while (v12n_ldma_cv_state
== V12N_LDMA_MSGWAITING
) {
789 if ((err
= cond_reltimedwait(&v12n_ldma_cv
,
790 &v12n_ldma_cv_lock
, &timeout
)) != EINTR
)
794 if (v12n_ldma_cv_state
!= V12N_LDMA_MSGRECEIVED
) {
797 (void) mutex_unlock(&v12n_ldma_cv_lock
);
801 v12n_ldma_cv_state
= V12N_LDMA_CVINVALID
;
802 (void) mutex_unlock(&v12n_ldma_cv_lock
);
805 * If v12n_ldma_msgstr is set, a valid data response was seen.
807 if (v12n_ldma_msgstr
== NULL
)
810 if (*v12n_ldma_msgstr
== '\0' ||
811 (*strp
= strdup(v12n_ldma_msgstr
)) == NULL
)
813 free(v12n_ldma_msgstr
);
814 v12n_ldma_msgstr
= NULL
;
818 v12n_ds_unreg_svc(LDMA_NAME_SYSTEM
, B_TRUE
);
819 v12n_ldma_msgtype
= -1;
820 v12n_ldma_ctrl_hdl
= DS_INVALID_HDL
;
821 (void) mutex_unlock(&v12n_ldma_lock
);
827 * Get the nodename of the control domain. Returns the equivalent
828 * of 'uname -n' on the control domain.
829 * This is obtained via the 'agent-system' domain service provided
833 v12n_ctrl_domain(char *buf
, size_t count
)
837 size_t rv
= (size_t)(-1);
839 if (v12n_capabilities() != V12N_LDOMS_SUPPORTED
) {
841 } else if ((err
= v12n_get_ldma_system_msg(LDMA_MSGSYS_GET_SYSINFO
,
845 rv
= v12n_string_copyout(buf
, str
, count
);
851 * Get the Chassis serial number from the Control Domain.
852 * This is obtained via the 'agent-system' domain service provided
856 v12n_chassis_serialno(char *buf
, size_t count
)
860 size_t rv
= (size_t)(-1);
862 if (v12n_capabilities() != V12N_LDOMS_SUPPORTED
) {
864 } else if ((err
= v12n_get_ldma_system_msg(LDMA_MSGSYS_GET_CHASSISNO
,
868 rv
= v12n_string_copyout(buf
, str
, count
);