/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */
#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"
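
/*
 * The ZED agents run the zfs-diagnosis and zfs-retire logic on top of a
 * small emulation of the illumos fmd module API (see fmd_api.h).  Events
 * posted via zfs_agent_post_event() are queued on agent_events, under
 * agent_lock, and handed to the subscribers by a dedicated consumer
 * thread, so producers never call into an agent directly; agent_cond
 * wakes the consumer when an event arrives or at shutdown.
 */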

static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
static int		agent_exiting;

typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	list_node_t	ae_node;
	nvlist_t	*ae_nvl;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	const char	*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;
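
/*
 * A guid_search_t is seeded with either a devid (gs_devid) or a vdev guid
 * (gs_vdev_guid) and handed to zpool_iter(); the iterators below fill in
 * whichever of the devid, vdev guid, and pool guid were missing, and
 * record the vdev's class (primary, spare, or l2arc) in gs_vdev_type.
 */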

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	const char *path = NULL;
	uint64_t vdev_guid;
	nvlist_t **child;
	uint_t c, children;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}
	/*
	 * Otherwise, on a vdev guid match, grab the devid and expansion
	 * time.  The devid might be missing on removal since it's not part
	 * of the blkid cache, and an L2ARC vdev does not carry the pool
	 * guid in its blkid, so this is a special case for L2ARC vdevs.
	 */
	else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
	    gsp->gs_vdev_guid == vdev_guid) {
		(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
		    &gsp->gs_devid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}
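
/*
 * zpool_iter() stops as soon as its callback returns nonzero, so the
 * search below ends with the first pool in which both a devid and a
 * vdev guid have been resolved.
 */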
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * If a match was found then grab the pool guid
	 */
	if (gsp->gs_vdev_guid && gsp->gs_devid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
}

void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}
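
	/*
	 * Remap the internal 'sysevent.fs.zfs.vdev_check' request onto the
	 * EC_ZFS / ESC_ZFS_VDEV_CHECK class pair that the disk agent
	 * (zfs_slm_event() in zfs_agent_dispatch() below) subscribes to.
	 */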
	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug.  Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy, so we remap it here for the benefit of the diagnosis
	 * engine.  Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED
	 * from the spa layer.  Processing multiple FM_RESOURCE_REMOVED
	 * events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		const char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
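
		/*
		 * Stamp the remapped event with the current time.
		 * FM_EREPORT_TIME is a two-element int64 array holding the
		 * seconds and microseconds from gettimeofday().
		 */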
		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If the devid is missing but the vdev guid is available,
		 * find the devid and pool guid from the vdev guid.
		 * For multipath, spare, and l2arc devices ZFS_EV_VDEV_GUID
		 * or ZFS_EV_POOL_GUID may be missing, so find them.
		 */
		if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
			if (devid == NULL)
				search.gs_vdev_guid = vdev_guid;
			else
				search.gs_devid = devid;
			zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
			if (devid == NULL)
				devid = search.gs_devid;
			if (pool_guid == 0)
				pool_guid = search.gs_pool_guid;
			if (vdev_guid == 0)
				vdev_guid = search.gs_vdev_guid;
			devtype = search.gs_vdev_type;
		}

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for vdevs which were expanded recently (10s) and
		 * to avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    search.gs_devid);
			fnvlist_free(payload);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	/* wake up the consumer thread */
	(void) pthread_cond_signal(&agent_cond);
}
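
/*
 * Dispatch an event to every subscriber whose predicate matches.  Note
 * that the checks below are deliberately independent if-statements, not
 * an else-if chain: a single event (e.g. 'sysevent.fs.zfs.vdev_remove')
 * can be delivered to both the diagnosis engine and the retire agent.
 */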
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread.
 * An agent can also post an event, so the event list lock
 * is not held when calling an agent.
 * One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = list_remove_head(&agent_events)) != NULL) {
			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}
}
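
/*
 * Start-up order below: the SLM (disk) module first, then the emulated
 * zfs-diagnosis and zfs-retire fmd modules, and finally the event list
 * and its consumer thread.  Any failure is fatal to the daemon via
 * zed_log_die().
 */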
void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}
void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agents consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = list_remove_head(&agent_events)) != NULL) {
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zfs_slm_fini();
	g_zfs_hdl = NULL;

	zed_log_msg(LOG_INFO, "Add Agent: fini");
}