/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/spa.h>
#include <zfs_comutil.h>
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 */
static uint_t zfs_read_history = B_FALSE;

/*
 * Include cache hits in history, disabled by default.
 */
static int zfs_read_history_hits = B_FALSE;

/*
 * Keeps stats on the last 100 txgs by default.
 */
static uint_t zfs_txg_history = 100;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 */
static uint_t zfs_multihost_history = B_FALSE;
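
/*
 * Example (illustrative): these tunables are exported as the module
 * parameters declared at the bottom of this file, so on Linux they can
 * be changed at runtime. For a hypothetical pool "tank":
 *
 *	echo 100 > /sys/module/zfs/parameters/zfs_read_history
 *	cat /proc/spl/kstat/zfs/tank/reads
 */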
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */
/*
 * Read statistics - Information exported regarding each arc_read call
 */
typedef struct spa_read_history {
	hrtime_t	start;		/* time read completed */
	uint64_t	objset;		/* read from this objset */
	uint64_t	object;		/* read of this object number */
	uint64_t	level;		/* block's indirection level */
	uint64_t	blkid;		/* read of this block id */
	char		origin[24];	/* read originated from here */
	uint32_t	aflags;		/* ARC flags (cached, prefetch, etc.) */
	pid_t		pid;		/* PID of task doing read */
	char		comm[16];	/* process name of task doing read */
	procfs_list_node_t	srh_node;
} spa_read_history_t;
static int
spa_read_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
	    "level", "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
static int
spa_read_history_show(struct seq_file *f, void *data)
{
	spa_read_history_t *srh = (spa_read_history_t *)data;

	seq_printf(f, "%-8llu %-16llu 0x%-6llx "
	    "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
	    (u_longlong_t)srh->srh_node.pln_id, srh->start,
	    (longlong_t)srh->objset, (longlong_t)srh->object,
	    (longlong_t)srh->level, (longlong_t)srh->blkid,
	    srh->aflags, srh->origin, srh->pid, srh->comm);

	return (0);
}
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
{
	spa_read_history_t *srh;
	while (shl->size > size) {
		srh = list_remove_head(&shl->procfs_list.pl_list);
		ASSERT3P(srh, !=, NULL);
		kmem_free(srh, sizeof (spa_read_history_t));
		shl->size--;
	}

	if (size == 0)
		ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_read_history_clear(procfs_list_t *procfs_list)
{
	spa_history_list_t *shl = procfs_list->pl_private;

	mutex_enter(&procfs_list->pl_lock);
	spa_read_history_truncate(shl, 0);
	mutex_exit(&procfs_list->pl_lock);

	return (0);
}
static void
spa_read_history_init(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.read_history;

	shl->size = 0;
	shl->procfs_list.pl_private = shl;
	procfs_list_install("zfs",
	    spa_name(spa),
	    "reads",
	    0600,
	    &shl->procfs_list,
	    spa_read_history_show,
	    spa_read_history_show_header,
	    spa_read_history_clear,
	    offsetof(spa_read_history_t, srh_node));
}
static void
spa_read_history_destroy(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.read_history;
	procfs_list_uninstall(&shl->procfs_list);
	spa_read_history_truncate(shl, 0);
	procfs_list_destroy(&shl->procfs_list);
}
void
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
	spa_history_list_t *shl = &spa->spa_stats.read_history;
	spa_read_history_t *srh;

	ASSERT3P(spa, !=, NULL);
	ASSERT3P(zb, !=, NULL);

	if (zfs_read_history == 0 && shl->size == 0)
		return;

	if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
		return;

	srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
	strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
	srh->start = gethrtime();
	srh->objset = zb->zb_objset;
	srh->object = zb->zb_object;
	srh->level = zb->zb_level;
	srh->blkid = zb->zb_blkid;
	srh->aflags = aflags;
	srh->pid = getpid();

	mutex_enter(&shl->procfs_list.pl_lock);

	procfs_list_add(&shl->procfs_list, srh);
	shl->size++;

	spa_read_history_truncate(shl, zfs_read_history);

	mutex_exit(&shl->procfs_list.pl_lock);
}
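
/*
 * Example (illustrative): the expected call site is the ARC read path,
 * which records each arc_read() using the bookmark and flags of the
 * in-flight I/O:
 *
 *	spa_read_history_add(spa, zb, aflags);
 *
 * Each call appends one record shown via /proc/spl/kstat/zfs/<pool>/reads,
 * bounded by zfs_read_history through the truncate above.
 */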
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */
/*
 * Txg statistics - Information exported regarding each txg sync
 */
typedef struct spa_txg_history {
	uint64_t	txg;		/* txg id */
	txg_state_t	state;		/* active txg state */
	uint64_t	nread;		/* number of bytes read */
	uint64_t	nwritten;	/* number of bytes written */
	uint64_t	reads;		/* number of read operations */
	uint64_t	writes;		/* number of write operations */
	uint64_t	ndirty;		/* number of dirty bytes */
	hrtime_t	times[TXG_STATE_COMMITTED]; /* completion times */
	procfs_list_node_t	sth_node;
} spa_txg_history_t;
static int
spa_txg_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
	    "ndirty", "nread", "nwritten", "reads", "writes",
	    "otime", "qtime", "wtime", "stime");

	return (0);
}
static int
spa_txg_history_show(struct seq_file *f, void *data)
{
	spa_txg_history_t *sth = (spa_txg_history_t *)data;
	uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
	char state;

	switch (sth->state) {
	case TXG_STATE_BIRTH:		state = 'B'; break;
	case TXG_STATE_OPEN:		state = 'O'; break;
	case TXG_STATE_QUIESCED:	state = 'Q'; break;
	case TXG_STATE_WAIT_FOR_SYNC:	state = 'W'; break;
	case TXG_STATE_SYNCED:		state = 'S'; break;
	case TXG_STATE_COMMITTED:	state = 'C'; break;
	default:			state = '?'; break;
	}

	if (sth->times[TXG_STATE_OPEN])
		open = sth->times[TXG_STATE_OPEN] -
		    sth->times[TXG_STATE_BIRTH];

	if (sth->times[TXG_STATE_QUIESCED])
		quiesce = sth->times[TXG_STATE_QUIESCED] -
		    sth->times[TXG_STATE_OPEN];

	if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
		wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
		    sth->times[TXG_STATE_QUIESCED];

	if (sth->times[TXG_STATE_SYNCED])
		sync = sth->times[TXG_STATE_SYNCED] -
		    sth->times[TXG_STATE_WAIT_FOR_SYNC];

	seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
	    "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
	    (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
	    (u_longlong_t)sth->ndirty,
	    (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
	    (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
	    (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
	    (u_longlong_t)sync);

	return (0);
}
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
{
	spa_txg_history_t *sth;
	while (shl->size > size) {
		sth = list_remove_head(&shl->procfs_list.pl_list);
		ASSERT3P(sth, !=, NULL);
		kmem_free(sth, sizeof (spa_txg_history_t));
		shl->size--;
	}

	if (size == 0)
		ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_txg_history_clear(procfs_list_t *procfs_list)
{
	spa_history_list_t *shl = procfs_list->pl_private;

	mutex_enter(&procfs_list->pl_lock);
	spa_txg_history_truncate(shl, 0);
	mutex_exit(&procfs_list->pl_lock);

	return (0);
}
static void
spa_txg_history_init(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.txg_history;

	shl->size = 0;
	shl->procfs_list.pl_private = shl;
	procfs_list_install("zfs",
	    spa_name(spa),
	    "txgs",
	    0644,
	    &shl->procfs_list,
	    spa_txg_history_show,
	    spa_txg_history_show_header,
	    spa_txg_history_clear,
	    offsetof(spa_txg_history_t, sth_node));
}
static void
spa_txg_history_destroy(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.txg_history;
	procfs_list_uninstall(&shl->procfs_list);
	spa_txg_history_truncate(shl, 0);
	procfs_list_destroy(&shl->procfs_list);
}
/*
 * Add a new txg to historical record.
 */
void
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
{
	spa_history_list_t *shl = &spa->spa_stats.txg_history;
	spa_txg_history_t *sth;

	if (zfs_txg_history == 0 && shl->size == 0)
		return;

	sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
	sth->txg = txg;
	sth->state = TXG_STATE_OPEN;
	sth->times[TXG_STATE_BIRTH] = birth_time;

	mutex_enter(&shl->procfs_list.pl_lock);
	procfs_list_add(&shl->procfs_list, sth);
	shl->size++;
	spa_txg_history_truncate(shl, zfs_txg_history);
	mutex_exit(&shl->procfs_list.pl_lock);
}
/*
 * Set txg state completion time and increment current state.
 */
int
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
    hrtime_t completed_time)
{
	spa_history_list_t *shl = &spa->spa_stats.txg_history;
	spa_txg_history_t *sth;
	int error = ENOENT;

	if (zfs_txg_history == 0)
		return (0);

	mutex_enter(&shl->procfs_list.pl_lock);
	for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
	    sth = list_prev(&shl->procfs_list.pl_list, sth)) {
		if (sth->txg == txg) {
			sth->times[completed_state] = completed_time;
			sth->state++;
			error = 0;
			break;
		}
	}
	mutex_exit(&shl->procfs_list.pl_lock);

	return (error);
}
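
/*
 * Example (illustrative): the txg machinery (txg.c) drives one record
 * through its lifecycle by pairing spa_txg_history_add() with successive
 * completion calls, roughly:
 *
 *	spa_txg_history_add(spa, txg, gethrtime());
 *	...
 *	spa_txg_history_set(spa, txg, TXG_STATE_OPEN, gethrtime());
 *	spa_txg_history_set(spa, txg, TXG_STATE_QUIESCED, gethrtime());
 *
 * Each call stamps times[completed_state] and advances sth->state, which
 * is what lets spa_txg_history_show() derive the otime/qtime/wtime/stime
 * deltas.
 */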
static int
spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
    uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
{
	spa_history_list_t *shl = &spa->spa_stats.txg_history;
	spa_txg_history_t *sth;
	int error = ENOENT;

	if (zfs_txg_history == 0)
		return (0);

	mutex_enter(&shl->procfs_list.pl_lock);
	for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
	    sth = list_prev(&shl->procfs_list.pl_list, sth)) {
		if (sth->txg == txg) {
			sth->nread = nread;
			sth->nwritten = nwritten;
			sth->reads = reads;
			sth->writes = writes;
			sth->ndirty = ndirty;
			error = 0;
			break;
		}
	}
	mutex_exit(&shl->procfs_list.pl_lock);

	return (error);
}
txg_stat_t *
spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
{
	txg_stat_t *ts;

	if (zfs_txg_history == 0)
		return (NULL);

	ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
	spa_config_exit(spa, SCL_CONFIG, FTAG);

	ts->txg = txg;
	ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];

	spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());

	return (ts);
}
void
spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
{
	if (ts == NULL)
		return;

	if (zfs_txg_history == 0) {
		kmem_free(ts, sizeof (txg_stat_t));
		return;
	}

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
	spa_txg_history_set_io(spa, ts->txg,
	    ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
	    ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
	    ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
	    ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
	    ts->ndirty);

	kmem_free(ts, sizeof (txg_stat_t));
}
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */

/*
 * Tx statistics - Information exported regarding dmu_tx_assign time.
 */

/*
 * When the kstat is written zero all buckets. When the kstat is read
 * count the number of trailing buckets set to zero and update ks_ndata
 * such that they are not output.
 */
static int
spa_tx_assign_update(kstat_t *ksp, int rw)
{
	spa_t *spa = ksp->ks_private;
	spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
	int i;

	if (rw == KSTAT_WRITE) {
		for (i = 0; i < shk->count; i++)
			((kstat_named_t *)shk->priv)[i].value.ui64 = 0;
	}

	for (i = shk->count; i > 0; i--)
		if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0)
			break;

	ksp->ks_ndata = i;
	ksp->ks_data_size = i * sizeof (kstat_named_t);

	return (0);
}
static void
spa_tx_assign_init(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
	char *name;
	kstat_named_t *ks;
	kstat_t *ksp;
	int i;

	mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);

	shk->count = 42; /* power of two buckets for 1ns to 2,199s */
	shk->size = shk->count * sizeof (kstat_named_t);
	shk->priv = kmem_alloc(shk->size, KM_SLEEP);

	name = kmem_asprintf("zfs/%s", spa_name(spa));

	for (i = 0; i < shk->count; i++) {
		ks = &((kstat_named_t *)shk->priv)[i];
		ks->data_type = KSTAT_DATA_UINT64;
		ks->value.ui64 = 0;
		(void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
		    (u_longlong_t)1 << i);
	}

	ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
	    KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
	shk->kstat = ksp;

	if (ksp) {
		ksp->ks_lock = &shk->lock;
		ksp->ks_data = shk->priv;
		ksp->ks_ndata = shk->count;
		ksp->ks_data_size = shk->size;
		ksp->ks_private = spa;
		ksp->ks_update = spa_tx_assign_update;
		kstat_install(ksp);
	}
	kmem_strfree(name);
}
static void
spa_tx_assign_destroy(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
	kstat_t *ksp;

	ksp = shk->kstat;
	if (ksp)
		kstat_delete(ksp);

	kmem_free(shk->priv, shk->size);
	mutex_destroy(&shk->lock);
}
void
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
{
	spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
	uint64_t idx = 0;

	/* Clamp to the last of shk->count buckets. */
	while (((1ULL << idx) < nsecs) && (idx < shk->count - 1))
		idx++;

	atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64);
}
/*
 * ==========================================================================
 * SPA MMP History Routines
 * ==========================================================================
 */

/*
 * MMP statistics - Information exported regarding attempted MMP writes
 *   For MMP writes issued, fields used as per comments below.
 *   For MMP writes skipped, an entry represents a span of time when
 *   writes were skipped for the same reason (error from mmp_random_leaf).
 *   Differences from issued writes are:
 *   timestamp	time first write skipped, if >1 skipped in a row
 *   mmp_delay	delay value at timestamp
 *   vdev_guid	number of writes skipped
 *   io_error	one of enum mmp_error
 *   duration	time span (ns) of skipped writes
 */
typedef struct spa_mmp_history {
	uint64_t	mmp_node_id;	/* unique # for updates */
	uint64_t	txg;		/* txg of last sync */
	uint64_t	timestamp;	/* UTC time MMP write issued */
	uint64_t	mmp_delay;	/* mmp_thread.mmp_delay at timestamp */
	uint64_t	vdev_guid;	/* unique ID of leaf vdev */
	char		*vdev_path;
	int		vdev_label;	/* vdev label */
	int		io_error;	/* error status of MMP write */
	hrtime_t	error_start;	/* hrtime of start of error period */
	hrtime_t	duration;	/* time from submission to completion */
	procfs_list_node_t	smh_node;
} spa_mmp_history_t;
static int
spa_mmp_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
	    "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
	    "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
static int
spa_mmp_history_show(struct seq_file *f, void *data)
{
	spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
	char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
	    "%-10lld %s\n";
	char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
	    "%-10lld %s\n";

	seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
	    (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
	    (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
	    (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
	    (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
	    (smh->vdev_path ? smh->vdev_path : "-"));

	return (0);
}
/* Remove oldest elements from list until there are no more than 'size' left */
static void
spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
{
	spa_mmp_history_t *smh;
	while (shl->size > size) {
		smh = list_remove_head(&shl->procfs_list.pl_list);
		if (smh->vdev_path)
			kmem_strfree(smh->vdev_path);
		kmem_free(smh, sizeof (spa_mmp_history_t));
		shl->size--;
	}

	if (size == 0)
		ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
static int
spa_mmp_history_clear(procfs_list_t *procfs_list)
{
	spa_history_list_t *shl = procfs_list->pl_private;

	mutex_enter(&procfs_list->pl_lock);
	spa_mmp_history_truncate(shl, 0);
	mutex_exit(&procfs_list->pl_lock);

	return (0);
}
static void
spa_mmp_history_init(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.mmp_history;

	shl->size = 0;
	shl->procfs_list.pl_private = shl;
	procfs_list_install("zfs",
	    spa_name(spa),
	    "multihost",
	    0644,
	    &shl->procfs_list,
	    spa_mmp_history_show,
	    spa_mmp_history_show_header,
	    spa_mmp_history_clear,
	    offsetof(spa_mmp_history_t, smh_node));
}
static void
spa_mmp_history_destroy(spa_t *spa)
{
	spa_history_list_t *shl = &spa->spa_stats.mmp_history;
	procfs_list_uninstall(&shl->procfs_list);
	spa_mmp_history_truncate(shl, 0);
	procfs_list_destroy(&shl->procfs_list);
}
/*
 * Set duration in existing "skip" record to how long we have waited for a leaf
 * vdev to become available.
 *
 * Important that we start search at the tail of the list where new
 * records are inserted, so this is normally an O(1) operation.
 */
int
spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
{
	spa_history_list_t *shl = &spa->spa_stats.mmp_history;
	spa_mmp_history_t *smh;
	int error = ENOENT;

	if (zfs_multihost_history == 0 && shl->size == 0)
		return (0);

	mutex_enter(&shl->procfs_list.pl_lock);
	for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
	    smh = list_prev(&shl->procfs_list.pl_list, smh)) {
		if (smh->mmp_node_id == mmp_node_id) {
			ASSERT3U(smh->io_error, !=, 0);
			smh->duration = gethrtime() - smh->error_start;
			smh->vdev_guid++;
			error = 0;
			break;
		}
	}
	mutex_exit(&shl->procfs_list.pl_lock);

	return (error);
}
/*
 * Set MMP write duration and error status in existing record.
 * See comment re: search order above spa_mmp_history_set_skip().
 */
int
spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
    hrtime_t duration)
{
	spa_history_list_t *shl = &spa->spa_stats.mmp_history;
	spa_mmp_history_t *smh;
	int error = ENOENT;

	if (zfs_multihost_history == 0 && shl->size == 0)
		return (0);

	mutex_enter(&shl->procfs_list.pl_lock);
	for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
	    smh = list_prev(&shl->procfs_list.pl_list, smh)) {
		if (smh->mmp_node_id == mmp_node_id) {
			ASSERT(smh->io_error == 0);
			smh->io_error = io_error;
			smh->duration = duration;
			error = 0;
			break;
		}
	}
	mutex_exit(&shl->procfs_list.pl_lock);

	return (error);
}
/*
 * Add a new MMP historical record.
 * error == 0 : a write was issued.
 * error != 0 : a write was not issued because no leaves were found.
 */
void
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
    uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
    int error)
{
	spa_history_list_t *shl = &spa->spa_stats.mmp_history;
	spa_mmp_history_t *smh;

	if (zfs_multihost_history == 0 && shl->size == 0)
		return;

	smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
	smh->txg = txg;
	smh->timestamp = timestamp;
	smh->mmp_delay = mmp_delay;
	if (vd) {
		smh->vdev_guid = vd->vdev_guid;
		if (vd->vdev_path)
			smh->vdev_path = kmem_strdup(vd->vdev_path);
	}
	smh->vdev_label = label;
	smh->mmp_node_id = mmp_node_id;

	if (error) {
		smh->io_error = error;
		smh->error_start = gethrtime();
		smh->vdev_guid = 1;
	}

	mutex_enter(&shl->procfs_list.pl_lock);
	procfs_list_add(&shl->procfs_list, smh);
	shl->size++;
	spa_mmp_history_truncate(shl, zfs_multihost_history);
	mutex_exit(&shl->procfs_list.pl_lock);
}
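
/*
 * Example (illustrative): the MMP thread is the expected caller. When no
 * leaf vdev is available it would add a "skip" record (vd == NULL) and
 * then extend it on each subsequent skip, roughly:
 *
 *	spa_mmp_history_add(spa, txg, ts, delay, NULL, -1, node_id, error);
 *	...
 *	(void) spa_mmp_history_set_skip(spa, node_id);
 *
 * where set_skip() bumps vdev_guid (the skip counter for such records)
 * and extends the record's duration.
 */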
static void *
spa_state_addr(kstat_t *ksp, loff_t n)
{
	if (n == 0)
		return (ksp->ks_private);	/* return the spa_t */
	return (NULL);
}
static int
spa_state_data(char *buf, size_t size, void *data)
{
	spa_t *spa = (spa_t *)data;
	(void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
	return (0);
}
/*
 * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
 *
 * This is a lock-less read of the pool's state (unlike using 'zpool', which
 * can potentially block for seconds). Because it doesn't block, it can be
 * useful as a pool heartbeat value.
 */
static void
spa_state_init(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.state;
	char *name;
	kstat_t *ksp;

	mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);

	name = kmem_asprintf("zfs/%s", spa_name(spa));
	ksp = kstat_create(name, 0, "state", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);

	shk->kstat = ksp;
	if (ksp) {
		ksp->ks_lock = &shk->lock;
		ksp->ks_data = NULL;
		ksp->ks_private = spa;
		ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
		kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
		kstat_install(ksp);
	}
	kmem_strfree(name);
}
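
/*
 * Example (illustrative, hypothetical pool "tank"):
 *
 *	$ cat /proc/spl/kstat/zfs/tank/state
 *	ONLINE
 *
 * The string comes from spa_state_to_name(), so the value tracks the
 * pool state without taking any pool locks.
 */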
static int
spa_guid_data(char *buf, size_t size, void *data)
{
	spa_t *spa = (spa_t *)data;
	(void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa));
	return (0);
}
static void
spa_guid_init(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.guid;
	char *name;
	kstat_t *ksp;

	mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);

	name = kmem_asprintf("zfs/%s", spa_name(spa));

	ksp = kstat_create(name, 0, "guid", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);

	shk->kstat = ksp;
	if (ksp) {
		ksp->ks_lock = &shk->lock;
		ksp->ks_data = NULL;
		ksp->ks_private = spa;
		ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
		kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr);
		kstat_install(ksp);
	}
	kmem_strfree(name);
}
static void
spa_health_destroy(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.state;
	kstat_t *ksp = shk->kstat;
	if (ksp)
		kstat_delete(ksp);

	mutex_destroy(&shk->lock);
}
static void
spa_guid_destroy(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.guid;
	kstat_t *ksp = shk->kstat;
	if (ksp)
		kstat_delete(ksp);

	mutex_destroy(&shk->lock);
}
static const spa_iostats_t spa_iostats_template = {
	{ "trim_extents_written",		KSTAT_DATA_UINT64 },
	{ "trim_bytes_written",			KSTAT_DATA_UINT64 },
	{ "trim_extents_skipped",		KSTAT_DATA_UINT64 },
	{ "trim_bytes_skipped",			KSTAT_DATA_UINT64 },
	{ "trim_extents_failed",		KSTAT_DATA_UINT64 },
	{ "trim_bytes_failed",			KSTAT_DATA_UINT64 },
	{ "autotrim_extents_written",		KSTAT_DATA_UINT64 },
	{ "autotrim_bytes_written",		KSTAT_DATA_UINT64 },
	{ "autotrim_extents_skipped",		KSTAT_DATA_UINT64 },
	{ "autotrim_bytes_skipped",		KSTAT_DATA_UINT64 },
	{ "autotrim_extents_failed",		KSTAT_DATA_UINT64 },
	{ "autotrim_bytes_failed",		KSTAT_DATA_UINT64 },
	{ "simple_trim_extents_written",	KSTAT_DATA_UINT64 },
	{ "simple_trim_bytes_written",		KSTAT_DATA_UINT64 },
	{ "simple_trim_extents_skipped",	KSTAT_DATA_UINT64 },
	{ "simple_trim_bytes_skipped",		KSTAT_DATA_UINT64 },
	{ "simple_trim_extents_failed",		KSTAT_DATA_UINT64 },
	{ "simple_trim_bytes_failed",		KSTAT_DATA_UINT64 },
	{ "arc_read_count",			KSTAT_DATA_UINT64 },
	{ "arc_read_bytes",			KSTAT_DATA_UINT64 },
	{ "arc_write_count",			KSTAT_DATA_UINT64 },
	{ "arc_write_bytes",			KSTAT_DATA_UINT64 },
	{ "direct_read_count",			KSTAT_DATA_UINT64 },
	{ "direct_read_bytes",			KSTAT_DATA_UINT64 },
	{ "direct_write_count",			KSTAT_DATA_UINT64 },
	{ "direct_write_bytes",			KSTAT_DATA_UINT64 },
};
#define	SPA_IOSTATS_ADD(stat, val) \
    atomic_add_64(&iostats->stat.value.ui64, (val));
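
/*
 * The macro expands to a single lock-free 64-bit add; for example,
 * SPA_IOSTATS_ADD(arc_read_count, iops) becomes
 * atomic_add_64(&iostats->arc_read_count.value.ui64, (iops));
 * so the callers below update the named kstats without taking shk->lock.
 */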
void
spa_iostats_trim_add(spa_t *spa, trim_type_t type,
    uint64_t extents_written, uint64_t bytes_written,
    uint64_t extents_skipped, uint64_t bytes_skipped,
    uint64_t extents_failed, uint64_t bytes_failed)
{
	spa_history_kstat_t *shk = &spa->spa_stats.iostats;
	kstat_t *ksp = shk->kstat;
	spa_iostats_t *iostats;

	if (ksp == NULL)
		return;

	iostats = ksp->ks_data;
	if (type == TRIM_TYPE_MANUAL) {
		SPA_IOSTATS_ADD(trim_extents_written, extents_written);
		SPA_IOSTATS_ADD(trim_bytes_written, bytes_written);
		SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped);
		SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
		SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
		SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
	} else if (type == TRIM_TYPE_AUTO) {
		SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
		SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
		SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
		SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
		SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
		SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
	} else {
		SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
		SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
		SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
		SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
		SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
		SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
	}
}
void
spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
{
	spa_history_kstat_t *shk = &spa->spa_stats.iostats;
	kstat_t *ksp = shk->kstat;

	if (ksp == NULL)
		return;

	spa_iostats_t *iostats = ksp->ks_data;
	if (flags & DMU_DIRECTIO) {
		SPA_IOSTATS_ADD(direct_read_count, iops);
		SPA_IOSTATS_ADD(direct_read_bytes, size);
	} else {
		SPA_IOSTATS_ADD(arc_read_count, iops);
		SPA_IOSTATS_ADD(arc_read_bytes, size);
	}
}
void
spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
{
	spa_history_kstat_t *shk = &spa->spa_stats.iostats;
	kstat_t *ksp = shk->kstat;

	if (ksp == NULL)
		return;

	spa_iostats_t *iostats = ksp->ks_data;
	if (flags & DMU_DIRECTIO) {
		SPA_IOSTATS_ADD(direct_write_count, iops);
		SPA_IOSTATS_ADD(direct_write_bytes, size);
	} else {
		SPA_IOSTATS_ADD(arc_write_count, iops);
		SPA_IOSTATS_ADD(arc_write_bytes, size);
	}
}
static int
spa_iostats_update(kstat_t *ksp, int rw)
{
	if (rw == KSTAT_WRITE) {
		memcpy(ksp->ks_data, &spa_iostats_template,
		    sizeof (spa_iostats_t));
	}

	return (0);
}
static void
spa_iostats_init(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.iostats;

	mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);

	char *name = kmem_asprintf("zfs/%s", spa_name(spa));
	kstat_t *ksp = kstat_create(name, 0, "iostats", "misc",
	    KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	shk->kstat = ksp;
	if (ksp) {
		int size = sizeof (spa_iostats_t);
		ksp->ks_lock = &shk->lock;
		ksp->ks_private = spa;
		ksp->ks_update = spa_iostats_update;
		ksp->ks_data = kmem_alloc(size, KM_SLEEP);
		memcpy(ksp->ks_data, &spa_iostats_template, size);
		kstat_install(ksp);
	}
	kmem_strfree(name);
}
static void
spa_iostats_destroy(spa_t *spa)
{
	spa_history_kstat_t *shk = &spa->spa_stats.iostats;
	kstat_t *ksp = shk->kstat;
	if (ksp) {
		kmem_free(ksp->ks_data, sizeof (spa_iostats_t));
		kstat_delete(ksp);
	}

	mutex_destroy(&shk->lock);
}
void
spa_stats_init(spa_t *spa)
{
	spa_read_history_init(spa);
	spa_txg_history_init(spa);
	spa_tx_assign_init(spa);
	spa_mmp_history_init(spa);
	spa_state_init(spa);
	spa_guid_init(spa);
	spa_iostats_init(spa);
}
void
spa_stats_destroy(spa_t *spa)
{
	spa_iostats_destroy(spa);
	spa_health_destroy(spa);
	spa_tx_assign_destroy(spa);
	spa_txg_history_destroy(spa);
	spa_read_history_destroy(spa);
	spa_mmp_history_destroy(spa);
	spa_guid_destroy(spa);
}
ZFS_MODULE_PARAM(zfs, zfs_, read_history, UINT, ZMOD_RW,
	"Historical statistics for the last N reads");

ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW,
	"Include cache hits in read history");

ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, UINT, ZMOD_RW,
	"Historical statistics for the last N txgs");

ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, UINT, ZMOD_RW,
	"Historical statistics for last N multihost writes");