4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 #include <sys/crypto/icp.h>
37 #include <sys/processor.h>
38 #include <sys/rrwlock.h>
41 #include <sys/systeminfo.h>
43 #include <sys/utsname.h>
44 #include <sys/zfs_context.h>
45 #include <sys/zfs_onexit.h>
46 #include <sys/zfs_vfsops.h>
47 #include <sys/zstd/zstd.h>
49 #include <zfs_fletcher.h>
53 * Emulation of kernel services in userland.
58 struct utsname hw_utsname
;
60 /* If set, all blocks read will be copied to the specified directory. */
61 char *vn_dumpdir
= NULL
;
63 /* this only exists to have its address taken */
67 * =========================================================================
69 * =========================================================================
71 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While
72 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
73 * the expected stack depth while small enough to avoid exhausting address
74 * space with high thread counts.
76 #define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)
77 #define TS_STACK_MAX (256 * 1024)
79 struct zk_thread_wrapper
{
85 zk_thread_wrapper(void *arg
)
87 struct zk_thread_wrapper ztw
;
88 memcpy(&ztw
, arg
, sizeof (ztw
));
95 zk_thread_create(void (*func
)(void *), void *arg
, size_t stksize
, int state
)
100 struct zk_thread_wrapper
*ztw
;
101 int detachstate
= PTHREAD_CREATE_DETACHED
;
103 VERIFY0(pthread_attr_init(&attr
));
105 if (state
& TS_JOINABLE
)
106 detachstate
= PTHREAD_CREATE_JOINABLE
;
108 VERIFY0(pthread_attr_setdetachstate(&attr
, detachstate
));
111 * We allow the default stack size in user space to be specified by
112 * setting the ZFS_STACK_SIZE environment variable. This allows us
113 * the convenience of observing and debugging stack overruns in
114 * user space. Explicitly specified stack sizes will be honored.
115 * The usage of ZFS_STACK_SIZE is discussed further in the
116 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
119 stkstr
= getenv("ZFS_STACK_SIZE");
122 stksize
= TS_STACK_MAX
;
124 stksize
= MAX(atoi(stkstr
), TS_STACK_MIN
);
127 VERIFY3S(stksize
, >, 0);
128 stksize
= P2ROUNDUP(MAX(stksize
, TS_STACK_MIN
), PAGESIZE
);
131 * If this ever fails, it may be because the stack size is not a
132 * multiple of system page size.
134 VERIFY0(pthread_attr_setstacksize(&attr
, stksize
));
135 VERIFY0(pthread_attr_setguardsize(&attr
, PAGESIZE
));
137 VERIFY(ztw
= malloc(sizeof (*ztw
)));
140 VERIFY0(pthread_create(&tid
, &attr
, zk_thread_wrapper
, ztw
));
141 VERIFY0(pthread_attr_destroy(&attr
));
143 return ((void *)(uintptr_t)tid
);
147 * =========================================================================
149 * =========================================================================
152 kstat_create(const char *module
, int instance
, const char *name
,
153 const char *class, uchar_t type
, ulong_t ndata
, uchar_t ks_flag
)
155 (void) module
, (void) instance
, (void) name
, (void) class, (void) type
,
156 (void) ndata
, (void) ks_flag
;
161 kstat_install(kstat_t
*ksp
)
167 kstat_delete(kstat_t
*ksp
)
173 kstat_set_raw_ops(kstat_t
*ksp
,
174 int (*headers
)(char *buf
, size_t size
),
175 int (*data
)(char *buf
, size_t size
, void *data
),
176 void *(*addr
)(kstat_t
*ksp
, loff_t index
))
178 (void) ksp
, (void) headers
, (void) data
, (void) addr
;
182 * =========================================================================
184 * =========================================================================
188 mutex_init(kmutex_t
*mp
, char *name
, int type
, void *cookie
)
190 (void) name
, (void) type
, (void) cookie
;
191 VERIFY0(pthread_mutex_init(&mp
->m_lock
, NULL
));
192 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
196 mutex_destroy(kmutex_t
*mp
)
198 VERIFY0(pthread_mutex_destroy(&mp
->m_lock
));
202 mutex_enter(kmutex_t
*mp
)
204 VERIFY0(pthread_mutex_lock(&mp
->m_lock
));
205 mp
->m_owner
= pthread_self();
209 mutex_tryenter(kmutex_t
*mp
)
211 int error
= pthread_mutex_trylock(&mp
->m_lock
);
213 mp
->m_owner
= pthread_self();
216 VERIFY3S(error
, ==, EBUSY
);
222 mutex_exit(kmutex_t
*mp
)
224 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
225 VERIFY0(pthread_mutex_unlock(&mp
->m_lock
));
229 * =========================================================================
231 * =========================================================================
235 rw_init(krwlock_t
*rwlp
, char *name
, int type
, void *arg
)
237 (void) name
, (void) type
, (void) arg
;
238 VERIFY0(pthread_rwlock_init(&rwlp
->rw_lock
, NULL
));
239 rwlp
->rw_readers
= 0;
244 rw_destroy(krwlock_t
*rwlp
)
246 VERIFY0(pthread_rwlock_destroy(&rwlp
->rw_lock
));
250 rw_enter(krwlock_t
*rwlp
, krw_t rw
)
252 if (rw
== RW_READER
) {
253 VERIFY0(pthread_rwlock_rdlock(&rwlp
->rw_lock
));
254 atomic_inc_uint(&rwlp
->rw_readers
);
256 VERIFY0(pthread_rwlock_wrlock(&rwlp
->rw_lock
));
257 rwlp
->rw_owner
= pthread_self();
262 rw_exit(krwlock_t
*rwlp
)
264 if (RW_READ_HELD(rwlp
))
265 atomic_dec_uint(&rwlp
->rw_readers
);
269 VERIFY0(pthread_rwlock_unlock(&rwlp
->rw_lock
));
273 rw_tryenter(krwlock_t
*rwlp
, krw_t rw
)
278 error
= pthread_rwlock_tryrdlock(&rwlp
->rw_lock
);
280 error
= pthread_rwlock_trywrlock(&rwlp
->rw_lock
);
284 atomic_inc_uint(&rwlp
->rw_readers
);
286 rwlp
->rw_owner
= pthread_self();
291 VERIFY3S(error
, ==, EBUSY
);
297 zone_get_hostid(void *zonep
)
300 * We're emulating the system's hostid in userland.
307 rw_tryupgrade(krwlock_t
*rwlp
)
314 * =========================================================================
315 * condition variables
316 * =========================================================================
320 cv_init(kcondvar_t
*cv
, char *name
, int type
, void *arg
)
322 (void) name
, (void) type
, (void) arg
;
323 VERIFY0(pthread_cond_init(cv
, NULL
));
327 cv_destroy(kcondvar_t
*cv
)
329 VERIFY0(pthread_cond_destroy(cv
));
333 cv_wait(kcondvar_t
*cv
, kmutex_t
*mp
)
335 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
336 VERIFY0(pthread_cond_wait(cv
, &mp
->m_lock
));
337 mp
->m_owner
= pthread_self();
341 cv_wait_sig(kcondvar_t
*cv
, kmutex_t
*mp
)
348 cv_timedwait(kcondvar_t
*cv
, kmutex_t
*mp
, clock_t abstime
)
355 delta
= abstime
- ddi_get_lbolt();
359 VERIFY(gettimeofday(&tv
, NULL
) == 0);
361 ts
.tv_sec
= tv
.tv_sec
+ delta
/ hz
;
362 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% hz
) * (NANOSEC
/ hz
);
363 if (ts
.tv_nsec
>= NANOSEC
) {
365 ts
.tv_nsec
-= NANOSEC
;
368 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
369 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
370 mp
->m_owner
= pthread_self();
372 if (error
== ETIMEDOUT
)
381 cv_timedwait_hires(kcondvar_t
*cv
, kmutex_t
*mp
, hrtime_t tim
, hrtime_t res
,
390 ASSERT(flag
== 0 || flag
== CALLOUT_FLAG_ABSOLUTE
);
393 if (flag
& CALLOUT_FLAG_ABSOLUTE
)
394 delta
-= gethrtime();
399 VERIFY0(gettimeofday(&tv
, NULL
));
401 ts
.tv_sec
= tv
.tv_sec
+ delta
/ NANOSEC
;
402 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% NANOSEC
);
403 if (ts
.tv_nsec
>= NANOSEC
) {
405 ts
.tv_nsec
-= NANOSEC
;
408 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
409 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
410 mp
->m_owner
= pthread_self();
412 if (error
== ETIMEDOUT
)
421 cv_signal(kcondvar_t
*cv
)
423 VERIFY0(pthread_cond_signal(cv
));
427 cv_broadcast(kcondvar_t
*cv
)
429 VERIFY0(pthread_cond_broadcast(cv
));
433 * =========================================================================
435 * =========================================================================
/*
 * Stub: the sequence file and format string are ignored and nothing is
 * printed.
 */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{
	(void) m;
	(void) fmt;
}
445 procfs_list_install(const char *module
,
446 const char *submodule
,
449 procfs_list_t
*procfs_list
,
450 int (*show
)(struct seq_file
*f
, void *p
),
451 int (*show_header
)(struct seq_file
*f
),
452 int (*clear
)(procfs_list_t
*procfs_list
),
453 size_t procfs_list_node_off
)
455 (void) module
, (void) submodule
, (void) name
, (void) mode
, (void) show
,
456 (void) show_header
, (void) clear
;
457 mutex_init(&procfs_list
->pl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
458 list_create(&procfs_list
->pl_list
,
459 procfs_list_node_off
+ sizeof (procfs_list_node_t
),
460 procfs_list_node_off
+ offsetof(procfs_list_node_t
, pln_link
));
461 procfs_list
->pl_next_id
= 1;
462 procfs_list
->pl_node_offset
= procfs_list_node_off
;
466 procfs_list_uninstall(procfs_list_t
*procfs_list
)
472 procfs_list_destroy(procfs_list_t
*procfs_list
)
474 ASSERT(list_is_empty(&procfs_list
->pl_list
));
475 list_destroy(&procfs_list
->pl_list
);
476 mutex_destroy(&procfs_list
->pl_lock
);
/*
 * Yield the pln_id field (as an lvalue) of the procfs_list_node_t that
 * lives pl_node_offset bytes into obj.  Both macro arguments are
 * parenthesized so that an expression argument (e.g. "base + 1") is
 * evaluated as a whole before the cast to char * is applied.
 */
#define	NODE_ID(procfs_list, obj) \
	(((procfs_list_node_t *)(((char *)(obj)) + \
	(procfs_list)->pl_node_offset))->pln_id)
484 procfs_list_add(procfs_list_t
*procfs_list
, void *p
)
486 ASSERT(MUTEX_HELD(&procfs_list
->pl_lock
));
487 NODE_ID(procfs_list
, p
) = procfs_list
->pl_next_id
++;
488 list_insert_tail(&procfs_list
->pl_list
, p
);
492 * =========================================================================
494 * =========================================================================
498 * =========================================================================
499 * Figure out which debugging statements to print
500 * =========================================================================
503 static char *dprintf_string
;
504 static int dprintf_print_all
;
507 dprintf_find_string(const char *string
)
509 char *tmp_str
= dprintf_string
;
510 int len
= strlen(string
);
513 * Find out if this is a string we want to print.
514 * String format: file1.c,function_name1,file2.c,file3.c
517 while (tmp_str
!= NULL
) {
518 if (strncmp(tmp_str
, string
, len
) == 0 &&
519 (tmp_str
[len
] == ',' || tmp_str
[len
] == '\0'))
521 tmp_str
= strchr(tmp_str
, ',');
523 tmp_str
++; /* Get rid of , */
529 dprintf_setup(int *argc
, char **argv
)
534 * Debugging can be specified two ways: by setting the
535 * environment variable ZFS_DEBUG, or by including a
536 * "debug=..." argument on the command line. The command
537 * line setting overrides the environment variable.
540 for (i
= 1; i
< *argc
; i
++) {
541 int len
= strlen("debug=");
542 /* First look for a command line argument */
543 if (strncmp("debug=", argv
[i
], len
) == 0) {
544 dprintf_string
= argv
[i
] + len
;
545 /* Remove from args */
546 for (j
= i
; j
< *argc
; j
++)
553 if (dprintf_string
== NULL
) {
554 /* Look for ZFS_DEBUG environment variable */
555 dprintf_string
= getenv("ZFS_DEBUG");
559 * Are we just turning on all debugging?
561 if (dprintf_find_string("on"))
562 dprintf_print_all
= 1;
564 if (dprintf_string
!= NULL
)
565 zfs_flags
|= ZFS_DEBUG_DPRINTF
;
569 * =========================================================================
571 * =========================================================================
574 __dprintf(boolean_t dprint
, const char *file
, const char *func
,
575 int line
, const char *fmt
, ...)
577 /* Get rid of annoying "../common/" prefix to filename. */
578 const char *newfile
= zfs_basename(file
);
582 /* dprintf messages are printed immediately */
584 if (!dprintf_print_all
&&
585 !dprintf_find_string(newfile
) &&
586 !dprintf_find_string(func
))
589 /* Print out just the function name if requested */
591 if (dprintf_find_string("pid"))
592 (void) printf("%d ", getpid());
593 if (dprintf_find_string("tid"))
594 (void) printf("%ju ",
595 (uintmax_t)(uintptr_t)pthread_self());
596 if (dprintf_find_string("cpu"))
597 (void) printf("%u ", getcpuid());
598 if (dprintf_find_string("time"))
599 (void) printf("%llu ", gethrtime());
600 if (dprintf_find_string("long"))
601 (void) printf("%s, line %d: ", newfile
, line
);
602 (void) printf("dprintf: %s: ", func
);
604 (void) vprintf(fmt
, adx
);
608 /* zfs_dbgmsg is logged for dumping later */
614 buf
= umem_alloc(size
, UMEM_NOFAIL
);
615 i
= snprintf(buf
, size
, "%s:%d:%s(): ", newfile
, line
, func
);
619 (void) vsnprintf(buf
+ i
, size
- i
, fmt
, adx
);
625 umem_free(buf
, size
);
630 * =========================================================================
631 * cmn_err() and panic()
632 * =========================================================================
634 static char ce_prefix
[CE_IGNORE
][10] = { "", "NOTICE: ", "WARNING: ", "" };
635 static char ce_suffix
[CE_IGNORE
][2] = { "", "\n", "\n", "" };
/*
 * Print "error: ", the formatted message and a newline to stderr, then
 * abort().  Never returns.
 */
__attribute__((noreturn)) void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	/* think of it as a "user-level crash dump" */
	abort();
}
647 __attribute__((noreturn
)) void
648 panic(const char *fmt
, ...)
658 vcmn_err(int ce
, const char *fmt
, va_list adx
)
662 if (ce
!= CE_NOTE
) { /* suppress noise in userland stress testing */
663 (void) fprintf(stderr
, "%s", ce_prefix
[ce
]);
664 (void) vfprintf(stderr
, fmt
, adx
);
665 (void) fprintf(stderr
, "%s", ce_suffix
[ce
]);
670 cmn_err(int ce
, const char *fmt
, ...)
675 vcmn_err(ce
, fmt
, adx
);
680 * =========================================================================
682 * =========================================================================
688 (void) poll(0, 0, ticks
* (1000 / hz
));
692 * Find highest one bit set.
693 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
694 * The __builtin_clzll() function is supported by both GCC and Clang.
697 highbit64(uint64_t i
)
702 return (NBBY
* sizeof (uint64_t) - __builtin_clzll(i
));
706 * Find lowest one bit set.
707 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
708 * The __builtin_ffsll() function is supported by both GCC and Clang.
716 return (__builtin_ffsll(i
));
719 const char *random_path
= "/dev/random";
720 const char *urandom_path
= "/dev/urandom";
721 static int random_fd
= -1, urandom_fd
= -1;
726 VERIFY((random_fd
= open(random_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
727 VERIFY((urandom_fd
= open(urandom_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
741 random_get_bytes_common(uint8_t *ptr
, size_t len
, int fd
)
749 bytes
= read(fd
, ptr
, resid
);
750 ASSERT3S(bytes
, >=, 0);
759 random_get_bytes(uint8_t *ptr
, size_t len
)
761 return (random_get_bytes_common(ptr
, len
, random_fd
));
765 random_get_pseudo_bytes(uint8_t *ptr
, size_t len
)
767 return (random_get_bytes_common(ptr
, len
, urandom_fd
));
771 ddi_strtoull(const char *str
, char **nptr
, int base
, u_longlong_t
*result
)
774 *result
= strtoull(str
, nptr
, base
);
783 return (&hw_utsname
);
787 * =========================================================================
788 * kernel emulation setup & teardown
789 * =========================================================================
792 umem_out_of_memory(void)
794 char errmsg
[] = "out of memory -- generating core dump\n";
796 (void) fprintf(stderr
, "%s", errmsg
);
802 kernel_init(int mode
)
804 extern uint_t rrw_tsd_key
;
806 umem_nofail_callback(umem_out_of_memory
);
808 physmem
= sysconf(_SC_PHYS_PAGES
);
810 dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t
)physmem
,
811 (double)physmem
* sysconf(_SC_PAGE_SIZE
) / (1ULL << 30));
813 hostid
= (mode
& SPA_MODE_WRITE
) ? get_system_hostid() : 0;
817 VERIFY0(uname(&hw_utsname
));
824 spa_init((spa_mode_t
)mode
);
828 tsd_create(&rrw_tsd_key
, rrw_tsd_destroy
);
853 crgetruid(cred_t
*cr
)
867 crgetngroups(cred_t
*cr
)
874 crgetgroups(cred_t
*cr
)
881 zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
)
883 (void) name
, (void) cr
;
888 zfs_secpolicy_rename_perms(const char *from
, const char *to
, cred_t
*cr
)
890 (void) from
, (void) to
, (void) cr
;
895 zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
)
897 (void) name
, (void) cr
;
902 secpolicy_zfs(const cred_t
*cr
)
909 secpolicy_zfs_proc(const cred_t
*cr
, proc_t
*proc
)
911 (void) cr
, (void) proc
;
916 ksid_lookupdomain(const char *dom
)
920 kd
= umem_zalloc(sizeof (ksiddomain_t
), UMEM_NOFAIL
);
921 kd
->kd_name
= spa_strdup(dom
);
926 ksiddomain_rele(ksiddomain_t
*ksid
)
928 spa_strfree(ksid
->kd_name
);
929 umem_free(ksid
, sizeof (ksiddomain_t
));
933 kmem_vasprintf(const char *fmt
, va_list adx
)
938 va_copy(adx_copy
, adx
);
939 VERIFY(vasprintf(&buf
, fmt
, adx_copy
) != -1);
946 kmem_asprintf(const char *fmt
, ...)
952 VERIFY(vasprintf(&buf
, fmt
, adx
) != -1);
959 * snprintf() will return the number of characters that it would have
960 * printed whenever it is limited by the value of the size argument, rather
961 * than the number of characters that it did print. This can cause misbehavior
962 * on subsequent uses of the return value, so we define a safe version that
963 * will return the number of characters actually printed, not counting the
964 * terminating NUL character. Subsequent use of this by the safe string
965 * functions is safe whether it is snprintf(), strlcat() or strlcpy().
968 kmem_scnprintf(char *restrict str
, size_t size
, const char *restrict fmt
, ...)
973 /* Make the 0 case a no-op so that we do not return -1 */
978 n
= vsnprintf(str
, size
, fmt
, ap
);
988 zfs_onexit_fd_hold(int fd
, minor_t
*minorp
)
996 zfs_onexit_fd_rele(zfs_file_t
*fp
)
1002 zfs_onexit_add_cb(minor_t minor
, void (*func
)(void *), void *data
,
1003 uintptr_t *action_handle
)
1005 (void) minor
, (void) func
, (void) data
, (void) action_handle
;
1010 spl_fstrans_mark(void)
1012 return ((fstrans_cookie_t
)0);
1016 spl_fstrans_unmark(fstrans_cookie_t cookie
)
1022 __spl_pf_fstrans_check(void)
1028 kmem_cache_reap_active(void)
1034 zvol_create_minor(const char *name
)
1040 zvol_create_minors_recursive(const char *name
)
1046 zvol_remove_minors(spa_t
*spa
, const char *name
, boolean_t async
)
1048 (void) spa
, (void) name
, (void) async
;
1052 zvol_rename_minors(spa_t
*spa
, const char *oldname
, const char *newname
,
1055 (void) spa
, (void) oldname
, (void) newname
, (void) async
;
1061 * path - fully qualified path to file
1062 * flags - file attributes O_READ / O_WRITE / O_EXCL
1063 * fpp - pointer to return file pointer
1065 * Returns 0 on success underlying error on failure.
1068 zfs_file_open(const char *path
, int flags
, int mode
, zfs_file_t
**fpp
)
1077 if (!(flags
& O_CREAT
) && stat64(path
, &st
) == -1)
1080 if (!(flags
& O_CREAT
) && S_ISBLK(st
.st_mode
))
1083 if (flags
& O_CREAT
)
1084 old_umask
= umask(0);
1086 fd
= open64(path
, flags
, mode
);
1090 if (flags
& O_CREAT
)
1091 (void) umask(old_umask
);
1093 if (vn_dumpdir
!= NULL
) {
1094 char *dumppath
= umem_zalloc(MAXPATHLEN
, UMEM_NOFAIL
);
1095 const char *inpath
= zfs_basename(path
);
1097 (void) snprintf(dumppath
, MAXPATHLEN
,
1098 "%s/%s", vn_dumpdir
, inpath
);
1099 dump_fd
= open64(dumppath
, O_CREAT
| O_WRONLY
, 0666);
1100 umem_free(dumppath
, MAXPATHLEN
);
1101 if (dump_fd
== -1) {
1110 (void) fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
1112 fp
= umem_zalloc(sizeof (zfs_file_t
), UMEM_NOFAIL
);
1114 fp
->f_dump_fd
= dump_fd
;
1121 zfs_file_close(zfs_file_t
*fp
)
1124 if (fp
->f_dump_fd
!= -1)
1125 close(fp
->f_dump_fd
);
1127 umem_free(fp
, sizeof (zfs_file_t
));
1131 * Stateful write - use os internal file pointer to determine where to
1132 * write and update on successful completion.
1134 * fp - pointer to file (pipe, socket, etc) to write to
1135 * buf - buffer to write
1136 * count - # of bytes to write
1137 * resid - pointer to count of unwritten bytes (if short write)
1139 * Returns 0 on success errno on failure.
1142 zfs_file_write(zfs_file_t
*fp
, const void *buf
, size_t count
, ssize_t
*resid
)
1146 rc
= write(fp
->f_fd
, buf
, count
);
1151 *resid
= count
- rc
;
1152 } else if (rc
!= count
) {
1160 * Stateless write - os internal file pointer is not updated.
1162 * fp - pointer to file (pipe, socket, etc) to write to
1163 * buf - buffer to write
1164 * count - # of bytes to write
1165 * pos - file offset to write to (only valid for seekable types)
1166 * resid - pointer to count of unwritten bytes
1168 * Returns 0 on success errno on failure.
1171 zfs_file_pwrite(zfs_file_t
*fp
, const void *buf
,
1172 size_t count
, loff_t pos
, ssize_t
*resid
)
1174 ssize_t rc
, split
, done
;
1178 * To simulate partial disk writes, we split writes into two
1179 * system calls so that the process can be killed in between.
1180 * This is used by ztest to simulate realistic failure modes.
1182 sectors
= count
>> SPA_MINBLOCKSHIFT
;
1183 split
= (sectors
> 0 ? rand() % sectors
: 0) << SPA_MINBLOCKSHIFT
;
1184 rc
= pwrite64(fp
->f_fd
, buf
, split
, pos
);
1187 rc
= pwrite64(fp
->f_fd
, (char *)buf
+ split
,
1188 count
- split
, pos
+ split
);
1191 if (rc
== -1 && errno
== EINVAL
) {
1193 * Under Linux, this most likely means an alignment issue
1194 * (memory or disk) due to O_DIRECT, so we abort() in order
1195 * to catch the offender.
1207 *resid
= count
- done
;
1208 } else if (done
!= count
) {
1216 * Stateful read - use os internal file pointer to determine where to
1217 * read and update on successful completion.
1219 * fp - pointer to file (pipe, socket, etc) to read from
1220 * buf - buffer to read into
1221 * count - # of bytes to read
1222 * resid - pointer to count of unread bytes (if short read)
1224 * Returns 0 on success errno on failure.
1227 zfs_file_read(zfs_file_t
*fp
, void *buf
, size_t count
, ssize_t
*resid
)
1231 rc
= read(fp
->f_fd
, buf
, count
);
1236 *resid
= count
- rc
;
1237 } else if (rc
!= count
) {
1245 * Stateless read - os internal file pointer is not updated.
1247 * fp - pointer to file (pipe, socket, etc) to read from
1248 * buf - buffer to read into
1249 * count - # of bytes to read
1250 * off - file offset to read from (only valid for seekable types)
1251 * resid - pointer to count of unread bytes (if short read)
1253 * Returns 0 on success errno on failure.
1256 zfs_file_pread(zfs_file_t
*fp
, void *buf
, size_t count
, loff_t off
,
1261 rc
= pread64(fp
->f_fd
, buf
, count
, off
);
1265 * Under Linux, this most likely means an alignment issue
1266 * (memory or disk) due to O_DIRECT, so we abort() in order to
1267 * catch the offender.
1269 if (errno
== EINVAL
)
1275 if (fp
->f_dump_fd
!= -1) {
1278 status
= pwrite64(fp
->f_dump_fd
, buf
, rc
, off
);
1279 ASSERT(status
!= -1);
1283 *resid
= count
- rc
;
1284 } else if (rc
!= count
) {
1292 * lseek - set / get file pointer
1294 * fp - pointer to file (pipe, socket, etc) to read from
1295 * offp - value to seek to, returns current value plus passed offset
1296 * whence - see man pages for standard lseek whence values
1298 * Returns 0 on success errno on failure (ESPIPE for non seekable types)
1301 zfs_file_seek(zfs_file_t
*fp
, loff_t
*offp
, int whence
)
1305 rc
= lseek(fp
->f_fd
, *offp
, whence
);
1315 * Get file attributes
1317 * fp - file pointer
1318 * zfattr - pointer to file attr structure
1320 * Currently only used for fetching size and file mode
1322 * Returns 0 on success or error code of underlying getattr call on failure.
1325 zfs_file_getattr(zfs_file_t
*fp
, zfs_file_attr_t
*zfattr
)
1329 if (fstat64_blk(fp
->f_fd
, &st
) == -1)
1332 zfattr
->zfa_size
= st
.st_size
;
1333 zfattr
->zfa_mode
= st
.st_mode
;
1341 * fp - file pointer
1342 * flags - O_SYNC and or O_DSYNC
1344 * Returns 0 on success or error code of underlying sync call on failure.
1347 zfs_file_fsync(zfs_file_t
*fp
, int flags
)
1351 if (fsync(fp
->f_fd
) < 0)
1358 * fallocate - allocate or free space on disk
1361 * mode (non-standard options for hole punching etc)
1362 * offset - offset to start allocating or freeing from
1363 * len - length to free / allocate
1368 zfs_file_fallocate(zfs_file_t
*fp
, int mode
, loff_t offset
, loff_t len
)
1371 return (fallocate(fp
->f_fd
, mode
, offset
, len
));
1373 (void) fp
, (void) mode
, (void) offset
, (void) len
;
1374 return (EOPNOTSUPP
);
1379 * Request current file pointer offset
1381 * fp - pointer to file
1383 * Returns current file offset.
1386 zfs_file_off(zfs_file_t
*fp
)
1388 return (lseek(fp
->f_fd
, SEEK_CUR
, 0));
1394 * path - fully qualified file path
1396 * Returns 0 on success.
int
zfs_file_unlink(const char *path)
{
	/* The result of remove() is handed back to the caller unchanged. */
	int rc = remove(path);

	return (rc);
}
1407 * Get reference to file pointer
1409 * fd - input file descriptor
1411 * Returns pointer to file struct or NULL.
1412 * Unsupported in user space.
1415 zfs_file_get(int fd
)
1422 * Drop reference to file pointer
1424 * fp - pointer to file struct
1426 * Unsupported in user space.
1429 zfs_file_put(zfs_file_t
*fp
)
/*
 * Stub: both names are ignored and nothing is done.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	(void) oldname;
	(void) newname;
}
1442 spa_import_os(spa_t
*spa
)
1448 spa_export_os(spa_t
*spa
)
1454 spa_activate_os(spa_t
*spa
)
1460 spa_deactivate_os(spa_t
*spa
)