4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 #include <sys/crypto/icp.h>
37 #include <sys/processor.h>
38 #include <sys/rrwlock.h>
41 #include <sys/systeminfo.h>
43 #include <sys/utsname.h>
44 #include <sys/zfs_context.h>
45 #include <sys/zfs_onexit.h>
46 #include <sys/zfs_vfsops.h>
47 #include <sys/zstd/zstd.h>
49 #include <zfs_fletcher.h>
53 * Emulation of kernel services in userland.
58 struct utsname hw_utsname
;
60 /* If set, all blocks read will be copied to the specified directory. */
61 char *vn_dumpdir
= NULL
;
63 /* this only exists to have its address taken */
67 * =========================================================================
69 * =========================================================================
71 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While
72 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
73 * the expected stack depth while small enough to avoid exhausting address
74 * space with high thread counts.
76 #define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)
77 #define TS_STACK_MAX (256 * 1024)
79 struct zk_thread_wrapper
{
85 zk_thread_wrapper(void *arg
)
87 struct zk_thread_wrapper ztw
;
88 memcpy(&ztw
, arg
, sizeof (ztw
));
95 zk_thread_create(const char *name
, void (*func
)(void *), void *arg
,
96 size_t stksize
, int state
)
101 struct zk_thread_wrapper
*ztw
;
102 int detachstate
= PTHREAD_CREATE_DETACHED
;
104 VERIFY0(pthread_attr_init(&attr
));
106 if (state
& TS_JOINABLE
)
107 detachstate
= PTHREAD_CREATE_JOINABLE
;
109 VERIFY0(pthread_attr_setdetachstate(&attr
, detachstate
));
112 * We allow the default stack size in user space to be specified by
113 * setting the ZFS_STACK_SIZE environment variable. This allows us
114 * the convenience of observing and debugging stack overruns in
115 * user space. Explicitly specified stack sizes will be honored.
116 * The usage of ZFS_STACK_SIZE is discussed further in the
117 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
120 stkstr
= getenv("ZFS_STACK_SIZE");
123 stksize
= TS_STACK_MAX
;
125 stksize
= MAX(atoi(stkstr
), TS_STACK_MIN
);
128 VERIFY3S(stksize
, >, 0);
129 stksize
= P2ROUNDUP(MAX(stksize
, TS_STACK_MIN
), PAGESIZE
);
132 * If this ever fails, it may be because the stack size is not a
133 * multiple of system page size.
135 VERIFY0(pthread_attr_setstacksize(&attr
, stksize
));
136 VERIFY0(pthread_attr_setguardsize(&attr
, PAGESIZE
));
138 VERIFY(ztw
= malloc(sizeof (*ztw
)));
141 VERIFY0(pthread_create(&tid
, &attr
, zk_thread_wrapper
, ztw
));
142 VERIFY0(pthread_attr_destroy(&attr
));
144 pthread_setname_np(tid
, name
);
146 return ((void *)(uintptr_t)tid
);
150 * =========================================================================
152 * =========================================================================
155 kstat_create(const char *module
, int instance
, const char *name
,
156 const char *class, uchar_t type
, ulong_t ndata
, uchar_t ks_flag
)
158 (void) module
, (void) instance
, (void) name
, (void) class, (void) type
,
159 (void) ndata
, (void) ks_flag
;
164 kstat_install(kstat_t
*ksp
)
170 kstat_delete(kstat_t
*ksp
)
176 kstat_set_raw_ops(kstat_t
*ksp
,
177 int (*headers
)(char *buf
, size_t size
),
178 int (*data
)(char *buf
, size_t size
, void *data
),
179 void *(*addr
)(kstat_t
*ksp
, loff_t index
))
181 (void) ksp
, (void) headers
, (void) data
, (void) addr
;
185 * =========================================================================
187 * =========================================================================
191 mutex_init(kmutex_t
*mp
, char *name
, int type
, void *cookie
)
193 (void) name
, (void) type
, (void) cookie
;
194 VERIFY0(pthread_mutex_init(&mp
->m_lock
, NULL
));
195 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
199 mutex_destroy(kmutex_t
*mp
)
201 VERIFY0(pthread_mutex_destroy(&mp
->m_lock
));
205 mutex_enter(kmutex_t
*mp
)
207 VERIFY0(pthread_mutex_lock(&mp
->m_lock
));
208 mp
->m_owner
= pthread_self();
212 mutex_enter_check_return(kmutex_t
*mp
)
214 int error
= pthread_mutex_lock(&mp
->m_lock
);
216 mp
->m_owner
= pthread_self();
221 mutex_tryenter(kmutex_t
*mp
)
223 int error
= pthread_mutex_trylock(&mp
->m_lock
);
225 mp
->m_owner
= pthread_self();
228 VERIFY3S(error
, ==, EBUSY
);
234 mutex_exit(kmutex_t
*mp
)
236 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
237 VERIFY0(pthread_mutex_unlock(&mp
->m_lock
));
241 * =========================================================================
243 * =========================================================================
247 rw_init(krwlock_t
*rwlp
, char *name
, int type
, void *arg
)
249 (void) name
, (void) type
, (void) arg
;
250 VERIFY0(pthread_rwlock_init(&rwlp
->rw_lock
, NULL
));
251 rwlp
->rw_readers
= 0;
256 rw_destroy(krwlock_t
*rwlp
)
258 VERIFY0(pthread_rwlock_destroy(&rwlp
->rw_lock
));
262 rw_enter(krwlock_t
*rwlp
, krw_t rw
)
264 if (rw
== RW_READER
) {
265 VERIFY0(pthread_rwlock_rdlock(&rwlp
->rw_lock
));
266 atomic_inc_uint(&rwlp
->rw_readers
);
268 VERIFY0(pthread_rwlock_wrlock(&rwlp
->rw_lock
));
269 rwlp
->rw_owner
= pthread_self();
274 rw_exit(krwlock_t
*rwlp
)
276 if (RW_READ_HELD(rwlp
))
277 atomic_dec_uint(&rwlp
->rw_readers
);
281 VERIFY0(pthread_rwlock_unlock(&rwlp
->rw_lock
));
285 rw_tryenter(krwlock_t
*rwlp
, krw_t rw
)
290 error
= pthread_rwlock_tryrdlock(&rwlp
->rw_lock
);
292 error
= pthread_rwlock_trywrlock(&rwlp
->rw_lock
);
296 atomic_inc_uint(&rwlp
->rw_readers
);
298 rwlp
->rw_owner
= pthread_self();
303 VERIFY3S(error
, ==, EBUSY
);
309 zone_get_hostid(void *zonep
)
312 * We're emulating the system's hostid in userland.
319 rw_tryupgrade(krwlock_t
*rwlp
)
326 * =========================================================================
327 * condition variables
328 * =========================================================================
332 cv_init(kcondvar_t
*cv
, char *name
, int type
, void *arg
)
334 (void) name
, (void) type
, (void) arg
;
335 VERIFY0(pthread_cond_init(cv
, NULL
));
339 cv_destroy(kcondvar_t
*cv
)
341 VERIFY0(pthread_cond_destroy(cv
));
345 cv_wait(kcondvar_t
*cv
, kmutex_t
*mp
)
347 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
348 VERIFY0(pthread_cond_wait(cv
, &mp
->m_lock
));
349 mp
->m_owner
= pthread_self();
353 cv_wait_sig(kcondvar_t
*cv
, kmutex_t
*mp
)
360 cv_timedwait(kcondvar_t
*cv
, kmutex_t
*mp
, clock_t abstime
)
367 delta
= abstime
- ddi_get_lbolt();
371 VERIFY(gettimeofday(&tv
, NULL
) == 0);
373 ts
.tv_sec
= tv
.tv_sec
+ delta
/ hz
;
374 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% hz
) * (NANOSEC
/ hz
);
375 if (ts
.tv_nsec
>= NANOSEC
) {
377 ts
.tv_nsec
-= NANOSEC
;
380 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
381 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
382 mp
->m_owner
= pthread_self();
384 if (error
== ETIMEDOUT
)
393 cv_timedwait_hires(kcondvar_t
*cv
, kmutex_t
*mp
, hrtime_t tim
, hrtime_t res
,
402 ASSERT(flag
== 0 || flag
== CALLOUT_FLAG_ABSOLUTE
);
405 if (flag
& CALLOUT_FLAG_ABSOLUTE
)
406 delta
-= gethrtime();
411 VERIFY0(gettimeofday(&tv
, NULL
));
413 ts
.tv_sec
= tv
.tv_sec
+ delta
/ NANOSEC
;
414 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% NANOSEC
);
415 if (ts
.tv_nsec
>= NANOSEC
) {
417 ts
.tv_nsec
-= NANOSEC
;
420 memset(&mp
->m_owner
, 0, sizeof (pthread_t
));
421 error
= pthread_cond_timedwait(cv
, &mp
->m_lock
, &ts
);
422 mp
->m_owner
= pthread_self();
424 if (error
== ETIMEDOUT
)
433 cv_signal(kcondvar_t
*cv
)
435 VERIFY0(pthread_cond_signal(cv
));
439 cv_broadcast(kcondvar_t
*cv
)
441 VERIFY0(pthread_cond_broadcast(cv
));
445 * =========================================================================
447 * =========================================================================
451 seq_printf(struct seq_file
*m
, const char *fmt
, ...)
453 (void) m
, (void) fmt
;
457 procfs_list_install(const char *module
,
458 const char *submodule
,
461 procfs_list_t
*procfs_list
,
462 int (*show
)(struct seq_file
*f
, void *p
),
463 int (*show_header
)(struct seq_file
*f
),
464 int (*clear
)(procfs_list_t
*procfs_list
),
465 size_t procfs_list_node_off
)
467 (void) module
, (void) submodule
, (void) name
, (void) mode
, (void) show
,
468 (void) show_header
, (void) clear
;
469 mutex_init(&procfs_list
->pl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
470 list_create(&procfs_list
->pl_list
,
471 procfs_list_node_off
+ sizeof (procfs_list_node_t
),
472 procfs_list_node_off
+ offsetof(procfs_list_node_t
, pln_link
));
473 procfs_list
->pl_next_id
= 1;
474 procfs_list
->pl_node_offset
= procfs_list_node_off
;
478 procfs_list_uninstall(procfs_list_t
*procfs_list
)
484 procfs_list_destroy(procfs_list_t
*procfs_list
)
486 ASSERT(list_is_empty(&procfs_list
->pl_list
));
487 list_destroy(&procfs_list
->pl_list
);
488 mutex_destroy(&procfs_list
->pl_lock
);
491 #define NODE_ID(procfs_list, obj) \
492 (((procfs_list_node_t *)(((char *)obj) + \
493 (procfs_list)->pl_node_offset))->pln_id)
496 procfs_list_add(procfs_list_t
*procfs_list
, void *p
)
498 ASSERT(MUTEX_HELD(&procfs_list
->pl_lock
));
499 NODE_ID(procfs_list
, p
) = procfs_list
->pl_next_id
++;
500 list_insert_tail(&procfs_list
->pl_list
, p
);
504 * =========================================================================
506 * =========================================================================
510 * =========================================================================
511 * Figure out which debugging statements to print
512 * =========================================================================
515 static char *dprintf_string
;
516 static int dprintf_print_all
;
519 dprintf_find_string(const char *string
)
521 char *tmp_str
= dprintf_string
;
522 int len
= strlen(string
);
525 * Find out if this is a string we want to print.
526 * String format: file1.c,function_name1,file2.c,file3.c
529 while (tmp_str
!= NULL
) {
530 if (strncmp(tmp_str
, string
, len
) == 0 &&
531 (tmp_str
[len
] == ',' || tmp_str
[len
] == '\0'))
533 tmp_str
= strchr(tmp_str
, ',');
535 tmp_str
++; /* Get rid of , */
541 dprintf_setup(int *argc
, char **argv
)
546 * Debugging can be specified two ways: by setting the
547 * environment variable ZFS_DEBUG, or by including a
548 * "debug=..." argument on the command line. The command
549 * line setting overrides the environment variable.
552 for (i
= 1; i
< *argc
; i
++) {
553 int len
= strlen("debug=");
554 /* First look for a command line argument */
555 if (strncmp("debug=", argv
[i
], len
) == 0) {
556 dprintf_string
= argv
[i
] + len
;
557 /* Remove from args */
558 for (j
= i
; j
< *argc
; j
++)
565 if (dprintf_string
== NULL
) {
566 /* Look for ZFS_DEBUG environment variable */
567 dprintf_string
= getenv("ZFS_DEBUG");
571 * Are we just turning on all debugging?
573 if (dprintf_find_string("on"))
574 dprintf_print_all
= 1;
576 if (dprintf_string
!= NULL
)
577 zfs_flags
|= ZFS_DEBUG_DPRINTF
;
581 * =========================================================================
583 * =========================================================================
586 __dprintf(boolean_t dprint
, const char *file
, const char *func
,
587 int line
, const char *fmt
, ...)
589 /* Get rid of annoying "../common/" prefix to filename. */
590 const char *newfile
= zfs_basename(file
);
594 /* dprintf messages are printed immediately */
596 if (!dprintf_print_all
&&
597 !dprintf_find_string(newfile
) &&
598 !dprintf_find_string(func
))
601 /* Print out just the function name if requested */
603 if (dprintf_find_string("pid"))
604 (void) printf("%d ", getpid());
605 if (dprintf_find_string("tid"))
606 (void) printf("%ju ",
607 (uintmax_t)(uintptr_t)pthread_self());
608 if (dprintf_find_string("cpu"))
609 (void) printf("%u ", getcpuid());
610 if (dprintf_find_string("time"))
611 (void) printf("%llu ", gethrtime());
612 if (dprintf_find_string("long"))
613 (void) printf("%s, line %d: ", newfile
, line
);
614 (void) printf("dprintf: %s: ", func
);
616 (void) vprintf(fmt
, adx
);
620 /* zfs_dbgmsg is logged for dumping later */
626 buf
= umem_alloc(size
, UMEM_NOFAIL
);
627 i
= snprintf(buf
, size
, "%s:%d:%s(): ", newfile
, line
, func
);
631 (void) vsnprintf(buf
+ i
, size
- i
, fmt
, adx
);
637 umem_free(buf
, size
);
642 * =========================================================================
643 * cmn_err() and panic()
644 * =========================================================================
646 static char ce_prefix
[CE_IGNORE
][10] = { "", "NOTICE: ", "WARNING: ", "" };
647 static char ce_suffix
[CE_IGNORE
][2] = { "", "\n", "\n", "" };
649 __attribute__((noreturn
)) void
650 vpanic(const char *fmt
, va_list adx
)
652 (void) fprintf(stderr
, "error: ");
653 (void) vfprintf(stderr
, fmt
, adx
);
654 (void) fprintf(stderr
, "\n");
656 abort(); /* think of it as a "user-level crash dump" */
659 __attribute__((noreturn
)) void
660 panic(const char *fmt
, ...)
670 vcmn_err(int ce
, const char *fmt
, va_list adx
)
674 if (ce
!= CE_NOTE
) { /* suppress noise in userland stress testing */
675 (void) fprintf(stderr
, "%s", ce_prefix
[ce
]);
676 (void) vfprintf(stderr
, fmt
, adx
);
677 (void) fprintf(stderr
, "%s", ce_suffix
[ce
]);
682 cmn_err(int ce
, const char *fmt
, ...)
687 vcmn_err(ce
, fmt
, adx
);
692 * =========================================================================
694 * =========================================================================
700 (void) poll(0, 0, ticks
* (1000 / hz
));
704 * Find highest one bit set.
705 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
706 * The __builtin_clzll() function is supported by both GCC and Clang.
709 highbit64(uint64_t i
)
714 return (NBBY
* sizeof (uint64_t) - __builtin_clzll(i
));
718 * Find lowest one bit set.
719 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
720 * The __builtin_ffsll() function is supported by both GCC and Clang.
728 return (__builtin_ffsll(i
));
731 const char *random_path
= "/dev/random";
732 const char *urandom_path
= "/dev/urandom";
733 static int random_fd
= -1, urandom_fd
= -1;
738 VERIFY((random_fd
= open(random_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
739 VERIFY((urandom_fd
= open(urandom_path
, O_RDONLY
| O_CLOEXEC
)) != -1);
753 random_get_bytes_common(uint8_t *ptr
, size_t len
, int fd
)
761 bytes
= read(fd
, ptr
, resid
);
762 ASSERT3S(bytes
, >=, 0);
771 random_get_bytes(uint8_t *ptr
, size_t len
)
773 return (random_get_bytes_common(ptr
, len
, random_fd
));
777 random_get_pseudo_bytes(uint8_t *ptr
, size_t len
)
779 return (random_get_bytes_common(ptr
, len
, urandom_fd
));
783 ddi_strtoull(const char *str
, char **nptr
, int base
, u_longlong_t
*result
)
786 *result
= strtoull(str
, nptr
, base
);
795 return (&hw_utsname
);
799 * =========================================================================
800 * kernel emulation setup & teardown
801 * =========================================================================
804 umem_out_of_memory(void)
806 char errmsg
[] = "out of memory -- generating core dump\n";
808 (void) fprintf(stderr
, "%s", errmsg
);
814 kernel_init(int mode
)
816 extern uint_t rrw_tsd_key
;
818 umem_nofail_callback(umem_out_of_memory
);
820 physmem
= sysconf(_SC_PHYS_PAGES
);
822 dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t
)physmem
,
823 (double)physmem
* sysconf(_SC_PAGE_SIZE
) / (1ULL << 30));
825 hostid
= (mode
& SPA_MODE_WRITE
) ? get_system_hostid() : 0;
829 VERIFY0(uname(&hw_utsname
));
836 spa_init((spa_mode_t
)mode
);
840 tsd_create(&rrw_tsd_key
, rrw_tsd_destroy
);
865 crgetruid(cred_t
*cr
)
879 crgetngroups(cred_t
*cr
)
886 crgetgroups(cred_t
*cr
)
893 zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
)
895 (void) name
, (void) cr
;
900 zfs_secpolicy_rename_perms(const char *from
, const char *to
, cred_t
*cr
)
902 (void) from
, (void) to
, (void) cr
;
907 zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
)
909 (void) name
, (void) cr
;
914 secpolicy_zfs(const cred_t
*cr
)
921 secpolicy_zfs_proc(const cred_t
*cr
, proc_t
*proc
)
923 (void) cr
, (void) proc
;
928 ksid_lookupdomain(const char *dom
)
932 kd
= umem_zalloc(sizeof (ksiddomain_t
), UMEM_NOFAIL
);
933 kd
->kd_name
= spa_strdup(dom
);
938 ksiddomain_rele(ksiddomain_t
*ksid
)
940 spa_strfree(ksid
->kd_name
);
941 umem_free(ksid
, sizeof (ksiddomain_t
));
945 kmem_vasprintf(const char *fmt
, va_list adx
)
950 va_copy(adx_copy
, adx
);
951 VERIFY(vasprintf(&buf
, fmt
, adx_copy
) != -1);
958 kmem_asprintf(const char *fmt
, ...)
964 VERIFY(vasprintf(&buf
, fmt
, adx
) != -1);
971 * snprintf() will return the number of characters that it would have
972 * printed whenever it is limited by the value of the size argument, rather
973 * than the number of characters that it did print. This can cause misbehavior
974 * on subsequent uses of the return value, so we define a safe version that
975 * will return the number of characters actually printed, minus the terminating
976 * NUL character. Subsequent use of this by the safe string functions is safe
977 * whether it is snprintf(), strlcat() or strlcpy().
980 kmem_scnprintf(char *restrict str
, size_t size
, const char *restrict fmt
, ...)
985 /* Make the 0 case a no-op so that we do not return -1 */
990 n
= vsnprintf(str
, size
, fmt
, ap
);
1000 zfs_onexit_fd_hold(int fd
, minor_t
*minorp
)
1008 zfs_onexit_fd_rele(zfs_file_t
*fp
)
1014 zfs_onexit_add_cb(minor_t minor
, void (*func
)(void *), void *data
,
1015 uintptr_t *action_handle
)
1017 (void) minor
, (void) func
, (void) data
, (void) action_handle
;
1022 spl_fstrans_mark(void)
1024 return ((fstrans_cookie_t
)0);
1028 spl_fstrans_unmark(fstrans_cookie_t cookie
)
1034 __spl_pf_fstrans_check(void)
1040 kmem_cache_reap_active(void)
1046 zvol_create_minor(const char *name
)
1052 zvol_create_minors_recursive(const char *name
)
1058 zvol_remove_minors(spa_t
*spa
, const char *name
, boolean_t async
)
1060 (void) spa
, (void) name
, (void) async
;
1064 zvol_rename_minors(spa_t
*spa
, const char *oldname
, const char *newname
,
1067 (void) spa
, (void) oldname
, (void) newname
, (void) async
;
1073 * path - fully qualified path to file
1074 * flags - open(2) flags (O_RDONLY / O_WRONLY / O_CREAT / O_EXCL, ...)
1075 * fpp - pointer to return file pointer
1077 * Returns 0 on success, underlying error on failure.
1080 zfs_file_open(const char *path
, int flags
, int mode
, zfs_file_t
**fpp
)
1089 if (!(flags
& O_CREAT
) && stat64(path
, &st
) == -1)
1092 if (!(flags
& O_CREAT
) && S_ISBLK(st
.st_mode
))
1095 if (flags
& O_CREAT
)
1096 old_umask
= umask(0);
1098 fd
= open64(path
, flags
, mode
);
1102 if (flags
& O_CREAT
)
1103 (void) umask(old_umask
);
1105 if (vn_dumpdir
!= NULL
) {
1106 char *dumppath
= umem_zalloc(MAXPATHLEN
, UMEM_NOFAIL
);
1107 const char *inpath
= zfs_basename(path
);
1109 (void) snprintf(dumppath
, MAXPATHLEN
,
1110 "%s/%s", vn_dumpdir
, inpath
);
1111 dump_fd
= open64(dumppath
, O_CREAT
| O_WRONLY
, 0666);
1112 umem_free(dumppath
, MAXPATHLEN
);
1113 if (dump_fd
== -1) {
1122 (void) fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
1124 fp
= umem_zalloc(sizeof (zfs_file_t
), UMEM_NOFAIL
);
1126 fp
->f_dump_fd
= dump_fd
;
1133 zfs_file_close(zfs_file_t
*fp
)
1136 if (fp
->f_dump_fd
!= -1)
1137 close(fp
->f_dump_fd
);
1139 umem_free(fp
, sizeof (zfs_file_t
));
1143 * Stateful write - use os internal file pointer to determine where to
1144 * write and update on successful completion.
1146 * fp - pointer to file (pipe, socket, etc) to write to
1147 * buf - buffer to write
1148 * count - # of bytes to write
1149 * resid - pointer to count of unwritten bytes (if short write)
1151 * Returns 0 on success, errno on failure.
1154 zfs_file_write(zfs_file_t
*fp
, const void *buf
, size_t count
, ssize_t
*resid
)
1158 rc
= write(fp
->f_fd
, buf
, count
);
1163 *resid
= count
- rc
;
1164 } else if (rc
!= count
) {
1172 * Stateless write - os internal file pointer is not updated.
1174 * fp - pointer to file (pipe, socket, etc) to write to
1175 * buf - buffer to write
1176 * count - # of bytes to write
1177 * pos - file offset to write to (only valid for seekable types)
1178 * resid - pointer to count of unwritten bytes
1180 * Returns 0 on success, errno on failure.
1183 zfs_file_pwrite(zfs_file_t
*fp
, const void *buf
,
1184 size_t count
, loff_t pos
, ssize_t
*resid
)
1186 ssize_t rc
, split
, done
;
1190 * To simulate partial disk writes, we split writes into two
1191 * system calls so that the process can be killed in between.
1192 * This is used by ztest to simulate realistic failure modes.
1194 sectors
= count
>> SPA_MINBLOCKSHIFT
;
1195 split
= (sectors
> 0 ? rand() % sectors
: 0) << SPA_MINBLOCKSHIFT
;
1196 rc
= pwrite64(fp
->f_fd
, buf
, split
, pos
);
1199 rc
= pwrite64(fp
->f_fd
, (char *)buf
+ split
,
1200 count
- split
, pos
+ split
);
1203 if (rc
== -1 && errno
== EINVAL
) {
1205 * Under Linux, this most likely means an alignment issue
1206 * (memory or disk) due to O_DIRECT, so we abort() in order
1207 * to catch the offender.
1219 *resid
= count
- done
;
1220 } else if (done
!= count
) {
1228 * Stateful read - use os internal file pointer to determine where to
1229 * read and update on successful completion.
1231 * fp - pointer to file (pipe, socket, etc) to read from
1232 * buf - buffer to read into
1233 * count - # of bytes to read
1234 * resid - pointer to count of unread bytes (if short read)
1236 * Returns 0 on success, errno on failure.
1239 zfs_file_read(zfs_file_t
*fp
, void *buf
, size_t count
, ssize_t
*resid
)
1243 rc
= read(fp
->f_fd
, buf
, count
);
1248 *resid
= count
- rc
;
1249 } else if (rc
!= count
) {
1257 * Stateless read - os internal file pointer is not updated.
1259 * fp - pointer to file (pipe, socket, etc) to read from
1260 * buf - buffer to read into
1261 * count - # of bytes to read
1262 * off - file offset to read from (only valid for seekable types)
1263 * resid - pointer to count of unread bytes (if short read)
1265 * Returns 0 on success, errno on failure.
1268 zfs_file_pread(zfs_file_t
*fp
, void *buf
, size_t count
, loff_t off
,
1273 rc
= pread64(fp
->f_fd
, buf
, count
, off
);
1277 * Under Linux, this most likely means an alignment issue
1278 * (memory or disk) due to O_DIRECT, so we abort() in order to
1279 * catch the offender.
1281 if (errno
== EINVAL
)
1287 if (fp
->f_dump_fd
!= -1) {
1290 status
= pwrite64(fp
->f_dump_fd
, buf
, rc
, off
);
1291 ASSERT(status
!= -1);
1295 *resid
= count
- rc
;
1296 } else if (rc
!= count
) {
1304 * lseek - set / get file pointer
1306 * fp - pointer to file to seek on
1307 * offp - value to seek to, returns current value plus passed offset
1308 * whence - see man pages for standard lseek whence values
1310 * Returns 0 on success, errno on failure (ESPIPE for non-seekable types)
1313 zfs_file_seek(zfs_file_t
*fp
, loff_t
*offp
, int whence
)
1317 rc
= lseek(fp
->f_fd
, *offp
, whence
);
1327 * Get file attributes
1329 * fp - file pointer
1330 * zfattr - pointer to file attr structure
1332 * Currently only used for fetching size and file mode
1334 * Returns 0 on success or error code of underlying getattr call on failure.
1337 zfs_file_getattr(zfs_file_t
*fp
, zfs_file_attr_t
*zfattr
)
1341 if (fstat64_blk(fp
->f_fd
, &st
) == -1)
1344 zfattr
->zfa_size
= st
.st_size
;
1345 zfattr
->zfa_mode
= st
.st_mode
;
1353 * fp - file pointer
1354 * flags - O_SYNC and/or O_DSYNC
1356 * Returns 0 on success or error code of underlying sync call on failure.
1359 zfs_file_fsync(zfs_file_t
*fp
, int flags
)
1363 if (fsync(fp
->f_fd
) < 0)
1370 * deallocate - zero and/or deallocate file storage
1373 * offset - offset to start zeroing or deallocating
1374 * len - length to zero or deallocate
1377 zfs_file_deallocate(zfs_file_t
*fp
, loff_t offset
, loff_t len
)
1380 #if defined(__linux__)
1381 rc
= fallocate(fp
->f_fd
,
1382 FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
, offset
, len
);
1383 #elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)
1384 struct spacectl_range rqsr
= {
1388 rc
= fspacectl(fp
->f_fd
, SPACECTL_DEALLOC
, &rqsr
, 0, &rqsr
);
1390 (void) fp
, (void) offset
, (void) len
;
1394 return (SET_ERROR(rc
));
1399 * Request current file pointer offset
1401 * fp - pointer to file
1403 * Returns current file offset.
/*
 * NOTE(review): lseek(2) is declared as lseek(fd, offset, whence), but the
 * call below passes SEEK_CUR in the offset position and 0 in the whence
 * position. Querying the current offset requires (fd, 0, SEEK_CUR); on
 * platforms where SEEK_SET is 0 the call as written instead repositions
 * the file to absolute offset SEEK_CUR and returns that value.
 */
1406 zfs_file_off(zfs_file_t
*fp
)
1408 return (lseek(fp
->f_fd
, SEEK_CUR
, 0));
1414 * path - fully qualified file path
1416 * Returns 0 on success.
1421 zfs_file_unlink(const char *path
)
1423 return (remove(path
));
1427 * Get reference to file pointer
1429 * fd - input file descriptor
1431 * Returns pointer to file struct or NULL.
1432 * Unsupported in user space.
1435 zfs_file_get(int fd
)
1442 * Drop reference to file pointer
1444 * fp - pointer to file struct
1446 * Unsupported in user space.
1449 zfs_file_put(zfs_file_t
*fp
)
1456 zfsvfs_update_fromname(const char *oldname
, const char *newname
)
1458 (void) oldname
, (void) newname
;
1462 spa_import_os(spa_t
*spa
)
1468 spa_export_os(spa_t
*spa
)
1474 spa_activate_os(spa_t
*spa
)
1480 spa_deactivate_os(spa_t
*spa
)