4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
36 #include <sys/vnode.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
43 #include <sys/termios.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/sunddi.h>
47 #include <sys/esunddi.h>
48 #include <sys/flock.h>
49 #include <sys/modctl.h>
50 #include <sys/cmn_err.h>
51 #include <sys/vmsystm.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <../../../../../../kernel/fs/sockfs/sockcommon.h>
56 #include <../../../../../../kernel/fs/sockfs/socktpi.h>
58 #include <netinet/in.h>
59 #include <sys/sendfile.h>
61 #include <sys/tihdr.h>
62 #include <sys/atomic.h>
64 #include <inet/common.h>
69 extern int sosendfile64(file_t
*, file_t
*, const struct ksendfilevec64
*,
71 extern int snf_segmap(file_t
*, vnode_t
*, uoff_t
, uoff_t
, ssize_t
*,
73 extern sotpi_info_t
*sotpi_sototpi(struct sonode
*);
75 #define SEND_MAX_CHUNK 16
77 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
79 * 64 bit offsets for 32 bit applications only running either on
80 * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
81 * more than 2GB of data.
84 sendvec_chunk64(file_t
*fp
, uoff_t
*fileoff
, struct ksendfilevec64
*sfv
,
85 int copy_cnt
, ssize32_t
*count
)
100 for (i
= 0; i
< copy_cnt
; i
++) {
102 if (ISSIG(curthread
, JUSTLOOKING
))
106 * Do similar checks as "write" as we are writing
107 * sfv_len bytes into "vp".
109 sfv_len
= (ssize32_t
)sfv
->sfv_len
;
119 if (vp
->v_type
== VREG
) {
120 if (*fileoff
>= curproc
->p_fsz_ctl
) {
121 mutex_enter(&curproc
->p_lock
);
123 rctlproc_legacy
[RLIMIT_FSIZE
],
124 curproc
->p_rctls
, curproc
, RCA_SAFE
);
125 mutex_exit(&curproc
->p_lock
);
129 if (*fileoff
>= OFFSET_MAX(fp
))
132 if (*fileoff
+ sfv_len
> OFFSET_MAX(fp
))
136 tmpcount
= *count
+ sfv_len
;
140 sfv_off
= sfv
->sfv_off
;
142 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
143 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
144 aiov
.iov_len
= sfv_len
;
145 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
146 auio
.uio_loffset
= *fileoff
;
148 auio
.uio_resid
= sfv_len
;
149 auio
.uio_iov
= &aiov
;
150 auio
.uio_segflg
= UIO_USERSPACE
;
151 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
152 auio
.uio_fmode
= fflag
;
153 ioflag
= auio
.uio_fmode
& (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
154 while (sfv_len
> 0) {
155 error
= fop_write(vp
, &auio
, ioflag
,
157 cnt
= sfv_len
- auio
.uio_resid
;
159 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)cnt
;
160 if (vp
->v_type
== VREG
)
172 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
)
175 if ((ffp
->f_flag
& FREAD
) == 0) {
176 releasef(sfv
->sfv_fd
);
180 readvp
= ffp
->f_vnode
;
181 if (readvp
->v_type
!= VREG
) {
182 releasef(sfv
->sfv_fd
);
187 * No point reading and writing to same vp,
188 * as long as both are regular files. readvp is not
189 * locked; but since we got it from an open file the
190 * contents will be valid during the time of access.
192 if (vn_compare(vp
, readvp
)) {
193 releasef(sfv
->sfv_fd
);
198 * Optimize the regular file over
201 if (vp
->v_type
== VSOCK
) {
202 error
= sosendfile64(fp
, ffp
, sfv
,
212 * Note: we assume readvp != vp. "vp" is already
213 * locked, and "readvp" must not be.
216 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
217 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
219 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
221 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
226 * Same checks as in pread64.
228 if (sfv_off
> MAXOFFSET_T
) {
229 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
230 releasef(sfv
->sfv_fd
);
234 if (sfv_off
+ sfv_len
> MAXOFFSET_T
)
235 sfv_len
= (ssize32_t
)(MAXOFFSET_T
- sfv_off
);
237 /* Find the native blocksize to transfer data */
238 size
= MIN(vp
->v_vfsp
->vfs_bsize
,
239 readvp
->v_vfsp
->vfs_bsize
);
240 size
= sfv_len
< size
? sfv_len
: size
;
241 ptr
= kmem_alloc(size
, KM_NOSLEEP
);
243 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
244 releasef(sfv
->sfv_fd
);
248 while (sfv_len
> 0) {
251 iov_len
= MIN(size
, sfv_len
);
253 aiov
.iov_len
= iov_len
;
254 auio
.uio_loffset
= sfv_off
;
255 auio
.uio_iov
= &aiov
;
257 auio
.uio_resid
= iov_len
;
258 auio
.uio_segflg
= UIO_SYSSPACE
;
259 auio
.uio_llimit
= MAXOFFSET_T
;
260 auio
.uio_fmode
= ffp
->f_flag
;
261 ioflag
= auio
.uio_fmode
&
262 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
265 * If read sync is not asked for,
268 if ((ioflag
& FRSYNC
) == 0)
269 ioflag
&= ~(FSYNC
|FDSYNC
);
270 error
= fop_read(readvp
, &auio
, ioflag
,
273 kmem_free(ptr
, size
);
274 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
276 releasef(sfv
->sfv_fd
);
281 * Check how much data was really read.
282 * Decrement the 'len' and increment the
283 * 'off' appropriately.
285 cnt
= iov_len
- auio
.uio_resid
;
288 * If we were reading a pipe (currently
289 * not implemented), we may now lose
292 kmem_free(ptr
, size
);
293 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
295 releasef(sfv
->sfv_fd
);
303 auio
.uio_loffset
= *fileoff
;
304 auio
.uio_iov
= &aiov
;
306 auio
.uio_resid
= cnt
;
307 auio
.uio_segflg
= UIO_SYSSPACE
;
308 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
309 auio
.uio_fmode
= fflag
;
310 ioflag
= auio
.uio_fmode
&
311 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
312 error
= fop_write(vp
, &auio
, ioflag
,
316 * Check how much data was written. Increment
317 * the 'len' and decrement the 'off' if all
318 * the data was not written.
320 cnt
-= auio
.uio_resid
;
321 sfv_len
+= auio
.uio_resid
;
322 sfv_off
-= auio
.uio_resid
;
323 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)cnt
;
324 if (vp
->v_type
== VREG
)
328 kmem_free(ptr
, size
);
329 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
331 releasef(sfv
->sfv_fd
);
335 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
336 releasef(sfv
->sfv_fd
);
337 kmem_free(ptr
, size
);
345 sendvec64(file_t
*fp
, const struct ksendfilevec64
*vec
, int sfvcnt
,
346 size32_t
*xferred
, int fildes
)
350 const struct ksendfilevec64
*copy_vec
;
351 struct ksendfilevec64 sfv
[SEND_MAX_CHUNK
];
357 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
360 fileoff
= fp
->f_offset
;
363 copy_cnt
= MIN(sfvcnt
, SEND_MAX_CHUNK
);
364 if (copyin(copy_vec
, sfv
, copy_cnt
*
365 sizeof (struct ksendfilevec64
))) {
370 error
= sendvec_chunk64(fp
, &fileoff
, sfv
, copy_cnt
, &count
);
374 copy_vec
+= copy_cnt
;
376 } while (sfvcnt
> 0);
378 if (vp
->v_type
== VREG
)
379 fp
->f_offset
+= count
;
381 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
382 if (copyout(&count
, xferred
, sizeof (count
)))
386 return (set_errno(error
));
392 sendvec_small_chunk(file_t
*fp
, uoff_t
*fileoff
, struct sendfilevec
*sfv
,
393 int copy_cnt
, ssize_t total_size
, int maxblk
, ssize_t
*count
)
404 #ifdef _SYSCALL32_IMPL
405 model_t model
= get_udatamodel();
406 uoff_t maxoff
= (model
== DATAMODEL_ILP32
) ?
407 MAXOFF32_T
: MAXOFFSET_T
;
409 const uoff_t maxoff
= MAXOFF32_T
;
416 size_t size
= total_size
;
424 ASSERT(vp
->v_type
== VSOCK
);
427 /* If nothing to send, return */
431 if (vp
->v_stream
!= NULL
) {
432 wroff
= (int)vp
->v_stream
->sd_wroff
;
433 tail_len
= (int)vp
->v_stream
->sd_tail
;
438 wroff
= so
->so_proto_props
.sopp_wroff
;
439 tail_len
= so
->so_proto_props
.sopp_tail
;
442 extra
= wroff
+ tail_len
;
444 buf_left
= MIN(total_size
, maxblk
);
445 head
= dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
448 head
->b_wptr
= head
->b_rptr
= head
->b_rptr
+ wroff
;
449 bzero(&msg
, sizeof (msg
));
451 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
452 for (i
= 0; i
< copy_cnt
; i
++) {
453 if (ISSIG(curthread
, JUSTLOOKING
)) {
459 * Do similar checks as "write" as we are writing
460 * sfv_len bytes into "vp".
462 sfv_len
= (ssize_t
)sfv
->sfv_len
;
469 /* Check for overflow */
470 #ifdef _SYSCALL32_IMPL
471 if (model
== DATAMODEL_ILP32
) {
472 if (((ssize32_t
)(*count
+ sfv_len
)) < 0) {
478 if ((*count
+ sfv_len
) < 0) {
483 sfv_off
= (uoff_t
)(ulong_t
)sfv
->sfv_off
;
485 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
486 while (sfv_len
> 0) {
489 buf_left
= MIN(total_size
, maxblk
);
490 iov_len
= MIN(buf_left
, sfv_len
);
491 dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
496 dmp
->b_wptr
= dmp
->b_rptr
=
500 iov_len
= MIN(buf_left
, sfv_len
);
503 aiov
.iov_len
= iov_len
;
504 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
505 auio
.uio_loffset
= *fileoff
;
507 auio
.uio_resid
= iov_len
;
508 auio
.uio_iov
= &aiov
;
509 auio
.uio_segflg
= UIO_USERSPACE
;
510 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
511 auio
.uio_fmode
= fflag
;
514 total_size
-= iov_len
;
518 error
= uiomove((caddr_t
)dmp
->b_wptr
,
519 iov_len
, UIO_WRITE
, &auio
);
524 dmp
->b_wptr
+= iov_len
;
530 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
) {
535 if ((ffp
->f_flag
& FREAD
) == 0) {
536 releasef(sfv
->sfv_fd
);
541 readvp
= ffp
->f_vnode
;
542 if (readvp
->v_type
!= VREG
) {
543 releasef(sfv
->sfv_fd
);
549 * No point reading and writing to same vp,
550 * as long as both are regular files. readvp is not
551 * locked; but since we got it from an open file the
552 * contents will be valid during the time of access.
555 if (vn_compare(vp
, readvp
)) {
556 releasef(sfv
->sfv_fd
);
562 * Note: we assume readvp != vp. "vp" is already
563 * locked, and "readvp" must not be.
567 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
568 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
570 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
572 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
576 /* Same checks as in pread */
577 if (sfv_off
> maxoff
) {
578 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
579 releasef(sfv
->sfv_fd
);
583 if (sfv_off
+ sfv_len
> maxoff
) {
584 total_size
-= (sfv_off
+ sfv_len
- maxoff
);
585 sfv_len
= (ssize_t
)((offset_t
)maxoff
-
589 while (sfv_len
> 0) {
592 buf_left
= MIN(total_size
, maxblk
);
593 iov_len
= MIN(buf_left
, sfv_len
);
594 dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
597 V_WRITELOCK_FALSE
, NULL
);
598 releasef(sfv
->sfv_fd
);
602 dmp
->b_wptr
= dmp
->b_rptr
=
606 iov_len
= MIN(buf_left
, sfv_len
);
608 aiov
.iov_base
= (caddr_t
)dmp
->b_wptr
;
609 aiov
.iov_len
= iov_len
;
610 auio
.uio_loffset
= sfv_off
;
611 auio
.uio_iov
= &aiov
;
613 auio
.uio_resid
= iov_len
;
614 auio
.uio_segflg
= UIO_SYSSPACE
;
615 auio
.uio_llimit
= MAXOFFSET_T
;
616 auio
.uio_fmode
= ffp
->f_flag
;
617 ioflag
= auio
.uio_fmode
&
618 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
621 * If read sync is not asked for,
624 if ((ioflag
& FRSYNC
) == 0)
625 ioflag
&= ~(FSYNC
|FDSYNC
);
626 error
= fop_read(readvp
, &auio
, ioflag
,
630 * If we were reading a pipe (currently
631 * not implemented), we may now lose
634 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
636 releasef(sfv
->sfv_fd
);
642 * Check how much data was really read.
643 * Decrement the 'len' and increment the
644 * 'off' appropriately.
646 cnt
= iov_len
- auio
.uio_resid
;
648 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
650 releasef(sfv
->sfv_fd
);
661 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
662 releasef(sfv
->sfv_fd
);
667 ASSERT(total_size
== 0);
668 error
= socket_sendmblk(VTOSO(vp
), &msg
, fflag
, CRED(), &head
);
674 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)size
;
682 sendvec_chunk(file_t
*fp
, uoff_t
*fileoff
, struct sendfilevec
*sfv
,
683 int copy_cnt
, ssize_t
*count
)
694 #ifdef _SYSCALL32_IMPL
695 model_t model
= get_udatamodel();
696 uoff_t maxoff
= (model
== DATAMODEL_ILP32
) ?
697 MAXOFF32_T
: MAXOFFSET_T
;
699 const uoff_t maxoff
= MAXOFF32_T
;
704 int maxblk
, wroff
, tail_len
;
712 if (vp
->v_type
== VSOCK
) {
714 if (vp
->v_stream
!= NULL
) {
716 wroff
= (int)stp
->sd_wroff
;
717 tail_len
= (int)stp
->sd_tail
;
718 maxblk
= (int)stp
->sd_maxblk
;
721 wroff
= so
->so_proto_props
.sopp_wroff
;
722 tail_len
= so
->so_proto_props
.sopp_tail
;
723 maxblk
= so
->so_proto_props
.sopp_maxblk
;
725 extra
= wroff
+ tail_len
;
728 bzero(&msg
, sizeof (msg
));
729 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
730 for (i
= 0; i
< copy_cnt
; i
++) {
731 if (ISSIG(curthread
, JUSTLOOKING
))
735 * Do similar checks as "write" as we are writing
736 * sfv_len bytes into "vp".
738 sfv_len
= (ssize_t
)sfv
->sfv_len
;
745 if (vp
->v_type
== VREG
) {
746 if (*fileoff
>= curproc
->p_fsz_ctl
) {
747 mutex_enter(&curproc
->p_lock
);
749 rctlproc_legacy
[RLIMIT_FSIZE
],
750 curproc
->p_rctls
, curproc
, RCA_SAFE
);
751 mutex_exit(&curproc
->p_lock
);
756 if (*fileoff
>= maxoff
)
759 if (*fileoff
+ sfv_len
> maxoff
)
763 /* Check for overflow */
764 #ifdef _SYSCALL32_IMPL
765 if (model
== DATAMODEL_ILP32
) {
766 if (((ssize32_t
)(*count
+ sfv_len
)) < 0)
770 if ((*count
+ sfv_len
) < 0)
773 sfv_off
= (uoff_t
)(ulong_t
)sfv
->sfv_off
;
775 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
776 if (vp
->v_type
== VSOCK
) {
777 while (sfv_len
> 0) {
782 * Socket filters can limit the mblk
783 * size, so limit reads to maxblk if
784 * there are filters present.
786 if (so
->so_filter_active
> 0 &&
788 iov_len
= MIN(iov_len
, maxblk
);
790 aiov
.iov_len
= iov_len
;
792 (caddr_t
)(uintptr_t)sfv_off
;
794 auio
.uio_iov
= &aiov
;
796 auio
.uio_loffset
= *fileoff
;
797 auio
.uio_segflg
= UIO_USERSPACE
;
798 auio
.uio_fmode
= fflag
;
799 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
800 auio
.uio_resid
= iov_len
;
802 dmp
= allocb(iov_len
+ extra
, BPRI_HI
);
805 dmp
->b_wptr
= dmp
->b_rptr
=
807 error
= uiomove((caddr_t
)dmp
->b_wptr
,
808 iov_len
, UIO_WRITE
, &auio
);
813 dmp
->b_wptr
+= iov_len
;
814 error
= socket_sendmblk(VTOSO(vp
),
815 &msg
, fflag
, CRED(), &dmp
);
822 ttolwp(curthread
)->lwp_ru
.ioch
+=
829 aiov
.iov_len
= sfv_len
;
830 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
832 auio
.uio_iov
= &aiov
;
834 auio
.uio_loffset
= *fileoff
;
835 auio
.uio_segflg
= UIO_USERSPACE
;
836 auio
.uio_fmode
= fflag
;
837 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
838 auio
.uio_resid
= sfv_len
;
840 ioflag
= auio
.uio_fmode
&
841 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
842 while (sfv_len
> 0) {
843 error
= fop_write(vp
, &auio
, ioflag
,
845 cnt
= sfv_len
- auio
.uio_resid
;
847 ttolwp(curthread
)->lwp_ru
.ioch
+=
859 struct vnode
*realvp
;
863 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
)
866 if ((ffp
->f_flag
& FREAD
) == 0) {
867 releasef(sfv
->sfv_fd
);
871 readvp
= ffp
->f_vnode
;
872 if (fop_realvp(readvp
, &realvp
, NULL
) == 0)
874 if (readvp
->v_type
!= VREG
) {
875 releasef(sfv
->sfv_fd
);
880 * No point reading and writing to same vp,
881 * as long as both are regular files. readvp is not
882 * locked; but since we got it from an open file the
883 * contents will be valid during the time of access.
885 if (vn_compare(vp
, readvp
)) {
886 releasef(sfv
->sfv_fd
);
891 * Note: we assume readvp != vp. "vp" is already
892 * locked, and "readvp" must not be.
895 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
896 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
898 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
900 (void) fop_rwlock(readvp
, V_WRITELOCK_FALSE
,
904 /* Same checks as in pread */
905 if (sfv_off
> maxoff
) {
906 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
907 releasef(sfv
->sfv_fd
);
910 if (sfv_off
+ sfv_len
> maxoff
) {
911 sfv_len
= (ssize_t
)((offset_t
)maxoff
-
914 /* Find the native blocksize to transfer data */
915 size
= MIN(vp
->v_vfsp
->vfs_bsize
,
916 readvp
->v_vfsp
->vfs_bsize
);
917 size
= sfv_len
< size
? sfv_len
: size
;
919 if (vp
->v_type
!= VSOCK
) {
921 buf
= kmem_alloc(size
, KM_NOSLEEP
);
923 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
925 releasef(sfv
->sfv_fd
);
931 copyflag
= stp
!= NULL
? stp
->sd_copyflag
:
932 so
->so_proto_props
.sopp_zcopyflag
;
935 * Socket filters can limit the mblk size,
936 * so limit reads to maxblk if there are
939 if (so
->so_filter_active
> 0 &&
941 size
= MIN(size
, maxblk
);
943 if (vn_has_flocks(readvp
) ||
944 readvp
->v_flag
& VNOMAP
||
945 copyflag
& STZCVMUNSAFE
) {
947 } else if (copyflag
& STZCVMSAFE
) {
951 if (socket_setsockopt(VTOSO(vp
),
952 SOL_SOCKET
, SO_SND_COPYAVOID
,
953 &on
, sizeof (on
), CRED()) == 0)
961 nowait
= (sfv
->sfv_flag
& SFV_NOWAIT
) != 0;
962 error
= snf_segmap(fp
, readvp
, sfv_off
,
963 (uoff_t
)sfv_len
, (ssize_t
*)&cnt
,
965 releasef(sfv
->sfv_fd
);
973 while (sfv_len
> 0) {
976 iov_len
= MIN(size
, sfv_len
);
978 if (vp
->v_type
== VSOCK
) {
979 dmp
= allocb(iov_len
+ extra
, BPRI_HI
);
982 V_WRITELOCK_FALSE
, NULL
);
983 releasef(sfv
->sfv_fd
);
986 dmp
->b_wptr
= dmp
->b_rptr
=
988 ptr
= (caddr_t
)dmp
->b_rptr
;
994 aiov
.iov_len
= iov_len
;
995 auio
.uio_loffset
= sfv_off
;
996 auio
.uio_iov
= &aiov
;
998 auio
.uio_resid
= iov_len
;
999 auio
.uio_segflg
= UIO_SYSSPACE
;
1000 auio
.uio_llimit
= MAXOFFSET_T
;
1001 auio
.uio_fmode
= ffp
->f_flag
;
1002 ioflag
= auio
.uio_fmode
&
1003 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1006 * If read sync is not asked for,
1009 if ((ioflag
& FRSYNC
) == 0)
1010 ioflag
&= ~(FSYNC
|FDSYNC
);
1011 error
= fop_read(readvp
, &auio
, ioflag
,
1015 * If we were reading a pipe (currently
1016 * not implemented), we may now lose
1019 if (vp
->v_type
== VSOCK
)
1022 kmem_free(buf
, size
);
1023 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
1025 releasef(sfv
->sfv_fd
);
1030 * Check how much data was really read.
1031 * Decrement the 'len' and increment the
1032 * 'off' appropriately.
1034 cnt
= iov_len
- auio
.uio_resid
;
1036 if (vp
->v_type
== VSOCK
)
1039 kmem_free(buf
, size
);
1040 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
,
1042 releasef(sfv
->sfv_fd
);
1048 if (vp
->v_type
== VSOCK
) {
1049 dmp
->b_wptr
= dmp
->b_rptr
+ cnt
;
1051 error
= socket_sendmblk(VTOSO(vp
),
1052 &msg
, fflag
, CRED(), &dmp
);
1057 fop_rwunlock(readvp
,
1058 V_WRITELOCK_FALSE
, NULL
);
1059 releasef(sfv
->sfv_fd
);
1063 ttolwp(curthread
)->lwp_ru
.ioch
+=
1068 aiov
.iov_base
= ptr
;
1070 auio
.uio_loffset
= *fileoff
;
1071 auio
.uio_resid
= cnt
;
1072 auio
.uio_iov
= &aiov
;
1073 auio
.uio_iovcnt
= 1;
1074 auio
.uio_segflg
= UIO_SYSSPACE
;
1075 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
1076 auio
.uio_fmode
= fflag
;
1077 ioflag
= auio
.uio_fmode
&
1078 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1079 error
= fop_write(vp
, &auio
, ioflag
,
1083 * Check how much data was written.
1084 * Increment the 'len' and decrement the
1085 * 'off' if all the data was not
1088 cnt
-= auio
.uio_resid
;
1089 sfv_len
+= auio
.uio_resid
;
1090 sfv_off
-= auio
.uio_resid
;
1091 ttolwp(curthread
)->lwp_ru
.ioch
+=
1096 kmem_free(buf
, size
);
1097 fop_rwunlock(readvp
,
1098 V_WRITELOCK_FALSE
, NULL
);
1099 releasef(sfv
->sfv_fd
);
1105 kmem_free(buf
, size
);
1108 fop_rwunlock(readvp
, V_WRITELOCK_FALSE
, NULL
);
1109 releasef(sfv
->sfv_fd
);
1117 sendfilev(int opcode
, int fildes
, const struct sendfilevec
*vec
, int sfvcnt
,
1121 int first_vector_error
= 0;
1127 const struct sendfilevec
*copy_vec
;
1128 struct sendfilevec sfv
[SEND_MAX_CHUNK
];
1130 #ifdef _SYSCALL32_IMPL
1131 struct ksendfilevec32 sfv32
[SEND_MAX_CHUNK
];
1135 boolean_t is_sock
= B_FALSE
;
1139 return (set_errno(EINVAL
));
1141 if ((fp
= getf(fildes
)) == NULL
)
1142 return (set_errno(EBADF
));
1144 if (((fp
->f_flag
) & FWRITE
) == 0) {
1149 fileoff
= fp
->f_offset
;
1152 switch (vp
->v_type
) {
1156 if (SOCK_IS_NONSTR(so
)) {
1157 maxblk
= so
->so_proto_props
.sopp_maxblk
;
1159 maxblk
= (int)vp
->v_stream
->sd_maxblk
;
1163 * We need to make sure that the socket that we're sending on
1164 * supports sendfile behavior. sockfs doesn't know that the APIs
1165 * we want to use are coming from sendfile, so we can't rely on
1166 * it to check for us.
1168 if ((so
->so_mode
& SM_SENDFILESUPP
) == 0) {
1183 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1185 return (sendvec64(fp
, (struct ksendfilevec64
*)vec
, sfvcnt
,
1186 (size32_t
*)xferred
, fildes
));
1193 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
1198 copy_cnt
= MIN(sfvcnt
, SEND_MAX_CHUNK
);
1199 #ifdef _SYSCALL32_IMPL
1200 /* 32-bit callers need to have their iovec expanded. */
1201 if (get_udatamodel() == DATAMODEL_ILP32
) {
1202 if (copyin(copy_vec
, sfv32
,
1203 copy_cnt
* sizeof (ksendfilevec32_t
))) {
1208 for (i
= 0; i
< copy_cnt
; i
++) {
1209 sfv
[i
].sfv_fd
= sfv32
[i
].sfv_fd
;
1211 (off_t
)(uint32_t)sfv32
[i
].sfv_off
;
1212 sfv
[i
].sfv_len
= (size_t)sfv32
[i
].sfv_len
;
1213 total_size
+= sfv
[i
].sfv_len
;
1214 sfv
[i
].sfv_flag
= sfv32
[i
].sfv_flag
;
1216 * Individual elements of the vector must not
1217 * wrap or overflow, as later math is signed.
1218 * Equally total_size needs to be checked after
1219 * each vector is added in, to be sure that
1220 * rogue values haven't overflowed the counter.
1222 if (((ssize32_t
)sfv
[i
].sfv_len
< 0) ||
1223 ((ssize32_t
)total_size
< 0)) {
1225 * Truncate the vector to send data
1226 * described by elements before the
1230 first_vector_error
= EINVAL
;
1231 /* total_size can't be trusted */
1232 if ((ssize32_t
)total_size
< 0)
1237 /* Nothing to do, process errors */
1243 if (copyin(copy_vec
, sfv
,
1244 copy_cnt
* sizeof (sendfilevec_t
))) {
1249 for (i
= 0; i
< copy_cnt
; i
++) {
1250 total_size
+= sfv
[i
].sfv_len
;
1252 * Individual elements of the vector must not
1253 * wrap or overflow, as later math is signed.
1254 * Equally total_size needs to be checked after
1255 * each vector is added in, to be sure that
1256 * rogue values haven't overflowed the counter.
1258 if (((ssize_t
)sfv
[i
].sfv_len
< 0) ||
1261 * Truncate the vector to send data
1262 * described by elements before the
1266 first_vector_error
= EINVAL
;
1267 /* total_size can't be trusted */
1273 /* Nothing to do, process errors */
1276 #ifdef _SYSCALL32_IMPL
1281 * The choice between sendvec_small_chunk
1282 * and sendvec_chunk depends on multiple things:
1284 * i) latency is important for smaller files. So if the
1285 * data is smaller than 'tcp_slow_start_initial' times
1286 * maxblk, then use sendvec_small_chunk which creates
1287 * maxblk size mblks and chains them together and sends
1288 * them to TCP in one shot. It also leaves 'wroff' size
1289 * space for the headers in each mblk.
1291 * ii) for total size bigger than 'tcp_slow_start_initial'
1292 * times maxblk, it's probably real file data which is
1293 * dominating. So it's better to use sendvec_chunk because
1294 * performance goes to the dogs if we don't do pagesize reads.
1295 * sendvec_chunk will do pagesize reads and write them
1296 * in pagesize mblks to TCP.
1298 * Side Notes: A write to file has not been optimized.
1299 * Future zero copy code will plug into sendvec_chunk
1300 * only because doing zero copy for files smaller than
1301 * pagesize is useless.
1304 if ((total_size
<= (4 * maxblk
)) &&
1306 error
= sendvec_small_chunk(fp
,
1307 &fileoff
, sfv
, copy_cnt
,
1308 total_size
, maxblk
, &count
);
1310 error
= sendvec_chunk(fp
, &fileoff
,
1311 sfv
, copy_cnt
, &count
);
1314 ASSERT(vp
->v_type
== VREG
);
1315 error
= sendvec_chunk(fp
, &fileoff
, sfv
, copy_cnt
,
1320 #ifdef _SYSCALL32_IMPL
1321 if (get_udatamodel() == DATAMODEL_ILP32
) {
1322 copy_vec
= (const struct sendfilevec
*)
1324 (copy_cnt
* sizeof (ksendfilevec32_t
)));
1327 copy_vec
+= copy_cnt
;
1330 /* Process all vector members up to first error */
1331 } while ((sfvcnt
> 0) && first_vector_error
== 0 && error
== 0);
1333 if (vp
->v_type
== VREG
)
1334 fp
->f_offset
+= count
;
1336 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
1338 #ifdef _SYSCALL32_IMPL
1339 if (get_udatamodel() == DATAMODEL_ILP32
) {
1340 ssize32_t count32
= (ssize32_t
)count
;
1341 if (copyout(&count32
, xferred
, sizeof (count32
)))
1345 return (set_errno(error
));
1346 if (first_vector_error
!= 0)
1347 return (set_errno(first_vector_error
));
1351 if (copyout(&count
, xferred
, sizeof (count
)))
1355 return (set_errno(error
));
1356 if (first_vector_error
!= 0)
1357 return (set_errno(first_vector_error
));
1362 return (set_errno(error
));