1 // SPDX-License-Identifier: GPL-2.0-only
3 * Common NFS I/O operations for the pnfs file based
6 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
8 * Tom Haynes <loghyr@primarydata.com>
11 #include <linux/nfs_fs.h>
12 #include <linux/nfs_page.h>
13 #include <linux/sunrpc/addr.h>
14 #include <linux/module.h>
16 #include "nfs4session.h"
20 #define NFSDBG_FACILITY NFSDBG_PNFS
22 void pnfs_generic_rw_release(void *data
)
24 struct nfs_pgio_header
*hdr
= data
;
26 nfs_put_client(hdr
->ds_clp
);
27 hdr
->mds_ops
->rpc_release(data
);
29 EXPORT_SYMBOL_GPL(pnfs_generic_rw_release
);
31 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
32 void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data
*data
)
34 struct nfs_writeverf
*verf
= data
->res
.verf
;
36 data
->task
.tk_status
= 0;
37 memset(&verf
->verifier
, 0, sizeof(verf
->verifier
));
38 verf
->committed
= NFS_UNSTABLE
;
40 EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes
);
42 void pnfs_generic_write_commit_done(struct rpc_task
*task
, void *data
)
44 struct nfs_commit_data
*wdata
= data
;
46 /* Note this may cause RPC to be resent */
47 wdata
->mds_ops
->rpc_call_done(task
, data
);
49 EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done
);
51 void pnfs_generic_commit_release(void *calldata
)
53 struct nfs_commit_data
*data
= calldata
;
55 data
->completion_ops
->completion(data
);
56 pnfs_put_lseg(data
->lseg
);
57 nfs_put_client(data
->ds_clp
);
58 nfs_commitdata_release(data
);
60 EXPORT_SYMBOL_GPL(pnfs_generic_commit_release
);
62 static struct pnfs_layout_segment
*
63 pnfs_free_bucket_lseg(struct pnfs_commit_bucket
*bucket
)
65 if (list_empty(&bucket
->committing
) && list_empty(&bucket
->written
)) {
66 struct pnfs_layout_segment
*freeme
= bucket
->lseg
;
73 /* The generic layer is about to remove the req from the commit list.
74 * If this will make the bucket empty, it will need to put the lseg reference.
75 * Note this must be called holding nfsi->commit_mutex
78 pnfs_generic_clear_request_commit(struct nfs_page
*req
,
79 struct nfs_commit_info
*cinfo
)
81 struct pnfs_commit_bucket
*bucket
= NULL
;
83 if (!test_and_clear_bit(PG_COMMIT_TO_DS
, &req
->wb_flags
))
85 cinfo
->ds
->nwritten
--;
86 if (list_is_singular(&req
->wb_list
))
87 bucket
= list_first_entry(&req
->wb_list
,
88 struct pnfs_commit_bucket
, written
);
90 nfs_request_remove_commit_list(req
, cinfo
);
92 pnfs_put_lseg(pnfs_free_bucket_lseg(bucket
));
94 EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit
);
96 struct pnfs_commit_array
*
97 pnfs_alloc_commit_array(size_t n
, gfp_t gfp_flags
)
99 struct pnfs_commit_array
*p
;
100 struct pnfs_commit_bucket
*b
;
102 p
= kmalloc(struct_size(p
, buckets
, n
), gfp_flags
);
106 INIT_LIST_HEAD(&p
->cinfo_list
);
107 INIT_LIST_HEAD(&p
->lseg_list
);
109 for (b
= &p
->buckets
[0]; n
!= 0; b
++, n
--) {
110 INIT_LIST_HEAD(&b
->written
);
111 INIT_LIST_HEAD(&b
->committing
);
113 b
->direct_verf
.committed
= NFS_INVALID_STABLE_HOW
;
117 EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array
);
120 pnfs_free_commit_array(struct pnfs_commit_array
*p
)
124 EXPORT_SYMBOL_GPL(pnfs_free_commit_array
);
126 static struct pnfs_commit_array
*
127 pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info
*fl_cinfo
,
128 struct pnfs_layout_segment
*lseg
)
130 struct pnfs_commit_array
*array
;
132 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
133 if (array
->lseg
== lseg
)
139 struct pnfs_commit_array
*
140 pnfs_add_commit_array(struct pnfs_ds_commit_info
*fl_cinfo
,
141 struct pnfs_commit_array
*new,
142 struct pnfs_layout_segment
*lseg
)
144 struct pnfs_commit_array
*array
;
146 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
150 refcount_set(&new->refcount
, 1);
151 list_add_rcu(&new->cinfo_list
, &fl_cinfo
->commits
);
152 list_add(&new->lseg_list
, &lseg
->pls_commits
);
155 EXPORT_SYMBOL_GPL(pnfs_add_commit_array
);
157 static struct pnfs_commit_array
*
158 pnfs_lookup_commit_array(struct pnfs_ds_commit_info
*fl_cinfo
,
159 struct pnfs_layout_segment
*lseg
)
161 struct pnfs_commit_array
*array
;
164 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
167 fl_cinfo
->ops
->setup_ds_info(fl_cinfo
, lseg
);
169 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
176 pnfs_release_commit_array_locked(struct pnfs_commit_array
*array
)
178 list_del_rcu(&array
->cinfo_list
);
179 list_del(&array
->lseg_list
);
180 pnfs_free_commit_array(array
);
184 pnfs_put_commit_array_locked(struct pnfs_commit_array
*array
)
186 if (refcount_dec_and_test(&array
->refcount
))
187 pnfs_release_commit_array_locked(array
);
191 pnfs_put_commit_array(struct pnfs_commit_array
*array
, struct inode
*inode
)
193 if (refcount_dec_and_lock(&array
->refcount
, &inode
->i_lock
)) {
194 pnfs_release_commit_array_locked(array
);
195 spin_unlock(&inode
->i_lock
);
199 static struct pnfs_commit_array
*
200 pnfs_get_commit_array(struct pnfs_commit_array
*array
)
202 if (refcount_inc_not_zero(&array
->refcount
))
208 pnfs_remove_and_free_commit_array(struct pnfs_commit_array
*array
)
211 list_del_init(&array
->lseg_list
);
212 pnfs_put_commit_array_locked(array
);
216 pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info
*fl_cinfo
,
217 struct pnfs_layout_segment
*lseg
)
219 struct pnfs_commit_array
*array
, *tmp
;
221 list_for_each_entry_safe(array
, tmp
, &lseg
->pls_commits
, lseg_list
)
222 pnfs_remove_and_free_commit_array(array
);
224 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg
);
227 pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info
*fl_cinfo
)
229 struct pnfs_commit_array
*array
, *tmp
;
231 list_for_each_entry_safe(array
, tmp
, &fl_cinfo
->commits
, cinfo_list
)
232 pnfs_remove_and_free_commit_array(array
);
234 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy
);
237 * Locks the nfs_page requests for commit and moves them to
238 * @bucket->committing.
241 pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket
*bucket
,
242 struct nfs_commit_info
*cinfo
,
245 struct list_head
*src
= &bucket
->written
;
246 struct list_head
*dst
= &bucket
->committing
;
249 lockdep_assert_held(&NFS_I(cinfo
->inode
)->commit_mutex
);
250 ret
= nfs_scan_commit_list(src
, dst
, cinfo
, max
);
252 cinfo
->ds
->nwritten
-= ret
;
253 cinfo
->ds
->ncommitting
+= ret
;
258 static int pnfs_bucket_scan_array(struct nfs_commit_info
*cinfo
,
259 struct pnfs_commit_bucket
*buckets
,
260 unsigned int nbuckets
,
266 for (i
= 0; i
< nbuckets
&& max
!= 0; i
++) {
267 cnt
= pnfs_bucket_scan_ds_commit_list(&buckets
[i
], cinfo
, max
);
274 /* Move reqs from written to committing lists, returning count
277 int pnfs_generic_scan_commit_lists(struct nfs_commit_info
*cinfo
, int max
)
279 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
280 struct pnfs_commit_array
*array
;
284 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
285 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
288 cnt
= pnfs_bucket_scan_array(cinfo
, array
->buckets
,
289 array
->nbuckets
, max
);
291 pnfs_put_commit_array(array
, cinfo
->inode
);
300 EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists
);
303 pnfs_bucket_recover_commit_reqs(struct list_head
*dst
,
304 struct pnfs_commit_bucket
*buckets
,
305 unsigned int nbuckets
,
306 struct nfs_commit_info
*cinfo
)
308 struct pnfs_commit_bucket
*b
;
309 struct pnfs_layout_segment
*freeme
;
310 unsigned int nwritten
, ret
= 0;
314 for (i
= 0, b
= buckets
; i
< nbuckets
; i
++, b
++) {
315 nwritten
= nfs_scan_commit_list(&b
->written
, dst
, cinfo
, 0);
319 freeme
= pnfs_free_bucket_lseg(b
);
321 pnfs_put_lseg(freeme
);
328 /* Pull everything off the written lists and dump into @dst. */
329 void pnfs_generic_recover_commit_reqs(struct list_head
*dst
,
330 struct nfs_commit_info
*cinfo
)
332 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
333 struct pnfs_commit_array
*array
;
334 unsigned int nwritten
;
336 lockdep_assert_held(&NFS_I(cinfo
->inode
)->commit_mutex
);
338 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
339 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
342 nwritten
= pnfs_bucket_recover_commit_reqs(dst
,
347 pnfs_put_commit_array(array
, cinfo
->inode
);
348 fl_cinfo
->nwritten
-= nwritten
;
352 EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs
);
354 static struct pnfs_layout_segment
*
355 pnfs_bucket_get_committing(struct list_head
*head
,
356 struct pnfs_commit_bucket
*bucket
,
357 struct nfs_commit_info
*cinfo
)
359 struct pnfs_layout_segment
*lseg
;
360 struct list_head
*pos
;
362 list_for_each(pos
, &bucket
->committing
)
363 cinfo
->ds
->ncommitting
--;
364 list_splice_init(&bucket
->committing
, head
);
365 lseg
= pnfs_free_bucket_lseg(bucket
);
367 lseg
= pnfs_get_lseg(bucket
->lseg
);
371 static struct nfs_commit_data
*
372 pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket
*bucket
,
373 struct nfs_commit_info
*cinfo
)
375 struct nfs_commit_data
*data
= nfs_commitdata_alloc();
379 data
->lseg
= pnfs_bucket_get_committing(&data
->pages
, bucket
, cinfo
);
383 static void pnfs_generic_retry_commit(struct pnfs_commit_bucket
*buckets
,
384 unsigned int nbuckets
,
385 struct nfs_commit_info
*cinfo
,
388 struct pnfs_commit_bucket
*bucket
;
389 struct pnfs_layout_segment
*freeme
;
392 for (bucket
= buckets
; idx
< nbuckets
; bucket
++, idx
++) {
393 if (list_empty(&bucket
->committing
))
395 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
396 freeme
= pnfs_bucket_get_committing(&pages
, bucket
, cinfo
);
397 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
398 nfs_retry_commit(&pages
, freeme
, cinfo
, idx
);
399 pnfs_put_lseg(freeme
);
404 pnfs_bucket_alloc_ds_commits(struct list_head
*list
,
405 struct pnfs_commit_bucket
*buckets
,
406 unsigned int nbuckets
,
407 struct nfs_commit_info
*cinfo
)
409 struct pnfs_commit_bucket
*bucket
;
410 struct nfs_commit_data
*data
;
412 unsigned int nreq
= 0;
414 for (i
= 0, bucket
= buckets
; i
< nbuckets
; i
++, bucket
++) {
415 if (list_empty(&bucket
->committing
))
417 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
418 if (!list_empty(&bucket
->committing
)) {
419 data
= pnfs_bucket_fetch_commitdata(bucket
, cinfo
);
422 data
->ds_commit_index
= i
;
423 list_add_tail(&data
->list
, list
);
426 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
430 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
431 /* Clean up on error */
432 pnfs_generic_retry_commit(buckets
, nbuckets
, cinfo
, i
);
437 pnfs_alloc_ds_commits_list(struct list_head
*list
,
438 struct pnfs_ds_commit_info
*fl_cinfo
,
439 struct nfs_commit_info
*cinfo
)
441 struct pnfs_commit_array
*array
;
442 unsigned int ret
= 0;
445 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
446 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
449 ret
+= pnfs_bucket_alloc_ds_commits(list
, array
->buckets
,
450 array
->nbuckets
, cinfo
);
452 pnfs_put_commit_array(array
, cinfo
->inode
);
458 /* This follows nfs_commit_list pretty closely */
460 pnfs_generic_commit_pagelist(struct inode
*inode
, struct list_head
*mds_pages
,
461 int how
, struct nfs_commit_info
*cinfo
,
462 int (*initiate_commit
)(struct nfs_commit_data
*data
,
465 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
466 struct nfs_commit_data
*data
, *tmp
;
468 unsigned int nreq
= 0;
470 if (!list_empty(mds_pages
)) {
471 data
= nfs_commitdata_alloc();
473 nfs_retry_commit(mds_pages
, NULL
, cinfo
, -1);
476 data
->ds_commit_index
= -1;
477 list_splice_init(mds_pages
, &data
->pages
);
478 list_add_tail(&data
->list
, &list
);
482 nreq
+= pnfs_alloc_ds_commits_list(&list
, fl_cinfo
, cinfo
);
486 list_for_each_entry_safe(data
, tmp
, &list
, list
) {
487 list_del(&data
->list
);
488 if (data
->ds_commit_index
< 0) {
489 nfs_init_commit(data
, NULL
, NULL
, cinfo
);
490 nfs_initiate_commit(NFS_CLIENT(inode
), data
,
491 NFS_PROTO(data
->inode
),
493 RPC_TASK_CRED_NOREF
, NULL
);
495 nfs_init_commit(data
, NULL
, data
->lseg
, cinfo
);
496 initiate_commit(data
, how
);
500 return PNFS_ATTEMPTED
;
502 EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist
);
507 * Data servers can be mapped to different device ids.
508 * nfs4_pnfs_ds reference counting
509 * - set to 1 on allocation
510 * - incremented when a device id maps a data server already in the cache.
511 * - decremented when deviceid is removed from the cache.
513 static DEFINE_SPINLOCK(nfs4_ds_cache_lock
);
514 static LIST_HEAD(nfs4_data_server_cache
);
518 print_ds(struct nfs4_pnfs_ds
*ds
)
521 printk(KERN_WARNING
"%s NULL device\n", __func__
);
524 printk(KERN_WARNING
" ds %s\n"
527 " cl_exchange_flags %x\n",
529 refcount_read(&ds
->ds_count
), ds
->ds_clp
,
530 ds
->ds_clp
? ds
->ds_clp
->cl_exchange_flags
: 0);
534 same_sockaddr(struct sockaddr
*addr1
, struct sockaddr
*addr2
)
536 struct sockaddr_in
*a
, *b
;
537 struct sockaddr_in6
*a6
, *b6
;
539 if (addr1
->sa_family
!= addr2
->sa_family
)
542 switch (addr1
->sa_family
) {
544 a
= (struct sockaddr_in
*)addr1
;
545 b
= (struct sockaddr_in
*)addr2
;
547 if (a
->sin_addr
.s_addr
== b
->sin_addr
.s_addr
&&
548 a
->sin_port
== b
->sin_port
)
553 a6
= (struct sockaddr_in6
*)addr1
;
554 b6
= (struct sockaddr_in6
*)addr2
;
556 /* LINKLOCAL addresses must have matching scope_id */
557 if (ipv6_addr_src_scope(&a6
->sin6_addr
) ==
558 IPV6_ADDR_SCOPE_LINKLOCAL
&&
559 a6
->sin6_scope_id
!= b6
->sin6_scope_id
)
562 if (ipv6_addr_equal(&a6
->sin6_addr
, &b6
->sin6_addr
) &&
563 a6
->sin6_port
== b6
->sin6_port
)
568 dprintk("%s: unhandled address family: %u\n",
569 __func__
, addr1
->sa_family
);
577 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
581 _same_data_server_addrs_locked(const struct list_head
*dsaddrs1
,
582 const struct list_head
*dsaddrs2
)
584 struct nfs4_pnfs_ds_addr
*da1
, *da2
;
585 struct sockaddr
*sa1
, *sa2
;
588 list_for_each_entry(da1
, dsaddrs1
, da_node
) {
589 sa1
= (struct sockaddr
*)&da1
->da_addr
;
591 list_for_each_entry(da2
, dsaddrs2
, da_node
) {
592 sa2
= (struct sockaddr
*)&da2
->da_addr
;
593 match
= same_sockaddr(sa1
, sa2
);
604 * Lookup DS by addresses. nfs4_ds_cache_lock is held
606 static struct nfs4_pnfs_ds
*
607 _data_server_lookup_locked(const struct list_head
*dsaddrs
)
609 struct nfs4_pnfs_ds
*ds
;
611 list_for_each_entry(ds
, &nfs4_data_server_cache
, ds_node
)
612 if (_same_data_server_addrs_locked(&ds
->ds_addrs
, dsaddrs
))
617 static struct nfs4_pnfs_ds_addr
*nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags
)
619 struct nfs4_pnfs_ds_addr
*da
= kzalloc(sizeof(*da
), gfp_flags
);
621 INIT_LIST_HEAD(&da
->da_node
);
625 static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr
*da
)
627 kfree(da
->da_remotestr
);
632 static void destroy_ds(struct nfs4_pnfs_ds
*ds
)
634 struct nfs4_pnfs_ds_addr
*da
;
636 dprintk("--> %s\n", __func__
);
640 nfs_put_client(ds
->ds_clp
);
642 while (!list_empty(&ds
->ds_addrs
)) {
643 da
= list_first_entry(&ds
->ds_addrs
,
644 struct nfs4_pnfs_ds_addr
,
646 list_del_init(&da
->da_node
);
647 nfs4_pnfs_ds_addr_free(da
);
650 kfree(ds
->ds_remotestr
);
654 void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds
*ds
)
656 if (refcount_dec_and_lock(&ds
->ds_count
,
657 &nfs4_ds_cache_lock
)) {
658 list_del_init(&ds
->ds_node
);
659 spin_unlock(&nfs4_ds_cache_lock
);
663 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put
);
666 * Create a string with a human readable address and port to avoid
667 * complicated setup around many dprintks.
670 nfs4_pnfs_remotestr(struct list_head
*dsaddrs
, gfp_t gfp_flags
)
672 struct nfs4_pnfs_ds_addr
*da
;
677 len
= 3; /* '{', '}' and eol */
678 list_for_each_entry(da
, dsaddrs
, da_node
) {
679 len
+= strlen(da
->da_remotestr
) + 1; /* string plus comma */
682 remotestr
= kzalloc(len
, gfp_flags
);
689 list_for_each_entry(da
, dsaddrs
, da_node
) {
690 size_t ll
= strlen(da
->da_remotestr
);
695 memcpy(p
, da
->da_remotestr
, ll
);
715 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
716 * uncached and return cached struct nfs4_pnfs_ds.
718 struct nfs4_pnfs_ds
*
719 nfs4_pnfs_ds_add(struct list_head
*dsaddrs
, gfp_t gfp_flags
)
721 struct nfs4_pnfs_ds
*tmp_ds
, *ds
= NULL
;
724 if (list_empty(dsaddrs
)) {
725 dprintk("%s: no addresses defined\n", __func__
);
729 ds
= kzalloc(sizeof(*ds
), gfp_flags
);
733 /* this is only used for debugging, so it's ok if it's NULL */
734 remotestr
= nfs4_pnfs_remotestr(dsaddrs
, gfp_flags
);
736 spin_lock(&nfs4_ds_cache_lock
);
737 tmp_ds
= _data_server_lookup_locked(dsaddrs
);
738 if (tmp_ds
== NULL
) {
739 INIT_LIST_HEAD(&ds
->ds_addrs
);
740 list_splice_init(dsaddrs
, &ds
->ds_addrs
);
741 ds
->ds_remotestr
= remotestr
;
742 refcount_set(&ds
->ds_count
, 1);
743 INIT_LIST_HEAD(&ds
->ds_node
);
745 list_add(&ds
->ds_node
, &nfs4_data_server_cache
);
746 dprintk("%s add new data server %s\n", __func__
,
751 refcount_inc(&tmp_ds
->ds_count
);
752 dprintk("%s data server %s found, inc'ed ds_count to %d\n",
753 __func__
, tmp_ds
->ds_remotestr
,
754 refcount_read(&tmp_ds
->ds_count
));
757 spin_unlock(&nfs4_ds_cache_lock
);
761 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add
);
763 static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds
*ds
)
766 return wait_on_bit(&ds
->ds_state
, NFS4DS_CONNECTING
, TASK_KILLABLE
);
769 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds
*ds
)
771 smp_mb__before_atomic();
772 clear_and_wake_up_bit(NFS4DS_CONNECTING
, &ds
->ds_state
);
775 static struct nfs_client
*(*get_v3_ds_connect
)(
776 struct nfs_server
*mds_srv
,
777 const struct sockaddr_storage
*ds_addr
,
780 unsigned int ds_timeo
,
781 unsigned int ds_retrans
);
783 static bool load_v3_ds_connect(void)
785 if (!get_v3_ds_connect
) {
786 get_v3_ds_connect
= symbol_request(nfs3_set_ds_client
);
787 WARN_ON_ONCE(!get_v3_ds_connect
);
790 return(get_v3_ds_connect
!= NULL
);
793 void nfs4_pnfs_v3_ds_connect_unload(void)
795 if (get_v3_ds_connect
) {
796 symbol_put(nfs3_set_ds_client
);
797 get_v3_ds_connect
= NULL
;
801 static int _nfs4_pnfs_v3_ds_connect(struct nfs_server
*mds_srv
,
802 struct nfs4_pnfs_ds
*ds
,
804 unsigned int retrans
)
806 struct nfs_client
*clp
= ERR_PTR(-EIO
);
807 struct nfs4_pnfs_ds_addr
*da
;
808 unsigned long connect_timeout
= timeo
* (retrans
+ 1) * HZ
/ 10;
811 dprintk("--> %s DS %s\n", __func__
, ds
->ds_remotestr
);
813 if (!load_v3_ds_connect())
814 return -EPROTONOSUPPORT
;
816 list_for_each_entry(da
, &ds
->ds_addrs
, da_node
) {
817 dprintk("%s: DS %s: trying address %s\n",
818 __func__
, ds
->ds_remotestr
, da
->da_remotestr
);
821 struct xprt_create xprt_args
= {
822 .ident
= da
->da_transport
,
824 .dstaddr
= (struct sockaddr
*)&da
->da_addr
,
825 .addrlen
= da
->da_addrlen
,
826 .servername
= clp
->cl_hostname
,
827 .connect_timeout
= connect_timeout
,
828 .reconnect_timeout
= connect_timeout
,
831 if (da
->da_transport
!= clp
->cl_proto
)
833 if (da
->da_addr
.ss_family
!= clp
->cl_addr
.ss_family
)
835 /* Add this address as an alias */
836 rpc_clnt_add_xprt(clp
->cl_rpcclient
, &xprt_args
,
837 rpc_clnt_test_and_add_xprt
, NULL
);
840 clp
= get_v3_ds_connect(mds_srv
,
842 da
->da_addrlen
, da
->da_transport
,
846 clp
->cl_rpcclient
->cl_softerr
= 0;
847 clp
->cl_rpcclient
->cl_softrtry
= 0;
851 status
= PTR_ERR(clp
);
856 WRITE_ONCE(ds
->ds_clp
, clp
);
857 dprintk("%s [new] addr: %s\n", __func__
, ds
->ds_remotestr
);
862 static int _nfs4_pnfs_v4_ds_connect(struct nfs_server
*mds_srv
,
863 struct nfs4_pnfs_ds
*ds
,
865 unsigned int retrans
,
868 struct nfs_client
*clp
= ERR_PTR(-EIO
);
869 struct nfs4_pnfs_ds_addr
*da
;
872 dprintk("--> %s DS %s\n", __func__
, ds
->ds_remotestr
);
874 list_for_each_entry(da
, &ds
->ds_addrs
, da_node
) {
877 dprintk("%s: DS %s: trying address %s\n",
878 __func__
, ds
->ds_remotestr
, da
->da_remotestr
);
880 if (!IS_ERR(clp
) && clp
->cl_mvops
->session_trunk
) {
881 struct xprt_create xprt_args
= {
882 .ident
= da
->da_transport
,
884 .dstaddr
= (struct sockaddr
*)&da
->da_addr
,
885 .addrlen
= da
->da_addrlen
,
886 .servername
= clp
->cl_hostname
,
887 .xprtsec
= clp
->cl_xprtsec
,
889 struct nfs4_add_xprt_data xprtdata
= {
892 struct rpc_add_xprt_test rpcdata
= {
893 .add_xprt_test
= clp
->cl_mvops
->session_trunk
,
897 if (da
->da_transport
!= clp
->cl_proto
&&
898 clp
->cl_proto
!= XPRT_TRANSPORT_TCP_TLS
)
900 if (da
->da_transport
== XPRT_TRANSPORT_TCP
&&
901 mds_srv
->nfs_client
->cl_proto
==
902 XPRT_TRANSPORT_TCP_TLS
) {
903 struct sockaddr
*addr
=
904 (struct sockaddr
*)&da
->da_addr
;
905 struct sockaddr_in
*sin
=
906 (struct sockaddr_in
*)&da
->da_addr
;
907 struct sockaddr_in6
*sin6
=
908 (struct sockaddr_in6
*)&da
->da_addr
;
910 /* for NFS with TLS we need to supply a correct
911 * servername of the trunked transport, not the
912 * servername of the main transport stored in
913 * clp->cl_hostname. And set the protocol to
914 * indicate to use TLS
916 servername
[0] = '\0';
917 switch(addr
->sa_family
) {
919 snprintf(servername
, sizeof(servername
),
920 "%pI4", &sin
->sin_addr
.s_addr
);
923 snprintf(servername
, sizeof(servername
),
924 "%pI6", &sin6
->sin6_addr
);
927 /* do not consider this address */
930 xprt_args
.ident
= XPRT_TRANSPORT_TCP_TLS
;
931 xprt_args
.servername
= servername
;
933 if (da
->da_addr
.ss_family
!= clp
->cl_addr
.ss_family
)
937 * Test this address for session trunking and
940 xprtdata
.cred
= nfs4_get_clid_cred(clp
);
941 rpc_clnt_add_xprt(clp
->cl_rpcclient
, &xprt_args
,
942 rpc_clnt_setup_test_and_add_xprt
,
945 put_cred(xprtdata
.cred
);
947 if (da
->da_transport
== XPRT_TRANSPORT_TCP
&&
948 mds_srv
->nfs_client
->cl_proto
==
949 XPRT_TRANSPORT_TCP_TLS
)
950 da
->da_transport
= XPRT_TRANSPORT_TCP_TLS
;
951 clp
= nfs4_set_ds_client(mds_srv
,
954 da
->da_transport
, timeo
,
955 retrans
, minor_version
);
959 status
= nfs4_init_ds_session(clp
,
960 mds_srv
->nfs_client
->cl_lease_time
);
971 status
= PTR_ERR(clp
);
976 WRITE_ONCE(ds
->ds_clp
, clp
);
977 dprintk("%s [new] addr: %s\n", __func__
, ds
->ds_remotestr
);
983 * Create an rpc connection to the nfs4_pnfs_ds data server.
984 * Currently only supports IPv4 and IPv6 addresses.
985 * If connection fails, make devid unavailable and return a -errno.
987 int nfs4_pnfs_ds_connect(struct nfs_server
*mds_srv
, struct nfs4_pnfs_ds
*ds
,
988 struct nfs4_deviceid_node
*devid
, unsigned int timeo
,
989 unsigned int retrans
, u32 version
, u32 minor_version
)
994 err
= nfs4_wait_ds_connect(ds
);
995 if (err
|| ds
->ds_clp
)
997 if (nfs4_test_deviceid_unavailable(devid
))
999 } while (test_and_set_bit(NFS4DS_CONNECTING
, &ds
->ds_state
) != 0);
1006 err
= _nfs4_pnfs_v3_ds_connect(mds_srv
, ds
, timeo
, retrans
);
1009 err
= _nfs4_pnfs_v4_ds_connect(mds_srv
, ds
, timeo
, retrans
,
1013 dprintk("%s: unsupported DS version %d\n", __func__
, version
);
1014 err
= -EPROTONOSUPPORT
;
1018 nfs4_clear_ds_conn_bit(ds
);
1021 * At this point the ds->ds_clp should be ready, but it might have
1025 if (!ds
->ds_clp
|| !nfs_client_init_is_complete(ds
->ds_clp
)) {
1026 WARN_ON_ONCE(ds
->ds_clp
||
1027 !nfs4_test_deviceid_unavailable(devid
));
1030 err
= nfs_client_init_status(ds
->ds_clp
);
1035 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect
);
1038 * Currently only supports ipv4, ipv6 and one multi-path address.
1040 struct nfs4_pnfs_ds_addr
*
1041 nfs4_decode_mp_ds_addr(struct net
*net
, struct xdr_stream
*xdr
, gfp_t gfp_flags
)
1043 struct nfs4_pnfs_ds_addr
*da
= NULL
;
1044 char *buf
, *portstr
;
1050 char *startsep
= "";
1055 nlen
= xdr_stream_decode_string_dup(xdr
, &netid
, XDR_MAX_NETOBJ
,
1057 if (unlikely(nlen
< 0))
1060 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
1061 /* port is ".ABC.DEF", 8 chars max */
1062 rlen
= xdr_stream_decode_string_dup(xdr
, &buf
, INET6_ADDRSTRLEN
+
1063 IPV6_SCOPE_ID_LEN
+ 8, gfp_flags
);
1064 if (unlikely(rlen
< 0))
1065 goto out_free_netid
;
1067 /* replace port '.' with '-' */
1068 portstr
= strrchr(buf
, '.');
1070 dprintk("%s: Failed finding expected dot in port\n",
1076 /* find '.' between address and port */
1077 portstr
= strrchr(buf
, '.');
1079 dprintk("%s: Failed finding expected dot between address and "
1080 "port\n", __func__
);
1085 da
= nfs4_pnfs_ds_addr_alloc(gfp_flags
);
1089 if (!rpc_pton(net
, buf
, portstr
-buf
, (struct sockaddr
*)&da
->da_addr
,
1090 sizeof(da
->da_addr
))) {
1091 dprintk("%s: error parsing address %s\n", __func__
, buf
);
1096 sscanf(portstr
, "%d-%d", &tmp
[0], &tmp
[1]);
1097 port
= htons((tmp
[0] << 8) | (tmp
[1]));
1099 switch (da
->da_addr
.ss_family
) {
1101 ((struct sockaddr_in
*)&da
->da_addr
)->sin_port
= port
;
1102 da
->da_addrlen
= sizeof(struct sockaddr_in
);
1106 ((struct sockaddr_in6
*)&da
->da_addr
)->sin6_port
= port
;
1107 da
->da_addrlen
= sizeof(struct sockaddr_in6
);
1113 dprintk("%s: unsupported address family: %u\n",
1114 __func__
, da
->da_addr
.ss_family
);
1118 da
->da_transport
= xprt_find_transport_ident(netid
);
1119 if (da
->da_transport
< 0) {
1120 dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
1125 da
->da_netid
= netid
;
1127 /* save human readable address */
1128 len
= strlen(startsep
) + strlen(buf
) + strlen(endsep
) + 7;
1129 da
->da_remotestr
= kzalloc(len
, gfp_flags
);
1131 /* NULL is ok, only used for dprintk */
1132 if (da
->da_remotestr
)
1133 snprintf(da
->da_remotestr
, len
, "%s%s%s:%u", startsep
,
1134 buf
, endsep
, ntohs(port
));
1136 dprintk("%s: Parsed DS addr %s\n", __func__
, da
->da_remotestr
);
1143 dprintk("%s: Error parsing DS addr: %s\n", __func__
, buf
);
1150 EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr
);
1153 pnfs_layout_mark_request_commit(struct nfs_page
*req
,
1154 struct pnfs_layout_segment
*lseg
,
1155 struct nfs_commit_info
*cinfo
,
1158 struct list_head
*list
;
1159 struct pnfs_commit_array
*array
;
1160 struct pnfs_commit_bucket
*bucket
;
1162 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1163 array
= pnfs_lookup_commit_array(cinfo
->ds
, lseg
);
1164 if (!array
|| !pnfs_is_valid_lseg(lseg
))
1166 bucket
= &array
->buckets
[ds_commit_idx
];
1167 list
= &bucket
->written
;
1168 /* Non-empty buckets hold a reference on the lseg. That ref
1169 * is normally transferred to the COMMIT call and released
1170 * there. It could also be released if the last req is pulled
1171 * off due to a rewrite, in which case it will be done in
1172 * pnfs_generic_clear_request_commit
1175 bucket
->lseg
= pnfs_get_lseg(lseg
);
1176 set_bit(PG_COMMIT_TO_DS
, &req
->wb_flags
);
1177 cinfo
->ds
->nwritten
++;
1179 nfs_request_add_commit_list_locked(req
, list
, cinfo
);
1180 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1181 nfs_folio_mark_unstable(nfs_page_to_folio(req
), cinfo
);
1184 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1185 cinfo
->completion_ops
->resched_write(cinfo
, req
);
1187 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit
);
1190 pnfs_nfs_generic_sync(struct inode
*inode
, bool datasync
)
1194 if (!pnfs_layoutcommit_outstanding(inode
))
1196 ret
= nfs_commit_inode(inode
, FLUSH_SYNC
);
1201 return pnfs_layoutcommit_inode(inode
, true);
1203 EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync
);