1 // SPDX-License-Identifier: GPL-2.0-only
3 * Common NFS I/O operations for the pnfs file based
6 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
8 * Tom Haynes <loghyr@primarydata.com>
11 #include <linux/nfs_fs.h>
12 #include <linux/nfs_page.h>
13 #include <linux/sunrpc/addr.h>
14 #include <linux/module.h>
16 #include "nfs4session.h"
20 #define NFSDBG_FACILITY NFSDBG_PNFS
22 void pnfs_generic_rw_release(void *data
)
24 struct nfs_pgio_header
*hdr
= data
;
26 nfs_put_client(hdr
->ds_clp
);
27 hdr
->mds_ops
->rpc_release(data
);
29 EXPORT_SYMBOL_GPL(pnfs_generic_rw_release
);
31 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
32 void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data
*data
)
34 struct nfs_writeverf
*verf
= data
->res
.verf
;
36 data
->task
.tk_status
= 0;
37 memset(&verf
->verifier
, 0, sizeof(verf
->verifier
));
38 verf
->committed
= NFS_UNSTABLE
;
40 EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes
);
42 void pnfs_generic_write_commit_done(struct rpc_task
*task
, void *data
)
44 struct nfs_commit_data
*wdata
= data
;
46 /* Note this may cause RPC to be resent */
47 wdata
->mds_ops
->rpc_call_done(task
, data
);
49 EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done
);
51 void pnfs_generic_commit_release(void *calldata
)
53 struct nfs_commit_data
*data
= calldata
;
55 data
->completion_ops
->completion(data
);
56 pnfs_put_lseg(data
->lseg
);
57 nfs_put_client(data
->ds_clp
);
58 nfs_commitdata_release(data
);
60 EXPORT_SYMBOL_GPL(pnfs_generic_commit_release
);
62 static struct pnfs_layout_segment
*
63 pnfs_free_bucket_lseg(struct pnfs_commit_bucket
*bucket
)
65 if (list_empty(&bucket
->committing
) && list_empty(&bucket
->written
)) {
66 struct pnfs_layout_segment
*freeme
= bucket
->lseg
;
73 /* The generic layer is about to remove the req from the commit list.
74 * If this will make the bucket empty, it will need to put the lseg reference.
75 * Note this must be called holding nfsi->commit_mutex
78 pnfs_generic_clear_request_commit(struct nfs_page
*req
,
79 struct nfs_commit_info
*cinfo
)
81 struct pnfs_layout_segment
*freeme
= NULL
;
83 if (!test_and_clear_bit(PG_COMMIT_TO_DS
, &req
->wb_flags
))
85 cinfo
->ds
->nwritten
--;
86 if (list_is_singular(&req
->wb_list
)) {
87 struct pnfs_commit_bucket
*bucket
;
89 bucket
= list_first_entry(&req
->wb_list
,
90 struct pnfs_commit_bucket
,
92 freeme
= pnfs_free_bucket_lseg(bucket
);
95 nfs_request_remove_commit_list(req
, cinfo
);
96 pnfs_put_lseg(freeme
);
98 EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit
);
100 struct pnfs_commit_array
*
101 pnfs_alloc_commit_array(size_t n
, gfp_t gfp_flags
)
103 struct pnfs_commit_array
*p
;
104 struct pnfs_commit_bucket
*b
;
106 p
= kmalloc(struct_size(p
, buckets
, n
), gfp_flags
);
110 INIT_LIST_HEAD(&p
->cinfo_list
);
111 INIT_LIST_HEAD(&p
->lseg_list
);
113 for (b
= &p
->buckets
[0]; n
!= 0; b
++, n
--) {
114 INIT_LIST_HEAD(&b
->written
);
115 INIT_LIST_HEAD(&b
->committing
);
117 b
->direct_verf
.committed
= NFS_INVALID_STABLE_HOW
;
121 EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array
);
124 pnfs_free_commit_array(struct pnfs_commit_array
*p
)
128 EXPORT_SYMBOL_GPL(pnfs_free_commit_array
);
130 static struct pnfs_commit_array
*
131 pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info
*fl_cinfo
,
132 struct pnfs_layout_segment
*lseg
)
134 struct pnfs_commit_array
*array
;
136 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
137 if (array
->lseg
== lseg
)
143 struct pnfs_commit_array
*
144 pnfs_add_commit_array(struct pnfs_ds_commit_info
*fl_cinfo
,
145 struct pnfs_commit_array
*new,
146 struct pnfs_layout_segment
*lseg
)
148 struct pnfs_commit_array
*array
;
150 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
154 refcount_set(&new->refcount
, 1);
155 list_add_rcu(&new->cinfo_list
, &fl_cinfo
->commits
);
156 list_add(&new->lseg_list
, &lseg
->pls_commits
);
159 EXPORT_SYMBOL_GPL(pnfs_add_commit_array
);
161 static struct pnfs_commit_array
*
162 pnfs_lookup_commit_array(struct pnfs_ds_commit_info
*fl_cinfo
,
163 struct pnfs_layout_segment
*lseg
)
165 struct pnfs_commit_array
*array
;
168 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
171 fl_cinfo
->ops
->setup_ds_info(fl_cinfo
, lseg
);
173 array
= pnfs_find_commit_array_by_lseg(fl_cinfo
, lseg
);
180 pnfs_release_commit_array_locked(struct pnfs_commit_array
*array
)
182 list_del_rcu(&array
->cinfo_list
);
183 list_del(&array
->lseg_list
);
184 pnfs_free_commit_array(array
);
188 pnfs_put_commit_array_locked(struct pnfs_commit_array
*array
)
190 if (refcount_dec_and_test(&array
->refcount
))
191 pnfs_release_commit_array_locked(array
);
195 pnfs_put_commit_array(struct pnfs_commit_array
*array
, struct inode
*inode
)
197 if (refcount_dec_and_lock(&array
->refcount
, &inode
->i_lock
)) {
198 pnfs_release_commit_array_locked(array
);
199 spin_unlock(&inode
->i_lock
);
203 static struct pnfs_commit_array
*
204 pnfs_get_commit_array(struct pnfs_commit_array
*array
)
206 if (refcount_inc_not_zero(&array
->refcount
))
212 pnfs_remove_and_free_commit_array(struct pnfs_commit_array
*array
)
215 list_del_init(&array
->lseg_list
);
216 pnfs_put_commit_array_locked(array
);
220 pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info
*fl_cinfo
,
221 struct pnfs_layout_segment
*lseg
)
223 struct pnfs_commit_array
*array
, *tmp
;
225 list_for_each_entry_safe(array
, tmp
, &lseg
->pls_commits
, lseg_list
)
226 pnfs_remove_and_free_commit_array(array
);
228 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg
);
231 pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info
*fl_cinfo
)
233 struct pnfs_commit_array
*array
, *tmp
;
235 list_for_each_entry_safe(array
, tmp
, &fl_cinfo
->commits
, cinfo_list
)
236 pnfs_remove_and_free_commit_array(array
);
238 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy
);
241 * Locks the nfs_page requests for commit and moves them to
242 * @bucket->committing.
245 pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket
*bucket
,
246 struct nfs_commit_info
*cinfo
,
249 struct list_head
*src
= &bucket
->written
;
250 struct list_head
*dst
= &bucket
->committing
;
253 lockdep_assert_held(&NFS_I(cinfo
->inode
)->commit_mutex
);
254 ret
= nfs_scan_commit_list(src
, dst
, cinfo
, max
);
256 cinfo
->ds
->nwritten
-= ret
;
257 cinfo
->ds
->ncommitting
+= ret
;
262 static int pnfs_bucket_scan_array(struct nfs_commit_info
*cinfo
,
263 struct pnfs_commit_bucket
*buckets
,
264 unsigned int nbuckets
,
270 for (i
= 0; i
< nbuckets
&& max
!= 0; i
++) {
271 cnt
= pnfs_bucket_scan_ds_commit_list(&buckets
[i
], cinfo
, max
);
278 /* Move reqs from written to committing lists, returning count
281 int pnfs_generic_scan_commit_lists(struct nfs_commit_info
*cinfo
, int max
)
283 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
284 struct pnfs_commit_array
*array
;
288 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
289 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
292 cnt
= pnfs_bucket_scan_array(cinfo
, array
->buckets
,
293 array
->nbuckets
, max
);
295 pnfs_put_commit_array(array
, cinfo
->inode
);
304 EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists
);
307 pnfs_bucket_recover_commit_reqs(struct list_head
*dst
,
308 struct pnfs_commit_bucket
*buckets
,
309 unsigned int nbuckets
,
310 struct nfs_commit_info
*cinfo
)
312 struct pnfs_commit_bucket
*b
;
313 struct pnfs_layout_segment
*freeme
;
314 unsigned int nwritten
, ret
= 0;
318 for (i
= 0, b
= buckets
; i
< nbuckets
; i
++, b
++) {
319 nwritten
= nfs_scan_commit_list(&b
->written
, dst
, cinfo
, 0);
323 freeme
= pnfs_free_bucket_lseg(b
);
325 pnfs_put_lseg(freeme
);
332 /* Pull everything off the written lists and dump into @dst. */
333 void pnfs_generic_recover_commit_reqs(struct list_head
*dst
,
334 struct nfs_commit_info
*cinfo
)
336 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
337 struct pnfs_commit_array
*array
;
338 unsigned int nwritten
;
340 lockdep_assert_held(&NFS_I(cinfo
->inode
)->commit_mutex
);
342 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
343 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
346 nwritten
= pnfs_bucket_recover_commit_reqs(dst
,
351 pnfs_put_commit_array(array
, cinfo
->inode
);
352 fl_cinfo
->nwritten
-= nwritten
;
356 EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs
);
358 static struct nfs_page
*
359 pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket
*buckets
,
360 unsigned int nbuckets
, struct page
*page
)
362 struct nfs_page
*req
;
363 struct pnfs_commit_bucket
*b
;
366 /* Linearly search the commit lists for each bucket until a matching
367 * request is found */
368 for (i
= 0, b
= buckets
; i
< nbuckets
; i
++, b
++) {
369 list_for_each_entry(req
, &b
->written
, wb_list
) {
370 if (req
->wb_page
== page
)
373 list_for_each_entry(req
, &b
->committing
, wb_list
) {
374 if (req
->wb_page
== page
)
381 /* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
383 * @cinfo - commit info for current inode
384 * @page - page to search for matching head request
386 * Returns the head request if one is found, otherwise returns NULL.
389 pnfs_generic_search_commit_reqs(struct nfs_commit_info
*cinfo
, struct page
*page
)
391 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
392 struct pnfs_commit_array
*array
;
393 struct nfs_page
*req
;
395 list_for_each_entry(array
, &fl_cinfo
->commits
, cinfo_list
) {
396 req
= pnfs_bucket_search_commit_reqs(array
->buckets
,
397 array
->nbuckets
, page
);
403 EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs
);
405 static struct pnfs_layout_segment
*
406 pnfs_bucket_get_committing(struct list_head
*head
,
407 struct pnfs_commit_bucket
*bucket
,
408 struct nfs_commit_info
*cinfo
)
410 struct list_head
*pos
;
412 list_for_each(pos
, &bucket
->committing
)
413 cinfo
->ds
->ncommitting
--;
414 list_splice_init(&bucket
->committing
, head
);
415 return pnfs_free_bucket_lseg(bucket
);
418 static struct nfs_commit_data
*
419 pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket
*bucket
,
420 struct nfs_commit_info
*cinfo
)
422 struct nfs_commit_data
*data
= nfs_commitdata_alloc(false);
426 data
->lseg
= pnfs_bucket_get_committing(&data
->pages
, bucket
, cinfo
);
428 data
->lseg
= pnfs_get_lseg(bucket
->lseg
);
432 static void pnfs_generic_retry_commit(struct pnfs_commit_bucket
*buckets
,
433 unsigned int nbuckets
,
434 struct nfs_commit_info
*cinfo
,
437 struct pnfs_commit_bucket
*bucket
;
438 struct pnfs_layout_segment
*freeme
;
441 for (bucket
= buckets
; idx
< nbuckets
; bucket
++, idx
++) {
442 if (list_empty(&bucket
->committing
))
444 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
445 freeme
= pnfs_bucket_get_committing(&pages
, bucket
, cinfo
);
446 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
447 nfs_retry_commit(&pages
, freeme
, cinfo
, idx
);
448 pnfs_put_lseg(freeme
);
453 pnfs_bucket_alloc_ds_commits(struct list_head
*list
,
454 struct pnfs_commit_bucket
*buckets
,
455 unsigned int nbuckets
,
456 struct nfs_commit_info
*cinfo
)
458 struct pnfs_commit_bucket
*bucket
;
459 struct nfs_commit_data
*data
;
461 unsigned int nreq
= 0;
463 for (i
= 0, bucket
= buckets
; i
< nbuckets
; i
++, bucket
++) {
464 if (list_empty(&bucket
->committing
))
466 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
467 if (!list_empty(&bucket
->committing
)) {
468 data
= pnfs_bucket_fetch_commitdata(bucket
, cinfo
);
471 data
->ds_commit_index
= i
;
472 list_add_tail(&data
->list
, list
);
473 atomic_inc(&cinfo
->mds
->rpcs_out
);
476 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
480 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
481 /* Clean up on error */
482 pnfs_generic_retry_commit(buckets
, nbuckets
, cinfo
, i
);
487 pnfs_alloc_ds_commits_list(struct list_head
*list
,
488 struct pnfs_ds_commit_info
*fl_cinfo
,
489 struct nfs_commit_info
*cinfo
)
491 struct pnfs_commit_array
*array
;
492 unsigned int ret
= 0;
495 list_for_each_entry_rcu(array
, &fl_cinfo
->commits
, cinfo_list
) {
496 if (!array
->lseg
|| !pnfs_get_commit_array(array
))
499 ret
+= pnfs_bucket_alloc_ds_commits(list
, array
->buckets
,
500 array
->nbuckets
, cinfo
);
502 pnfs_put_commit_array(array
, cinfo
->inode
);
508 /* This follows nfs_commit_list pretty closely */
510 pnfs_generic_commit_pagelist(struct inode
*inode
, struct list_head
*mds_pages
,
511 int how
, struct nfs_commit_info
*cinfo
,
512 int (*initiate_commit
)(struct nfs_commit_data
*data
,
515 struct pnfs_ds_commit_info
*fl_cinfo
= cinfo
->ds
;
516 struct nfs_commit_data
*data
, *tmp
;
518 unsigned int nreq
= 0;
520 if (!list_empty(mds_pages
)) {
521 data
= nfs_commitdata_alloc(true);
522 data
->ds_commit_index
= -1;
523 list_splice_init(mds_pages
, &data
->pages
);
524 list_add_tail(&data
->list
, &list
);
525 atomic_inc(&cinfo
->mds
->rpcs_out
);
529 nreq
+= pnfs_alloc_ds_commits_list(&list
, fl_cinfo
, cinfo
);
533 list_for_each_entry_safe(data
, tmp
, &list
, list
) {
534 list_del(&data
->list
);
535 if (data
->ds_commit_index
< 0) {
536 nfs_init_commit(data
, NULL
, NULL
, cinfo
);
537 nfs_initiate_commit(NFS_CLIENT(inode
), data
,
538 NFS_PROTO(data
->inode
),
540 RPC_TASK_CRED_NOREF
);
542 nfs_init_commit(data
, NULL
, data
->lseg
, cinfo
);
543 initiate_commit(data
, how
);
547 return PNFS_ATTEMPTED
;
549 EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist
);
554 * Data servers can be mapped to different device ids.
555 * nfs4_pnfs_ds reference counting
556 * - set to 1 on allocation
557 * - incremented when a device id maps a data server already in the cache.
558 * - decremented when deviceid is removed from the cache.
560 static DEFINE_SPINLOCK(nfs4_ds_cache_lock
);
561 static LIST_HEAD(nfs4_data_server_cache
);
565 print_ds(struct nfs4_pnfs_ds
*ds
)
568 printk(KERN_WARNING
"%s NULL device\n", __func__
);
571 printk(KERN_WARNING
" ds %s\n"
574 " cl_exchange_flags %x\n",
576 refcount_read(&ds
->ds_count
), ds
->ds_clp
,
577 ds
->ds_clp
? ds
->ds_clp
->cl_exchange_flags
: 0);
581 same_sockaddr(struct sockaddr
*addr1
, struct sockaddr
*addr2
)
583 struct sockaddr_in
*a
, *b
;
584 struct sockaddr_in6
*a6
, *b6
;
586 if (addr1
->sa_family
!= addr2
->sa_family
)
589 switch (addr1
->sa_family
) {
591 a
= (struct sockaddr_in
*)addr1
;
592 b
= (struct sockaddr_in
*)addr2
;
594 if (a
->sin_addr
.s_addr
== b
->sin_addr
.s_addr
&&
595 a
->sin_port
== b
->sin_port
)
600 a6
= (struct sockaddr_in6
*)addr1
;
601 b6
= (struct sockaddr_in6
*)addr2
;
603 /* LINKLOCAL addresses must have matching scope_id */
604 if (ipv6_addr_src_scope(&a6
->sin6_addr
) ==
605 IPV6_ADDR_SCOPE_LINKLOCAL
&&
606 a6
->sin6_scope_id
!= b6
->sin6_scope_id
)
609 if (ipv6_addr_equal(&a6
->sin6_addr
, &b6
->sin6_addr
) &&
610 a6
->sin6_port
== b6
->sin6_port
)
615 dprintk("%s: unhandled address family: %u\n",
616 __func__
, addr1
->sa_family
);
624 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
628 _same_data_server_addrs_locked(const struct list_head
*dsaddrs1
,
629 const struct list_head
*dsaddrs2
)
631 struct nfs4_pnfs_ds_addr
*da1
, *da2
;
632 struct sockaddr
*sa1
, *sa2
;
635 list_for_each_entry(da1
, dsaddrs1
, da_node
) {
636 sa1
= (struct sockaddr
*)&da1
->da_addr
;
638 list_for_each_entry(da2
, dsaddrs2
, da_node
) {
639 sa2
= (struct sockaddr
*)&da2
->da_addr
;
640 match
= same_sockaddr(sa1
, sa2
);
651 * Lookup DS by addresses. nfs4_ds_cache_lock is held
653 static struct nfs4_pnfs_ds
*
654 _data_server_lookup_locked(const struct list_head
*dsaddrs
)
656 struct nfs4_pnfs_ds
*ds
;
658 list_for_each_entry(ds
, &nfs4_data_server_cache
, ds_node
)
659 if (_same_data_server_addrs_locked(&ds
->ds_addrs
, dsaddrs
))
664 static struct nfs4_pnfs_ds_addr
*nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags
)
666 struct nfs4_pnfs_ds_addr
*da
= kzalloc(sizeof(*da
), gfp_flags
);
668 INIT_LIST_HEAD(&da
->da_node
);
672 static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr
*da
)
674 kfree(da
->da_remotestr
);
679 static void destroy_ds(struct nfs4_pnfs_ds
*ds
)
681 struct nfs4_pnfs_ds_addr
*da
;
683 dprintk("--> %s\n", __func__
);
687 nfs_put_client(ds
->ds_clp
);
689 while (!list_empty(&ds
->ds_addrs
)) {
690 da
= list_first_entry(&ds
->ds_addrs
,
691 struct nfs4_pnfs_ds_addr
,
693 list_del_init(&da
->da_node
);
694 nfs4_pnfs_ds_addr_free(da
);
697 kfree(ds
->ds_remotestr
);
701 void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds
*ds
)
703 if (refcount_dec_and_lock(&ds
->ds_count
,
704 &nfs4_ds_cache_lock
)) {
705 list_del_init(&ds
->ds_node
);
706 spin_unlock(&nfs4_ds_cache_lock
);
710 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put
);
713 * Create a string with a human readable address and port to avoid
714 * complicated setup around many dprintks.
717 nfs4_pnfs_remotestr(struct list_head
*dsaddrs
, gfp_t gfp_flags
)
719 struct nfs4_pnfs_ds_addr
*da
;
724 len
= 3; /* '{', '}' and eol */
725 list_for_each_entry(da
, dsaddrs
, da_node
) {
726 len
+= strlen(da
->da_remotestr
) + 1; /* string plus comma */
729 remotestr
= kzalloc(len
, gfp_flags
);
736 list_for_each_entry(da
, dsaddrs
, da_node
) {
737 size_t ll
= strlen(da
->da_remotestr
);
742 memcpy(p
, da
->da_remotestr
, ll
);
762 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
763 * uncached and return cached struct nfs4_pnfs_ds.
765 struct nfs4_pnfs_ds
*
766 nfs4_pnfs_ds_add(struct list_head
*dsaddrs
, gfp_t gfp_flags
)
768 struct nfs4_pnfs_ds
*tmp_ds
, *ds
= NULL
;
771 if (list_empty(dsaddrs
)) {
772 dprintk("%s: no addresses defined\n", __func__
);
776 ds
= kzalloc(sizeof(*ds
), gfp_flags
);
780 /* this is only used for debugging, so it's ok if it's NULL */
781 remotestr
= nfs4_pnfs_remotestr(dsaddrs
, gfp_flags
);
783 spin_lock(&nfs4_ds_cache_lock
);
784 tmp_ds
= _data_server_lookup_locked(dsaddrs
);
785 if (tmp_ds
== NULL
) {
786 INIT_LIST_HEAD(&ds
->ds_addrs
);
787 list_splice_init(dsaddrs
, &ds
->ds_addrs
);
788 ds
->ds_remotestr
= remotestr
;
789 refcount_set(&ds
->ds_count
, 1);
790 INIT_LIST_HEAD(&ds
->ds_node
);
792 list_add(&ds
->ds_node
, &nfs4_data_server_cache
);
793 dprintk("%s add new data server %s\n", __func__
,
798 refcount_inc(&tmp_ds
->ds_count
);
799 dprintk("%s data server %s found, inc'ed ds_count to %d\n",
800 __func__
, tmp_ds
->ds_remotestr
,
801 refcount_read(&tmp_ds
->ds_count
));
804 spin_unlock(&nfs4_ds_cache_lock
);
808 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add
);
810 static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds
*ds
)
813 wait_on_bit(&ds
->ds_state
, NFS4DS_CONNECTING
,
817 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds
*ds
)
819 smp_mb__before_atomic();
820 clear_bit(NFS4DS_CONNECTING
, &ds
->ds_state
);
821 smp_mb__after_atomic();
822 wake_up_bit(&ds
->ds_state
, NFS4DS_CONNECTING
);
825 static struct nfs_client
*(*get_v3_ds_connect
)(
826 struct nfs_server
*mds_srv
,
827 const struct sockaddr
*ds_addr
,
830 unsigned int ds_timeo
,
831 unsigned int ds_retrans
);
833 static bool load_v3_ds_connect(void)
835 if (!get_v3_ds_connect
) {
836 get_v3_ds_connect
= symbol_request(nfs3_set_ds_client
);
837 WARN_ON_ONCE(!get_v3_ds_connect
);
840 return(get_v3_ds_connect
!= NULL
);
843 void nfs4_pnfs_v3_ds_connect_unload(void)
845 if (get_v3_ds_connect
) {
846 symbol_put(nfs3_set_ds_client
);
847 get_v3_ds_connect
= NULL
;
851 static int _nfs4_pnfs_v3_ds_connect(struct nfs_server
*mds_srv
,
852 struct nfs4_pnfs_ds
*ds
,
854 unsigned int retrans
)
856 struct nfs_client
*clp
= ERR_PTR(-EIO
);
857 struct nfs4_pnfs_ds_addr
*da
;
860 dprintk("--> %s DS %s\n", __func__
, ds
->ds_remotestr
);
862 if (!load_v3_ds_connect())
865 list_for_each_entry(da
, &ds
->ds_addrs
, da_node
) {
866 dprintk("%s: DS %s: trying address %s\n",
867 __func__
, ds
->ds_remotestr
, da
->da_remotestr
);
870 struct xprt_create xprt_args
= {
871 .ident
= da
->da_transport
,
873 .dstaddr
= (struct sockaddr
*)&da
->da_addr
,
874 .addrlen
= da
->da_addrlen
,
875 .servername
= clp
->cl_hostname
,
878 if (da
->da_transport
!= clp
->cl_proto
)
880 if (da
->da_addr
.ss_family
!= clp
->cl_addr
.ss_family
)
882 /* Add this address as an alias */
883 rpc_clnt_add_xprt(clp
->cl_rpcclient
, &xprt_args
,
884 rpc_clnt_test_and_add_xprt
, NULL
);
887 clp
= get_v3_ds_connect(mds_srv
,
888 (struct sockaddr
*)&da
->da_addr
,
889 da
->da_addrlen
, da
->da_transport
,
893 clp
->cl_rpcclient
->cl_softerr
= 0;
894 clp
->cl_rpcclient
->cl_softrtry
= 0;
898 status
= PTR_ERR(clp
);
904 dprintk("%s [new] addr: %s\n", __func__
, ds
->ds_remotestr
);
909 static int _nfs4_pnfs_v4_ds_connect(struct nfs_server
*mds_srv
,
910 struct nfs4_pnfs_ds
*ds
,
912 unsigned int retrans
,
915 struct nfs_client
*clp
= ERR_PTR(-EIO
);
916 struct nfs4_pnfs_ds_addr
*da
;
919 dprintk("--> %s DS %s\n", __func__
, ds
->ds_remotestr
);
921 list_for_each_entry(da
, &ds
->ds_addrs
, da_node
) {
922 dprintk("%s: DS %s: trying address %s\n",
923 __func__
, ds
->ds_remotestr
, da
->da_remotestr
);
925 if (!IS_ERR(clp
) && clp
->cl_mvops
->session_trunk
) {
926 struct xprt_create xprt_args
= {
927 .ident
= da
->da_transport
,
929 .dstaddr
= (struct sockaddr
*)&da
->da_addr
,
930 .addrlen
= da
->da_addrlen
,
931 .servername
= clp
->cl_hostname
,
933 struct nfs4_add_xprt_data xprtdata
= {
936 struct rpc_add_xprt_test rpcdata
= {
937 .add_xprt_test
= clp
->cl_mvops
->session_trunk
,
941 if (da
->da_transport
!= clp
->cl_proto
)
943 if (da
->da_addr
.ss_family
!= clp
->cl_addr
.ss_family
)
946 * Test this address for session trunking and
949 xprtdata
.cred
= nfs4_get_clid_cred(clp
),
950 rpc_clnt_add_xprt(clp
->cl_rpcclient
, &xprt_args
,
951 rpc_clnt_setup_test_and_add_xprt
,
954 put_cred(xprtdata
.cred
);
956 clp
= nfs4_set_ds_client(mds_srv
,
957 (struct sockaddr
*)&da
->da_addr
,
959 da
->da_transport
, timeo
,
960 retrans
, minor_version
);
964 status
= nfs4_init_ds_session(clp
,
965 mds_srv
->nfs_client
->cl_lease_time
);
976 status
= PTR_ERR(clp
);
982 dprintk("%s [new] addr: %s\n", __func__
, ds
->ds_remotestr
);
988 * Create an rpc connection to the nfs4_pnfs_ds data server.
989 * Currently only supports IPv4 and IPv6 addresses.
990 * If connection fails, make devid unavailable and return a -errno.
992 int nfs4_pnfs_ds_connect(struct nfs_server
*mds_srv
, struct nfs4_pnfs_ds
*ds
,
993 struct nfs4_deviceid_node
*devid
, unsigned int timeo
,
994 unsigned int retrans
, u32 version
, u32 minor_version
)
1000 if (test_and_set_bit(NFS4DS_CONNECTING
, &ds
->ds_state
) == 0) {
1002 err
= _nfs4_pnfs_v3_ds_connect(mds_srv
, ds
, timeo
,
1004 } else if (version
== 4) {
1005 err
= _nfs4_pnfs_v4_ds_connect(mds_srv
, ds
, timeo
,
1006 retrans
, minor_version
);
1008 dprintk("%s: unsupported DS version %d\n", __func__
,
1010 err
= -EPROTONOSUPPORT
;
1013 nfs4_clear_ds_conn_bit(ds
);
1015 nfs4_wait_ds_connect(ds
);
1017 /* what was waited on didn't connect AND didn't mark unavail */
1018 if (!ds
->ds_clp
&& !nfs4_test_deviceid_unavailable(devid
))
1023 * At this point the ds->ds_clp should be ready, but it might have
1027 if (!ds
->ds_clp
|| !nfs_client_init_is_complete(ds
->ds_clp
)) {
1028 WARN_ON_ONCE(ds
->ds_clp
||
1029 !nfs4_test_deviceid_unavailable(devid
));
1032 err
= nfs_client_init_status(ds
->ds_clp
);
1037 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect
);
1040 * Currently only supports ipv4, ipv6 and one multi-path address.
1042 struct nfs4_pnfs_ds_addr
*
1043 nfs4_decode_mp_ds_addr(struct net
*net
, struct xdr_stream
*xdr
, gfp_t gfp_flags
)
1045 struct nfs4_pnfs_ds_addr
*da
= NULL
;
1046 char *buf
, *portstr
;
1052 char *startsep
= "";
1057 nlen
= xdr_stream_decode_string_dup(xdr
, &netid
, XDR_MAX_NETOBJ
,
1059 if (unlikely(nlen
< 0))
1062 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
1063 /* port is ".ABC.DEF", 8 chars max */
1064 rlen
= xdr_stream_decode_string_dup(xdr
, &buf
, INET6_ADDRSTRLEN
+
1065 IPV6_SCOPE_ID_LEN
+ 8, gfp_flags
);
1066 if (unlikely(rlen
< 0))
1067 goto out_free_netid
;
1069 /* replace port '.' with '-' */
1070 portstr
= strrchr(buf
, '.');
1072 dprintk("%s: Failed finding expected dot in port\n",
1078 /* find '.' between address and port */
1079 portstr
= strrchr(buf
, '.');
1081 dprintk("%s: Failed finding expected dot between address and "
1082 "port\n", __func__
);
1087 da
= nfs4_pnfs_ds_addr_alloc(gfp_flags
);
1091 if (!rpc_pton(net
, buf
, portstr
-buf
, (struct sockaddr
*)&da
->da_addr
,
1092 sizeof(da
->da_addr
))) {
1093 dprintk("%s: error parsing address %s\n", __func__
, buf
);
1098 sscanf(portstr
, "%d-%d", &tmp
[0], &tmp
[1]);
1099 port
= htons((tmp
[0] << 8) | (tmp
[1]));
1101 switch (da
->da_addr
.ss_family
) {
1103 ((struct sockaddr_in
*)&da
->da_addr
)->sin_port
= port
;
1104 da
->da_addrlen
= sizeof(struct sockaddr_in
);
1108 ((struct sockaddr_in6
*)&da
->da_addr
)->sin6_port
= port
;
1109 da
->da_addrlen
= sizeof(struct sockaddr_in6
);
1115 dprintk("%s: unsupported address family: %u\n",
1116 __func__
, da
->da_addr
.ss_family
);
1120 da
->da_transport
= xprt_find_transport_ident(netid
);
1121 if (da
->da_transport
< 0) {
1122 dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
1127 da
->da_netid
= netid
;
1129 /* save human readable address */
1130 len
= strlen(startsep
) + strlen(buf
) + strlen(endsep
) + 7;
1131 da
->da_remotestr
= kzalloc(len
, gfp_flags
);
1133 /* NULL is ok, only used for dprintk */
1134 if (da
->da_remotestr
)
1135 snprintf(da
->da_remotestr
, len
, "%s%s%s:%u", startsep
,
1136 buf
, endsep
, ntohs(port
));
1138 dprintk("%s: Parsed DS addr %s\n", __func__
, da
->da_remotestr
);
1145 dprintk("%s: Error parsing DS addr: %s\n", __func__
, buf
);
1152 EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr
);
1155 pnfs_layout_mark_request_commit(struct nfs_page
*req
,
1156 struct pnfs_layout_segment
*lseg
,
1157 struct nfs_commit_info
*cinfo
,
1160 struct list_head
*list
;
1161 struct pnfs_commit_array
*array
;
1162 struct pnfs_commit_bucket
*bucket
;
1164 mutex_lock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1165 array
= pnfs_lookup_commit_array(cinfo
->ds
, lseg
);
1166 if (!array
|| !pnfs_is_valid_lseg(lseg
))
1168 bucket
= &array
->buckets
[ds_commit_idx
];
1169 list
= &bucket
->written
;
1170 /* Non-empty buckets hold a reference on the lseg. That ref
1171 * is normally transferred to the COMMIT call and released
1172 * there. It could also be released if the last req is pulled
1173 * off due to a rewrite, in which case it will be done in
1174 * pnfs_generic_clear_request_commit
1177 bucket
->lseg
= pnfs_get_lseg(lseg
);
1178 set_bit(PG_COMMIT_TO_DS
, &req
->wb_flags
);
1179 cinfo
->ds
->nwritten
++;
1181 nfs_request_add_commit_list_locked(req
, list
, cinfo
);
1182 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1183 nfs_mark_page_unstable(req
->wb_page
, cinfo
);
1186 mutex_unlock(&NFS_I(cinfo
->inode
)->commit_mutex
);
1187 cinfo
->completion_ops
->resched_write(cinfo
, req
);
1189 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit
);
1192 pnfs_nfs_generic_sync(struct inode
*inode
, bool datasync
)
1196 if (!pnfs_layoutcommit_outstanding(inode
))
1198 ret
= nfs_commit_inode(inode
, FLUSH_SYNC
);
1203 return pnfs_layoutcommit_inode(inode
, true);
1205 EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync
);