1 /* SPDX-License-Identifier: GPL-2.0 */
3 * linux/include/linux/sunrpc/svc.h
5 * RPC server declarations.
7 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
#ifndef SUNRPC_SVC_H
#define SUNRPC_SVC_H

#include <linux/in6.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/lwq.h>
#include <linux/wait.h>
#include <linux/pagevec.h>
#include <linux/kthread.h>
28 * RPC service thread pool.
30 * Pool of threads and temporary sockets. Generally there is only
31 * a single one of these per RPC service, but on NUMA machines those
32 * services that can benefit from it (i.e. nfs but not lockd) will
33 * have one pool per NUMA node. This optimisation reduces cross-
34 * node traffic on multi-node NUMA NFS servers.
37 unsigned int sp_id
; /* pool id; also node id on NUMA */
38 struct lwq sp_xprts
; /* pending transports */
39 unsigned int sp_nrthreads
; /* # of threads in pool */
40 struct list_head sp_all_threads
; /* all server threads */
41 struct llist_head sp_idle_threads
; /* idle server threads */
43 /* statistics on pool operation */
44 struct percpu_counter sp_messages_arrived
;
45 struct percpu_counter sp_sockets_queued
;
46 struct percpu_counter sp_threads_woken
;
48 unsigned long sp_flags
;
49 } ____cacheline_aligned_in_smp
;
/* bits for sp_flags */
enum {
	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
};
62 * An RPC service is a ``daemon,'' possibly multithreaded, which
63 * receives and processes incoming RPC messages.
64 * It has one or more transport sockets associated with it, and maintains
65 * a list of idle threads waiting for input.
67 * We currently do not support more than one RPC program per daemon.
70 struct svc_program
* sv_programs
; /* RPC programs */
71 struct svc_stat
* sv_stats
; /* RPC statistics */
73 unsigned int sv_nprogs
; /* Number of sv_programs */
74 unsigned int sv_nrthreads
; /* # of server threads */
75 unsigned int sv_maxconn
; /* max connections allowed or
76 * '0' causing max to be based
77 * on number of threads. */
79 unsigned int sv_max_payload
; /* datagram payload size */
80 unsigned int sv_max_mesg
; /* max_payload + 1 page for overheads */
81 unsigned int sv_xdrsize
; /* XDR buffer size */
82 struct list_head sv_permsocks
; /* all permanent sockets */
83 struct list_head sv_tempsocks
; /* all temporary sockets */
84 int sv_tmpcnt
; /* count of temporary sockets */
85 struct timer_list sv_temptimer
; /* timer for aging temporary sockets */
87 char * sv_name
; /* service name */
89 unsigned int sv_nrpools
; /* number of thread pools */
90 bool sv_is_pooled
; /* is this a pooled service? */
91 struct svc_pool
* sv_pools
; /* array of thread pools */
92 int (*sv_threadfn
)(void *data
);
94 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
95 struct lwq sv_cb_list
; /* queue for callback requests
96 * that arrive over the same
98 bool sv_bc_enabled
; /* service uses backchannel */
99 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
/* This is used by pool_stats to find and lock an svc */
struct svc_info {
	struct svc_serv		*serv;
	/* NOTE(review): upstream also carries a "struct mutex *mutex" member
	 * here to serialize pool-stats access -- confirm against the tree
	 * this header was taken from.
	 */
	struct mutex		*mutex;
};
/* Tear down a service and release its resources; *svcp is cleared. */
void svc_destroy(struct svc_serv **svcp);
111 * Maximum payload size supported by a kernel RPC server.
112 * This is use to determine the max number of pages nfsd is
113 * willing to return in a single READ operation.
115 * These happen to all be powers of 2, which is not strictly
116 * necessary but helps enforce the real limitation, which is
117 * that they should be multiples of PAGE_SIZE.
119 * For UDP transports, a block plus NFS,RPC, and UDP headers
120 * has to fit into the IP datagram limit of 64K. The largest
121 * feasible number for all known page sizes is probably 48K,
122 * but we choose 32K here. This is the same as the historical
123 * Linux limit; someone who cares more about NFS/UDP performance
124 * can test a larger number.
126 * For TCP transports we have more freedom. A size of 1MB is
127 * chosen to match the client limit. Other OSes are known to
128 * have larger limits, but those numbers are probably beyond
129 * the point of diminishing returns.
131 #define RPCSVC_MAXPAYLOAD (1*1024*1024u)
132 #define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD
133 #define RPCSVC_MAXPAYLOAD_UDP (32*1024u)
135 extern u32
svc_max_payload(const struct svc_rqst
*rqstp
);
/*
 * RPC Requests and replies are stored in one or more pages.
 * We maintain an array of pages for each server thread.
 * Requests are copied into these pages as they arrive.  Remaining
 * pages are available to write the reply into.
 *
 * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
 * needs to allocate more to replace those used in sending.  To help keep track
 * of these pages we have a receive list where all pages initially live, and a
 * send list where pages are moved to when they are to be part of a reply.
 *
 * We use xdr_buf for holding responses as it fits well with NFS
 * read responses (that have a header, and some data pages, and possibly
 * a tail) and means we can share some client side routines.
 *
 * The xdr_buf.head kvec always points to the first page in the rq_*pages
 * list. The xdr_buf.pages pointer points to the second page on that
 * list.  xdr_buf.tail points to the end of the first page.
 * This assumes that the non-page part of an rpc reply will fit
 * in a page - NFSd ensures this.  lockd also has no trouble.
 *
 * Each request/reply pair can have at most one "payload", plus two pages,
 * one for the request, and one for the reply.
 * When using ->sendfile to return read data, we might need one extra page
 * if the request is not page-aligned.  So add another '1'.
 */
#define RPCSVC_MAXPAGES		((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \
				+ 2 + 1)
167 * The context of a single thread, including the request currently being
171 struct list_head rq_all
; /* all threads list */
172 struct llist_node rq_idle
; /* On the idle list */
173 struct rcu_head rq_rcu_head
; /* for RCU deferred kfree */
174 struct svc_xprt
* rq_xprt
; /* transport ptr */
176 struct sockaddr_storage rq_addr
; /* peer address */
178 struct sockaddr_storage rq_daddr
; /* dest addr of request
179 * - reply from here */
182 struct svc_serv
* rq_server
; /* RPC service definition */
183 struct svc_pool
* rq_pool
; /* thread pool */
184 const struct svc_procedure
*rq_procinfo
;/* procedure info */
185 struct auth_ops
* rq_authop
; /* authentication flavour */
186 struct svc_cred rq_cred
; /* auth info */
187 void * rq_xprt_ctxt
; /* transport specific context ptr */
188 struct svc_deferred_req
*rq_deferred
; /* deferred request we are replaying */
190 struct xdr_buf rq_arg
;
191 struct xdr_stream rq_arg_stream
;
192 struct xdr_stream rq_res_stream
;
193 struct page
*rq_scratch_page
;
194 struct xdr_buf rq_res
;
195 struct page
*rq_pages
[RPCSVC_MAXPAGES
+ 1];
196 struct page
* *rq_respages
; /* points into rq_pages */
197 struct page
* *rq_next_page
; /* next reply page to use */
198 struct page
* *rq_page_end
; /* one past the last page */
200 struct folio_batch rq_fbatch
;
201 struct kvec rq_vec
[RPCSVC_MAXPAGES
]; /* generally useful.. */
202 struct bio_vec rq_bvec
[RPCSVC_MAXPAGES
];
204 __be32 rq_xid
; /* transmission id */
205 u32 rq_prog
; /* program number */
206 u32 rq_vers
; /* program version */
207 u32 rq_proc
; /* procedure number */
208 u32 rq_prot
; /* IP protocol */
209 int rq_cachetype
; /* catering to nfsd */
210 unsigned long rq_flags
; /* flags field */
211 ktime_t rq_qtime
; /* enqueue time */
213 void * rq_argp
; /* decoded arguments */
214 void * rq_resp
; /* xdr'd results */
215 __be32
*rq_accept_statp
;
216 void * rq_auth_data
; /* flavor-specific data */
217 __be32 rq_auth_stat
; /* authentication status */
218 int rq_auth_slack
; /* extra space xdr code
219 * should leave in head
222 int rq_reserved
; /* space on socket outq
223 * reserved for this request
225 ktime_t rq_stime
; /* start time */
227 struct cache_req rq_chandle
; /* handle passed to caches for
230 /* Catering to nfsd */
231 struct auth_domain
* rq_client
; /* RPC peer info */
232 struct auth_domain
* rq_gssclient
; /* "gss/"-style peer info */
233 struct task_struct
*rq_task
; /* service thread */
234 struct net
*rq_bc_net
; /* pointer to backchannel's
238 int rq_err
; /* Thread sets this to inidicate
239 * initialisation success.
242 unsigned long bc_to_initval
;
243 unsigned int bc_to_retries
;
244 void ** rq_lease_breaker
; /* The v4 client breaking a lease */
245 unsigned int rq_status_counter
; /* RPC processing counter */
/* bits for rq_flags */
enum {
	RQ_SECURE,		/* secure port */
	RQ_LOCAL,		/* local request */
	RQ_USEDEFERRAL,		/* use deferral */
	RQ_DROPME,		/* drop current reply */
	RQ_VICTIM,		/* Have agreed to shut down */
	RQ_DATA,		/* request has data */
};

/* Forward-channel requests have a transport; backchannel ones carry the net */
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
261 * Rigorous type checking on sockaddr type conversions
263 static inline struct sockaddr_in
*svc_addr_in(const struct svc_rqst
*rqst
)
265 return (struct sockaddr_in
*) &rqst
->rq_addr
;
268 static inline struct sockaddr_in6
*svc_addr_in6(const struct svc_rqst
*rqst
)
270 return (struct sockaddr_in6
*) &rqst
->rq_addr
;
273 static inline struct sockaddr
*svc_addr(const struct svc_rqst
*rqst
)
275 return (struct sockaddr
*) &rqst
->rq_addr
;
278 static inline struct sockaddr_in
*svc_daddr_in(const struct svc_rqst
*rqst
)
280 return (struct sockaddr_in
*) &rqst
->rq_daddr
;
283 static inline struct sockaddr_in6
*svc_daddr_in6(const struct svc_rqst
*rqst
)
285 return (struct sockaddr_in6
*) &rqst
->rq_daddr
;
288 static inline struct sockaddr
*svc_daddr(const struct svc_rqst
*rqst
)
290 return (struct sockaddr
*) &rqst
->rq_daddr
;
294 * svc_thread_should_stop - check if this thread should stop
295 * @rqstp: the thread that might need to stop
297 * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
298 * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
299 * the victim using this function. It should then promptly call
300 * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS
301 * so the task waiting for a thread to exit can wake and continue.
304 * %true: caller should invoke svc_exit_thread()
305 * %false: caller should do nothing
307 static inline bool svc_thread_should_stop(struct svc_rqst
*rqstp
)
309 if (test_and_clear_bit(SP_NEED_VICTIM
, &rqstp
->rq_pool
->sp_flags
))
310 set_bit(RQ_VICTIM
, &rqstp
->rq_flags
);
312 return test_bit(RQ_VICTIM
, &rqstp
->rq_flags
);
316 * svc_thread_init_status - report whether thread has initialised successfully
317 * @rqstp: the thread in question
320 * After performing any initialisation that could fail, and before starting
321 * normal work, each sunrpc svc_thread must call svc_thread_init_status()
322 * with an appropriate error, or zero.
324 * If zero is passed, the thread is ready and must continue until
325 * svc_thread_should_stop() returns true. If a non-zero error is passed
326 * the call will not return - the thread will exit.
328 static inline void svc_thread_init_status(struct svc_rqst
*rqstp
, int err
)
331 /* memory barrier ensures assignment to error above is visible before
332 * waitqueue_active() test below completes.
335 wake_up_var(&rqstp
->rq_err
);
340 struct svc_deferred_req
{
341 u32 prot
; /* protocol (UDP or TCP) */
342 struct svc_xprt
*xprt
;
343 struct sockaddr_storage addr
; /* where reply must go */
345 struct sockaddr_storage daddr
; /* where reply must come from */
348 struct cache_deferred_req handle
;
struct svc_process_info {
	union {
		/* valid request: how to dispatch it */
		int  (*dispatch)(struct svc_rqst *rqstp);
		/* version mismatch: range of versions to report */
		struct {
			unsigned int lovers;
			unsigned int hivers;
		} mismatch;
	};
};
364 * RPC program - an array of these can use the same transport endpoint
367 u32 pg_prog
; /* program number */
368 unsigned int pg_lovers
; /* lowest version */
369 unsigned int pg_hivers
; /* highest version */
370 unsigned int pg_nvers
; /* number of versions */
371 const struct svc_version
**pg_vers
; /* version array */
372 char * pg_name
; /* service name */
373 char * pg_class
; /* class name: services sharing authentication */
374 enum svc_auth_status (*pg_authenticate
)(struct svc_rqst
*rqstp
);
375 __be32 (*pg_init_request
)(struct svc_rqst
*,
376 const struct svc_program
*,
377 struct svc_process_info
*);
378 int (*pg_rpcbind_set
)(struct net
*net
,
379 const struct svc_program
*,
380 u32 version
, int family
,
381 unsigned short proto
,
382 unsigned short port
);
386 * RPC program version
389 u32 vs_vers
; /* version number */
390 u32 vs_nproc
; /* number of procedures */
391 const struct svc_procedure
*vs_proc
; /* per-procedure info */
392 unsigned long __percpu
*vs_count
; /* call counts */
393 u32 vs_xdrsize
; /* xdrsize needed for this version */
395 /* Don't register with rpcbind */
398 /* Don't care if the rpcbind registration fails */
401 /* Need xprt with congestion control */
402 bool vs_need_cong_ctrl
;
404 /* Dispatch function */
405 int (*vs_dispatch
)(struct svc_rqst
*rqstp
);
411 struct svc_procedure
{
412 /* process the request: */
413 __be32 (*pc_func
)(struct svc_rqst
*);
414 /* XDR decode args: */
415 bool (*pc_decode
)(struct svc_rqst
*rqstp
,
416 struct xdr_stream
*xdr
);
417 /* XDR encode result: */
418 bool (*pc_encode
)(struct svc_rqst
*rqstp
,
419 struct xdr_stream
*xdr
);
420 /* XDR free result: */
421 void (*pc_release
)(struct svc_rqst
*);
422 unsigned int pc_argsize
; /* argument struct size */
423 unsigned int pc_argzero
; /* how much of argument to clear */
424 unsigned int pc_ressize
; /* result struct size */
425 unsigned int pc_cachetype
; /* cache info (NFS) */
426 unsigned int pc_xdrressize
; /* maximum size of XDR reply */
427 const char * pc_name
; /* for display */
431 * Function prototypes.
433 int sunrpc_set_pool_mode(const char *val
);
434 int sunrpc_get_pool_mode(char *val
, size_t size
);
435 void svc_rpcb_cleanup(struct svc_serv
*serv
, struct net
*net
);
436 int svc_bind(struct svc_serv
*serv
, struct net
*net
);
437 struct svc_serv
*svc_create(struct svc_program
*, unsigned int,
438 int (*threadfn
)(void *data
));
439 bool svc_rqst_replace_page(struct svc_rqst
*rqstp
,
441 void svc_rqst_release_pages(struct svc_rqst
*rqstp
);
442 void svc_exit_thread(struct svc_rqst
*);
443 struct svc_serv
* svc_create_pooled(struct svc_program
*prog
,
445 struct svc_stat
*stats
,
446 unsigned int bufsize
,
447 int (*threadfn
)(void *data
));
448 int svc_set_num_threads(struct svc_serv
*, struct svc_pool
*, int);
449 int svc_pool_stats_open(struct svc_info
*si
, struct file
*file
);
450 void svc_process(struct svc_rqst
*rqstp
);
451 void svc_process_bc(struct rpc_rqst
*req
, struct svc_rqst
*rqstp
);
452 int svc_register(const struct svc_serv
*, struct net
*, const int,
453 const unsigned short, const unsigned short);
455 void svc_wake_up(struct svc_serv
*);
456 void svc_reserve(struct svc_rqst
*rqstp
, int space
);
457 void svc_pool_wake_idle_thread(struct svc_pool
*pool
);
458 struct svc_pool
*svc_pool_for_cpu(struct svc_serv
*serv
);
459 char * svc_print_addr(struct svc_rqst
*, char *, size_t);
460 const char * svc_proc_name(const struct svc_rqst
*rqstp
);
461 int svc_encode_result_payload(struct svc_rqst
*rqstp
,
463 unsigned int length
);
464 unsigned int svc_fill_write_vector(struct svc_rqst
*rqstp
,
465 struct xdr_buf
*payload
);
466 char *svc_fill_symlink_pathname(struct svc_rqst
*rqstp
,
467 struct kvec
*first
, void *p
,
469 __be32
svc_generic_init_request(struct svc_rqst
*rqstp
,
470 const struct svc_program
*progp
,
471 struct svc_process_info
*procinfo
);
472 int svc_generic_rpcbind_set(struct net
*net
,
473 const struct svc_program
*progp
,
474 u32 version
, int family
,
475 unsigned short proto
,
476 unsigned short port
);
478 #define RPC_MAX_ADDRBUFLEN (63U)
481 * When we want to reduce the size of the reserved space in the response
482 * buffer, we need to take into account the size of any checksum data that
483 * may be at the end of the packet. This is difficult to determine exactly
484 * for all cases without actually generating the checksum, so we just use a
487 static inline void svc_reserve_auth(struct svc_rqst
*rqstp
, int space
)
489 svc_reserve(rqstp
, space
+ rqstp
->rq_auth_slack
);
493 * svcxdr_init_decode - Prepare an xdr_stream for Call decoding
494 * @rqstp: controlling server RPC transaction context
497 static inline void svcxdr_init_decode(struct svc_rqst
*rqstp
)
499 struct xdr_stream
*xdr
= &rqstp
->rq_arg_stream
;
500 struct xdr_buf
*buf
= &rqstp
->rq_arg
;
501 struct kvec
*argv
= buf
->head
;
503 WARN_ON(buf
->len
!= buf
->head
->iov_len
+ buf
->page_len
+ buf
->tail
->iov_len
);
504 buf
->len
= buf
->head
->iov_len
+ buf
->page_len
+ buf
->tail
->iov_len
;
506 xdr_init_decode(xdr
, buf
, argv
->iov_base
, NULL
);
507 xdr_set_scratch_page(xdr
, rqstp
->rq_scratch_page
);
511 * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding
512 * @rqstp: controlling server RPC transaction context
515 static inline void svcxdr_init_encode(struct svc_rqst
*rqstp
)
517 struct xdr_stream
*xdr
= &rqstp
->rq_res_stream
;
518 struct xdr_buf
*buf
= &rqstp
->rq_res
;
519 struct kvec
*resv
= buf
->head
;
521 xdr_reset_scratch_buffer(xdr
);
525 xdr
->p
= resv
->iov_base
+ resv
->iov_len
;
526 xdr
->end
= resv
->iov_base
+ PAGE_SIZE
;
527 buf
->len
= resv
->iov_len
;
528 xdr
->page_ptr
= buf
->pages
- 1;
529 buf
->buflen
= PAGE_SIZE
* (rqstp
->rq_page_end
- buf
->pages
);
534 * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
535 * @xdr: xdr_stream to be updated
536 * @pages: array of pages to insert
537 * @base: starting offset of first data byte in @pages
538 * @len: number of data bytes in @pages to insert
540 * After the @pages are added, the tail iovec is instantiated pointing
541 * to end of the head buffer, and the stream is set up to encode
542 * subsequent items into the tail.
544 static inline void svcxdr_encode_opaque_pages(struct svc_rqst
*rqstp
,
545 struct xdr_stream
*xdr
,
550 xdr_write_pages(xdr
, pages
, base
, len
);
551 xdr
->page_ptr
= rqstp
->rq_next_page
- 1;
555 * svcxdr_set_auth_slack -
556 * @rqstp: RPC transaction
557 * @slack: buffer space to reserve for the transaction's security flavor
559 * Set the request's slack space requirement, and set aside that much
560 * space in the rqstp's rq_res.head for use when the auth wraps the Reply.
562 static inline void svcxdr_set_auth_slack(struct svc_rqst
*rqstp
, int slack
)
564 struct xdr_stream
*xdr
= &rqstp
->rq_res_stream
;
565 struct xdr_buf
*buf
= &rqstp
->rq_res
;
566 struct kvec
*resv
= buf
->head
;
568 rqstp
->rq_auth_slack
= slack
;
570 xdr
->end
-= XDR_QUADLEN(slack
);
571 buf
->buflen
-= rqstp
->rq_auth_slack
;
573 WARN_ON(xdr
->iov
!= resv
);
574 WARN_ON(xdr
->p
> xdr
->end
);
578 * svcxdr_set_accept_stat - Reserve space for the accept_stat field
579 * @rqstp: RPC transaction context
583 * %false: No response buffer space was available
585 static inline bool svcxdr_set_accept_stat(struct svc_rqst
*rqstp
)
587 struct xdr_stream
*xdr
= &rqstp
->rq_res_stream
;
589 rqstp
->rq_accept_statp
= xdr_reserve_space(xdr
, XDR_UNIT
);
590 if (unlikely(!rqstp
->rq_accept_statp
))
592 *rqstp
->rq_accept_statp
= rpc_success
;
596 #endif /* SUNRPC_SVC_H */