/*
 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
16 #include <linux/blkdev.h>
17 #include <linux/bio.h>
18 #include <linux/dst.h>
20 #include <linux/in6.h>
21 #include <linux/poll.h>
22 #include <linux/slab.h>
23 #include <linux/socket.h>
/*
 * Export bioset is used for server block IO requests: every bio allocated
 * for a client request comes from this pool (with front-pad room for the
 * per-request struct dst_export_priv, see dst_process_io()).
 */
static struct bio_set *dst_bio_set;
32 int __init
dst_export_init(void)
36 dst_bio_set
= bioset_create(32, sizeof(struct dst_export_priv
));
46 void dst_export_exit(void)
48 bioset_free(dst_bio_set
);
52 * When client connects and autonegotiates with the server node,
53 * its permissions are checked in a security attributes and sent
56 static unsigned int dst_check_permissions(struct dst_state
*main
, struct dst_state
*st
)
58 struct dst_node
*n
= main
->node
;
59 struct dst_secure
*sentry
;
60 struct dst_secure_user
*s
;
61 struct saddr
*sa
= &st
->ctl
.addr
;
62 unsigned int perm
= 0;
64 mutex_lock(&n
->security_lock
);
65 list_for_each_entry(sentry
, &n
->security_list
, sec_entry
) {
68 if (s
->addr
.sa_family
!= sa
->sa_family
)
71 if (s
->addr
.sa_data_len
!= sa
->sa_data_len
)
75 * This '2' below is a port field. This may be very wrong to do
76 * in atalk for example though. If there will be any need to extent
77 * protocol to something else, I can create per-family helpers and
78 * use them instead of this memcmp.
80 if (memcmp(s
->addr
.sa_data
+ 2, sa
->sa_data
+ 2,
84 perm
= s
->permissions
;
86 mutex_unlock(&n
->security_lock
);
92 * Accept new client: allocate appropriate network state and check permissions.
94 static struct dst_state
*dst_accept_client(struct dst_state
*st
)
96 unsigned int revents
= 0;
97 unsigned int err_mask
= POLLERR
| POLLHUP
| POLLRDHUP
;
98 unsigned int mask
= err_mask
| POLLIN
;
99 struct dst_node
*n
= st
->node
;
101 struct socket
*sock
= NULL
;
102 struct dst_state
*new;
104 while (!err
&& !sock
) {
105 revents
= dst_state_poll(st
);
107 if (!(revents
& mask
)) {
111 prepare_to_wait(&st
->thread_wait
,
112 &wait
, TASK_INTERRUPTIBLE
);
113 if (!n
->trans_scan_timeout
|| st
->need_exit
)
116 revents
= dst_state_poll(st
);
121 if (signal_pending(current
))
125 * Magic HZ? Polling check above is not safe in
126 * all cases (like socket reset in BH context),
127 * so it is simpler just to postpone it to the
128 * process context instead of implementing special
131 schedule_timeout(HZ
);
133 finish_wait(&st
->thread_wait
, &wait
);
139 dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
140 __func__
, st
, revents
, revents
& err_mask
,
143 if (revents
& err_mask
) {
144 dprintk("%s: revents: %x, socket: %p, err: %d.\n",
145 __func__
, revents
, st
->socket
, err
);
149 if (!n
->trans_scan_timeout
|| st
->need_exit
)
152 if (st
->socket
&& (revents
& POLLIN
))
153 err
= kernel_accept(st
->socket
, &sock
, 0);
155 dst_state_unlock(st
);
161 new = dst_state_alloc(st
->node
);
164 goto err_out_release
;
168 new->ctl
.addr
.sa_data_len
= sizeof(struct sockaddr
);
169 err
= kernel_getpeername(sock
, (struct sockaddr
*)&new->ctl
.addr
,
170 (int *)&new->ctl
.addr
.sa_data_len
);
174 new->permissions
= dst_check_permissions(st
, new);
175 if (new->permissions
== 0) {
177 dst_dump_addr(sock
, (struct sockaddr
*)&new->ctl
.addr
,
178 "Client is not allowed to connect");
182 err
= dst_poll_init(new);
186 dst_dump_addr(sock
, (struct sockaddr
*)&new->ctl
.addr
,
200 * Each server's block request sometime finishes.
201 * Usually it happens in hard irq context of the appropriate controller,
202 * so to play good with all cases we just queue BIO into the queue
203 * and wake up processing thread, which gets completed request and
204 * send (encrypting if needed) it back to the client (if it was a read
205 * request), or sends back reply that writing succesfully completed.
207 static int dst_export_process_request_queue(struct dst_state
*st
)
210 struct dst_export_priv
*p
= NULL
;
214 while (!list_empty(&st
->request_list
)) {
215 spin_lock_irqsave(&st
->request_lock
, flags
);
216 if (!list_empty(&st
->request_list
)) {
217 p
= list_first_entry(&st
->request_list
,
218 struct dst_export_priv
, request_entry
);
219 list_del(&p
->request_entry
);
221 spin_unlock_irqrestore(&st
->request_lock
, flags
);
228 if (dst_need_crypto(st
->node
) && (bio_data_dir(bio
) == READ
))
229 err
= dst_export_crypto(st
->node
, bio
);
231 err
= dst_export_send_bio(bio
);
241 * Cleanup export state.
242 * It has to wait until all requests are finished,
243 * and then free them all.
245 static void dst_state_cleanup_export(struct dst_state
*st
)
247 struct dst_export_priv
*p
;
251 * This loop waits for all pending bios to be completed and freed.
253 while (atomic_read(&st
->refcnt
) > 1) {
254 dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
255 __func__
, st
, atomic_read(&st
->refcnt
),
256 list_empty(&st
->request_list
));
257 wait_event_timeout(st
->thread_wait
,
258 (atomic_read(&st
->refcnt
) == 1) ||
259 !list_empty(&st
->request_list
),
262 while (!list_empty(&st
->request_list
)) {
264 spin_lock_irqsave(&st
->request_lock
, flags
);
265 if (!list_empty(&st
->request_list
)) {
266 p
= list_first_entry(&st
->request_list
,
267 struct dst_export_priv
, request_entry
);
268 list_del(&p
->request_entry
);
270 spin_unlock_irqrestore(&st
->request_lock
, flags
);
275 dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: %p.\n",
276 __func__
, st
, atomic_read(&st
->refcnt
),
277 list_empty(&st
->request_list
), p
);
285 * Client accepting thread.
286 * Not only accepts new connection, but also schedules receiving thread
287 * and performs request completion described above.
289 static int dst_accept(void *init_data
, void *schedule_data
)
291 struct dst_state
*main_st
= schedule_data
;
292 struct dst_node
*n
= init_data
;
293 struct dst_state
*st
;
296 while (n
->trans_scan_timeout
&& !main_st
->need_exit
) {
297 dprintk("%s: main_st: %p, n: %p.\n", __func__
, main_st
, n
);
298 st
= dst_accept_client(main_st
);
302 err
= dst_state_schedule_receiver(st
);
304 while (n
->trans_scan_timeout
) {
305 err
= wait_event_interruptible_timeout(st
->thread_wait
,
306 !list_empty(&st
->request_list
) ||
307 !n
->trans_scan_timeout
||
311 if (!n
->trans_scan_timeout
|| st
->need_exit
)
314 if (list_empty(&st
->request_list
))
317 err
= dst_export_process_request_queue(st
);
323 wake_up(&st
->thread_wait
);
326 dst_state_cleanup_export(st
);
329 dprintk("%s: freeing listening socket st: %p.\n", __func__
, main_st
);
331 dst_state_lock(main_st
);
332 dst_poll_exit(main_st
);
333 dst_state_socket_release(main_st
);
334 dst_state_unlock(main_st
);
335 dst_state_put(main_st
);
336 dprintk("%s: freed listening socket st: %p.\n", __func__
, main_st
);
341 int dst_start_export(struct dst_node
*n
)
343 if (list_empty(&n
->security_list
)) {
344 printk(KERN_ERR
"You are trying to export node '%s' without security attributes.\n"
345 "No clients will be allowed to connect. Exiting.\n", n
->name
);
348 return dst_node_trans_init(n
, sizeof(struct dst_export_priv
));
352 * Initialize listening state and schedule accepting thread.
354 int dst_node_init_listened(struct dst_node
*n
, struct dst_export_ctl
*le
)
356 struct dst_state
*st
;
358 struct dst_network_ctl
*ctl
= &le
->ctl
;
360 memcpy(&n
->info
->net
, ctl
, sizeof(struct dst_network_ctl
));
362 st
= dst_state_alloc(n
);
367 memcpy(&st
->ctl
, ctl
, sizeof(struct dst_network_ctl
));
369 err
= dst_state_socket_create(st
);
373 st
->socket
->sk
->sk_reuse
= 1;
375 err
= kernel_bind(st
->socket
, (struct sockaddr
*)&ctl
->addr
,
376 ctl
->addr
.sa_data_len
);
378 goto err_out_socket_release
;
380 err
= kernel_listen(st
->socket
, 1024);
382 goto err_out_socket_release
;
385 err
= dst_poll_init(st
);
387 goto err_out_socket_release
;
391 err
= thread_pool_schedule(n
->pool
, dst_thread_setup
,
392 dst_accept
, st
, MAX_SCHEDULE_TIMEOUT
);
394 goto err_out_poll_exit
;
400 err_out_socket_release
:
401 dst_state_socket_release(st
);
410 * Free bio and related private data.
411 * Also drop a reference counter for appropriate state,
412 * which waits when there are no more block IOs in-flight.
414 static void dst_bio_destructor(struct bio
*bio
)
417 struct dst_export_priv
*priv
= bio
->bi_private
;
420 bio_for_each_segment(bv
, bio
, i
) {
424 __free_page(bv
->bv_page
);
428 dst_state_put(priv
->state
);
429 bio_free(bio
, dst_bio_set
);
433 * Block IO completion. Queue request to be sent back to
434 * the client (or just confirmation).
436 static void dst_bio_end_io(struct bio
*bio
, int err
)
438 struct dst_export_priv
*p
= bio
->bi_private
;
439 struct dst_state
*st
= p
->state
;
442 spin_lock_irqsave(&st
->request_lock
, flags
);
443 list_add_tail(&p
->request_entry
, &st
->request_list
);
444 spin_unlock_irqrestore(&st
->request_lock
, flags
);
446 wake_up(&st
->thread_wait
);
450 * Allocate read request for the server.
452 static int dst_export_read_request(struct bio
*bio
, unsigned int total_size
)
460 page
= alloc_page(GFP_KERNEL
);
464 size
= min_t(unsigned int, PAGE_SIZE
, total_size
);
466 err
= bio_add_page(bio
, page
, size
, 0);
467 dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
468 __func__
, (u64
)bio
->bi_sector
, bio
->bi_size
,
471 goto err_out_free_page
;
485 * Allocate write request for the server.
486 * Should not only get pages, but also read data from the network.
488 static int dst_export_write_request(struct dst_state
*st
,
489 struct bio
*bio
, unsigned int total_size
)
498 page
= alloc_page(GFP_KERNEL
);
504 goto err_out_free_page
;
506 size
= min_t(unsigned int, PAGE_SIZE
, total_size
);
508 err
= dst_data_recv(st
, data
, size
);
510 goto err_out_unmap_page
;
512 err
= bio_add_page(bio
, page
, size
, 0);
514 goto err_out_unmap_page
;
532 * Groovy, we've gotten an IO request from the client.
533 * Allocate BIO from the bioset, private data from the mempool
534 * and lots of pages for IO.
536 int dst_process_io(struct dst_state
*st
)
538 struct dst_node
*n
= st
->node
;
539 struct dst_cmd
*cmd
= st
->data
;
541 struct dst_export_priv
*priv
;
544 if (unlikely(!n
->bdev
)) {
549 bio
= bio_alloc_bioset(GFP_KERNEL
,
550 PAGE_ALIGN(cmd
->size
) >> PAGE_SHIFT
,
555 priv
= (struct dst_export_priv
*)(((void *)bio
) - sizeof (struct dst_export_priv
));
557 priv
->state
= dst_state_get(st
);
560 bio
->bi_private
= priv
;
561 bio
->bi_end_io
= dst_bio_end_io
;
562 bio
->bi_destructor
= dst_bio_destructor
;
563 bio
->bi_bdev
= n
->bdev
;
566 * Server side is only interested in two low bits:
567 * uptodate (set by itself actually) and rw block
569 bio
->bi_flags
|= cmd
->flags
& 3;
571 bio
->bi_rw
= cmd
->rw
;
573 bio
->bi_sector
= cmd
->sector
;
575 dst_bio_to_cmd(bio
, &priv
->cmd
, DST_IO_RESPONSE
, cmd
->id
);
578 priv
->cmd
.size
= cmd
->size
;
580 if (bio_data_dir(bio
) == WRITE
) {
581 err
= dst_recv_cdata(st
, priv
->cmd
.hash
);
585 err
= dst_export_write_request(st
, bio
, cmd
->size
);
589 if (dst_need_crypto(n
))
590 return dst_export_crypto(n
, bio
);
592 err
= dst_export_read_request(bio
, cmd
->size
);
597 dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
598 __func__
, (u64
)bio
->bi_sector
, bio
->bi_size
,
599 bio
->bi_rw
, bio_data_dir(bio
),
600 bio
->bi_flags
, bio
->bi_phys_segments
);
602 generic_make_request(bio
);
613 * Ok, block IO is ready, let's send it back to the client...
615 int dst_export_send_bio(struct bio
*bio
)
617 struct dst_export_priv
*p
= bio
->bi_private
;
618 struct dst_state
*st
= p
->state
;
619 struct dst_cmd
*cmd
= &p
->cmd
;
622 dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
623 __func__
, cmd
->id
, (u64
)bio
->bi_sector
, bio
->bi_size
,
624 cmd
->csize
, bio
->bi_flags
, bio
->bi_rw
);
626 dst_convert_cmd(cmd
);
634 if (bio_data_dir(bio
) == WRITE
) {
635 /* ... or just confirmation that writing has completed. */
636 cmd
->size
= cmd
->csize
= 0;
637 err
= dst_data_send_header(st
->socket
, cmd
,
638 sizeof(struct dst_cmd
), 0);
642 err
= dst_send_bio(st
, cmd
, bio
);
647 dst_state_unlock(st
);
653 dst_state_unlock(st
);