2 * Lightweight Autonomic Network Architecture
4 * LANA BSD Socket interface for communication with user level.
5 * PF_LANA protocol family socket handler.
7 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
8 * Swiss federal institute of technology (ETH Zurich)
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/spinlock.h>
15 #include <linux/notifier.h>
16 #include <linux/rcupdate.h>
17 #include <linux/seqlock.h>
18 #include <linux/bug.h>
19 #include <linux/percpu.h>
20 #include <linux/prefetch.h>
21 #include <linux/atomic.h>
22 #include <linux/slab.h>
25 #include "xt_fblock.h"
26 #include "xt_builder.h"
29 #include "xt_engine.h"
30 #include "xt_builder.h"
32 #define AF_LANA 27 /* For now.. */
33 #define PF_LANA AF_LANA
35 /* LANA protocol types on top of the PF_LANA family */
36 #define LANA_PROTO_AUTO 0 /* Auto-select if none is given */
37 #define LANA_PROTO_RAW 1 /* LANA raw proto, currently the only one */
38 /* Total num of protos available */
41 /* Protocols in LANA family */
42 struct lana_protocol
{
44 const struct proto_ops
*ops
;
49 struct fb_pflana_priv
{
52 struct lana_sock
*sock_self
;
62 static DEFINE_MUTEX(proto_tab_lock
);
64 static struct lana_protocol
*proto_tab
[LANA_NPROTO
] __read_mostly
;
66 static int fb_pflana_netrx(const struct fblock
* const fb
,
68 enum path_type
* const dir
)
70 u8
*skb_head
= skb
->data
;
71 int skb_len
= skb
->len
;
73 struct fb_pflana_priv __percpu
*fb_priv_cpu
;
75 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(fb
->private_data
));
76 sk
= &fb_priv_cpu
->sock_self
->sk
;
78 if (skb_shared(skb
)) {
79 struct sk_buff
*nskb
= skb_clone(skb
, GFP_ATOMIC
);
80 if (skb_head
!= skb
->data
) {
89 sock_queue_rcv_skb(sk
, skb
);
91 /* We are last in chain. */
92 write_next_idp_to_skb(skb
, fb
->idp
, IDP_UNKNOWN
);
96 static int fb_pflana_event(struct notifier_block
*self
, unsigned long cmd
,
102 struct fb_pflana_priv __percpu
*fb_priv
;
105 fb
= rcu_dereference_raw(container_of(self
, struct fblock_notifier
,
107 fb_priv
= (struct fb_pflana_priv __percpu
*)
108 rcu_dereference_raw(fb
->private_data
);
112 case FBLOCK_BIND_IDP
: {
114 struct fblock_bind_msg
*msg
= args
;
116 for_each_online_cpu(cpu
) {
117 struct fb_pflana_priv
*fb_priv_cpu
;
118 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
119 if (fb_priv_cpu
->port
[msg
->dir
] == IDP_UNKNOWN
) {
120 write_seqlock(&fb_priv_cpu
->lock
);
121 fb_priv_cpu
->port
[msg
->dir
] = msg
->idp
;
122 write_sequnlock(&fb_priv_cpu
->lock
);
131 printk(KERN_INFO
"[%s::bsdsock] port %s bound to IDP%u\n",
132 fb
->name
, path_names
[msg
->dir
], msg
->idp
);
134 case FBLOCK_UNBIND_IDP
: {
136 struct fblock_bind_msg
*msg
= args
;
138 for_each_online_cpu(cpu
) {
139 struct fb_pflana_priv
*fb_priv_cpu
;
140 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
141 if (fb_priv_cpu
->port
[msg
->dir
] == msg
->idp
) {
142 write_seqlock(&fb_priv_cpu
->lock
);
143 fb_priv_cpu
->port
[msg
->dir
] = IDP_UNKNOWN
;
144 write_sequnlock(&fb_priv_cpu
->lock
);
153 printk(KERN_INFO
"[%s::bsdsock] port %s unbound\n",
154 fb
->name
, path_names
[msg
->dir
]);
163 static struct fblock
*get_bound_fblock(struct fblock
*self
,
168 struct fb_pflana_priv __percpu
*fb_priv_cpu
;
169 fb_priv_cpu
= this_cpu_ptr(rcu_dereference_raw(self
->private_data
));
171 seq
= read_seqbegin(&fb_priv_cpu
->lock
);
172 fbidp
= fb_priv_cpu
->port
[dir
];
173 } while (read_seqretry(&fb_priv_cpu
->lock
, seq
));
174 return search_fblock(fbidp
);
177 static inline struct lana_sock
*to_lana_sk(const struct sock
*sk
)
179 return container_of(sk
, struct lana_sock
, sk
);
182 static struct fblock
*fb_pflana_build_fblock(char *name
);
184 static int lana_sk_init(struct sock
* sk
)
188 struct lana_sock
*lana
= to_lana_sk(sk
);
190 memset(name
, 0, sizeof(name
));
191 snprintf(name
, sizeof(name
), "%p", &lana
->sk
);
192 lana
->fb
= fb_pflana_build_fblock(name
);
196 for_each_online_cpu(cpu
) {
197 struct fb_pflana_priv
*fb_priv_cpu
;
198 fb_priv_cpu
= per_cpu_ptr(lana
->fb
->private_data
, cpu
);
199 fb_priv_cpu
->sock_self
= lana
;
206 static void fb_pflana_destroy_fblock(struct fblock
*fb
);
208 static void lana_sk_free(struct sock
*sk
)
210 struct fblock
*fb_bound
;
211 struct lana_sock
*lana
;
213 lana
= to_lana_sk(sk
);
214 fb_bound
= get_bound_fblock(lana
->fb
, TYPE_INGRESS
);
216 fblock_unbind(fb_bound
, lana
->fb
);
217 put_fblock(fb_bound
);
219 fb_bound
= get_bound_fblock(lana
->fb
, TYPE_EGRESS
);
221 fblock_unbind(lana
->fb
, fb_bound
);
222 put_fblock(fb_bound
);
225 fb_pflana_destroy_fblock(lana
->fb
);
228 static int lana_raw_release(struct socket
*sock
)
230 struct sock
*sk
= sock
->sk
;
233 sk
->sk_prot
->close(sk
, 0);
239 static int lana_raw_bind(struct socket
*sock
, struct sockaddr
*addr
, int len
)
242 struct sock
*sk
= sock
->sk
;
243 struct net_device
*dev
= NULL
;
244 struct lana_sock
*lana
= to_lana_sk(sk
);
246 if (len
< sizeof(struct sockaddr
))
248 if (addr
->sa_family
!= AF_LANA
)
251 idx
= addr
->sa_data
[0];
252 dev
= dev_get_by_index(sock_net(sk
), idx
);
262 static unsigned int lana_raw_poll(struct file
*file
, struct socket
*sock
,
265 unsigned int mask
= 0;
266 struct sock
*sk
= sock
->sk
;
267 poll_wait(file
, sk_sleep(sk
), wait
);
268 if (!skb_queue_empty(&sk
->sk_receive_queue
))
269 mask
|= POLLIN
| POLLRDNORM
;
273 static int lana_raw_sendmsg(struct kiocb
*iocb
, struct socket
*sock
,
274 struct msghdr
*msg
, size_t len
)
276 struct sock
*sk
= sock
->sk
;
277 return sk
->sk_prot
->sendmsg(iocb
, sk
, msg
, len
);
280 /* Todo later: send bound dev from fb_eth, not from userspace */
281 static int lana_proto_sendmsg(struct kiocb
*iocb
, struct sock
*sk
,
282 struct msghdr
*msg
, size_t len
)
286 struct net
*net
= sock_net(sk
);
287 struct net_device
*dev
;
288 struct sockaddr
*target
;
290 struct lana_sock
*lana
= to_lana_sk(sk
);
291 struct fblock
*fb
= lana
->fb
;
292 struct fb_pflana_priv
*fb_priv_cpu
;
294 if (msg
->msg_name
== NULL
)
295 return -EDESTADDRREQ
;
296 if (msg
->msg_namelen
< sizeof(struct sockaddr
))
299 target
= (struct sockaddr
*) msg
->msg_name
;
300 if (unlikely(target
->sa_family
!= AF_LANA
))
301 return -EAFNOSUPPORT
;
304 if (sk
->sk_bound_dev_if
|| lana
->bound
) {
305 dev
= dev_get_by_index(net
, lana
->bound
? lana
->ifindex
:
306 sk
->sk_bound_dev_if
);
308 dev
= dev_getfirstbyhwtype(sock_net(sk
), ETH_P_ALL
); //FIXME
312 if (!dev
|| !(dev
->flags
& IFF_UP
) || unlikely(len
> dev
->mtu
)) {
317 skb
= sock_alloc_send_skb(sk
, LL_ALLOCATED_SPACE(dev
) + len
,
318 msg
->msg_flags
& MSG_DONTWAIT
, &err
);
322 skb_reserve(skb
, LL_RESERVED_SPACE(dev
));
324 skb_reset_mac_header(skb
);
325 skb_reset_network_header(skb
);
327 err
= memcpy_fromiovec((void *) skb_put(skb
, len
), msg
->msg_iov
, len
);
333 skb
->protocol
= htons(ETH_P_ALL
); //FIXME
338 fb_priv_cpu
= this_cpu_ptr(rcu_dereference(fb
->private_data
));
340 seq
= read_seqbegin(&fb_priv_cpu
->lock
);
341 write_next_idp_to_skb(skb
, fb
->idp
,
342 fb_priv_cpu
->port
[TYPE_EGRESS
]);
343 } while (read_seqretry(&fb_priv_cpu
->lock
, seq
));
345 process_packet(skb
, TYPE_EGRESS
);
348 return (err
>= 0) ? len
: err
;
356 static int lana_proto_recvmsg(struct kiocb
*iocb
, struct sock
*sk
,
357 struct msghdr
*msg
, size_t len
, int noblock
,
358 int flags
, int *addr_len
)
364 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
366 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
370 msg
->msg_namelen
= 0;
372 *addr_len
= msg
->msg_namelen
;
375 msg
->msg_flags
|= MSG_TRUNC
;
378 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
380 sock_recv_ts_and_drops(msg
, sk
, skb
);
381 skb_free_datagram(sk
, skb
);
383 return err
? : copied
;
386 static int lana_proto_backlog_rcv(struct sock
*sk
, struct sk_buff
*skb
)
388 int err
= -EPROTONOSUPPORT
;
392 switch (sk
->sk_protocol
) {
394 err
= sock_queue_rcv_skb(sk
, skb
);
400 err
= -EPROTONOSUPPORT
;
404 return err
? NET_RX_DROP
: NET_RX_SUCCESS
;
408 static int lana_common_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
409 struct msghdr
*msg
, size_t len
, int flags
)
413 size_t target
, chunk
, copied
= 0;
414 struct sock
*sk
= sock
->sk
;
417 msg
->msg_namelen
= 0;
419 timeout
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
420 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
422 skb
= skb_dequeue(&sk
->sk_receive_queue
);
424 if (copied
>= target
)
426 err
= sock_error(sk
);
427 if (err
|| sk
->sk_shutdown
& RCV_SHUTDOWN
)
432 timeout
= sk_wait_data(sk
, &timeout
);
433 if (signal_pending(current
)) {
434 err
= sock_intr_errno(timeout
);
439 chunk
= min_t(size_t, skb
->len
, len
);
440 if (memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
)) {
441 skb_queue_head(&sk
->sk_receive_queue
, skb
);
448 sock_recv_ts_and_drops(msg
, sk
, skb
);
449 if (!(flags
& MSG_PEEK
)) {
450 skb_pull(skb
, chunk
);
452 skb_queue_head(&sk
->sk_receive_queue
, skb
);
457 /* put message back and return */
458 skb_queue_head(&sk
->sk_receive_queue
, skb
);
464 return copied
? : err
;
468 static void lana_proto_destruct(struct sock
*sk
)
470 skb_queue_purge(&sk
->sk_receive_queue
);
473 static int lana_proto_init(struct sock
*sk
)
475 sk
->sk_destruct
= lana_proto_destruct
;
479 static void lana_proto_close(struct sock
*sk
, long timeout
)
481 sk_common_release(sk
);
484 static void lana_proto_hash(struct sock
*sk
)
488 static void lana_proto_unhash(struct sock
*sk
)
492 static int lana_proto_get_port(struct sock
*sk
, unsigned short sport
)
497 static struct lana_protocol
*pflana_proto_get(int proto
)
499 struct lana_protocol
*ret
= NULL
;
501 if (proto
< 0 || proto
>= LANA_NPROTO
)
504 ret
= rcu_dereference_raw(proto_tab
[proto
]);
510 static int lana_family_create(struct net
*net
, struct socket
*sock
,
511 int protocol
, int kern
)
514 struct lana_protocol
*lp
;
515 struct lana_sock
*ls
;
517 if (!net_eq(net
, &init_net
))
518 return -EAFNOSUPPORT
;
520 if (protocol
== LANA_PROTO_AUTO
) {
521 switch (sock
->type
) {
523 if (!capable(CAP_SYS_ADMIN
))
525 protocol
= LANA_PROTO_RAW
;
528 return -EPROTONOSUPPORT
;
532 lp
= pflana_proto_get(protocol
);
534 return -EPROTONOSUPPORT
;
536 sk
= sk_alloc(net
, PF_LANA
, GFP_KERNEL
, lp
->proto
);
539 if (lana_sk_init(sk
) < 0) {
544 sock_init_data(sock
, sk
);
545 sock
->state
= SS_UNCONNECTED
;
548 sk
->sk_backlog_rcv
= sk
->sk_prot
->backlog_rcv
;
549 sk
->sk_protocol
= protocol
;
550 sk
->sk_family
= PF_LANA
;
551 sk
->sk_type
= sock
->type
;
552 sk
->sk_prot
->init(sk
);
560 static const struct net_proto_family lana_family_ops
= {
562 .create
= lana_family_create
,
563 .owner
= THIS_MODULE
,
566 static const struct proto_ops lana_raw_ops
= {
568 .owner
= THIS_MODULE
,
569 .release
= lana_raw_release
,
570 .recvmsg
= sock_common_recvmsg
,
571 .sendmsg
= lana_raw_sendmsg
,
572 .poll
= lana_raw_poll
,
573 .bind
= lana_raw_bind
,
574 .setsockopt
= sock_no_setsockopt
,
575 .getsockopt
= sock_no_getsockopt
,
576 .connect
= sock_no_connect
,
577 .socketpair
= sock_no_socketpair
,
578 .accept
= sock_no_accept
,
579 .getname
= sock_no_getname
,
580 .ioctl
= sock_no_ioctl
,
581 .listen
= sock_no_listen
,
582 .shutdown
= sock_no_shutdown
,
583 .mmap
= sock_no_mmap
,
584 .sendpage
= sock_no_sendpage
,
587 static struct proto lana_proto __read_mostly
= {
589 .owner
= THIS_MODULE
,
590 .obj_size
= sizeof(struct lana_sock
),
591 .backlog_rcv
= lana_proto_backlog_rcv
,
592 .close
= lana_proto_close
,
593 .init
= lana_proto_init
,
594 .recvmsg
= lana_proto_recvmsg
,
595 .sendmsg
= lana_proto_sendmsg
,
596 .hash
= lana_proto_hash
,
597 .unhash
= lana_proto_unhash
,
598 .get_port
= lana_proto_get_port
,
601 static struct lana_protocol lana_proto_raw __read_mostly
= {
602 .protocol
= LANA_PROTO_RAW
,
603 .ops
= &lana_raw_ops
,
604 .proto
= &lana_proto
,
605 .owner
= THIS_MODULE
,
608 int pflana_proto_register(int proto
, struct lana_protocol
*lp
)
612 if (!lp
|| proto
< 0 || proto
>= LANA_NPROTO
)
614 if (rcu_dereference_raw(proto_tab
[proto
]))
617 err
= proto_register(lp
->proto
, 1);
621 mutex_lock(&proto_tab_lock
);
622 lp
->protocol
= proto
;
623 rcu_assign_pointer(proto_tab
[proto
], lp
);
624 mutex_unlock(&proto_tab_lock
);
627 if (lp
->owner
!= THIS_MODULE
)
628 __module_get(lp
->owner
);
631 EXPORT_SYMBOL(pflana_proto_register
);
633 void pflana_proto_unregister(struct lana_protocol
*lp
)
637 if (lp
->protocol
< 0 || lp
->protocol
>= LANA_NPROTO
)
639 if (!rcu_dereference_raw(proto_tab
[lp
->protocol
]))
642 BUG_ON(proto_tab
[lp
->protocol
] != lp
);
644 mutex_lock(&proto_tab_lock
);
645 rcu_assign_pointer(proto_tab
[lp
->protocol
], NULL
);
646 mutex_unlock(&proto_tab_lock
);
649 proto_unregister(lp
->proto
);
650 if (lp
->owner
!= THIS_MODULE
)
651 module_put(lp
->owner
);
653 EXPORT_SYMBOL(pflana_proto_unregister
);
655 static int init_fb_pflana(void)
658 for (i
= 0; i
< LANA_NPROTO
; ++i
)
659 rcu_assign_pointer(proto_tab
[i
], NULL
);
661 ret
= pflana_proto_register(LANA_PROTO_RAW
, &lana_proto_raw
);
665 ret
= sock_register(&lana_family_ops
);
667 pflana_proto_unregister(&lana_proto_raw
);
673 static void cleanup_fb_pflana(void)
676 sock_unregister(PF_LANA
);
677 for (i
= 0; i
< LANA_NPROTO
; ++i
)
678 pflana_proto_unregister(rcu_dereference_raw(proto_tab
[i
]));
681 static struct fblock
*fb_pflana_build_fblock(char *name
)
686 struct fb_pflana_priv __percpu
*fb_priv
;
688 fb
= alloc_fblock(GFP_ATOMIC
);
691 fb_priv
= alloc_percpu(struct fb_pflana_priv
);
695 for_each_online_cpu(cpu
) {
696 struct fb_pflana_priv
*fb_priv_cpu
;
697 fb_priv_cpu
= per_cpu_ptr(fb_priv
, cpu
);
698 seqlock_init(&fb_priv_cpu
->lock
);
699 fb_priv_cpu
->port
[0] = IDP_UNKNOWN
;
700 fb_priv_cpu
->port
[1] = IDP_UNKNOWN
;
704 ret
= init_fblock(fb
, name
, fb_priv
);
707 fb
->netfb_rx
= fb_pflana_netrx
;
708 fb
->event_rx
= fb_pflana_event
;
710 ret
= register_fblock_namespace(fb
);
713 __module_get(THIS_MODULE
);
716 cleanup_fblock_ctor(fb
);
718 free_percpu(fb_priv
);
725 static void fb_pflana_destroy_fblock(struct fblock
*fb
)
727 unregister_fblock_namespace_no_rcu(fb
);
729 free_percpu(rcu_dereference_raw(fb
->private_data
));
731 module_put(THIS_MODULE
);
734 static int __init
init_fb_pflana_module(void)
736 return init_fb_pflana();
739 static void __exit
cleanup_fb_pflana_module(void)
745 module_init(init_fb_pflana_module
);
746 module_exit(cleanup_fb_pflana_module
);
748 MODULE_LICENSE("GPL");
749 MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
750 MODULE_DESCRIPTION("LANA PF_LANA module");