honor neighbor states when sending kernel packets
[cor_2_6_31.git] / net / cor / cor.h
blob8c84dca07d2b24d3d58bea2c7f7535c022df4ca0
/*
 * Connection oriented routing
 * Copyright (C) 2007-2008 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

21 #include <asm/atomic.h>
23 #include <linux/types.h>
24 #include <linux/netdevice.h>
25 #include <linux/skbuff.h>
26 #include <linux/spinlock.h>
27 #include <linux/workqueue.h>
28 #include <linux/kref.h>
30 #include "settings.h"
/*
 * options
 *
 * TODO: the meaning of the PIDOUT_* tunables is not documented in this
 * header; describe them where they are consumed.
 */
#define PIDOUT_NEWCONN 16
#define PIDOUT_SENDDEF_THRES 8
#define PIDOUT_SENDDEF_COUNT 16

/*
 * Ethernet protocol number and socket address/protocol family used by COR.
 *
 * NOTE(review): these do not look like officially assigned numbers; verify
 * that AF_COR does not collide with (and is within the range accepted by)
 * the AF_* values of the kernel this is built against.
 */
#define ETH_P_COR 0x1022
#define AF_COR 37
#define PF_COR AF_COR

44 #define SOCKADDRTYPE_PORT 1
45 struct cor_sockaddr {
46 int type;
48 union {
49 __be64 port;
50 } addr;
#define MAX_CONN_CMD_LEN 4096


/* packet types */
#define PACKET_TYPE_ANNOUNCE 1
#define PACKET_TYPE_DATA 2

/*
 * Kernel packet data - these commands are sent by the neighbor.
 * The end nodes may cause these commands to be sent, but they never see them
 * beyond the first hop.
 */

/* KP_PADDING[1] */
#define KP_PADDING 1

/*
 * KP_PING[1] cookie[4]
 * KP_PONG[1] cookie[4] respdelay[4]
 *
 * This is needed to find out whether the other node is reachable. After a new
 * neighbor is seen, ping requests are sent and the neighbor is only reachable
 * after a few pongs are received. These requests are also used to find out
 * whether a neighbor is gone.
 *
 * respdelay:
 * The receiver of a ping may delay the sending of the pong e.g. to create
 * bigger kernel packets. The respdelay is the time in microseconds the packet
 * was delayed.
 */
#define KP_PING 2
#define KP_PONG 3

/* KP_ACK[1] sent_conn_id[4] seqno[4]
 *
 * sent_conn_id means that this is *not* the conn_id we use if we sent something
 * through this conn, but the conn_id that the neighbor used to send us the
 * packet
 */
#define KP_ACK 4

/*
 * KP_SPEED[1] conn_id[4] speedinfo[2]
 *
 * speedinfo[2] =
 *	buffer_state_value = speedinfo % 181
 *	speed_value = speedinfo / 181
 *
 *	buffer_state = 1024 * pow(2, buffer_state_value/3.0)
 *	speed = 1024 * pow(2, speed_value/12.0)
 *	(note the ".0": these are floating point divisions)
 *
 *	This has to be done either with floating points (which is not so nice)
 *	or you can calculate:
 *	buffer_state = pow(2, value/3) *
 *			1024 * pow(pow(2, 1.0/3), buffer_state_value%3)
 *	where 1024 * pow(pow(2, 1.0/3), value%3) can be just a table lookup
 *	(the "1024" should be part of the value in the table, because it
 *	increases the accuracy)
 *
 *	you can do the same with the speed
 *
 *
 * Some values have special meanings:
 *	if speedinfo is the highest possible value (65535), it means both
 *	values are infinite
 *	if buffer_state_value is > 91, you have to subtract 90 and make the
 *	resulting buffer_state negative
 *	NOTE(review): threshold 91 vs. offset 90 looks off by one - confirm
 *	against the encoder/decoder.
 */
#define KP_SPEED 5

/* NOTE on connection ids:
 * connection ids we send are used for the receive channel
 * connection ids we receive are used for the send channel
 */

/*
 * incoming connection
 * KP_CONNECT[1] conn_id[4]
 */
#define KP_CONNECT 6

/*
 * incoming connection successful,
 * the first conn_id is the same as previously sent/received in KP_CONNECT
 * the second conn_id is generated by us and used for the other direction
 * KP_CONNECT_SUCCESS[1] conn_id[4] conn_id[4]
 */
#define KP_CONNECT_SUCCESS 7

/* KP_CONN_DATA[1] conn_id[4] seqno[4] length[2] data[length] */
#define KP_CONN_DATA 8

/*
 * { KP_RESET_CONN[1] conn_id[4] }
 * We send this, if there is an established connection we want to close.
 */
#define KP_RESET_CONN 9

/*
 * Connection data which is interpreted when the connection has no target yet.
 * These commands are sent by the end node.
 *
 * Format:
 * cmd[2] length[4] parameter[length]
 * unrecognized commands are ignored
 * parameters which are longer than expected are ignored as well
 */

/* outgoing connection: CD_CONNECT_NB[2] length[4]
 * addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen] */
#define CD_CONNECT_NB 1

/* connection to local open part: CD_CONNECT_PORT[2] length[4] port[8] */
#define CD_CONNECT_PORT 2

/*
 * CD_LIST_NEIGH sends CDR_BINDATA if the command was successful. The response
 * format is:
 *
 * totalneighs[4] response_rows[4]
 * for every row:
 * numaddr[2] (addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen]
 * )[numaddr]
 *
 * Neighbors have to be sorted by uptime, new neighbors first. This is so that
 * the routing daemon can easily find out whether there are new neighbors. It
 * only needs to send a query with offset 0. If the totalneighs stays the same
 * while new were added, a connection to another neighbor was lost.
 */

/* list connected neighbors: CD_LIST_NEIGH[2] length[4] limit[4] offset[4] */
#define CD_LIST_NEIGH 3

/*
 * Connection data response
 * Format is the same as with connection data
 */

/*
 * {CDR_EXECOK[2] || CDR_EXECFAILED[2]}
 * reasoncode[2] reasontextlength[2] reasontext[reasontextlength]
 * reasontextlength may be 0
 */
#define CDR_EXECOK 32768
#define CDR_EXECOK_OK 33024

#define CDR_EXECFAILED 32769
#define CDR_EXECFAILED_UNKNOWN_COMMAND 33280
#define CDR_EXECFAILED_PERMISSION_DENIED 33281
#define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESSOURCES 33282
#define CDR_EXECFAILED_CMD_TOO_SHORT 33283
#define CDR_EXECFAILED_CMD_TOO_LONG 33284
#define CDR_EXECFAILED_TARGETADDRTYPE_UNKNOWN 33285
#define CDR_EXECFAILED_TARGETADDR_DOESNTEXIST 33286
#define CDR_EXECFAILED_TARGETADDR_PORTCLOSED 33287
#define CDR_EXECFAILED_LISTENERQUEUE_FULL 33288

/*
 * must be sent after CDR_EXEC{OK|FAILED}
 * CDR_BINDATA[2] bindatalen[4] bindata[bindatalen] */
#define CDR_BINDATA 32770

/* result codes for rcv.c/proc_packet */
#define RC_DROP 0
#define RC_FINISHED 1

#define RC_RCV1_ANNOUNCE 2
#define RC_RCV1_KERNEL 3
#define RC_RCV1_CONN 4

/* Chaining link embedded in every element stored in a struct htable. */
struct htab_entry {
	/* start of next element, *not* next htab_entry */
	void *next;
};

230 struct htable{
231 struct htab_entry **htable;
232 __u32 htable_size;
233 __u32 cell_size;
234 __u32 num_elements;
236 int (*matches)(void *htentry, void *searcheditem);
237 __u32 key_offset;
238 __u32 entry_offset;
239 __u32 kref_offset;
242 struct ping_cookie{
243 unsigned long time;
244 __u32 cookie;
245 __u8 pongs; /* count of pongs for pings sent after this one */
248 #define NEIGHBOR_STATE_INITIAL 0
249 #define NEIGHBOR_STATE_ACTIVE 1
250 #define NEIGHBOR_STATE_STALLED 2
252 struct neighbor{
253 struct list_head nb_list;
255 struct kref ref;
257 struct net_device *dev;
258 char mac[MAX_ADDR_LEN];
260 char *addr;
261 __u16 addrlen;
263 struct delayed_work cmsg_timer;
264 struct mutex cmsg_lock;
265 struct list_head control_msgs_out;
267 * urgent messages; These are sent even if the neighbor state is not
268 * active. If the queue gets full, the oldest ones are dropped. It thus
269 * may only contain messages which are allowed to be dropped.
271 struct list_head ucontrol_msgs_out;
272 __u64 timeout;
273 __u32 cmlength;
274 __u32 ucmlength;
277 unsigned long last_ping_time; /* protected by cmsg_lock */
278 __u32 noping_cnt;/* protected by cmsg_lock */
280 struct mutex pingcookie_lock;
281 __u32 ping_intransit;
282 struct ping_cookie cookies[PING_COOKIES_PER_NEIGH];
283 __u32 lastcookie;
284 atomic_t latency; /* microsecs */
286 struct mutex state_lock;
287 union {
288 __u64 last_state_change;/* initial state */
290 * last_roundtrip:
291 * time of the last sent packet which has been acked or
292 * otherwise responded to (e.g. pong)
294 unsigned long last_roundtrip;/* active/stalled state */
295 }state_time;
296 __u8 state;
297 __u16 ping_success;
299 struct delayed_work stalltimeout_timer;
300 __u8 str_timer_pending;
303 atomic_t kpacket_seqno;
304 atomic_t ooo_packets;
307 * connecions which receive data from/send data to this node
308 * used when terminating all connections of a neighbor
310 struct mutex conn_list_lock;
311 struct list_head rcv_conn_list;
312 struct list_head snd_conn_list;
315 * the timer has to be inited when adding the neighbor
316 * init_timer(struct timer_list * timer);
317 * add_timer(struct timer_list * timer);
319 spinlock_t retrans_lock;
320 struct timer_list retrans_timer;
323 * next_retransmit are linked with
324 * skb_procstate->funcstate.retransmit_queue
325 * because the sk_buff next/prev fields are needed by the hashtable
327 struct sk_buff_head retrans_list;
329 struct conn *firstboundconn;
332 struct cor_sched_data{
333 spinlock_t lock;
334 struct list_head conn_list;
335 struct sk_buff_head requeue_queue;
338 #define TYPE_BUF 0
339 #define TYPE_SKB 1
341 struct data_buf_item{
342 struct list_head buf_list;
344 union {
345 struct {
346 char *buf;
347 __u32 datalen;
349 }buf;
351 struct sk_buff *skb;
352 }data;
354 __u8 type;
357 struct data_buf{
358 struct list_head items;
359 struct data_buf_item *lastread;
360 __u64 first_offset;
361 __u64 read_offset;
363 __u32 totalsize;
364 __u32 read_remaining;
366 __u16 last_read_offset;
368 __u16 last_buflen;
371 struct connlistener;
373 struct bindnode{
374 struct list_head lh;
375 struct connlistener *owner;
376 __be64 port;
379 #define SOCKSTATE_LISTENER 1
380 #define SOCKSTATE_CONN 2
382 struct sock_hdr {
383 /* The first member of connlistener/conn (see sock.c) */
384 __u8 sockstate;
387 struct connlistener {
388 /* The first member has to be the same as in conn (see sock.c) */
389 __u8 sockstate;
390 struct bindnode *bn;
391 struct mutex lock;
392 int queue_maxlen;
393 int queue_len;
394 struct list_head conn_queue;
395 wait_queue_head_t wait;
400 * There are 2 conn objects per bi-directional connection. They refer to each
401 * other with in the reversedir field. To distinguish them, the variables on
402 * the stack are usually called rconn and sconn. rconn refers to the conn object
403 * which has received a command. sconn is the other conn object. This means that
404 * in send functions rconn means the connection we want to send the command to.
407 struct conn{
408 /* The first member has to be the same as in connlistener (see sock.c)*/
409 __u8 sockstate;
411 #define SOURCE_NONE 0
412 #define SOURCE_IN 1
413 #define SOURCE_SOCK 2
415 #define TARGET_UNCONNECTED 0
416 #define TARGET_OUT 1
417 #define TARGET_SOCK 2
419 __u8 sourcetype:4,
420 targettype:4;
421 __u8 isreset;
422 __u8 qdisc_active;
423 struct list_head queue_list;
425 struct kref ref;
427 struct mutex rcv_lock;
429 /* state */
430 __u32 credits;
431 /* credit rate */
432 __s32 sender_crate;
433 __s32 resp_crate;
435 union{
436 struct{
437 struct neighbor *nb;
438 /* list of all connections from this neighbor */
439 struct list_head nb_list;
441 struct sk_buff_head reorder_queue;
443 struct htab_entry htab_entry;
444 __u32 conn_id;
445 __u32 next_seqno;
446 __u32 ooo_packets;
447 }in;
449 struct{
450 struct list_head cl_list;
451 wait_queue_head_t wait;
452 struct socket *sock;
453 int flags;
454 }sock;
455 }source;
457 union{
458 struct{
459 __u32 paramlen;
460 __u32 cmdread;
461 __u16 cmd;
462 __u8 *cmdparams;
464 __u32 stall_timeout_ms;
465 }unconnected;
467 struct{
468 /* has to be first (because it is first in target
469 * kernel too)
471 struct neighbor *nb;
472 /* list of all connections to this neighbor */
473 struct list_head nb_list;
475 __u32 conn_id;
476 __u32 seqno;
478 __u32 stall_timeout_ms;
479 }out;
481 struct{
482 wait_queue_head_t wait;
483 }sock;
484 }target;
486 struct data_buf buf;
488 struct conn *reversedir;
491 /* inside skb->cb */
492 struct skb_procstate{
493 struct conn *rconn;
495 union{
496 struct{
497 struct work_struct work;
498 }rcv;
500 struct{
501 __u32 offset;
502 }announce;
504 struct{
505 __u32 conn_id;
506 __u32 seqno;
507 }rcv2;
509 struct{
510 struct htab_entry htab_entry;
511 struct kref ref;
512 unsigned long timeout;
513 __u32 conn_id;
514 __u32 seqno;
515 struct neighbor *nb;
516 }retransmit_queue;
517 }funcstate;
521 /* common.c */
522 extern char *htable_get(struct htable *ht, __u32 key, void *searcheditem);
524 extern int htable_delete(struct htable *ht, __u32 key, void *searcheditem,
525 void (*free) (struct kref *ref));
527 extern void htable_insert(struct htable *ht, char *newelement, __u32 key);
529 extern void htable_init(struct htable *ht, int (*matches)(void *htentry,
530 void *searcheditem), __u32 entry_offset,
531 __u32 kref_offset);
533 extern struct conn *get_conn(__u32 conn_id);
535 extern void free_conn(struct kref *ref);
537 extern int conn_init_out(struct conn *rconn, struct neighbor *nb);
539 extern void conn_init_sock_source(struct conn *conn);
540 extern void conn_init_sock_target(struct conn *conn);
542 extern void close_port(struct connlistener *listener);
544 extern struct connlistener *open_port(__be64 port);
546 extern int connect_port(struct conn *rconn, __be64 port);
548 extern int connect_neigh(struct conn *rconn,
549 __u16 addrtypelen, __u8 *addrtype,
550 __u16 addrlen, __u8 *addr);
552 extern struct conn* alloc_conn(gfp_t allocflags);
554 extern void reset_conn(struct conn *conn);
556 /* neighbor.c */
557 extern void neighbor_free(struct kref *ref);
559 extern struct neighbor *get_neigh_by_mac(struct sk_buff *skb);
561 extern struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
562 __u16 addrlen, __u8 *addr);
564 extern __u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit,
565 __u32 offset);
567 extern int get_neigh_state(struct neighbor *nb);
569 extern void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay);
571 extern __u32 add_ping_req(struct neighbor *nb);
573 extern int time_to_send_ping(struct neighbor *nb);
575 extern int force_ping(struct neighbor *nb);
577 extern void rcv_announce(struct sk_buff *skb);
579 extern int __init cor_neighbor_init(void);
581 /* rcv.c */
582 extern void drain_ooo_queue(struct conn *rconn);
584 extern void conn_rcv_buildskb(char *data, __u32 datalen, __u32 conn_id,
585 __u32 seqno);
587 extern int __init cor_rcv_init(void);
589 /* kpacket_parse.c */
590 extern void kernel_packet(struct neighbor *nb, struct sk_buff *skb, __u32 seqno);
592 /* kpacket_gen.c */
593 extern void schedule_controlmsg_timerfunc(struct neighbor *nb);
595 struct control_msg_out;
597 extern struct control_msg_out *alloc_control_msg(void);
599 extern void free_control_msg(struct control_msg_out *cm);
601 extern void send_pong(struct control_msg_out *cm, struct neighbor *nb,
602 __u32 cookie);
604 extern void send_reset_conn(struct control_msg_out *cm, struct neighbor *nb,
605 __u32 conn_id);
607 extern void send_ack(struct control_msg_out *cm, struct neighbor *nb,
608 __u32 conn_id, __u32 seqno);
610 extern void send_connect_success(struct control_msg_out *cm,
611 struct neighbor *nb, __u32 rcvd_conn_id, __u32 gen_conn_id);
613 extern void send_connect_nb(struct control_msg_out *cm, struct neighbor *nb,
614 __u32 conn_id);
616 extern void send_conndata(struct control_msg_out *cm, struct neighbor *nb,
617 __u32 connid, __u32 seqno, char *data_orig, char *data,
618 __u32 datalen);
/* cpacket_parse.c */
extern void parse(struct conn *rconn);

623 /* snd.c */
624 extern void retransmit_timerfunc(unsigned long arg);
626 extern struct sk_buff *create_packet_conn(struct conn *target, int size,
627 gfp_t alloc_flags);
629 extern struct sk_buff *create_packet_kernel(struct neighbor *nb, int size,
630 gfp_t alloc_flags);
632 extern void send_conn_flushdata(struct conn *rconn, char *data, __u32 datalen);
634 extern void send_packet(struct sk_buff *skb, struct neighbor *nb,
635 int retransmit);
637 extern void ack_received(struct neighbor *nb, __u32 conn_id, __u32 seqno);
639 extern void flush_out(struct conn *rconn);
641 extern int __init cor_snd_init(void);
643 /* forward.c */
644 extern void databuf_pull(struct data_buf *data, char *dst, int len);
646 extern size_t databuf_pulluser(struct conn *sconn, struct msghdr *msg);
648 extern void databuf_ack(struct data_buf *buf, __u64 pos);
650 extern void databuf_ackread(struct data_buf *buf);
652 extern int databuf_maypush(struct data_buf *buf);
654 extern void databuf_free(struct data_buf *data);
656 extern void databuf_init(struct data_buf *data);
658 extern int receive_userbuf(struct conn *rconn, struct msghdr *msg);
660 extern void receive_buf(struct conn *rconn, char *buf, int len);
662 extern int receive_skb(struct conn *rconn, struct sk_buff *skb);
664 extern void wake_sender(struct conn *rconn);
666 extern void forward_init(void);
670 static inline struct skb_procstate *skb_pstate(struct sk_buff *skb)
672 return (struct skb_procstate *) &(skb->cb[0]);
675 static inline struct sk_buff *skb_from_pstate(struct skb_procstate *ps)
677 return (struct sk_buff *) (((char *)ps) - offsetof(struct sk_buff,cb));
681 static inline __u32 mss(struct neighbor *nb)
683 return nb->dev->mtu - LL_RESERVED_SPACE(nb->dev) - 9;
687 static inline void put_u64(char *dst, __u64 value, int convbo)
689 char *p_value = (char *) &value;
691 if (convbo)
692 value = cpu_to_be64(value);
694 dst[0] = p_value[0];
695 dst[1] = p_value[1];
696 dst[2] = p_value[2];
697 dst[3] = p_value[3];
698 dst[4] = p_value[4];
699 dst[5] = p_value[5];
700 dst[6] = p_value[6];
701 dst[7] = p_value[7];
704 static inline void put_u32(char *dst, __u32 value, int convbo)
706 char *p_value = (char *) &value;
708 if (convbo)
709 value = cpu_to_be32(value);
711 dst[0] = p_value[0];
712 dst[1] = p_value[1];
713 dst[2] = p_value[2];
714 dst[3] = p_value[3];
717 static inline void put_u16(char *dst, __u16 value, int convbo)
719 char *p_value = (char *) &value;
721 if (convbo)
722 value = cpu_to_be16(value);
724 dst[0] = p_value[0];
725 dst[1] = p_value[1];
/*
 * Pulls len bytes off the skb with skb_pull() and returns a pointer to them.
 *
 * Returns NULL if skb_pull() returns NULL. Otherwise the pointer returned by
 * skb_pull() is moved back by len, which is the start of the bytes that were
 * just pulled (presumably skb_pull() returns the new start of data; see its
 * documentation).
 */
static inline char *cor_pull_skb(struct sk_buff *skb, unsigned int len)
{
	/* skb_pull() returns unsigned char *; this header works with char * */
	char *ptr = (char *) skb_pull(skb, len);

	if (ptr == NULL)
		return NULL;

	return ptr - len;
}