2 * Connection oriented routing
3 * Copyright (C) 2007-2008 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <asm/atomic.h>
23 #include <linux/types.h>
24 #include <linux/netdevice.h>
25 #include <linux/skbuff.h>
26 #include <linux/spinlock.h>
27 #include <linux/workqueue.h>
28 #include <linux/kref.h>
34 #define PIDOUT_NEWCONN 16
35 #define PIDOUT_SENDDEF_THRES 8
36 #define PIDOUT_SENDDEF_COUNT 16
40 #define ETH_P_COR 0x1022
44 #define SOCKADDRTYPE_PORT 1
53 #define MAX_CONN_CMD_LEN 4096
56 #define PACKET_TYPE_ANNOUNCE 1
57 #define PACKET_TYPE_DATA 2
60 * Kernel packet data - these commands are sent by the neighbor
61 * The end nodes may cause these commands to be sent, but they see them beyond
69 * KP_PING[1] cookie[4]
70 * KP_PONG[1] cookie[4] respdelay[4]
72 * This is needed to find out whether the other node is reachable. After a new
73 * neighbor is seen, ping requests are sent and the neighbor is only reachable
74 * after a few pongs are received. These requests are also used to find out
75 * whether a neighbor is gone.
78 * The receiver of a ping may delay the sending of the pong e.g. to create
79 * bigger kernel packets. The respdelay is the time in microseconds the packet
85 /* KP_ACK[1] seqno[4] */
89 * KP_ACK_CONN[1] conn_id[4] seqno[4] window[1]
90 * KP_ACK_CONN_OOO[1] conn_id[4] seqno[4] window[1] seqno_ooo[4] length[4]
92 * conn_id is the conn_id we use if we sent something through this conn and
93 * *not* the conn_id that the neighbor used to send us the data
95 * seqno = the seqno which is expected in the next non-out-of-order packet
96 * seqno_ooo, length = in case
98 * window = amount of data which can be sent without receiving the next ack
99 * packets with lower seqno do not overwrite the last window size
100 * note: the other side may also reduce the window size
103 * 1...255 = 64*2^((value-1)/11) end result is rounded down to an integer
106 #define KP_ACK_CONN 5
107 #define KP_ACK_CONN_OOO 6
110 * NOTE on connection ids:
111 * connection ids we send are used for the receive channel
112 * connection ids we receive are used for the send channel
116 * incoming connection
117 * KP_CONNECT[1] conn_id[4]
122 * incoming connection successful,
123 * the first conn_id is the same as previously sent/received in KP_CONNECT
124 * the second conn_id is generated by us and used for the other direction
125 * KP_CONNECT_SUCCESS[1] conn_id[4] conn_id[4]
127 #define KP_CONNECT_SUCCESS 8
129 /* KP_CONN_DATA[1] conn_id[4] seqno[4] length[2] data[length] */
130 #define KP_CONN_DATA 9
132 /* KP_PING_CONN[1] conn_id[4] */
133 #define KP_PING_CONN 10
136 * KP_PONG_CONN[1] conn_id[4]
137 * conn_id is the conn_id we use if we sent something through this conn and
138 * *not* the conn_id that the neighbor used to send us the data
140 #define KP_PONG_CONN 11
143 * { KP_RESET_CONN[1] conn_id[4] }
144 * We send this, if there is an established connection we want to close.
146 #define KP_RESET_CONN 12
149 * KP_CONNID_UNKNOWN[1] sent_conn_id[4]
150 * We send this, if we receive an invalid conn_id
153 #define KP_CONNID_UNKNOWN 13
156 * Connection data which is interpreted when the connection has no target yet
157 * These commands are sent by the end node.
160 * cmd[2] length[4] parameter[length]
161 * unrecognized commands are ignored
162 * parameters which are longer than expected are ignored as well
165 /* outgoing connection: CD_CONNECT_NB[2] length[4]
166 * addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen] */
167 #define CD_CONNECT_NB 1
169 /* connection to local open part: CD_CONNECT_PORT[2] length[4] port[8] */
170 #define CD_CONNECT_PORT 2
173 * CD_LIST_NEIGH sends CDR_BINDATA if the command was successful. The response
176 * totalneighs[4] response_rows[4]
178 * numaddr[2] (addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen]
181 * Neighbors have to be sorted by uptime, new neighbors first. This is so that
182 * the routing daemon can easily find out whether there are new neighbors. It
183 * only needs to send a query with offset 0. If the totalneighs stays the same
184 * while new ones were added, a connection to another neighbor was lost.
187 /* list connected neighbors: CD_LIST_NEIGH[2] length[4] limit[4] offset[4] */
188 #define CD_LIST_NEIGH 3
191 * CD_SET_(FORWARD|BACKWARD)_TIMEOUT[2] length[4] timeout_ms[4]
193 * If there is no successful communication with the previous or next neighbor for
194 * this period, the connection will be reset. This value must be between
195 * NB_STALL_TIME and NB_KILL_TIME. Otherwise it will silently behave as if it
196 * was set to exactly one of these limits.
198 #define CD_SET_FORWARD_TIMEOUT 4
199 #define CD_SET_BACKWARD_TIMEOUT 5
202 * Connection data response
203 * Format is the same as with connection data
207 * {CDR_EXECOK[2] || CDR_EXECFAILED[2]}
208 * reasoncode[2] reasontextlength[2] reasontext[reasontextlength]
209 * reasontextlength may be 0
211 #define CDR_EXECOK 32768
212 #define CDR_EXECOK_OK 33024
215 #define CDR_EXECFAILED 32769
216 #define CDR_EXECFAILED_UNKNOWN_COMMAND 33280
217 #define CDR_EXECFAILED_PERMISSION_DENIED 33281
218 #define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESSOURCES 33282
219 #define CDR_EXECFAILED_CMD_TOO_SHORT 33283
220 #define CDR_EXECFAILED_CMD_TOO_LONG 33284
221 #define CDR_EXECFAILED_TARGETADDRTYPE_UNKNOWN 33285
222 #define CDR_EXECFAILED_TARGETADDR_DOESNTEXIST 33286
223 #define CDR_EXECFAILED_TARGETADDR_PORTCLOSED 33287
224 #define CDR_EXECFAILED_LISTENERQUEUE_FULL 33288
225 #define CDR_EXECFAILED_ILLEGAL_COMMAND 33289
228 * must be sent after CDR_EXEC{OK|FAILED}
229 * CDR_BINDATA[2] bindatalen[4] bindata[bindatalen] */
230 #define CDR_BINDATA 32770
233 /* result codes for rcv.c/proc_packet */
235 #define RC_FINISHED 1
237 #define RC_RCV1_ANNOUNCE 2
238 #define RC_RCV1_KERNEL 3
239 #define RC_RCV1_CONN 4
242 /* start of next element, *not* next htab_entry */
247 struct htab_entry
**htable
;
252 int (*matches
)(void *htentry
, void *searcheditem
);
261 __u8 pongs
; /* count of pongs for pings sent after this one */
264 #define NEIGHBOR_STATE_INITIAL 0
265 #define NEIGHBOR_STATE_ACTIVE 1
266 #define NEIGHBOR_STATE_STALLED 2
267 #define NEIGHBOR_STATE_KILLED 3
270 struct list_head nb_list
;
274 struct net_device
*dev
;
275 char mac
[MAX_ADDR_LEN
];
280 struct delayed_work cmsg_timer
;
281 struct mutex cmsg_lock
;
282 struct list_head control_msgs_out
;
284 * urgent messages; These are sent even if the neighbor state is not
285 * active. If the queue gets full, the oldest ones are dropped. It thus
286 * may only contain messages which are allowed to be dropped.
288 struct list_head ucontrol_msgs_out
;
294 unsigned long last_ping_time
; /* protected by cmsg_lock */
295 __u32 noping_cnt
;/* protected by cmsg_lock */
297 struct mutex pingcookie_lock
;
298 __u32 ping_intransit
;
299 struct ping_cookie cookies
[PING_COOKIES_PER_NEIGH
];
301 atomic_t latency
; /* microsecs */
303 spinlock_t state_lock
;
305 __u64 last_state_change
;/* initial state */
308 * time of the last sent packet which has been acked or
309 * otherwise responded to (e.g. pong)
311 unsigned long last_roundtrip
;/* active/stalled state */
316 struct delayed_work stalltimeout_timer
;
317 __u8 str_timer_pending
;
320 atomic_t kpacket_seqno
;
321 atomic_t ooo_packets
;
324 * connections which receive data from/send data to this node
325 * used when terminating all connections of a neighbor
327 struct mutex conn_list_lock
;
328 struct list_head rcv_conn_list
;
329 struct list_head snd_conn_list
;
332 * the timer has to be inited when adding the neighbor
333 * init_timer(struct timer_list * timer);
334 * add_timer(struct timer_list * timer);
336 spinlock_t retrans_lock
;
337 struct delayed_work retrans_timer_conn
;
338 struct timer_list retrans_timer
;
339 __u8 retrans_timer_conn_running
;
340 __u8 retrans_timer_running
;
342 struct list_head retrans_list
;
343 struct list_head retrans_list_conn
;
345 struct conn
*firstboundconn
;
348 struct cor_sched_data
{
350 struct list_head conn_list
;
351 struct sk_buff_head requeue_queue
;
357 struct data_buf_item
{
358 struct list_head buf_list
;
374 struct list_head items
;
375 struct data_buf_item
*lastread
;
379 __u32 read_remaining
;
381 __u16 last_read_offset
;
390 struct connlistener
*owner
;
394 #define SOCKSTATE_LISTENER 1
395 #define SOCKSTATE_CONN 2
398 /* The first member of connlistener/conn (see sock.c) */
402 struct connlistener
{
403 /* The first member has to be the same as in conn (see sock.c) */
409 struct list_head conn_queue
;
410 wait_queue_head_t wait
;
415 * There are 2 conn objects per bi-directional connection. They refer to each
416 * other via the reversedir field. To distinguish them, the variables on
417 * the stack are usually called rconn and sconn. rconn refers to the conn object
418 * which has received a command. sconn is the other conn object. This means that
419 * in send functions rconn means the connection we want to send the command to.
423 /* The first member has to be the same as in connlistener (see sock.c)*/
426 #define SOURCE_NONE 0
428 #define SOURCE_SOCK 2
430 #define TARGET_UNCONNECTED 0
432 #define TARGET_SOCK 2
441 * 0... connection active
442 * 1... connection is about to be reset, target does not need to be
444 * 2... connection is reset
445 * 3... connection is reset + no pointers to "struct conn *reversedir"
446 * remaining except from this conn
450 struct list_head queue_list
;
454 struct mutex rcv_lock
;
465 /* list of all connections from this neighbor */
466 struct list_head nb_list
;
468 struct sk_buff_head reorder_queue
;
470 struct htab_entry htab_entry
;
477 struct list_head cl_list
;
478 wait_queue_head_t wait
;
491 __u32 stall_timeout_ms
;
495 /* has to be first (because it is first in target
499 /* list of all connections to this neighbor */
500 struct list_head nb_list
;
501 /* protected by nb->retrans_lock, sorted by seqno */
502 struct list_head retrans_list
;
505 __u32 seqno_nextsend
;
507 __u32 seqno_windowlimit
;
508 __u32 kp_windowsetseqno
;
510 __u32 stall_timeout_ms
;
514 wait_queue_head_t wait
;
520 struct conn
*reversedir
;
524 struct skb_procstate
{
527 struct work_struct work
;
542 extern __u8
enc_window(__u32 window_bytes
);
544 extern __u32
dec_window(__u8 window
);
546 extern char *htable_get(struct htable
*ht
, __u32 key
, void *searcheditem
);
548 extern int htable_delete(struct htable
*ht
, __u32 key
, void *searcheditem
,
549 void (*free
) (struct kref
*ref
));
551 extern void htable_insert(struct htable
*ht
, char *newelement
, __u32 key
);
553 extern void htable_init(struct htable
*ht
, int (*matches
)(void *htentry
,
554 void *searcheditem
), __u32 entry_offset
,
557 extern struct conn
*get_conn(__u32 conn_id
);
559 extern void free_conn(struct kref
*ref
);
561 extern int conn_init_out(struct conn
*rconn
, struct neighbor
*nb
);
563 extern void conn_init_sock_source(struct conn
*conn
);
564 extern void conn_init_sock_target(struct conn
*conn
);
566 extern void close_port(struct connlistener
*listener
);
568 extern struct connlistener
*open_port(__be64 port
);
570 extern int connect_port(struct conn
*rconn
, __be64 port
);
572 extern int connect_neigh(struct conn
*rconn
,
573 __u16 addrtypelen
, __u8
*addrtype
,
574 __u16 addrlen
, __u8
*addr
);
576 extern struct conn
* alloc_conn(gfp_t allocflags
);
578 extern void reset_conn(struct conn
*conn
);
581 extern void neighbor_free(struct kref
*ref
);
583 extern struct neighbor
*get_neigh_by_mac(struct sk_buff
*skb
);
585 extern struct neighbor
*find_neigh(__u16 addrtypelen
, __u8
*addrtype
,
586 __u16 addrlen
, __u8
*addr
);
588 extern __u32
generate_neigh_list(char *buf
, __u32 buflen
, __u32 limit
,
591 extern int get_neigh_state(struct neighbor
*nb
);
593 extern void ping_resp(struct neighbor
*nb
, __u32 cookie
, __u32 respdelay
);
595 extern __u32
add_ping_req(struct neighbor
*nb
);
597 extern int time_to_send_ping(struct neighbor
*nb
);
599 extern int force_ping(struct neighbor
*nb
);
601 extern void rcv_announce(struct sk_buff
*skb
);
603 extern int __init
cor_neighbor_init(void);
606 extern void drain_ooo_queue(struct conn
*rconn
);
608 extern void conn_rcv_buildskb(char *data
, __u32 datalen
, __u32 conn_id
,
611 extern int __init
cor_rcv_init(void);
613 /* kpacket_parse.c */
614 extern void kernel_packet(struct neighbor
*nb
, struct sk_buff
*skb
, __u32 seqno
);
617 extern void schedule_controlmsg_timerfunc(struct neighbor
*nb
);
619 struct control_msg_out
;
621 extern struct control_msg_out
*alloc_control_msg(void);
623 extern void free_control_msg(struct control_msg_out
*cm
);
625 extern void retransmit_timerfunc(unsigned long arg
);
627 extern void kern_ack_rcvd(struct neighbor
*nb
, __u32 seqno
);
629 extern void send_pong(struct control_msg_out
*cm
, struct neighbor
*nb
,
632 extern void send_reset_conn(struct control_msg_out
*cm
, struct neighbor
*nb
,
635 extern void send_ack(struct control_msg_out
*cm
, struct neighbor
*nb
,
638 extern void send_ack_conn(struct control_msg_out
*cm
, struct neighbor
*nb
,
639 __u32 conn_id
, __u32 seqno
, __u8 window
);
641 extern void send_ack_conn_ooo(struct control_msg_out
*cm
, struct neighbor
*nb
,
642 __u32 conn_id
, __u32 seqno
, __u8 window
, __u32 seqno_ooo
,
645 extern void send_connect_success(struct control_msg_out
*cm
,
646 struct neighbor
*nb
, __u32 rcvd_conn_id
, __u32 gen_conn_id
);
648 extern void send_connect_nb(struct control_msg_out
*cm
, struct neighbor
*nb
,
651 extern void send_conndata(struct control_msg_out
*cm
, struct neighbor
*nb
,
652 __u32 connid
, __u32 seqno
, char *data_orig
, char *data
,
655 extern void cor_kgen_init(void);
657 /* cpacket_parse.c */
658 extern void parse(struct conn
*rconn
);
661 extern struct sk_buff
*create_packet(struct neighbor
*nb
, int size
,
662 gfp_t alloc_flags
, __u32 conn_id
, __u32 seqno
);
664 extern void retransmit_conn_timerfunc(struct work_struct
*work
);
666 extern void conn_ack_rcvd(__u32 kpacket_seqno
, struct conn
*rconn
, __u32 seqno
,
667 __u8 window
, __u32 seqno_ooo
, __u32 length
);
669 extern void flush_out(struct conn
*rconn
);
671 extern int __init
cor_snd_init(void);
674 extern void databuf_pull(struct data_buf
*data
, char *dst
, int len
);
676 extern size_t databuf_pulluser(struct conn
*sconn
, struct msghdr
*msg
);
678 extern void databuf_pullold(struct data_buf
*data
, __u32 startpos
, char *dst
,
681 extern void databuf_ack(struct data_buf
*buf
, __u32 pos
);
683 extern void databuf_ackread(struct data_buf
*buf
);
685 extern int databuf_maypush(struct data_buf
*buf
);
687 extern void databuf_free(struct data_buf
*data
);
689 extern void databuf_init(struct data_buf
*data
);
691 extern int receive_userbuf(struct conn
*rconn
, struct msghdr
*msg
);
693 extern void receive_buf(struct conn
*rconn
, char *buf
, int len
);
695 extern int receive_skb(struct conn
*rconn
, struct sk_buff
*skb
);
697 extern void wake_sender(struct conn
*rconn
);
699 extern void forward_init(void);
/*
 * Return the processing state stored in a socket buffer.
 *
 * The result is the address of the first byte of skb->cb, reinterpreted as a
 * struct skb_procstate. Nothing is copied or allocated; the returned pointer
 * aliases the skb's own cb area.
 *
 * NOTE(review): assumes struct skb_procstate is no larger than skb->cb and
 * that nothing else uses cb while this state is live - confirm.
 */
703 static inline struct skb_procstate
*skb_pstate(struct sk_buff
*skb
)
705 return (struct skb_procstate
*) &(skb
->cb
[0]);
/*
 * Recover the socket buffer that contains a given processing state.
 *
 * Subtracts offsetof(struct sk_buff, cb) from ps, so ps must point at the
 * start of the cb member of a struct sk_buff (e.g. a pointer returned by
 * skb_pstate()). Passing any other pointer yields an invalid sk_buff pointer.
 */
708 static inline struct sk_buff
*skb_from_pstate(struct skb_procstate
*ps
)
710 return (struct sk_buff
*) (((char *)ps
) - offsetof(struct sk_buff
,cb
));
/*
 * Compute nb->dev->mtu - LL_RESERVED_SPACE(nb->dev) - 9 for a neighbor.
 *
 * NOTE(review): judging by the name, this is the maximum payload size of one
 * packet sent to nb; the constant 9 presumably covers the data packet header
 * (packet type[1] conn_id[4] seqno[4]) - confirm.
 * NOTE(review): there is no check that the MTU exceeds the subtracted
 * overhead; since the return type is unsigned, a too-small MTU would wrap to
 * a very large value - confirm that callers cannot hit this.
 */
714 static inline __u32
mss(struct neighbor
*nb
)
716 return nb
->dev
->mtu
- LL_RESERVED_SPACE(nb
->dev
) - 9;
720 static inline void put_u64(char *dst
, __u64 value
, int convbo
)
722 char *p_value
= (char *) &value
;
725 value
= cpu_to_be64(value
);
737 static inline void put_u32(char *dst
, __u32 value
, int convbo
)
739 char *p_value
= (char *) &value
;
742 value
= cpu_to_be32(value
);
750 static inline void put_u16(char *dst
, __u16 value
, int convbo
)
752 char *p_value
= (char *) &value
;
755 value
= cpu_to_be16(value
);
761 static inline char *cor_pull_skb(struct sk_buff
*skb
, unsigned int len
)
763 char *ptr
= skb_pull(skb
, len
);
765 if(unlikely(ptr
== 0))