2 * Connection oriented routing
3 * Copyright (C) 2007-2008 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <asm/atomic.h>
23 #include <linux/types.h>
24 #include <linux/netdevice.h>
25 #include <linux/skbuff.h>
26 #include <linux/spinlock.h>
27 #include <linux/workqueue.h>
33 #define PIDOUT_NEWCONN 16
34 #define PIDOUT_SENDDEF_THRES 8
35 #define PIDOUT_SENDDEF_COUNT 16
39 #define ETH_P_COR 0x1022
43 #define SOCKADDRTYPE_PORT 1
52 #define MAX_CONN_CMD_LEN 4096
55 #define PACKET_TYPE_ANNOUNCE 1
56 #define PACKET_TYPE_DATA 2
59 * Kernel packet data - these commands are sent by the neighbor
60 * The end nodes may cause these commands to be sent, but they see them beyond
68 * KP_PING[1] cookie[4]
69 * KP_PONG[1] cookie[4] respdelay[4]
71 * This is needed to find out whether the other node is reachable. After a new
72 * neighbor is seen, ping requests are sent and the neighbor is only reachable
73 * after a few pongs are received. These requests are also used to find out
74 * whether a neighbor is gone.
77 * The receiver of a ping may delay the sending of the pong e.g. to create
78 * bigger kernel packets. The respdelay is the time in microseconds the packet
84 /* KP_ACK[1] sent_conn_id[4] seqno[4]
86 * sent_conn_id means that this is *not* the conn_id we use if we send something
87 * through this conn, but the conn_id that the neighbor used to send us the
93 * KP_SPEED[1] conn_id[4] speedinfo[2]
96 * buffer_state_value = speedinfo % 181
97 * speed_value = speedinfo / 181
99 * buffer_state = 1024 * pow(2, buffer_state_value/3.0)
100 * speed = 1024 * pow(2, speed_value/12.0)
103 * This has to be done either with floating point (which is not so nice) or
105 * buffer_state = pow(2, buffer_state_value/3) *
106 * 1024 * pow(pow(2, 1.0/3), buffer_state_value%3)
107 * where 1024 * pow(pow(2, 1.0/3), buffer_state_value%3) can be just a table lookup
108 * (the "1024" should be part of the value in the table, because it increases
111 * you can do the same with the speed
114 * Some values have special meanings:
115 * if speedinfo is the highest possible value(65535), it means both values
117 * if buffer_state_value is > 91, you have to subtract 90 and make the
118 * resulting buffer_state negative
122 /* NOTE on connection ids:
123 * connection ids we send are used for the receive channel
124 * connection ids we receive are used for the send channel
128 * incoming connection
129 * KP_CONNECT[1] conn_id[4]
134 * incoming connection successful,
135 * the first conn_id is the same as previously sent/received in KP_CONNECT
136 * the second conn_id is generated by us and used for the other direction
137 * KP_CONNECT_SUCCESS[1] conn_id[4] conn_id[4]
139 #define KP_CONNECT_SUCCESS 7
141 /* KP_CONN_DATA[1] conn_id[4] seqno[4] length[2] data[length] */
142 #define KP_CONN_DATA 8
145 * { KP_RESET_CONN[1] conn_id[4] }
146 * We send this, if there is an established connection we want to close.
148 #define KP_RESET_CONN 9
152 * Connection data which is interpreted when the connection has no target yet
153 * These commands are sent by the end node.
156 * cmd[2] length[4] parameter[length]
157 * unrecognized commands are ignored
158 * parameters which are longer than expected are ignored as well
161 /* outgoing connection: CD_CONNECT_NB[2] length[4]
162 * addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen] */
163 #define CD_CONNECT_NB 1
165 /* connection to local open part: CD_CONNECT_PORT[2] length[4] port[8] */
166 #define CD_CONNECT_PORT 2
169 * CD_LIST_NEIGH sends CDR_BINDATA if the command was successful. The response
172 * totalneighs[4] response_rows[4]
174 * numaddr[2] (addrtypelen[2] addrlen[2] addrtype[addrtypelen] addr[addrlen]
177 * Neighbors have to be sorted by uptime, new neighbors first. This is so that
178 * the routing daemon can easily find out whether there are new neighbors. It
179 * only needs to send a query with offset 0. If the totalneighs stays the same
180 * while new neighbors were added, a connection to another neighbor was lost.
183 /* list connected neighbors: CD_LIST_NEIGH[2] length[4] limit[4] offset[4] */
184 #define CD_LIST_NEIGH 3
187 * Connection data response
188 * Format is the same as with connection data
192 * {CDR_EXECOK[2] || CDR_EXECFAILED[2]}
193 * reasoncode[2] reasontextlength[2] reasontext[reasontextlength]
194 * reasontextlength may be 0
196 #define CDR_EXECOK 32768
197 #define CDR_EXECOK_OK 33024
199 #define CDR_EXECFAILED 32769
200 #define CDR_EXECFAILED_UNKNOWN_COMMAND 33280
201 #define CDR_EXECFAILED_PERMISSION_DENIED 33281
202 #define CDR_EXECFAILED_TEMPORARILY_OUT_OF_RESSOURCES 33282
203 #define CDR_EXECFAILED_CMD_TOO_SHORT 33283
204 #define CDR_EXECFAILED_CMD_TOO_LONG 33284
205 #define CDR_EXECFAILED_TARGETADDRTYPE_UNKNOWN 33285
206 #define CDR_EXECFAILED_TARGETADDR_DOESNTEXIST 33286
207 #define CDR_EXECFAILED_TARGETADDR_PORTCLOSED 33287
208 #define CDR_EXECFAILED_LISTENERQUEUE_FULL 33288
211 * must be sent after CDR_EXEC{OK|FAILED}
212 * CDR_BINDATA[2] bindatalen[4] bindata[bindatalen] */
213 #define CDR_BINDATA 32770
216 /* result codes for rcv.c/proc_packet */
218 #define RC_FINISHED 1
220 #define RC_RCV1_ANNOUNCE 2
221 #define RC_RCV1_KERNEL 3
222 #define RC_RCV1_CONN 4
226 struct ref_counter_def
{
228 * implemented by user, frees struct containing the ref_counter when
231 void (*free
)(struct ref_counter
*cnt
);
237 /* initialised with 1 */
240 /* should be a pointer to a global variable */
241 struct ref_counter_def
*def
;
245 /* start of next element, *not* next htab_entry */
250 struct htab_entry
**htable
;
255 int (*matches
)(void *htentry
, void *searcheditem
);
258 __u32 ref_counter_offset
;
262 struct list_head nb_list
;
264 struct ref_counter refs
;
266 struct net_device
*dev
;
267 char mac
[MAX_ADDR_LEN
];
272 struct delayed_work cmsg_timer
;
273 struct mutex cmsg_lock
;
274 struct list_head control_msgs_out
;
277 atomic_t kpacket_seqno
;
279 atomic_t ooo_packets
;
284 * connections which receive data from/send data to this node
285 * used when terminating all connections of a neighbor
287 struct mutex conn_list_lock
;
288 struct list_head rcv_conn_list
;
289 struct list_head snd_conn_list
;
292 * the timer has to be inited when adding the neighbor
293 * init_timer(struct timer_list * timer);
294 * add_timer(struct timer_list * timer);
296 spinlock_t retrans_lock
;
297 struct timer_list retrans_timer
;
300 * next_retransmit are linked with
301 * skb_procstate->funcstate.retransmit_queue
302 * because the sk_buff next/prev fields are needed by the hashtable
304 struct sk_buff_head retrans_list
;
306 struct conn
*firstboundconn
;
309 struct cor_sched_data
{
311 struct list_head conn_list
;
312 struct sk_buff_head requeue_queue
;
318 struct data_buf_item
{
319 struct list_head buf_list
;
335 struct list_head items
;
336 struct data_buf_item
*lastread
;
341 __u32 read_remaining
;
343 __u32 last_read_offset
;
352 struct connlistener
*owner
;
356 #define SOCKSTATE_LISTENER 1
357 #define SOCKSTATE_CONN 2
360 /* The first member of connlistener/conn (see sock.c) */
364 struct connlistener
{
365 /* The first member has to be the same as in conn (see sock.c) */
371 struct list_head conn_queue
;
372 wait_queue_head_t wait
;
377 * There are 2 conn objects per bi-directional connection. They refer to each
378 * other via the reversedir field. To distinguish them, the variables on
379 * the stack are usually called rconn and sconn. rconn refers to the conn object
380 * which has received a command. sconn is the other conn object. This means that
381 * in send functions rconn means the connection we want to send the command to.
385 /* The first member has to be the same as in connlistener (see sock.c)*/
388 #define SOURCE_NONE 0
390 #define SOURCE_SOCK 2
392 #define TARGET_UNCONNECTED 0
394 #define TARGET_SOCK 2
400 struct list_head queue_list
;
402 /* todo: convert to kref */
403 struct ref_counter refs
;
405 struct mutex rcv_lock
;
416 /* list of all connections from this neighbor */
417 struct list_head nb_list
;
419 struct sk_buff_head reorder_queue
;
421 struct htab_entry htab_entry
;
428 struct list_head cl_list
;
429 wait_queue_head_t wait
;
444 /* has to be first (because it is first in target
448 /* list of all connections to this neighbor */
449 struct list_head nb_list
;
456 wait_queue_head_t wait
;
462 struct conn
*reversedir
;
466 struct skb_procstate
{
471 struct work_struct work
;
484 struct htab_entry htab_entry
;
485 struct ref_counter refs
;
486 unsigned long timeout
;
496 extern void ref_counter_decr(struct ref_counter
*cnt
);
498 extern int ref_counter_incr(struct ref_counter
*cnt
);
500 extern void ref_counter_init(struct ref_counter
*cnt
,
501 struct ref_counter_def
*def
);
503 extern char *htable_get(struct htable
*ht
, __u32 key
, void *searcheditem
);
505 extern int htable_delete(struct htable
*ht
, __u32 key
, void *searcheditem
);
507 extern void htable_insert(struct htable
*ht
, char *newelement
, __u32 key
);
509 extern void htable_init(struct htable
*ht
, int (*matches
)(void *htentry
,
510 void *searcheditem
), __u32 entry_offset
,
511 __u32 ref_counter_offset
);
513 extern struct conn
*get_conn(__u32 conn_id
);
515 extern int conn_init_out(struct conn
*rconn
, struct neighbor
*nb
);
517 extern void conn_init_sock_source(struct conn
*conn
);
518 extern void conn_init_sock_target(struct conn
*conn
);
520 extern void close_port(struct connlistener
*listener
);
522 extern struct connlistener
*open_port(__be64 port
);
524 extern int connect_port(struct conn
*rconn
, __be64 port
);
526 extern int connect_neigh(struct conn
*rconn
,
527 __u16 addrtypelen
, __u8
*addrtype
,
528 __u16 addrlen
, __u8
*addr
);
530 extern struct conn
* alloc_conn(gfp_t allocflags
);
532 extern void reset_conn(struct conn
*conn
);
535 extern struct neighbor
*get_neigh_by_mac(struct sk_buff
*skb
);
537 extern struct neighbor
*find_neigh(__u16 addrtypelen
, __u8
*addrtype
,
538 __u16 addrlen
, __u8
*addr
);
540 extern __u32
generate_neigh_list(char *buf
, __u32 buflen
, __u32 limit
,
543 extern void ping_resp(struct neighbor
*nb
, __u32 cookie
, __u32 respdelay
);
545 extern __u32
add_ping_req(struct neighbor
*nb
);
547 extern void rcv_announce(struct sk_buff
*skb
);
549 extern int __init
cor_neighbor_init(void);
552 extern void drain_ooo_queue(struct conn
*rconn
);
554 extern void conn_rcv_buildskb(char *data
, __u32 datalen
, __u32 conn_id
,
557 extern int __init
cor_rcv_init(void);
559 /* kpacket_parse.c */
560 extern void kernel_packet(struct neighbor
*nb
, struct sk_buff
*skb
, __u32 seqno
);
563 extern void schedule_controlmsg_timerfunc(struct neighbor
*nb
);
565 struct control_msg_out
;
567 extern struct control_msg_out
*alloc_control_msg(void);
569 extern void free_control_msg(struct control_msg_out
*cm
);
571 extern void send_pong(struct control_msg_out
*cm
, struct neighbor
*nb
,
574 extern void send_reset_conn(struct control_msg_out
*cm
, struct neighbor
*nb
,
577 extern void send_ack(struct control_msg_out
*cm
, struct neighbor
*nb
,
578 __u32 conn_id
, __u32 seqno
);
580 extern void send_connect_success(struct control_msg_out
*cm
,
581 struct neighbor
*nb
, __u32 rcvd_conn_id
, __u32 gen_conn_id
);
583 extern void send_connect_nb(struct control_msg_out
*cm
, struct neighbor
*nb
,
586 extern void send_conndata(struct control_msg_out
*cm
, struct neighbor
*nb
,
587 __u32 connid
, __u32 seqno
, char *data_orig
, char *data
,
590 /* cpacket_parse.c */
591 extern void parse(struct conn
*rconn
);
594 extern void retransmit_timerfunc(unsigned long arg
);
596 extern struct sk_buff
*create_packet_conn(struct conn
*target
, int size
,
599 extern struct sk_buff
*create_packet_kernel(struct neighbor
*nb
, int size
,
602 extern void send_conn_flushdata(struct conn
*rconn
, char *data
, __u32 datalen
);
604 extern void send_packet(struct sk_buff
*skb
, struct neighbor
*nb
,
607 extern void ack_received(struct neighbor
*nb
, __u32 conn_id
, __u32 seqno
);
609 extern void flush_out(struct conn
*rconn
);
611 extern int __init
cor_snd_init(void);
614 extern void databuf_pull(struct data_buf
*data
, char *dst
, int len
);
616 extern size_t databuf_pulluser(struct conn
*sconn
, struct msghdr
*msg
);
618 extern void databuf_ack(struct data_buf
*buf
, __u64 pos
);
620 extern void databuf_ackread(struct data_buf
*buf
);
622 extern int databuf_maypush(struct data_buf
*buf
);
624 extern void databuf_free(struct data_buf
*data
);
626 extern void databuf_init(struct data_buf
*data
);
628 extern int receive_userbuf(struct conn
*rconn
, struct msghdr
*msg
);
630 extern void receive_buf(struct conn
*rconn
, char *buf
, int len
);
632 extern int receive_skb(struct conn
*rconn
, struct sk_buff
*skb
);
634 extern void wake_sender(struct conn
*rconn
);
636 extern void forward_init(void);
640 static inline struct skb_procstate
*skb_pstate(struct sk_buff
*skb
)
642 return (struct skb_procstate
*) &(skb
->cb
[0]);
645 static inline struct sk_buff
*skb_from_pstate(struct skb_procstate
*ps
)
647 return (struct sk_buff
*) (((char *)ps
) - offsetof(struct sk_buff
,cb
));
651 static inline __u32
mss(struct neighbor
*nb
)
653 return nb
->dev
->mtu
- LL_RESERVED_SPACE(nb
->dev
) - 9;
657 static inline void put_u64(char *dst
, __u64 value
, int convbo
)
659 char *p_value
= (char *) &value
;
662 value
= cpu_to_be64(value
);
674 static inline void put_u32(char *dst
, __u32 value
, int convbo
)
676 char *p_value
= (char *) &value
;
679 value
= cpu_to_be32(value
);
687 static inline void put_u16(char *dst
, __u16 value
, int convbo
)
689 char *p_value
= (char *) &value
;
692 value
= cpu_to_be16(value
);
698 static inline char *cor_pull_skb(struct sk_buff
*skb
, unsigned int len
)
700 char *ptr
= skb_pull(skb
, len
);