/*
 * Connection oriented routing
 * Copyright (C) 2007-2009 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include "cor.h"

/**
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if the format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *  unit is bytes
 * [data]
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *  ...
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */
/* Commands */

#define NEIGHCMD_ADDADDR 1

/**
 * Parameter:
 * addrtypelen [2]
 * addrtype [addrtypelen]
 * addrlen [2]
 * addr [addrlen]
 */

DEFINE_MUTEX(neighbor_operation_lock);

char *addrtype = "id";
char *addr;
int addrlen;


LIST_HEAD(nb_list);
struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;


#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
        if (addrtypelen == 2 &&
                        (addrtype[0] == 'i' || addrtype[0] == 'I') &&
                        (addrtype[1] == 'd' || addrtype[1] == 'D'))
                return ADDRTYPE_ID;

        return ADDRTYPE_UNKNOWN;
}

void neighbor_free(struct kref *ref)
{
        struct neighbor *nb = container_of(ref, struct neighbor, ref);
        printk(KERN_ERR "neighbor free");
        BUG_ON(nb->nb_list.next != LIST_POISON1);
        BUG_ON(nb->nb_list.prev != LIST_POISON2);
        if (nb->addr != 0)
                kfree(nb->addr);
        nb->addr = 0;
        if (nb->dev != 0)
                dev_put(nb->dev);
        nb->dev = 0;
        kmem_cache_free(nb_slab, nb);
}

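/*
 * Allocate and zero-initialize a struct neighbor from nb_slab and set up its
 * kref, locks, lists and the randomized initial kpacket_seqno. Returns 0 if
 * the slab allocation fails.
 */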
static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
        struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
        __u32 seqno;

        if (unlikely(nb == 0))
                return 0;

        memset(nb, 0, sizeof(struct neighbor));

        kref_init(&(nb->ref));
        mutex_init(&(nb->cmsg_lock));
        INIT_LIST_HEAD(&(nb->control_msgs_out));
        INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
        nb->last_ping_time = jiffies;
        atomic_set(&(nb->ooo_packets), 0);
        get_random_bytes((char *) &seqno, sizeof(seqno));
        mutex_init(&(nb->pingcookie_lock));
        atomic_set(&(nb->latency), 0);
        spin_lock_init(&(nb->state_lock));
        atomic_set(&(nb->kpacket_seqno), seqno);
        mutex_init(&(nb->conn_list_lock));
        INIT_LIST_HEAD(&(nb->rcv_conn_list));
        INIT_LIST_HEAD(&(nb->snd_conn_list));
        spin_lock_init(&(nb->retrans_lock));
        INIT_LIST_HEAD(&(nb->retrans_list));
        INIT_LIST_HEAD(&(nb->retrans_list_conn));

        return nb;
}

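/*
 * Look up a neighbor by the source hardware address of an incoming skb.
 * Takes a reference on the returned neighbor; the caller must kref_put() it.
 */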
struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
        struct list_head *currlh;
        struct neighbor *ret = 0;

        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        mutex_lock(&(neighbor_operation_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));
                        break;
                }

                currlh = currlh->next;
        }

        mutex_unlock(&(neighbor_operation_lock));

        return ret;
}

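/*
 * Look up a neighbor by its announced address (addrtype "id"). Takes a
 * reference on the returned neighbor; returns 0 if the address type is
 * unknown or no matching neighbor exists.
 */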
struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
                __u16 addrlen, __u8 *addr)
{
        struct list_head *currlh;
        struct neighbor *ret = 0;

        if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
                return 0;

        mutex_lock(&(neighbor_operation_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
                                addrlen) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));

                        goto out;
                }

                currlh = currlh->next;
        }

out:
        mutex_unlock(&(neighbor_operation_lock));

        return ret;
}

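/*
 * Write a list of the currently active neighbors into buf:
 * total neighbor count [4], rows in this response [4], then for each row
 * numaddr [2], addrtypelen [2], addrlen [2], addrtype, addr. "offset" skips
 * that many active neighbors; rows that do not fit into buflen are dropped.
 * Returns the number of bytes written.
 */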
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
        struct list_head *currlh;

        char *p_totalneighs = buf;
        char *p_response_rows = buf + 4;

        int bufferfull = 0;

        __u32 total = 0;
        __u32 cnt = 0;

        __u32 buf_offset = 8;

        BUG_ON(buf == 0);
        BUG_ON(buflen < 8);

        mutex_lock(&(neighbor_operation_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                __u8 state;
                unsigned long iflags;
                /* get_neigh_state not used here because it would deadlock */
                spin_lock_irqsave( &(curr->state_lock), iflags );
                state = curr->state;
                spin_unlock_irqrestore( &(curr->state_lock), iflags );

                if (state != NEIGHBOR_STATE_ACTIVE)
                        goto cont2;

                if (total < offset)
                        goto cont;

                /* unsigned arithmetic, so test the space requirement directly */
                if (unlikely(buf_offset + 6 + 2 + curr->addrlen > buflen))
                        bufferfull = 1;

                if (bufferfull)
                        goto cont;

                put_u16(buf + buf_offset, 1, 1);/* numaddr */
                buf_offset += 2;
                put_u16(buf + buf_offset, 2, 1);/* addrtypelen */
                buf_offset += 2;
                put_u16(buf + buf_offset, curr->addrlen, 1);/* addrlen */
                buf_offset += 2;
                buf[buf_offset] = 'i';  /* addrtype */
                buf_offset += 1;
                buf[buf_offset] = 'd';
                buf_offset += 1;
                memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
                buf_offset += curr->addrlen;

                BUG_ON(buf_offset > buflen);

                cnt++;

cont:
                total++;
cont2:
                currlh = currlh->next;
        }

        mutex_unlock(&(neighbor_operation_lock));

        put_u32(p_totalneighs, total, 1);
        put_u32(p_response_rows, cnt, 1);

        return buf_offset;
}

void set_last_routdtrip(struct neighbor *nb, unsigned long time)
{
        unsigned long iflags;

        BUG_ON(nb == 0);

        spin_lock_irqsave( &(nb->state_lock), iflags );

        if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
                        nb->state_time.last_roundtrip))
                nb->state_time.last_roundtrip = time;

        spin_unlock_irqrestore( &(nb->state_lock), iflags );
}

static void reset_stall_conns(struct neighbor *nb,
                int stall_time_ms, int resetall)
{
        struct list_head *currlh;

start:
        mutex_lock(&(nb->conn_list_lock));
        currlh = nb->snd_conn_list.next;

        while (currlh != &(nb->snd_conn_list)) {
                struct conn *rconn = container_of(currlh, struct conn,
                                target.out.nb_list);
                BUG_ON(rconn->targettype != TARGET_OUT);

                if (resetall || stall_time_ms >=
                                rconn->target.out.stall_timeout_ms) {
                        /*
                         * reset_conn must not be called with conn_list_lock
                         * held
                         */
                        mutex_unlock(&(nb->conn_list_lock));
                        reset_conn(rconn);
                        goto start;
                }
                currlh = currlh->next;
        }
        mutex_unlock(&(nb->conn_list_lock));
}

static void stall_timerfunc(struct work_struct *work);

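/*
 * Handle a neighbor that has entered NEIGHBOR_STATE_STALLED: reset stalled
 * connections, and either kill the neighbor once NB_KILL_TIME_MS has passed
 * since the last roundtrip or re-arm the stall timeout work. fromtimer tells
 * whether we were called from the delayed work (which already holds a ref)
 * or still need to take one before scheduling.
 */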
static void stall_timer(struct neighbor *nb, int fromtimer)
{
        int stall_time_ms;
        __u8 nbstate;

        int resetall;

        unsigned long iflags;

        spin_lock_irqsave( &(nb->state_lock), iflags );
        stall_time_ms = jiffies_to_msecs(jiffies -
                        nb->state_time.last_roundtrip);
        nbstate = nb->state;

        if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
                nb->str_timer_pending = 0;
        spin_unlock_irqrestore( &(nb->state_lock), iflags );

        if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
                kref_put(&(nb->ref), neighbor_free);
                return;
        }

        resetall = (stall_time_ms > NB_KILL_TIME_MS);

        /*if(resetall)
                printk(KERN_ERR "reset_all");*/

        reset_stall_conns(nb, stall_time_ms, resetall);

        if (resetall) {
                spin_lock_irqsave( &(nb->state_lock), iflags );
                nb->state = NEIGHBOR_STATE_KILLED;
                spin_unlock_irqrestore( &(nb->state_lock), iflags );

                list_del(&(nb->nb_list));
                kref_put(&(nb->ref), neighbor_free); /* nb_list */

                kref_put(&(nb->ref), neighbor_free); /* stall_timer */
        } else {
                if (fromtimer == 0) {
                        int pending;
                        spin_lock_irqsave( &(nb->state_lock), iflags );
                        pending = nb->str_timer_pending;
                        spin_unlock_irqrestore( &(nb->state_lock), iflags );

                        if (pending)
                                return;

                        kref_get(&(nb->ref));
                }

                INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timerfunc);
                schedule_delayed_work(&(nb->stalltimeout_timer),
                                msecs_to_jiffies(STALL_TIMER_INTERVAL_MS));
        }
}

static void stall_timerfunc(struct work_struct *work)
{
        struct neighbor *nb = container_of(to_delayed_work(work),
                        struct neighbor, stalltimeout_timer);
        stall_timer(nb, 1);
}

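/*
 * Return the current neighbor state. As a side effect, an active neighbor
 * whose last roundtrip is older than NB_STALL_TIME_MS while enough pings are
 * in transit is moved to NEIGHBOR_STATE_STALLED and the stall timer is
 * started.
 */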
int get_neigh_state(struct neighbor *nb)
{
        int ret;
        int switchedtostalled = 0;
        unsigned long iflags;

        BUG_ON(nb == 0);

        spin_lock_irqsave( &(nb->state_lock), iflags );

        if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
                        time_after_eq(jiffies, nb->state_time.last_roundtrip +
                        msecs_to_jiffies(NB_STALL_TIME_MS)) &&
                        nb->ping_intransit >= NB_STALL_MINPINGS_MS))) {
                nb->state = NEIGHBOR_STATE_STALLED;
                switchedtostalled = 1;
        }

        ret = nb->state;

        spin_unlock_irqrestore( &(nb->state_lock), iflags );

        if (unlikely(switchedtostalled)) {
                /*printk(KERN_ERR "switched to stalled");*/
                stall_timer(nb, 0);
        }

        return ret;
}

static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
        int i;

        for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie == cookie)
                        return &(nb->cookies[i]);
        }
        return 0;
}

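/*
 * Process a pong for the given cookie: update the smoothed latency estimate
 * (15/16 old + 1/16 new, in microseconds, minus the reported response delay),
 * expire older cookies that have been passed by too many pongs, and advance
 * the neighbor state machine from initial/stalled towards active.
 */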
void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
        struct ping_cookie *c;
        int i;

        unsigned long cookie_sendtime;
        __s64 newlatency;

        unsigned long iflags;

        mutex_lock(&(nb->pingcookie_lock));

        c = find_cookie(nb, cookie);

        if (unlikely(c == 0))
                goto out;

        cookie_sendtime = c->time;

        newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
                jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
        if (unlikely(newlatency < 0))
                newlatency = 0;
        if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
                newlatency = ((__s64)256)*256*256*256 - 1;

        atomic_set(&(nb->latency), (__u32) newlatency);

        c->cookie = 0;
        nb->ping_intransit--;

        for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie != 0 &&
                                time_before(nb->cookies[i].time, c->time)) {
                        nb->cookies[i].pongs++;
                        if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
                                nb->cookies[i].cookie = 0;
                                nb->cookies[i].pongs = 0;
                                nb->ping_intransit--;
                        }
                }
        }

        spin_lock_irqsave( &(nb->state_lock), iflags );

        if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
                        nb->state == NEIGHBOR_STATE_STALLED)) {
                nb->ping_success++;

                if (nb->state == NEIGHBOR_STATE_INITIAL) {
                        __u64 jiffies64 = get_jiffies_64();
                        if (nb->state_time.last_state_change == 0)
                                nb->state_time.last_state_change = jiffies64;
                        if (jiffies64 <= (nb->state_time.last_state_change +
                                        msecs_to_jiffies(INITIAL_TIME_MS)))
                                goto out2;
                }

                if (nb->ping_success >= PING_SUCCESS_CNT) {
                        /*if (nb->state == NEIGHBOR_STATE_INITIAL)
                                printk(KERN_ERR "switched from initial to active");
                        else
                                printk(KERN_ERR "switched from stalled to active");
                        */
                        nb->state = NEIGHBOR_STATE_ACTIVE;
                        nb->ping_success = 0;
                        nb->state_time.last_roundtrip = jiffies;
                }
        } else {
                nb->state_time.last_roundtrip = cookie_sendtime;
        }

out2:
        spin_unlock_irqrestore( &(nb->state_lock), iflags );

out:
        mutex_unlock(&(nb->pingcookie_lock));
}

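/*
 * Allocate a ping cookie for an outgoing ping and return its value. If all
 * cookie slots are in use, a random slot outside the FIFO area is reused.
 * Also updates ping_intransit and last_ping_time.
 */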
__u32 add_ping_req(struct neighbor *nb)
{
        struct ping_cookie *c;
        __u32 i;

        __u32 cookie;

        mutex_lock(&(nb->pingcookie_lock));

        for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie == 0)
                        goto found;
        }

        get_random_bytes((char *) &i, sizeof(i));
        i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
                PING_COOKIES_FIFO;

found:
        c = &(nb->cookies[i]);
        c->time = jiffies;
        c->pongs = 0;
        nb->lastcookie++;
        if (unlikely(nb->lastcookie == 0))
                nb->lastcookie++;
        c->cookie = nb->lastcookie;

        nb->ping_intransit++;

        cookie = c->cookie;

        nb->last_ping_time = jiffies;

        mutex_unlock(&(nb->pingcookie_lock));

        return cookie;
}

static int neighbor_idle(struct neighbor *nb)
{
        int ret;
        mutex_lock(&(nb->conn_list_lock));
        ret = (list_empty(&(nb->rcv_conn_list)) &&
                list_empty(&(nb->snd_conn_list)));
        mutex_unlock(&(nb->conn_list_lock));
        return ret;
}

/**
 * Checks, in addition to the checks and timings already done in kpacket_gen.c,
 * whether a ping should be sent now. This is primarily to make sure that we
 * do not invalidate other ping cookies which might still receive responses.
 * It does this by requiring a certain minimum delay between pings, depending
 * on how many pings are already in transit.
 */
int time_to_send_ping(struct neighbor *nb)
{
        int rc = 1;

        int state = get_neigh_state(nb);
        int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
                        neighbor_idle(nb));

#warning ping_intransit is not counted correctly when packets are lost
        mutex_lock(&(nb->pingcookie_lock));
        if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
                __u32 mindelay = (((__u32)atomic_read(&(nb->latency)))/1000) <<
                                (nb->ping_intransit + 1 -
                                PING_COOKIES_NOTHROTTLE);

                if (mindelay > PING_THROTTLE_LIMIT_MS)
                        mindelay = PING_THROTTLE_LIMIT_MS;

                if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
                        rc = 0;
        }

        if (jiffies_to_msecs(jiffies - nb->last_ping_time) <
                        PING_MININTERVAL_MS)
                rc = 0;

        if (rc != 0) {
                __u32 forcetime;
#warning todo reduce forcetime if one ping is intransit
                if (unlikely(state != NEIGHBOR_STATE_ACTIVE))
                        forcetime = PING_FORCETIME_MS;
                else if (idle)
                        forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
                else
                        forcetime = PING_FORCETIME_ACTIVE_MS;

                if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
                        rc = 2;
        }

        mutex_unlock(&(nb->pingcookie_lock));

        return rc;
}

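/*
 * Insert a freshly parsed neighbor into nb_list unless a neighbor with the
 * same address is already known, and start its control message and
 * retransmit timers. The caller's reference on nb is handed over to the
 * list; on a duplicate the new neighbor is freed instead.
 */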
static void add_neighbor(struct neighbor *nb)
{
        struct list_head *currlh = nb_list.next;

        BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
                                curr->addrlen) == 0)
                        goto already_present;

                currlh = currlh->next;
        }
        /* kref_get not needed here, because the caller leaves its ref to us */
        printk(KERN_ERR "add_neigh");
        list_add_tail(&(nb->nb_list), &nb_list);
        schedule_controlmsg_timerfunc(nb);
        INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
        INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

        if (0) {
already_present:
                kmem_cache_free(nb_slab, nb);
        }
}

static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
        char *ptr = cor_pull_skb(skb, 4);

        __u32 ret = 0;

        BUG_ON(0 == ptr);

        ((char *)&ret)[0] = ptr[0];
        ((char *)&ret)[1] = ptr[1];
        ((char *)&ret)[2] = ptr[2];
        ((char *)&ret)[3] = ptr[3];

        if (convbo)
                return be32_to_cpu(ret);
        return ret;
}

static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
                char *cmddata)
{
        __u16 addrtypelen;
        char *addrtype;
        __u16 addrlen;
        char *addr;

        BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

        if (nb->addr != 0)
                return 0;

        if (len < 4)
                return 0;

        addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
        cmddata += 2;
        len -= 2;

        if (len < 2)
                return 0;

        addrlen = be16_to_cpu(*((__u16 *) cmddata));
        cmddata += 2;
        len -= 2;

        /* len is unsigned, so check the remaining space before subtracting */
        if (((__u32) addrtypelen) + addrlen > len)
                return 0;

        addrtype = cmddata;
        cmddata += addrtypelen;
        len -= addrtypelen;

        addr = cmddata;
        cmddata += addrlen;
        len -= addrlen;

        if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
                return 0;

        nb->addr = kmalloc(addrlen, GFP_KERNEL);
        if (unlikely(nb->addr == 0))
                return 1;

        memcpy(nb->addr, addr, addrlen);
        nb->addrlen = addrlen;

        return 0;
}

static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
                char *cmddata)
{
        if (cmd == NEIGHCMD_ADDADDR) {
                apply_announce_addaddr(nb, cmd, len, cmddata);
        } else {
                /* ignore unknown cmds */
        }
}

static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
                char *source_hw)
{
        struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

        if (unlikely(nb == 0))
                return;

        while (len >= 8) {
                __u32 cmd;
                __u32 cmdlen;

                cmd = be32_to_cpu(*((__u32 *) msg));
                msg += 4;
                len -= 4;
                cmdlen = be32_to_cpu(*((__u32 *) msg));
                msg += 4;
                len -= 4;

                BUG_ON(cmdlen > len);

                apply_announce_cmd(nb, cmd, cmdlen, msg);

                msg += cmdlen;
                len -= cmdlen;
        }

        BUG_ON(len != 0);

        memcpy(nb->mac, source_hw, MAX_ADDR_LEN);

        dev_hold(dev);
        nb->dev = dev;
        add_neighbor(nb);
}

static int check_announce_cmds(char *msg, __u32 len)
{
        while (len >= 8) {
                __u32 cmd;
                __u32 cmdlen;

                cmd = be32_to_cpu(*((__u32 *) msg));
                msg += 4;
                len -= 4;
                cmdlen = be32_to_cpu(*((__u32 *) msg));
                msg += 4;
                len -= 4;

                /* malformed packet */
                if (unlikely(cmdlen > len))
                        return 1;

                msg += cmdlen;
                len -= cmdlen;
        }

        if (unlikely(len != 0))
                return 1;

        return 0;
}

static void parse_announce(char *msg, __u32 len, struct net_device *dev,
                char *source_hw)
{
        __u32 min_announce_version;
        __u32 max_announce_version;
        __u32 min_cor_version;
        __u32 max_cor_version;

        if (unlikely(len < 16))
                return;

        min_announce_version = be32_to_cpu(*((__u32 *) msg));
        msg += 4;
        len -= 4;
        max_announce_version = be32_to_cpu(*((__u32 *) msg));
        msg += 4;
        len -= 4;
        min_cor_version = be32_to_cpu(*((__u32 *) msg));
        msg += 4;
        len -= 4;
        max_cor_version = be32_to_cpu(*((__u32 *) msg));
        msg += 4;
        len -= 4;

        if (min_announce_version != 0)
                return;
        if (min_cor_version != 0)
                return;
        if (check_announce_cmds(msg, len)) {
                return;
        }
        apply_announce_cmds(msg, len, dev, source_hw);
}

struct announce_in {
        /* lh has to be first */
        struct list_head lh;
        struct sk_buff_head skbs; /* sorted by offset */
        struct net_device *dev;
        char source_hw[MAX_ADDR_LEN];
        __u32 announce_proto_version;
        __u32 packet_version;
        __u32 total_size;
        __u32 received_size;
        __u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;

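/*
 * All chunks of an announce have arrived: copy them into one contiguous
 * buffer in offset order, parse it and free the reassembly state. If the
 * buffer allocation fails, the state is kept and merging is retried when the
 * next packet arrives.
 */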
static void merge_announce(struct announce_in *ann)
{
        char *msg = kmalloc(ann->total_size, GFP_KERNEL);
        __u32 copy = 0;

        if (msg == 0) {
                /* try again when next packet arrives */
                return;
        }

        while (copy != ann->total_size) {
                __u32 currcpy;
                __u32 offset = 0;
                struct sk_buff *skb;
                struct skb_procstate *ps;

                if (unlikely(skb_queue_empty(&(ann->skbs)))) {
                        printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
                                "empty while merging packets\n");
                        goto free;
                }

                skb = skb_dequeue(&(ann->skbs));
                ps = skb_pstate(skb);

                currcpy = skb->len;
                if (unlikely(ps->funcstate.announce.offset > copy)) {
                        printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
                                "value found\n");
                        goto free;
                }

                if (unlikely(ps->funcstate.announce.offset < copy)) {
                        offset = copy - ps->funcstate.announce.offset;
                        currcpy -= offset;
                }

                if (unlikely(currcpy + copy > ann->total_size))
                        goto free;

                memcpy(msg + copy, skb->data + offset, currcpy);
                copy += currcpy;
                kfree_skb(skb);
        }

        parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

free:
        if (msg != 0)
                kfree(msg);

        dev_put(ann->dev);
        list_del(&(ann->lh));
        kmem_cache_free(announce_in_slab, ann);
}

static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
        struct skb_procstate *ps = skb_pstate(skb);

        __u32 offset = ps->funcstate.announce.offset;
        __u32 len = skb->len;

        __u32 curroffset = 0;
        __u32 prevoffset = 0;
        __u32 prevlen = 0;

        struct sk_buff *curr = ann->skbs.next;

        if (unlikely(len + offset > ann->total_size)) {
                /* invalid header */
                kfree_skb(skb);
                return 0;
        }

        /*
         * Try to find the right place to insert in the sorted list. This
         * means to process the list until we find a skb which has a greater
         * offset, so we can insert before it to keep the sort order. However,
         * this is complicated by the fact that the new skb must not be
         * inserted between 2 skbs if there is no data missing in between. So
         * the loop has to keep running until there is either a gap to insert
         * into or we see that this data has already been received.
         */
        while ((void *) curr != (void *) &(ann->skbs)) {
                /* examine the queued skb, not the newly received one */
                struct skb_procstate *currps = skb_pstate(curr);

                curroffset = currps->funcstate.announce.offset;

                if (curroffset > offset && (prevoffset + prevlen) < curroffset)
                        break;

                prevoffset = curroffset;
                prevlen = curr->len;
                curr = curr->next;

                if ((offset+len) <= (prevoffset+prevlen)) {
                        /* we already have this data */
                        kfree_skb(skb);
                        return 0;
                }
        }

        /*
         * Calculate how much data was really received, by subtracting
         * the bytes we already have.
         */
        if (unlikely(prevoffset + prevlen > offset)) {
                len -= (prevoffset + prevlen) - offset;
                offset = prevoffset + prevlen;
        }

        if (unlikely((void *) curr != (void *) &(ann->skbs) &&
                        (offset + len) > curroffset))
                len = curroffset - offset;

        ann->received_size += len;
        BUG_ON(ann->received_size > ann->total_size);
        __skb_queue_before(&(ann->skbs), curr, skb);
        ann->last_received_packet = get_jiffies_64();

        if (ann->received_size == ann->total_size)
                merge_announce(ann);
        else if (unlikely(ann->skbs.qlen >= 16))
                return 1;

        return 0;
}

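/*
 * Entry point for received announce packets. The announce header fields
 * (proto version, packet version, total size, offset) are pulled off the
 * skb, the matching reassembly state is looked up (or the least recently
 * used one is recycled once 128 are pending, or a new one is allocated),
 * and the payload is queued via _rcv_announce().
 */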
void rcv_announce(struct sk_buff *skb)
{
        struct skb_procstate *ps = skb_pstate(skb);
        struct announce_in *curr = 0;
        struct announce_in *leastactive = 0;
        __u32 list_size = 0;

        __u32 announce_proto_version = pull_u32(skb, 1);
        __u32 packet_version = pull_u32(skb, 1);
        __u32 total_size = pull_u32(skb, 1);

        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        ps->funcstate.announce.offset = pull_u32(skb, 1);

        mutex_lock(&(neighbor_operation_lock));

        /* take the lock before these checks, the discard path unlocks it */
        if (total_size > 8192)
                goto discard;

        if (announce_proto_version != 0)
                goto discard;

        curr = (struct announce_in *) announce_list.next;

        while (((struct list_head *) curr) != &(announce_list)) {
                list_size++;
                if (curr->dev == skb->dev &&
                        memcmp(curr->source_hw, source_hw, MAX_ADDR_LEN) == 0 &&
                        curr->announce_proto_version == announce_proto_version &&
                        curr->packet_version == packet_version &&
                        curr->total_size == total_size)
                        goto found;

                if (leastactive == 0 || curr->last_received_packet <
                        leastactive->last_received_packet)
                        leastactive = curr;

                curr = (struct announce_in *) curr->lh.next;
        }

        if (list_size >= 128) {
                BUG_ON(leastactive == 0);
                curr = leastactive;

                curr->last_received_packet = get_jiffies_64();

                while (!skb_queue_empty(&(curr->skbs))) {
                        struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
                        kfree_skb(skb2);
                }

                dev_put(curr->dev);
        } else {
                curr = kmem_cache_alloc(announce_in_slab,
                                GFP_KERNEL);
                if (curr == 0)
                        goto discard;

                skb_queue_head_init(&(curr->skbs));
                list_add_tail((struct list_head *) curr, &announce_list);
        }

        curr->packet_version = packet_version;
        curr->total_size = total_size;
        curr->received_size = 0;
        curr->announce_proto_version = announce_proto_version;
        curr->dev = skb->dev;
        dev_hold(curr->dev);
        memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

found:
        if (_rcv_announce(skb, curr)) {
                list_del((struct list_head *) curr);
                dev_put(curr->dev);
                kmem_cache_free(announce_in_slab, curr);
        }

        if (0) {
discard:
                kfree_skb(skb);
        }

        mutex_unlock(&(neighbor_operation_lock));
}

struct announce {
        struct kref ref;

        __u32 packet_version;
        char *announce_msg;
        __u32 announce_msg_len;
};

struct announce *last_announce;

struct announce_data {
        struct delayed_work announce_work;

        struct net_device *dev;

        struct announce *ann;

        struct list_head lh;

        __u32 curr_announce_msg_offset;
        __u64 scheduled_announce_timer;
};

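/*
 * Send the next chunk (up to 256 bytes) of the current announce message as a
 * broadcast frame on ann->dev: 17 byte header (packet type, announce proto
 * version, packet version, total size, offset) followed by the chunk, then
 * advance curr_announce_msg_offset and wrap back to 0 at the end of the
 * message.
 */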
static void _splitsend_announce(struct announce_data *ann)
{
        struct sk_buff *skb;
        __u32 packet_size = 256;
        __u32 remainingdata = ann->ann->announce_msg_len -
                        ann->curr_announce_msg_offset;
        __u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
        __u32 overhead = 17 + headroom;
        char *header;
        char *ptr;

        if (remainingdata < packet_size)
                packet_size = remainingdata;

        skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
        if (unlikely(0 == skb))
                return;

        skb->protocol = htons(ETH_P_COR);
        skb->dev = ann->dev;
        skb_reserve(skb, headroom);

        if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
                        ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
                goto out_err;

        skb_reset_network_header(skb);

        header = skb_put(skb, 17);
        if (unlikely(header == 0))
                goto out_err;

        header[0] = PACKET_TYPE_ANNOUNCE;

        put_u32(header + 1, 0, 1); /* announce proto version */
        put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
        put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
        put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

        ptr = skb_put(skb, packet_size);
        if (unlikely(ptr == 0))
                goto out_err;

        memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
                        packet_size);
        dev_queue_xmit(skb);

        ann->curr_announce_msg_offset += packet_size;

        if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
                ann->curr_announce_msg_offset = 0;

        if (0) {
out_err:
                if (skb != 0)
                        kfree_skb(skb);
        }
}

static void announce_free(struct kref *ref)
{
        struct announce *ann = container_of(ref, struct announce, ref);
        kfree(ann->announce_msg);
        kfree(ann);
}

static void splitsend_announce(struct work_struct *work)
{
        struct announce_data *ann = container_of(to_delayed_work(work),
                        struct announce_data, announce_work);
        int reschedule = 0;

        mutex_lock(&(neighbor_operation_lock));

        if (unlikely(ann->dev == 0))
                goto out;

        reschedule = 1;

        if (unlikely(ann->ann == 0 && last_announce == 0))
                goto out;

        if (ann->curr_announce_msg_offset == 0 &&
                        unlikely(ann->ann != last_announce)) {
                if (ann->ann != 0)
                        kref_put(&(ann->ann->ref), announce_free);
                ann->ann = last_announce;
                kref_get(&(ann->ann->ref));
        }

        _splitsend_announce(ann);
out:
        mutex_unlock(&(neighbor_operation_lock));

        if (reschedule) {
                __u64 jiffies = get_jiffies_64();
                int delay;

                ann->scheduled_announce_timer += msecs_to_jiffies(
                                ANNOUNCE_SEND_PACKETINTELVAL_MS);

                delay = ann->scheduled_announce_timer - jiffies;
                if (delay < 0)
                        delay = 0;

                INIT_DELAYED_WORK(&(ann->announce_work), splitsend_announce);
                schedule_delayed_work(&(ann->announce_work), delay);
        }
}

static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
        struct list_head *lh = announce_out_list.next;

        while (lh != &announce_out_list) {
                struct announce_data *curr = (struct announce_data *)(
                                ((char *) lh) -
                                offsetof(struct announce_data, lh));

                if (curr->dev == dev)
                        return curr;

                /* advance, otherwise the loop never terminates */
                lh = lh->next;
        }

        return 0;
}

static void announce_sent_adddev(struct net_device *dev)
{
        struct announce_data *ann;

        ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

        if (unlikely(ann == 0)) {
                printk(KERN_ERR "cor cannot allocate memory for sending "
                                "announces");
                return;
        }

        memset(ann, 0, sizeof(struct announce_data));

        dev_hold(dev);
        ann->dev = dev;

        mutex_lock(&(neighbor_operation_lock));
        list_add_tail(&(ann->lh), &announce_out_list);
        mutex_unlock(&(neighbor_operation_lock));

        ann->scheduled_announce_timer = get_jiffies_64();
        INIT_DELAYED_WORK(&(ann->announce_work), splitsend_announce);
        schedule_delayed_work(&(ann->announce_work), 1);
}

static void announce_sent_rmdev(struct net_device *dev)
{
        struct announce_data *ann;

        mutex_lock(&(neighbor_operation_lock));

        ann = get_announce_by_netdev(dev);

        if (ann == 0)
                goto out;

        dev_put(ann->dev);
        ann->dev = 0;

out:
        mutex_unlock(&(neighbor_operation_lock));
}

int netdev_notify_func(struct notifier_block *not, unsigned long event,
                void *ptr)
{
        struct net_device *dev = (struct net_device *) ptr;

        switch(event){
        case NETDEV_UP:
                announce_sent_adddev(dev);
                break;
        case NETDEV_DOWN:
                announce_sent_rmdev(dev);
                break;
        case NETDEV_REBOOT:
        case NETDEV_CHANGE:
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
        case NETDEV_CHANGEMTU:
        case NETDEV_CHANGEADDR:
        case NETDEV_GOING_DOWN:
        case NETDEV_CHANGENAME:
        case NETDEV_FEAT_CHANGE:
        case NETDEV_BONDING_FAILOVER:
                break;
        default:
                return 1;
        }

        return 0;
}

static int set_announce(char *msg, __u32 len)
{
        struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

        if (unlikely(ann == 0)) {
                kfree(msg);
                return 1;
        }

        memset(ann, 0, sizeof(struct announce));

        ann->announce_msg = msg;
        ann->announce_msg_len = len;

        kref_init(&(ann->ref));

        mutex_lock(&(neighbor_operation_lock));

        if (last_announce != 0) {
                ann->packet_version = last_announce->packet_version + 1;
                kref_put(&(last_announce->ref), announce_free);
        }

        last_announce = ann;

        mutex_unlock(&(neighbor_operation_lock));

        return 0;
}

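/*
 * Build this node's announce message: the four proto version fields followed
 * by a single NEIGHCMD_ADDADDR command carrying addrtype ("id") and the
 * node address generated at init time, and install it as last_announce via
 * set_announce(). Returns 1 on allocation failure.
 */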
static int generate_announce(void)
{
        __u32 addrtypelen = strlen(addrtype);

        __u32 hdr_len = 16;
        __u32 cmd_hdr_len = 8;
        __u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

        __u32 len = hdr_len + cmd_hdr_len + cmd_len;
        __u32 offset = 0;

        char *msg = kmalloc(len, GFP_KERNEL);
        if (unlikely(msg == 0))
                return 1;

        put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
        offset += 4;


        put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
        offset += 4;
        put_u32(msg + offset, cmd_len, 1); /* command length */
        offset += 4;

        /* addrtypelen, addrlen */
        put_u16(msg + offset, addrtypelen, 1);
        offset += 2;
        put_u16(msg + offset, addrlen, 1);
        offset += 2;

        /* addrtype, addr */
        memcpy(msg + offset, addrtype, addrtypelen);
        offset += addrtypelen;
        memcpy(msg + offset, addr, addrlen);
        offset += addrlen;

        BUG_ON(offset != len);

        return set_announce(msg, len);
}

int __init cor_neighbor_init(void)
{
        addrlen = 16;

        addr = kmalloc(addrlen, GFP_KERNEL);
        if (unlikely(addr == 0))
                goto error_free2;

        get_random_bytes(addr, addrlen);

        nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
                        0, 0);
        announce_in_slab = kmem_cache_create("cor_announce_in",
                        sizeof(struct announce_in), 8, 0, 0);

        if (unlikely(generate_announce()))
                goto error_free1;

        memset(&netdev_notify, 0, sizeof(netdev_notify));
        netdev_notify.notifier_call = netdev_notify_func;
        register_netdevice_notifier(&netdev_notify);

        return 0;

error_free1:
        kfree(addr);

error_free2:
        return -ENOMEM;
}

MODULE_LICENSE("GPL");