per conn neighbor stall timeout removal
[cor_2_6_31.git] / net / cor / neighbor.c
1 /*
2 * Connection oriented routing
3 * Copyright (C) 2007-2010 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
21 #include "cor.h"
23 /**
24 * Split packet data format:
25 * announce proto version [4]
26 * is 0, may be increased if format changes
27 * packet version [4]
28 * starts with 0, increments every time the data field changes
29 * total size [4]
30 * total data size of all merged packets
31 * offset [4]
32 * used to determine the order when merging the split packet
33 * unit is bytes
34 * [data]
35 * cumulative checksum [8] (not yet implemented)
36 * chunk 1 contains the checksum of the data in chunk 1
37 * chunk 2 contains the checksum of the data in chunk 1+2
38 * ...
40 * Data format of the announce packet "data" field:
41 * min_announce_proto_version [4]
42 * max_announce_proto_version [4]
43 * min_cor_proto_version [4]
44 * max_cor_proto_version [4]
45 * versions which are understood
47 * command [4]
48 * commandlength [4]
49 * commanddata [commandlength]
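/*
 * Illustrative sketch, not part of the original code: the four split-packet
 * header fields described above written out as a struct for clarity. The
 * struct name is hypothetical; the code below builds and parses these fields
 * one at a time with put_u32()/pull_u32() (see send_announce_chunk() and
 * rcv_announce()), and they are big endian on the wire.
 */
struct cor_announce_split_hdr_sketch {
	__u32 announce_proto_version;	/* is 0, may be increased later */
	__u32 packet_version;		/* bumped whenever the data changes */
	__u32 total_size;		/* total size of all merged chunks */
	__u32 offset;			/* byte offset of this chunk */
	/* followed by [data] */
};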
52 /* Commands */
54 #define NEIGHCMD_ADDADDR 1
56 /**
57 * Parameter:
58 * addrtypelen [2]
59 * addrlen [2]
60 * addrtype [addrtypelen]
61 * addr [addrlen]
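/*
 * Minimal sketch, not part of the original code: encoding the NEIGHCMD_ADDADDR
 * parameter block described above. It assumes the put_u16() helper that
 * generate_announce() below also uses; the function name is hypothetical.
 */
static inline __u32 encode_addaddr_params_sketch(char *buf, char *addrtype,
		__u16 addrtypelen, char *addr, __u16 addrlen)
{
	__u32 offset = 0;

	put_u16(buf + offset, addrtypelen, 1);		/* addrtypelen */
	offset += 2;
	put_u16(buf + offset, addrlen, 1);		/* addrlen */
	offset += 2;
	memcpy(buf + offset, addrtype, addrtypelen);	/* addrtype */
	offset += addrtypelen;
	memcpy(buf + offset, addr, addrlen);		/* addr */
	offset += addrlen;

	return offset; /* == 2 + 2 + addrtypelen + addrlen, see generate_announce() */
}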
65 DEFINE_MUTEX(neighbor_operation_lock);
67 char *addrtype = "id";
68 char *addr;
69 int addrlen;
72 LIST_HEAD(nb_list);
73 struct kmem_cache *nb_slab;
75 LIST_HEAD(announce_out_list);
77 struct notifier_block netdev_notify;
80 #define ADDRTYPE_UNKNOWN 0
81 #define ADDRTYPE_ID 1
83 static int get_addrtype(__u32 addrtypelen, char *addrtype)
85 if (addrtypelen == 2 &&
86 (addrtype[0] == 'i' || addrtype[0] == 'I') &&
87 (addrtype[1] == 'd' || addrtype[1] == 'D'))
88 return ADDRTYPE_ID;
90 return ADDRTYPE_UNKNOWN;
93 void neighbor_free(struct kref *ref)
95 struct neighbor *nb = container_of(ref, struct neighbor, ref);
96 printk(KERN_ERR "neighbor free");
97 BUG_ON(nb->nb_list.next != LIST_POISON1);
98 BUG_ON(nb->nb_list.prev != LIST_POISON2);
99 if (nb->addr != 0)
100 kfree(nb->addr);
101 nb->addr = 0;
102 if (nb->dev != 0)
103 dev_put(nb->dev);
104 nb->dev = 0;
105 kmem_cache_free(nb_slab, nb);
108 static struct neighbor *alloc_neighbor(gfp_t allocflags)
110 struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
111 __u32 seqno;
113 if (unlikely(nb == 0))
114 return 0;
116 memset(nb, 0, sizeof(struct neighbor));
118 kref_init(&(nb->ref));
119 mutex_init(&(nb->cmsg_lock));
120 INIT_LIST_HEAD(&(nb->control_msgs_out));
121 INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
122 nb->last_ping_time = jiffies;
123 atomic_set(&(nb->ooo_packets), 0);
124 spin_lock_init(&(nb->credits_lock));
125 nb->jiffies_credit_update = nb->last_ping_time;
126 get_random_bytes((char *) &seqno, sizeof(seqno));
127 mutex_init(&(nb->pingcookie_lock));
128 atomic_set(&(nb->latency), 1000000);
129 atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
130 spin_lock_init(&(nb->state_lock));
131 atomic_set(&(nb->kpacket_seqno), seqno);
132 mutex_init(&(nb->conn_list_lock));
133 INIT_LIST_HEAD(&(nb->rcv_conn_list));
134 INIT_LIST_HEAD(&(nb->snd_conn_list));
135 spin_lock_init(&(nb->retrans_lock));
136 INIT_LIST_HEAD(&(nb->retrans_list));
137 INIT_LIST_HEAD(&(nb->retrans_list_conn));
139 return nb;
142 struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
144 struct list_head *currlh;
145 struct neighbor *ret = 0;
148 char source_hw[MAX_ADDR_LEN];
149 memset(source_hw, 0, MAX_ADDR_LEN);
150 if (skb->dev->header_ops != 0 &&
151 skb->dev->header_ops->parse != 0)
152 skb->dev->header_ops->parse(skb, source_hw);
154 mutex_lock(&(neighbor_operation_lock));
156 currlh = nb_list.next;
158 while (currlh != &nb_list) {
159 struct neighbor *curr = container_of(currlh, struct neighbor,
160 nb_list);
162 if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
163 ret = curr;
164 kref_get(&(ret->ref));
165 break;
168 currlh = currlh->next;
171 mutex_unlock(&(neighbor_operation_lock));
173 return ret;
176 struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
177 __u16 addrlen, __u8 *addr)
179 struct list_head *currlh;
180 struct neighbor *ret = 0;
182 if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
183 return 0;
185 mutex_lock(&(neighbor_operation_lock));
187 currlh = nb_list.next;
189 while (currlh != &nb_list) {
190 struct neighbor *curr = container_of(currlh, struct neighbor,
191 nb_list);
193 if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
194 addrlen) == 0) {
195 ret = curr;
196 kref_get(&(ret->ref));
198 goto out;
201 currlh = currlh->next;
204 out:
205 mutex_unlock(&(neighbor_operation_lock));
207 return ret;
212 * TODO:
214 * address flags
215 * credit exchange factor + unstable flag
216 * throughput bound conns: throughput,credits/msecs
217 * latency bound conns: latency (ms), credits/byte
219 #warning todo extend
220 __u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
222 struct list_head *currlh;
224 int bufferfull = 0;
226 __u32 total = 0;
227 __u32 cnt = 0;
229 __u32 buf_offset = 8;
230 __u32 headoffset = 0;
232 int rc;
234 BUG_ON(buf == 0);
235 BUG_ON(buflen < 8);
237 mutex_lock(&(neighbor_operation_lock));
239 currlh = nb_list.next;
241 while (currlh != &nb_list) {
242 struct neighbor *curr = container_of(currlh, struct neighbor,
243 nb_list);
244 __u8 state;
245 unsigned long iflags;
247 /* get_neigh_state not used here because it would deadlock */
248 spin_lock_irqsave( &(curr->state_lock), iflags );
249 state = curr->state;
250 spin_unlock_irqrestore( &(curr->state_lock), iflags );
252 if (state != NEIGHBOR_STATE_ACTIVE)
253 goto cont2;
255 if (total < offset)
256 goto cont;
258 if (unlikely(buflen < buf_offset + 4 + 4 + 2 + 4 +
259 curr->addrlen))
260 bufferfull = 1;
262 if (bufferfull)
263 goto cont;
265 /* numaddr */
266 rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);
267 BUG_ON(rc <= 0);
268 buf_offset += rc;
270 /* addrtypelen */
271 rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);
272 BUG_ON(rc <= 0);
273 buf_offset += rc;
275 /* addrlen */
276 rc = encode_len(buf + buf_offset, buflen - buf_offset,
277 curr->addrlen);
278 BUG_ON(rc <= 0);
279 buf_offset += rc;
281 buf[buf_offset] = 'i'; /* addrtype */
282 buf_offset += 1;
283 buf[buf_offset] = 'd';
284 buf_offset += 1;
285 BUG_ON(curr->addrlen > buflen - buf_offset);
286 memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
287 buf_offset += curr->addrlen;
289 BUG_ON(buf_offset > buflen);
291 cnt++;
293 cont:
294 total++;
295 cont2:
296 currlh = currlh->next;
299 mutex_unlock(&(neighbor_operation_lock));
301 rc = encode_len(buf, 4, total);
302 BUG_ON(rc <= 0);
303 headoffset += rc;
305 rc = encode_len(buf + headoffset, 4, cnt);
306 BUG_ON(rc <= 0);
307 headoffset += rc;
309 if (likely(headoffset < 8))
310 memmove(buf+headoffset, buf+8, buf_offset);
312 return buf_offset + headoffset - 8;
315 void set_last_routdtrip(struct neighbor *nb, unsigned long time)
317 unsigned long iflags;
319 BUG_ON(nb == 0);
321 spin_lock_irqsave( &(nb->state_lock), iflags );
323 if(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
324 nb->state_time.last_roundtrip))
325 nb->state_time.last_roundtrip = time;
327 spin_unlock_irqrestore( &(nb->state_lock), iflags );
330 static void _refresh_initial_debitsrate(struct net_device *dev,
331 __u32 debitsrate)
333 __u32 neighbors = 0;
334 struct list_head *currlh;
336 currlh = nb_list.next;
338 while (currlh != &nb_list) {
339 struct neighbor *curr = container_of(currlh, struct neighbor,
340 nb_list);
342 if (curr->dev == dev)
343 neighbors++;
345 currlh = currlh->next;
348 currlh = nb_list.next;
350 while (currlh != &nb_list) {
351 struct neighbor *curr = container_of(currlh, struct neighbor,
352 nb_list);
354 if (curr->dev == dev)
355 set_debitrate_initial(curr,
356 debitsrate/neighbors);
358 currlh = currlh->next;
362 /* neighbor operation lock has to be held while calling this */
363 static void refresh_initial_debitsrate(void)
365 struct list_head *currlh1;
366 __u32 ifcnt = 0;
368 currlh1 = nb_list.next;
370 while (currlh1 != &nb_list) {
371 struct neighbor *curr1 = container_of(currlh1, struct neighbor,
372 nb_list);
374 struct list_head *currlh2;
375 currlh2 = nb_list.next;
376 while (currlh2 != currlh1) {
377 struct neighbor *curr2 = container_of(currlh2,
378 struct neighbor, nb_list);
379 if (curr1->dev == curr2->dev)
380 goto present1;
383 ifcnt++;
385 present1:
387 currlh1 = currlh1->next;
390 currlh1 = nb_list.next;
392 while (currlh1 != &nb_list) {
393 struct neighbor *curr1 = container_of(currlh1, struct neighbor,
394 nb_list);
396 struct list_head *currlh2;
397 currlh2 = nb_list.next;
398 while (currlh2 != currlh1) {
399 struct neighbor *curr2 = container_of(currlh2,
400 struct neighbor, nb_list);
401 if (curr1->dev == curr2->dev)
402 goto present2;
405 _refresh_initial_debitsrate(curr1->dev,
406 CREDIT_RATE_INITIAL/ifcnt);
408 present2:
410 currlh1 = currlh1->next;
414 static void reset_all_conns(struct neighbor *nb)
416 while (1) {
417 struct conn *sconn;
419 mutex_lock(&(nb->conn_list_lock));
421 if (list_empty(&(nb->snd_conn_list))) {
422 BUG_ON(nb->num_send_conns != 0);
423 mutex_unlock(&(nb->conn_list_lock));
424 break;
427 sconn = container_of(nb->snd_conn_list.next, struct conn,
428 target.out.nb_list);
429 BUG_ON(sconn->targettype != TARGET_OUT);
432 * reset_conn must not be called with conn_list_lock
433 * held
435 mutex_unlock(&(nb->conn_list_lock));
436 reset_conn(sconn);
440 static void stall_timer(struct work_struct *work)
442 struct neighbor *nb = container_of(to_delayed_work(work),
443 struct neighbor, stalltimeout_timer);
445 int stall_time_ms;
446 __u8 nbstate;
448 unsigned long iflags;
450 spin_lock_irqsave( &(nb->state_lock), iflags );
451 stall_time_ms = jiffies_to_msecs(jiffies -
452 nb->state_time.last_roundtrip);
453 nbstate = nb->state;
454 if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
455 nb->str_timer_pending = 0;
457 spin_unlock_irqrestore( &(nb->state_lock), iflags );
459 if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
460 kref_put(&(nb->ref), neighbor_free);
461 return;
464 if (stall_time_ms < NB_KILL_TIME_MS) {
465 INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
466 schedule_delayed_work(&(nb->stalltimeout_timer),
467 msecs_to_jiffies(NB_KILL_TIME_MS -
468 stall_time_ms));
469 return;
472 printk(KERN_ERR "reset_all");
474 reset_all_conns(nb);
476 spin_lock_irqsave( &(nb->state_lock), iflags );
477 nb->state = NEIGHBOR_STATE_KILLED;
478 spin_unlock_irqrestore( &(nb->state_lock), iflags );
480 mutex_lock(&neighbor_operation_lock);
481 list_del(&(nb->nb_list));
482 refresh_initial_debitsrate();
483 mutex_unlock(&neighbor_operation_lock);
485 kref_put(&(nb->ref), neighbor_free); /* nb_list */
486 kref_put(&(nb->ref), neighbor_free); /* stall_timer */
489 int get_neigh_state(struct neighbor *nb)
491 int ret;
492 unsigned long iflags;
493 int starttimer = 0;
494 int stall_time_ms;
496 BUG_ON(nb == 0);
498 spin_lock_irqsave( &(nb->state_lock), iflags );
500 if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
501 time_after_eq(jiffies, nb->state_time.last_roundtrip +
502 msecs_to_jiffies(NB_STALL_TIME_MS)) && (
503 nb->ping_intransit >= NB_STALL_MINPINGS ||
504 nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
505 nb->state = NEIGHBOR_STATE_STALLED;
506 starttimer = (nb->str_timer_pending == 0);
507 stall_time_ms = jiffies_to_msecs(jiffies - nb->state_time.last_roundtrip);
508 nb->str_timer_pending = 1;
509 printk(KERN_ERR "switched to stalled");
510 BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
513 ret = nb->state;
515 spin_unlock_irqrestore( &(nb->state_lock), iflags );
518 if (unlikely(starttimer)) {
519 kref_get(&(nb->ref));
520 INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
521 stall_timer);
522 schedule_delayed_work(&(nb->stalltimeout_timer),
523 msecs_to_jiffies(NB_KILL_TIME_MS - stall_time_ms));
526 return ret;
529 static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
531 int i;
533 for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
534 if (nb->cookies[i].cookie == cookie)
535 return &(nb->cookies[i]);
537 return 0;
540 void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
542 struct ping_cookie *c;
543 int i;
545 unsigned long cookie_sendtime;
546 __s64 newlatency;
548 unsigned long iflags;
550 mutex_lock(&(nb->pingcookie_lock));
552 c = find_cookie(nb, cookie);
554 if (unlikely(c == 0))
555 goto out;
557 cookie_sendtime = c->time;
559 newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
560 jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
561 if (unlikely(newlatency < 0))
562 newlatency = 0;
563 if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
564 newlatency = ((__s64)256)*256*256*256 - 1;
566 atomic_set(&(nb->latency), (__u32) newlatency);
568 c->cookie = 0;
569 nb->ping_intransit--;
571 for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
572 if (nb->cookies[i].cookie != 0 &&
573 time_before(nb->cookies[i].time, c->time)) {
574 nb->cookies[i].pongs++;
575 if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
576 nb->cookies[i].cookie = 0;
577 nb->cookies[i].pongs = 0;
578 nb->ping_intransit--;
583 spin_lock_irqsave( &(nb->state_lock), iflags );
585 if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
586 nb->state == NEIGHBOR_STATE_STALLED)) {
587 nb->ping_success++;
589 if (nb->state == NEIGHBOR_STATE_INITIAL) {
590 __u64 jiffies64 = get_jiffies_64();
591 if (nb->state_time.last_state_change == 0)
592 nb->state_time.last_state_change = jiffies64;
593 if (jiffies64 <= (nb->state_time.last_state_change +
594 msecs_to_jiffies(INITIAL_TIME_MS)))
595 goto out2;
598 if (nb->ping_success >= PING_SUCCESS_CNT) {
599 /*if (nb->state == NEIGHBOR_STATE_INITIAL)
600 printk(KERN_ERR "switched from initial to active");
601 else
602 printk(KERN_ERR "switched from stalled to active");
604 nb->state = NEIGHBOR_STATE_ACTIVE;
605 nb->ping_success = 0;
606 nb->state_time.last_roundtrip = jiffies;
608 } else {
609 nb->state_time.last_roundtrip = cookie_sendtime;
612 out2:
613 spin_unlock_irqrestore( &(nb->state_lock), iflags );
615 out:
616 mutex_unlock(&(nb->pingcookie_lock));
619 __u32 add_ping_req(struct neighbor *nb)
621 struct ping_cookie *c;
622 __u32 i;
624 __u32 cookie;
626 mutex_lock(&(nb->pingcookie_lock));
628 for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
629 if (nb->cookies[i].cookie == 0)
630 goto found;
633 get_random_bytes((char *) &i, sizeof(i));
634 i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
635 PING_COOKIES_FIFO;
637 found:
638 c = &(nb->cookies[i]);
639 c->time = jiffies;
640 c->pongs = 0;
641 nb->lastcookie++;
642 if (unlikely(nb->lastcookie == 0))
643 nb->lastcookie++;
644 c->cookie = nb->lastcookie;
646 nb->ping_intransit++;
648 cookie = c->cookie;
650 nb->last_ping_time = jiffies;
652 mutex_unlock(&(nb->pingcookie_lock));
654 return cookie;
657 void unadd_ping_req(struct neighbor *nb, __u32 cookie)
659 int i;
661 if (cookie == 0)
662 return;
664 mutex_lock(&(nb->pingcookie_lock));
666 for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
667 if (nb->cookies[i].cookie == cookie) {
668 nb->cookies[i].cookie = 0;
669 nb->ping_intransit--;
670 break;
674 mutex_unlock(&(nb->pingcookie_lock));
677 static int neighbor_idle(struct neighbor *nb)
679 int ret;
680 mutex_lock(&(nb->conn_list_lock));
681 ret = (list_empty(&(nb->rcv_conn_list)) &&
682 list_empty(&(nb->snd_conn_list)));
683 BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
684 mutex_unlock(&(nb->conn_list_lock));
685 return ret;
689 * Additional checks, on top of the checks and timings already done in kpacket_gen.c.
690 * This is primarily to make sure that we do not invalidate other ping cookies
691 * which might still receive responses. It does this by requiring a certain
692 * minimum delay between pings, depending on how many pings are already in
693 * transit.
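/*
 * Worked example (numbers are made up): with a measured latency of 100000us, a
 * max_remote_cmsg_delay of 100000us and ping_intransit two above
 * PING_COOKIES_NOTHROTTLE, the throttle below becomes
 * ((100000 + 100000)/1000) << 3 = 1600ms; it doubles with every further ping
 * in transit until PING_THROTTLE_LIMIT_MS caps it.
 */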
695 int time_to_send_ping(struct neighbor *nb)
697 int rc = 1;
699 int state = get_neigh_state(nb);
700 int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
701 neighbor_idle(nb));
702 __u32 forcetime;
704 mutex_lock(&(nb->pingcookie_lock));
705 if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
706 __u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
707 ((__u32) atomic_read(
708 &(nb->max_remote_cmsg_delay))) )/1000) <<
709 (nb->ping_intransit + 1 -
710 PING_COOKIES_NOTHROTTLE);
712 if (mindelay > PING_THROTTLE_LIMIT_MS)
713 mindelay = PING_THROTTLE_LIMIT_MS;
715 if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
716 rc = 0;
719 if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
720 nb->ping_intransit != 0)
721 forcetime = PING_FORCETIME_MS;
722 else if (idle)
723 forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
724 else
725 forcetime = PING_FORCETIME_ACTIVE_MS;
727 if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
728 rc = 0;
729 else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
730 rc = 2;
732 mutex_unlock(&(nb->pingcookie_lock));
734 return rc;
737 static void add_neighbor(struct neighbor *nb)
739 struct list_head *currlh = nb_list.next;
741 BUG_ON((nb->addr == 0) != (nb->addrlen == 0));
743 while (currlh != &nb_list) {
744 struct neighbor *curr = container_of(currlh, struct neighbor,
745 nb_list);
747 if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
748 curr->addrlen) == 0)
749 goto already_present;
751 currlh = currlh->next;
754 /* kref_get not needed here, because the caller leaves its ref to us */
755 printk(KERN_ERR "add_neigh");
757 list_add_tail(&(nb->nb_list), &nb_list);
758 refresh_initial_debitsrate();
759 schedule_controlmsg_timerfunc(nb);
760 INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
761 INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);
763 if (0) {
764 already_present:
765 kmem_cache_free(nb_slab, nb);
769 static __u32 pull_u32(struct sk_buff *skb, int convbo)
771 char *ptr = cor_pull_skb(skb, 4);
773 __u32 ret = 0;
775 BUG_ON(0 == ptr);
777 ((char *)&ret)[0] = ptr[0];
778 ((char *)&ret)[1] = ptr[1];
779 ((char *)&ret)[2] = ptr[2];
780 ((char *)&ret)[3] = ptr[3];
782 if (convbo)
783 return be32_to_cpu(ret);
784 return ret;
787 static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
788 char *cmddata)
790 __u16 addrtypelen;
791 char *addrtype;
792 __u16 addrlen;
793 char *addr;
795 BUG_ON((nb->addr == 0) != (nb->addrlen == 0));
797 if (nb->addr != 0)
798 return 0;
800 if (len < 4)
801 return 0;
803 addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
804 cmddata += 2;
805 len -= 2;
807 if (len < 2)
808 return 0;
810 addrlen = be16_to_cpu(*((__u16 *) cmddata));
811 cmddata += 2;
812 len -= 2;
814 addrtype = cmddata;
815 cmddata += addrtypelen;
816 len -= addrtypelen;
818 addr = cmddata;
819 cmddata += addrlen;
820 len -= addrlen;
822 if (((__s32) len) < 0) /* len is __u32; catch underflow from the subtractions above */
823 return 0;
825 if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
826 return 0;
828 nb->addr = kmalloc(addrlen, GFP_KERNEL);
829 if (unlikely(nb->addr == 0))
830 return 1;
832 memcpy(nb->addr, addr, addrlen);
833 nb->addrlen = addrlen;
835 return 0;
838 static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
839 char *cmddata)
841 if (cmd == NEIGHCMD_ADDADDR) {
842 apply_announce_addaddr(nb, cmd, len, cmddata);
843 } else {
844 /* ignore unknown cmds */
848 static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
849 char *source_hw)
851 struct neighbor *nb = alloc_neighbor(GFP_KERNEL);
853 if (unlikely(nb == 0))
854 return;
856 while (len >= 8) {
857 __u32 cmd;
858 __u32 cmdlen;
860 cmd = be32_to_cpu(*((__u32 *) msg));
861 msg += 4;
862 len -= 4;
863 cmdlen = be32_to_cpu(*((__u32 *) msg));
864 msg += 4;
865 len -= 4;
867 BUG_ON(cmdlen > len);
869 apply_announce_cmd(nb, cmd, cmdlen, msg);
871 msg += cmdlen;
872 len -= cmdlen;
875 BUG_ON(len != 0);
877 memcpy(nb->mac, source_hw, MAX_ADDR_LEN);
879 dev_hold(dev);
880 nb->dev = dev;
881 add_neighbor(nb);
884 static int check_announce_cmds(char *msg, __u32 len)
886 while (len >= 8) {
887 __u32 cmd;
888 __u32 cmdlen;
890 cmd = be32_to_cpu(*((__u32 *) msg));
891 msg += 4;
892 len -= 4;
893 cmdlen = be32_to_cpu(*((__u32 *) msg));
894 msg += 4;
895 len -= 4;
897 /* malformed packet */
898 if (unlikely(cmdlen > len))
899 return 1;
901 msg += cmdlen;
902 len -= cmdlen;
905 if (unlikely(len != 0))
906 return 1;
908 return 0;
911 static void parse_announce(char *msg, __u32 len, struct net_device *dev,
912 char *source_hw)
914 __u32 min_announce_version;
915 __u32 max_announce_version;
916 __u32 min_cor_version;
917 __u32 max_cor_version;
919 if (unlikely(len < 16))
920 return;
922 min_announce_version = be32_to_cpu(*((__u32 *) msg));
923 msg += 4;
924 len -= 4;
925 max_announce_version = be32_to_cpu(*((__u32 *) msg));
926 msg += 4;
927 len -= 4;
928 min_cor_version = be32_to_cpu(*((__u32 *) msg));
929 msg += 4;
930 len -= 4;
931 max_cor_version = be32_to_cpu(*((__u32 *) msg));
932 msg += 4;
933 len -= 4;
935 if (min_announce_version != 0)
936 return;
937 if (min_cor_version != 0)
938 return;
939 if (check_announce_cmds(msg, len)) {
940 return;
942 apply_announce_cmds(msg, len, dev, source_hw);
945 struct announce_in {
946 /* lh has to be first */
947 struct list_head lh;
948 struct sk_buff_head skbs; /* sorted by offset */
949 struct net_device *dev;
950 char source_hw[MAX_ADDR_LEN];
951 __u32 announce_proto_version;
952 __u32 packet_version;
953 __u32 total_size;
954 __u32 received_size;
955 __u64 last_received_packet;
958 LIST_HEAD(announce_list);
960 struct kmem_cache *announce_in_slab;
962 static void merge_announce(struct announce_in *ann)
964 char *msg = kmalloc(ann->total_size, GFP_KERNEL);
965 __u32 copy = 0;
967 if (msg == 0) {
968 /* try again when next packet arrives */
969 return;
972 while (copy != ann->total_size) {
973 __u32 currcpy;
974 __u32 offset = 0;
975 struct sk_buff *skb;
976 struct skb_procstate *ps;
978 if (unlikely(skb_queue_empty(&(ann->skbs)))) {
979 printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
980 "empty while merging packets\n");
981 goto free;
984 skb = skb_dequeue(&(ann->skbs));
985 ps = skb_pstate(skb);
987 currcpy = skb->len;
988 if (unlikely(ps->funcstate.announce.offset > copy)) {
989 printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
990 "value found\n");
991 goto free;
994 if (unlikely(ps->funcstate.announce.offset < copy)) {
995 offset = copy - ps->funcstate.announce.offset;
996 currcpy -= offset;
999 if (unlikely(currcpy + copy > ann->total_size))
1000 goto free;
1002 memcpy(msg + copy, skb->data + offset, currcpy);
1003 copy += currcpy;
1004 kfree_skb(skb);
1007 parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);
1009 free:
1010 if (msg != 0)
1011 kfree(msg);
1013 dev_put(ann->dev);
1014 list_del(&(ann->lh));
1015 kmem_cache_free(announce_in_slab, ann);
1018 static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
1020 struct skb_procstate *ps = skb_pstate(skb);
1022 __u32 offset = ps->funcstate.announce.offset;
1023 __u32 len = skb->len;
1025 __u32 curroffset = 0;
1026 __u32 prevoffset = 0;
1027 __u32 prevlen = 0;
1029 struct sk_buff *curr = ann->skbs.next;
1031 if (unlikely(len + offset > ann->total_size)) {
1032 /* invalid header */
1033 kfree_skb(skb);
1034 return 0;
1038 * Try to find the right place to insert into the sorted list. This
1039 * means walking the list until we find an skb with a greater
1040 * offset, so we can insert before it and keep the sort order. However,
1041 * this is complicated by the fact that the new skb must not be inserted
1042 * between 2 skbs if there is no data missing in between. So the loop
1043 * has to keep running until there is either a gap to insert into or
1044 * we see that this data has already been received.
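/*
 * Example with made-up offsets: if the queue already holds chunks covering
 * [0,100) and [200,300), a new chunk at offset 100 with len 50 is inserted
 * before the skb at offset 200, because the gap test
 * (prevoffset + prevlen) < curroffset succeeds there. A chunk at offset 50
 * with len 30 instead hits the "we already have this data" case below.
 */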
1046 while ((void *) curr != (void *) &(ann->skbs)) {
1047 struct skb_procstate *currps = skb_pstate(curr);
1049 curroffset = currps->funcstate.announce.offset;
1051 if (curroffset > offset && (prevoffset + prevlen) < curroffset)
1052 break;
1054 prevoffset = curroffset;
1055 prevlen = curr->len;
1056 curr = curr->next;
1058 if ((offset+len) <= (prevoffset+prevlen)) {
1059 /* we already have this data */
1060 kfree_skb(skb);
1061 return 0;
1066 * Calculate how much data was really received, by subtracting
1067 * the bytes we already have.
1069 if (unlikely(prevoffset + prevlen > offset)) {
1070 len -= (prevoffset + prevlen) - offset;
1071 offset = prevoffset + prevlen;
1074 if (unlikely((void *) curr != (void *) &(ann->skbs) &&
1075 (offset + len) > curroffset))
1076 len = curroffset - offset;
1078 ann->received_size += len;
1079 BUG_ON(ann->received_size > ann->total_size);
1080 __skb_queue_before(&(ann->skbs), curr, skb);
1081 ann->last_received_packet = get_jiffies_64();
1083 if (ann->received_size == ann->total_size)
1084 merge_announce(ann);
1085 else if (unlikely(ann->skbs.qlen >= 16))
1086 return 1;
1088 return 0;
1091 void rcv_announce(struct sk_buff *skb)
1093 struct skb_procstate *ps = skb_pstate(skb);
1094 struct announce_in *curr = 0;
1095 struct announce_in *leastactive = 0;
1096 __u32 list_size = 0;
1098 __u32 announce_proto_version = pull_u32(skb, 1);
1099 __u32 packet_version = pull_u32(skb, 1);
1100 __u32 total_size = pull_u32(skb, 1);
1102 char source_hw[MAX_ADDR_LEN];
1103 memset(source_hw, 0, MAX_ADDR_LEN);
1104 if (skb->dev->header_ops != 0 &&
1105 skb->dev->header_ops->parse != 0)
1106 skb->dev->header_ops->parse(skb, source_hw);
1108 ps->funcstate.announce.offset = pull_u32(skb, 1);
1110 if (total_size > 8192)
1111 goto discard;
1113 mutex_lock(&(neighbor_operation_lock));
1115 if (announce_proto_version != 0)
1116 goto discard;
1118 curr = (struct announce_in *) announce_list.next;
1120 while (((struct list_head *) curr) != &(announce_list)) {
1121 list_size++;
1122 if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
1123 MAX_ADDR_LEN) == 0 &&
1124 curr->announce_proto_version ==
1125 announce_proto_version &&
1126 curr->packet_version == packet_version &&
1127 curr->total_size == total_size)
1128 goto found;
1130 if (leastactive == 0 || curr->last_received_packet <
1131 leastactive->last_received_packet)
1132 leastactive = curr;
1134 curr = (struct announce_in *) curr->lh.next;
1137 if (list_size >= 128) {
1138 BUG_ON(leastactive == 0);
1139 curr = leastactive;
1141 curr->last_received_packet = get_jiffies_64();
1143 while (!skb_queue_empty(&(curr->skbs))) {
1144 struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
1145 kfree_skb(skb2);
1148 dev_put(curr->dev);
1149 } else {
1150 curr = kmem_cache_alloc(announce_in_slab,
1151 GFP_KERNEL);
1152 if (curr == 0)
1153 goto discard;
1155 skb_queue_head_init(&(curr->skbs));
1156 list_add_tail((struct list_head *) curr, &announce_list);
1159 curr->packet_version = packet_version;
1160 curr->total_size = total_size;
1161 curr->received_size = 0;
1162 curr->announce_proto_version = announce_proto_version;
1163 curr->dev = skb->dev;
1164 dev_hold(curr->dev);
1165 memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);
1167 found:
1168 if (_rcv_announce(skb, curr)) {
1169 list_del((struct list_head *) curr);
1170 dev_put(curr->dev);
1171 kmem_cache_free(announce_in_slab, curr);
1174 if (0) {
1175 discard:
1176 kfree_skb(skb);
1179 mutex_unlock(&(neighbor_operation_lock));
1182 struct announce{
1183 struct kref ref;
1185 __u32 packet_version;
1186 char *announce_msg;
1187 __u32 announce_msg_len;
1190 struct announce *last_announce;
1192 static int send_announce_chunk(struct announce_data *ann)
1194 struct sk_buff *skb;
1195 __u32 packet_size = 256;
1196 __u32 remainingdata = ann->ann->announce_msg_len -
1197 ann->curr_announce_msg_offset;
1198 __u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
1199 __u32 overhead = 17 + headroom;
1200 char *header;
1201 char *ptr;
1202 int rc = 0;
1204 if (remainingdata < packet_size)
1205 packet_size = remainingdata;
1207 skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
1208 if (unlikely(skb == 0))
1209 return 0;
1211 skb->protocol = htons(ETH_P_COR);
1212 skb->dev = ann->dev;
1213 skb_reserve(skb, headroom);
1215 if(unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
1216 ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
1217 goto out_err;
1219 skb_reset_network_header(skb);
1221 header = skb_put(skb, 17);
1222 if (unlikely(header == 0))
1223 goto out_err;
1225 header[0] = PACKET_TYPE_ANNOUNCE;
1227 put_u32(header + 1, 0, 1); /* announce proto version */
1228 put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
1229 put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
1230 put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */
1232 ptr = skb_put(skb, packet_size);
1233 if (unlikely(ptr == 0))
1234 goto out_err;
1236 memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
1237 packet_size);
1239 rc = dev_queue_xmit(skb);
1241 if (rc == 0) {
1242 ann->curr_announce_msg_offset += packet_size;
1244 if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
1245 ann->curr_announce_msg_offset = 0;
1248 if (0) {
1249 out_err:
1250 if (skb != 0)
1251 kfree_skb(skb);
1254 return rc;
1257 int send_announce_qos(struct announce_data *ann)
1259 int rc;
1260 mutex_lock(&(neighbor_operation_lock));
1261 rc = send_announce_chunk(ann);
1262 mutex_unlock(&(neighbor_operation_lock));
1263 return rc;
1266 static void announce_free(struct kref *ref)
1268 struct announce *ann = container_of(ref, struct announce, ref);
1269 kfree(ann->announce_msg);
1270 kfree(ann);
1273 void announce_data_free(struct kref *ref)
1275 struct announce_data *ann = container_of(ref, struct announce_data,
1276 ref);
1277 if (ann->ann != 0)
1278 kref_put(&(ann->ann->ref), announce_free);
1279 kfree(ann);
1282 static void send_announce(struct work_struct *work)
1284 struct announce_data *ann = container_of(to_delayed_work(work),
1285 struct announce_data, announce_work);
1286 int reschedule = 0;
1287 int rc = 0;
1289 mutex_lock(&(neighbor_operation_lock));
1291 if (unlikely(ann->dev == 0))
1292 goto out;
1293 reschedule = 1;
1295 if (unlikely(ann->ann == 0 && last_announce == 0))
1296 goto out;
1297 if (ann->curr_announce_msg_offset == 0 &&
1298 unlikely(ann->ann != last_announce)) {
1299 if (ann->ann != 0)
1300 kref_put(&(ann->ann->ref), announce_free);
1301 ann->ann = last_announce;
1302 kref_get(&(ann->ann->ref));
1305 rc = send_announce_chunk(ann);
1307 out:
1308 mutex_unlock(&(neighbor_operation_lock));
1310 if (rc != 0)
1311 qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);
1313 if (unlikely(reschedule == 0)) {
1314 kref_put(&(ann->ref), announce_data_free);
1315 } else {
1316 __u64 jiffies = get_jiffies_64();
1317 int delay;
1319 ann->scheduled_announce_timer += msecs_to_jiffies(
1320 ANNOUNCE_SEND_PACKETINTELVAL_MS);
1322 delay = ann->scheduled_announce_timer - jiffies;
1323 if (delay < 0)
1324 delay = 1;
1326 INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
1327 schedule_delayed_work(&(ann->announce_work), delay);
1331 static struct announce_data *get_announce_by_netdev(struct net_device *dev)
1333 struct list_head *lh = announce_out_list.next;
1335 while (lh != &announce_out_list) {
1336 struct announce_data *curr = (struct announce_data *)(
1337 ((char *) lh) -
1338 offsetof(struct announce_data, lh));
1340 if (curr->dev == dev)
1341 return curr;
1342 lh = lh->next;
1344 return 0;
1347 static void announce_send_adddev(struct net_device *dev)
1349 struct announce_data *ann;
1351 ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);
1353 if (unlikely(ann == 0)) {
1354 printk(KERN_ERR "cor cannot allocate memory for sending "
1355 "announces");
1356 return;
1359 memset(ann, 0, sizeof(struct announce_data));
1361 kref_init(&(ann->ref));
1363 dev_hold(dev);
1364 ann->dev = dev;
1366 mutex_lock(&(neighbor_operation_lock));
1367 list_add_tail(&(ann->lh), &announce_out_list);
1368 mutex_unlock(&(neighbor_operation_lock));
1370 ann->scheduled_announce_timer = get_jiffies_64();
1371 INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
1372 schedule_delayed_work(&(ann->announce_work), 1);
1375 static void announce_send_rmdev(struct net_device *dev)
1377 struct announce_data *ann;
1379 mutex_lock(&(neighbor_operation_lock));
1381 ann = get_announce_by_netdev(dev);
1383 if (ann == 0)
1384 goto out;
1386 dev_put(ann->dev);
1387 ann->dev = 0;
1389 out:
1390 mutex_unlock(&(neighbor_operation_lock));
1393 int netdev_notify_func(struct notifier_block *not, unsigned long event,
1394 void *ptr)
1396 struct net_device *dev = (struct net_device *) ptr;
1397 int rc;
1399 switch(event){
1400 case NETDEV_UP:
1401 rc = create_queue(dev);
1402 if (rc == 1)
1403 return 1;
1404 announce_send_adddev(dev);
1405 break;
1406 case NETDEV_DOWN:
1407 destroy_queue(dev);
1408 announce_send_rmdev(dev);
1409 break;
1410 case NETDEV_REBOOT:
1411 case NETDEV_CHANGE:
1412 case NETDEV_REGISTER:
1413 case NETDEV_UNREGISTER:
1414 case NETDEV_CHANGEMTU:
1415 case NETDEV_CHANGEADDR:
1416 case NETDEV_GOING_DOWN:
1417 case NETDEV_CHANGENAME:
1418 case NETDEV_FEAT_CHANGE:
1419 case NETDEV_BONDING_FAILOVER:
1420 break;
1421 default:
1422 return 1;
1425 return 0;
1428 static int set_announce(char *msg, __u32 len)
1430 struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);
1432 if (unlikely(ann == 0)) {
1433 kfree(msg);
1434 return 1;
1437 memset(ann, 0, sizeof(struct announce));
1439 ann->announce_msg = msg;
1440 ann->announce_msg_len = len;
1442 kref_init(&(ann->ref));
1444 mutex_lock(&(neighbor_operation_lock));
1446 if (last_announce != 0) {
1447 ann->packet_version = last_announce->packet_version + 1;
1448 kref_put(&(last_announce->ref), announce_free);
1451 last_announce = ann;
1453 mutex_unlock(&(neighbor_operation_lock));
1455 return 0;
1458 static int generate_announce(void)
1460 __u32 addrtypelen = strlen(addrtype);
1462 __u32 hdr_len = 16;
1463 __u32 cmd_hdr_len = 8;
1464 __u32 cmd_len = 2 + 2 + addrtypelen + addrlen;
1466 __u32 len = hdr_len + cmd_hdr_len + cmd_len;
1467 __u32 offset = 0;
1469 char *msg = kmalloc(len, GFP_KERNEL);
1470 if (unlikely(msg == 0))
1471 return 1;
1473 put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
1474 offset += 4;
1475 put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
1476 offset += 4;
1477 put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
1478 offset += 4;
1479 put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
1480 offset += 4;
1483 put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
1484 offset += 4;
1485 put_u32(msg + offset, cmd_len, 1); /* command length */
1486 offset += 4;
1488 /* addrtypelen, addrlen */
1489 put_u16(msg + offset, addrtypelen, 1);
1490 offset += 2;
1491 put_u16(msg + offset, addrlen, 1);
1492 offset += 2;
1494 /* addrtype, addr */
1495 memcpy(msg + offset, addrtype, addrtypelen);
1496 offset += addrtypelen;
1497 memcpy(msg + offset, addr, addrlen);
1498 offset += addrlen;
1500 BUG_ON(offset != len);
1502 return set_announce(msg, len);
1505 int __init cor_neighbor_init(void)
1507 addrlen = 16;
1509 addr = kmalloc(addrlen, GFP_KERNEL);
1510 if (unlikely(addr == 0))
1511 goto error_free2;
1513 get_random_bytes(addr, addrlen);
1515 nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
1516 0, 0);
1517 announce_in_slab = kmem_cache_create("cor_announce_in",
1518 sizeof(struct announce_in), 8, 0, 0);
1520 if (unlikely(generate_announce()))
1521 goto error_free1;
1523 memset(&netdev_notify, 0, sizeof(netdev_notify));
1524 netdev_notify.notifier_call = netdev_notify_func;
1525 register_netdevice_notifier(&netdev_notify);
1527 return 0;
1529 error_free1:
1530 kfree(addr);
1532 error_free2:
1533 return -ENOMEM;
1536 MODULE_LICENSE("GPL");