/*
 * Connection oriented routing
 * Copyright (C) 2007-2010 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include "cor.h"

/**
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *  unit is bytes
 * [data]
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *  ...
 */
/**
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */

/* Commands */

#define NEIGHCMD_ADDADDR 1

/**
 * Parameter:
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
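/*
 * Illustrative sketch (not compiled): how a NEIGHCMD_ADDADDR command for the
 * 2 byte addrtype "id" and a 16 byte address could be laid out with the
 * put_u32()/put_u16() helpers used elsewhere in this file. The buffer
 * example_buf, the address example_addr and the fixed sizes are assumptions
 * for the example only; generate_announce() below builds the real message.
 */
#if 0
static void example_encode_addaddr(char *example_buf, char *example_addr)
{
	__u32 offset = 0;
	__u32 cmd_len = 2 + 2 + 2 + 16; /* addrtypelen, addrlen, "id", addr */

	put_u32(example_buf + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(example_buf + offset, cmd_len, 1); /* commandlength */
	offset += 4;
	put_u16(example_buf + offset, 2, 1); /* addrtypelen */
	offset += 2;
	put_u16(example_buf + offset, 16, 1); /* addrlen */
	offset += 2;
	memcpy(example_buf + offset, "id", 2); /* addrtype */
	offset += 2;
	memcpy(example_buf + offset, example_addr, 16); /* addr */
	offset += 16;
}
#endif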
DEFINE_MUTEX(neighbor_operation_lock);

char *addrtype = "id";
char *addr;
int addrlen;

LIST_HEAD(nb_list);
struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;


#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
	if (addrtypelen == 2 &&
			(addrtype[0] == 'i' || addrtype[0] == 'I') &&
			(addrtype[1] == 'd' || addrtype[1] == 'D'))
		return ADDRTYPE_ID;

	return ADDRTYPE_UNKNOWN;
}

void neighbor_free(struct kref *ref)
{
	struct neighbor *nb = container_of(ref, struct neighbor, ref);
	printk(KERN_ERR "neighbor free");
	BUG_ON(nb->nb_list.next != LIST_POISON1);
	BUG_ON(nb->nb_list.prev != LIST_POISON2);
	if (nb->addr != 0)
		kfree(nb->addr);
	nb->addr = 0;
	if (nb->dev != 0)
		dev_put(nb->dev);
	nb->dev = 0;
	kmem_cache_free(nb_slab, nb);
}
static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
	struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
	__u32 seqno;

	if (unlikely(nb == 0))
		return 0;

	memset(nb, 0, sizeof(struct neighbor));

	kref_init(&(nb->ref));
	mutex_init(&(nb->cmsg_lock));
	INIT_LIST_HEAD(&(nb->control_msgs_out));
	INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
	nb->last_ping_time = jiffies;
	atomic_set(&(nb->ooo_packets), 0);
	spin_lock_init(&(nb->credits_lock));
	nb->jiffies_credit_update = nb->last_ping_time;
	get_random_bytes((char *) &seqno, sizeof(seqno));
	mutex_init(&(nb->pingcookie_lock));
	atomic_set(&(nb->latency), 1000000);
	atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
	spin_lock_init(&(nb->state_lock));
	atomic_set(&(nb->kpacket_seqno), seqno);
	mutex_init(&(nb->conn_list_lock));
	INIT_LIST_HEAD(&(nb->rcv_conn_list));
	INIT_LIST_HEAD(&(nb->snd_conn_list));
	spin_lock_init(&(nb->retrans_lock));
	INIT_LIST_HEAD(&(nb->retrans_list));
	INIT_LIST_HEAD(&(nb->retrans_list_conn));

	return nb;
}
struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}

struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
		__u16 addrlen, __u8 *addr)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
				addrlen) == 0) {
			ret = curr;
			kref_get(&(ret->ref));

			goto out;
		}

		currlh = currlh->next;
	}

out:
	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}
/*
 * TODO:
 *
 * address flags
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput, credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
#warning todo extend
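/*
 * Reply layout produced by generate_neigh_list() (summary of the code
 * below): total number of active neighbors [4], number of rows in this
 * response [4], then per row: numaddr [2] (currently always 1),
 * addrtypelen [2], addrlen [2], the addrtype "id" and the address bytes.
 * The integers are written with put_u32()/put_u16(); the final argument
 * requests byte-order conversion, as with pull_u32().
 */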
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
	struct list_head *currlh;

	char *p_totalneighs = buf;
	char *p_response_rows = buf + 4;

	int bufferfull = 0;

	__u32 total = 0;
	__u32 cnt = 0;

	__u32 buf_offset = 8;

	BUG_ON(buf == 0);
	BUG_ON(buflen < 8);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		__u8 state;
		unsigned long iflags;
		/* get_neigh_state not used here because it would deadlock */
		spin_lock_irqsave(&(curr->state_lock), iflags);
		state = curr->state;
		spin_unlock_irqrestore(&(curr->state_lock), iflags);

		if (state != NEIGHBOR_STATE_ACTIVE)
			goto cont2;

		if (total < offset)
			goto cont;

		if (unlikely(buf_offset + 6 + 2 + curr->addrlen > buflen))
			bufferfull = 1;

		if (bufferfull)
			goto cont;

		put_u16(buf + buf_offset, 1, 1); /* numaddr */
		buf_offset += 2;
		put_u16(buf + buf_offset, 2, 1); /* addrtypelen */
		buf_offset += 2;
		put_u16(buf + buf_offset, curr->addrlen, 1); /* addrlen */
		buf_offset += 2;
		buf[buf_offset] = 'i'; /* addrtype */
		buf_offset += 1;
		buf[buf_offset] = 'd';
		buf_offset += 1;
		memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
		buf_offset += curr->addrlen;

		BUG_ON(buf_offset > buflen);

		cnt++;

cont:
		total++;
cont2:
		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	put_u32(p_totalneighs, total, 1);
	put_u32(p_response_rows, cnt, 1);

	return buf_offset;
}
void set_last_routdtrip(struct neighbor *nb, unsigned long time)
{
	unsigned long iflags;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
			nb->state_time.last_roundtrip))
		nb->state_time.last_roundtrip = time;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);
}

static void _refresh_initial_debitsrate(struct net_device *dev,
		__u32 debitsrate)
{
	__u32 neighbors = 0;
	struct list_head *currlh;

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			neighbors++;

		currlh = currlh->next;
	}

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			set_debitrate_initial(curr,
					debitsrate/neighbors);

		currlh = currlh->next;
	}
}
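/*
 * Summary (derived from the code below): refresh_initial_debitsrate()
 * first counts the distinct network devices that have at least one
 * neighbor, then gives each such device an equal share of
 * CREDIT_RATE_INITIAL; _refresh_initial_debitsrate() above splits that
 * share evenly between the neighbors on the device.
 */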
/* neighbor operation lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
	struct list_head *currlh1;
	__u32 ifcnt = 0;

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present1;
			currlh2 = currlh2->next;
		}

		ifcnt++;

present1:
		currlh1 = currlh1->next;
	}

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present2;
			currlh2 = currlh2->next;
		}

		_refresh_initial_debitsrate(curr1->dev,
				CREDIT_RATE_INITIAL/ifcnt);

present2:
		currlh1 = currlh1->next;
	}
}
static void reset_stall_conns(struct neighbor *nb,
		int stall_time_ms, int resetall)
{
	struct list_head *currlh;

start:
	mutex_lock(&(nb->conn_list_lock));
	currlh = nb->snd_conn_list.next;

	while (currlh != &(nb->snd_conn_list)) {
		struct conn *sconn = container_of(currlh, struct conn,
				target.out.nb_list);
		BUG_ON(sconn->targettype != TARGET_OUT);

		if (resetall || stall_time_ms >=
				sconn->target.out.stall_timeout_ms) {
			/*
			 * reset_conn must not be called with conn_list_lock
			 * held
			 */
			mutex_unlock(&(nb->conn_list_lock));
			reset_conn(sconn);
			goto start;
		}
		currlh = currlh->next;
	}

	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
}
static void stall_timer(struct work_struct *work)
{
	struct neighbor *nb = container_of(to_delayed_work(work),
			struct neighbor, stalltimeout_timer);

	int stall_time_ms;
	__u8 nbstate;

	int resetall;

	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);
	stall_time_ms = jiffies_to_msecs(jiffies -
			nb->state_time.last_roundtrip);
	nbstate = nb->state;
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
		nb->str_timer_pending = 0;
		kref_put(&(nb->ref), neighbor_free);
		return;
	}

	resetall = (stall_time_ms > NB_KILL_TIME_MS);

	/*if(resetall)
		printk(KERN_ERR "reset_all");*/

	reset_stall_conns(nb, stall_time_ms, resetall);

	if (resetall) {
		spin_lock_irqsave(&(nb->state_lock), iflags);
		nb->state = NEIGHBOR_STATE_KILLED;
		spin_unlock_irqrestore(&(nb->state_lock), iflags);

		mutex_lock(&neighbor_operation_lock);
		list_del(&(nb->nb_list));
		refresh_initial_debitsrate();
		mutex_unlock(&neighbor_operation_lock);
		kref_put(&(nb->ref), neighbor_free); /* nb_list */

		kref_put(&(nb->ref), neighbor_free); /* stall_timer */
	} else {
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(STALL_TIMER_INTERVAL_MS));
	}
}
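/*
 * Neighbor state handling in this file (summary of the logic below and in
 * ping_resp()): a neighbor starts in NEIGHBOR_STATE_INITIAL and becomes
 * ACTIVE in ping_resp() once PING_SUCCESS_CNT pings have succeeded and, for
 * the initial state, INITIAL_TIME_MS have passed. get_neigh_state() moves an
 * ACTIVE neighbor to STALLED when no roundtrip has been seen for
 * NB_STALL_TIME_MS while enough pings are in transit; stall_timer() then
 * resets conns whose stall timeout expired and, after NB_KILL_TIME_MS
 * without a roundtrip, marks the neighbor KILLED and drops it from nb_list.
 */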
int get_neigh_state(struct neighbor *nb)
{
	int ret;
	int switchedtostalled = 0;
	unsigned long iflags;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
			time_after_eq(jiffies, nb->state_time.last_roundtrip +
			msecs_to_jiffies(NB_STALL_TIME_MS)) &&
			nb->ping_intransit >= NB_STALL_MINPINGS_MS))) {
		nb->state = NEIGHBOR_STATE_STALLED;
		switchedtostalled = 1;
	}

	ret = nb->state;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(switchedtostalled)) {
		/*printk(KERN_ERR "switched to stalled");*/
		int pending;
		spin_lock_irqsave(&(nb->state_lock), iflags);
		pending = nb->str_timer_pending;
		spin_unlock_irqrestore(&(nb->state_lock), iflags);

		if (pending == 0) {
			kref_get(&(nb->ref));
			INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
					stall_timer);
			schedule_delayed_work(&(nb->stalltimeout_timer), 1);
		}
	}

	return ret;
}

static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
	int i;

	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie)
			return &(nb->cookies[i]);
	}
	return 0;
}
void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
	struct ping_cookie *c;
	int i;

	unsigned long cookie_sendtime;
	__s64 newlatency;

	unsigned long iflags;

	mutex_lock(&(nb->pingcookie_lock));

	c = find_cookie(nb, cookie);

	if (unlikely(c == 0))
		goto out;

	cookie_sendtime = c->time;
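	/*
	 * Exponentially weighted moving average: keep 15/16 of the previous
	 * latency estimate (in microseconds) and add 1/16 of the new sample,
	 * i.e. the measured roundtrip minus the delay the peer reported.
	 */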
	newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
			jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
	if (unlikely(newlatency < 0))
		newlatency = 0;
	if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
		newlatency = ((__s64)256)*256*256*256 - 1;

	atomic_set(&(nb->latency), (__u32) newlatency);

	c->cookie = 0;
	nb->ping_intransit--;

	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie != 0 &&
				time_before(nb->cookies[i].time, c->time)) {
			nb->cookies[i].pongs++;
			if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
				nb->cookies[i].cookie = 0;
				nb->cookies[i].pongs = 0;
				nb->ping_intransit--;
			}
		}
	}

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
			nb->state == NEIGHBOR_STATE_STALLED)) {
		nb->ping_success++;

		if (nb->state == NEIGHBOR_STATE_INITIAL) {
			__u64 jiffies64 = get_jiffies_64();
			if (nb->state_time.last_state_change == 0)
				nb->state_time.last_state_change = jiffies64;
			if (jiffies64 <= (nb->state_time.last_state_change +
					msecs_to_jiffies(INITIAL_TIME_MS)))
				goto out2;
		}

		if (nb->ping_success >= PING_SUCCESS_CNT) {
			/*if (nb->state == NEIGHBOR_STATE_INITIAL)
				printk(KERN_ERR "switched from initial to active");
			else
				printk(KERN_ERR "switched from stalled to active");
			*/
			nb->state = NEIGHBOR_STATE_ACTIVE;
			nb->ping_success = 0;
			nb->state_time.last_roundtrip = jiffies;
		}
	} else {
		nb->state_time.last_roundtrip = cookie_sendtime;
	}

out2:
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

out:
	mutex_unlock(&(nb->pingcookie_lock));
}
__u32 add_ping_req(struct neighbor *nb)
{
	struct ping_cookie *c;
	__u32 i;

	__u32 cookie;

	mutex_lock(&(nb->pingcookie_lock));

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == 0)
			goto found;
	}

	get_random_bytes((char *) &i, sizeof(i));
	i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
			PING_COOKIES_FIFO;

found:
	c = &(nb->cookies[i]);
	c->time = jiffies;
	c->pongs = 0;
	nb->lastcookie++;
	if (unlikely(nb->lastcookie == 0))
		nb->lastcookie++;
	c->cookie = nb->lastcookie;

	nb->ping_intransit++;

	cookie = c->cookie;

	nb->last_ping_time = jiffies;

	mutex_unlock(&(nb->pingcookie_lock));

	return cookie;
}

void unadd_ping_req(struct neighbor *nb, __u32 cookie)
{
	int i;

	if (cookie == 0)
		return;

	mutex_lock(&(nb->pingcookie_lock));

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie) {
			nb->cookies[i].cookie = 0;
			nb->ping_intransit--;
			break;
		}
	}

	mutex_unlock(&(nb->pingcookie_lock));
}

static int neighbor_idle(struct neighbor *nb)
{
	int ret;
	mutex_lock(&(nb->conn_list_lock));
	ret = (list_empty(&(nb->rcv_conn_list)) &&
			list_empty(&(nb->snd_conn_list)));
	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
	return ret;
}
/*
 * Additional check to the checks and timings already done in kpacket_gen.c.
 * This is primarily to make sure that we do not invalidate other ping cookies
 * which might still receive responses. It does this by requiring a certain
 * minimum delay between pings, depending on how many pings are already in
 * transit.
 */
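/*
 * The throttle below works out to (derived from the code):
 *   mindelay_ms = ((latency_us + max_remote_cmsg_delay_us) / 1000)
 *                 << (ping_intransit + 1 - PING_COOKIES_NOTHROTTLE)
 * capped at PING_THROTTLE_LIMIT_MS, i.e. every additional ping in transit
 * beyond PING_COOKIES_NOTHROTTLE doubles the required gap since the last
 * ping; independent of that, PING_MININTERVAL_MS is always enforced.
 */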
int time_to_send_ping(struct neighbor *nb)
{
	int rc = 1;

	int state = get_neigh_state(nb);
	int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
			neighbor_idle(nb));

	mutex_lock(&(nb->pingcookie_lock));
	if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
		__u32 mindelay = ((((__u32) atomic_read(&(nb->latency))) +
				((__u32) atomic_read(
				&(nb->max_remote_cmsg_delay))))/1000) <<
				(nb->ping_intransit + 1 -
				PING_COOKIES_NOTHROTTLE);

		if (mindelay > PING_THROTTLE_LIMIT_MS)
			mindelay = PING_THROTTLE_LIMIT_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
			rc = 0;
	}

	if (jiffies_to_msecs(jiffies - nb->last_ping_time) <
			PING_MININTERVAL_MS)
		rc = 0;

	if (rc != 0) {
		__u32 forcetime;
		if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
				nb->ping_intransit != 0)
			forcetime = PING_FORCETIME_MS;
		else if (idle)
			forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
		else
			forcetime = PING_FORCETIME_ACTIVE_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
			rc = 2;
	}

	mutex_unlock(&(nb->pingcookie_lock));

	return rc;
}
static void add_neighbor(struct neighbor *nb)
{
	struct list_head *currlh = nb_list.next;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
				curr->addrlen) == 0)
			goto already_present;

		currlh = currlh->next;
	}

	/* kref_get not needed here, because the caller leaves its ref to us */
	printk(KERN_ERR "add_neigh");

	list_add_tail(&(nb->nb_list), &nb_list);
	refresh_initial_debitsrate();
	schedule_controlmsg_timerfunc(nb);
	INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
	INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

	if (0) {
already_present:
		kmem_cache_free(nb_slab, nb);
	}
}

static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
	char *ptr = cor_pull_skb(skb, 4);

	__u32 ret = 0;

	BUG_ON(0 == ptr);

	((char *)&ret)[0] = ptr[0];
	((char *)&ret)[1] = ptr[1];
	((char *)&ret)[2] = ptr[2];
	((char *)&ret)[3] = ptr[3];

	if (convbo)
		return be32_to_cpu(ret);
	return ret;
}
static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	__u16 addrtypelen;
	char *addrtype;
	__u16 addrlen;
	char *addr;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	if (nb->addr != 0)
		return 0;

	if (len < 4)
		return 0;

	addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	if (len < 2)
		return 0;

	addrlen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	addrtype = cmddata;
	cmddata += addrtypelen;
	len -= addrtypelen;

	addr = cmddata;
	cmddata += addrlen;
	len -= addrlen;

	if (len < 0)
		return 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	nb->addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(nb->addr == 0))
		return 1;

	memcpy(nb->addr, addr, addrlen);
	nb->addrlen = addrlen;

	return 0;
}

static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	if (cmd == NEIGHCMD_ADDADDR) {
		apply_announce_addaddr(nb, cmd, len, cmddata);
	} else {
		/* ignore unknown cmds */
	}
}
static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

	if (unlikely(nb == 0))
		return;

	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		BUG_ON(cmdlen > len);

		apply_announce_cmd(nb, cmd, cmdlen, msg);

		msg += cmdlen;
		len -= cmdlen;
	}

	BUG_ON(len != 0);

	memcpy(nb->mac, source_hw, MAX_ADDR_LEN);

	dev_hold(dev);
	nb->dev = dev;
	add_neighbor(nb);
}

static int check_announce_cmds(char *msg, __u32 len)
{
	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		/* malformed packet */
		if (unlikely(cmdlen > len))
			return 1;

		msg += cmdlen;
		len -= cmdlen;
	}

	if (unlikely(len != 0))
		return 1;

	return 0;
}
static void parse_announce(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	__u32 min_announce_version;
	__u32 max_announce_version;
	__u32 min_cor_version;
	__u32 max_cor_version;

	if (unlikely(len < 16))
		return;

	min_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	min_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;

	if (min_announce_version != 0)
		return;
	if (min_cor_version != 0)
		return;
	if (check_announce_cmds(msg, len)) {
		return;
	}
	apply_announce_cmds(msg, len, dev, source_hw);
}

struct announce_in {
	/* lh has to be first */
	struct list_head lh;
	struct sk_buff_head skbs; /* sorted by offset */
	struct net_device *dev;
	char source_hw[MAX_ADDR_LEN];
	__u32 announce_proto_version;
	__u32 packet_version;
	__u32 total_size;
	__u32 received_size;
	__u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;
static void merge_announce(struct announce_in *ann)
{
	char *msg = kmalloc(ann->total_size, GFP_KERNEL);
	__u32 copy = 0;

	if (msg == 0) {
		/* try again when next packet arrives */
		return;
	}

	while (copy != ann->total_size) {
		__u32 currcpy;
		__u32 offset = 0;
		struct sk_buff *skb;
		struct skb_procstate *ps;

		if (unlikely(skb_queue_empty(&(ann->skbs)))) {
			printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
					"empty while merging packets\n");
			goto free;
		}

		skb = skb_dequeue(&(ann->skbs));
		ps = skb_pstate(skb);

		currcpy = skb->len;
		if (unlikely(ps->funcstate.announce.offset > copy)) {
			printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
					"value found\n");
			goto free;
		}

		if (unlikely(ps->funcstate.announce.offset < copy)) {
			offset = copy - ps->funcstate.announce.offset;
			currcpy -= offset;
		}

		if (unlikely(currcpy + copy > ann->total_size))
			goto free;

		memcpy(msg + copy, skb->data + offset, currcpy);
		copy += currcpy;
		kfree_skb(skb);
	}

	parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

free:
	if (msg != 0)
		kfree(msg);

	dev_put(ann->dev);
	list_del(&(ann->lh));
	kmem_cache_free(announce_in_slab, ann);
}
static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
	struct skb_procstate *ps = skb_pstate(skb);

	__u32 offset = ps->funcstate.announce.offset;
	__u32 len = skb->len;

	__u32 curroffset = 0;
	__u32 prevoffset = 0;
	__u32 prevlen = 0;

	struct sk_buff *curr = ann->skbs.next;

	if (unlikely(len + offset > ann->total_size)) {
		/* invalid header */
		kfree_skb(skb);
		return 0;
	}

	/*
	 * Try to find the right place to insert in the sorted list. This
	 * means to process the list until we find a skb which has a greater
	 * offset, so we can insert before it to keep the sort order. However,
	 * this is complicated by the fact that the new skb must not be
	 * inserted between 2 skbs if there is no data missing in between. So
	 * the loop has to keep running until there is either a gap to insert
	 * into or we see that this data has already been received.
	 */
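	/*
	 * Example (values assumed): if the list holds chunks covering
	 * [0,256) and [512,768) and a chunk with offset 256 and len 256
	 * arrives, the loop stops at the [512,768) entry because of the gap
	 * before it; the new chunk is inserted in between and no bytes are
	 * trimmed. If instead a chunk covering [0,300) arrives, only the
	 * bytes from 256 to 300 count as newly received below.
	 */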
	while ((void *) curr != (void *) &(ann->skbs)) {
		struct skb_procstate *currps = skb_pstate(curr);

		curroffset = currps->funcstate.announce.offset;

		if (curroffset > offset && (prevoffset + prevlen) < curroffset)
			break;

		prevoffset = curroffset;
		prevlen = curr->len;
		curr = curr->next;

		if ((offset+len) <= (prevoffset+prevlen)) {
			/* we already have this data */
			kfree_skb(skb);
			return 0;
		}
	}

	/*
	 * Calculate how much data was really received, by subtracting
	 * the bytes we already have.
	 */
	if (unlikely(prevoffset + prevlen > offset)) {
		len -= (prevoffset + prevlen) - offset;
		offset = prevoffset + prevlen;
	}

	if (unlikely((void *) curr != (void *) &(ann->skbs) &&
			(offset + len) > curroffset))
		len = curroffset - offset;

	ann->received_size += len;
	BUG_ON(ann->received_size > ann->total_size);
	__skb_queue_before(&(ann->skbs), curr, skb);
	ann->last_received_packet = get_jiffies_64();

	if (ann->received_size == ann->total_size)
		merge_announce(ann);
	else if (unlikely(ann->skbs.qlen >= 16))
		return 1;

	return 0;
}
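/*
 * rcv_announce() below keeps one announce_in entry per (device, source
 * address, versions, total size) tuple. If more than 128 partial announces
 * are pending, the least recently active one is recycled, and _rcv_announce()
 * above requests removal of an entry once it holds 16 or more queued chunks
 * without completing.
 */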
void rcv_announce(struct sk_buff *skb)
{
	struct skb_procstate *ps = skb_pstate(skb);
	struct announce_in *curr = 0;
	struct announce_in *leastactive = 0;
	__u32 list_size = 0;

	__u32 announce_proto_version = pull_u32(skb, 1);
	__u32 packet_version = pull_u32(skb, 1);
	__u32 total_size = pull_u32(skb, 1);

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	ps->funcstate.announce.offset = pull_u32(skb, 1);

	if (total_size > 8192)
		goto discard;

	mutex_lock(&(neighbor_operation_lock));

	if (announce_proto_version != 0)
		goto discard;

	curr = (struct announce_in *) announce_list.next;

	while (((struct list_head *) curr) != &(announce_list)) {
		list_size++;
		if (curr->dev == skb->dev &&
				memcmp(curr->source_hw, source_hw, MAX_ADDR_LEN) == 0 &&
				curr->announce_proto_version == announce_proto_version &&
				curr->packet_version == packet_version &&
				curr->total_size == total_size)
			goto found;

		if (leastactive == 0 || curr->last_received_packet <
				leastactive->last_received_packet)
			leastactive = curr;

		curr = (struct announce_in *) curr->lh.next;
	}

	if (list_size >= 128) {
		BUG_ON(leastactive == 0);
		curr = leastactive;

		curr->last_received_packet = get_jiffies_64();

		while (!skb_queue_empty(&(curr->skbs))) {
			struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
			kfree_skb(skb2);
		}

		dev_put(curr->dev);
	} else {
		curr = kmem_cache_alloc(announce_in_slab,
				GFP_KERNEL);
		if (curr == 0)
			goto discard;

		skb_queue_head_init(&(curr->skbs));
		list_add_tail((struct list_head *) curr, &announce_list);
	}

	curr->packet_version = packet_version;
	curr->total_size = total_size;
	curr->received_size = 0;
	curr->announce_proto_version = announce_proto_version;
	curr->dev = skb->dev;
	dev_hold(curr->dev);
	memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

found:
	if (_rcv_announce(skb, curr)) {
		list_del((struct list_head *) curr);
		dev_put(curr->dev);
		kmem_cache_free(announce_in_slab, curr);
	}

	if (0) {
discard:
		kfree_skb(skb);
	}

	mutex_unlock(&(neighbor_operation_lock));
}
struct announce {
	struct kref ref;

	__u32 packet_version;
	char *announce_msg;
	__u32 announce_msg_len;
};

struct announce *last_announce;
static int send_announce_chunk(struct announce_data *ann)
{
	struct sk_buff *skb;
	__u32 packet_size = 256;
	__u32 remainingdata = ann->ann->announce_msg_len -
			ann->curr_announce_msg_offset;
	__u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
	__u32 overhead = 17 + headroom;
	char *header;
	char *ptr;
	int rc = 0;

	if (remainingdata < packet_size)
		packet_size = remainingdata;

	skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
	if (unlikely(skb == 0))
		return 0;

	skb->protocol = htons(ETH_P_COR);
	skb->dev = ann->dev;
	skb_reserve(skb, headroom);

	if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
			ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
		goto out_err;

	skb_reset_network_header(skb);

	header = skb_put(skb, 17);
	if (unlikely(header == 0))
		goto out_err;

	header[0] = PACKET_TYPE_ANNOUNCE;

	put_u32(header + 1, 0, 1); /* announce proto version */
	put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
	put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
	put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

	ptr = skb_put(skb, packet_size);
	if (unlikely(ptr == 0))
		goto out_err;

	memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
			packet_size);

	rc = dev_queue_xmit(skb);

	if (rc == 0) {
		ann->curr_announce_msg_offset += packet_size;

		if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
			ann->curr_announce_msg_offset = 0;
	}

	if (0) {
out_err:
		if (skb != 0)
			kfree_skb(skb);
	}

	return rc;
}

int send_announce_qos(struct announce_data *ann)
{
	int rc;
	mutex_lock(&(neighbor_operation_lock));
	rc = send_announce_chunk(ann);
	mutex_unlock(&(neighbor_operation_lock));
	return rc;
}
static void announce_free(struct kref *ref)
{
	struct announce *ann = container_of(ref, struct announce, ref);
	kfree(ann->announce_msg);
	kfree(ann);
}

void announce_data_free(struct kref *ref)
{
	struct announce_data *ann = container_of(ref, struct announce_data,
			ref);
	if (ann->ann != 0)
		kref_put(&(ann->ann->ref), announce_free);
	kfree(ann);
}
static void send_announce(struct work_struct *work)
{
	struct announce_data *ann = container_of(to_delayed_work(work),
			struct announce_data, announce_work);
	int reschedule = 0;
	int rc = 0;

	mutex_lock(&(neighbor_operation_lock));

	if (unlikely(ann->dev == 0))
		goto out;
	reschedule = 1;

	if (unlikely(ann->ann == 0 && last_announce == 0))
		goto out;
	if (ann->curr_announce_msg_offset == 0 &&
			unlikely(ann->ann != last_announce)) {
		if (ann->ann != 0)
			kref_put(&(ann->ann->ref), announce_free);
		ann->ann = last_announce;
		kref_get(&(ann->ann->ref));
	}

	rc = send_announce_chunk(ann);

out:
	mutex_unlock(&(neighbor_operation_lock));

	if (rc != 0)
		qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

	if (unlikely(reschedule == 0)) {
		kref_put(&(ann->ref), announce_data_free);
	} else {
		__u64 jiffies = get_jiffies_64();
		int delay;

		ann->scheduled_announce_timer += msecs_to_jiffies(
				ANNOUNCE_SEND_PACKETINTELVAL_MS);

		delay = ann->scheduled_announce_timer - jiffies;
		if (delay < 0)
			delay = 0;

		INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
		schedule_delayed_work(&(ann->announce_work), delay);
	}
}

static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
	struct list_head *lh = announce_out_list.next;

	while (lh != &announce_out_list) {
		struct announce_data *curr = (struct announce_data *)(
				((char *) lh) -
				offsetof(struct announce_data, lh));

		if (curr->dev == dev)
			return curr;

		lh = lh->next;
	}

	return 0;
}
static void announce_send_adddev(struct net_device *dev)
{
	struct announce_data *ann;

	ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		printk(KERN_ERR "cor cannot allocate memory for sending "
				"announces");
		return;
	}

	memset(ann, 0, sizeof(struct announce_data));

	kref_init(&(ann->ref));

	dev_hold(dev);
	ann->dev = dev;

	mutex_lock(&(neighbor_operation_lock));
	list_add_tail(&(ann->lh), &announce_out_list);
	mutex_unlock(&(neighbor_operation_lock));

	ann->scheduled_announce_timer = get_jiffies_64();
	INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
	schedule_delayed_work(&(ann->announce_work), 1);
}

static void announce_send_rmdev(struct net_device *dev)
{
	struct announce_data *ann;

	mutex_lock(&(neighbor_operation_lock));

	ann = get_announce_by_netdev(dev);

	if (ann == 0)
		goto out;

	dev_put(ann->dev);
	ann->dev = 0;

out:
	mutex_unlock(&(neighbor_operation_lock));
}

int netdev_notify_func(struct notifier_block *not, unsigned long event,
		void *ptr)
{
	struct net_device *dev = (struct net_device *) ptr;
	int rc;

	switch (event) {
	case NETDEV_UP:
		rc = create_queue(dev);
		if (rc == 1)
			return 1;
		announce_send_adddev(dev);
		break;
	case NETDEV_DOWN:
		destroy_queue(dev);
		announce_send_rmdev(dev);
		break;
	case NETDEV_REBOOT:
	case NETDEV_CHANGE:
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
		break;
	default:
		return 1;
	}

	return 0;
}
static int set_announce(char *msg, __u32 len)
{
	struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		kfree(msg);
		return 1;
	}

	memset(ann, 0, sizeof(struct announce));

	ann->announce_msg = msg;
	ann->announce_msg_len = len;

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));

	if (last_announce != 0) {
		ann->packet_version = last_announce->packet_version + 1;
		kref_put(&(last_announce->ref), announce_free);
	}

	last_announce = ann;

	mutex_unlock(&(neighbor_operation_lock));

	return 0;
}

static int generate_announce(void)
{
	__u32 addrtypelen = strlen(addrtype);

	__u32 hdr_len = 16;
	__u32 cmd_hdr_len = 8;
	__u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

	__u32 len = hdr_len + cmd_hdr_len + cmd_len;
	__u32 offset = 0;

	char *msg = kmalloc(len, GFP_KERNEL);
	if (unlikely(msg == 0))
		return 1;

	put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
	offset += 4;

	put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(msg + offset, cmd_len, 1); /* command length */
	offset += 4;

	/* addrtypelen, addrlen */
	put_u16(msg + offset, addrtypelen, 1);
	offset += 2;
	put_u16(msg + offset, addrlen, 1);
	offset += 2;

	/* addrtype, addr */
	memcpy(msg + offset, addrtype, addrtypelen);
	offset += addrtypelen;
	memcpy(msg + offset, addr, addrlen);
	offset += addrlen;

	BUG_ON(offset != len);

	return set_announce(msg, len);
}

int __init cor_neighbor_init(void)
{
	addrlen = 16;

	addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(addr == 0))
		goto error_free2;

	get_random_bytes(addr, addrlen);

	nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
			0, 0);
	announce_in_slab = kmem_cache_create("cor_announce_in",
			sizeof(struct announce_in), 8, 0, 0);

	if (unlikely(generate_announce()))
		goto error_free1;

	memset(&netdev_notify, 0, sizeof(netdev_notify));
	netdev_notify.notifier_call = netdev_notify_func;
	register_netdevice_notifier(&netdev_notify);

	return 0;

error_free1:
	kfree(addr);

error_free2:
	return -ENOMEM;
}

MODULE_LICENSE("GPL");