net/cor/neighbor.c
/*
 * Connection oriented routing
 * Copyright (C) 2007-2010 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include "cor.h"
/**
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *  unit is bytes
 * [data]
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *  ...
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */
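
/*
 * Example (illustrative sizes): an announce message of 600 bytes sent in
 * chunks of at most 256 bytes (see send_announce_chunk()) is transmitted
 * as three packets sharing proto version 0, the same packet version and
 * total size 600, with offsets 0, 256 and 512. The receiver reorders and
 * merges the chunks by offset before parsing the data field.
 */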

/* Commands */

#define NEIGHCMD_ADDADDR 1

/**
 * Parameter (in the order in which it is encoded in the command data):
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
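
/*
 * Example (illustrative values): a NEIGHCMD_ADDADDR announcing a 16 byte
 * "id" address carries addrtypelen = 2, addrlen = 16, addrtype = "id" and
 * 16 address bytes, i.e. 2 + 2 + 2 + 16 = 22 bytes of command data. See
 * generate_announce() for the sending side and apply_announce_addaddr()
 * for the receiving side.
 */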

DEFINE_MUTEX(neighbor_operation_lock);

char *addrtype = "id";
char *addr;
int addrlen;

LIST_HEAD(nb_list);
struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;

#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
	if (addrtypelen == 2 &&
			(addrtype[0] == 'i' || addrtype[0] == 'I') &&
			(addrtype[1] == 'd' || addrtype[1] == 'D'))
		return ADDRTYPE_ID;

	return ADDRTYPE_UNKNOWN;
}

void neighbor_free(struct kref *ref)
{
	struct neighbor *nb = container_of(ref, struct neighbor, ref);
	printk(KERN_ERR "neighbor free");
	BUG_ON(nb->nb_list.next != LIST_POISON1);
	BUG_ON(nb->nb_list.prev != LIST_POISON2);
	if (nb->addr != 0)
		kfree(nb->addr);
	nb->addr = 0;
	if (nb->dev != 0)
		dev_put(nb->dev);
	nb->dev = 0;
	kmem_cache_free(nb_slab, nb);
}

static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
	struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
	__u32 seqno;

	if (unlikely(nb == 0))
		return 0;

	memset(nb, 0, sizeof(struct neighbor));

	kref_init(&(nb->ref));
	mutex_init(&(nb->cmsg_lock));
	INIT_LIST_HEAD(&(nb->control_msgs_out));
	INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
	nb->last_ping_time = jiffies;
	atomic_set(&(nb->ooo_packets), 0);
	spin_lock_init(&(nb->credits_lock));
	nb->jiffies_credit_update = nb->last_ping_time;
	get_random_bytes((char *) &seqno, sizeof(seqno));
	mutex_init(&(nb->pingcookie_lock));
	atomic_set(&(nb->latency), 1000000);
	atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
	spin_lock_init(&(nb->state_lock));
	atomic_set(&(nb->kpacket_seqno), seqno);
	mutex_init(&(nb->conn_list_lock));
	INIT_LIST_HEAD(&(nb->rcv_conn_list));
	INIT_LIST_HEAD(&(nb->snd_conn_list));
	spin_lock_init(&(nb->retrans_lock));
	INIT_LIST_HEAD(&(nb->retrans_list));
	INIT_LIST_HEAD(&(nb->retrans_list_conn));

	return nb;
}

struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}

struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
		__u16 addrlen, __u8 *addr)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
				addrlen) == 0) {
			ret = curr;
			kref_get(&(ret->ref));

			goto out;
		}

		currlh = currlh->next;
	}

out:
	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}

/**
 * TODO:
 *
 * address flags
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput, credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
#warning todo extend
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
	struct list_head *currlh;

	char *p_totalneighs = buf;
	char *p_response_rows = buf + 4;

	int bufferfull = 0;

	__u32 total = 0;
	__u32 cnt = 0;

	__u32 buf_offset = 8;

	BUG_ON(buf == 0);
	BUG_ON(buflen < 8);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		__u8 state;
		unsigned long iflags;
		/* get_neigh_state not used here because it would deadlock */
		spin_lock_irqsave(&(curr->state_lock), iflags);
		state = curr->state;
		spin_unlock_irqrestore(&(curr->state_lock), iflags);

		if (state != NEIGHBOR_STATE_ACTIVE)
			goto cont2;

		if (total < offset)
			goto cont;

		/* all lengths are unsigned, so compare without subtracting */
		if (unlikely(buf_offset + 6 + 2 + curr->addrlen > buflen))
			bufferfull = 1;

		if (bufferfull)
			goto cont;

		put_u16(buf + buf_offset, 1, 1);/* numaddr */
		buf_offset += 2;
		put_u16(buf + buf_offset, 2, 1);/* addrtypelen */
		buf_offset += 2;
		put_u16(buf + buf_offset, curr->addrlen, 1);/* addrlen */
		buf_offset += 2;
		buf[buf_offset] = 'i'; /* addrtype */
		buf_offset += 1;
		buf[buf_offset] = 'd';
		buf_offset += 1;
		memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
		buf_offset += curr->addrlen;

		BUG_ON(buf_offset > buflen);

		cnt++;

cont:
		total++;
cont2:
		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	put_u32(p_totalneighs, total, 1);
	put_u32(p_response_rows, cnt, 1);

	return buf_offset;
}
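
/*
 * Illustrative layout of the buffer filled above: for two active
 * neighbors with 16 byte addresses it contains
 *   [total (4)][rows (4)]
 *   [numaddr=1 (2)][addrtypelen=2 (2)][addrlen=16 (2)]['i']['d'][16 addr bytes]
 *   ... the same record again for the second neighbor ...
 * i.e. 8 + 2 * (6 + 2 + 16) = 56 bytes in total.
 */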

void set_last_routdtrip(struct neighbor *nb, unsigned long time)
{
	unsigned long iflags;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
			nb->state_time.last_roundtrip))
		nb->state_time.last_roundtrip = time;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);
}

static void _refresh_initial_debitsrate(struct net_device *dev,
		__u32 debitsrate)
{
	__u32 neighbors = 0;
	struct list_head *currlh;

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			neighbors++;

		currlh = currlh->next;
	}

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			set_debitrate_initial(curr,
					debitsrate/neighbors);

		currlh = currlh->next;
	}
}

/* neighbor operation lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
	struct list_head *currlh1;
	__u32 ifcnt = 0;

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present1;
			currlh2 = currlh2->next;
		}

		ifcnt++;

present1:
		currlh1 = currlh1->next;
	}

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present2;
			currlh2 = currlh2->next;
		}

		_refresh_initial_debitsrate(curr1->dev,
				CREDIT_RATE_INITIAL/ifcnt);

present2:
		currlh1 = currlh1->next;
	}
}

static void reset_stall_conns(struct neighbor *nb,
		int stall_time_ms, int resetall)
{
	struct list_head *currlh;

start:
	mutex_lock(&(nb->conn_list_lock));
	currlh = nb->snd_conn_list.next;

	while (currlh != &(nb->snd_conn_list)) {
		struct conn *sconn = container_of(currlh, struct conn,
				target.out.nb_list);
		BUG_ON(sconn->targettype != TARGET_OUT);

		if (resetall || stall_time_ms >=
				sconn->target.out.stall_timeout_ms) {
			/**
			 * reset_conn must not be called with conn_list_lock
			 * held
			 */
			mutex_unlock(&(nb->conn_list_lock));
			reset_conn(sconn);
			goto start;
		}
		currlh = currlh->next;
	}

	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
}

static void stall_timer(struct work_struct *work)
{
	struct neighbor *nb = container_of(to_delayed_work(work),
			struct neighbor, stalltimeout_timer);

	int stall_time_ms;
	__u8 nbstate;

	int resetall;

	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);
	stall_time_ms = jiffies_to_msecs(jiffies -
			nb->state_time.last_roundtrip);
	nbstate = nb->state;
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
		nb->str_timer_pending = 0;
		kref_put(&(nb->ref), neighbor_free);
		return;
	}

	resetall = (stall_time_ms > NB_KILL_TIME_MS);

	/*if (resetall)
		printk(KERN_ERR "reset_all");*/

	reset_stall_conns(nb, stall_time_ms, resetall);

	if (resetall) {
		spin_lock_irqsave(&(nb->state_lock), iflags);
		nb->state = NEIGHBOR_STATE_KILLED;
		spin_unlock_irqrestore(&(nb->state_lock), iflags);

		mutex_lock(&neighbor_operation_lock);
		list_del(&(nb->nb_list));
		refresh_initial_debitsrate();
		mutex_unlock(&neighbor_operation_lock);

		kref_put(&(nb->ref), neighbor_free); /* nb_list */
		kref_put(&(nb->ref), neighbor_free); /* stall_timer */
	} else {
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(STALL_TIMER_INTERVAL_MS));
	}
}

int get_neigh_state(struct neighbor *nb)
{
	int ret;
	int switchedtostalled = 0;
	unsigned long iflags;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
			time_after_eq(jiffies, nb->state_time.last_roundtrip +
			msecs_to_jiffies(NB_STALL_TIME_MS)) && (
			nb->ping_intransit >= NB_STALL_MINPINGS ||
			nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
		nb->state = NEIGHBOR_STATE_STALLED;
		switchedtostalled = 1;
		printk(KERN_ERR "switched to stalled");
		BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
	}

	ret = nb->state;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(switchedtostalled)) {
		/*printk(KERN_ERR "switched to stalled");*/
		int pending;
		spin_lock_irqsave(&(nb->state_lock), iflags);
		pending = nb->str_timer_pending;
		spin_unlock_irqrestore(&(nb->state_lock), iflags);

		if (pending == 0) {
			kref_get(&(nb->ref));
			INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
					stall_timer);
			schedule_delayed_work(&(nb->stalltimeout_timer), 1);
		}
	}

	return ret;
}

static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
	int i;

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == cookie)
			return &(nb->cookies[i]);
	}

	return 0;
}

void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
	struct ping_cookie *c;
	int i;

	unsigned long cookie_sendtime;
	__s64 newlatency;

	unsigned long iflags;

	mutex_lock(&(nb->pingcookie_lock));

	c = find_cookie(nb, cookie);

	if (unlikely(c == 0))
		goto out;

	cookie_sendtime = c->time;

	newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
			jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
	if (unlikely(newlatency < 0))
		newlatency = 0;
	if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
		newlatency = ((__s64)256)*256*256*256 - 1;

	atomic_set(&(nb->latency), (__u32) newlatency);

	c->cookie = 0;
	nb->ping_intransit--;

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie != 0 &&
				time_before(nb->cookies[i].time, c->time)) {
			nb->cookies[i].pongs++;
			if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
				nb->cookies[i].cookie = 0;
				nb->cookies[i].pongs = 0;
				nb->ping_intransit--;
			}
		}
	}

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
			nb->state == NEIGHBOR_STATE_STALLED)) {
		nb->ping_success++;

		if (nb->state == NEIGHBOR_STATE_INITIAL) {
			__u64 jiffies64 = get_jiffies_64();
			if (nb->state_time.last_state_change == 0)
				nb->state_time.last_state_change = jiffies64;
			if (jiffies64 <= (nb->state_time.last_state_change +
					msecs_to_jiffies(INITIAL_TIME_MS)))
				goto out2;
		}

		if (nb->ping_success >= PING_SUCCESS_CNT) {
			/*if (nb->state == NEIGHBOR_STATE_INITIAL)
				printk(KERN_ERR "switched from initial to active");
			else
				printk(KERN_ERR "switched from stalled to active");*/
			nb->state = NEIGHBOR_STATE_ACTIVE;
			nb->ping_success = 0;
			nb->state_time.last_roundtrip = jiffies;
		}
	} else {
		nb->state_time.last_roundtrip = cookie_sendtime;
	}

out2:
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

out:
	mutex_unlock(&(nb->pingcookie_lock));
}

__u32 add_ping_req(struct neighbor *nb)
{
	struct ping_cookie *c;
	__u32 i;

	__u32 cookie;

	mutex_lock(&(nb->pingcookie_lock));

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == 0)
			goto found;
	}

	get_random_bytes((char *) &i, sizeof(i));
	i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
			PING_COOKIES_FIFO;

found:
	c = &(nb->cookies[i]);
	c->time = jiffies;
	c->pongs = 0;
	nb->lastcookie++;
	if (unlikely(nb->lastcookie == 0))
		nb->lastcookie++;
	c->cookie = nb->lastcookie;

	nb->ping_intransit++;

	cookie = c->cookie;

	nb->last_ping_time = jiffies;

	mutex_unlock(&(nb->pingcookie_lock));

	return cookie;
}

void unadd_ping_req(struct neighbor *nb, __u32 cookie)
{
	int i;

	if (cookie == 0)
		return;

	mutex_lock(&(nb->pingcookie_lock));

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == cookie) {
			nb->cookies[i].cookie = 0;
			nb->ping_intransit--;
			break;
		}
	}

	mutex_unlock(&(nb->pingcookie_lock));
}

static int neighbor_idle(struct neighbor *nb)
{
	int ret;
	mutex_lock(&(nb->conn_list_lock));
	ret = (list_empty(&(nb->rcv_conn_list)) &&
			list_empty(&(nb->snd_conn_list)));
	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
	return ret;
}

/**
 * Checks in addition to the checks and timings already done in
 * kpacket_gen.c. This is primarily to make sure that we do not invalidate
 * other ping cookies which might still receive responses. It does this by
 * requiring a certain minimum delay between pings, depending on how many
 * pings are already in transit.
 */
int time_to_send_ping(struct neighbor *nb)
{
	int rc = 1;

	int state = get_neigh_state(nb);
	int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
			neighbor_idle(nb));

	__u32 forcetime;

	mutex_lock(&(nb->pingcookie_lock));
	if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
		__u32 mindelay = ((((__u32) atomic_read(&(nb->latency))) +
				((__u32) atomic_read(
				&(nb->max_remote_cmsg_delay))))/1000) <<
				(nb->ping_intransit + 1 -
				PING_COOKIES_NOTHROTTLE);

		if (mindelay > PING_THROTTLE_LIMIT_MS)
			mindelay = PING_THROTTLE_LIMIT_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
			rc = 0;
	}

	if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
			nb->ping_intransit != 0)
		forcetime = PING_FORCETIME_MS;
	else if (idle)
		forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
	else
		forcetime = PING_FORCETIME_ACTIVE_MS;

	if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
		rc = 0;
	else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
		rc = 2;

	mutex_unlock(&(nb->pingcookie_lock));

	return rc;
}
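
/*
 * Illustrative throttle computation for time_to_send_ping(): with an
 * estimated latency of 2000us, a max_remote_cmsg_delay of 2000us and
 * ping_intransit == PING_COOKIES_NOTHROTTLE + 1, the minimum delay is
 *   ((2000 + 2000) / 1000) << 2 = 16ms
 * (capped at PING_THROTTLE_LIMIT_MS). Pings requested less than 16ms
 * after last_ping_time are suppressed, while a last ping older than the
 * forcetime makes the function return 2 to force a new one.
 */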

static void add_neighbor(struct neighbor *nb)
{
	struct list_head *currlh = nb_list.next;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
				curr->addrlen) == 0)
			goto already_present;

		currlh = currlh->next;
	}

	/* kref_get not needed here, because the caller leaves its ref to us */
	printk(KERN_ERR "add_neigh");

	list_add_tail(&(nb->nb_list), &nb_list);
	refresh_initial_debitsrate();
	schedule_controlmsg_timerfunc(nb);
	INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
	INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

	if (0) {
already_present:
		kmem_cache_free(nb_slab, nb);
	}
}

static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
	char *ptr = cor_pull_skb(skb, 4);

	__u32 ret = 0;

	BUG_ON(0 == ptr);

	((char *)&ret)[0] = ptr[0];
	((char *)&ret)[1] = ptr[1];
	((char *)&ret)[2] = ptr[2];
	((char *)&ret)[3] = ptr[3];

	if (convbo)
		return be32_to_cpu(ret);
	return ret;
}

static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	__u16 addrtypelen;
	char *addrtype;
	__u16 addrlen;
	char *addr;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	if (nb->addr != 0)
		return 0;

	if (len < 4)
		return 0;

	addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	if (len < 2)
		return 0;

	addrlen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	/* len is unsigned, so check the remaining length before subtracting */
	if (len < ((__u32) addrtypelen) + addrlen)
		return 0;

	addrtype = cmddata;
	cmddata += addrtypelen;
	len -= addrtypelen;

	addr = cmddata;
	cmddata += addrlen;
	len -= addrlen;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	nb->addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(nb->addr == 0))
		return 1;

	memcpy(nb->addr, addr, addrlen);
	nb->addrlen = addrlen;

	return 0;
}

static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	if (cmd == NEIGHCMD_ADDADDR) {
		apply_announce_addaddr(nb, cmd, len, cmddata);
	} else {
		/* ignore unknown cmds */
	}
}

static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

	if (unlikely(nb == 0))
		return;

	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		BUG_ON(cmdlen > len);

		apply_announce_cmd(nb, cmd, cmdlen, msg);

		msg += cmdlen;
		len -= cmdlen;
	}

	BUG_ON(len != 0);

	memcpy(nb->mac, source_hw, MAX_ADDR_LEN);

	dev_hold(dev);
	nb->dev = dev;
	add_neighbor(nb);
}

static int check_announce_cmds(char *msg, __u32 len)
{
	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		/* malformed packet */
		if (unlikely(cmdlen > len))
			return 1;

		msg += cmdlen;
		len -= cmdlen;
	}

	if (unlikely(len != 0))
		return 1;

	return 0;
}

static void parse_announce(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	__u32 min_announce_version;
	__u32 max_announce_version;
	__u32 min_cor_version;
	__u32 max_cor_version;

	if (unlikely(len < 16))
		return;

	min_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	min_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;

	if (min_announce_version != 0)
		return;
	if (min_cor_version != 0)
		return;
	if (check_announce_cmds(msg, len)) {
		return;
	}
	apply_announce_cmds(msg, len, dev, source_hw);
}

struct announce_in {
	/* lh has to be first */
	struct list_head lh;
	struct sk_buff_head skbs; /* sorted by offset */
	struct net_device *dev;
	char source_hw[MAX_ADDR_LEN];
	__u32 announce_proto_version;
	__u32 packet_version;
	__u32 total_size;
	__u32 received_size;
	__u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;
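
/*
 * Receive path for announce packets (summary): rcv_announce() locates or
 * creates the matching announce_in entry, _rcv_announce() inserts the skb
 * into the offset-sorted queue, and once received_size == total_size
 * merge_announce() concatenates the chunks and hands the result to
 * parse_announce()/apply_announce_cmds(), which allocates the neighbor.
 */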

static void merge_announce(struct announce_in *ann)
{
	char *msg = kmalloc(ann->total_size, GFP_KERNEL);
	__u32 copy = 0;

	if (msg == 0) {
		/* try again when next packet arrives */
		return;
	}

	while (copy != ann->total_size) {
		__u32 currcpy;
		__u32 offset = 0;
		struct sk_buff *skb;
		struct skb_procstate *ps;

		if (unlikely(skb_queue_empty(&(ann->skbs)))) {
			printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
					"empty while merging packets\n");
			goto free;
		}

		skb = skb_dequeue(&(ann->skbs));
		ps = skb_pstate(skb);

		currcpy = skb->len;
		if (unlikely(ps->funcstate.announce.offset > copy)) {
			printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
					"value found\n");
			goto free;
		}

		if (unlikely(ps->funcstate.announce.offset < copy)) {
			offset = copy - ps->funcstate.announce.offset;
			currcpy -= offset;
		}

		if (unlikely(currcpy + copy > ann->total_size))
			goto free;

		memcpy(msg + copy, skb->data + offset, currcpy);
		copy += currcpy;
		kfree_skb(skb);
	}

	parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

free:
	if (msg != 0)
		kfree(msg);

	dev_put(ann->dev);
	list_del(&(ann->lh));
	kmem_cache_free(announce_in_slab, ann);
}

static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
	struct skb_procstate *ps = skb_pstate(skb);

	__u32 offset = ps->funcstate.announce.offset;
	__u32 len = skb->len;

	__u32 curroffset = 0;
	__u32 prevoffset = 0;
	__u32 prevlen = 0;

	struct sk_buff *curr = ann->skbs.next;

	if (unlikely(len + offset > ann->total_size)) {
		/* invalid header */
		kfree_skb(skb);
		return 0;
	}

	/**
	 * Try to find the right place to insert in the sorted list. This
	 * means to process the list until we find a skb which has a greater
	 * offset, so we can insert before it to keep the sort order. However,
	 * this is complicated by the fact that the new skb must not be
	 * inserted between 2 skbs if there is no data missing in between. So
	 * the loop has to keep running until there is either a gap to insert
	 * into or we see that this data has already been received.
	 */
	while ((void *) curr != (void *) &(ann->skbs)) {
		struct skb_procstate *currps = skb_pstate(curr);

		curroffset = currps->funcstate.announce.offset;

		if (curroffset > offset && (prevoffset + prevlen) < curroffset)
			break;

		prevoffset = curroffset;
		prevlen = curr->len;
		curr = curr->next;

		if ((offset+len) <= (prevoffset+prevlen)) {
			/* we already have this data */
			kfree_skb(skb);
			return 0;
		}
	}

	/**
	 * Calculate how much data was really received, by subtracting
	 * the bytes we already have.
	 */
	if (unlikely(prevoffset + prevlen > offset)) {
		len -= (prevoffset + prevlen) - offset;
		offset = prevoffset + prevlen;
	}

	if (unlikely((void *) curr != (void *) &(ann->skbs) &&
			(offset + len) > curroffset))
		len = curroffset - offset;

	ann->received_size += len;
	BUG_ON(ann->received_size > ann->total_size);
	__skb_queue_before(&(ann->skbs), curr, skb);
	ann->last_received_packet = get_jiffies_64();

	if (ann->received_size == ann->total_size)
		merge_announce(ann);
	else if (unlikely(ann->skbs.qlen >= 16))
		return 1;

	return 0;
}
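
/*
 * Example of the overlap handling above (illustrative): if the queue
 * already holds chunks covering [0,256) and [512,600) and a chunk with
 * offset 200 and length 200 arrives, it is trimmed to offset 256 and
 * length 144 before being queued between the two, so received_size grows
 * by 144 rather than 200.
 */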

void rcv_announce(struct sk_buff *skb)
{
	struct skb_procstate *ps = skb_pstate(skb);
	struct announce_in *curr = 0;
	struct announce_in *leastactive = 0;
	__u32 list_size = 0;

	__u32 announce_proto_version = pull_u32(skb, 1);
	__u32 packet_version = pull_u32(skb, 1);
	__u32 total_size = pull_u32(skb, 1);

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	ps->funcstate.announce.offset = pull_u32(skb, 1);

	/* the discard path unlocks neighbor_operation_lock, so take it
	 * before the first "goto discard" */
	mutex_lock(&(neighbor_operation_lock));

	if (total_size > 8192)
		goto discard;

	if (announce_proto_version != 0)
		goto discard;

	curr = (struct announce_in *) announce_list.next;

	while (((struct list_head *) curr) != &(announce_list)) {
		list_size++;
		if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
				MAX_ADDR_LEN) == 0 &&
				curr->announce_proto_version ==
				announce_proto_version &&
				curr->packet_version == packet_version &&
				curr->total_size == total_size)
			goto found;

		if (leastactive == 0 || curr->last_received_packet <
				leastactive->last_received_packet)
			leastactive = curr;

		curr = (struct announce_in *) curr->lh.next;
	}

	if (list_size >= 128) {
		BUG_ON(leastactive == 0);
		curr = leastactive;

		curr->last_received_packet = get_jiffies_64();

		while (!skb_queue_empty(&(curr->skbs))) {
			struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
			kfree_skb(skb2);
		}

		dev_put(curr->dev);
	} else {
		curr = kmem_cache_alloc(announce_in_slab,
				GFP_KERNEL);
		if (curr == 0)
			goto discard;

		skb_queue_head_init(&(curr->skbs));
		list_add_tail((struct list_head *) curr, &announce_list);
	}

	curr->packet_version = packet_version;
	curr->total_size = total_size;
	curr->received_size = 0;
	curr->announce_proto_version = announce_proto_version;
	curr->dev = skb->dev;
	dev_hold(curr->dev);
	memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

found:
	if (_rcv_announce(skb, curr)) {
		list_del((struct list_head *) curr);
		dev_put(curr->dev);
		kmem_cache_free(announce_in_slab, curr);
	}

	if (0) {
discard:
		kfree_skb(skb);
	}

	mutex_unlock(&(neighbor_operation_lock));
}

struct announce {
	struct kref ref;

	__u32 packet_version;
	char *announce_msg;
	__u32 announce_msg_len;
};

struct announce *last_announce;

static int send_announce_chunk(struct announce_data *ann)
{
	struct sk_buff *skb;
	__u32 packet_size = 256;
	__u32 remainingdata = ann->ann->announce_msg_len -
			ann->curr_announce_msg_offset;
	__u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
	__u32 overhead = 17 + headroom;
	char *header;
	char *ptr;
	int rc = 0;

	if (remainingdata < packet_size)
		packet_size = remainingdata;

	skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
	if (unlikely(skb == 0))
		return 0;

	skb->protocol = htons(ETH_P_COR);
	skb->dev = ann->dev;
	skb_reserve(skb, headroom);

	if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
			ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
		goto out_err;

	skb_reset_network_header(skb);

	header = skb_put(skb, 17);
	if (unlikely(header == 0))
		goto out_err;

	header[0] = PACKET_TYPE_ANNOUNCE;

	put_u32(header + 1, 0, 1); /* announce proto version */
	put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
	put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
	put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

	ptr = skb_put(skb, packet_size);
	if (unlikely(ptr == 0))
		goto out_err;

	memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
			packet_size);

	rc = dev_queue_xmit(skb);

	if (rc == 0) {
		ann->curr_announce_msg_offset += packet_size;

		if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
			ann->curr_announce_msg_offset = 0;
	}

	if (0) {
out_err:
		if (skb != 0)
			kfree_skb(skb);
	}

	return rc;
}

int send_announce_qos(struct announce_data *ann)
{
	int rc;
	mutex_lock(&(neighbor_operation_lock));
	rc = send_announce_chunk(ann);
	mutex_unlock(&(neighbor_operation_lock));
	return rc;
}

static void announce_free(struct kref *ref)
{
	struct announce *ann = container_of(ref, struct announce, ref);
	kfree(ann->announce_msg);
	kfree(ann);
}

void announce_data_free(struct kref *ref)
{
	struct announce_data *ann = container_of(ref, struct announce_data,
			ref);
	if (ann->ann != 0)
		kref_put(&(ann->ann->ref), announce_free);
	kfree(ann);
}

static void send_announce(struct work_struct *work)
{
	struct announce_data *ann = container_of(to_delayed_work(work),
			struct announce_data, announce_work);
	int reschedule = 0;
	int rc = 0;

	mutex_lock(&(neighbor_operation_lock));

	if (unlikely(ann->dev == 0))
		goto out;
	reschedule = 1;

	if (unlikely(ann->ann == 0 && last_announce == 0))
		goto out;
	if (ann->curr_announce_msg_offset == 0 &&
			unlikely(ann->ann != last_announce)) {
		if (ann->ann != 0)
			kref_put(&(ann->ann->ref), announce_free);
		ann->ann = last_announce;
		kref_get(&(ann->ann->ref));
	}

	rc = send_announce_chunk(ann);

out:
	mutex_unlock(&(neighbor_operation_lock));

	if (rc != 0)
		qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

	if (unlikely(reschedule == 0)) {
		kref_put(&(ann->ref), announce_data_free);
	} else {
		__u64 jiffies = get_jiffies_64();
		int delay;

		ann->scheduled_announce_timer += msecs_to_jiffies(
				ANNOUNCE_SEND_PACKETINTELVAL_MS);

		delay = ann->scheduled_announce_timer - jiffies;
		if (delay < 0)
			delay = 0;

		INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
		schedule_delayed_work(&(ann->announce_work), delay);
	}
}

static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
	struct list_head *lh = announce_out_list.next;

	while (lh != &announce_out_list) {
		struct announce_data *curr = (struct announce_data *)(
				((char *) lh) -
				offsetof(struct announce_data, lh));

		if (curr->dev == dev)
			return curr;

		lh = lh->next;
	}

	return 0;
}

static void announce_send_adddev(struct net_device *dev)
{
	struct announce_data *ann;

	ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		printk(KERN_ERR "cor cannot allocate memory for sending "
				"announces");
		return;
	}

	memset(ann, 0, sizeof(struct announce_data));

	kref_init(&(ann->ref));

	dev_hold(dev);
	ann->dev = dev;

	mutex_lock(&(neighbor_operation_lock));
	list_add_tail(&(ann->lh), &announce_out_list);
	mutex_unlock(&(neighbor_operation_lock));

	ann->scheduled_announce_timer = get_jiffies_64();
	INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
	schedule_delayed_work(&(ann->announce_work), 1);
}

static void announce_send_rmdev(struct net_device *dev)
{
	struct announce_data *ann;

	mutex_lock(&(neighbor_operation_lock));

	ann = get_announce_by_netdev(dev);

	if (ann == 0)
		goto out;

	dev_put(ann->dev);
	ann->dev = 0;

out:
	mutex_unlock(&(neighbor_operation_lock));
}

int netdev_notify_func(struct notifier_block *not, unsigned long event,
		void *ptr)
{
	struct net_device *dev = (struct net_device *) ptr;
	int rc;

	switch (event) {
	case NETDEV_UP:
		rc = create_queue(dev);
		if (rc == 1)
			return 1;
		announce_send_adddev(dev);
		break;
	case NETDEV_DOWN:
		destroy_queue(dev);
		announce_send_rmdev(dev);
		break;
	case NETDEV_REBOOT:
	case NETDEV_CHANGE:
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
		break;
	default:
		return 1;
	}

	return 0;
}

static int set_announce(char *msg, __u32 len)
{
	struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		kfree(msg);
		return 1;
	}

	memset(ann, 0, sizeof(struct announce));

	ann->announce_msg = msg;
	ann->announce_msg_len = len;

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));

	if (last_announce != 0) {
		ann->packet_version = last_announce->packet_version + 1;
		kref_put(&(last_announce->ref), announce_free);
	}

	last_announce = ann;

	mutex_unlock(&(neighbor_operation_lock));

	return 0;
}

static int generate_announce(void)
{
	__u32 addrtypelen = strlen(addrtype);

	__u32 hdr_len = 16;
	__u32 cmd_hdr_len = 8;
	__u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

	__u32 len = hdr_len + cmd_hdr_len + cmd_len;
	__u32 offset = 0;

	char *msg = kmalloc(len, GFP_KERNEL);
	if (unlikely(msg == 0))
		return 1;

	put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
	offset += 4;

	put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(msg + offset, cmd_len, 1); /* command length */
	offset += 4;

	/* addrtypelen, addrlen */
	put_u16(msg + offset, addrtypelen, 1);
	offset += 2;
	put_u16(msg + offset, addrlen, 1);
	offset += 2;

	/* addrtype, addr */
	memcpy(msg + offset, addrtype, addrtypelen);
	offset += addrtypelen;
	memcpy(msg + offset, addr, addrlen);
	offset += addrlen;

	BUG_ON(offset != len);

	return set_announce(msg, len);
}
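
/*
 * With the defaults set up in cor_neighbor_init() (addrtype "id",
 * addrlen 16), the announce message built above is 16 + 8 + (2+2+2+16) =
 * 46 bytes long and therefore always fits into a single announce chunk.
 */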

int __init cor_neighbor_init(void)
{
	addrlen = 16;

	addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(addr == 0))
		goto error_free2;

	get_random_bytes(addr, addrlen);

	nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
			0, 0);
	announce_in_slab = kmem_cache_create("cor_announce_in",
			sizeof(struct announce_in), 8, 0, 0);

	if (unlikely(generate_announce()))
		goto error_free1;

	memset(&netdev_notify, 0, sizeof(netdev_notify));
	netdev_notify.notifier_call = netdev_notify_func;
	register_netdevice_notifier(&netdev_notify);

	return 0;

error_free1:
	kfree(addr);

error_free2:
	return -ENOMEM;
}

MODULE_LICENSE("GPL");