conn reference renaming and locking logic
net/cor/neighbor.c
1 /**
2 * Connection oriented routing
3 * Copyright (C) 2007-2011 Michael Blizek
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
21 #include "cor.h"
23 /**
24  * Split packet data format:
25 * announce proto version [4]
26 * is 0, may be increased if format changes
27 * packet version [4]
28 * starts with 0, increments every time the data field changes
29 * total size [4]
30 * total data size of all merged packets
31 * offset [4]
32 * used to determine the order when merging the split packet
33 * unit is bytes
34 * [data]
35  * cumulative checksum [8] (not yet)
36 * chunk 1 contains the checksum of the data in chunk 1
37 * chunk 2 contains the checksum of the data in chunk 1+2
38 * ...
40 * Data format of the announce packet "data" field:
41 * min_announce_proto_version [4]
42 * max_announce_proto_version [4]
43 * min_cor_proto_version [4]
44 * max_cor_proto_version [4]
45 * versions which are understood
47 * command [4]
48 * commandlength [4]
49 * commanddata [commandlength]
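 *
 * Illustration only (sizes taken from generate_announce() and
 * send_announce_chunk() below, not a separate spec): a 46 byte announce
 * message that fits into a single chunk goes out with
 *   announce proto version = 0, packet version = n, total size = 46,
 *   offset = 0, followed by all 46 bytes of data.
 * If the message were split, every further chunk would repeat the first
 * three fields and only change the offset.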
52 /* Commands */
54 #define NEIGHCMD_ADDADDR 1
56 /**
57 * Parameter:
58  * addrtypelen [2]
59  * addrlen [2]
60  * addrtype [addrtypelen]
61  * addr [addrlen]
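/*
 * Illustrative sketch only, kept out of the build: how one NEIGHCMD_ADDADDR
 * command is laid out on the wire, using the same put_u32()/put_u16() helpers
 * as generate_announce() below. The 16 byte address length matches the random
 * "id" address chosen in cor_neighbor_init(); the function name and the fixed
 * lengths exist only for this example.
 */
#if 0
static void example_encode_addaddr(char *buf, const char *addr16)
{
	__u32 offset = 0;

	put_u32(buf + offset, NEIGHCMD_ADDADDR, 1);	/* command */
	offset += 4;
	put_u32(buf + offset, 2 + 2 + 2 + 16, 1);	/* commandlength = 22 */
	offset += 4;
	put_u16(buf + offset, 2, 1);			/* addrtypelen */
	offset += 2;
	put_u16(buf + offset, 16, 1);			/* addrlen */
	offset += 2;
	memcpy(buf + offset, "id", 2);			/* addrtype */
	offset += 2;
	memcpy(buf + offset, addr16, 16);		/* addr */
	offset += 16;
}
#endif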
65 DEFINE_MUTEX(neighbor_operation_lock);
67 char *addrtype = "id";
68 char *addr;
69 int addrlen;
72 LIST_HEAD(nb_list);
73 struct kmem_cache *nb_slab;
75 LIST_HEAD(announce_out_list);
77 struct notifier_block netdev_notify;
80 #define ADDRTYPE_UNKNOWN 0
81 #define ADDRTYPE_ID 1
83 static int get_addrtype(__u32 addrtypelen, char *addrtype)
85 if (addrtypelen == 2 &&
86 (addrtype[0] == 'i' || addrtype[0] == 'I') &&
87 (addrtype[1] == 'd' || addrtype[1] == 'D'))
88 return ADDRTYPE_ID;
90 return ADDRTYPE_UNKNOWN;
93 void neighbor_free(struct kref *ref)
95 struct neighbor *nb = container_of(ref, struct neighbor, ref);
96 printk(KERN_ERR "neighbor free");
97 BUG_ON(nb->nb_list.next != LIST_POISON1);
98 BUG_ON(nb->nb_list.prev != LIST_POISON2);
99 if (nb->addr != 0)
100 kfree(nb->addr);
101 nb->addr = 0;
102 if (nb->dev != 0)
103 dev_put(nb->dev);
104 nb->dev = 0;
105 kmem_cache_free(nb_slab, nb);
108 static struct neighbor *alloc_neighbor(gfp_t allocflags)
110 struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
111 __u32 seqno;
113 if (unlikely(nb == 0))
114 return 0;
116 memset(nb, 0, sizeof(struct neighbor));
118 kref_init(&(nb->ref));
119 mutex_init(&(nb->cmsg_lock));
120 INIT_LIST_HEAD(&(nb->control_msgs_out));
121 INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
122 nb->last_ping_time = jiffies;
123 atomic_set(&(nb->ooo_packets), 0);
124 spin_lock_init(&(nb->credits_lock));
125 nb->jiffies_credit_update = nb->last_ping_time;
126 nb->jiffies_credit_decay = nb->last_ping_time;
127 get_random_bytes((char *) &seqno, sizeof(seqno));
128 mutex_init(&(nb->pingcookie_lock));
129 atomic_set(&(nb->latency), 1000000);
130 atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
131 spin_lock_init(&(nb->state_lock));
132 atomic_set(&(nb->kpacket_seqno), seqno);
133 mutex_init(&(nb->conn_list_lock));
134 INIT_LIST_HEAD(&(nb->rcv_conn_list));
135 INIT_LIST_HEAD(&(nb->snd_conn_list));
136 spin_lock_init(&(nb->retrans_lock));
137 INIT_LIST_HEAD(&(nb->retrans_list));
138 INIT_LIST_HEAD(&(nb->retrans_list_conn));
140 return nb;
143 #warning todo check interface
144 struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
146 struct list_head *currlh;
147 struct neighbor *ret = 0;
150 char source_hw[MAX_ADDR_LEN];
151 memset(source_hw, 0, MAX_ADDR_LEN);
152 if (skb->dev->header_ops != 0 &&
153 skb->dev->header_ops->parse != 0)
154 skb->dev->header_ops->parse(skb, source_hw);
156 mutex_lock(&(neighbor_operation_lock));
158 currlh = nb_list.next;
160 while (currlh != &nb_list) {
161 struct neighbor *curr = container_of(currlh, struct neighbor,
162 nb_list);
164 if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
165 ret = curr;
166 kref_get(&(ret->ref));
167 break;
170 currlh = currlh->next;
173 mutex_unlock(&(neighbor_operation_lock));
175 return ret;
178 struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
179 __u16 addrlen, __u8 *addr)
181 struct list_head *currlh;
182 struct neighbor *ret = 0;
184 if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
185 return 0;
187 mutex_lock(&(neighbor_operation_lock));
189 currlh = nb_list.next;
191 while (currlh != &nb_list) {
192 struct neighbor *curr = container_of(currlh, struct neighbor,
193 nb_list);
195 if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
196 addrlen) == 0) {
197 ret = curr;
198 kref_get(&(ret->ref));
200 goto out;
203 currlh = currlh->next;
206 out:
207 mutex_unlock(&(neighbor_operation_lock));
209 return ret;
213 * TODO:
215 * address flags
216 * credit exchange factor + unstable flag
217 * throughput bound conns: throughput,credits/msecs
218 * latency bound conns: latency (ms), credits/byte
220 #warning todo extend
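/*
 * Reply layout produced below (summary of the code, not an external spec):
 * two variable-length header fields (the number of active neighbors
 * considered and the number of rows actually written), num_fields = 2, one
 * descriptor per field (field id as u16 plus a varlen content length; 0 here
 * for the per-row variable ADDR field, 1 for the one-byte LATENCY field),
 * then one row per returned neighbor: row length (varlen), numaddr,
 * addrtypelen, addrlen, "id", the address bytes, and the log-encoded
 * latency byte.
 */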
221 __u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
223 struct list_head *currlh;
225 int bufferfull = 0;
227 __u32 total = 0;
228 __u32 cnt = 0;
230 __u32 buf_offset = 8;
231 __u32 headoffset = 0;
233 int rc;
236  * The variable-length headers (rowcount and fieldlength) need to be
237 * generated after the data. This is done by reserving the maximum space
238 * they could take. If they end up being smaller, the data is moved so
239 * that there is no gap.
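 *
 * Example: 4 bytes are reserved for a length field; if encode_len() later
 * needs only 1 byte, the data written behind the reservation is moved down
 * by 3 bytes and buf_offset shrinks by 3, as done for fieldlen below.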
242 BUG_ON(buf == 0);
243 BUG_ON(buflen < buf_offset);
245 /* num_fields */
246 rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);
247 BUG_ON(rc <= 0);
248 buf_offset += rc;
250 /* addr field */
251 BUG_ON(buflen < buf_offset + 2);
252 put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR, 1);
253 buf_offset += 2;
255 rc = encode_len(buf + buf_offset, buflen - buf_offset, 0);
256 BUG_ON(rc <= 0);
257 buf_offset += rc;
259 /* latency field */
260 BUG_ON(buflen < buf_offset + 2);
261 put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY, 1);
262 buf_offset += 2;
264 rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);
265 BUG_ON(rc <= 0);
266 buf_offset += rc;
268 mutex_lock(&(neighbor_operation_lock));
270 currlh = nb_list.next;
272 while (currlh != &nb_list) {
273 struct neighbor *curr = container_of(currlh, struct neighbor,
274 nb_list);
275 __u8 state;
276 unsigned long iflags;
278 __u32 addroffset = buf_offset;
280 /* get_neigh_state not used here because it would deadlock */
281 spin_lock_irqsave(&(curr->state_lock), iflags);
282 state = curr->state;
283 spin_unlock_irqrestore(&(curr->state_lock), iflags);
285 if (state != NEIGHBOR_STATE_ACTIVE)
286 goto cont2;
288 if (total < offset)
289 goto cont;
291 		if (unlikely(buflen < buf_offset + 4 + 4 + 4 + 4 + 2 +
292 curr->addrlen + 1))
293 bufferfull = 1;
295 if (bufferfull)
296 goto cont;
298 buf_offset += 4; /* reserve bufferspace for fieldlen */
299 /* numaddr */
300 rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);
301 BUG_ON(rc <= 0);
302 buf_offset += rc;
304 /* addrtypelen */
305 rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);
306 BUG_ON(rc <= 0);
307 buf_offset += rc;
309 /* addrlen */
310 rc = encode_len(buf + buf_offset, buflen - buf_offset,
311 curr->addrlen);
312 BUG_ON(rc <= 0);
313 buf_offset += rc;
315 buf[buf_offset] = 'i'; /* addrtype */
316 buf_offset += 1;
317 buf[buf_offset] = 'd';
318 buf_offset += 1;
319 BUG_ON(curr->addrlen > buflen - buf_offset);
320 memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
321 buf_offset += curr->addrlen;
323 /* fieldlen */
324 		rc = encode_len(buf + addroffset, 4, buf_offset - addroffset - 4);
326 BUG_ON(rc <= 0);
327 BUG_ON(rc > 4);
328 if (likely(rc < 4))
329 memmove(buf+addroffset+rc, buf+addroffset + 4,
330 buf_offset - addroffset - 4);
331 buf_offset -= (4-rc);
333 buf[buf_offset] = enc_log_64_11(atomic_read(&(curr->latency)));
334 buf_offset += 1;
336 BUG_ON(buf_offset > buflen);
338 cnt++;
340 cont:
341 total++;
342 cont2:
343 currlh = currlh->next;
346 mutex_unlock(&(neighbor_operation_lock));
348 rc = encode_len(buf, 4, total);
349 BUG_ON(rc <= 0);
350 BUG_ON(rc > 4);
351 headoffset += rc;
353 rc = encode_len(buf + headoffset, 4, cnt);
354 BUG_ON(rc <= 0);
355 BUG_ON(rc > 4);
356 headoffset += rc;
358 if (likely(headoffset < 8))
359 memmove(buf+headoffset, buf+8, buf_offset);
361 return buf_offset + headoffset - 8;
364 void set_last_routdtrip(struct neighbor *nb, unsigned long time)
366 unsigned long iflags;
368 BUG_ON(nb == 0);
370 spin_lock_irqsave(&(nb->state_lock), iflags);
372 	if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
373 nb->state_time.last_roundtrip))
374 nb->state_time.last_roundtrip = time;
376 spin_unlock_irqrestore(&(nb->state_lock), iflags);
379 static void _refresh_initial_debitsrate(struct net_device *dev,
380 __u32 debitsrate)
382 __u32 neighbors = 0;
383 struct list_head *currlh;
385 currlh = nb_list.next;
387 while (currlh != &nb_list) {
388 struct neighbor *curr = container_of(currlh, struct neighbor,
389 nb_list);
391 if (curr->dev == dev)
392 neighbors++;
394 currlh = currlh->next;
397 currlh = nb_list.next;
399 while (currlh != &nb_list) {
400 struct neighbor *curr = container_of(currlh, struct neighbor,
401 nb_list);
403 if (curr->dev == dev)
404 set_creditrate_initial(curr,
405 debitsrate/neighbors);
407 currlh = currlh->next;
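/*
 * Summary of the code around here (not a separate spec):
 * refresh_initial_debitsrate() below counts the distinct net_devices in
 * nb_list and gives each interface an equal share of creditrate_initial();
 * _refresh_initial_debitsrate() above then divides that share equally among
 * the neighbors on one interface. Example with made-up numbers: rate R, two
 * interfaces and three neighbors on eth0 means each eth0 neighbor starts
 * with R/6.
 */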
411 /* neighbor operation lock has to be held while calling this */
412 static void refresh_initial_debitsrate(void)
414 struct list_head *currlh1;
415 __u32 ifcnt = 0;
416 __u32 creditrate;
418 currlh1 = nb_list.next;
420 while (currlh1 != &nb_list) {
421 struct neighbor *curr1 = container_of(currlh1, struct neighbor,
422 nb_list);
424 struct list_head *currlh2;
425 currlh2 = nb_list.next;
426 while (currlh2 != currlh1) {
427 struct neighbor *curr2 = container_of(currlh2,
428 struct neighbor, nb_list);
429 if (curr1->dev == curr2->dev)
430 goto present1;
433 ifcnt++;
435 present1:
437 currlh1 = currlh1->next;
440 creditrate = creditrate_initial();
442 currlh1 = nb_list.next;
444 while (currlh1 != &nb_list) {
445 struct neighbor *curr1 = container_of(currlh1, struct neighbor,
446 nb_list);
448 struct list_head *currlh2;
449 currlh2 = nb_list.next;
450 while (currlh2 != currlh1) {
451 struct neighbor *curr2 = container_of(currlh2,
452 struct neighbor, nb_list);
453 if (curr1->dev == curr2->dev)
454 goto present2;
457 _refresh_initial_debitsrate(curr1->dev, creditrate/ifcnt);
459 present2:
461 currlh1 = currlh1->next;
465 static void reset_all_conns(struct neighbor *nb)
467 while (1) {
468 struct conn *sconn;
470 mutex_lock(&(nb->conn_list_lock));
472 if (list_empty(&(nb->snd_conn_list))) {
473 BUG_ON(nb->num_send_conns != 0);
474 mutex_unlock(&(nb->conn_list_lock));
475 break;
478 sconn = container_of(nb->snd_conn_list.next, struct conn,
479 target.out.nb_list);
480 BUG_ON(sconn->targettype != TARGET_OUT);
483 * reset_conn must not be called with conn_list_lock
484 * held
486 mutex_unlock(&(nb->conn_list_lock));
487 reset_conn(sconn);
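/*
 * Neighbor state handling in the functions below (summary of the code, not
 * an external spec): a neighbor starts in NEIGHBOR_STATE_INITIAL and becomes
 * ACTIVE once enough pongs have arrived (ping_resp()) and INITIAL_TIME_MS has
 * passed. get_neigh_state() moves an ACTIVE neighbor to STALLED when no
 * roundtrip has been seen for NB_STALL_TIME_MS while pings are in transit,
 * and schedules stall_timer(). Pongs can move a STALLED neighbor back to
 * ACTIVE; otherwise stall_timer() resets all conns after NB_KILL_TIME_MS,
 * marks the neighbor KILLED and removes it from nb_list.
 */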
491 static void stall_timer(struct work_struct *work)
493 struct neighbor *nb = container_of(to_delayed_work(work),
494 struct neighbor, stalltimeout_timer);
496 int stall_time_ms;
497 __u8 nbstate;
499 unsigned long iflags;
501 spin_lock_irqsave(&(nb->state_lock), iflags);
502 stall_time_ms = jiffies_to_msecs(jiffies -
503 nb->state_time.last_roundtrip);
504 nbstate = nb->state;
505 if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
506 nb->str_timer_pending = 0;
508 spin_unlock_irqrestore(&(nb->state_lock), iflags);
510 if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
511 kref_put(&(nb->ref), neighbor_free);
512 return;
515 if (stall_time_ms < NB_KILL_TIME_MS) {
516 INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
517 schedule_delayed_work(&(nb->stalltimeout_timer),
518 msecs_to_jiffies(NB_KILL_TIME_MS -
519 stall_time_ms));
520 return;
523 printk(KERN_ERR "reset_all");
525 reset_all_conns(nb);
527 spin_lock_irqsave(&(nb->state_lock), iflags);
528 nb->state = NEIGHBOR_STATE_KILLED;
529 spin_unlock_irqrestore(&(nb->state_lock), iflags);
531 mutex_lock(&neighbor_operation_lock);
532 list_del(&(nb->nb_list));
533 refresh_initial_debitsrate();
534 mutex_unlock(&neighbor_operation_lock);
536 kref_put(&(nb->ref), neighbor_free); /* nb_list */
537 kref_put(&(nb->ref), neighbor_free); /* stall_timer */
540 int get_neigh_state(struct neighbor *nb)
542 int ret;
543 unsigned long iflags;
544 int starttimer = 0;
545 int stall_time_ms;
547 BUG_ON(nb == 0);
549 spin_lock_irqsave(&(nb->state_lock), iflags);
551 if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
552 time_after_eq(jiffies, nb->state_time.last_roundtrip +
553 msecs_to_jiffies(NB_STALL_TIME_MS)) && (
554 nb->ping_intransit >= NB_STALL_MINPINGS ||
555 nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
556 nb->state = NEIGHBOR_STATE_STALLED;
557 starttimer = (nb->str_timer_pending == 0);
558 		stall_time_ms = jiffies_to_msecs(jiffies -
				nb->state_time.last_roundtrip);
559 nb->str_timer_pending = 1;
560 printk(KERN_ERR "switched to stalled");
561 BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
564 ret = nb->state;
566 spin_unlock_irqrestore(&(nb->state_lock), iflags);
569 if (unlikely(starttimer)) {
570 kref_get(&(nb->ref));
571 INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
572 stall_timer);
573 		schedule_delayed_work(&(nb->stalltimeout_timer),
574 				msecs_to_jiffies(NB_KILL_TIME_MS - stall_time_ms));
577 return ret;
580 static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
582 int i;
584 for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
585 if (nb->cookies[i].cookie == cookie)
586 return &(nb->cookies[i]);
588 return 0;
591 void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
593 struct ping_cookie *c;
594 int i;
596 unsigned long cookie_sendtime;
597 __s64 newlatency;
599 unsigned long iflags;
601 mutex_lock(&(nb->pingcookie_lock));
603 c = find_cookie(nb, cookie);
605 if (unlikely(c == 0))
606 goto out;
608 cookie_sendtime = c->time;
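	/*
	 * Latency is tracked as an exponentially weighted moving average:
	 * 15/16 of the previous estimate plus 1/16 of the new sample (the
	 * measured roundtrip in microseconds minus the delay the peer
	 * reported), clamped to 0..2^32-1. Example with made-up numbers:
	 * old estimate 100000us, new sample 36000us
	 * -> (100000 * 15 + 36000) / 16 = 96000us.
	 */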
611 newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
612 jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
613 if (unlikely(newlatency < 0))
614 newlatency = 0;
615 if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
616 newlatency = ((__s64)256)*256*256*256 - 1;
618 atomic_set(&(nb->latency), (__u32) newlatency);
620 c->cookie = 0;
621 nb->ping_intransit--;
623 for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
624 if (nb->cookies[i].cookie != 0 &&
625 time_before(nb->cookies[i].time, c->time)) {
626 nb->cookies[i].pongs++;
627 if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
628 nb->cookies[i].cookie = 0;
629 nb->cookies[i].pongs = 0;
630 nb->ping_intransit--;
635 spin_lock_irqsave(&(nb->state_lock), iflags);
637 if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
638 nb->state == NEIGHBOR_STATE_STALLED)) {
639 nb->ping_success++;
641 if (nb->state == NEIGHBOR_STATE_INITIAL) {
642 __u64 jiffies64 = get_jiffies_64();
643 if (nb->state_time.last_state_change == 0)
644 nb->state_time.last_state_change = jiffies64;
645 if (jiffies64 <= (nb->state_time.last_state_change +
646 msecs_to_jiffies(INITIAL_TIME_MS)))
647 goto out2;
650 if (nb->ping_success >= PING_SUCCESS_CNT) {
651 /*if (nb->state == NEIGHBOR_STATE_INITIAL)
652 printk(KERN_ERR "switched from initial to active");
653 else
654 printk(KERN_ERR "switched from stalled to active");
656 nb->state = NEIGHBOR_STATE_ACTIVE;
657 nb->ping_success = 0;
658 nb->state_time.last_roundtrip = jiffies;
660 } else {
661 nb->state_time.last_roundtrip = cookie_sendtime;
664 out2:
665 spin_unlock_irqrestore(&(nb->state_lock), iflags);
667 out:
668 mutex_unlock(&(nb->pingcookie_lock));
671 __u32 add_ping_req(struct neighbor *nb)
673 struct ping_cookie *c;
674 __u32 i;
676 __u32 cookie;
678 mutex_lock(&(nb->pingcookie_lock));
680 for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
681 if (nb->cookies[i].cookie == 0)
682 goto found;
685 get_random_bytes((char *) &i, sizeof(i));
686 i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
687 PING_COOKIES_FIFO;
689 found:
690 c = &(nb->cookies[i]);
691 c->time = jiffies;
692 c->pongs = 0;
693 nb->lastcookie++;
694 if (unlikely(nb->lastcookie == 0))
695 nb->lastcookie++;
696 c->cookie = nb->lastcookie;
698 nb->ping_intransit++;
700 cookie = c->cookie;
702 nb->last_ping_time = jiffies;
704 mutex_unlock(&(nb->pingcookie_lock));
706 return cookie;
709 void unadd_ping_req(struct neighbor *nb, __u32 cookie)
711 int i;
713 if (cookie == 0)
714 return;
716 mutex_lock(&(nb->pingcookie_lock));
718 for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
719 if (nb->cookies[i].cookie == cookie) {
720 nb->cookies[i].cookie = 0;
721 nb->ping_intransit--;
722 break;
726 mutex_unlock(&(nb->pingcookie_lock));
729 static int neighbor_idle(struct neighbor *nb)
731 int ret;
732 mutex_lock(&(nb->conn_list_lock));
733 ret = (list_empty(&(nb->rcv_conn_list)) &&
734 list_empty(&(nb->snd_conn_list)));
735 BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
736 mutex_unlock(&(nb->conn_list_lock));
737 return ret;
741  * Checks in addition to the checks and timings already done in kpacket_gen.c.
742 * This is primarily to make sure that we do not invalidate other ping cookies
743 * which might still receive responses. It does this by requiring a certain
744  * minimum delay between pings, depending on how many pings are already in
745 * transit.
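 *
 * Example with made-up numbers: with a latency estimate of 50000us, a remote
 * cmsg delay of 50000us and ping_intransit equal to PING_COOKIES_NOTHROTTLE,
 * the minimum delay is ((50000 + 50000) / 1000) << 1 = 200ms, capped at
 * PING_THROTTLE_LIMIT_MS.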
747 int time_to_send_ping(struct neighbor *nb)
749 int rc = 1;
751 int state = get_neigh_state(nb);
752 int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
753 neighbor_idle(nb));
754 __u32 forcetime;
756 #warning todo send pings for some time after the neighbor gets idle (initial latency measurement + tos_privacy)
758 mutex_lock(&(nb->pingcookie_lock));
759 if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
760 __u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
761 ((__u32) atomic_read(
762 &(nb->max_remote_cmsg_delay))) )/1000) <<
763 (nb->ping_intransit + 1 -
764 PING_COOKIES_NOTHROTTLE);
766 if (mindelay > PING_THROTTLE_LIMIT_MS)
767 mindelay = PING_THROTTLE_LIMIT_MS;
769 if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
770 rc = 0;
773 if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
774 nb->ping_intransit != 0)
775 forcetime = PING_FORCETIME_MS;
776 else if (idle)
777 forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
778 else
779 forcetime = PING_FORCETIME_ACTIVE_MS;
781 if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
782 rc = 0;
783 else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
784 rc = 2;
786 mutex_unlock(&(nb->pingcookie_lock));
788 return rc;
791 static void add_neighbor(struct neighbor *nb)
793 struct list_head *currlh = nb_list.next;
795 BUG_ON((nb->addr == 0) != (nb->addrlen == 0));
797 while (currlh != &nb_list) {
798 struct neighbor *curr = container_of(currlh, struct neighbor,
799 nb_list);
801 if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
802 curr->addrlen) == 0)
803 goto already_present;
805 currlh = currlh->next;
808 /* kref_get not needed here, because the caller leaves its ref to us */
809 printk(KERN_ERR "add_neigh");
811 list_add_tail(&(nb->nb_list), &nb_list);
812 refresh_initial_debitsrate();
813 schedule_controlmsg_timerfunc(nb);
814 INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
815 INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);
817 if (0) {
818 already_present:
819 kmem_cache_free(nb_slab, nb);
823 static __u32 pull_u32(struct sk_buff *skb, int convbo)
825 char *ptr = cor_pull_skb(skb, 4);
827 __u32 ret = 0;
829 BUG_ON(0 == ptr);
831 ((char *)&ret)[0] = ptr[0];
832 ((char *)&ret)[1] = ptr[1];
833 ((char *)&ret)[2] = ptr[2];
834 ((char *)&ret)[3] = ptr[3];
836 if (convbo)
837 return be32_to_cpu(ret);
838 return ret;
841 static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
842 char *cmddata)
844 __u16 addrtypelen;
845 char *addrtype;
846 __u16 addrlen;
847 char *addr;
849 BUG_ON((nb->addr == 0) != (nb->addrlen == 0));
851 if (nb->addr != 0)
852 return 0;
854 if (len < 4)
855 return 0;
857 addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
858 cmddata += 2;
859 len -= 2;
861 if (len < 2)
862 return 0;
864 addrlen = be16_to_cpu(*((__u16 *) cmddata));
865 cmddata += 2;
866 len -= 2;
868 	if (((__u32) addrtypelen) + addrlen > len)
869 		return 0;
871 	addrtype = cmddata;
872 	cmddata += addrtypelen;
873 	len -= addrtypelen;
875 	addr = cmddata;
876 	cmddata += addrlen;
877 	len -= addrlen;
879 if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
880 return 0;
882 nb->addr = kmalloc(addrlen, GFP_KERNEL);
883 if (unlikely(nb->addr == 0))
884 return 1;
886 memcpy(nb->addr, addr, addrlen);
887 nb->addrlen = addrlen;
889 return 0;
892 static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
893 char *cmddata)
895 if (cmd == NEIGHCMD_ADDADDR) {
896 apply_announce_addaddr(nb, cmd, len, cmddata);
897 } else {
898 /* ignore unknown cmds */
902 static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
903 char *source_hw)
905 struct neighbor *nb = alloc_neighbor(GFP_KERNEL);
907 if (unlikely(nb == 0))
908 return;
910 while (len >= 8) {
911 __u32 cmd;
912 __u32 cmdlen;
914 cmd = be32_to_cpu(*((__u32 *) msg));
915 msg += 4;
916 len -= 4;
917 cmdlen = be32_to_cpu(*((__u32 *) msg));
918 msg += 4;
919 len -= 4;
921 BUG_ON(cmdlen > len);
923 apply_announce_cmd(nb, cmd, cmdlen, msg);
925 msg += cmdlen;
926 len -= cmdlen;
929 BUG_ON(len != 0);
931 memcpy(nb->mac, source_hw, MAX_ADDR_LEN);
933 dev_hold(dev);
934 nb->dev = dev;
935 add_neighbor(nb);
938 static int check_announce_cmds(char *msg, __u32 len)
940 while (len >= 8) {
941 __u32 cmd;
942 __u32 cmdlen;
944 cmd = be32_to_cpu(*((__u32 *) msg));
945 msg += 4;
946 len -= 4;
947 cmdlen = be32_to_cpu(*((__u32 *) msg));
948 msg += 4;
949 len -= 4;
951 		/* malformed packet */
952 if (unlikely(cmdlen > len))
953 return 1;
955 msg += cmdlen;
956 len -= cmdlen;
959 if (unlikely(len != 0))
960 return 1;
962 return 0;
965 static void parse_announce(char *msg, __u32 len, struct net_device *dev,
966 char *source_hw)
968 __u32 min_announce_version;
969 __u32 max_announce_version;
970 __u32 min_cor_version;
971 __u32 max_cor_version;
973 if (unlikely(len < 16))
974 return;
976 min_announce_version = be32_to_cpu(*((__u32 *) msg));
977 msg += 4;
978 len -= 4;
979 max_announce_version = be32_to_cpu(*((__u32 *) msg));
980 msg += 4;
981 len -= 4;
982 min_cor_version = be32_to_cpu(*((__u32 *) msg));
983 msg += 4;
984 len -= 4;
985 max_cor_version = be32_to_cpu(*((__u32 *) msg));
986 msg += 4;
987 len -= 4;
989 if (min_announce_version != 0)
990 return;
991 if (min_cor_version != 0)
992 return;
993 if (check_announce_cmds(msg, len)) {
994 return;
996 apply_announce_cmds(msg, len, dev, source_hw);
999 struct announce_in {
1000 /* lh has to be first */
1001 struct list_head lh;
1002 struct sk_buff_head skbs; /* sorted by offset */
1003 struct net_device *dev;
1004 char source_hw[MAX_ADDR_LEN];
1005 __u32 announce_proto_version;
1006 __u32 packet_version;
1007 __u32 total_size;
1008 __u32 received_size;
1009 __u64 last_received_packet;
1012 LIST_HEAD(announce_list);
1014 struct kmem_cache *announce_in_slab;
1016 static void merge_announce(struct announce_in *ann)
1018 char *msg = kmalloc(ann->total_size, GFP_KERNEL);
1019 __u32 copy = 0;
1021 if (msg == 0) {
1022 /* try again when next packet arrives */
1023 return;
1026 while (copy != ann->total_size) {
1027 __u32 currcpy;
1028 __u32 offset = 0;
1029 struct sk_buff *skb;
1030 struct skb_procstate *ps;
1032 if (unlikely(skb_queue_empty(&(ann->skbs)))) {
1033 printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
1034 "empty while merging packets\n");
1035 goto free;
1038 skb = skb_dequeue(&(ann->skbs));
1039 ps = skb_pstate(skb);
1041 currcpy = skb->len;
1042 if (unlikely(ps->funcstate.announce.offset > copy)) {
1043 			printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
1044 					"value found\n");
1045 goto free;
1048 if (unlikely(ps->funcstate.announce.offset < copy)) {
1049 offset = copy - ps->funcstate.announce.offset;
1050 currcpy -= offset;
1053 if (unlikely(currcpy + copy > ann->total_size))
1054 goto free;
1056 memcpy(msg + copy, skb->data + offset, currcpy);
1057 copy += currcpy;
1058 kfree_skb(skb);
1061 parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);
1063 free:
1064 if (msg != 0)
1065 kfree(msg);
1067 dev_put(ann->dev);
1068 list_del(&(ann->lh));
1069 kmem_cache_free(announce_in_slab, ann);
1072 static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
1074 struct skb_procstate *ps = skb_pstate(skb);
1076 __u32 offset = ps->funcstate.announce.offset;
1077 __u32 len = skb->len;
1079 __u32 curroffset = 0;
1080 __u32 prevoffset = 0;
1081 __u32 prevlen = 0;
1083 struct sk_buff *curr = ann->skbs.next;
1085 if (unlikely(len + offset > ann->total_size)) {
1086 /* invalid header */
1087 kfree_skb(skb);
1088 return 0;
1092 * Try to find the right place to insert in the sorted list. This
1093 * means to process the list until we find a skb which has a greater
1094 * offset, so we can insert before it to keep the sort order. However,
1095 * this is complicated by the fact that the new skb must not be inserted
1096 * between 2 skbs if there is no data missing in between. So the loop
1097	 * has to keep running until there is either a gap to insert into, or until
1098	 * we see that this data has already been received.
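	 *
	 * Example with made-up offsets: the queue holds chunks covering
	 * [0,100) and [200,300). A new chunk at offset 100 with 150 bytes is
	 * inserted between them, but only bytes 100..199 count as newly
	 * received because its tail overlaps the next chunk. A chunk at
	 * offset 0 with 50 bytes would simply be freed, since that range has
	 * been received already.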
1100 while ((void *) curr != (void *) &(ann->skbs)) {
1101 		struct skb_procstate *currps = skb_pstate(curr);
1103 curroffset = currps->funcstate.announce.offset;
1105 if (curroffset > offset && (prevoffset + prevlen) < curroffset)
1106 break;
1108 prevoffset = curroffset;
1109 prevlen = curr->len;
1110 curr = curr->next;
1112 if ((offset+len) <= (prevoffset+prevlen)) {
1113 /* we already have this data */
1114 kfree_skb(skb);
1115 return 0;
1120	 * Calculate how much data was really received, by subtracting
1121 * the bytes we already have.
1123 if (unlikely(prevoffset + prevlen > offset)) {
1124 len -= (prevoffset + prevlen) - offset;
1125 offset = prevoffset + prevlen;
1128 if (unlikely((void *) curr != (void *) &(ann->skbs) &&
1129 (offset + len) > curroffset))
1130 len = curroffset - offset;
1132 ann->received_size += len;
1133 BUG_ON(ann->received_size > ann->total_size);
1134 __skb_queue_before(&(ann->skbs), curr, skb);
1135 ann->last_received_packet = get_jiffies_64();
1137 if (ann->received_size == ann->total_size)
1138 merge_announce(ann);
1139 else if (unlikely(ann->skbs.qlen >= 16))
1140 return 1;
1142 return 0;
1145 void rcv_announce(struct sk_buff *skb)
1147 struct skb_procstate *ps = skb_pstate(skb);
1148 struct announce_in *curr = 0;
1149 struct announce_in *leastactive = 0;
1150 __u32 list_size = 0;
1152 __u32 announce_proto_version = pull_u32(skb, 1);
1153 __u32 packet_version = pull_u32(skb, 1);
1154 __u32 total_size = pull_u32(skb, 1);
1156 char source_hw[MAX_ADDR_LEN];
1157 memset(source_hw, 0, MAX_ADDR_LEN);
1158 if (skb->dev->header_ops != 0 &&
1159 skb->dev->header_ops->parse != 0)
1160 skb->dev->header_ops->parse(skb, source_hw);
1162 ps->funcstate.announce.offset = pull_u32(skb, 1);
1164 if (total_size > 8192)
1165 goto discard;
1167 mutex_lock(&(neighbor_operation_lock));
1169 if (announce_proto_version != 0)
1170 goto discard;
1172 curr = (struct announce_in *) announce_list.next;
1174 while (((struct list_head *) curr) != &(announce_list)) {
1175 list_size++;
1176 if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
1177 MAX_ADDR_LEN) == 0 &&
1178 curr->announce_proto_version ==
1179 announce_proto_version &&
1180 curr->packet_version == packet_version &&
1181 curr->total_size == total_size)
1182 goto found;
1184 if (leastactive == 0 || curr->last_received_packet <
1185 leastactive->last_received_packet)
1186 leastactive = curr;
1188 curr = (struct announce_in *) curr->lh.next;
1191 if (list_size >= 128) {
1192 BUG_ON(leastactive == 0);
1193 curr = leastactive;
1195 curr->last_received_packet = get_jiffies_64();
1197 while (!skb_queue_empty(&(curr->skbs))) {
1198 struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
1199 kfree_skb(skb2);
1202 dev_put(curr->dev);
1203 } else {
1204 curr = kmem_cache_alloc(announce_in_slab,
1205 GFP_KERNEL);
1206 if (curr == 0)
1207 goto discard;
1209 skb_queue_head_init(&(curr->skbs));
1210 list_add_tail((struct list_head *) curr, &announce_list);
1213 curr->packet_version = packet_version;
1214 curr->total_size = total_size;
1215 curr->received_size = 0;
1216 curr->announce_proto_version = announce_proto_version;
1217 curr->dev = skb->dev;
1218 dev_hold(curr->dev);
1219 memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);
1221 found:
1222 if (_rcv_announce(skb, curr)) {
1223 list_del((struct list_head *) curr);
1224 dev_put(curr->dev);
1225 kmem_cache_free(announce_in_slab, curr);
1228 if (0) {
1229 discard:
1230 kfree_skb(skb);
1233 mutex_unlock(&(neighbor_operation_lock));
1236 struct announce {
1237 struct kref ref;
1239 __u32 packet_version;
1240 char *announce_msg;
1241 __u32 announce_msg_len;
1244 struct announce *last_announce;
1246 static int send_announce_chunk(struct announce_data *ann)
1248 struct sk_buff *skb;
1249 __u32 packet_size = 256;
1250 __u32 remainingdata = ann->ann->announce_msg_len -
1251 ann->curr_announce_msg_offset;
1252 __u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
1253 __u32 overhead = 17 + headroom;
1254 char *header;
1255 char *ptr;
1256 int rc = 0;
1258 if (remainingdata < packet_size)
1259 packet_size = remainingdata;
1261 skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
1262 if (unlikely(skb == 0))
1263 return 0;
1265 skb->protocol = htons(ETH_P_COR);
1266 skb->dev = ann->dev;
1267 skb_reserve(skb, headroom);
1269 	if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
1270 ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
1271 goto out_err;
1273 skb_reset_network_header(skb);
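	/*
	 * 17 byte announce header: 1 byte packet type followed by the four
	 * 32 bit fields of the split packet format described at the top of
	 * this file (announce proto version, packet version, total size,
	 * offset).
	 */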
1275 header = skb_put(skb, 17);
1276 if (unlikely(header == 0))
1277 goto out_err;
1279 header[0] = PACKET_TYPE_ANNOUNCE;
1281 put_u32(header + 1, 0, 1); /* announce proto version */
1282 put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
1283 put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
1284 put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */
1286 ptr = skb_put(skb, packet_size);
1287 if (unlikely(ptr == 0))
1288 goto out_err;
1290 memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
1291 packet_size);
1293 rc = dev_queue_xmit(skb);
1295 if (rc == 0) {
1296 ann->curr_announce_msg_offset += packet_size;
1298 if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
1299 ann->curr_announce_msg_offset = 0;
1302 if (0) {
1303 out_err:
1304 if (skb != 0)
1305 kfree_skb(skb);
1308 return rc;
1311 int send_announce_qos(struct announce_data *ann)
1313 int rc;
1314 mutex_lock(&(neighbor_operation_lock));
1315 rc = send_announce_chunk(ann);
1316 mutex_unlock(&(neighbor_operation_lock));
1317 return rc;
1320 static void announce_free(struct kref *ref)
1322 struct announce *ann = container_of(ref, struct announce, ref);
1323 	kfree(ann->announce_msg);
1324 kfree(ann);
1327 void announce_data_free(struct kref *ref)
1329 struct announce_data *ann = container_of(ref, struct announce_data,
1330 ref);
1331 if (ann->ann != 0)
1332 kref_put(&(ann->ann->ref), announce_free);
1333 kfree(ann);
1336 static void send_announce(struct work_struct *work)
1338 struct announce_data *ann = container_of(to_delayed_work(work),
1339 struct announce_data, announce_work);
1340 int reschedule = 0;
1341 int rc = 0;
1343 mutex_lock(&(neighbor_operation_lock));
1345 if (unlikely(ann->dev == 0))
1346 goto out;
1347 reschedule = 1;
1349 if (unlikely(ann->ann == 0 && last_announce == 0))
1350 goto out;
1351 if (ann->curr_announce_msg_offset == 0 &&
1352 unlikely(ann->ann != last_announce)) {
1353 if (ann->ann != 0)
1354 kref_put(&(ann->ann->ref), announce_free);
1355 ann->ann = last_announce;
1356 kref_get(&(ann->ann->ref));
1359 rc = send_announce_chunk(ann);
1361 out:
1362 mutex_unlock(&(neighbor_operation_lock));
1364 if (rc != 0)
1365 qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);
1367 if (unlikely(reschedule == 0)) {
1368 kref_put(&(ann->ref), announce_data_free);
1369 } else {
1370 		__u64 jiffies64 = get_jiffies_64();
1371 int delay;
1373 ann->scheduled_announce_timer += msecs_to_jiffies(
1374 ANNOUNCE_SEND_PACKETINTELVAL_MS);
1376 		delay = ann->scheduled_announce_timer - jiffies64;
1377 if (delay < 0)
1378 delay = 1;
1380 INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
1381 schedule_delayed_work(&(ann->announce_work), delay);
1385 static struct announce_data *get_announce_by_netdev(struct net_device *dev)
1387 struct list_head *lh = announce_out_list.next;
1389 while (lh != &announce_out_list) {
1390 struct announce_data *curr = (struct announce_data *)(
1391 ((char *) lh) -
1392 offsetof(struct announce_data, lh));
1394 if (curr->dev == dev)
1395 return curr;
1398 return 0;
1401 static void announce_send_adddev(struct net_device *dev)
1403 struct announce_data *ann;
1405 ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);
1407 if (unlikely(ann == 0)) {
1408 printk(KERN_ERR "cor cannot allocate memory for sending "
1409 "announces");
1410 return;
1413 memset(ann, 0, sizeof(struct announce_data));
1415 kref_init(&(ann->ref));
1417 dev_hold(dev);
1418 ann->dev = dev;
1420 mutex_lock(&(neighbor_operation_lock));
1421 list_add_tail(&(ann->lh), &announce_out_list);
1422 mutex_unlock(&(neighbor_operation_lock));
1424 ann->scheduled_announce_timer = get_jiffies_64();
1425 INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
1426 schedule_delayed_work(&(ann->announce_work), 1);
1429 static void announce_send_rmdev(struct net_device *dev)
1431 struct announce_data *ann;
1433 mutex_lock(&(neighbor_operation_lock));
1435 ann = get_announce_by_netdev(dev);
1437 if (ann == 0)
1438 goto out;
1440 dev_put(ann->dev);
1441 ann->dev = 0;
1443 out:
1444 mutex_unlock(&(neighbor_operation_lock));
1447 int netdev_notify_func(struct notifier_block *not, unsigned long event,
1448 void *ptr)
1450 struct net_device *dev = (struct net_device *) ptr;
1451 int rc;
1453 switch(event){
1454 case NETDEV_UP:
1455 rc = create_queue(dev);
1456 if (rc == 1)
1457 return 1;
1458 announce_send_adddev(dev);
1459 break;
1460 case NETDEV_DOWN:
1461 destroy_queue(dev);
1462 announce_send_rmdev(dev);
1463 break;
1464 case NETDEV_REBOOT:
1465 case NETDEV_CHANGE:
1466 case NETDEV_REGISTER:
1467 case NETDEV_UNREGISTER:
1468 case NETDEV_CHANGEMTU:
1469 case NETDEV_CHANGEADDR:
1470 case NETDEV_GOING_DOWN:
1471 case NETDEV_CHANGENAME:
1472 case NETDEV_FEAT_CHANGE:
1473 case NETDEV_BONDING_FAILOVER:
1474 break;
1475 default:
1476 return 1;
1479 return 0;
1482 static int set_announce(char *msg, __u32 len)
1484 struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);
1486 if (unlikely(ann == 0)) {
1487 kfree(msg);
1488 return 1;
1491 memset(ann, 0, sizeof(struct announce));
1493 ann->announce_msg = msg;
1494 ann->announce_msg_len = len;
1496 kref_init(&(ann->ref));
1498 mutex_lock(&(neighbor_operation_lock));
1500 if (last_announce != 0) {
1501 ann->packet_version = last_announce->packet_version + 1;
1502 kref_put(&(last_announce->ref), announce_free);
1505 last_announce = ann;
1507 mutex_unlock(&(neighbor_operation_lock));
1509 return 0;
1512 static int generate_announce(void)
1514 __u32 addrtypelen = strlen(addrtype);
1516 __u32 hdr_len = 16;
1517 __u32 cmd_hdr_len = 8;
1518 __u32 cmd_len = 2 + 2 + addrtypelen + addrlen;
1520 __u32 len = hdr_len + cmd_hdr_len + cmd_len;
1521 __u32 offset = 0;
1523 char *msg = kmalloc(len, GFP_KERNEL);
1524 if (unlikely(msg == 0))
1525 return 1;
1527 put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
1528 offset += 4;
1529 put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
1530 offset += 4;
1531 put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
1532 offset += 4;
1533 put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
1534 offset += 4;
1537 put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
1538 offset += 4;
1539 put_u32(msg + offset, cmd_len, 1); /* command length */
1540 offset += 4;
1542 /* addrtypelen, addrlen */
1543 put_u16(msg + offset, addrtypelen, 1);
1544 offset += 2;
1545 put_u16(msg + offset, addrlen, 1);
1546 offset += 2;
1548 /* addrtype, addr */
1549 memcpy(msg + offset, addrtype, addrtypelen);
1550 offset += addrtypelen;
1551 memcpy(msg + offset, addr, addrlen);
1552 offset += addrlen;
1554 BUG_ON(offset != len);
1556 return set_announce(msg, len);
1559 int __init cor_neighbor_init(void)
1561 addrlen = 16;
1563 addr = kmalloc(addrlen, GFP_KERNEL);
1564 if (unlikely(addr == 0))
1565 goto error_free2;
1567 get_random_bytes(addr, addrlen);
1569 nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
1570 0, 0);
1571 announce_in_slab = kmem_cache_create("cor_announce_in",
1572 sizeof(struct announce_in), 8, 0, 0);
1574 if (unlikely(generate_announce()))
1575 goto error_free1;
1577 memset(&netdev_notify, 0, sizeof(netdev_notify));
1578 netdev_notify.notifier_call = netdev_notify_func;
1579 register_netdevice_notifier(&netdev_notify);
1581 return 0;
1583 error_free1:
1584 kfree(addr);
1586 error_free2:
1587 return -ENOMEM;
1590 MODULE_LICENSE("GPL");