/*
 * Connection oriented routing
 * Copyright (C) 2007-2011 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/*
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */

#define NEIGHCMD_ADDADDR 1

/*
 * NEIGHCMD_ADDADDR:
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
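/*
 * Illustrative sketch, not part of the original source: one way to picture
 * the 17 byte announce packet header that send_announce_chunk() below builds
 * with put_u32(). The struct name and the packed attribute are assumptions
 * for illustration only; the real code reads and writes the fields byte-wise
 * with put_u32()/pull_u32().
 */
struct cor_announce_hdr_sketch {
        __u8 packet_type;               /* PACKET_TYPE_ANNOUNCE */
        __be32 announce_proto_version;  /* currently always 0 */
        __be32 packet_version;          /* bumped when the data field changes */
        __be32 total_size;              /* size of all merged chunks */
        __be32 offset;                  /* position of this chunk in the data */
} __attribute__((packed));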
DEFINE_MUTEX(neighbor_operation_lock);
DEFINE_MUTEX(neighbor_list_lock);

char *addrtype = "id";

LIST_HEAD(nb_list);
struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;

#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
        if (addrtypelen == 2 &&
                        (addrtype[0] == 'i' || addrtype[0] == 'I') &&
                        (addrtype[1] == 'd' || addrtype[1] == 'D'))
                return ADDRTYPE_ID;

        return ADDRTYPE_UNKNOWN;
}

void neighbor_free(struct kref *ref)
{
        struct neighbor *nb = container_of(ref, struct neighbor, ref);
        printk(KERN_ERR "neighbor free");
        BUG_ON(nb->nb_list.next != LIST_POISON1);
        BUG_ON(nb->nb_list.prev != LIST_POISON2);
        kmem_cache_free(nb_slab, nb);
}

static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
        struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);

        if (unlikely(nb == 0))
                return 0;

        memset(nb, 0, sizeof(struct neighbor));

        kref_init(&(nb->ref));
        init_timer(&(nb->cmsg_timer));
        nb->cmsg_timer.function = controlmsg_timerfunc;
        nb->cmsg_timer.data = (unsigned long) nb;
        INIT_WORK(&(nb->cmsg_work), controlmsg_workfunc);
        atomic_set(&(nb->cmsg_work_scheduled), 0);
        atomic_set(&(nb->cmsg_timer_running), 0);
        mutex_init(&(nb->cmsg_lock));
        mutex_init(&(nb->send_cmsg_lock));
        INIT_LIST_HEAD(&(nb->control_msgs_out));
        INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
        nb->last_ping_time = jiffies;
        nb->cmsg_interval = 1000000;
        atomic_set(&(nb->ooo_packets), 0);
        spin_lock_init(&(nb->credits_lock));
        nb->jiffies_credit_update = nb->last_ping_time;
        nb->jiffies_credit_decay = nb->last_ping_time;
        spin_lock_init(&(nb->busytill_lock));
        nb->busy_till = jiffies;
        mutex_init(&(nb->pingcookie_lock));
        atomic_set(&(nb->latency), 1000000);
        atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
        spin_lock_init(&(nb->state_lock));
        get_random_bytes((char *) &seqno, sizeof(seqno));
        atomic_set(&(nb->kpacket_seqno), seqno);
        spin_lock_init(&(nb->conn_list_lock));
        INIT_LIST_HEAD(&(nb->rcv_conn_list));
        spin_lock_init(&(nb->retrans_lock));
        INIT_LIST_HEAD(&(nb->retrans_list));
        INIT_LIST_HEAD(&(nb->retrans_list_conn));

        return nb;
}

int is_from_nb(struct sk_buff *skb, struct neighbor *nb)
{
        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        mutex_lock(&(neighbor_operation_lock));
        rc = (skb->dev == nb->dev && memcmp(nb->mac, source_hw,
                        MAX_ADDR_LEN) == 0);
        mutex_unlock(&(neighbor_operation_lock));
        return rc;
}

struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
        struct list_head *currlh;
        struct neighbor *ret = 0;

        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        mutex_lock(&(neighbor_list_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (skb->dev == curr->dev && memcmp(curr->mac, source_hw,
                                MAX_ADDR_LEN) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));
                        break;
                }

                currlh = currlh->next;
        }

        mutex_unlock(&(neighbor_list_lock));

        return ret;
}

struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
                __u16 addrlen, __u8 *addr)
{
        struct list_head *currlh;
        struct neighbor *ret = 0;

        if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
                return 0;

        mutex_lock(&(neighbor_list_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
                                addrlen) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));
                        break;
                }

                currlh = currlh->next;
        }

        mutex_unlock(&(neighbor_list_lock));

        return ret;
}

/*
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput,credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
#warning todo pregenerate (lift response size limit)
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
        struct list_head *currlh;

        __u32 buf_offset = 8;
        __u32 headoffset = 0;

        /*
         * The variable length headers rowcount and fieldlength need to be
         * generated after the data. This is done by reserving the maximum
         * space they could take. If they end up being smaller, the data is
         * moved so that there is no gap.
         */
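        /*
         * Illustrative example, not in the original source, assuming
         * encode_len() returns the number of bytes it actually used: if a
         * length that fits into 1 byte was given 4 reserved bytes, the data
         * behind it is moved back over the 3 unused bytes, e.g.
         *
         *   rc = encode_len(buf + pos, 4, payload_len);
         *   memmove(buf + pos + rc, buf + pos + 4, payload_len);
         *   buf_offset -= (4 - rc);
         *
         * which mirrors what is done after the address field below.
         */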
        BUG_ON(buflen < buf_offset);

        rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

        BUG_ON(buflen < buf_offset + 2);
        put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR, 1);

        rc = encode_len(buf + buf_offset, buflen - buf_offset, 0);

        BUG_ON(buflen < buf_offset + 2);
        put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY, 1);

        rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

        mutex_lock(&(neighbor_list_lock));

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                unsigned long iflags;

                __u32 addroffset = buf_offset;

                /* get_neigh_state not used here because it would deadlock */
                spin_lock_irqsave(&(curr->state_lock), iflags);
                spin_unlock_irqrestore(&(curr->state_lock), iflags);

                if (state != NEIGHBOR_STATE_ACTIVE)

                if (unlikely(buflen < buf_offset + 4 + 4 + 4 + 4 + 2 +

                buf_offset += 4; /* reserve bufferspace for fieldlen */

                rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

                rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

                rc = encode_len(buf + buf_offset, buflen - buf_offset,

                buf[buf_offset] = 'i'; /* addrtype */
                buf[buf_offset] = 'd';

                BUG_ON(curr->addrlen > buflen - buf_offset);
                memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
                buf_offset += curr->addrlen;

                rc = encode_len(buf + addroffset, 4, buf_offset - addroffset -

                memmove(buf+addroffset+rc, buf+addroffset + 4,
                                buf_offset - addroffset - 4);
                buf_offset -= (4-rc);

                buf[buf_offset] = enc_log_64_11(atomic_read(&(curr->latency)));

                BUG_ON(buf_offset > buflen);

                currlh = currlh->next;
        }

        mutex_unlock(&(neighbor_list_lock));

        rc = encode_len(buf, 4, total);

        rc = encode_len(buf + headoffset, 4, cnt);

        if (likely(headoffset < 8))
                memmove(buf+headoffset, buf+8, buf_offset);

        return buf_offset + headoffset - 8;
}

static void _refresh_initial_debitsrate(struct net_device *dev,
                __u32 debitsrate)
{
        struct list_head *currlh;
        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->dev == dev)
                        neighbors++;

                currlh = currlh->next;
        }

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->dev == dev)
                        set_creditrate_initial(curr,
                                        debitsrate/neighbors);

                currlh = currlh->next;
        }
}

/* neighbor list lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
        struct list_head *currlh1;

        currlh1 = nb_list.next;

        while (currlh1 != &nb_list) {
                struct neighbor *curr1 = container_of(currlh1, struct neighbor,
                                nb_list);

                struct list_head *currlh2;
                currlh2 = nb_list.next;
                while (currlh2 != currlh1) {
                        struct neighbor *curr2 = container_of(currlh2,
                                        struct neighbor, nb_list);
                        if (curr1->dev == curr2->dev)
                }

                currlh1 = currlh1->next;
        }

        creditrate = creditrate_initial();

        currlh1 = nb_list.next;

        while (currlh1 != &nb_list) {
                struct neighbor *curr1 = container_of(currlh1, struct neighbor,
                                nb_list);

                struct list_head *currlh2;
                currlh2 = nb_list.next;
                while (currlh2 != currlh1) {
                        struct neighbor *curr2 = container_of(currlh2,
                                        struct neighbor, nb_list);
                        if (curr1->dev == curr2->dev)
                }

                _refresh_initial_debitsrate(curr1->dev, creditrate/ifcnt);

                currlh1 = currlh1->next;
        }
}
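/*
 * Illustrative note, not in the original source: creditrate_initial() is
 * split evenly, first across the interfaces that have at least one neighbor
 * (creditrate/ifcnt per device in refresh_initial_debitsrate()), then across
 * the neighbors on each device (debitsrate/neighbors per neighbor in
 * _refresh_initial_debitsrate()). With 2 interfaces and 3 neighbors on the
 * first one, each of those 3 neighbors starts with creditrate/2/3.
 */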
static void stall_timer(struct work_struct *work);

static void reset_all_conns(struct neighbor *nb)
{
        unsigned long iflags;

        spin_lock_irqsave(&(nb->conn_list_lock), iflags);

        if (list_empty(&(nb->rcv_conn_list))) {
                spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);
        }

        src_in = container_of(nb->rcv_conn_list.next, struct conn,

        kref_get(&(src_in->ref));

        spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

        if (src_in->is_client) {
                mutex_lock(&(src_in->rcv_lock));
                mutex_lock(&(src_in->reversedir->rcv_lock));
        } else {
                mutex_lock(&(src_in->reversedir->rcv_lock));
                mutex_lock(&(src_in->rcv_lock));
        }

        if (unlikely(unlikely(src_in->sourcetype != SOURCE_IN) ||
                        unlikely(src_in->source.in.nb != nb))) {
        }

        rc = send_reset_conn(nb, src_in->reversedir->target.out.conn_id,
                        src_in->source.in.conn_id, 1);

        if (unlikely(rc != 0))

        if (src_in->reversedir->isreset == 0)
                src_in->reversedir->isreset = 1;

        if (src_in->is_client) {
                mutex_unlock(&(src_in->rcv_lock));
                mutex_unlock(&(src_in->reversedir->rcv_lock));
        } else {
                mutex_unlock(&(src_in->reversedir->rcv_lock));
                mutex_unlock(&(src_in->rcv_lock));
        }

        kref_put(&(src_in->ref), free_conn);

        kref_put(&(src_in->ref), free_conn);
        kref_get(&(nb->ref));
        INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
                        stall_timer);
        schedule_delayed_work(&(nb->stalltimeout_timer), 100);
}

static void reset_neighbor(struct neighbor *nb)
{
        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);
        removenblist = (nb->state != NEIGHBOR_STATE_KILLED);
        nb->state = NEIGHBOR_STATE_KILLED;
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        printk(KERN_ERR "reset_neighbor");

        mutex_lock(&neighbor_list_lock);
        list_del(&(nb->nb_list));
        refresh_initial_debitsrate();
        mutex_unlock(&neighbor_list_lock);

#warning todo empty control_msg list

        kref_put(&(nb->ref), neighbor_free); /* nb_list */
}

static void reset_neighbor_dev(struct net_device *dev)
{
        struct list_head *currlh;

        mutex_lock(&neighbor_list_lock);

        currlh = nb_list.next;

        while (currlh != &nb_list) {
                unsigned long iflags;
                struct neighbor *currnb = container_of(currlh, struct neighbor,
                                nb_list);

                if (currnb->dev != dev)

                spin_lock_irqsave(&(currnb->state_lock), iflags);
                state = currnb->state;
                spin_unlock_irqrestore(&(currnb->state_lock), iflags);

                if (state != NEIGHBOR_STATE_KILLED) {
                        mutex_unlock(&neighbor_list_lock);
                        reset_neighbor(currnb);
                }

                currlh = currlh->next;
        }

        mutex_unlock(&neighbor_list_lock);
}

static void stall_timer(struct work_struct *work)
{
        struct neighbor *nb = container_of(to_delayed_work(work),
                        struct neighbor, stalltimeout_timer);

        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
                nb->str_timer_pending = 0;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (unlikely(nbstate == NEIGHBOR_STATE_ACTIVE))

        stall_time_ms = ktime_to_ms(ktime_get()) -
                        ktime_to_ms(nb->state_time.last_roundtrip);

        if (nbstate == NEIGHBOR_STATE_STALLED &&
                        stall_time_ms < NB_KILL_TIME_MS) {
                INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
                schedule_delayed_work(&(nb->stalltimeout_timer),
                                msecs_to_jiffies(NB_KILL_TIME_MS -
                                stall_time_ms));
        }

        kref_put(&(nb->ref), neighbor_free); /* stall_timer */
}

int get_neigh_state(struct neighbor *nb)
{
        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        stall_time_ms = ktime_to_ms(ktime_get()) -
                        ktime_to_ms(nb->state_time.last_roundtrip);

        if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
                        stall_time_ms > NB_STALL_TIME_MS && (
                        nb->ping_intransit >= NB_STALL_MINPINGS ||
                        nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
                nb->state = NEIGHBOR_STATE_STALLED;
                starttimer = (nb->str_timer_pending == 0);

                nb->str_timer_pending = 1;
                printk(KERN_ERR "switched to stalled");
                BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
        }

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (unlikely(starttimer)) {
                kref_get(&(nb->ref));
                INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
                                stall_timer);
                schedule_delayed_work(&(nb->stalltimeout_timer),
                                NB_KILL_TIME_MS - stall_time_ms);
        }
}

static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
        for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie == cookie)
                        return &(nb->cookies[i]);
        }

        return 0;
}

void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
        struct ping_cookie *c;

        unsigned long iflags;

        mutex_lock(&(nb->pingcookie_lock));

        c = find_cookie(nb, cookie);

        if (unlikely(c == 0))

        for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie != 0 &&
                                ktime_before(nb->cookies[i].time, c->time)) {
                        nb->cookies[i].pongs++;
                        if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
                                nb->cookies[i].cookie = 0;
                                nb->cookies[i].pongs = 0;
                                nb->ping_intransit--;
                        }
                }
        }

        nb->ping_intransit--;

        oldlatency = ((__s64) ((__u32)atomic_read(&(nb->latency)))) * 1000;
        pinglatency = ktime_to_ns(ktime_get()) - ktime_to_ns(c->time) -

        if (unlikely(unlikely(nb->state == NEIGHBOR_STATE_INITIAL) &&
                        nb->ping_success < 16))
                newlatency = (oldlatency * nb->ping_success + pinglatency) /
                                (nb->ping_success + 1);
        else
                newlatency = (oldlatency * 15 + pinglatency) / 16;

        newlatency = (newlatency + 500) / 1000;
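        /*
         * Illustrative note, not part of the original source: outside of the
         * initial state this is an exponentially weighted moving average with
         * weight 1/16 for the new sample, computed in nanoseconds and rounded
         * to microseconds. E.g. with a stored latency of 1000us and a measured
         * ping latency of 2000us the new value is
         * (15 * 1000000 + 2000000) / 16 = 1062500ns, stored as 1063us.
         */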
        if (unlikely(newlatency < 0))
                newlatency = 0;
        if (unlikely(newlatency >= (1LL << 32)))
                newlatency = (1LL << 32) - 1;

        atomic_set(&(nb->latency), (__u32) newlatency);

        spin_lock_irqsave(&(nb->state_lock), iflags);

        if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
                        nb->state == NEIGHBOR_STATE_STALLED)) {

                if (nb->state == NEIGHBOR_STATE_INITIAL) {
                        __u64 jiffies64 = get_jiffies_64();
                        if (nb->state_time.last_state_change == 0)
                                nb->state_time.last_state_change = jiffies64;
                        if (jiffies64 <= (nb->state_time.last_state_change +
                                        msecs_to_jiffies(INITIAL_TIME_MS)))
                }

                if (nb->ping_success >= PING_SUCCESS_CNT) {
                        /*if (nb->state == NEIGHBOR_STATE_INITIAL)
                                printk(KERN_ERR "switched from initial to active");
                        else
                                printk(KERN_ERR "switched from stalled to active");
                        */

                        if (nb->state == NEIGHBOR_STATE_INITIAL)
                                set_busy_till(nb, 0);

                        nb->state = NEIGHBOR_STATE_ACTIVE;
                        nb->ping_success = 0;
                        nb->state_time.last_roundtrip = c->time;
                }
        } else {
                nb->state_time.last_roundtrip = c->time;
        }

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        mutex_unlock(&(nb->pingcookie_lock));
}

__u32 add_ping_req(struct neighbor *nb, unsigned long *last_ping_time)
{
        struct ping_cookie *c;

        mutex_lock(&(nb->pingcookie_lock));

        for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie == 0)
        }

        get_random_bytes((char *) &i, sizeof(i));
        i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
                        PING_COOKIES_FIFO;

        c = &(nb->cookies[i]);
        c->time = ktime_get();

        if (unlikely(nb->lastcookie == 0))
        c->cookie = nb->lastcookie;

        nb->ping_intransit++;

        *last_ping_time = nb->last_ping_time;
        nb->last_ping_time = jiffies;

        mutex_unlock(&(nb->pingcookie_lock));

        return c->cookie;
}

void unadd_ping_req(struct neighbor *nb, __u32 cookie,
                unsigned long last_ping_time)
{
        mutex_lock(&(nb->pingcookie_lock));

        for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
                if (nb->cookies[i].cookie == cookie) {
                        nb->cookies[i].cookie = 0;
                        nb->ping_intransit--;
                        break;
                }
        }

        nb->last_ping_time = last_ping_time;

        mutex_unlock(&(nb->pingcookie_lock));
}

void set_busy_till(struct neighbor *nb, int initial)
{
        unsigned long iflags;
        unsigned long newval;

        /* improve latency measurement and make traffic analysis harder */

        spin_lock_irqsave(&(nb->busytill_lock), iflags);
        if (unlikely(initial)) {
                newval = jiffies + msecs_to_jiffies(ACTIVEDELAY_INITIAL_MS);
        } else {
                get_random_bytes((char *) &rand, 4);

                newval = jiffies + msecs_to_jiffies(ACTIVEDELAY_NOCONN_MIN_MS +
                                (1LL << 32) * (ACTIVEDELAY_NOCONN_MAX_MS -
                                ACTIVEDELAY_NOCONN_MIN_MS) / rand);
        }

        if (time_after(newval, nb->busy_till))
                nb->busy_till = newval;

        spin_unlock_irqrestore(&(nb->busytill_lock), iflags);
}

static int get_ping_forcetime(struct neighbor *nb)
{
        unsigned long iflags;
        int state = get_neigh_state(nb);

        spin_lock_irqsave(&(nb->busytill_lock), iflags);
        if (time_after_eq(nb->busy_till, jiffies))

        nb->busy_till = jiffies;
        spin_unlock_irqrestore(&(nb->busytill_lock), iflags);

        if (unlikely(state != NEIGHBOR_STATE_ACTIVE))
                return PING_FORCETIME_MS;

        spin_lock_irqsave(&(nb->conn_list_lock), iflags);
        idle = list_empty(&(nb->rcv_conn_list));
        spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

        if (idle)
                return PING_FORCETIME_ACTIVEIDLE_MS;
        else
                return PING_FORCETIME_ACTIVE_MS;
}

/*
 * Checked in addition to the checks and timings already done in kpacket_gen.c.
 * This is primarily to make sure that we do not invalidate other ping cookies
 * which might still receive responses. It does this by requiring a certain
 * minimum delay between pings, depending on how many pings are already in
 * transit.
 */
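/*
 * Illustrative example, not in the original source: with latency and
 * max_remote_cmsg_delay both at 100000us, the base delay below is
 * (100000 + 100000)/1000 = 200ms. With exactly PING_COOKIES_NOTHROTTLE
 * pings in transit it is shifted left by 1 (400ms), each additional ping
 * in transit doubles it again, and the result is capped at
 * PING_THROTTLE_LIMIT_MS.
 */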
int time_to_send_ping(struct neighbor *nb)
{
        __u32 forcetime = get_ping_forcetime(nb);

        mutex_lock(&(nb->pingcookie_lock));
        if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
                __u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
                                ((__u32) atomic_read(
                                &(nb->max_remote_cmsg_delay))) )/1000) <<
                                (nb->ping_intransit + 1 -
                                PING_COOKIES_NOTHROTTLE);

                if (mindelay > PING_THROTTLE_LIMIT_MS)
                        mindelay = PING_THROTTLE_LIMIT_MS;

                if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
        }

        if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
        else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)

        mutex_unlock(&(nb->pingcookie_lock));
}

int get_next_ping_time(struct neighbor *nb)
{
        __u32 forcetime = get_ping_forcetime(nb);

        mutex_lock(&(nb->pingcookie_lock));
        ret = round_jiffies_up(nb->last_ping_time +
                        msecs_to_jiffies(forcetime));
        mutex_unlock(&(nb->pingcookie_lock));

        return ret;
}

static void add_neighbor(struct neighbor *nb)
{
        struct list_head *currlh;

        mutex_lock(&neighbor_list_lock);

        currlh = nb_list.next;

        BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

        while (currlh != &nb_list) {
                struct neighbor *curr = container_of(currlh, struct neighbor,
                                nb_list);

                if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
                                curr->addrlen) == 0)
                        goto already_present;

                currlh = currlh->next;
        }

        /* kref_get not needed here, because the caller leaves its ref to us */
        printk(KERN_ERR "add_neigh");

        INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
        INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

        mutex_lock(&(nb->cmsg_lock));
        nb->last_ping_time = jiffies;
        nb->cmsg_interval = 1000000;
        schedule_controlmsg_timer(nb);
        mutex_unlock(&(nb->cmsg_lock));

        list_add_tail(&(nb->nb_list), &nb_list);

        refresh_initial_debitsrate();

        if (0) {
already_present:
                kmem_cache_free(nb_slab, nb);
        }

        mutex_unlock(&neighbor_list_lock);
}

static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
        char *ptr = cor_pull_skb(skb, 4);

        ((char *)&ret)[0] = ptr[0];
        ((char *)&ret)[1] = ptr[1];
        ((char *)&ret)[2] = ptr[2];
        ((char *)&ret)[3] = ptr[3];

        if (convbo)
                return be32_to_cpu(ret);
        return ret;
}

static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
                char *cmddata)
{
        BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

        addrtypelen = be16_to_cpu(*((__u16 *) cmddata));

        addrlen = be16_to_cpu(*((__u16 *) cmddata));

        cmddata += addrtypelen;

        if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
                return 0;

        nb->addr = kmalloc(addrlen, GFP_KERNEL);
        if (unlikely(nb->addr == 0))
                return 1;

        memcpy(nb->addr, addr, addrlen);
        nb->addrlen = addrlen;

        return 0;
}

static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
                char *cmddata)
{
        if (cmd == NEIGHCMD_ADDADDR) {
                apply_announce_addaddr(nb, cmd, len, cmddata);
        } else {
                /* ignore unknown cmds */
        }
}

static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
                char *source_hw)
{
        struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

        if (unlikely(nb == 0))
                return;

        cmd = be32_to_cpu(*((__u32 *) msg));

        cmdlen = be32_to_cpu(*((__u32 *) msg));

        BUG_ON(cmdlen > len);

        apply_announce_cmd(nb, cmd, cmdlen, msg);

        memcpy(nb->mac, source_hw, MAX_ADDR_LEN);
}

static int check_announce_cmds(char *msg, __u32 len)
{
        cmd = be32_to_cpu(*((__u32 *) msg));

        cmdlen = be32_to_cpu(*((__u32 *) msg));

        /* malformed packet */
        if (unlikely(cmdlen > len))
                return 1;

        if (unlikely(len != 0))
                return 1;

        return 0;
}

static void parse_announce(char *msg, __u32 len, struct net_device *dev,
                char *source_hw)
{
        __u32 min_announce_version;
        __u32 max_announce_version;
        __u32 min_cor_version;
        __u32 max_cor_version;

        if (unlikely(len < 16))
                return;

        min_announce_version = be32_to_cpu(*((__u32 *) msg));

        max_announce_version = be32_to_cpu(*((__u32 *) msg));

        min_cor_version = be32_to_cpu(*((__u32 *) msg));

        max_cor_version = be32_to_cpu(*((__u32 *) msg));

        if (min_announce_version != 0)
                return;
        if (min_cor_version != 0)
                return;
        if (check_announce_cmds(msg, len)) {
                return;
        }
        apply_announce_cmds(msg, len, dev, source_hw);
}

struct announce_in {
        /* lh has to be first */
        struct list_head lh;
        struct sk_buff_head skbs; /* sorted by offset */
        struct net_device *dev;
        char source_hw[MAX_ADDR_LEN];
        __u32 announce_proto_version;
        __u32 packet_version;
        __u32 total_size;
        __u32 received_size;
        __u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;

static void merge_announce(struct announce_in *ann)
{
        char *msg = kmalloc(ann->total_size, GFP_KERNEL);

        if (msg == 0) {
                /* try again when next packet arrives */
                return;
        }

        while (copy != ann->total_size) {
                struct sk_buff *skb;
                struct skb_procstate *ps;

                if (unlikely(skb_queue_empty(&(ann->skbs)))) {
                        printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
                                        "empty while merging packets\n");
                }

                skb = skb_dequeue(&(ann->skbs));
                ps = skb_pstate(skb);

                if (unlikely(ps->funcstate.announce.offset > copy)) {
                        printk(KERN_ERR "net/cor/neighbor.c: invalid offset");
                }

                if (unlikely(ps->funcstate.announce.offset < copy)) {
                        offset = copy - ps->funcstate.announce.offset;
                }

                if (unlikely(currcpy + copy > ann->total_size))

                memcpy(msg + copy, skb->data + offset, currcpy);
        }

        parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

        list_del(&(ann->lh));
        kmem_cache_free(announce_in_slab, ann);
}

static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
        struct skb_procstate *ps = skb_pstate(skb);

        __u32 offset = ps->funcstate.announce.offset;
        __u32 len = skb->len;

        __u32 curroffset = 0;
        __u32 prevoffset = 0;

        struct sk_buff *curr = ann->skbs.next;

        if (unlikely(len + offset > ann->total_size)) {
                /* invalid header */
        }

        /*
         * Try to find the right place to insert in the sorted list. This
         * means to process the list until we find a skb which has a greater
         * offset, so we can insert before it to keep the sort order. However,
         * this is complicated by the fact that the new skb must not be
         * inserted between 2 skbs if there is no data missing in between. So
         * the loop has to keep running until there is either a gap to insert
         * into or we see that this data has already been received.
         */
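        /*
         * Illustrative example, not in the original source: if the queue
         * already holds chunks covering [0,100) and [200,300), a new chunk
         * with offset 100 and length 50 is inserted before the [200,300)
         * chunk, because 200 > 100 and the previous chunk ends at 100 < 200,
         * i.e. there is still a gap. A chunk with offset 50 and length 50
         * would instead be recognized as already received in the loop below.
         */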
        while ((void *) curr != (void *) &(ann->skbs)) {
                struct skb_procstate *currps = skb_pstate(curr);

                curroffset = currps->funcstate.announce.offset;

                if (curroffset > offset && (prevoffset + prevlen) < curroffset)
                        break;

                prevoffset = curroffset;
                prevlen = curr->len;
                curr = curr->next;

                if ((offset+len) <= (prevoffset+prevlen)) {
                        /* we already have this data */
                }
        }

        /*
         * Calculate how much data was really received, by subtracting
         * the bytes we already have.
         */
        if (unlikely(prevoffset + prevlen > offset)) {
                len -= (prevoffset + prevlen) - offset;
                offset = prevoffset + prevlen;
        }

        if (unlikely((void *) curr != (void *) &(ann->skbs) &&
                        (offset + len) > curroffset))
                len = curroffset - offset;

        ann->received_size += len;
        BUG_ON(ann->received_size > ann->total_size);
        __skb_queue_before(&(ann->skbs), curr, skb);
        ann->last_received_packet = get_jiffies_64();

        if (ann->received_size == ann->total_size)
                merge_announce(ann);
        else if (unlikely(ann->skbs.qlen >= 16))

        return 0;
}

void rcv_announce(struct sk_buff *skb)
{
        struct skb_procstate *ps = skb_pstate(skb);
        struct announce_in *curr = 0;
        struct announce_in *leastactive = 0;
        __u32 list_size = 0;

        __u32 announce_proto_version = pull_u32(skb, 1);
        __u32 packet_version = pull_u32(skb, 1);
        __u32 total_size = pull_u32(skb, 1);

        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        ps->funcstate.announce.offset = pull_u32(skb, 1);

        if (total_size > 8192)

        mutex_lock(&(neighbor_operation_lock));

        if (announce_proto_version != 0)

        curr = (struct announce_in *) announce_list.next;

        while (((struct list_head *) curr) != &(announce_list)) {

                if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
                                MAX_ADDR_LEN) == 0 &&
                                curr->announce_proto_version ==
                                announce_proto_version &&
                                curr->packet_version == packet_version &&
                                curr->total_size == total_size)

                if (leastactive == 0 || curr->last_received_packet <
                                leastactive->last_received_packet)
                        leastactive = curr;

                curr = (struct announce_in *) curr->lh.next;
        }

        if (list_size >= 128) {
                BUG_ON(leastactive == 0);

                curr->last_received_packet = get_jiffies_64();

                while (!skb_queue_empty(&(curr->skbs))) {
                        struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
                }
        } else {
                curr = kmem_cache_alloc(announce_in_slab,

                skb_queue_head_init(&(curr->skbs));
                list_add_tail((struct list_head *) curr, &announce_list);
        }

        curr->packet_version = packet_version;
        curr->total_size = total_size;
        curr->received_size = 0;
        curr->announce_proto_version = announce_proto_version;
        curr->dev = skb->dev;
        dev_hold(curr->dev);
        memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

        if (_rcv_announce(skb, curr)) {
                list_del((struct list_head *) curr);
                kmem_cache_free(announce_in_slab, curr);
        }

        mutex_unlock(&(neighbor_operation_lock));
}

struct announce {
        struct kref ref;

        __u32 packet_version;
        char *announce_msg;
        __u32 announce_msg_len;
};

struct announce *last_announce;

static int send_announce_chunk(struct announce_data *ann)
{
        struct sk_buff *skb;
        __u32 packet_size = 256;
        __u32 remainingdata = ann->ann->announce_msg_len -
                        ann->curr_announce_msg_offset;
        __u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
        __u32 overhead = 17 + headroom;

        if (remainingdata < packet_size)
                packet_size = remainingdata;

        skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
        if (unlikely(skb == 0))

        skb->protocol = htons(ETH_P_COR);
        skb->dev = ann->dev;
        skb_reserve(skb, headroom);

        if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
                        ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))

        skb_reset_network_header(skb);

        header = skb_put(skb, 17);
        if (unlikely(header == 0))

        header[0] = PACKET_TYPE_ANNOUNCE;

        put_u32(header + 1, 0, 1); /* announce proto version */
        put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
        put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
        put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

        ptr = skb_put(skb, packet_size);
        if (unlikely(ptr == 0))

        memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
                        packet_size);

        rc = dev_queue_xmit(skb);

        ann->curr_announce_msg_offset += packet_size;

        if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
                ann->curr_announce_msg_offset = 0;

        return rc;
}

int send_announce_qos(struct announce_data *ann)
{
        mutex_lock(&(neighbor_operation_lock));
        rc = send_announce_chunk(ann);
        mutex_unlock(&(neighbor_operation_lock));
        return rc;
}

static void announce_free(struct kref *ref)
{
        struct announce *ann = container_of(ref, struct announce, ref);
        kfree(ann->announce_msg);
}

void announce_data_free(struct kref *ref)
{
        struct announce_data *ann = container_of(ref, struct announce_data,
                        ref);
        kref_put(&(ann->ann->ref), announce_free);
}

static void send_announce(struct work_struct *work)
{
        struct announce_data *ann = container_of(to_delayed_work(work),
                        struct announce_data, announce_work);

        mutex_lock(&(neighbor_operation_lock));

        if (unlikely(ann->dev == 0))

        if (unlikely(ann->ann == 0 && last_announce == 0))

        if (ann->curr_announce_msg_offset == 0 &&
                        unlikely(ann->ann != last_announce)) {
                kref_put(&(ann->ann->ref), announce_free);
                ann->ann = last_announce;
                kref_get(&(ann->ann->ref));
        }

        rc = send_announce_chunk(ann);

        mutex_unlock(&(neighbor_operation_lock));

        qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

        if (unlikely(reschedule == 0)) {
                kref_put(&(ann->ref), announce_data_free);
        } else {
                __u64 jiffies = get_jiffies_64();

                ann->scheduled_announce_timer += msecs_to_jiffies(
                                ANNOUNCE_SEND_PACKETINTELVAL_MS);

                delay = ann->scheduled_announce_timer - jiffies;

                INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
                schedule_delayed_work(&(ann->announce_work), delay);
        }
}

static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
        struct list_head *lh = announce_out_list.next;

        while (lh != &announce_out_list) {
                struct announce_data *curr = (struct announce_data *)(
                                ((char *) lh) -
                                offsetof(struct announce_data, lh));

                if (curr->dev == dev)
                        return curr;

                lh = lh->next;
        }

        return 0;
}

static void announce_send_adddev(struct net_device *dev)
{
        struct announce_data *ann;

        ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

        if (unlikely(ann == 0)) {
                printk(KERN_ERR "cor cannot allocate memory for sending "
                                "announces\n");
                return;
        }

        memset(ann, 0, sizeof(struct announce_data));

        kref_init(&(ann->ref));

        mutex_lock(&(neighbor_operation_lock));
        list_add_tail(&(ann->lh), &announce_out_list);
        mutex_unlock(&(neighbor_operation_lock));

        ann->scheduled_announce_timer = get_jiffies_64();
        INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
        schedule_delayed_work(&(ann->announce_work), 1);
}

static void announce_send_rmdev(struct net_device *dev)
{
        struct announce_data *ann;

        mutex_lock(&(neighbor_operation_lock));

        ann = get_announce_by_netdev(dev);

        mutex_unlock(&(neighbor_operation_lock));
}

int netdev_notify_func(struct notifier_block *not, unsigned long event,
                void *ptr)
{
        struct net_device *dev = (struct net_device *) ptr;

        switch (event) {
        case NETDEV_UP:
                rc = create_queue(dev);

                announce_send_adddev(dev);
                break;
        case NETDEV_DOWN:
                announce_send_rmdev(dev);
                reset_neighbor_dev(dev);
                break;
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
        case NETDEV_CHANGEMTU:
        case NETDEV_CHANGEADDR:
        case NETDEV_GOING_DOWN:
        case NETDEV_CHANGENAME:
        case NETDEV_FEAT_CHANGE:
        case NETDEV_BONDING_FAILOVER:
                break;
        }
}

static int set_announce(char *msg, __u32 len)
{
        struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

        if (unlikely(ann == 0)) {
                return 1;
        }

        memset(ann, 0, sizeof(struct announce));

        ann->announce_msg = msg;
        ann->announce_msg_len = len;

        kref_init(&(ann->ref));

        mutex_lock(&(neighbor_operation_lock));

        if (last_announce != 0) {
                ann->packet_version = last_announce->packet_version + 1;
                kref_put(&(last_announce->ref), announce_free);
        }

        last_announce = ann;

        mutex_unlock(&(neighbor_operation_lock));

        return 0;
}

static int generate_announce(void)
{
        __u32 addrtypelen = strlen(addrtype);

        __u32 cmd_hdr_len = 8;
        __u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

        __u32 len = hdr_len + cmd_hdr_len + cmd_len;

        char *msg = kmalloc(len, GFP_KERNEL);
        if (unlikely(msg == 0))
                return 1;

        put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
        offset += 4;
        put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
        offset += 4;

        put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
        offset += 4;
        put_u32(msg + offset, cmd_len, 1); /* command length */
        offset += 4;

        /* addrtypelen, addrlen */
        put_u16(msg + offset, addrtypelen, 1);
        offset += 2;
        put_u16(msg + offset, addrlen, 1);
        offset += 2;

        /* addrtype, addr */
        memcpy(msg + offset, addrtype, addrtypelen);
        offset += addrtypelen;
        memcpy(msg + offset, addr, addrlen);
        offset += addrlen;

        BUG_ON(offset != len);

        return set_announce(msg, len);
}
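/*
 * Illustrative layout, not in the original source, of the message built by
 * generate_announce() above, assuming the 2 byte addrtype "id":
 *
 *  offset  0: min_announce_proto_version  [4] = 0
 *  offset  4: max_announce_proto_version  [4] = 0
 *  offset  8: min_cor_proto_version       [4] = 0
 *  offset 12: max_cor_proto_version       [4] = 0
 *  offset 16: command                     [4] = NEIGHCMD_ADDADDR
 *  offset 20: command length              [4] = 2 + 2 + addrtypelen + addrlen
 *  offset 24: addrtypelen [2], addrlen [2], addrtype ("id"), addr
 */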
int __init cor_neighbor_init(void)
{
        addr = kmalloc(addrlen, GFP_KERNEL);
        if (unlikely(addr == 0))

        get_random_bytes(addr, addrlen);

        nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
                        0, 0);
        announce_in_slab = kmem_cache_create("cor_announce_in",
                        sizeof(struct announce_in), 8, 0, 0);

        if (unlikely(generate_announce()))

        memset(&netdev_notify, 0, sizeof(netdev_notify));
        netdev_notify.notifier_call = netdev_notify_func;
        register_netdevice_notifier(&netdev_notify);

        return 0;
}

MODULE_LICENSE("GPL");