/*
 * Connection oriented routing
 * Copyright (C) 2007-2011 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
/*
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
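 *
 * Illustrative example (added for clarity, not from the original source;
 * it assumes the field layout above and the 17-byte header built by
 * send_announce_chunk() below, which sends at most 256 data bytes per
 * chunk): a 600 byte announce message would be split into chunks of
 * 256, 256 and 88 bytes, all carrying announce proto version 0, the same
 * packet version and total size 600, and offsets 0, 256 and 512. The
 * receive path (rcv_announce/_rcv_announce) queues the chunks sorted by
 * offset and merges them once received_size == total_size.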
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */

#define NEIGHCMD_ADDADDR 1

/*
 * Parameters of NEIGHCMD_ADDADDR:
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
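/*
 * Example encoding (added for illustration, not part of the original
 * source; it assumes the layout above, big endian multi-byte fields as
 * the put_u32(..., 1)/be32_to_cpu() calls suggest, and a 16 byte address
 * purely for concreteness - the real addrlen is configured elsewhere in
 * the module):
 *
 *   00 00 00 00   min_announce_proto_version = 0
 *   00 00 00 00   max_announce_proto_version = 0
 *   00 00 00 00   min_cor_proto_version = 0
 *   00 00 00 00   max_cor_proto_version = 0
 *   00 00 00 01   command = NEIGHCMD_ADDADDR
 *   00 00 00 16   commandlength = 2 + 2 + 2 + 16 = 22 (0x16)
 *   00 02         addrtypelen = 2
 *   00 10         addrlen = 16 (0x10)
 *   69 64         addrtype = "id"
 *   xx .. xx      addr (16 random bytes)
 */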
DEFINE_MUTEX(neighbor_operation_lock);
DEFINE_MUTEX(neighbor_list_lock);

char *addrtype = "id";
char *addr;
__u32 addrlen;

struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;

#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
	if (addrtypelen == 2 &&
			(addrtype[0] == 'i' || addrtype[0] == 'I') &&
			(addrtype[1] == 'd' || addrtype[1] == 'D'))
		return ADDRTYPE_ID;

	return ADDRTYPE_UNKNOWN;
}
void neighbor_free(struct kref *ref)
{
	struct neighbor *nb = container_of(ref, struct neighbor, ref);
	printk(KERN_ERR "neighbor free");
	BUG_ON(nb->nb_list.next != LIST_POISON1);
	BUG_ON(nb->nb_list.prev != LIST_POISON2);

	kmem_cache_free(nb_slab, nb);
}
static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
	struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
	__u32 seqno;

	if (unlikely(nb == 0))
		return 0;

	memset(nb, 0, sizeof(struct neighbor));

	kref_init(&(nb->ref));
	init_timer(&(nb->cmsg_timer));
	nb->cmsg_timer.function = controlmsg_timerfunc;
	nb->cmsg_timer.data = (unsigned long) nb;
	INIT_WORK(&(nb->cmsg_work), controlmsg_workfunc);
	atomic_set(&(nb->cmsg_work_scheduled), 0);
	atomic_set(&(nb->cmsg_timer_running), 0);
	mutex_init(&(nb->cmsg_lock));
	mutex_init(&(nb->send_cmsg_lock));
	INIT_LIST_HEAD(&(nb->control_msgs_out));
	INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
	nb->last_ping_time = jiffies;
	nb->cmsg_interval = 1000000;
	atomic_set(&(nb->ooo_packets), 0);
	spin_lock_init(&(nb->credits_lock));
	nb->jiffies_credit_update = nb->last_ping_time;
	nb->jiffies_credit_decay = nb->last_ping_time;
	spin_lock_init(&(nb->busytill_lock));
	nb->busy_till = jiffies;
	atomic_set(&(nb->latency), 1000000);
	atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
	spin_lock_init(&(nb->state_lock));
	get_random_bytes((char *) &seqno, sizeof(seqno));
	atomic_set(&(nb->kpacket_seqno), seqno);
	spin_lock_init(&(nb->conn_list_lock));
	INIT_LIST_HEAD(&(nb->rcv_conn_list));
	spin_lock_init(&(nb->retrans_lock));
	INIT_LIST_HEAD(&(nb->retrans_list));
	INIT_LIST_HEAD(&(nb->retrans_list_conn));

	return nb;
}
int is_from_nb(struct sk_buff *skb, struct neighbor *nb)
{
	int rc;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_operation_lock));
	rc = (skb->dev == nb->dev && memcmp(nb->mac, source_hw,
			MAX_ADDR_LEN) == 0);
	mutex_unlock(&(neighbor_operation_lock));

	return rc;
}
struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_list_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (skb->dev == curr->dev && memcmp(curr->mac, source_hw,
				MAX_ADDR_LEN) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_list_lock));

	return ret;
}
struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
		__u16 addrlen, __u8 *addr)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	mutex_lock(&(neighbor_list_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
				addrlen) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_list_lock));

	return ret;
}
/*
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput, credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
#warning todo pregenerate (lift response size limit)
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
	struct list_head *currlh;

	__u32 buf_offset = 8;
	__u32 headoffset = 0;

	/*
	 * The variable length headers rowcount and fieldlength need to be
	 * generated after the data. This is done by reserving the maximum
	 * space they could take. If they end up being smaller, the data is
	 * moved so that there is no gap.
	 */
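	/*
	 * Worked example (added for clarity; the exact encode_len() byte
	 * counts are defined elsewhere, so the numbers here are assumptions):
	 * 4 bytes are reserved for each per-neighbor record length below
	 * (buf_offset += 4). If encode_len() later manages to encode that
	 * length in rc < 4 bytes, the memmove() shifts the record data down
	 * by 4 - rc bytes and buf_offset is reduced accordingly, so the
	 * output stays densely packed.
	 */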
	BUG_ON(buflen < buf_offset);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR, 1);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 0);

	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY, 1);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

	mutex_lock(&(neighbor_list_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		unsigned long iflags;

		__u32 addroffset = buf_offset;

		/* get_neigh_state not used here because it would deadlock */
		spin_lock_irqsave(&(curr->state_lock), iflags);
		state = curr->state;
		spin_unlock_irqrestore(&(curr->state_lock), iflags);

		if (state != NEIGHBOR_STATE_ACTIVE)

		if (unlikely(buflen < buf_offset + 4 + 4 + 4 + 4 + 2 +

		buf_offset += 4; /* reserve bufferspace for fieldlen */

		rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

		rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

		rc = encode_len(buf + buf_offset, buflen - buf_offset,

		buf[buf_offset] = 'i'; /* addrtype */
		buf_offset += 1;
		buf[buf_offset] = 'd';
		buf_offset += 1;

		BUG_ON(curr->addrlen > buflen - buf_offset);
		memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
		buf_offset += curr->addrlen;

		rc = encode_len(buf + addroffset, 4, buf_offset - addroffset -
				4);

		memmove(buf + addroffset + rc, buf + addroffset + 4,
				buf_offset - addroffset - 4);
		buf_offset -= (4 - rc);

		buf[buf_offset] = enc_log_64_11(atomic_read(&(curr->latency)));

		BUG_ON(buf_offset > buflen);

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_list_lock));

	rc = encode_len(buf, 4, total);

	rc = encode_len(buf + headoffset, 4, cnt);

	if (likely(headoffset < 8))
		memmove(buf + headoffset, buf + 8, buf_offset);

	return buf_offset + headoffset - 8;
}
static void _refresh_initial_debitsrate(struct net_device *dev,
		__u32 debitsrate)
{
	struct list_head *currlh;
	int neighbors = 0;

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			neighbors++;

		currlh = currlh->next;
	}

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			set_creditrate_initial(curr,
					debitsrate/neighbors);

		currlh = currlh->next;
	}
}
/* neighbor list lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
	struct list_head *currlh1;

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)

		currlh1 = currlh1->next;
	}

	creditrate = creditrate_initial();

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)

		_refresh_initial_debitsrate(curr1->dev, creditrate/ifcnt);

		currlh1 = currlh1->next;
	}
}

static void stall_timer(struct work_struct *work);
static void reset_all_conns(struct neighbor *nb)
{
		unsigned long iflags;

		spin_lock_irqsave(&(nb->conn_list_lock), iflags);

		if (list_empty(&(nb->rcv_conn_list))) {
			spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

		src_in = container_of(nb->rcv_conn_list.next, struct conn,

		kref_get(&(src_in->ref));

		spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

		if (src_in->is_client) {
			mutex_lock(&(src_in->rcv_lock));
			mutex_lock(&(src_in->reversedir->rcv_lock));
		} else {
			mutex_lock(&(src_in->reversedir->rcv_lock));
			mutex_lock(&(src_in->rcv_lock));
		}

		if (unlikely(unlikely(src_in->sourcetype != SOURCE_IN) ||
				unlikely(src_in->source.in.nb != nb))) {

		rc = send_reset_conn(nb, src_in->reversedir->target.out.conn_id,
				src_in->source.in.conn_id, 1);

		if (unlikely(rc != 0))

		if (src_in->reversedir->isreset == 0)
			src_in->reversedir->isreset = 1;

		if (src_in->is_client) {
			mutex_unlock(&(src_in->rcv_lock));
			mutex_unlock(&(src_in->reversedir->rcv_lock));
		} else {
			mutex_unlock(&(src_in->reversedir->rcv_lock));
			mutex_unlock(&(src_in->rcv_lock));
		}

		kref_put(&(src_in->ref), free_conn);

		kref_put(&(src_in->ref), free_conn);

	kref_get(&(nb->ref));
	INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
			stall_timer);
	schedule_delayed_work(&(nb->stalltimeout_timer), 100);
static void reset_neighbor(struct neighbor *nb)
{
	int removenblist;
	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);
	removenblist = (nb->state != NEIGHBOR_STATE_KILLED);
	nb->state = NEIGHBOR_STATE_KILLED;
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	printk(KERN_ERR "reset_neighbor");

	mutex_lock(&neighbor_list_lock);
	list_del(&(nb->nb_list));
	refresh_initial_debitsrate();
	mutex_unlock(&neighbor_list_lock);

#warning todo empty control_msg list

	kref_put(&(nb->ref), neighbor_free); /* nb_list */
}
static void reset_neighbor_dev(struct net_device *dev)
{
	struct list_head *currlh;

	mutex_lock(&neighbor_list_lock);

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		unsigned long iflags;
		struct neighbor *currnb = container_of(currlh, struct neighbor,
				nb_list);

		if (currnb->dev != dev)

		spin_lock_irqsave(&(currnb->state_lock), iflags);
		state = currnb->state;
		spin_unlock_irqrestore(&(currnb->state_lock), iflags);

		if (state != NEIGHBOR_STATE_KILLED) {
			mutex_unlock(&neighbor_list_lock);
			reset_neighbor(currnb);
		}

		currlh = currlh->next;
	}

	mutex_unlock(&neighbor_list_lock);
}
static void stall_timer(struct work_struct *work)
{
	struct neighbor *nb = container_of(to_delayed_work(work),
			struct neighbor, stalltimeout_timer);

	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
		nb->str_timer_pending = 0;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(nbstate == NEIGHBOR_STATE_ACTIVE))

	stall_time_ms = ktime_to_ms(ktime_get()) -
			ktime_to_ms(nb->state_time.last_roundtrip);

	if (nbstate == NEIGHBOR_STATE_STALLED &&
			stall_time_ms < NB_KILL_TIME_MS) {
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(NB_KILL_TIME_MS -

	kref_put(&(nb->ref), neighbor_free); /* stall_timer */
}
int get_neigh_state(struct neighbor *nb)
{
	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);

	stall_time_ms = ktime_to_ms(ktime_get()) -
			ktime_to_ms(nb->state_time.last_roundtrip);

	if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
			stall_time_ms > NB_STALL_TIME_MS && (
			nb->ping_intransit >= NB_STALL_MINPINGS ||
			nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
		nb->state = NEIGHBOR_STATE_STALLED;
		starttimer = (nb->str_timer_pending == 0);

		nb->str_timer_pending = 1;
		printk(KERN_ERR "switched to stalled");
		BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
	}

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(starttimer)) {
		kref_get(&(nb->ref));
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
				stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				NB_KILL_TIME_MS - stall_time_ms);
	}
static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
	int i;

	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie)
			return &(nb->cookies[i]);
	}

	return 0;
}
void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
	unsigned long iflags;

	struct ping_cookie *c;

	int call_connidreuse = 0;

	spin_lock_irqsave(&(nb->state_lock), iflags);

	c = find_cookie(nb, cookie);

	if (unlikely(c == 0))

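	/*
	 * Pongs for cookies that were sent earlier than the one just answered
	 * are counted; a cookie that has been skipped PING_PONGLIMIT times is
	 * given up on and its slot is freed (this is what the loop below
	 * appears to implement).
	 */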
	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie != 0 &&
				ktime_before(nb->cookies[i].time, c->time)) {
			nb->cookies[i].pongs++;
			if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
				nb->cookies[i].cookie = 0;
				nb->cookies[i].pongs = 0;
				nb->ping_intransit--;
			}
		}
	}

	nb->ping_intransit--;

	call_connidreuse = ktime_before_eq(nb->last_roundtrip_end, c->time);

	nb->last_roundtrip_end = now;

	oldlatency = ((__s64) ((__u32)atomic_read(&(nb->latency)))) * 1000;
	pinglatency = ktime_to_ns(now) - ktime_to_ns(c->time) -

	if (unlikely(unlikely(nb->state == NEIGHBOR_STATE_INITIAL) &&
			nb->ping_success < 16))
		newlatency = (oldlatency * nb->ping_success + pinglatency) /
				(nb->ping_success + 1);
	else
		newlatency = (oldlatency * 15 + pinglatency) / 16;

	newlatency = (newlatency + 500) / 1000;

	if (unlikely(newlatency < 0))

	if (unlikely(newlatency >= (1LL << 32)))
		newlatency = (1LL << 32) - 1;

	atomic_set(&(nb->latency), (__u32) newlatency);

	if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
			nb->state == NEIGHBOR_STATE_STALLED)) {
		call_connidreuse = 0;

		if (nb->state == NEIGHBOR_STATE_INITIAL) {
			__u64 jiffies64 = get_jiffies_64();
			if (nb->state_time.last_state_change == 0)
				nb->state_time.last_state_change = jiffies64;
			if (jiffies64 <= (nb->state_time.last_state_change +
					msecs_to_jiffies(INITIAL_TIME_MS)))

		}

		if (nb->ping_success >= PING_SUCCESS_CNT) {
			/*if (nb->state == NEIGHBOR_STATE_INITIAL)
				printk(KERN_ERR "switched from initial to active");
			else
				printk(KERN_ERR "switched from stalled to active");
			*/

			if (nb->state == NEIGHBOR_STATE_INITIAL)
				set_busy_till(nb, 0);

			nb->state = NEIGHBOR_STATE_ACTIVE;
			nb->ping_success = 0;
			nb->state_time.last_roundtrip = c->time;
		}
	} else {
		nb->state_time.last_roundtrip = c->time;
	}

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (call_connidreuse)
		connid_used_pingsuccess(nb);
__u32 add_ping_req(struct neighbor *nb, unsigned long *last_ping_time)
{
	unsigned long iflags;
	struct ping_cookie *c;
	__u32 i;

	spin_lock_irqsave(&(nb->state_lock), iflags);

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == 0)
			break;
	}
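	/*
	 * If the loop above found no unused slot, a pseudo-random slot is
	 * reused below; judging by the modulo arithmetic, the slot is picked
	 * outside the first PING_COOKIES_FIFO entries, which appear to be
	 * reserved for the most recently added cookies.
	 */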
	get_random_bytes((char *) &i, sizeof(i));
	i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
			PING_COOKIES_FIFO;

	c = &(nb->cookies[i]);
	c->time = ktime_get();

	if (unlikely(nb->lastcookie == 0))
		nb->lastcookie++;
	c->cookie = nb->lastcookie;

	nb->ping_intransit++;

	*last_ping_time = nb->last_ping_time;
	nb->last_ping_time = jiffies;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	return c->cookie;
}
void unadd_ping_req(struct neighbor *nb, __u32 cookie,
		unsigned long last_ping_time)
{
	unsigned long iflags;
	int i;

	spin_lock_irqsave(&(nb->state_lock), iflags);

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie) {
			nb->cookies[i].cookie = 0;
			nb->ping_intransit--;
			break;
		}
	}

	nb->last_ping_time = last_ping_time;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);
}
void set_busy_till(struct neighbor *nb, int initial)
{
	unsigned long iflags;
	unsigned long newval;

	/* improve latency measurement and make traffic analysis harder */

	spin_lock_irqsave(&(nb->busytill_lock), iflags);
	if (unlikely(initial)) {
		newval = jiffies + msecs_to_jiffies(ACTIVEDELAY_INITIAL_MS);
	} else {
		__u32 rand;

		get_random_bytes((char *) &rand, 4);

		newval = jiffies + msecs_to_jiffies(ACTIVEDELAY_NOCONN_MIN_MS +
				(1LL << 32) * (ACTIVEDELAY_NOCONN_MAX_MS -
				ACTIVEDELAY_NOCONN_MIN_MS) / rand);
	}

	if (time_after(newval, nb->busy_till))
		nb->busy_till = newval;

	spin_unlock_irqrestore(&(nb->busytill_lock), iflags);
}
static int get_ping_forcetime(struct neighbor *nb)
{
	unsigned long iflags;
	int state = get_neigh_state(nb);
	int idle;

	spin_lock_irqsave(&(nb->busytill_lock), iflags);
	if (time_after_eq(nb->busy_till, jiffies))

		nb->busy_till = jiffies;
	spin_unlock_irqrestore(&(nb->busytill_lock), iflags);

	if (unlikely(state != NEIGHBOR_STATE_ACTIVE))
		return PING_FORCETIME_MS;

	spin_lock_irqsave(&(nb->conn_list_lock), iflags);
	idle = list_empty(&(nb->rcv_conn_list));
	spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

	if (idle)
		return PING_FORCETIME_ACTIVEIDLE_MS;
	else
		return PING_FORCETIME_ACTIVE_MS;
}
/*
 * Additional check on top of the checks and timings already done in
 * kpacket_gen.c. This is primarily to make sure that we do not invalidate
 * other ping cookies which might still receive responses. It does this by
 * requiring a certain minimum delay between pings, depending on how many
 * pings are already in transit.
 */
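/*
 * Worked example (added for illustration; it assumes that nb->latency and
 * nb->max_remote_cmsg_delay are kept in microseconds, which the division
 * by 1000 below suggests): with latency 2000us, max_remote_cmsg_delay
 * 3000us and ping_intransit == PING_COOKIES_NOTHROTTLE, mindelay becomes
 * ((2000 + 3000) / 1000) << 1 = 10ms, and it doubles for every additional
 * ping in transit until it is capped at PING_THROTTLE_LIMIT_MS.
 */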
int time_to_send_ping(struct neighbor *nb)
{
	unsigned long iflags;

	__u32 forcetime = get_ping_forcetime(nb);

	spin_lock_irqsave(&(nb->state_lock), iflags);
	if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
		__u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
				((__u32) atomic_read(
				&(nb->max_remote_cmsg_delay))) )/1000) <<
				(nb->ping_intransit + 1 -
				PING_COOKIES_NOTHROTTLE);

		if (mindelay > PING_THROTTLE_LIMIT_MS)
			mindelay = PING_THROTTLE_LIMIT_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
	}

	if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))

	else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime &&

	spin_unlock_irqrestore(&(nb->state_lock), iflags);
int get_next_ping_time(struct neighbor *nb)
{
	unsigned long iflags;
	unsigned long ret;

	__u32 forcetime = get_ping_forcetime(nb);

	spin_lock_irqsave(&(nb->state_lock), iflags);
	ret = round_jiffies_up(nb->last_ping_time +
			msecs_to_jiffies(forcetime));
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	return ret;
}
static void add_neighbor(struct neighbor *nb)
{
	struct list_head *currlh;

	mutex_lock(&neighbor_list_lock);

	currlh = nb_list.next;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
				curr->addrlen) == 0)
			goto already_present;

		currlh = currlh->next;
	}

	/* kref_get not needed here, because the caller leaves its ref to us */
	printk(KERN_ERR "add_neigh");

	INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
	INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

	mutex_lock(&(nb->cmsg_lock));
	nb->last_ping_time = jiffies;
	nb->cmsg_interval = 1000000;
	schedule_controlmsg_timer(nb);
	mutex_unlock(&(nb->cmsg_lock));

	list_add_tail(&(nb->nb_list), &nb_list);

	refresh_initial_debitsrate();

	if (0) {
already_present:
		kmem_cache_free(nb_slab, nb);
	}

	mutex_unlock(&neighbor_list_lock);
}
static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
	char *ptr = cor_pull_skb(skb, 4);

	__u32 ret = 0;

	((char *)&ret)[0] = ptr[0];
	((char *)&ret)[1] = ptr[1];
	((char *)&ret)[2] = ptr[2];
	((char *)&ret)[3] = ptr[3];

	if (convbo)
		return be32_to_cpu(ret);
	return ret;
}
static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	addrtypelen = be16_to_cpu(*((__u16 *) cmddata));

	addrlen = be16_to_cpu(*((__u16 *) cmddata));

	cmddata += addrtypelen;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)

	nb->addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(nb->addr == 0))

	memcpy(nb->addr, addr, addrlen);
	nb->addrlen = addrlen;
static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	if (cmd == NEIGHCMD_ADDADDR) {
		apply_announce_addaddr(nb, cmd, len, cmddata);
	} else {
		/* ignore unknown cmds */
	}
}
static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

	if (unlikely(nb == 0))
		return;

	cmd = be32_to_cpu(*((__u32 *) msg));

	cmdlen = be32_to_cpu(*((__u32 *) msg));

	BUG_ON(cmdlen > len);

	apply_announce_cmd(nb, cmd, cmdlen, msg);

	memcpy(nb->mac, source_hw, MAX_ADDR_LEN);
static int check_announce_cmds(char *msg, __u32 len)
{
	cmd = be32_to_cpu(*((__u32 *) msg));

	cmdlen = be32_to_cpu(*((__u32 *) msg));

	/* malformed packet */
	if (unlikely(cmdlen > len))

	if (unlikely(len != 0))
static void parse_announce(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	__u32 min_announce_version;
	__u32 max_announce_version;
	__u32 min_cor_version;
	__u32 max_cor_version;

	if (unlikely(len < 16))
		return;

	min_announce_version = be32_to_cpu(*((__u32 *) msg));

	max_announce_version = be32_to_cpu(*((__u32 *) msg));

	min_cor_version = be32_to_cpu(*((__u32 *) msg));

	max_cor_version = be32_to_cpu(*((__u32 *) msg));

	if (min_announce_version != 0)
		return;
	if (min_cor_version != 0)
		return;

	if (check_announce_cmds(msg, len)) {
		return;
	}

	apply_announce_cmds(msg, len, dev, source_hw);
}
struct announce_in {
	/* lh has to be first */
	struct list_head lh;
	struct sk_buff_head skbs; /* sorted by offset */
	struct net_device *dev;
	char source_hw[MAX_ADDR_LEN];
	__u32 announce_proto_version;
	__u32 packet_version;
	__u32 total_size;
	__u32 received_size;
	__u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;
static void merge_announce(struct announce_in *ann)
{
	char *msg = kmalloc(ann->total_size, GFP_KERNEL);

		/* try again when next packet arrives */

	while (copy != ann->total_size) {
		struct sk_buff *skb;
		struct skb_procstate *ps;

		if (unlikely(skb_queue_empty(&(ann->skbs)))) {
			printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
					"empty while merging packets\n");
			break;
		}

		skb = skb_dequeue(&(ann->skbs));
		ps = skb_pstate(skb);

		if (unlikely(ps->funcstate.announce.offset > copy)) {
			printk(KERN_ERR "net/cor/neighbor.c: invalid offset"

		if (unlikely(ps->funcstate.announce.offset < copy)) {
			offset = copy - ps->funcstate.announce.offset;

		if (unlikely(currcpy + copy > ann->total_size))

		memcpy(msg + copy, skb->data + offset, currcpy);
	}

	parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

	list_del(&(ann->lh));
	kmem_cache_free(announce_in_slab, ann);
}
static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
	struct skb_procstate *ps = skb_pstate(skb);

	__u32 offset = ps->funcstate.announce.offset;
	__u32 len = skb->len;

	__u32 curroffset = 0;
	__u32 prevoffset = 0;
	__u32 prevlen = 0;

	struct sk_buff *curr = ann->skbs.next;

	if (unlikely(len + offset > ann->total_size)) {
		/* invalid header */
	/*
	 * Try to find the right place to insert in the sorted list. This
	 * means to process the list until we find a skb which has a greater
	 * offset, so we can insert before it to keep the sort order. However,
	 * this is complicated by the fact that the new skb must not be
	 * inserted between 2 skbs if there is no data missing in between. So
	 * the loop has to keep running until there is either a gap to insert
	 * into or we see that this data has already been received.
	 */
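	/*
	 * Illustrative example (added for clarity; the offsets are made up):
	 * if the queue already holds chunks covering [0,256) and [512,768),
	 * a new chunk at offset 256 is inserted before the [512,768) chunk,
	 * because there is a gap in front of it; a new chunk covering
	 * [0,128) is dropped further below, because its data is already
	 * contained in [0,256).
	 */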
	while ((void *) curr != (void *) &(ann->skbs)) {
		struct skb_procstate *currps = skb_pstate(curr);

		curroffset = currps->funcstate.announce.offset;

		if (curroffset > offset && (prevoffset + prevlen) < curroffset)
			break;

		prevoffset = curroffset;
		prevlen = curr->len;
		curr = curr->next;
	}

	if ((offset + len) <= (prevoffset + prevlen)) {
		/* we already have this data */

	/*
	 * Calculate how much data was really received, by subtracting
	 * the bytes we already have.
	 */
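	/*
	 * For example (illustration only): if the preceding chunk covers
	 * [0,512) and the new chunk covers [384,768), only bytes [512,768)
	 * are counted as newly received, and offset/len are adjusted to
	 * describe exactly that range.
	 */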
	if (unlikely(prevoffset + prevlen > offset)) {
		len -= (prevoffset + prevlen) - offset;
		offset = prevoffset + prevlen;
	}

	if (unlikely((void *) curr != (void *) &(ann->skbs) &&
			(offset + len) > curroffset))
		len = curroffset - offset;

	ann->received_size += len;
	BUG_ON(ann->received_size > ann->total_size);
	__skb_queue_before(&(ann->skbs), curr, skb);
	ann->last_received_packet = get_jiffies_64();

	if (ann->received_size == ann->total_size)
		merge_announce(ann);
	else if (unlikely(ann->skbs.qlen >= 16))
void rcv_announce(struct sk_buff *skb)
{
	struct skb_procstate *ps = skb_pstate(skb);
	struct announce_in *curr = 0;
	struct announce_in *leastactive = 0;
	__u32 list_size = 0;

	__u32 announce_proto_version = pull_u32(skb, 1);
	__u32 packet_version = pull_u32(skb, 1);
	__u32 total_size = pull_u32(skb, 1);

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	ps->funcstate.announce.offset = pull_u32(skb, 1);

	if (total_size > 8192)

	mutex_lock(&(neighbor_operation_lock));

	if (announce_proto_version != 0)

	curr = (struct announce_in *) announce_list.next;

	while (((struct list_head *) curr) != &(announce_list)) {
		if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
				MAX_ADDR_LEN) == 0 &&
				curr->announce_proto_version ==
				announce_proto_version &&
				curr->packet_version == packet_version &&
				curr->total_size == total_size)
			break;

		if (leastactive == 0 || curr->last_received_packet <
				leastactive->last_received_packet)
			leastactive = curr;

		curr = (struct announce_in *) curr->lh.next;
	}

	if (list_size >= 128) {
		BUG_ON(leastactive == 0);

		curr->last_received_packet = get_jiffies_64();

		while (!skb_queue_empty(&(curr->skbs))) {
			struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));

		curr = kmem_cache_alloc(announce_in_slab,

		skb_queue_head_init(&(curr->skbs));
		list_add_tail((struct list_head *) curr, &announce_list);

	curr->packet_version = packet_version;
	curr->total_size = total_size;
	curr->received_size = 0;
	curr->announce_proto_version = announce_proto_version;
	curr->dev = skb->dev;
	dev_hold(curr->dev);
	memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

	if (_rcv_announce(skb, curr)) {
		list_del((struct list_head *) curr);

		kmem_cache_free(announce_in_slab, curr);
	}

	mutex_unlock(&(neighbor_operation_lock));
}
struct announce {
	struct kref ref;

	__u32 packet_version;

	char *announce_msg;
	__u32 announce_msg_len;
};

struct announce *last_announce;
static int send_announce_chunk(struct announce_data *ann)
{
	struct sk_buff *skb;
	__u32 packet_size = 256;
	__u32 remainingdata = ann->ann->announce_msg_len -
			ann->curr_announce_msg_offset;
	__u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
	__u32 overhead = 17 + headroom;
	char *header;
	char *ptr;
	int rc;

	if (remainingdata < packet_size)
		packet_size = remainingdata;

	skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
	if (unlikely(skb == 0))

	skb->protocol = htons(ETH_P_COR);
	skb->dev = ann->dev;
	skb_reserve(skb, headroom);

	if(unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
			ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))

	skb_reset_network_header(skb);

	header = skb_put(skb, 17);
	if (unlikely(header == 0))

	header[0] = PACKET_TYPE_ANNOUNCE;

	put_u32(header + 1, 0, 1); /* announce proto version */
	put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
	put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
	put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

	ptr = skb_put(skb, packet_size);
	if (unlikely(ptr == 0))

	memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
			packet_size);

	rc = dev_queue_xmit(skb);

	ann->curr_announce_msg_offset += packet_size;

	if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
		ann->curr_announce_msg_offset = 0;
int send_announce_qos(struct announce_data *ann)
{
	int rc;
	mutex_lock(&(neighbor_operation_lock));
	rc = send_announce_chunk(ann);
	mutex_unlock(&(neighbor_operation_lock));

	return rc;
}
static void announce_free(struct kref *ref)
{
	struct announce *ann = container_of(ref, struct announce, ref);
	kfree(ann->announce_msg);
	kfree(ann);
}
void announce_data_free(struct kref *ref)
{
	struct announce_data *ann = container_of(ref, struct announce_data,
			ref);
	if (ann->ann != 0)
		kref_put(&(ann->ann->ref), announce_free);
	kfree(ann);
}
static void send_announce(struct work_struct *work)
{
	struct announce_data *ann = container_of(to_delayed_work(work),
			struct announce_data, announce_work);

	mutex_lock(&(neighbor_operation_lock));

	if (unlikely(ann->dev == 0))

	if (unlikely(ann->ann == 0 && last_announce == 0))

	if (ann->curr_announce_msg_offset == 0 &&
			unlikely(ann->ann != last_announce)) {
		if (ann->ann != 0)
			kref_put(&(ann->ann->ref), announce_free);
		ann->ann = last_announce;
		kref_get(&(ann->ann->ref));
	}

	rc = send_announce_chunk(ann);

	mutex_unlock(&(neighbor_operation_lock));

	qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

	if (unlikely(reschedule == 0)) {
		kref_put(&(ann->ref), announce_data_free);
	} else {
		__u64 jiffies = get_jiffies_64();

		ann->scheduled_announce_timer += msecs_to_jiffies(
				ANNOUNCE_SEND_PACKETINTELVAL_MS);

		delay = ann->scheduled_announce_timer - jiffies;

		INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
		schedule_delayed_work(&(ann->announce_work), delay);
	}
}
static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
	struct list_head *lh = announce_out_list.next;

	while (lh != &announce_out_list) {
		struct announce_data *curr = (struct announce_data *)(
				((char *) lh) -
				offsetof(struct announce_data, lh));

		if (curr->dev == dev)
			return curr;

		lh = lh->next;
	}

	return 0;
}
static void announce_send_adddev(struct net_device *dev)
{
	struct announce_data *ann;

	ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		printk(KERN_ERR "cor cannot allocate memory for sending "

	memset(ann, 0, sizeof(struct announce_data));

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));
	list_add_tail(&(ann->lh), &announce_out_list);
	mutex_unlock(&(neighbor_operation_lock));

	ann->scheduled_announce_timer = get_jiffies_64();
	INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
	schedule_delayed_work(&(ann->announce_work), 1);
}
static void announce_send_rmdev(struct net_device *dev)
{
	struct announce_data *ann;

	mutex_lock(&(neighbor_operation_lock));

	ann = get_announce_by_netdev(dev);

	mutex_unlock(&(neighbor_operation_lock));
}
int netdev_notify_func(struct notifier_block *not, unsigned long event,
		void *ptr)
{
	struct net_device *dev = (struct net_device *) ptr;
	int rc;

	switch (event) {
	case NETDEV_UP:
		rc = create_queue(dev);

		announce_send_adddev(dev);
		break;
	case NETDEV_DOWN:
		announce_send_rmdev(dev);
		reset_neighbor_dev(dev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
		break;
	}

	return NOTIFY_DONE;
}
static int set_announce(char *msg, __u32 len)
{
	struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		return 1;
	}

	memset(ann, 0, sizeof(struct announce));

	ann->announce_msg = msg;
	ann->announce_msg_len = len;

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));

	if (last_announce != 0) {
		ann->packet_version = last_announce->packet_version + 1;
		kref_put(&(last_announce->ref), announce_free);
	}

	last_announce = ann;

	mutex_unlock(&(neighbor_operation_lock));

	return 0;
}
static int generate_announce(void)
{
	__u32 addrtypelen = strlen(addrtype);

	__u32 hdr_len = 16;
	__u32 cmd_hdr_len = 8;
	__u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

	__u32 len = hdr_len + cmd_hdr_len + cmd_len;
	__u32 offset = 0;

	char *msg = kmalloc(len, GFP_KERNEL);
	if (unlikely(msg == 0))
		return 1;

	put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
	offset += 4;

	put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(msg + offset, cmd_len, 1); /* command length */
	offset += 4;

	/* addrtypelen, addrlen */
	put_u16(msg + offset, addrtypelen, 1);
	offset += 2;
	put_u16(msg + offset, addrlen, 1);
	offset += 2;

	/* addrtype, addr */
	memcpy(msg + offset, addrtype, addrtypelen);
	offset += addrtypelen;
	memcpy(msg + offset, addr, addrlen);
	offset += addrlen;

	BUG_ON(offset != len);

	return set_announce(msg, len);
}
int __init cor_neighbor_init(void)
{
	addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(addr == 0))
		return 1;

	get_random_bytes(addr, addrlen);

	nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
			0, 0);
	announce_in_slab = kmem_cache_create("cor_announce_in",
			sizeof(struct announce_in), 8, 0, 0);

	if (unlikely(generate_announce()))
		return 1;

	memset(&netdev_notify, 0, sizeof(netdev_notify));
	netdev_notify.notifier_call = netdev_notify_func;
	register_netdevice_notifier(&netdev_notify);

	return 0;
}

MODULE_LICENSE("GPL");