2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #include <linux/skbuff.h>
36 #include <linux/netdevice.h>
38 #include <linux/if_vlan.h>
39 #include <linux/jhash.h>
40 #include <net/neighbour.h>
46 #define VLAN_NONE 0xfff
48 /* identifies sync vs async L2T_WRITE_REQs */
49 #define F_SYNC_WR (1 << 12)
/*
 * States of an L2T entry.
 *
 * The numeric order is significant: code in this file tests
 * "state < L2T_STATE_SWITCHING" to decide whether an entry may currently be
 * on a hash chain, so new hashed states must be added above
 * L2T_STATE_SWITCHING and unhashed states below it.
 */
enum {
	L2T_STATE_VALID,      /* entry is up to date */
	L2T_STATE_STALE,      /* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,  /* entry needs address resolution */
	L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
	L2T_STATE_UNUSED      /* entry not in use */
};
64 atomic_t nfree
; /* number of free entries */
65 struct l2t_entry
*rover
; /* starting point for next allocation */
66 struct l2t_entry l2tab
[L2T_SIZE
];
69 static inline unsigned int vlan_prio(const struct l2t_entry
*e
)
74 static inline void l2t_hold(struct l2t_data
*d
, struct l2t_entry
*e
)
76 if (atomic_add_return(1, &e
->refcnt
) == 1) /* 0 -> 1 transition */
77 atomic_dec(&d
->nfree
);
/*
 * To avoid having to check address families we do not allow v4 and v6
 * neighbors to be on the same hash chain.  We keep v4 entries in the first
 * half of available hash buckets and v6 in the second.
 */
86 L2T_SZ_HALF
= L2T_SIZE
/ 2,
87 L2T_HASH_MASK
= L2T_SZ_HALF
- 1
90 static inline unsigned int arp_hash(const u32
*key
, int ifindex
)
92 return jhash_2words(*key
, ifindex
, 0) & L2T_HASH_MASK
;
95 static inline unsigned int ipv6_hash(const u32
*key
, int ifindex
)
97 u32
xor = key
[0] ^ key
[1] ^ key
[2] ^ key
[3];
99 return L2T_SZ_HALF
+ (jhash_2words(xor, ifindex
, 0) & L2T_HASH_MASK
);
102 static unsigned int addr_hash(const u32
*addr
, int addr_len
, int ifindex
)
104 return addr_len
== 4 ? arp_hash(addr
, ifindex
) :
105 ipv6_hash(addr
, ifindex
);
/*
 * Checks if an L2T entry is for the given IP/IPv6 address.  It does not check
 * whether the L2T entry and the address are of the same address family.
 * Callers ensure an address is only checked against L2T entries of the same
 * family, something made trivial by the separation of IP and IPv6 hash chains
 * mentioned above.  Returns 0 if there's a match, non-zero otherwise.
 */
115 static int addreq(const struct l2t_entry
*e
, const u32
*addr
)
118 return (e
->addr
[0] ^ addr
[0]) | (e
->addr
[1] ^ addr
[1]) |
119 (e
->addr
[2] ^ addr
[2]) | (e
->addr
[3] ^ addr
[3]);
120 return e
->addr
[0] ^ addr
[0];
123 static void neigh_replace(struct l2t_entry
*e
, struct neighbour
*n
)
127 neigh_release(e
->neigh
);
/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.
 */
135 static int write_l2e(struct adapter
*adap
, struct l2t_entry
*e
, int sync
)
138 struct cpl_l2t_write_req
*req
;
140 skb
= alloc_skb(sizeof(*req
), GFP_ATOMIC
);
144 req
= (struct cpl_l2t_write_req
*)__skb_put(skb
, sizeof(*req
));
147 OPCODE_TID(req
) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ
,
148 e
->idx
| (sync
? F_SYNC_WR
: 0) |
149 TID_QID(adap
->sge
.fw_evtq
.abs_id
)));
150 req
->params
= htons(L2T_W_PORT(e
->lport
) | L2T_W_NOREPLY(!sync
));
151 req
->l2t_idx
= htons(e
->idx
);
152 req
->vlan
= htons(e
->vlan
);
154 memcpy(e
->dmac
, e
->neigh
->ha
, sizeof(e
->dmac
));
155 memcpy(req
->dst_mac
, e
->dmac
, sizeof(req
->dst_mac
));
157 set_wr_txq(skb
, CPL_PRIORITY_CONTROL
, 0);
158 t4_ofld_send(adap
, skb
);
160 if (sync
&& e
->state
!= L2T_STATE_SWITCHING
)
161 e
->state
= L2T_STATE_SYNC_WRITE
;
/*
 * Send packets waiting in an L2T entry's ARP queue.  Must be called with the
 * entry locked.
 */
169 static void send_pending(struct adapter
*adap
, struct l2t_entry
*e
)
171 while (e
->arpq_head
) {
172 struct sk_buff
*skb
= e
->arpq_head
;
174 e
->arpq_head
= skb
->next
;
176 t4_ofld_send(adap
, skb
);
/*
 * Process a CPL_L2T_WRITE_RPL.  Wake up the ARP queue if it completes a
 * synchronous L2T_WRITE.  Note that the TID in the reply is really the L2T
 * index it refers to.
 */
186 void do_l2t_write_rpl(struct adapter
*adap
, const struct cpl_l2t_write_rpl
*rpl
)
188 unsigned int tid
= GET_TID(rpl
);
189 unsigned int idx
= tid
& (L2T_SIZE
- 1);
191 if (unlikely(rpl
->status
!= CPL_ERR_NONE
)) {
192 dev_err(adap
->pdev_dev
,
193 "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
198 if (tid
& F_SYNC_WR
) {
199 struct l2t_entry
*e
= &adap
->l2t
->l2tab
[idx
];
202 if (e
->state
!= L2T_STATE_SWITCHING
) {
203 send_pending(adap
, e
);
204 e
->state
= (e
->neigh
->nud_state
& NUD_STALE
) ?
205 L2T_STATE_STALE
: L2T_STATE_VALID
;
207 spin_unlock(&e
->lock
);
/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
215 static inline void arpq_enqueue(struct l2t_entry
*e
, struct sk_buff
*skb
)
219 e
->arpq_tail
->next
= skb
;
225 int cxgb4_l2t_send(struct net_device
*dev
, struct sk_buff
*skb
,
228 struct adapter
*adap
= netdev2adap(dev
);
232 case L2T_STATE_STALE
: /* entry is stale, kick off revalidation */
233 neigh_event_send(e
->neigh
, NULL
);
234 spin_lock_bh(&e
->lock
);
235 if (e
->state
== L2T_STATE_STALE
)
236 e
->state
= L2T_STATE_VALID
;
237 spin_unlock_bh(&e
->lock
);
238 case L2T_STATE_VALID
: /* fast-path, send the packet on */
239 return t4_ofld_send(adap
, skb
);
240 case L2T_STATE_RESOLVING
:
241 case L2T_STATE_SYNC_WRITE
:
242 spin_lock_bh(&e
->lock
);
243 if (e
->state
!= L2T_STATE_SYNC_WRITE
&&
244 e
->state
!= L2T_STATE_RESOLVING
) {
245 spin_unlock_bh(&e
->lock
);
248 arpq_enqueue(e
, skb
);
249 spin_unlock_bh(&e
->lock
);
251 if (e
->state
== L2T_STATE_RESOLVING
&&
252 !neigh_event_send(e
->neigh
, NULL
)) {
253 spin_lock_bh(&e
->lock
);
254 if (e
->state
== L2T_STATE_RESOLVING
&& e
->arpq_head
)
255 write_l2e(adap
, e
, 1);
256 spin_unlock_bh(&e
->lock
);
261 EXPORT_SYMBOL(cxgb4_l2t_send
);
/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
266 static struct l2t_entry
*alloc_l2e(struct l2t_data
*d
)
268 struct l2t_entry
*end
, *e
, **p
;
270 if (!atomic_read(&d
->nfree
))
273 /* there's definitely a free entry */
274 for (e
= d
->rover
, end
= &d
->l2tab
[L2T_SIZE
]; e
!= end
; ++e
)
275 if (atomic_read(&e
->refcnt
) == 0)
278 for (e
= d
->l2tab
; atomic_read(&e
->refcnt
); ++e
)
282 atomic_dec(&d
->nfree
);
285 * The entry we found may be an inactive entry that is
286 * presently in the hash table. We need to remove it.
288 if (e
->state
< L2T_STATE_SWITCHING
)
289 for (p
= &d
->l2tab
[e
->hash
].first
; *p
; p
= &(*p
)->next
)
296 e
->state
= L2T_STATE_UNUSED
;
/*
 * Called when an L2T entry has no more users.
 */
303 static void t4_l2e_free(struct l2t_entry
*e
)
307 spin_lock_bh(&e
->lock
);
308 if (atomic_read(&e
->refcnt
) == 0) { /* hasn't been recycled */
310 neigh_release(e
->neigh
);
314 spin_unlock_bh(&e
->lock
);
316 d
= container_of(e
, struct l2t_data
, l2tab
[e
->idx
]);
317 atomic_inc(&d
->nfree
);
320 void cxgb4_l2t_release(struct l2t_entry
*e
)
322 if (atomic_dec_and_test(&e
->refcnt
))
325 EXPORT_SYMBOL(cxgb4_l2t_release
);
/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
331 static void reuse_entry(struct l2t_entry
*e
, struct neighbour
*neigh
)
333 unsigned int nud_state
;
335 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
336 if (neigh
!= e
->neigh
)
337 neigh_replace(e
, neigh
);
338 nud_state
= neigh
->nud_state
;
339 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)) ||
340 !(nud_state
& NUD_VALID
))
341 e
->state
= L2T_STATE_RESOLVING
;
342 else if (nud_state
& NUD_CONNECTED
)
343 e
->state
= L2T_STATE_VALID
;
345 e
->state
= L2T_STATE_STALE
;
346 spin_unlock(&e
->lock
);
349 struct l2t_entry
*cxgb4_l2t_get(struct l2t_data
*d
, struct neighbour
*neigh
,
350 const struct net_device
*physdev
,
351 unsigned int priority
)
356 int addr_len
= neigh
->tbl
->key_len
;
357 u32
*addr
= (u32
*)neigh
->primary_key
;
358 int ifidx
= neigh
->dev
->ifindex
;
359 int hash
= addr_hash(addr
, addr_len
, ifidx
);
361 if (neigh
->dev
->flags
& IFF_LOOPBACK
)
362 lport
= netdev2pinfo(physdev
)->tx_chan
+ 4;
364 lport
= netdev2pinfo(physdev
)->lport
;
366 if (neigh
->dev
->priv_flags
& IFF_802_1Q_VLAN
)
367 vlan
= vlan_dev_vlan_id(neigh
->dev
);
371 write_lock_bh(&d
->lock
);
372 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
373 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
&&
374 e
->vlan
== vlan
&& e
->lport
== lport
) {
376 if (atomic_read(&e
->refcnt
) == 1)
377 reuse_entry(e
, neigh
);
381 /* Need to allocate a new entry */
384 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
385 e
->state
= L2T_STATE_RESOLVING
;
386 memcpy(e
->addr
, addr
, addr_len
);
390 e
->v6
= addr_len
== 16;
391 atomic_set(&e
->refcnt
, 1);
392 neigh_replace(e
, neigh
);
394 e
->next
= d
->l2tab
[hash
].first
;
395 d
->l2tab
[hash
].first
= e
;
396 spin_unlock(&e
->lock
);
399 write_unlock_bh(&d
->lock
);
402 EXPORT_SYMBOL(cxgb4_l2t_get
);
/*
 * Called when address resolution fails for an L2T entry to handle packets
 * on the arpq head.  If a packet specifies a failure handler it is invoked,
 * otherwise the packet is sent to the device.
 */
409 static void handle_failed_resolution(struct adapter
*adap
, struct sk_buff
*arpq
)
412 struct sk_buff
*skb
= arpq
;
413 const struct l2t_skb_cb
*cb
= L2T_SKB_CB(skb
);
417 if (cb
->arp_err_handler
)
418 cb
->arp_err_handler(cb
->handle
, skb
);
420 t4_ofld_send(adap
, skb
);
/*
 * Called when the host's neighbor layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
428 void t4_l2t_update(struct adapter
*adap
, struct neighbour
*neigh
)
431 struct sk_buff
*arpq
= NULL
;
432 struct l2t_data
*d
= adap
->l2t
;
433 int addr_len
= neigh
->tbl
->key_len
;
434 u32
*addr
= (u32
*) neigh
->primary_key
;
435 int ifidx
= neigh
->dev
->ifindex
;
436 int hash
= addr_hash(addr
, addr_len
, ifidx
);
438 read_lock_bh(&d
->lock
);
439 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
440 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
) {
442 if (atomic_read(&e
->refcnt
))
444 spin_unlock(&e
->lock
);
447 read_unlock_bh(&d
->lock
);
451 read_unlock(&d
->lock
);
453 if (neigh
!= e
->neigh
)
454 neigh_replace(e
, neigh
);
456 if (e
->state
== L2T_STATE_RESOLVING
) {
457 if (neigh
->nud_state
& NUD_FAILED
) {
459 e
->arpq_head
= e
->arpq_tail
= NULL
;
460 } else if ((neigh
->nud_state
& (NUD_CONNECTED
| NUD_STALE
)) &&
462 write_l2e(adap
, e
, 1);
465 e
->state
= neigh
->nud_state
& NUD_CONNECTED
?
466 L2T_STATE_VALID
: L2T_STATE_STALE
;
467 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)))
468 write_l2e(adap
, e
, 0);
471 spin_unlock_bh(&e
->lock
);
474 handle_failed_resolution(adap
, arpq
);
/*
 * Allocate an L2T entry for use by a switching rule.  Such entries need to be
 * explicitly freed and while busy they are not on any hash chain, so normal
 * address resolution updates do not see them.
 */
482 struct l2t_entry
*t4_l2t_alloc_switching(struct l2t_data
*d
)
486 write_lock_bh(&d
->lock
);
489 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
490 e
->state
= L2T_STATE_SWITCHING
;
491 atomic_set(&e
->refcnt
, 1);
492 spin_unlock(&e
->lock
);
494 write_unlock_bh(&d
->lock
);
/*
 * Sets/updates the contents of a switching L2T entry that has been allocated
 * with an earlier call to t4_l2t_alloc_switching().
 */
502 int t4_l2t_set_switching(struct adapter
*adap
, struct l2t_entry
*e
, u16 vlan
,
503 u8 port
, u8
*eth_addr
)
507 memcpy(e
->dmac
, eth_addr
, ETH_ALEN
);
508 return write_l2e(adap
, e
, 0);
511 struct l2t_data
*t4_init_l2t(void)
516 d
= t4_alloc_mem(sizeof(*d
));
521 atomic_set(&d
->nfree
, L2T_SIZE
);
522 rwlock_init(&d
->lock
);
524 for (i
= 0; i
< L2T_SIZE
; ++i
) {
526 d
->l2tab
[i
].state
= L2T_STATE_UNUSED
;
527 spin_lock_init(&d
->l2tab
[i
].lock
);
528 atomic_set(&d
->l2tab
[i
].refcnt
, 0);
533 #include <linux/module.h>
534 #include <linux/debugfs.h>
535 #include <linux/seq_file.h>
537 static inline void *l2t_get_idx(struct seq_file
*seq
, loff_t pos
)
539 struct l2t_entry
*l2tab
= seq
->private;
541 return pos
>= L2T_SIZE
? NULL
: &l2tab
[pos
];
544 static void *l2t_seq_start(struct seq_file
*seq
, loff_t
*pos
)
546 return *pos
? l2t_get_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
549 static void *l2t_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
551 v
= l2t_get_idx(seq
, *pos
);
557 static void l2t_seq_stop(struct seq_file
*seq
, void *v
)
561 static char l2e_state(const struct l2t_entry
*e
)
564 case L2T_STATE_VALID
: return 'V';
565 case L2T_STATE_STALE
: return 'S';
566 case L2T_STATE_SYNC_WRITE
: return 'W';
567 case L2T_STATE_RESOLVING
: return e
->arpq_head
? 'A' : 'R';
568 case L2T_STATE_SWITCHING
: return 'X';
574 static int l2t_seq_show(struct seq_file
*seq
, void *v
)
576 if (v
== SEQ_START_TOKEN
)
577 seq_puts(seq
, " Idx IP address "
578 "Ethernet address VLAN/P LP State Users Port\n");
581 struct l2t_entry
*e
= v
;
583 spin_lock_bh(&e
->lock
);
584 if (e
->state
== L2T_STATE_SWITCHING
)
587 sprintf(ip
, e
->v6
? "%pI6c" : "%pI4", e
->addr
);
588 seq_printf(seq
, "%4u %-25s %17pM %4d %u %2u %c %5u %s\n",
590 e
->vlan
& VLAN_VID_MASK
, vlan_prio(e
), e
->lport
,
591 l2e_state(e
), atomic_read(&e
->refcnt
),
592 e
->neigh
? e
->neigh
->dev
->name
: "");
593 spin_unlock_bh(&e
->lock
);
598 static const struct seq_operations l2t_seq_ops
= {
599 .start
= l2t_seq_start
,
600 .next
= l2t_seq_next
,
601 .stop
= l2t_seq_stop
,
605 static int l2t_seq_open(struct inode
*inode
, struct file
*file
)
607 int rc
= seq_open(file
, &l2t_seq_ops
);
610 struct adapter
*adap
= inode
->i_private
;
611 struct seq_file
*seq
= file
->private_data
;
613 seq
->private = adap
->l2t
->l2tab
;
618 const struct file_operations t4_l2t_fops
= {
619 .owner
= THIS_MODULE
,
620 .open
= l2t_seq_open
,
623 .release
= seq_release
,