2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #include <linux/skbuff.h>
36 #include <linux/netdevice.h>
38 #include <linux/if_vlan.h>
39 #include <linux/jhash.h>
40 #include <linux/module.h>
41 #include <linux/debugfs.h>
42 #include <linux/seq_file.h>
43 #include <net/neighbour.h>
/*
 * Sentinel VLAN value.  NOTE(review): presumably marks an entry that carries
 * no VLAN tag -- confirm against the users of this macro.
 */
#define VLAN_NONE 0xfff

/*
 * identifies sync vs async L2T_WRITE_REQs
 *
 * The bit is OR-ed into the TID of a write request and tested again in the
 * TID carried by the reply.
 */
#define F_SYNC_WR (1 << 12)
/*
 * States of an L2T entry.
 *
 * The order of the enumerators is significant: code in this file tests
 * "state < L2T_STATE_SWITCHING" to decide whether an entry is hashed, so
 * every hashed state must precede L2T_STATE_SWITCHING.
 */
enum {
	L2T_STATE_VALID,      /* entry is up to date */
	L2T_STATE_STALE,      /* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,  /* entry needs address resolution */
	L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
	L2T_STATE_UNUSED      /* entry not in use */
};
67 atomic_t nfree
; /* number of free entries */
68 struct l2t_entry
*rover
; /* starting point for next allocation */
69 struct l2t_entry l2tab
[L2T_SIZE
];
72 static inline unsigned int vlan_prio(const struct l2t_entry
*e
)
77 static inline void l2t_hold(struct l2t_data
*d
, struct l2t_entry
*e
)
79 if (atomic_add_return(1, &e
->refcnt
) == 1) /* 0 -> 1 transition */
80 atomic_dec(&d
->nfree
);
84 * To avoid having to check address families we do not allow v4 and v6
85 * neighbors to be on the same hash chain. We keep v4 entries in the first
86 * half of available hash buckets and v6 in the second.
89 L2T_SZ_HALF
= L2T_SIZE
/ 2,
90 L2T_HASH_MASK
= L2T_SZ_HALF
- 1
93 static inline unsigned int arp_hash(const u32
*key
, int ifindex
)
95 return jhash_2words(*key
, ifindex
, 0) & L2T_HASH_MASK
;
98 static inline unsigned int ipv6_hash(const u32
*key
, int ifindex
)
100 u32
xor = key
[0] ^ key
[1] ^ key
[2] ^ key
[3];
102 return L2T_SZ_HALF
+ (jhash_2words(xor, ifindex
, 0) & L2T_HASH_MASK
);
105 static unsigned int addr_hash(const u32
*addr
, int addr_len
, int ifindex
)
107 return addr_len
== 4 ? arp_hash(addr
, ifindex
) :
108 ipv6_hash(addr
, ifindex
);
/*
 * Checks if an L2T entry is for the given IP/IPv6 address.  It does not check
 * whether the L2T entry and the address are of the same address family.
 * Callers ensure an address is only checked against L2T entries of the same
 * family, something made trivial by the separation of IP and IPv6 hash chains
 * mentioned above.  Returns 0 if there's a match, a non-zero value otherwise.
 */
118 static int addreq(const struct l2t_entry
*e
, const u32
*addr
)
121 return (e
->addr
[0] ^ addr
[0]) | (e
->addr
[1] ^ addr
[1]) |
122 (e
->addr
[2] ^ addr
[2]) | (e
->addr
[3] ^ addr
[3]);
123 return e
->addr
[0] ^ addr
[0];
126 static void neigh_replace(struct l2t_entry
*e
, struct neighbour
*n
)
130 neigh_release(e
->neigh
);
/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.
 */
138 static int write_l2e(struct adapter
*adap
, struct l2t_entry
*e
, int sync
)
141 struct cpl_l2t_write_req
*req
;
143 skb
= alloc_skb(sizeof(*req
), GFP_ATOMIC
);
147 req
= (struct cpl_l2t_write_req
*)__skb_put(skb
, sizeof(*req
));
150 OPCODE_TID(req
) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ
,
151 e
->idx
| (sync
? F_SYNC_WR
: 0) |
152 TID_QID(adap
->sge
.fw_evtq
.abs_id
)));
153 req
->params
= htons(L2T_W_PORT(e
->lport
) | L2T_W_NOREPLY(!sync
));
154 req
->l2t_idx
= htons(e
->idx
);
155 req
->vlan
= htons(e
->vlan
);
157 memcpy(e
->dmac
, e
->neigh
->ha
, sizeof(e
->dmac
));
158 memcpy(req
->dst_mac
, e
->dmac
, sizeof(req
->dst_mac
));
160 set_wr_txq(skb
, CPL_PRIORITY_CONTROL
, 0);
161 t4_ofld_send(adap
, skb
);
163 if (sync
&& e
->state
!= L2T_STATE_SWITCHING
)
164 e
->state
= L2T_STATE_SYNC_WRITE
;
/*
 * Send packets waiting in an L2T entry's ARP queue.  Must be called with the
 * entry locked.
 */
172 static void send_pending(struct adapter
*adap
, struct l2t_entry
*e
)
174 while (e
->arpq_head
) {
175 struct sk_buff
*skb
= e
->arpq_head
;
177 e
->arpq_head
= skb
->next
;
179 t4_ofld_send(adap
, skb
);
/*
 * Process a CPL_L2T_WRITE_RPL.  Wake up the ARP queue if it completes a
 * synchronous L2T_WRITE.  Note that the TID in the reply is really the L2T
 * index it refers to.
 */
189 void do_l2t_write_rpl(struct adapter
*adap
, const struct cpl_l2t_write_rpl
*rpl
)
191 unsigned int tid
= GET_TID(rpl
);
192 unsigned int idx
= tid
& (L2T_SIZE
- 1);
194 if (unlikely(rpl
->status
!= CPL_ERR_NONE
)) {
195 dev_err(adap
->pdev_dev
,
196 "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
201 if (tid
& F_SYNC_WR
) {
202 struct l2t_entry
*e
= &adap
->l2t
->l2tab
[idx
];
205 if (e
->state
!= L2T_STATE_SWITCHING
) {
206 send_pending(adap
, e
);
207 e
->state
= (e
->neigh
->nud_state
& NUD_STALE
) ?
208 L2T_STATE_STALE
: L2T_STATE_VALID
;
210 spin_unlock(&e
->lock
);
/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
218 static inline void arpq_enqueue(struct l2t_entry
*e
, struct sk_buff
*skb
)
222 e
->arpq_tail
->next
= skb
;
228 int cxgb4_l2t_send(struct net_device
*dev
, struct sk_buff
*skb
,
231 struct adapter
*adap
= netdev2adap(dev
);
235 case L2T_STATE_STALE
: /* entry is stale, kick off revalidation */
236 neigh_event_send(e
->neigh
, NULL
);
237 spin_lock_bh(&e
->lock
);
238 if (e
->state
== L2T_STATE_STALE
)
239 e
->state
= L2T_STATE_VALID
;
240 spin_unlock_bh(&e
->lock
);
241 case L2T_STATE_VALID
: /* fast-path, send the packet on */
242 return t4_ofld_send(adap
, skb
);
243 case L2T_STATE_RESOLVING
:
244 case L2T_STATE_SYNC_WRITE
:
245 spin_lock_bh(&e
->lock
);
246 if (e
->state
!= L2T_STATE_SYNC_WRITE
&&
247 e
->state
!= L2T_STATE_RESOLVING
) {
248 spin_unlock_bh(&e
->lock
);
251 arpq_enqueue(e
, skb
);
252 spin_unlock_bh(&e
->lock
);
254 if (e
->state
== L2T_STATE_RESOLVING
&&
255 !neigh_event_send(e
->neigh
, NULL
)) {
256 spin_lock_bh(&e
->lock
);
257 if (e
->state
== L2T_STATE_RESOLVING
&& e
->arpq_head
)
258 write_l2e(adap
, e
, 1);
259 spin_unlock_bh(&e
->lock
);
264 EXPORT_SYMBOL(cxgb4_l2t_send
);
/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
269 static struct l2t_entry
*alloc_l2e(struct l2t_data
*d
)
271 struct l2t_entry
*end
, *e
, **p
;
273 if (!atomic_read(&d
->nfree
))
276 /* there's definitely a free entry */
277 for (e
= d
->rover
, end
= &d
->l2tab
[L2T_SIZE
]; e
!= end
; ++e
)
278 if (atomic_read(&e
->refcnt
) == 0)
281 for (e
= d
->l2tab
; atomic_read(&e
->refcnt
); ++e
)
285 atomic_dec(&d
->nfree
);
288 * The entry we found may be an inactive entry that is
289 * presently in the hash table. We need to remove it.
291 if (e
->state
< L2T_STATE_SWITCHING
)
292 for (p
= &d
->l2tab
[e
->hash
].first
; *p
; p
= &(*p
)->next
)
299 e
->state
= L2T_STATE_UNUSED
;
/*
 * Called when an L2T entry has no more users.
 */
306 static void t4_l2e_free(struct l2t_entry
*e
)
310 spin_lock_bh(&e
->lock
);
311 if (atomic_read(&e
->refcnt
) == 0) { /* hasn't been recycled */
313 neigh_release(e
->neigh
);
316 while (e
->arpq_head
) {
317 struct sk_buff
*skb
= e
->arpq_head
;
319 e
->arpq_head
= skb
->next
;
324 spin_unlock_bh(&e
->lock
);
326 d
= container_of(e
, struct l2t_data
, l2tab
[e
->idx
]);
327 atomic_inc(&d
->nfree
);
330 void cxgb4_l2t_release(struct l2t_entry
*e
)
332 if (atomic_dec_and_test(&e
->refcnt
))
335 EXPORT_SYMBOL(cxgb4_l2t_release
);
/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
341 static void reuse_entry(struct l2t_entry
*e
, struct neighbour
*neigh
)
343 unsigned int nud_state
;
345 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
346 if (neigh
!= e
->neigh
)
347 neigh_replace(e
, neigh
);
348 nud_state
= neigh
->nud_state
;
349 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)) ||
350 !(nud_state
& NUD_VALID
))
351 e
->state
= L2T_STATE_RESOLVING
;
352 else if (nud_state
& NUD_CONNECTED
)
353 e
->state
= L2T_STATE_VALID
;
355 e
->state
= L2T_STATE_STALE
;
356 spin_unlock(&e
->lock
);
359 struct l2t_entry
*cxgb4_l2t_get(struct l2t_data
*d
, struct neighbour
*neigh
,
360 const struct net_device
*physdev
,
361 unsigned int priority
)
366 int addr_len
= neigh
->tbl
->key_len
;
367 u32
*addr
= (u32
*)neigh
->primary_key
;
368 int ifidx
= neigh
->dev
->ifindex
;
369 int hash
= addr_hash(addr
, addr_len
, ifidx
);
371 if (neigh
->dev
->flags
& IFF_LOOPBACK
)
372 lport
= netdev2pinfo(physdev
)->tx_chan
+ 4;
374 lport
= netdev2pinfo(physdev
)->lport
;
376 if (neigh
->dev
->priv_flags
& IFF_802_1Q_VLAN
)
377 vlan
= vlan_dev_vlan_id(neigh
->dev
);
381 write_lock_bh(&d
->lock
);
382 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
383 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
&&
384 e
->vlan
== vlan
&& e
->lport
== lport
) {
386 if (atomic_read(&e
->refcnt
) == 1)
387 reuse_entry(e
, neigh
);
391 /* Need to allocate a new entry */
394 spin_lock(&e
->lock
); /* avoid race with t4_l2t_free */
395 e
->state
= L2T_STATE_RESOLVING
;
396 memcpy(e
->addr
, addr
, addr_len
);
400 e
->v6
= addr_len
== 16;
401 atomic_set(&e
->refcnt
, 1);
402 neigh_replace(e
, neigh
);
404 e
->next
= d
->l2tab
[hash
].first
;
405 d
->l2tab
[hash
].first
= e
;
406 spin_unlock(&e
->lock
);
409 write_unlock_bh(&d
->lock
);
412 EXPORT_SYMBOL(cxgb4_l2t_get
);
/*
 * Called when address resolution fails for an L2T entry to handle packets
 * on the arpq head.  If a packet specifies a failure handler it is invoked,
 * otherwise the packet is sent to the device.
 */
419 static void handle_failed_resolution(struct adapter
*adap
, struct sk_buff
*arpq
)
422 struct sk_buff
*skb
= arpq
;
423 const struct l2t_skb_cb
*cb
= L2T_SKB_CB(skb
);
427 if (cb
->arp_err_handler
)
428 cb
->arp_err_handler(cb
->handle
, skb
);
430 t4_ofld_send(adap
, skb
);
/*
 * Called when the host's neighbor layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
438 void t4_l2t_update(struct adapter
*adap
, struct neighbour
*neigh
)
441 struct sk_buff
*arpq
= NULL
;
442 struct l2t_data
*d
= adap
->l2t
;
443 int addr_len
= neigh
->tbl
->key_len
;
444 u32
*addr
= (u32
*) neigh
->primary_key
;
445 int ifidx
= neigh
->dev
->ifindex
;
446 int hash
= addr_hash(addr
, addr_len
, ifidx
);
448 read_lock_bh(&d
->lock
);
449 for (e
= d
->l2tab
[hash
].first
; e
; e
= e
->next
)
450 if (!addreq(e
, addr
) && e
->ifindex
== ifidx
) {
452 if (atomic_read(&e
->refcnt
))
454 spin_unlock(&e
->lock
);
457 read_unlock_bh(&d
->lock
);
461 read_unlock(&d
->lock
);
463 if (neigh
!= e
->neigh
)
464 neigh_replace(e
, neigh
);
466 if (e
->state
== L2T_STATE_RESOLVING
) {
467 if (neigh
->nud_state
& NUD_FAILED
) {
469 e
->arpq_head
= e
->arpq_tail
= NULL
;
470 } else if ((neigh
->nud_state
& (NUD_CONNECTED
| NUD_STALE
)) &&
472 write_l2e(adap
, e
, 1);
475 e
->state
= neigh
->nud_state
& NUD_CONNECTED
?
476 L2T_STATE_VALID
: L2T_STATE_STALE
;
477 if (memcmp(e
->dmac
, neigh
->ha
, sizeof(e
->dmac
)))
478 write_l2e(adap
, e
, 0);
481 spin_unlock_bh(&e
->lock
);
484 handle_failed_resolution(adap
, arpq
);
487 struct l2t_data
*t4_init_l2t(void)
492 d
= t4_alloc_mem(sizeof(*d
));
497 atomic_set(&d
->nfree
, L2T_SIZE
);
498 rwlock_init(&d
->lock
);
500 for (i
= 0; i
< L2T_SIZE
; ++i
) {
502 d
->l2tab
[i
].state
= L2T_STATE_UNUSED
;
503 spin_lock_init(&d
->l2tab
[i
].lock
);
504 atomic_set(&d
->l2tab
[i
].refcnt
, 0);
509 static inline void *l2t_get_idx(struct seq_file
*seq
, loff_t pos
)
511 struct l2t_entry
*l2tab
= seq
->private;
513 return pos
>= L2T_SIZE
? NULL
: &l2tab
[pos
];
516 static void *l2t_seq_start(struct seq_file
*seq
, loff_t
*pos
)
518 return *pos
? l2t_get_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
521 static void *l2t_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
523 v
= l2t_get_idx(seq
, *pos
);
/*
 * seq_file ->stop callback.  Intentionally empty: this function releases
 * nothing.
 */
static void l2t_seq_stop(struct seq_file *seq, void *v)
{
}
533 static char l2e_state(const struct l2t_entry
*e
)
536 case L2T_STATE_VALID
: return 'V';
537 case L2T_STATE_STALE
: return 'S';
538 case L2T_STATE_SYNC_WRITE
: return 'W';
539 case L2T_STATE_RESOLVING
: return e
->arpq_head
? 'A' : 'R';
540 case L2T_STATE_SWITCHING
: return 'X';
546 static int l2t_seq_show(struct seq_file
*seq
, void *v
)
548 if (v
== SEQ_START_TOKEN
)
549 seq_puts(seq
, " Idx IP address "
550 "Ethernet address VLAN/P LP State Users Port\n");
553 struct l2t_entry
*e
= v
;
555 spin_lock_bh(&e
->lock
);
556 if (e
->state
== L2T_STATE_SWITCHING
)
559 sprintf(ip
, e
->v6
? "%pI6c" : "%pI4", e
->addr
);
560 seq_printf(seq
, "%4u %-25s %17pM %4d %u %2u %c %5u %s\n",
562 e
->vlan
& VLAN_VID_MASK
, vlan_prio(e
), e
->lport
,
563 l2e_state(e
), atomic_read(&e
->refcnt
),
564 e
->neigh
? e
->neigh
->dev
->name
: "");
565 spin_unlock_bh(&e
->lock
);
570 static const struct seq_operations l2t_seq_ops
= {
571 .start
= l2t_seq_start
,
572 .next
= l2t_seq_next
,
573 .stop
= l2t_seq_stop
,
577 static int l2t_seq_open(struct inode
*inode
, struct file
*file
)
579 int rc
= seq_open(file
, &l2t_seq_ops
);
582 struct adapter
*adap
= inode
->i_private
;
583 struct seq_file
*seq
= file
->private_data
;
585 seq
->private = adap
->l2t
->l2tab
;
590 const struct file_operations t4_l2t_fops
= {
591 .owner
= THIS_MODULE
,
592 .open
= l2t_seq_open
,
595 .release
= seq_release
,