/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * This file is part of the Chelsio T4 support code.
 *
 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
 * release for licensing terms and conditions.
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/atomic.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/strsubr.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/ethernet.h>
#include <sys/containerof.h>
#include <inet/ip.h>
#include <inet/ipclassifier.h>
#include <inet/tcp.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "t4_l2t.h"

/* identifies sync vs async L2T_WRITE_REQs */
#define	S_SYNC_WR	12
#define	V_SYNC_WR(x)	((x) << S_SYNC_WR)
#define	F_SYNC_WR	V_SYNC_WR(1)
#define	VLAN_NONE	0xfff

/*
 * jhash.h: Jenkins hash support.
 *
 * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
 *
 * http://burtleburtle.net/bob/hash/
 *
 * These are the credits from Bob's sources:
 *
 * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
 * hash(), hash2(), hash3, and mix() are externally useful functions.
 * Routines to test the hash are included if SELF_TEST is defined.
 * You can use this free for any purpose. It has no warranty.
 */

/* NOTE: Arguments are modified. */
#define	__jhash_mix(a, b, c) \
{ \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
}

/* The golden ratio: an arbitrary value */
#define	JHASH_GOLDEN_RATIO	0x9e3779b9
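
/*
 * (0x9e3779b9 is 2^32 divided by the golden ratio, rounded down; the
 * algorithm only needs an arbitrary odd constant that spreads bits,
 * which is why the comment above calls it arbitrary.)
 */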

/*
 * A special ultra-optimized version that knows it is hashing exactly
 * 3 words.
 *
 * NOTE: In particular, the "c += length; __jhash_mix(a,b,c);" normally
 * done at the end is not done here.
 */
static inline u32
jhash_3words(u32 a, u32 b, u32 c, u32 initval)
{
	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += initval;

	__jhash_mix(a, b, c);

	return (c);
}

static inline u32
jhash_2words(u32 a, u32 b, u32 initval)
{
	return (jhash_3words(a, b, 0, initval));
}
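
/*
 * Usage sketch (this simply mirrors what arp_hash() below does): fold
 * an address and an interface index into a hash bucket number, e.g.
 *
 *	bucket = jhash_2words(ipaddr, ifindex, 0) & L2T_HASH_MASK;
 */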

#if defined(__GNUC__)
#define	likely(x)	__builtin_expect((x), 1)
#define	unlikely(x)	__builtin_expect((x), 0)
#else
#define	likely(x)	(x)
#define	unlikely(x)	(x)
#endif /* defined(__GNUC__) */

enum {
	L2T_STATE_VALID,	/* entry is up to date */
	L2T_STATE_STALE,	/* entry may be used but needs revalidation */
	L2T_STATE_RESOLVING,	/* entry needs address resolution */
	L2T_STATE_SYNC_WRITE,	/* synchronous write of entry underway */

	/* when state is one of the below the entry is not hashed */
	L2T_STATE_SWITCHING,	/* entry is being used by a switching filter */
	L2T_STATE_UNUSED	/* entry not in use */
};

struct l2t_data {
	krwlock_t lock;		/* serializes allocation and hash lookups */
	u_int l2t_size;		/* number of entries in l2tab[] */
	volatile uint_t nfree;	/* number of free entries */
	struct l2t_entry *rover; /* starting point for next allocation */
	struct l2t_entry l2tab[];
};
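
/*
 * l2tab[] is a C99 flexible array member: t4_init_l2t() below sizes the
 * whole table with a single kmem_zalloc() of
 * sizeof (struct l2t_data) + l2t_size * sizeof (struct l2t_entry).
 */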

#define	SA(x)		((struct sockaddr *)(x))
#define	SIN(x)		((struct sockaddr_in *)(x))
#define	SINADDR(x)	(SIN(x)->sin_addr.s_addr)
#define	atomic_read(x)	atomic_add_int_nv(x, 0)
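
/*
 * atomic_add_int_nv() returns the new value, so atomic_read() above is
 * an atomic add of 0: it fetches the current value with full atomic
 * semantics without modifying it.
 */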

#ifdef TCP_OFFLOAD_ENABLE
/*
 * Allocate a free L2T entry.
 * Must be called with l2t_data.lock held (as writer).
 */
static struct l2t_entry *
alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	ASSERT(rw_write_held(&d->lock));

	if (!atomic_read(&d->nfree))
		return (NULL);

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
		if (atomic_read(&e->refcnt) == 0)
			goto found;

	for (e = d->l2tab; atomic_read(&e->refcnt); ++e)
		;
found:
	d->rover = e + 1;
	atomic_dec_uint(&d->nfree);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state < L2T_STATE_SWITCHING) {
		for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
			if (*p == e) {
				*p = e->next;
				e->next = NULL;
				break;
			}
		}
	}

	e->state = L2T_STATE_UNUSED;
	return (e);
}
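
/*
 * Design note: the rover pointer makes the scan above a clock-style
 * sweep.  Each allocation resumes where the previous one left off, and
 * only wraps to the start of l2tab[] if no free (refcnt == 0) entry is
 * found before the end of the table.
 */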

/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.
 */
static int
write_l2e(adapter_t *sc, struct l2t_entry *e, int sync)
{
	mblk_t *m;
	struct cpl_l2t_write_req *req;
	int idx = e->idx + sc->vres.l2t.start;

	ASSERT(MUTEX_HELD(&e->lock));

	if ((m = allocb(sizeof (*req), BPRI_HI)) == NULL)
		return (ENOMEM);

	/* LINTED: E_BAD_PTR_CAST_ALIGN */
	req = (struct cpl_l2t_write_req *)m->b_wptr;

	/* LINTED: E_CONSTANT_CONDITION */
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
	    V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
	req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
	req->l2t_idx = htons(idx);
	req->vlan = htons(e->vlan);
	(void) memcpy(req->dst_mac, e->dmac, sizeof (req->dst_mac));

	m->b_wptr += sizeof (*req);

	(void) t4_mgmt_tx(sc, m);

	if (sync && e->state != L2T_STATE_SWITCHING)
		e->state = L2T_STATE_SYNC_WRITE;

	return (0);
}
#endif /* TCP_OFFLOAD_ENABLE */
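
/*
 * Note the division of labor in write_l2e(): the V_SYNC_WR() bit folded
 * into the TID lets the reply path tell synchronous writes apart from
 * asynchronous ones (the "identifies sync vs async" comment at the top
 * of the file), while V_L2T_W_NOREPLY(!sync) suppresses the completion
 * message entirely for asynchronous writes.
 */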

struct l2t_data *
t4_init_l2t(struct adapter *sc)
{
	int i;
	u_int l2t_size;
	struct l2t_data *d;

	l2t_size = sc->vres.l2t.size;
	if (l2t_size == 0)
		return (NULL);

	d = kmem_zalloc(sizeof (*d) + l2t_size * sizeof (struct l2t_entry),
	    KM_SLEEP);

	d->l2t_size = l2t_size;
	d->rover = d->l2tab;
	(void) atomic_swap_uint(&d->nfree, l2t_size);
	rw_init(&d->lock, NULL, RW_DRIVER, NULL);

	for (i = 0; i < l2t_size; i++) {
		/* LINTED: E_ASSIGN_NARROW_CONV */
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		mutex_init(&d->l2tab[i].lock, NULL, MUTEX_DRIVER, NULL);
		(void) atomic_swap_uint(&d->l2tab[i].refcnt, 0);
	}

#ifdef TCP_OFFLOAD_ENABLE
	(void) t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL,
	    do_l2t_write_rpl);
#endif

	return (d);
}

int
t4_free_l2t(struct l2t_data *d)
{
	u_int i;

	/* Tear down exactly what t4_init_l2t() set up. */
	for (i = 0; i < d->l2t_size; i++)
		mutex_destroy(&d->l2tab[i].lock);
	rw_destroy(&d->lock);
	kmem_free(d, sizeof (*d) + d->l2t_size * sizeof (struct l2t_entry));

	return (0);
}

#ifdef TCP_OFFLOAD_ENABLE
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
	if (atomic_inc_uint_nv(&e->refcnt) == 1) /* 0 -> 1 transition */
		atomic_dec_uint(&d->nfree);
}

/*
 * To avoid having to check address families we do not allow v4 and v6
 * neighbors to be on the same hash chain.  We keep v4 entries in the
 * first half of the available hash buckets and v6 in the second.
 */
enum {
	L2T_SZ_HALF = L2T_SIZE / 2,
	L2T_HASH_MASK = L2T_SZ_HALF - 1
};
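
/*
 * Illustration (the real table size comes from L2T_SIZE): if L2T_SIZE
 * were 256, L2T_HASH_MASK would be 127, arp_hash() below would return
 * buckets 0-127, and ipv6_hash() buckets 128-255, so the two address
 * families can never share a hash chain.
 */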

static inline unsigned int
arp_hash(const uint32_t *key, int ifindex)
{
	return (jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK);
}

static inline unsigned int
ipv6_hash(const uint32_t *key, int ifindex)
{
	uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3];

	return (L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK));
}

static inline unsigned int
addr_hash(const uint32_t *addr, int addr_len, int ifindex)
{
	return (addr_len == 4 ? arp_hash(addr, ifindex) :
	    ipv6_hash(addr, ifindex));
}

/*
 * Checks if an L2T entry is for the given IP/IPv6 address.  It does not
 * check whether the L2T entry and the address are of the same address
 * family.  Callers ensure an address is only checked against L2T entries
 * of the same family, something made trivial by the separation of IP and
 * IPv6 hash chains mentioned above.  Returns 0 if there's a match.
 */
static inline int
addreq(const struct l2t_entry *e, const uint32_t *addr)
{
	if (e->v6 != 0)
		return ((e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
		    (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]));
	return (e->addr[0] ^ addr[0]);
}
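
/*
 * The XOR/OR form above is branch-free: each XOR term is 0 only when
 * the corresponding 32-bit words are equal, so the OR of all four terms
 * is 0 exactly when the full 128-bit address matches.
 */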

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, mblk_t *m)
{
	ASSERT(MUTEX_HELD(&e->lock));
	ASSERT(m->b_next == NULL);

	if (e->arpq_head != NULL)
		e->arpq_tail->b_next = m;
	else
		e->arpq_head = m;
	e->arpq_tail = m;
}
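
/*
 * The queue is a singly-linked list of mblks chained through b_next;
 * keeping both head and tail pointers makes the append above O(1).
 */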

int
t4_l2t_send(struct adapter *sc, mblk_t *m, struct l2t_entry *e)
{
	sin_t *sin;
	ip2mac_t ip2m;

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */

	/* FALLTHRU */
	case L2T_STATE_VALID:	/* fast-path, send the packet on */
		(void) t4_wrq_tx(sc, MBUF_EQ(m), m);
		break;

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:
		mutex_enter(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mutex_exit(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mutex_exit(&e->lock);

		bzero(&ip2m, sizeof (ip2m));
		sin = (sin_t *)&ip2m.ip2mac_pa;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = e->in_addr;
		ip2m.ip2mac_ifindex = e->ifindex;

		if (e->state == L2T_STATE_RESOLVING) {
			(void) ip2mac(IP2MAC_RESOLVE, &ip2m, t4_l2t_update,
			    e, 0);
			if (ip2m.ip2mac_err == EINPROGRESS) {
				/* t4_l2t_update() will be called back */
				break;
			} else if (ip2m.ip2mac_err == 0)
				t4_l2t_update(&ip2m, e);
		}
		break;
	}

	return (0);
}

/*
 * Called when an L2T entry has no more users.  The entry is left in the
 * hash table since it is likely to be reused but we also bump nfree to
 * indicate that the entry can be reallocated for a different neighbor.
 * We also drop the existing neighbor reference in case the neighbor is
 * going away and is waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref
 * count drops to 0 we need to take the entry's lock to avoid races with
 * a new incarnation.
 */
static void
t4_l2e_free(struct l2t_entry *e)
{
	struct l2t_data *d;

	mutex_enter(&e->lock);
	/* LINTED: E_NOP_IF_STMT */
	if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */
		/*
		 * Don't need to worry about the arpq, an L2T entry can't
		 * be released if any packets are waiting for resolution as
		 * we need to be able to communicate with the device to
		 * close a connection.
		 */
	}
	mutex_exit(&e->lock);

	d = __containerof(e, struct l2t_data, l2tab[e->idx]);
	atomic_inc_uint(&d->nfree);
}

void
t4_l2t_release(struct l2t_entry *e)
{
	if (atomic_dec_uint_nv(&e->refcnt) == 0)
		t4_l2e_free(e);
}

/* ARGSUSED */
int
do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(rpl);
	unsigned int idx = tid % L2T_SIZE;

	if (unlikely(rpl->status != CPL_ERR_NONE)) {
		cxgb_printf(sc->dip, CE_WARN,
		    "Unexpected L2T_WRITE_RPL status %u for entry %u",
		    rpl->status, idx);
		return (-EINVAL);
	}

	return (0);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop
 * over the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg,
 * etc. on top of the real cxgbe interface.
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, conn_t *connp)
{
	struct l2t_entry *e;
	struct l2t_data *d = pi->adapter->l2t;
	int addr_len;
	uint32_t *addr;
	int hash;
	int index =
	    connp->conn_ixa->ixa_ire->ire_ill->ill_phyint->phyint_ifindex;
	unsigned int smt_idx = pi->port_id;

	addr = (uint32_t *)&connp->conn_faddr_v4;
	addr_len = sizeof (connp->conn_faddr_v4);

	hash = addr_hash(addr, addr_len, index);

	rw_enter(&d->lock, RW_WRITER);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (!addreq(e, addr) && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e != NULL) {
		mutex_enter(&e->lock); /* avoid race with t4_l2t_free */
		e->state = L2T_STATE_RESOLVING;
		(void) memcpy(e->addr, addr, addr_len);
		e->in_addr = connp->conn_faddr_v4;
		e->ifindex = index;
		/* LINTED: E_ASSIGN_NARROW_CONV */
		e->smt_idx = smt_idx;
		/* LINTED: E_ASSIGN_NARROW_CONV */
		e->hash = hash;
		e->lport = pi->lport;
		e->arpq_head = e->arpq_tail = NULL;
		e->v6 = (addr_len == 16);
		e->sc = pi->adapter;
		(void) atomic_swap_uint(&e->refcnt, 1);
		e->vlan = VLAN_NONE;
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		mutex_exit(&e->lock);
	}

done:
	rw_exit(&d->lock);

	return (e);
}
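
/*
 * Typical offload flow through this file: t4_l2t_get() looks up or
 * allocates an entry in L2T_STATE_RESOLVING, t4_l2t_send() queues
 * packets on the arpq and kicks off ip2mac() resolution, and the
 * t4_l2t_update() callback below copies the resolved MAC into the
 * entry and pushes it to hardware via write_l2e().
 */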

/*
 * Called when the host's neighbor layer makes a change to some entry
 * that is loaded into the HW L2 table.
 */
void
t4_l2t_update(ip2mac_t *ip2macp, void *arg)
{
	struct l2t_entry *e = (struct l2t_entry *)arg;
	struct adapter *sc = e->sc;
	uchar_t *cp;

	if (ip2macp->ip2mac_err != 0) {
		ASSERT(0); /* Don't know what to do. Needs to be investigated */
	}

	mutex_enter(&e->lock);
	if (atomic_read(&e->refcnt) != 0)
		goto found;
	e->state = L2T_STATE_STALE;
	mutex_exit(&e->lock);

	/* The TOE has no interest in this LLE */
	return;

found:
	if (atomic_read(&e->refcnt) != 0) {
		/* Entry is referenced by at least 1 offloaded connection. */
		cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
		bcopy(cp, e->dmac, 6);
		(void) write_l2e(sc, e, 1);
		e->state = L2T_STATE_VALID;
	}
	mutex_exit(&e->lock);
}
#endif /* TCP_OFFLOAD_ENABLE */