/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains OPA VNIC encapsulation/decapsulation functions.
 */

#include <linux/if_ether.h>
#include <linux/if_vlan.h>

#include "opa_vnic_internal.h"

/*
 * OPA 16B header fields.
 *
 * The *_MASK values select bits and the *_SHFT values are bit positions
 * used when packing the header words in opa_vnic_make_header().
 */
#define OPA_16B_LID_MASK        0xFFFFFull
#define OPA_16B_SLID_HIGH_SHFT  8
#define OPA_16B_SLID_MASK       0xF00ull
#define OPA_16B_DLID_MASK       0xF000ull
#define OPA_16B_DLID_HIGH_SHFT  12
#define OPA_16B_LEN_SHFT        20
#define OPA_16B_SC_SHFT         20
#define OPA_16B_RC_SHFT         25
#define OPA_16B_PKEY_SHFT       16

#define OPA_VNIC_L4_HDR_SHFT    16

/*
 * L2+L4 hdr len is 20 bytes. The header is built as an array of
 * OPA_VNIC_HDR_QW_LEN u32 elements, i.e. five 32-bit words.
 */
#define OPA_VNIC_HDR_QW_LEN   5
73 static inline void opa_vnic_make_header(u8
*hdr
, u32 slid
, u32 dlid
, u16 len
,
74 u16 pkey
, u16 entropy
, u8 sc
, u8 rc
,
75 u8 l4_type
, u16 l4_hdr
)
77 /* h[1]: LT=1, 16B L2=10 */
78 u32 h
[OPA_VNIC_HDR_QW_LEN
] = {0, 0xc0000000, 0, 0, 0};
82 h
[4] = l4_hdr
<< OPA_VNIC_L4_HDR_SHFT
;
84 /* Extract and set 4 upper bits and 20 lower bits of the lids */
85 h
[0] |= (slid
& OPA_16B_LID_MASK
);
86 h
[2] |= ((slid
>> (20 - OPA_16B_SLID_HIGH_SHFT
)) & OPA_16B_SLID_MASK
);
88 h
[1] |= (dlid
& OPA_16B_LID_MASK
);
89 h
[2] |= ((dlid
>> (20 - OPA_16B_DLID_HIGH_SHFT
)) & OPA_16B_DLID_MASK
);
91 h
[0] |= (len
<< OPA_16B_LEN_SHFT
);
92 h
[1] |= (rc
<< OPA_16B_RC_SHFT
);
93 h
[1] |= (sc
<< OPA_16B_SC_SHFT
);
94 h
[2] |= ((u32
)pkey
<< OPA_16B_PKEY_SHFT
);
96 memcpy(hdr
, h
, OPA_VNIC_HDR_LEN
);
100 * Using a simple hash table for mac table implementation with the last octet
101 * of mac address as a key.
103 static void opa_vnic_free_mac_tbl(struct hlist_head
*mactbl
)
105 struct opa_vnic_mac_tbl_node
*node
;
106 struct hlist_node
*tmp
;
112 vnic_hash_for_each_safe(mactbl
, bkt
, tmp
, node
, hlist
) {
113 hash_del(&node
->hlist
);
119 static struct hlist_head
*opa_vnic_alloc_mac_tbl(void)
121 u32 size
= sizeof(struct hlist_head
) * OPA_VNIC_MAC_TBL_SIZE
;
122 struct hlist_head
*mactbl
;
124 mactbl
= kzalloc(size
, GFP_KERNEL
);
126 return ERR_PTR(-ENOMEM
);
128 vnic_hash_init(mactbl
);
132 /* opa_vnic_release_mac_tbl - empty and free the mac table */
133 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter
*adapter
)
135 struct hlist_head
*mactbl
;
137 mutex_lock(&adapter
->mactbl_lock
);
138 mactbl
= rcu_access_pointer(adapter
->mactbl
);
139 rcu_assign_pointer(adapter
->mactbl
, NULL
);
141 opa_vnic_free_mac_tbl(mactbl
);
142 adapter
->info
.vport
.mac_tbl_digest
= 0;
143 mutex_unlock(&adapter
->mactbl_lock
);
147 * opa_vnic_query_mac_tbl - query the mac table for a section
149 * This function implements query of specific function of the mac table.
150 * The function also expects the requested range to be valid.
152 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter
*adapter
,
153 struct opa_veswport_mactable
*tbl
)
155 struct opa_vnic_mac_tbl_node
*node
;
156 struct hlist_head
*mactbl
;
158 u16 loffset
, lnum_entries
;
161 mactbl
= rcu_dereference(adapter
->mactbl
);
165 loffset
= be16_to_cpu(tbl
->offset
);
166 lnum_entries
= be16_to_cpu(tbl
->num_entries
);
168 vnic_hash_for_each(mactbl
, bkt
, node
, hlist
) {
169 struct __opa_vnic_mactable_entry
*nentry
= &node
->entry
;
170 struct opa_veswport_mactable_entry
*entry
;
172 if ((node
->index
< loffset
) ||
173 (node
->index
>= (loffset
+ lnum_entries
)))
176 /* populate entry in the tbl corresponding to the index */
177 entry
= &tbl
->tbl_entries
[node
->index
- loffset
];
178 memcpy(entry
->mac_addr
, nentry
->mac_addr
,
179 ARRAY_SIZE(entry
->mac_addr
));
180 memcpy(entry
->mac_addr_mask
, nentry
->mac_addr_mask
,
181 ARRAY_SIZE(entry
->mac_addr_mask
));
182 entry
->dlid_sd
= cpu_to_be32(nentry
->dlid_sd
);
184 tbl
->mac_tbl_digest
= cpu_to_be32(adapter
->info
.vport
.mac_tbl_digest
);
190 * opa_vnic_update_mac_tbl - update mac table section
192 * This function updates the specified section of the mac table.
193 * The procedure includes following steps.
194 * - Allocate a new mac (hash) table.
195 * - Add the specified entries to the new table.
196 * (except the ones that are requested to be deleted).
197 * - Add all the other entries from the old mac table.
198 * - If there is a failure, free the new table and return.
199 * - Switch to the new table.
200 * - Free the old table and return.
202 * The function also expects the requested range to be valid.
204 int opa_vnic_update_mac_tbl(struct opa_vnic_adapter
*adapter
,
205 struct opa_veswport_mactable
*tbl
)
207 struct opa_vnic_mac_tbl_node
*node
, *new_node
;
208 struct hlist_head
*new_mactbl
, *old_mactbl
;
211 u16 loffset
, lnum_entries
;
213 mutex_lock(&adapter
->mactbl_lock
);
214 /* allocate new mac table */
215 new_mactbl
= opa_vnic_alloc_mac_tbl();
216 if (IS_ERR(new_mactbl
)) {
217 mutex_unlock(&adapter
->mactbl_lock
);
218 return PTR_ERR(new_mactbl
);
221 loffset
= be16_to_cpu(tbl
->offset
);
222 lnum_entries
= be16_to_cpu(tbl
->num_entries
);
224 /* add updated entries to the new mac table */
225 for (i
= 0; i
< lnum_entries
; i
++) {
226 struct __opa_vnic_mactable_entry
*nentry
;
227 struct opa_veswport_mactable_entry
*entry
=
228 &tbl
->tbl_entries
[i
];
229 u8
*mac_addr
= entry
->mac_addr
;
230 u8 empty_mac
[ETH_ALEN
] = { 0 };
232 v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
233 loffset
+ i
, mac_addr
[0], mac_addr
[1], mac_addr
[2],
234 mac_addr
[3], mac_addr
[4], mac_addr
[5],
237 /* if the entry is being removed, do not add it */
238 if (!memcmp(mac_addr
, empty_mac
, ARRAY_SIZE(empty_mac
)))
241 node
= kzalloc(sizeof(*node
), GFP_KERNEL
);
247 node
->index
= loffset
+ i
;
248 nentry
= &node
->entry
;
249 memcpy(nentry
->mac_addr
, entry
->mac_addr
,
250 ARRAY_SIZE(nentry
->mac_addr
));
251 memcpy(nentry
->mac_addr_mask
, entry
->mac_addr_mask
,
252 ARRAY_SIZE(nentry
->mac_addr_mask
));
253 nentry
->dlid_sd
= be32_to_cpu(entry
->dlid_sd
);
254 key
= node
->entry
.mac_addr
[OPA_VNIC_MAC_HASH_IDX
];
255 vnic_hash_add(new_mactbl
, &node
->hlist
, key
);
258 /* add other entries from current mac table to new mac table */
259 old_mactbl
= rcu_access_pointer(adapter
->mactbl
);
263 vnic_hash_for_each(old_mactbl
, bkt
, node
, hlist
) {
264 if ((node
->index
>= loffset
) &&
265 (node
->index
< (loffset
+ lnum_entries
)))
268 new_node
= kzalloc(sizeof(*new_node
), GFP_KERNEL
);
274 new_node
->index
= node
->index
;
275 memcpy(&new_node
->entry
, &node
->entry
, sizeof(node
->entry
));
276 key
= new_node
->entry
.mac_addr
[OPA_VNIC_MAC_HASH_IDX
];
277 vnic_hash_add(new_mactbl
, &new_node
->hlist
, key
);
281 /* switch to new table */
282 rcu_assign_pointer(adapter
->mactbl
, new_mactbl
);
285 adapter
->info
.vport
.mac_tbl_digest
= be32_to_cpu(tbl
->mac_tbl_digest
);
287 /* upon failure, free the new table; otherwise, free the old table */
289 opa_vnic_free_mac_tbl(new_mactbl
);
291 opa_vnic_free_mac_tbl(old_mactbl
);
293 mutex_unlock(&adapter
->mactbl_lock
);
297 /* opa_vnic_chk_mac_tbl - check mac table for dlid */
298 static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter
*adapter
,
299 struct ethhdr
*mac_hdr
)
301 struct opa_vnic_mac_tbl_node
*node
;
302 struct hlist_head
*mactbl
;
307 mactbl
= rcu_dereference(adapter
->mactbl
);
308 if (unlikely(!mactbl
))
311 key
= mac_hdr
->h_dest
[OPA_VNIC_MAC_HASH_IDX
];
312 vnic_hash_for_each_possible(mactbl
, node
, hlist
, key
) {
313 struct __opa_vnic_mactable_entry
*entry
= &node
->entry
;
315 /* if related to source mac, skip */
316 if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry
->dlid_sd
)))
319 if (!memcmp(node
->entry
.mac_addr
, mac_hdr
->h_dest
,
320 ARRAY_SIZE(node
->entry
.mac_addr
))) {
321 /* mac address found */
322 dlid
= OPA_VNIC_DLID_SD_GET_DLID(node
->entry
.dlid_sd
);
332 /* opa_vnic_get_dlid - find and return the DLID */
333 static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter
*adapter
,
334 struct sk_buff
*skb
, u8 def_port
)
336 struct __opa_veswport_info
*info
= &adapter
->info
;
337 struct ethhdr
*mac_hdr
= (struct ethhdr
*)skb_mac_header(skb
);
340 dlid
= opa_vnic_chk_mac_tbl(adapter
, mac_hdr
);
344 if (is_multicast_ether_addr(mac_hdr
->h_dest
)) {
345 dlid
= info
->vesw
.u_mcast_dlid
;
347 if (is_local_ether_addr(mac_hdr
->h_dest
)) {
348 dlid
= ((uint32_t)mac_hdr
->h_dest
[5] << 16) |
349 ((uint32_t)mac_hdr
->h_dest
[4] << 8) |
352 v_warn("Null dlid in MAC address\n");
353 } else if (def_port
!= OPA_VNIC_INVALID_PORT
) {
354 if (def_port
< OPA_VESW_MAX_NUM_DEF_PORT
)
355 dlid
= info
->vesw
.u_ucast_dlid
[def_port
];
362 /* opa_vnic_get_sc - return the service class */
363 static u8
opa_vnic_get_sc(struct __opa_veswport_info
*info
,
366 struct ethhdr
*mac_hdr
= (struct ethhdr
*)skb_mac_header(skb
);
370 if (!__vlan_get_tag(skb
, &vlan_tci
)) {
371 u8 pcp
= OPA_VNIC_VLAN_PCP(vlan_tci
);
373 if (is_multicast_ether_addr(mac_hdr
->h_dest
))
374 sc
= info
->vport
.pcp_to_sc_mc
[pcp
];
376 sc
= info
->vport
.pcp_to_sc_uc
[pcp
];
378 if (is_multicast_ether_addr(mac_hdr
->h_dest
))
379 sc
= info
->vport
.non_vlan_sc_mc
;
381 sc
= info
->vport
.non_vlan_sc_uc
;
387 u8
opa_vnic_get_vl(struct opa_vnic_adapter
*adapter
, struct sk_buff
*skb
)
389 struct ethhdr
*mac_hdr
= (struct ethhdr
*)skb_mac_header(skb
);
390 struct __opa_veswport_info
*info
= &adapter
->info
;
393 if (skb_vlan_tag_present(skb
)) {
394 u8 pcp
= skb_vlan_tag_get(skb
) >> VLAN_PRIO_SHIFT
;
396 if (is_multicast_ether_addr(mac_hdr
->h_dest
))
397 vl
= info
->vport
.pcp_to_vl_mc
[pcp
];
399 vl
= info
->vport
.pcp_to_vl_uc
[pcp
];
401 if (is_multicast_ether_addr(mac_hdr
->h_dest
))
402 vl
= info
->vport
.non_vlan_vl_mc
;
404 vl
= info
->vport
.non_vlan_vl_uc
;
410 /* opa_vnic_get_rc - return the routing control */
411 static u8
opa_vnic_get_rc(struct __opa_veswport_info
*info
,
416 switch (vlan_get_protocol(skb
)) {
417 case htons(ETH_P_IPV6
):
418 proto
= ipv6_hdr(skb
)->nexthdr
;
419 if (proto
== IPPROTO_TCP
)
420 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
,
422 else if (proto
== IPPROTO_UDP
)
423 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
,
426 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
, IPV6
);
428 case htons(ETH_P_IP
):
429 proto
= ip_hdr(skb
)->protocol
;
430 if (proto
== IPPROTO_TCP
)
431 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
,
433 else if (proto
== IPPROTO_UDP
)
434 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
,
437 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
, IPV4
);
440 rout_ctrl
= OPA_VNIC_ENCAP_RC_EXT(info
->vesw
.rc
, DEFAULT
);
446 /* opa_vnic_calc_entropy - calculate the packet entropy */
447 u8
opa_vnic_calc_entropy(struct sk_buff
*skb
)
449 u32 hash
= skb_get_hash(skb
);
451 /* store XOR of all bytes in lower 8 bits */
455 /* return lower 8 bits as entropy */
456 return (u8
)(hash
& 0xFF);
459 /* opa_vnic_get_def_port - get default port based on entropy */
460 static inline u8
opa_vnic_get_def_port(struct opa_vnic_adapter
*adapter
,
465 /* Add the upper and lower 4-bits of entropy to get the flow id */
466 flow_id
= ((entropy
& 0xf) + (entropy
>> 4));
467 return adapter
->flow_tbl
[flow_id
& (OPA_VNIC_FLOW_TBL_SIZE
- 1)];
470 /* Calculate packet length including OPA header, crc and padding */
471 static inline int opa_vnic_wire_length(struct sk_buff
*skb
)
475 /* padding for 8 bytes size alignment */
476 pad_len
= -(skb
->len
+ OPA_VNIC_ICRC_TAIL_LEN
) & 0x7;
477 pad_len
+= OPA_VNIC_ICRC_TAIL_LEN
;
479 return (skb
->len
+ pad_len
) >> 3;
482 /* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
483 void opa_vnic_encap_skb(struct opa_vnic_adapter
*adapter
, struct sk_buff
*skb
)
485 struct __opa_veswport_info
*info
= &adapter
->info
;
486 struct opa_vnic_skb_mdata
*mdata
;
487 u8 def_port
, sc
, rc
, entropy
, *hdr
;
491 hdr
= skb_push(skb
, OPA_VNIC_HDR_LEN
);
493 entropy
= opa_vnic_calc_entropy(skb
);
494 def_port
= opa_vnic_get_def_port(adapter
, entropy
);
495 len
= opa_vnic_wire_length(skb
);
496 dlid
= opa_vnic_get_dlid(adapter
, skb
, def_port
);
497 sc
= opa_vnic_get_sc(info
, skb
);
498 rc
= opa_vnic_get_rc(info
, skb
);
499 l4_hdr
= info
->vesw
.vesw_id
;
501 mdata
= skb_push(skb
, sizeof(*mdata
));
502 mdata
->vl
= opa_vnic_get_vl(adapter
, skb
);
503 mdata
->entropy
= entropy
;
505 if (unlikely(!dlid
)) {
506 mdata
->flags
= OPA_VNIC_SKB_MDATA_ENCAP_ERR
;
510 opa_vnic_make_header(hdr
, info
->vport
.encap_slid
, dlid
, len
,
511 info
->vesw
.pkey
, entropy
, sc
, rc
,
512 OPA_VNIC_L4_ETHR
, l4_hdr
);