4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
26 * MAC Services Module - misc utilities
29 #include <sys/types.h>
31 #include <sys/mac_impl.h>
32 #include <sys/mac_client_priv.h>
33 #include <sys/mac_client_impl.h>
34 #include <sys/mac_soft_ring.h>
35 #include <sys/strsubr.h>
36 #include <sys/strsun.h>
38 #include <sys/pattr.h>
39 #include <sys/pci_tools.h>
41 #include <inet/ip_impl.h>
43 #include <sys/vtrace.h>
45 #include <sys/sunndi.h>
46 #include <inet/ipsec_impl.h>
47 #include <inet/sadb.h>
48 #include <inet/ipsecesp.h>
49 #include <inet/ipsecah.h>
52 * Copy an mblk, preserving its hardware checksum flags.
55 mac_copymsg_cksum(mblk_t
*mp
)
58 uint32_t start
, stuff
, end
, value
, flags
;
64 hcksum_retrieve(mp
, NULL
, NULL
, &start
, &stuff
, &end
, &value
, &flags
);
65 (void) hcksum_assoc(mp1
, NULL
, NULL
, start
, stuff
, end
, value
,
72 * Copy an mblk chain, preserving the hardware checksum flags of the
76 mac_copymsgchain_cksum(mblk_t
*mp
)
81 for (; mp
!= NULL
; mp
= mp
->b_next
) {
82 if ((*nmpp
= mac_copymsg_cksum(mp
)) == NULL
) {
87 nmpp
= &((*nmpp
)->b_next
);
94 * Process the specified mblk chain for proper handling of hardware
95 * checksum offload. This routine is invoked for loopback traffic
96 * between MAC clients.
97 * The function handles a NULL mblk chain passed as argument.
100 mac_fix_cksum(mblk_t
*mp_chain
)
102 mblk_t
*mp
, *prev
= NULL
, *new_chain
= mp_chain
, *mp1
;
103 uint32_t flags
, start
, stuff
, end
, value
;
105 for (mp
= mp_chain
; mp
!= NULL
; prev
= mp
, mp
= mp
->b_next
) {
108 struct ether_header
*ehp
;
111 hcksum_retrieve(mp
, NULL
, NULL
, &start
, &stuff
, &end
, &value
,
117 * Since the processing of checksum offload for loopback
118 * traffic requires modification of the packet contents,
119 * ensure that we are always modifying our own copy.
121 if (DB_REF(mp
) > 1) {
125 mp1
->b_next
= mp
->b_next
;
136 * Ethernet, and optionally VLAN header.
138 /* LINTED: improper alignment cast */
139 ehp
= (struct ether_header
*)mp
->b_rptr
;
140 if (ntohs(ehp
->ether_type
) == VLAN_TPID
) {
141 struct ether_vlan_header
*evhp
;
143 ASSERT(MBLKL(mp
) >= sizeof (struct ether_vlan_header
));
144 /* LINTED: improper alignment cast */
145 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
146 sap
= ntohs(evhp
->ether_type
);
147 offset
= sizeof (struct ether_vlan_header
);
149 sap
= ntohs(ehp
->ether_type
);
150 offset
= sizeof (struct ether_header
);
153 if (MBLKL(mp
) <= offset
) {
155 if (mp
->b_cont
== NULL
) {
156 /* corrupted packet, skip it */
158 prev
->b_next
= mp
->b_next
;
160 new_chain
= mp
->b_next
;
170 if (flags
& (HCK_FULLCKSUM
| HCK_IPV4_HDRCKSUM
)) {
174 * In order to compute the full and header
175 * checksums, we need to find and parse
176 * the IP and/or ULP headers.
179 sap
= (sap
< ETHERTYPE_802_MIN
) ? 0 : sap
;
184 if (sap
!= ETHERTYPE_IP
)
187 ASSERT(MBLKL(mp
) >= offset
+ sizeof (ipha_t
));
188 /* LINTED: improper alignment cast */
189 ipha
= (ipha_t
*)(mp
->b_rptr
+ offset
);
191 if (flags
& HCK_FULLCKSUM
) {
198 * Pointer to checksum field in ULP header.
200 proto
= ipha
->ipha_protocol
;
201 ASSERT(ipha
->ipha_version_and_hdr_length
==
202 IP_SIMPLE_HDR_VERSION
);
206 /* LINTED: improper alignment cast */
207 up
= IPH_TCPH_CHECKSUMP(ipha
,
208 IP_SIMPLE_HDR_LENGTH
);
212 /* LINTED: improper alignment cast */
213 up
= IPH_UDPH_CHECKSUMP(ipha
,
214 IP_SIMPLE_HDR_LENGTH
);
218 cmn_err(CE_WARN
, "mac_fix_cksum: "
219 "unexpected protocol: %d", proto
);
224 * Pseudo-header checksum.
226 src
= ipha
->ipha_src
;
227 dst
= ipha
->ipha_dst
;
228 len
= ntohs(ipha
->ipha_length
) -
229 IP_SIMPLE_HDR_LENGTH
;
231 cksum
= (dst
>> 16) + (dst
& 0xFFFF) +
232 (src
>> 16) + (src
& 0xFFFF);
236 * The checksum value stored in the packet needs
237 * to be correct. Compute it here.
240 cksum
+= (((proto
) == IPPROTO_UDP
) ?
241 IP_UDP_CSUM_COMP
: IP_TCP_CSUM_COMP
);
242 cksum
= IP_CSUM(mp
, IP_SIMPLE_HDR_LENGTH
+
244 *(up
) = (uint16_t)(cksum
? cksum
: ~cksum
);
247 * Flag the packet so that it appears
248 * that the checksum has already been
249 * verified by the hardware.
251 flags
&= ~HCK_FULLCKSUM
;
252 flags
|= HCK_FULLCKSUM_OK
;
256 if (flags
& HCK_IPV4_HDRCKSUM
) {
257 ASSERT(ipha
!= NULL
);
258 ipha
->ipha_hdr_checksum
=
259 (uint16_t)ip_csum_hdr(ipha
);
260 flags
&= ~HCK_IPV4_HDRCKSUM
;
261 flags
|= HCK_IPV4_HDRCKSUM_OK
;
266 if (flags
& HCK_PARTIALCKSUM
) {
267 uint16_t *up
, partial
, cksum
;
268 uchar_t
*ipp
; /* ptr to beginning of IP header */
270 if (mp
->b_cont
!= NULL
) {
273 mp1
= msgpullup(mp
, offset
+ end
);
276 mp1
->b_next
= mp
->b_next
;
286 ipp
= mp
->b_rptr
+ offset
;
287 /* LINTED: cast may result in improper alignment */
288 up
= (uint16_t *)((uchar_t
*)ipp
+ stuff
);
292 cksum
= IP_BCSUM_PARTIAL(mp
->b_rptr
+ offset
+ start
,
293 end
- start
, partial
);
295 *up
= cksum
? cksum
: ~cksum
;
298 * Since we already computed the whole checksum,
299 * indicate to the stack that it has already
300 * been verified by the hardware.
302 flags
&= ~HCK_PARTIALCKSUM
;
303 flags
|= HCK_FULLCKSUM_OK
;
307 (void) hcksum_assoc(mp
, NULL
, NULL
, start
, stuff
, end
,
308 value
, flags
, KM_NOSLEEP
);
315 * Add VLAN tag to the specified mblk.
318 mac_add_vlan_tag(mblk_t
*mp
, uint_t pri
, uint16_t vid
)
321 struct ether_vlan_header
*evhp
;
322 struct ether_header
*ehp
;
323 uint32_t start
, stuff
, end
, value
, flags
;
325 ASSERT(pri
!= 0 || vid
!= 0);
328 * Allocate an mblk for the new tagged ethernet header,
329 * and copy the MAC addresses and ethertype from the
333 hmp
= allocb(sizeof (struct ether_vlan_header
), BPRI_MED
);
339 evhp
= (struct ether_vlan_header
*)hmp
->b_rptr
;
340 ehp
= (struct ether_header
*)mp
->b_rptr
;
342 bcopy(ehp
, evhp
, (ETHERADDRL
* 2));
343 evhp
->ether_type
= ehp
->ether_type
;
344 evhp
->ether_tpid
= htons(ETHERTYPE_VLAN
);
346 hmp
->b_wptr
+= sizeof (struct ether_vlan_header
);
347 mp
->b_rptr
+= sizeof (struct ether_header
);
350 * Free the original message if it's now empty. Link the
351 * rest of messages to the header message.
353 hcksum_retrieve(mp
, NULL
, NULL
, &start
, &stuff
, &end
, &value
, &flags
);
354 (void) hcksum_assoc(hmp
, NULL
, NULL
, start
, stuff
, end
, value
, flags
,
356 if (MBLKL(mp
) == 0) {
357 hmp
->b_cont
= mp
->b_cont
;
362 ASSERT(MBLKL(hmp
) >= sizeof (struct ether_vlan_header
));
365 * Initialize the new TCI (Tag Control Information).
367 evhp
->ether_tci
= htons(VLAN_TCI(pri
, 0, vid
));
373 * Adds a VLAN tag with the specified VID and priority to each mblk of
374 * the specified chain.
377 mac_add_vlan_tag_chain(mblk_t
*mp_chain
, uint_t pri
, uint16_t vid
)
379 mblk_t
*next_mp
, **prev
, *mp
;
385 next_mp
= mp
->b_next
;
387 if ((mp
= mac_add_vlan_tag(mp
, pri
, vid
)) == NULL
) {
388 freemsgchain(next_mp
);
393 mp
= mp
->b_next
= next_mp
;
403 mac_strip_vlan_tag(mblk_t
*mp
)
406 struct ether_vlan_header
*evhp
;
408 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
409 if (ntohs(evhp
->ether_tpid
) == ETHERTYPE_VLAN
) {
410 ASSERT(MBLKL(mp
) >= sizeof (struct ether_vlan_header
));
412 if (DB_REF(mp
) > 1) {
420 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
422 ovbcopy(mp
->b_rptr
, mp
->b_rptr
+ VLAN_TAGSZ
, 2 * ETHERADDRL
);
423 mp
->b_rptr
+= VLAN_TAGSZ
;
429 * Strip VLAN tag from each mblk of the chain.
432 mac_strip_vlan_tag_chain(mblk_t
*mp_chain
)
434 mblk_t
*mp
, *next_mp
, **prev
;
440 next_mp
= mp
->b_next
;
442 if ((mp
= mac_strip_vlan_tag(mp
)) == NULL
) {
443 freemsgchain(next_mp
);
448 mp
= mp
->b_next
= next_mp
;
455 * Default callback function. Used when the datapath is not yet initialized.
459 mac_pkt_drop(void *arg
, mac_resource_handle_t resource
, mblk_t
*mp
,
464 while (mp1
!= NULL
) {
473 * Determines the IPv6 header length accounting for all the optional IPv6
474 * headers (hop-by-hop, destination, routing and fragment). The header length
475 * and next header value (a transport header) are captured.
477 * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
481 mac_ip_hdr_length_v6(ip6_t
*ip6h
, uint8_t *endptr
, uint16_t *hdr_length
,
482 uint8_t *next_hdr
, ip6_frag_t
**fragp
)
492 if (((uchar_t
*)ip6h
+ IPV6_HDR_LEN
) > endptr
)
494 ASSERT(IPH_HDR_VERSION(ip6h
) == IPV6_VERSION
);
495 length
= IPV6_HDR_LEN
;
496 whereptr
= ((uint8_t *)&ip6h
[1]); /* point to next hdr */
501 nexthdrp
= &ip6h
->ip6_nxt
;
502 while (whereptr
< endptr
) {
503 /* Is there enough left for len + nexthdr? */
504 if (whereptr
+ MIN_EHDR_LEN
> endptr
)
508 case IPPROTO_HOPOPTS
:
509 case IPPROTO_DSTOPTS
:
510 /* Assumes the headers are identical for hbh and dst */
511 desthdr
= (ip6_dest_t
*)whereptr
;
512 ehdrlen
= 8 * (desthdr
->ip6d_len
+ 1);
513 if ((uchar_t
*)desthdr
+ ehdrlen
> endptr
)
515 nexthdrp
= &desthdr
->ip6d_nxt
;
517 case IPPROTO_ROUTING
:
518 rthdr
= (ip6_rthdr_t
*)whereptr
;
519 ehdrlen
= 8 * (rthdr
->ip6r_len
+ 1);
520 if ((uchar_t
*)rthdr
+ ehdrlen
> endptr
)
522 nexthdrp
= &rthdr
->ip6r_nxt
;
524 case IPPROTO_FRAGMENT
:
525 fraghdr
= (ip6_frag_t
*)whereptr
;
526 ehdrlen
= sizeof (ip6_frag_t
);
527 if ((uchar_t
*)&fraghdr
[1] > endptr
)
529 nexthdrp
= &fraghdr
->ip6f_nxt
;
534 /* No next header means we're finished */
536 *hdr_length
= length
;
537 *next_hdr
= *nexthdrp
;
542 *hdr_length
= length
;
543 *next_hdr
= *nexthdrp
;
546 case IPPROTO_HOPOPTS
:
547 case IPPROTO_DSTOPTS
:
548 case IPPROTO_ROUTING
:
549 case IPPROTO_FRAGMENT
:
551 * If any known extension headers are still to be processed,
552 * the packet's malformed (or at least all the IP header(s) are
553 * not in the same mblk - and that should never happen).
559 * If we get here, we know that all of the IP headers were in
560 * the same mblk, even if the ULP header is in the next mblk.
562 *hdr_length
= length
;
563 *next_hdr
= *nexthdrp
;
569 * The following set of routines are there to take care of interrupt
570 * re-targeting for legacy (fixed) interrupts. Some older versions
571 * of the popular NICs like e1000g do not support MSI-X interrupts
572 * and they reserve fixed interrupts for RX/TX rings. To re-target
573 * these interrupts, PCITOOL ioctls need to be used.
575 typedef struct mac_dladm_intr
{
578 char driver_path
[MAXPATHLEN
];
579 char nexus_path
[MAXPATHLEN
];
582 /* Bind the interrupt to cpu_num */
584 mac_set_intr(ldi_handle_t lh
, processorid_t cpu_num
, int oldcpuid
, int ino
)
586 pcitool_intr_set_t iset
;
589 iset
.old_cpu
= oldcpuid
;
591 iset
.cpu_id
= cpu_num
;
592 iset
.user_version
= PCITOOL_VERSION
;
593 err
= ldi_ioctl(lh
, PCITOOL_DEVICE_SET_INTR
, (intptr_t)&iset
, FKIOCTL
,
600 * Search interrupt information. iget is filled in with the info to search
603 mac_search_intrinfo(pcitool_intr_get_t
*iget_p
, mac_dladm_intr_t
*dln
)
606 char driver_path
[2 * MAXPATHLEN
];
608 for (i
= 0; i
< iget_p
->num_devs
; i
++) {
609 (void) strlcpy(driver_path
, iget_p
->dev
[i
].path
, MAXPATHLEN
);
610 (void) snprintf(&driver_path
[strlen(driver_path
)], MAXPATHLEN
,
611 ":%s%d", iget_p
->dev
[i
].driver_name
,
612 iget_p
->dev
[i
].dev_inst
);
613 /* Match the constructed path against the device's driver path */
614 if (strcmp(driver_path
, dln
->driver_path
) == 0) {
615 dln
->ino
= iget_p
->ino
;
616 dln
->cpu_id
= iget_p
->cpu_id
;
624 * Get information about ino, i.e. if this is the interrupt for our
625 * device and where it is bound etc.
628 mac_get_single_intr(ldi_handle_t lh
, int oldcpuid
, int ino
,
629 mac_dladm_intr_t
*dln
)
631 pcitool_intr_get_t
*iget_p
;
638 * Check if SLEEP is OK, i.e. if we could come here in response to
639 * changing the fanout due to some callback from the driver, say
640 * link speed changes.
642 ipsz
= PCITOOL_IGET_SIZE(0);
643 iget_p
= kmem_zalloc(ipsz
, KM_SLEEP
);
645 iget_p
->num_devs_ret
= 0;
646 iget_p
->user_version
= PCITOOL_VERSION
;
647 iget_p
->cpu_id
= oldcpuid
;
650 err
= ldi_ioctl(lh
, PCITOOL_DEVICE_GET_INTR
, (intptr_t)iget_p
,
651 FKIOCTL
, kcred
, NULL
);
653 kmem_free(iget_p
, ipsz
);
656 if (iget_p
->num_devs
== 0) {
657 kmem_free(iget_p
, ipsz
);
660 inum
= iget_p
->num_devs
;
661 if (iget_p
->num_devs_ret
< iget_p
->num_devs
) {
663 nipsz
= PCITOOL_IGET_SIZE(iget_p
->num_devs
);
665 kmem_free(iget_p
, ipsz
);
667 iget_p
= kmem_zalloc(ipsz
, KM_SLEEP
);
669 iget_p
->num_devs_ret
= inum
;
670 iget_p
->cpu_id
= oldcpuid
;
672 iget_p
->user_version
= PCITOOL_VERSION
;
673 err
= ldi_ioctl(lh
, PCITOOL_DEVICE_GET_INTR
, (intptr_t)iget_p
,
674 FKIOCTL
, kcred
, NULL
);
676 kmem_free(iget_p
, ipsz
);
680 if (iget_p
->num_devs
!= iget_p
->num_devs_ret
) {
681 kmem_free(iget_p
, ipsz
);
686 if (mac_search_intrinfo(iget_p
, dln
)) {
687 kmem_free(iget_p
, ipsz
);
690 kmem_free(iget_p
, ipsz
);
695 * Get the interrupts and check each one to see if it is for our device.
698 mac_validate_intr(ldi_handle_t lh
, mac_dladm_intr_t
*dln
, processorid_t cpuid
)
700 pcitool_intr_info_t intr_info
;
705 err
= ldi_ioctl(lh
, PCITOOL_SYSTEM_INTR_INFO
, (intptr_t)&intr_info
,
706 FKIOCTL
, kcred
, NULL
);
710 for (oldcpuid
= 0; oldcpuid
< intr_info
.num_cpu
; oldcpuid
++) {
711 for (ino
= 0; ino
< intr_info
.num_intr
; ino
++) {
712 if (mac_get_single_intr(lh
, oldcpuid
, ino
, dln
)) {
713 if (dln
->cpu_id
== cpuid
)
723 * Obtain the nexus parent node info. for mdip.
726 mac_get_nexus_node(dev_info_t
*mdip
, mac_dladm_intr_t
*dln
)
728 struct dev_info
*tdip
= (struct dev_info
*)mdip
;
729 struct ddi_minor_data
*minordata
;
732 char pathname
[MAXPATHLEN
];
734 while (tdip
!= NULL
) {
736 * The netboot code could call this function while walking the
737 * device tree so we need to use ndi_devi_tryenter() here to
740 if (ndi_devi_tryenter((dev_info_t
*)tdip
, &circ
) == 0)
743 for (minordata
= tdip
->devi_minor
; minordata
!= NULL
;
744 minordata
= minordata
->next
) {
745 if (strncmp(minordata
->ddm_node_type
, DDI_NT_INTRCTL
,
746 strlen(DDI_NT_INTRCTL
)) == 0) {
747 pdip
= minordata
->dip
;
748 (void) ddi_pathname(pdip
, pathname
);
749 (void) snprintf(dln
->nexus_path
, MAXPATHLEN
,
750 "/devices%s:intr", pathname
);
751 (void) ddi_pathname_minor(minordata
, pathname
);
752 ndi_devi_exit((dev_info_t
*)tdip
, circ
);
756 ndi_devi_exit((dev_info_t
*)tdip
, circ
);
757 tdip
= tdip
->devi_parent
;
763 * For a primary MAC client, if the user has set a list of CPUs or
764 * we have obtained it implicitly, we try to retarget the interrupt
765 * for that device on one of the CPUs in the list.
766 * We assign the interrupt to the same CPU as the poll thread.
769 mac_check_interrupt_binding(dev_info_t
*mdip
, int32_t cpuid
)
771 ldi_handle_t lh
= NULL
;
772 ldi_ident_t li
= NULL
;
775 mac_dladm_intr_t dln
;
777 struct ddi_minor_data
*minordata
;
779 dln
.nexus_path
[0] = '\0';
780 dln
.driver_path
[0] = '\0';
782 minordata
= ((struct dev_info
*)mdip
)->devi_minor
;
783 while (minordata
!= NULL
) {
784 if (minordata
->type
== DDM_MINOR
)
786 minordata
= minordata
->next
;
788 if (minordata
== NULL
)
791 (void) ddi_pathname_minor(minordata
, dln
.driver_path
);
793 dip
= mac_get_nexus_node(mdip
, &dln
);
798 err
= ldi_ident_from_major(ddi_driver_major(dip
), &li
);
802 err
= ldi_open_by_name(dln
.nexus_path
, FREAD
|FWRITE
, kcred
, &lh
, li
);
806 ret
= mac_validate_intr(lh
, &dln
, cpuid
);
808 (void) ldi_close(lh
, FREAD
|FWRITE
, kcred
);
813 if ((err
= (mac_set_intr(lh
, cpuid
, dln
.cpu_id
, dln
.ino
)))
815 (void) ldi_close(lh
, FREAD
|FWRITE
, kcred
);
818 (void) ldi_close(lh
, FREAD
|FWRITE
, kcred
);
823 mac_client_set_intr_cpu(void *arg
, mac_client_handle_t mch
, int32_t cpuid
)
825 dev_info_t
*mdip
= (dev_info_t
*)arg
;
826 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
827 mac_resource_props_t
*mrp
;
828 mac_perim_handle_t mph
;
829 flow_entry_t
*flent
= mcip
->mci_flent
;
830 mac_soft_ring_set_t
*rx_srs
;
833 if (!mac_check_interrupt_binding(mdip
, cpuid
))
835 mac_perim_enter_by_mh((mac_handle_t
)mcip
->mci_mip
, &mph
);
836 mrp
= MCIP_RESOURCE_PROPS(mcip
);
837 mrp
->mrp_rx_intr_cpu
= cpuid
;
838 if (flent
!= NULL
&& flent
->fe_rx_srs_cnt
== 2) {
839 rx_srs
= flent
->fe_rx_srs
[1];
840 srs_cpu
= &rx_srs
->srs_cpu
;
841 srs_cpu
->mc_rx_intr_cpu
= cpuid
;
847 mac_client_intr_cpu(mac_client_handle_t mch
)
849 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
851 mac_soft_ring_set_t
*rx_srs
;
852 flow_entry_t
*flent
= mcip
->mci_flent
;
853 mac_resource_props_t
*mrp
= MCIP_RESOURCE_PROPS(mcip
);
858 * Check if we need to retarget the interrupt. We do this only
859 * for the primary MAC client. We do this if we have the only
860 * exclusive ring in the group.
862 if (mac_is_primary_client(mcip
) && flent
->fe_rx_srs_cnt
== 2) {
863 rx_srs
= flent
->fe_rx_srs
[1];
864 srs_cpu
= &rx_srs
->srs_cpu
;
865 ring
= rx_srs
->srs_ring
;
866 mintr
= &ring
->mr_info
.mri_intr
;
868 * If ddi_handle is present or the poll CPU is
869 * already bound to the interrupt CPU, return -1.
871 if (mintr
->mi_ddi_handle
!= NULL
||
872 ((mrp
->mrp_ncpus
!= 0) &&
873 (mrp
->mrp_rx_intr_cpu
== srs_cpu
->mc_rx_pollid
))) {
876 return (srs_cpu
->mc_rx_pollid
);
882 mac_get_devinfo(mac_handle_t mh
)
884 mac_impl_t
*mip
= (mac_impl_t
*)mh
;
886 return ((void *)mip
->mi_dip
);
889 #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
890 #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3])
891 #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5])
894 mac_pkt_hash(uint_t media
, mblk_t
*mp
, uint8_t policy
, boolean_t is_outbound
)
896 struct ether_header
*ehp
;
901 boolean_t ip_fragmented
;
904 * We may want to have one of these per MAC type plugin in the
905 * future. For now it supports only ethernet.
907 if (media
!= DL_ETHER
)
910 /* for now we support only outbound packets */
912 ASSERT(IS_P2ALIGNED(mp
->b_rptr
, sizeof (uint16_t)));
913 ASSERT(MBLKL(mp
) >= sizeof (struct ether_header
));
915 /* compute L2 hash */
917 ehp
= (struct ether_header
*)mp
->b_rptr
;
919 if ((policy
& MAC_PKT_HASH_L2
) != 0) {
920 uchar_t
*mac_src
= ehp
->ether_shost
.ether_addr_octet
;
921 uchar_t
*mac_dst
= ehp
->ether_dhost
.ether_addr_octet
;
922 hash
= PKT_HASH_MAC(mac_src
) ^ PKT_HASH_MAC(mac_dst
);
923 policy
&= ~MAC_PKT_HASH_L2
;
929 /* skip ethernet header */
931 sap
= ntohs(ehp
->ether_type
);
932 if (sap
== ETHERTYPE_VLAN
) {
933 struct ether_vlan_header
*evhp
;
934 mblk_t
*newmp
= NULL
;
936 skip_len
= sizeof (struct ether_vlan_header
);
937 if (MBLKL(mp
) < skip_len
) {
938 /* the vlan tag is the payload, pull up first */
939 newmp
= msgpullup(mp
, -1);
940 if ((newmp
== NULL
) || (MBLKL(newmp
) < skip_len
)) {
943 evhp
= (struct ether_vlan_header
*)newmp
->b_rptr
;
945 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
948 sap
= ntohs(evhp
->ether_type
);
951 skip_len
= sizeof (struct ether_header
);
954 /* if ethernet header is in its own mblk, skip it */
955 if (MBLKL(mp
) <= skip_len
) {
956 skip_len
-= MBLKL(mp
);
962 sap
= (sap
< ETHERTYPE_802_MIN
) ? 0 : sap
;
964 /* compute IP src/dst addresses hash and skip IPv{4,6} header */
971 * If the header is not aligned or the header doesn't fit
972 * in the mblk, bail now. Note that this may cause packets
975 iphp
= (ipha_t
*)(mp
->b_rptr
+ skip_len
);
976 if (((unsigned char *)iphp
+ sizeof (ipha_t
) > mp
->b_wptr
) ||
977 !OK_32PTR((char *)iphp
))
980 proto
= iphp
->ipha_protocol
;
981 skip_len
+= IPH_HDR_LENGTH(iphp
);
983 /* Check if the packet is fragmented. */
984 ip_fragmented
= ntohs(iphp
->ipha_fragment_offset_and_flags
) &
988 * For fragmented packets, use addresses in addition to
989 * the frag_id to generate the hash in order to get
990 * better distribution.
992 if (ip_fragmented
|| (policy
& MAC_PKT_HASH_L3
) != 0) {
993 uint8_t *ip_src
= (uint8_t *)&(iphp
->ipha_src
);
994 uint8_t *ip_dst
= (uint8_t *)&(iphp
->ipha_dst
);
996 hash
^= (PKT_HASH_4BYTES(ip_src
) ^
997 PKT_HASH_4BYTES(ip_dst
));
998 policy
&= ~MAC_PKT_HASH_L3
;
1001 if (ip_fragmented
) {
1002 uint8_t *identp
= (uint8_t *)&iphp
->ipha_ident
;
1003 hash
^= PKT_HASH_2BYTES(identp
);
1008 case ETHERTYPE_IPV6
: {
1010 ip6_frag_t
*frag
= NULL
;
1011 uint16_t hdr_length
;
1014 * If the header is not aligned or the header doesn't fit
1015 * in the mblk, bail now. Note that this may cause packets
1019 ip6hp
= (ip6_t
*)(mp
->b_rptr
+ skip_len
);
1020 if (((unsigned char *)ip6hp
+ IPV6_HDR_LEN
> mp
->b_wptr
) ||
1021 !OK_32PTR((char *)ip6hp
))
1024 if (!mac_ip_hdr_length_v6(ip6hp
, mp
->b_wptr
, &hdr_length
,
1027 skip_len
+= hdr_length
;
1030 * For fragmented packets, use addresses in addition to
1031 * the frag_id to generate the hash in order to get
1032 * better distribution.
1034 if (frag
!= NULL
|| (policy
& MAC_PKT_HASH_L3
) != 0) {
1035 uint8_t *ip_src
= &(ip6hp
->ip6_src
.s6_addr8
[12]);
1036 uint8_t *ip_dst
= &(ip6hp
->ip6_dst
.s6_addr8
[12]);
1038 hash
^= (PKT_HASH_4BYTES(ip_src
) ^
1039 PKT_HASH_4BYTES(ip_dst
));
1040 policy
&= ~MAC_PKT_HASH_L3
;
1044 uint8_t *identp
= (uint8_t *)&frag
->ip6f_ident
;
1045 hash
^= PKT_HASH_4BYTES(identp
);
1057 /* if ip header is in its own mblk, skip it */
1058 if (MBLKL(mp
) <= skip_len
) {
1059 skip_len
-= MBLKL(mp
);
1065 /* parse ULP header */
1073 * These Internet Protocols are intentionally designed
1074 * for hashing from the get-go. Port numbers are in the first
1075 * word for transports, SPI is first for ESP.
1077 if (mp
->b_rptr
+ skip_len
+ 4 > mp
->b_wptr
)
1079 hash
^= PKT_HASH_4BYTES((mp
->b_rptr
+ skip_len
));
1083 ah_t
*ah
= (ah_t
*)(mp
->b_rptr
+ skip_len
);
1084 uint_t ah_length
= AH_TOTAL_LEN(ah
);
1086 if ((unsigned char *)ah
+ sizeof (ah_t
) > mp
->b_wptr
)
1089 proto
= ah
->ah_nexthdr
;
1090 skip_len
+= ah_length
;
1092 /* if AH header is in its own mblk, skip it */
1093 if (MBLKL(mp
) <= skip_len
) {
1094 skip_len
-= MBLKL(mp
);