/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
27 #include <sys/strsun.h>
30 #include <sys/mac_impl.h>
31 #include <sys/mac_client_impl.h>
32 #include <sys/mac_stat.h>
34 #include <sys/dls_impl.h>
35 #include <sys/mac_soft_ring.h>
36 #include <sys/ethernet.h>
37 #include <sys/cpupart.h>
39 #include <sys/pool_pset.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <netinet/sctp.h>
47 typedef struct flow_stats_s
{
57 /* global flow table, will be a per exclusive-zone table later */
58 static mod_hash_t
*flow_hash
;
59 static krwlock_t flow_tab_lock
;
61 static kmem_cache_t
*flow_cache
;
62 static kmem_cache_t
*flow_tab_cache
;
63 static flow_ops_t flow_l2_ops
;
70 #define FS_OFF(f) (offsetof(flow_stats_t, f))
71 static flow_stats_info_t flow_stats_list
[] = {
72 {"rbytes", FS_OFF(fs_ibytes
)},
73 {"ipackets", FS_OFF(fs_ipackets
)},
74 {"ierrors", FS_OFF(fs_ierrors
)},
75 {"obytes", FS_OFF(fs_obytes
)},
76 {"opackets", FS_OFF(fs_opackets
)},
77 {"oerrors", FS_OFF(fs_oerrors
)}
79 #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
82 * Checks whether a flow mask is legal.
84 static flow_tab_info_t
*mac_flow_tab_info_get(flow_mask_t
);
87 flow_stat_init(kstat_named_t
*knp
)
91 for (i
= 0; i
< FS_SIZE
; i
++, knp
++) {
92 kstat_named_init(knp
, flow_stats_list
[i
].fs_name
,
98 flow_stat_update(kstat_t
*ksp
, int rw
)
100 flow_entry_t
*fep
= ksp
->ks_private
;
101 kstat_named_t
*knp
= ksp
->ks_data
;
104 mac_rx_stats_t
*mac_rx_stat
;
105 mac_tx_stats_t
*mac_tx_stat
;
106 flow_stats_t flow_stats
;
107 mac_soft_ring_set_t
*mac_srs
;
109 if (rw
!= KSTAT_READ
)
112 bzero(&flow_stats
, sizeof (flow_stats_t
));
114 for (i
= 0; i
< fep
->fe_rx_srs_cnt
; i
++) {
115 mac_srs
= (mac_soft_ring_set_t
*)fep
->fe_rx_srs
[i
];
116 if (mac_srs
== NULL
) /* Multicast flow */
118 mac_rx_stat
= &mac_srs
->srs_rx
.sr_stat
;
120 flow_stats
.fs_ibytes
+= mac_rx_stat
->mrs_intrbytes
+
121 mac_rx_stat
->mrs_pollbytes
+ mac_rx_stat
->mrs_lclbytes
;
123 flow_stats
.fs_ipackets
+= mac_rx_stat
->mrs_intrcnt
+
124 mac_rx_stat
->mrs_pollcnt
+ mac_rx_stat
->mrs_lclcnt
;
126 flow_stats
.fs_ierrors
+= mac_rx_stat
->mrs_ierrors
;
129 mac_srs
= (mac_soft_ring_set_t
*)fep
->fe_tx_srs
;
130 if (mac_srs
== NULL
) /* Multicast flow */
132 mac_tx_stat
= &mac_srs
->srs_tx
.st_stat
;
134 flow_stats
.fs_obytes
= mac_tx_stat
->mts_obytes
;
135 flow_stats
.fs_opackets
= mac_tx_stat
->mts_opackets
;
136 flow_stats
.fs_oerrors
= mac_tx_stat
->mts_oerrors
;
139 for (i
= 0; i
< FS_SIZE
; i
++, knp
++) {
141 ((uchar_t
*)&flow_stats
+ flow_stats_list
[i
].fs_offset
);
142 knp
->value
.ui64
= *statp
;
148 flow_stat_create(flow_entry_t
*fep
)
152 uint_t nstats
= FS_SIZE
;
155 * Fow now, flow entries are only manipulated and visible from the
158 ksp
= kstat_create_zone("unix", 0, (char *)fep
->fe_flow_name
, "flow",
159 KSTAT_TYPE_NAMED
, nstats
, 0, GLOBAL_ZONEID
);
163 ksp
->ks_update
= flow_stat_update
;
164 ksp
->ks_private
= fep
;
167 knp
= (kstat_named_t
*)ksp
->ks_data
;
173 flow_stat_destroy(flow_entry_t
*fep
)
175 if (fep
->fe_ksp
!= NULL
) {
176 kstat_delete(fep
->fe_ksp
);
182 * Initialize the flow table
187 flow_cache
= kmem_cache_create("flow_entry_cache",
188 sizeof (flow_entry_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
189 flow_tab_cache
= kmem_cache_create("flow_tab_cache",
190 sizeof (flow_tab_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
191 flow_hash
= mod_hash_create_extended("flow_hash",
192 100, mod_hash_null_keydtor
, mod_hash_null_valdtor
,
193 mod_hash_bystr
, NULL
, mod_hash_strkey_cmp
, KM_SLEEP
);
194 rw_init(&flow_tab_lock
, NULL
, RW_DEFAULT
, NULL
);
198 * Cleanup and release the flow table
203 kmem_cache_destroy(flow_cache
);
204 kmem_cache_destroy(flow_tab_cache
);
205 mod_hash_destroy_hash(flow_hash
);
206 rw_destroy(&flow_tab_lock
);
210 * mac_create_flow(): create a flow_entry_t.
213 mac_flow_create(flow_desc_t
*fd
, mac_resource_props_t
*mrp
, char *name
,
214 void *client_cookie
, uint_t type
, flow_entry_t
**flentp
)
216 flow_entry_t
*flent
= *flentp
;
220 err
= mac_validate_props(NULL
, mrp
);
226 flent
= kmem_cache_alloc(flow_cache
, KM_SLEEP
);
227 bzero(flent
, sizeof (*flent
));
228 mutex_init(&flent
->fe_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
229 cv_init(&flent
->fe_cv
, NULL
, CV_DEFAULT
, NULL
);
231 /* Initialize the receiver function to a safe routine */
232 flent
->fe_cb_fn
= (flow_fn_t
)mac_pkt_drop
;
233 flent
->fe_index
= -1;
235 (void) strlcpy(flent
->fe_flow_name
, name
, MAXFLOWNAMELEN
);
237 /* This is an initial flow, will be configured later */
243 flent
->fe_client_cookie
= client_cookie
;
244 flent
->fe_type
= type
;
247 bcopy(fd
, &flent
->fe_flow_desc
, sizeof (*fd
));
251 * We have already set fe_resource_props for a Link.
253 if (type
& FLOW_USER
) {
254 bcopy(mrp
, &flent
->fe_resource_props
,
255 sizeof (mac_resource_props_t
));
258 * The effective resource list should reflect the priority
259 * that we set implicitly.
261 if (!(mrp
->mrp_mask
& MRP_PRIORITY
))
262 mrp
->mrp_mask
|= MRP_PRIORITY
;
263 if (type
& FLOW_USER
)
264 mrp
->mrp_priority
= MPL_SUBFLOW_DEFAULT
;
266 mrp
->mrp_priority
= MPL_LINK_DEFAULT
;
267 bzero(mrp
->mrp_pool
, MAXPATHLEN
);
268 bzero(&mrp
->mrp_cpus
, sizeof (mac_cpus_t
));
269 bcopy(mrp
, &flent
->fe_effective_props
,
270 sizeof (mac_resource_props_t
));
272 flow_stat_create(flent
);
279 * Validate flow entry and add it to a flow table.
282 mac_flow_add(flow_tab_t
*ft
, flow_entry_t
*flent
)
284 flow_entry_t
**headp
, **p
;
285 flow_ops_t
*ops
= &ft
->ft_ops
;
290 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
293 * Check for invalid bits in mask.
295 mask
= flent
->fe_flow_desc
.fd_mask
;
296 if ((mask
& ft
->ft_mask
) == 0 || (mask
& ~ft
->ft_mask
) != 0)
302 if ((err
= ops
->fo_accept_fe(ft
, flent
)) != 0) {
303 DTRACE_PROBE3(accept_failed
, flow_tab_t
*, ft
,
304 flow_entry_t
*, flent
, int, err
);
309 * Flent is valid. now calculate hash and insert it
312 index
= ops
->fo_hash_fe(ft
, flent
);
315 * We do not need a lock up until now because we were
316 * not accessing the flow table.
318 rw_enter(&ft
->ft_lock
, RW_WRITER
);
319 headp
= &ft
->ft_table
[index
];
322 * Check for duplicate flow.
324 for (p
= headp
; *p
!= NULL
; p
= &(*p
)->fe_next
) {
325 if ((*p
)->fe_flow_desc
.fd_mask
!=
326 flent
->fe_flow_desc
.fd_mask
)
329 if (ft
->ft_ops
.fo_match_fe(ft
, *p
, flent
)) {
330 rw_exit(&ft
->ft_lock
);
331 DTRACE_PROBE3(dup_flow
, flow_tab_t
*, ft
,
332 flow_entry_t
*, flent
, int, err
);
338 * Insert flow to hash list.
340 err
= ops
->fo_insert_fe(ft
, headp
, flent
);
342 rw_exit(&ft
->ft_lock
);
343 DTRACE_PROBE3(insert_failed
, flow_tab_t
*, ft
,
344 flow_entry_t
*, flent
, int, err
);
349 * Save the hash index so it can be used by mac_flow_remove().
351 flent
->fe_index
= (int)index
;
354 * Save the flow tab back reference.
356 flent
->fe_flow_tab
= ft
;
357 FLOW_MARK(flent
, FE_FLOW_TAB
);
359 rw_exit(&ft
->ft_lock
);
364 * Remove a flow from a mac client's subflow table
367 mac_flow_rem_subflow(flow_entry_t
*flent
)
369 flow_tab_t
*ft
= flent
->fe_flow_tab
;
370 mac_client_impl_t
*mcip
= ft
->ft_mcip
;
371 mac_handle_t mh
= (mac_handle_t
)ft
->ft_mip
;
373 ASSERT(MAC_PERIM_HELD(mh
));
375 mac_flow_remove(ft
, flent
, B_FALSE
);
376 if (flent
->fe_mcip
== NULL
) {
378 * The interface is not yet plumbed and mac_client_flow_add
381 if (FLOW_TAB_EMPTY(ft
)) {
382 mac_flow_tab_destroy(ft
);
383 mcip
->mci_subflow_tab
= NULL
;
386 mac_flow_wait(flent
, FLOW_DRIVER_UPCALL
);
387 mac_link_flow_clean((mac_client_handle_t
)mcip
, flent
);
389 mac_fastpath_enable(mh
);
393 * Add a flow to a mac client's subflow table and instantiate the flow
394 * in the mac by creating the associated SRSs etc.
397 mac_flow_add_subflow(mac_client_handle_t mch
, flow_entry_t
*flent
,
398 boolean_t instantiate_flow
)
400 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
401 mac_handle_t mh
= (mac_handle_t
)mcip
->mci_mip
;
402 flow_tab_info_t
*ftinfo
;
406 boolean_t ft_created
= B_FALSE
;
408 ASSERT(MAC_PERIM_HELD(mh
));
410 if ((err
= mac_fastpath_disable(mh
)) != 0)
414 * If the subflow table exists already just add the new subflow
415 * to the existing table, else we create a new subflow table below.
417 ft
= mcip
->mci_subflow_tab
;
419 mask
= flent
->fe_flow_desc
.fd_mask
;
421 * Try to create a new table and then add the subflow to the
422 * newly created subflow table
424 if ((ftinfo
= mac_flow_tab_info_get(mask
)) == NULL
) {
425 mac_fastpath_enable(mh
);
429 mac_flow_tab_create(ftinfo
->fti_ops
, mask
, ftinfo
->fti_size
,
434 err
= mac_flow_add(ft
, flent
);
437 mac_flow_tab_destroy(ft
);
438 mac_fastpath_enable(mh
);
442 if (instantiate_flow
) {
443 /* Now activate the flow by creating its SRSs */
444 ASSERT(MCIP_DATAPATH_SETUP(mcip
));
445 err
= mac_link_flow_init((mac_client_handle_t
)mcip
, flent
);
447 mac_flow_remove(ft
, flent
, B_FALSE
);
449 mac_flow_tab_destroy(ft
);
450 mac_fastpath_enable(mh
);
454 FLOW_MARK(flent
, FE_UF_NO_DATAPATH
);
457 ASSERT(mcip
->mci_subflow_tab
== NULL
);
459 mcip
->mci_subflow_tab
= ft
;
460 if (instantiate_flow
)
461 mac_client_update_classifier(mcip
, B_TRUE
);
467 * Remove flow entry from flow table.
470 mac_flow_remove(flow_tab_t
*ft
, flow_entry_t
*flent
, boolean_t temp
)
474 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
475 if (!(flent
->fe_flags
& FE_FLOW_TAB
))
478 rw_enter(&ft
->ft_lock
, RW_WRITER
);
480 * If this is a permanent removal from the flow table, mark it
481 * CONDEMNED to prevent future references. If this is a temporary
482 * removal from the table, say to update the flow descriptor then
483 * we don't mark it CONDEMNED
486 FLOW_MARK(flent
, FE_CONDEMNED
);
488 * Locate the specified flent.
490 fp
= &ft
->ft_table
[flent
->fe_index
];
492 fp
= &(*fp
)->fe_next
;
495 * The flent must exist. Otherwise it's a bug.
498 *fp
= flent
->fe_next
;
499 flent
->fe_next
= NULL
;
502 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
503 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
506 flent
->fe_index
= -1;
507 FLOW_UNMARK(flent
, FE_FLOW_TAB
);
509 rw_exit(&ft
->ft_lock
);
513 * This is the flow lookup routine used by the mac sw classifier engine.
516 mac_flow_lookup(flow_tab_t
*ft
, mblk_t
*mp
, uint_t flags
, flow_entry_t
**flentp
)
520 flow_ops_t
*ops
= &ft
->ft_ops
;
521 boolean_t retried
= B_FALSE
;
529 * Walk the list of predeclared accept functions.
530 * Each of these would accumulate enough state to allow the next
531 * accept routine to make progress.
533 for (i
= 0; i
< FLOW_MAX_ACCEPT
&& ops
->fo_accept
[i
] != NULL
; i
++) {
534 if ((err
= (ops
->fo_accept
[i
])(ft
, &s
)) != 0) {
538 * ENOBUFS indicates that the mp could be too short
539 * and may need a pullup.
541 if (err
!= ENOBUFS
|| retried
)
545 * The pullup is done on the last processed mblk, not
546 * the starting one. pullup is not done if the mblk
547 * has references or if b_cont is NULL.
550 if (DB_REF(last
) > 1 || last
->b_cont
== NULL
||
551 pullupmsg(last
, -1) == 0)
555 DTRACE_PROBE2(need_pullup
, flow_tab_t
*, ft
,
562 * The packet is considered sane. We may now attempt to
563 * find the corresponding flent.
565 rw_enter(&ft
->ft_lock
, RW_READER
);
566 flent
= ft
->ft_table
[ops
->fo_hash(ft
, &s
)];
567 for (; flent
!= NULL
; flent
= flent
->fe_next
) {
568 if (flent
->fe_match(ft
, flent
, &s
)) {
569 FLOW_TRY_REFHOLD(flent
, err
);
573 rw_exit(&ft
->ft_lock
);
577 rw_exit(&ft
->ft_lock
);
583 * The caller is assumed to have proper perimeter protection.
586 mac_flow_walk_nolock(flow_tab_t
*ft
, int (*fn
)(flow_entry_t
*, void *),
595 for (i
= 0; i
< ft
->ft_size
; i
++) {
596 for (flent
= ft
->ft_table
[i
]; flent
!= NULL
;
597 flent
= flent
->fe_next
) {
599 err
= (*fn
)(flent
, arg
);
604 VERIFY(cnt
== ft
->ft_flow_count
);
609 * Same as the above except a mutex is used for protection here.
612 mac_flow_walk(flow_tab_t
*ft
, int (*fn
)(flow_entry_t
*, void *),
620 rw_enter(&ft
->ft_lock
, RW_WRITER
);
621 err
= mac_flow_walk_nolock(ft
, fn
, arg
);
622 rw_exit(&ft
->ft_lock
);
626 static boolean_t
mac_flow_clean(flow_entry_t
*);
629 * Destroy a flow entry. Called when the last reference on a flow is released.
632 mac_flow_destroy(flow_entry_t
*flent
)
634 ASSERT(flent
->fe_refcnt
== 0);
636 if ((flent
->fe_type
& FLOW_USER
) != 0) {
637 ASSERT(mac_flow_clean(flent
));
639 mac_flow_cleanup(flent
);
641 mac_misc_stat_delete(flent
);
642 mutex_destroy(&flent
->fe_lock
);
643 cv_destroy(&flent
->fe_cv
);
644 flow_stat_destroy(flent
);
645 kmem_cache_free(flow_cache
, flent
);
650 * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
651 * mac_link_flow_modify() should really be moved/reworked into the
652 * two functions below. This would consolidate all the mac property
653 * checking in one place. I'm leaving this alone for now since it's
654 * out of scope of the new flows work.
658 mac_flow_modify_props(flow_entry_t
*flent
, mac_resource_props_t
*mrp
)
660 uint32_t changed_mask
= 0;
661 mac_resource_props_t
*fmrp
= &flent
->fe_effective_props
;
664 if ((mrp
->mrp_mask
& MRP_MAXBW
) != 0 &&
665 (!(fmrp
->mrp_mask
& MRP_MAXBW
) ||
666 (fmrp
->mrp_maxbw
!= mrp
->mrp_maxbw
))) {
667 changed_mask
|= MRP_MAXBW
;
668 if (mrp
->mrp_maxbw
== MRP_MAXBW_RESETVAL
) {
669 fmrp
->mrp_mask
&= ~MRP_MAXBW
;
672 fmrp
->mrp_mask
|= MRP_MAXBW
;
673 fmrp
->mrp_maxbw
= mrp
->mrp_maxbw
;
677 if ((mrp
->mrp_mask
& MRP_PRIORITY
) != 0) {
678 if (fmrp
->mrp_priority
!= mrp
->mrp_priority
)
679 changed_mask
|= MRP_PRIORITY
;
680 if (mrp
->mrp_priority
== MPL_RESET
) {
681 fmrp
->mrp_priority
= MPL_SUBFLOW_DEFAULT
;
682 fmrp
->mrp_mask
&= ~MRP_PRIORITY
;
684 fmrp
->mrp_priority
= mrp
->mrp_priority
;
685 fmrp
->mrp_mask
|= MRP_PRIORITY
;
690 if ((mrp
->mrp_mask
& MRP_CPUS
) != 0) {
691 if ((fmrp
->mrp_ncpus
== mrp
->mrp_ncpus
) &&
692 (fmrp
->mrp_fanout_mode
== mrp
->mrp_fanout_mode
)) {
693 for (i
= 0; i
< mrp
->mrp_ncpus
; i
++) {
694 if (mrp
->mrp_cpu
[i
] != fmrp
->mrp_cpu
[i
])
697 if (i
== mrp
->mrp_ncpus
) {
699 * The new set of cpus passed is exactly
700 * the same as the existing set.
702 return (changed_mask
);
705 changed_mask
|= MRP_CPUS
;
706 MAC_COPY_CPUS(mrp
, fmrp
);
710 * Modify the rings property.
712 if (mrp
->mrp_mask
& MRP_RX_RINGS
|| mrp
->mrp_mask
& MRP_TX_RINGS
)
713 mac_set_rings_effective(flent
->fe_mcip
);
715 if ((mrp
->mrp_mask
& MRP_POOL
) != 0) {
716 if (strcmp(fmrp
->mrp_pool
, mrp
->mrp_pool
) != 0)
717 changed_mask
|= MRP_POOL
;
718 if (strlen(mrp
->mrp_pool
) == 0)
719 fmrp
->mrp_mask
&= ~MRP_POOL
;
721 fmrp
->mrp_mask
|= MRP_POOL
;
722 (void) strncpy(fmrp
->mrp_pool
, mrp
->mrp_pool
, MAXPATHLEN
);
724 return (changed_mask
);
728 mac_flow_modify(flow_tab_t
*ft
, flow_entry_t
*flent
, mac_resource_props_t
*mrp
)
730 uint32_t changed_mask
;
731 mac_client_impl_t
*mcip
= flent
->fe_mcip
;
732 mac_resource_props_t
*mcip_mrp
= MCIP_RESOURCE_PROPS(mcip
);
733 mac_resource_props_t
*emrp
= MCIP_EFFECTIVE_PROPS(mcip
);
734 cpupart_t
*cpupart
= NULL
;
735 boolean_t use_default
= B_FALSE
;
737 ASSERT(flent
!= NULL
);
738 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
740 rw_enter(&ft
->ft_lock
, RW_WRITER
);
742 /* Update the cached values inside the subflow entry */
743 changed_mask
= mac_flow_modify_props(flent
, mrp
);
744 rw_exit(&ft
->ft_lock
);
746 * Push the changed parameters to the scheduling code in the
747 * SRS's, to take effect right away.
749 if (changed_mask
& MRP_MAXBW
) {
750 mac_srs_update_bwlimit(flent
, mrp
);
752 * If bandwidth is changed, we may have to change
753 * the number of soft ring to be used for fanout.
754 * Call mac_flow_update_fanout() if MAC_BIND_CPU
755 * is not set and there is no user supplied cpu
756 * info. This applies only to link at this time.
758 if (!(flent
->fe_type
& FLOW_USER
) &&
759 !(changed_mask
& MRP_CPUS
) &&
760 !(mcip_mrp
->mrp_mask
& MRP_CPUS_USERSPEC
)) {
761 mac_fanout_setup(mcip
, flent
, mcip_mrp
,
762 mac_rx_deliver
, mcip
, NULL
, NULL
);
765 if (mrp
->mrp_mask
& MRP_PRIORITY
)
766 mac_flow_update_priority(mcip
, flent
);
768 if (changed_mask
& MRP_CPUS
)
769 mac_fanout_setup(mcip
, flent
, mrp
, mac_rx_deliver
, mcip
, NULL
,
772 if (mrp
->mrp_mask
& MRP_POOL
) {
774 cpupart
= mac_pset_find(mrp
, &use_default
);
775 mac_fanout_setup(mcip
, flent
, mrp
, mac_rx_deliver
, mcip
, NULL
,
777 mac_set_pool_effective(use_default
, cpupart
, mrp
, emrp
);
783 * This function waits for a certain condition to be met and is generally
784 * used before a destructive or quiescing operation.
787 mac_flow_wait(flow_entry_t
*flent
, mac_flow_state_t event
)
789 mutex_enter(&flent
->fe_lock
);
790 flent
->fe_flags
|= FE_WAITER
;
793 case FLOW_DRIVER_UPCALL
:
795 * We want to make sure the driver upcalls have finished before
796 * we signal the Rx SRS worker to quit.
798 while (flent
->fe_refcnt
!= 1)
799 cv_wait(&flent
->fe_cv
, &flent
->fe_lock
);
804 * Wait for the fe_user_refcnt to drop to 0. The flow has
805 * been removed from the global flow hash.
807 ASSERT(!(flent
->fe_flags
& FE_G_FLOW_HASH
));
808 while (flent
->fe_user_refcnt
!= 0)
809 cv_wait(&flent
->fe_cv
, &flent
->fe_lock
);
816 flent
->fe_flags
&= ~FE_WAITER
;
817 mutex_exit(&flent
->fe_lock
);
821 mac_flow_clean(flow_entry_t
*flent
)
823 ASSERT(flent
->fe_next
== NULL
);
824 ASSERT(flent
->fe_tx_srs
== NULL
);
825 ASSERT(flent
->fe_rx_srs_cnt
== 0 && flent
->fe_rx_srs
[0] == NULL
);
826 ASSERT(flent
->fe_mbg
== NULL
);
832 mac_flow_cleanup(flow_entry_t
*flent
)
834 if ((flent
->fe_type
& FLOW_USER
) == 0) {
835 ASSERT((flent
->fe_mbg
== NULL
&& flent
->fe_mcip
!= NULL
) ||
836 (flent
->fe_mbg
!= NULL
&& flent
->fe_mcip
== NULL
));
837 ASSERT(flent
->fe_refcnt
== 0);
839 ASSERT(flent
->fe_refcnt
== 1);
842 if (flent
->fe_mbg
!= NULL
) {
843 ASSERT(flent
->fe_tx_srs
== NULL
);
844 /* This is a multicast or broadcast flow entry */
845 mac_bcast_grp_free(flent
->fe_mbg
);
846 flent
->fe_mbg
= NULL
;
849 if (flent
->fe_tx_srs
!= NULL
) {
850 ASSERT(flent
->fe_mbg
== NULL
);
851 mac_srs_free(flent
->fe_tx_srs
);
852 flent
->fe_tx_srs
= NULL
;
856 * In the normal case fe_rx_srs_cnt is 1. However in the error case
857 * when mac_unicast_add fails we may not have set up any SRS
858 * in which case fe_rx_srs_cnt will be zero.
860 if (flent
->fe_rx_srs_cnt
!= 0) {
861 ASSERT(flent
->fe_rx_srs_cnt
== 1);
862 mac_srs_free(flent
->fe_rx_srs
[0]);
863 flent
->fe_rx_srs
[0] = NULL
;
864 flent
->fe_rx_srs_cnt
= 0;
866 ASSERT(flent
->fe_rx_srs
[0] == NULL
);
870 mac_flow_get_desc(flow_entry_t
*flent
, flow_desc_t
*fd
)
873 * Grab the fe_lock to see a self-consistent fe_flow_desc.
874 * Updates to the fe_flow_desc happen under the fe_lock
875 * after removing the flent from the flow table
877 mutex_enter(&flent
->fe_lock
);
878 bcopy(&flent
->fe_flow_desc
, fd
, sizeof (*fd
));
879 mutex_exit(&flent
->fe_lock
);
883 * Update a field of a flow entry. The mac perimeter ensures that
884 * this is the only thread doing a modify operation on this mac end point.
885 * So the flow table can't change or disappear. The ft_lock protects access
886 * to the flow entry, and holding the lock ensures that there isn't any thread
887 * accessing the flow entry or attempting a flow table lookup. However
888 * data threads that are using the flow entry based on the old descriptor
889 * will continue to use the flow entry. If strong coherence is required
890 * then the flow will have to be quiesced before the descriptor can be
894 mac_flow_set_desc(flow_entry_t
*flent
, flow_desc_t
*fd
)
896 flow_tab_t
*ft
= flent
->fe_flow_tab
;
897 flow_desc_t old_desc
;
902 * The flow hasn't yet been inserted into the table,
903 * so only the caller knows about this flow, however for
904 * uniformity we grab the fe_lock here.
906 mutex_enter(&flent
->fe_lock
);
907 bcopy(fd
, &flent
->fe_flow_desc
, sizeof (*fd
));
908 mutex_exit(&flent
->fe_lock
);
911 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
914 * Need to remove the flow entry from the table and reinsert it,
915 * into a potentially diference hash line. The hash depends on
916 * the new descriptor fields. However access to fe_desc itself
917 * is always under the fe_lock. This helps log and stat functions
918 * see a self-consistent fe_flow_desc.
920 mac_flow_remove(ft
, flent
, B_TRUE
);
921 old_desc
= flent
->fe_flow_desc
;
923 mutex_enter(&flent
->fe_lock
);
924 bcopy(fd
, &flent
->fe_flow_desc
, sizeof (*fd
));
925 mutex_exit(&flent
->fe_lock
);
927 if (mac_flow_add(ft
, flent
) != 0) {
929 * The add failed say due to an invalid flow descriptor.
932 flent
->fe_flow_desc
= old_desc
;
933 err
= mac_flow_add(ft
, flent
);
939 mac_flow_set_name(flow_entry_t
*flent
, const char *name
)
941 flow_tab_t
*ft
= flent
->fe_flow_tab
;
945 * The flow hasn't yet been inserted into the table,
946 * so only the caller knows about this flow
948 (void) strlcpy(flent
->fe_flow_name
, name
, MAXFLOWNAMELEN
);
950 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
953 mutex_enter(&flent
->fe_lock
);
954 (void) strlcpy(flent
->fe_flow_name
, name
, MAXFLOWNAMELEN
);
955 mutex_exit(&flent
->fe_lock
);
959 * Return the client-private cookie that was associated with
960 * the flow when it was created.
963 mac_flow_get_client_cookie(flow_entry_t
*flent
)
965 return (flent
->fe_client_cookie
);
969 * Forward declarations.
971 static uint32_t flow_l2_hash(flow_tab_t
*, flow_state_t
*);
972 static uint32_t flow_l2_hash_fe(flow_tab_t
*, flow_entry_t
*);
973 static int flow_l2_accept(flow_tab_t
*, flow_state_t
*);
974 static uint32_t flow_ether_hash(flow_tab_t
*, flow_state_t
*);
975 static uint32_t flow_ether_hash_fe(flow_tab_t
*, flow_entry_t
*);
976 static int flow_ether_accept(flow_tab_t
*, flow_state_t
*);
982 mac_flow_tab_create(flow_ops_t
*ops
, flow_mask_t mask
, uint_t size
,
983 mac_impl_t
*mip
, flow_tab_t
**ftp
)
988 ft
= kmem_cache_alloc(flow_tab_cache
, KM_SLEEP
);
989 bzero(ft
, sizeof (*ft
));
991 ft
->ft_table
= kmem_zalloc(size
* sizeof (flow_entry_t
*), KM_SLEEP
);
994 * We make a copy of the ops vector instead of just pointing to it
995 * because we might want to customize the ops vector on a per table
996 * basis (e.g. for optimization).
998 new_ops
= &ft
->ft_ops
;
999 bcopy(ops
, new_ops
, sizeof (*ops
));
1005 * Optimizations for DL_ETHER media.
1007 if (mip
->mi_info
.mi_nativemedia
== DL_ETHER
) {
1008 if (new_ops
->fo_hash
== flow_l2_hash
)
1009 new_ops
->fo_hash
= flow_ether_hash
;
1010 if (new_ops
->fo_hash_fe
== flow_l2_hash_fe
)
1011 new_ops
->fo_hash_fe
= flow_ether_hash_fe
;
1012 if (new_ops
->fo_accept
[0] == flow_l2_accept
)
1013 new_ops
->fo_accept
[0] = flow_ether_accept
;
1019 mac_flow_l2tab_create(mac_impl_t
*mip
, flow_tab_t
**ftp
)
1021 mac_flow_tab_create(&flow_l2_ops
, FLOW_LINK_DST
| FLOW_LINK_VID
,
1026 * Destroy flow table.
1029 mac_flow_tab_destroy(flow_tab_t
*ft
)
1034 ASSERT(ft
->ft_flow_count
== 0);
1035 kmem_free(ft
->ft_table
, ft
->ft_size
* sizeof (flow_entry_t
*));
1036 bzero(ft
, sizeof (*ft
));
1037 kmem_cache_free(flow_tab_cache
, ft
);
1041 * Add a new flow entry to the global flow hash table
1044 mac_flow_hash_add(flow_entry_t
*flent
)
1048 rw_enter(&flow_tab_lock
, RW_WRITER
);
1049 err
= mod_hash_insert(flow_hash
,
1050 (mod_hash_key_t
)flent
->fe_flow_name
, (mod_hash_val_t
)flent
);
1052 rw_exit(&flow_tab_lock
);
1055 /* Mark as inserted into the global flow hash table */
1056 FLOW_MARK(flent
, FE_G_FLOW_HASH
);
1057 rw_exit(&flow_tab_lock
);
1062 * Remove a flow entry from the global flow hash table
1065 mac_flow_hash_remove(flow_entry_t
*flent
)
1069 rw_enter(&flow_tab_lock
, RW_WRITER
);
1070 VERIFY(mod_hash_remove(flow_hash
,
1071 (mod_hash_key_t
)flent
->fe_flow_name
, &val
) == 0);
1073 /* Clear the mark that says inserted into the global flow hash table */
1074 FLOW_UNMARK(flent
, FE_G_FLOW_HASH
);
1075 rw_exit(&flow_tab_lock
);
1079 * Retrieve a flow entry from the global flow hash table.
1082 mac_flow_lookup_byname(char *name
, flow_entry_t
**flentp
)
1085 flow_entry_t
*flent
;
1087 rw_enter(&flow_tab_lock
, RW_READER
);
1088 err
= mod_hash_find(flow_hash
, (mod_hash_key_t
)name
,
1089 (mod_hash_val_t
*)&flent
);
1091 rw_exit(&flow_tab_lock
);
1094 ASSERT(flent
!= NULL
);
1095 FLOW_USER_REFHOLD(flent
);
1096 rw_exit(&flow_tab_lock
);
1103 * Initialize or release mac client flows by walking the subflow table.
1104 * These are typically invoked during plumb/unplumb of links.
1108 mac_link_init_flows_cb(flow_entry_t
*flent
, void *arg
)
1110 mac_client_impl_t
*mcip
= arg
;
1112 if (mac_link_flow_init(arg
, flent
) != 0) {
1113 cmn_err(CE_WARN
, "Failed to initialize flow '%s' on link '%s'",
1114 flent
->fe_flow_name
, mcip
->mci_name
);
1116 FLOW_UNMARK(flent
, FE_UF_NO_DATAPATH
);
1122 mac_link_init_flows(mac_client_handle_t mch
)
1124 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
1126 (void) mac_flow_walk_nolock(mcip
->mci_subflow_tab
,
1127 mac_link_init_flows_cb
, mcip
);
1129 * If mac client had subflow(s) configured before plumb, change
1130 * function to mac_rx_srs_subflow_process and in case of hardware
1131 * classification, disable polling.
1133 mac_client_update_classifier(mcip
, B_TRUE
);
1138 mac_link_has_flows(mac_client_handle_t mch
)
1140 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
1142 if (!FLOW_TAB_EMPTY(mcip
->mci_subflow_tab
))
1149 mac_link_release_flows_cb(flow_entry_t
*flent
, void *arg
)
1151 FLOW_MARK(flent
, FE_UF_NO_DATAPATH
);
1152 mac_flow_wait(flent
, FLOW_DRIVER_UPCALL
);
1153 mac_link_flow_clean(arg
, flent
);
1158 mac_link_release_flows(mac_client_handle_t mch
)
1160 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
1163 * Change the mci_flent callback back to mac_rx_srs_process()
1164 * because flows are about to be deactivated.
1166 mac_client_update_classifier(mcip
, B_FALSE
);
1167 (void) mac_flow_walk_nolock(mcip
->mci_subflow_tab
,
1168 mac_link_release_flows_cb
, mcip
);
1172 mac_rename_flow(flow_entry_t
*fep
, const char *new_name
)
1174 mac_flow_set_name(fep
, new_name
);
1175 if (fep
->fe_ksp
!= NULL
) {
1176 flow_stat_destroy(fep
);
1177 flow_stat_create(fep
);
1182 * mac_link_flow_init()
1183 * Internal flow interface used for allocating SRSs and related
1184 * data structures. Not meant to be used by mac clients.
1187 mac_link_flow_init(mac_client_handle_t mch
, flow_entry_t
*sub_flow
)
1189 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
1190 mac_impl_t
*mip
= mcip
->mci_mip
;
1193 ASSERT(mch
!= NULL
);
1194 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
1196 if ((err
= mac_datapath_setup(mcip
, sub_flow
, SRST_FLOW
)) != 0)
1199 sub_flow
->fe_mcip
= mcip
;
1205 * mac_link_flow_add()
1206 * Used by flowadm(1m) or kernel mac clients for creating flows.
1209 mac_link_flow_add(datalink_id_t linkid
, char *flow_name
,
1210 flow_desc_t
*flow_desc
, mac_resource_props_t
*mrp
)
1212 flow_entry_t
*flent
= NULL
;
1214 dls_dl_handle_t dlh
;
1216 boolean_t link_held
= B_FALSE
;
1217 boolean_t hash_added
= B_FALSE
;
1218 mac_perim_handle_t mph
;
1220 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1222 FLOW_USER_REFRELE(flent
);
1227 * First create a flow entry given the description provided
1230 err
= mac_flow_create(flow_desc
, mrp
, flow_name
, NULL
,
1231 FLOW_USER
| FLOW_OTHER
, &flent
);
1237 * We've got a local variable referencing this flow now, so we need
1238 * to hold it. We'll release this flow before returning.
1239 * All failures until we return will undo any action that may internally
1240 * held the flow, so the last REFRELE will assure a clean freeing
1243 FLOW_REFHOLD(flent
);
1245 flent
->fe_link_id
= linkid
;
1246 FLOW_MARK(flent
, FE_INCIPIENT
);
1248 err
= mac_perim_enter_by_linkid(linkid
, &mph
);
1250 FLOW_FINAL_REFRELE(flent
);
1255 * dls will eventually be merged with mac so it's ok
1256 * to call dls' internal functions.
1258 err
= dls_devnet_hold_link(linkid
, &dlh
, &dlp
);
1265 * Add the flow to the global flow table, this table will be per
1266 * exclusive zone so each zone can have its own flow namespace.
1267 * RFE 6625651 will fix this.
1270 if ((err
= mac_flow_hash_add(flent
)) != 0)
1273 hash_added
= B_TRUE
;
1276 * do not allow flows to be configured on an anchor VNIC
1278 if (mac_capab_get(dlp
->dl_mh
, MAC_CAPAB_ANCHOR_VNIC
, NULL
)) {
1284 * Add the subflow to the subflow table. Also instantiate the flow
1285 * in the mac if there is an active user (we check if the MAC client's
1286 * datapath has been setup).
1288 err
= mac_flow_add_subflow(dlp
->dl_mch
, flent
,
1289 MCIP_DATAPATH_SETUP((mac_client_impl_t
*)dlp
->dl_mch
));
1293 FLOW_UNMARK(flent
, FE_INCIPIENT
);
1294 dls_devnet_rele_link(dlh
, dlp
);
1295 mac_perim_exit(mph
);
1300 mac_flow_hash_remove(flent
);
1303 dls_devnet_rele_link(dlh
, dlp
);
1306 * Wait for any transient global flow hash refs to clear
1307 * and then release the creation reference on the flow
1309 mac_flow_wait(flent
, FLOW_USER_REF
);
1310 FLOW_FINAL_REFRELE(flent
);
1311 mac_perim_exit(mph
);
1316 * mac_link_flow_clean()
1317 * Internal flow interface used for freeing SRSs and related
1318 * data structures. Not meant to be used by mac clients.
1321 mac_link_flow_clean(mac_client_handle_t mch
, flow_entry_t
*sub_flow
)
1323 mac_client_impl_t
*mcip
= (mac_client_impl_t
*)mch
;
1324 mac_impl_t
*mip
= mcip
->mci_mip
;
1325 boolean_t last_subflow
;
1327 ASSERT(mch
!= NULL
);
1328 ASSERT(MAC_PERIM_HELD((mac_handle_t
)mip
));
1331 * This sub flow entry may fail to be fully initialized by
1332 * mac_link_flow_init(). If so, simply return.
1334 if (sub_flow
->fe_mcip
== NULL
)
1337 last_subflow
= FLOW_TAB_EMPTY(mcip
->mci_subflow_tab
);
1339 * Tear down the data path
1341 mac_datapath_teardown(mcip
, sub_flow
, SRST_FLOW
);
1342 sub_flow
->fe_mcip
= NULL
;
1345 * Delete the SRSs associated with this subflow. If this is being
1346 * driven by flowadm(8) then the subflow will be deleted by
1347 * dls_rem_flow. However if this is a result of the interface being
1348 * unplumbed then the subflow itself won't be deleted.
1350 mac_flow_cleanup(sub_flow
);
1353 * If all the subflows are gone, renable some of the stuff
1354 * we disabled when adding a subflow, polling etc.
1358 * The subflow table itself is not protected by any locks or
1359 * refcnts. Hence quiesce the client upfront before clearing
1362 mac_client_quiesce(mcip
);
1363 mac_client_update_classifier(mcip
, B_FALSE
);
1364 mac_flow_tab_destroy(mcip
->mci_subflow_tab
);
1365 mcip
->mci_subflow_tab
= NULL
;
1366 mac_client_restart(mcip
);
1371 * mac_link_flow_remove()
1372 * Used by flowadm(1m) or kernel mac clients for removing flows.
1375 mac_link_flow_remove(char *flow_name
)
1377 flow_entry_t
*flent
;
1378 mac_perim_handle_t mph
;
1380 datalink_id_t linkid
;
1382 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1386 linkid
= flent
->fe_link_id
;
1387 FLOW_USER_REFRELE(flent
);
1390 * The perim must be acquired before acquiring any other references
1391 * to maintain the lock and perimeter hierarchy. Please note the
1392 * FLOW_REFRELE above.
1394 err
= mac_perim_enter_by_linkid(linkid
, &mph
);
1399 * Note the second lookup of the flow, because a concurrent thread
1400 * may have removed it already while we were waiting to enter the
1403 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1405 mac_perim_exit(mph
);
1408 FLOW_USER_REFRELE(flent
);
1411 * Remove the flow from the subflow table and deactivate the flow
1412 * by quiescing and removings its SRSs
1414 mac_flow_rem_subflow(flent
);
1417 * Finally, remove the flow from the global table.
1419 mac_flow_hash_remove(flent
);
1422 * Wait for any transient global flow hash refs to clear
1423 * and then release the creation reference on the flow
1425 mac_flow_wait(flent
, FLOW_USER_REF
);
1426 FLOW_FINAL_REFRELE(flent
);
1428 mac_perim_exit(mph
);
1434 * mac_link_flow_modify()
1435 * Modifies the properties of a flow identified by its name.
1438 mac_link_flow_modify(char *flow_name
, mac_resource_props_t
*mrp
)
1440 flow_entry_t
*flent
;
1441 mac_client_impl_t
*mcip
;
1443 mac_perim_handle_t mph
;
1444 datalink_id_t linkid
;
1445 flow_tab_t
*flow_tab
;
1447 err
= mac_validate_props(NULL
, mrp
);
1451 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1455 linkid
= flent
->fe_link_id
;
1456 FLOW_USER_REFRELE(flent
);
1459 * The perim must be acquired before acquiring any other references
1460 * to maintain the lock and perimeter hierarchy. Please note the
1461 * FLOW_REFRELE above.
1463 err
= mac_perim_enter_by_linkid(linkid
, &mph
);
1468 * Note the second lookup of the flow, because a concurrent thread
1469 * may have removed it already while we were waiting to enter the
1472 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1474 mac_perim_exit(mph
);
1477 FLOW_USER_REFRELE(flent
);
1480 * If this flow is attached to a MAC client, then pass the request
1481 * along to the client.
1482 * Otherwise, just update the cached values.
1484 mcip
= flent
->fe_mcip
;
1485 mac_update_resources(mrp
, &flent
->fe_resource_props
, B_TRUE
);
1487 if ((flow_tab
= mcip
->mci_subflow_tab
) == NULL
) {
1490 mac_flow_modify(flow_tab
, flent
, mrp
);
1493 (void) mac_flow_modify_props(flent
, mrp
);
1497 mac_perim_exit(mph
);
1503 * State structure and misc functions used by mac_link_flow_walk().
/*
 * NOTE(review): extraction-garbled span; tokens kept verbatim.
 * ws_func is the per-flow callback invoked from mac_link_flow_walk_cb().
 * The typedef's opening line and the other member it reads (presumably a
 * ws_arg cookie passed through to ws_func -- see mac_link_flow_walk_cb)
 * are not visible in this chunk; confirm against the full source.
 */
1506 int (*ws_func
)(mac_flowinfo_t
*, void *);
1508 } flow_walk_state_t
;
1511 mac_link_flowinfo_copy(mac_flowinfo_t
*finfop
, flow_entry_t
*flent
)
1513 (void) strlcpy(finfop
->fi_flow_name
, flent
->fe_flow_name
,
1515 finfop
->fi_link_id
= flent
->fe_link_id
;
1516 finfop
->fi_flow_desc
= flent
->fe_flow_desc
;
1517 finfop
->fi_resource_props
= flent
->fe_resource_props
;
1521 mac_link_flow_walk_cb(flow_entry_t
*flent
, void *arg
)
1523 flow_walk_state_t
*statep
= arg
;
1524 mac_flowinfo_t
*finfo
;
1527 finfo
= kmem_zalloc(sizeof (*finfo
), KM_SLEEP
);
1528 mac_link_flowinfo_copy(finfo
, flent
);
1529 err
= statep
->ws_func(finfo
, statep
->ws_arg
);
1530 kmem_free(finfo
, sizeof (*finfo
));
1535 * mac_link_flow_walk()
1536 * Invokes callback 'func' for all flows belonging to the specified link.
1539 mac_link_flow_walk(datalink_id_t linkid
,
1540 int (*func
)(mac_flowinfo_t
*, void *), void *arg
)
1542 mac_client_impl_t
*mcip
;
1543 mac_perim_handle_t mph
;
1544 flow_walk_state_t state
;
1545 dls_dl_handle_t dlh
;
1549 err
= mac_perim_enter_by_linkid(linkid
, &mph
);
1553 err
= dls_devnet_hold_link(linkid
, &dlh
, &dlp
);
1555 mac_perim_exit(mph
);
1559 mcip
= (mac_client_impl_t
*)dlp
->dl_mch
;
1560 state
.ws_func
= func
;
1563 err
= mac_flow_walk_nolock(mcip
->mci_subflow_tab
,
1564 mac_link_flow_walk_cb
, &state
);
1566 dls_devnet_rele_link(dlh
, dlp
);
1567 mac_perim_exit(mph
);
1572 * mac_link_flow_info()
1573 * Retrieves information about a specific flow.
1576 mac_link_flow_info(char *flow_name
, mac_flowinfo_t
*finfo
)
1578 flow_entry_t
*flent
;
1581 err
= mac_flow_lookup_byname(flow_name
, &flent
);
1585 mac_link_flowinfo_copy(finfo
, flent
);
1586 FLOW_USER_REFRELE(flent
);
1591 * Hash function macro that takes an Ethernet address and VLAN id as input.
/*
 * Sums the three low-order octets of the MAC address (a)[3..5], XORs in
 * the VLAN id (v), and reduces modulo the table size (s).
 */
1593 #define HASH_ETHER_VID(a, v, s) \
1594 ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s))
/*
 * Generic layer-2 address hashing function that takes an address and address
 * length as input. This is the DJB (djb2) hash: h = h * 33 + byte,
 * seeded with 5381, reduced modulo the hash-table size.
 */
static uint32_t
flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize)
{
	uint32_t	h = 5381;
	const uint8_t	*p = addr;
	const uint8_t	*end = addr + addrlen;

	while (p < end)
		h = ((h << 5) + h) + *p++;
	return (h % htsize);
}
/*
 * PKT_TOO_SMALL: true when the current mblk's write pointer ends before
 * 'end', i.e. the bytes about to be parsed are not contiguously present.
 */
1611 #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end))
/*
 * CHECK_AND_ADJUST_START_PTR: if the parse position sits exactly at the
 * end of the current mblk, advance to the next mblk in the chain and
 * continue at its read pointer. NOTE(review): extraction-garbled span,
 * tokens kept verbatim; the original lines handling a NULL b_cont
 * (lines 1616-1618) are not visible here -- presumably they return an
 * error from the enclosing function; confirm against the full source.
 */
1613 #define CHECK_AND_ADJUST_START_PTR(s, start) { \
1614 if ((s)->fs_mp->b_wptr == (start)) { \
1615 mblk_t *next = (s)->fs_mp->b_cont; \
1619 (s)->fs_mp = next; \
1620 (start) = next->b_rptr; \
1626 flow_l2_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
1628 flow_l2info_t
*l2
= &s
->fs_l2info
;
1629 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1631 return (l2
->l2_vid
== fd
->fd_vid
&&
1632 bcmp(l2
->l2_daddr
, fd
->fd_dst_mac
, fd
->fd_mac_len
) == 0);
1636 * Layer 2 hash function.
1637 * Must be paired with flow_l2_accept() within a set of flow_ops
1638 * because it assumes the dest address is already extracted.
1641 flow_l2_hash(flow_tab_t
*ft
, flow_state_t
*s
)
1643 return (flow_l2_addrhash(s
->fs_l2info
.l2_daddr
,
1644 ft
->ft_mip
->mi_type
->mt_addr_length
, ft
->ft_size
));
1648 * This is the generic layer 2 accept function.
1649 * It makes use of mac_header_info() to extract the header length,
1650 * sap, vlan ID and destination address.
1653 flow_l2_accept(flow_tab_t
*ft
, flow_state_t
*s
)
1656 flow_l2info_t
*l2
= &s
->fs_l2info
;
1657 mac_header_info_t mhi
;
1660 is_ether
= (ft
->ft_mip
->mi_info
.mi_nativemedia
== DL_ETHER
);
1661 if ((err
= mac_header_info((mac_handle_t
)ft
->ft_mip
,
1662 s
->fs_mp
, &mhi
)) != 0) {
1669 l2
->l2_start
= s
->fs_mp
->b_rptr
;
1670 l2
->l2_daddr
= (uint8_t *)mhi
.mhi_daddr
;
1672 if (is_ether
&& mhi
.mhi_bindsap
== ETHERTYPE_VLAN
&&
1673 ((s
->fs_flags
& FLOW_IGNORE_VLAN
) == 0)) {
1674 struct ether_vlan_header
*evhp
=
1675 (struct ether_vlan_header
*)l2
->l2_start
;
1677 if (PKT_TOO_SMALL(s
, l2
->l2_start
+ sizeof (*evhp
)))
1680 l2
->l2_sap
= ntohs(evhp
->ether_type
);
1681 l2
->l2_vid
= VLAN_ID(ntohs(evhp
->ether_tci
));
1682 l2
->l2_hdrsize
= sizeof (*evhp
);
1684 l2
->l2_sap
= mhi
.mhi_bindsap
;
1686 l2
->l2_hdrsize
= (uint32_t)mhi
.mhi_hdrsize
;
1692 * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
1693 * accept(). The notable difference is that dest address is now extracted
1694 * by hash() rather than by accept(). This saves a few memory references
1695 * for flow tables that do not care about mac addresses.
1698 flow_ether_hash(flow_tab_t
*ft
, flow_state_t
*s
)
1700 flow_l2info_t
*l2
= &s
->fs_l2info
;
1701 struct ether_vlan_header
*evhp
;
1703 evhp
= (struct ether_vlan_header
*)l2
->l2_start
;
1704 l2
->l2_daddr
= evhp
->ether_dhost
.ether_addr_octet
;
1705 return (HASH_ETHER_VID(l2
->l2_daddr
, l2
->l2_vid
, ft
->ft_size
));
1709 flow_ether_hash_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
1711 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1713 ASSERT((fd
->fd_mask
& FLOW_LINK_VID
) != 0 || fd
->fd_vid
== 0);
1714 return (HASH_ETHER_VID(fd
->fd_dst_mac
, fd
->fd_vid
, ft
->ft_size
));
1719 flow_ether_accept(flow_tab_t
*ft
, flow_state_t
*s
)
1721 flow_l2info_t
*l2
= &s
->fs_l2info
;
1722 struct ether_vlan_header
*evhp
;
1725 evhp
= (struct ether_vlan_header
*)s
->fs_mp
->b_rptr
;
1726 l2
->l2_start
= (uchar_t
*)evhp
;
1728 if (PKT_TOO_SMALL(s
, l2
->l2_start
+ sizeof (struct ether_header
)))
1731 if ((sap
= ntohs(evhp
->ether_tpid
)) == ETHERTYPE_VLAN
&&
1732 ((s
->fs_flags
& FLOW_IGNORE_VLAN
) == 0)) {
1733 if (PKT_TOO_SMALL(s
, l2
->l2_start
+ sizeof (*evhp
)))
1736 l2
->l2_sap
= ntohs(evhp
->ether_type
);
1737 l2
->l2_vid
= VLAN_ID(ntohs(evhp
->ether_tci
));
1738 l2
->l2_hdrsize
= sizeof (struct ether_vlan_header
);
1742 l2
->l2_hdrsize
= sizeof (struct ether_header
);
1748 * Validates a layer 2 flow entry.
1751 flow_l2_accept_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
1753 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1756 * Dest address is mandatory, and 0 length addresses are not yet
1759 if ((fd
->fd_mask
& FLOW_LINK_DST
) == 0 || fd
->fd_mac_len
== 0)
1762 if ((fd
->fd_mask
& FLOW_LINK_VID
) != 0) {
1764 * VLAN flows are only supported over ethernet macs.
1766 if (ft
->ft_mip
->mi_info
.mi_nativemedia
!= DL_ETHER
)
1769 if (fd
->fd_vid
== 0)
1773 flent
->fe_match
= flow_l2_match
;
1778 * Calculates hash index of flow entry.
1781 flow_l2_hash_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
1783 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1785 ASSERT((fd
->fd_mask
& FLOW_LINK_VID
) == 0 && fd
->fd_vid
== 0);
1786 return (flow_l2_addrhash(fd
->fd_dst_mac
,
1787 ft
->ft_mip
->mi_type
->mt_addr_length
, ft
->ft_size
));
1791 * This is used for duplicate flow checking.
1795 flow_l2_match_fe(flow_tab_t
*ft
, flow_entry_t
*f1
, flow_entry_t
*f2
)
1797 flow_desc_t
*fd1
= &f1
->fe_flow_desc
, *fd2
= &f2
->fe_flow_desc
;
1799 ASSERT(fd1
->fd_mac_len
== fd2
->fd_mac_len
&& fd1
->fd_mac_len
!= 0);
1800 return (bcmp(&fd1
->fd_dst_mac
, &fd2
->fd_dst_mac
,
1801 fd1
->fd_mac_len
) == 0 && fd1
->fd_vid
== fd2
->fd_vid
);
1805 * Generic flow entry insertion function.
1806 * Used by flow tables that do not have ordering requirements.
1810 flow_generic_insert_fe(flow_tab_t
*ft
, flow_entry_t
**headp
,
1811 flow_entry_t
*flent
)
1813 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
1815 if (*headp
!= NULL
) {
1816 ASSERT(flent
->fe_next
== NULL
);
1817 flent
->fe_next
= *headp
;
1824 * IP version independent DSField matching function.
1828 flow_ip_dsfield_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
1830 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1831 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1833 switch (l3info
->l3_version
) {
1834 case IPV4_VERSION
: {
1835 ipha_t
*ipha
= (ipha_t
*)l3info
->l3_start
;
1837 return ((ipha
->ipha_type_of_service
&
1838 fd
->fd_dsfield_mask
) == fd
->fd_dsfield
);
1840 case IPV6_VERSION
: {
1841 ip6_t
*ip6h
= (ip6_t
*)l3info
->l3_start
;
1843 return ((IPV6_FLOW_TCLASS(ip6h
->ip6_vcf
) &
1844 fd
->fd_dsfield_mask
) == fd
->fd_dsfield
);
1852 * IP v4 and v6 address matching.
1853 * The netmask only needs to be applied on the packet but not on the
1854 * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
1859 flow_ip_v4_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
1861 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1862 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1863 ipha_t
*ipha
= (ipha_t
*)l3info
->l3_start
;
1866 addr
= (l3info
->l3_dst_or_src
? ipha
->ipha_dst
: ipha
->ipha_src
);
1867 if ((fd
->fd_mask
& FLOW_IP_LOCAL
) != 0) {
1868 return ((addr
& V4_PART_OF_V6(fd
->fd_local_netmask
)) ==
1869 V4_PART_OF_V6(fd
->fd_local_addr
));
1871 return ((addr
& V4_PART_OF_V6(fd
->fd_remote_netmask
)) ==
1872 V4_PART_OF_V6(fd
->fd_remote_addr
));
1877 flow_ip_v6_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
1879 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1880 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1881 ip6_t
*ip6h
= (ip6_t
*)l3info
->l3_start
;
1884 addrp
= (l3info
->l3_dst_or_src
? &ip6h
->ip6_dst
: &ip6h
->ip6_src
);
1885 if ((fd
->fd_mask
& FLOW_IP_LOCAL
) != 0) {
1886 return (V6_MASK_EQ(*addrp
, fd
->fd_local_netmask
,
1887 fd
->fd_local_addr
));
1889 return (V6_MASK_EQ(*addrp
, fd
->fd_remote_netmask
, fd
->fd_remote_addr
));
1894 flow_ip_proto_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
1896 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1897 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1899 return (l3info
->l3_protocol
== fd
->fd_protocol
);
1903 flow_ip_hash(flow_tab_t
*ft
, flow_state_t
*s
)
1905 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1906 flow_mask_t mask
= ft
->ft_mask
;
1908 if ((mask
& FLOW_IP_LOCAL
) != 0) {
1909 l3info
->l3_dst_or_src
= ((s
->fs_flags
& FLOW_INBOUND
) != 0);
1910 } else if ((mask
& FLOW_IP_REMOTE
) != 0) {
1911 l3info
->l3_dst_or_src
= ((s
->fs_flags
& FLOW_OUTBOUND
) != 0);
1912 } else if ((mask
& FLOW_IP_DSFIELD
) != 0) {
1914 * DSField flents are arranged as a single list.
1919 * IP addr flents are hashed into two lists, v4 or v6.
1921 ASSERT(ft
->ft_size
>= 2);
1922 return ((l3info
->l3_version
== IPV4_VERSION
) ? 0 : 1);
1926 flow_ip_proto_hash(flow_tab_t
*ft
, flow_state_t
*s
)
1928 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1930 return (l3info
->l3_protocol
% ft
->ft_size
);
1935 flow_ip_accept(flow_tab_t
*ft
, flow_state_t
*s
)
1937 flow_l2info_t
*l2info
= &s
->fs_l2info
;
1938 flow_l3info_t
*l3info
= &s
->fs_l3info
;
1939 uint16_t sap
= l2info
->l2_sap
;
1942 l3_start
= l2info
->l2_start
+ l2info
->l2_hdrsize
;
1945 * Adjust start pointer if we're at the end of an mblk.
1947 CHECK_AND_ADJUST_START_PTR(s
, l3_start
);
1949 l3info
->l3_start
= l3_start
;
1950 if (!OK_32PTR(l3_start
))
1954 case ETHERTYPE_IP
: {
1955 ipha_t
*ipha
= (ipha_t
*)l3_start
;
1957 if (PKT_TOO_SMALL(s
, l3_start
+ IP_SIMPLE_HDR_LENGTH
))
1960 l3info
->l3_hdrsize
= IPH_HDR_LENGTH(ipha
);
1961 l3info
->l3_protocol
= ipha
->ipha_protocol
;
1962 l3info
->l3_version
= IPV4_VERSION
;
1963 l3info
->l3_fragmented
=
1964 IS_V4_FRAGMENT(ipha
->ipha_fragment_offset_and_flags
);
1967 case ETHERTYPE_IPV6
: {
1968 ip6_t
*ip6h
= (ip6_t
*)l3_start
;
1969 ip6_frag_t
*frag
= NULL
;
1970 uint16_t ip6_hdrlen
;
1973 if (!mac_ip_hdr_length_v6(ip6h
, s
->fs_mp
->b_wptr
, &ip6_hdrlen
,
1977 l3info
->l3_hdrsize
= ip6_hdrlen
;
1978 l3info
->l3_protocol
= nexthdr
;
1979 l3info
->l3_version
= IPV6_VERSION
;
1980 l3info
->l3_fragmented
= (frag
!= NULL
);
1991 flow_ip_proto_accept_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
1993 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
1995 switch (fd
->fd_protocol
) {
2000 case IPPROTO_ICMPV6
:
2001 flent
->fe_match
= flow_ip_proto_match
;
2010 flow_ip_accept_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
2012 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2015 in6_addr_t
*addr
, *netmask
;
2018 * DSField does not require a IP version.
2020 if (fd
->fd_mask
== FLOW_IP_DSFIELD
) {
2021 if (fd
->fd_dsfield_mask
== 0)
2024 flent
->fe_match
= flow_ip_dsfield_match
;
2029 * IP addresses must come with a version to avoid ambiguity.
2031 if ((fd
->fd_mask
& FLOW_IP_VERSION
) == 0)
2034 version
= fd
->fd_ipversion
;
2035 if (version
!= IPV4_VERSION
&& version
!= IPV6_VERSION
)
2038 mask
= fd
->fd_mask
& ~FLOW_IP_VERSION
;
2041 addr
= &fd
->fd_local_addr
;
2042 netmask
= &fd
->fd_local_netmask
;
2044 case FLOW_IP_REMOTE
:
2045 addr
= &fd
->fd_remote_addr
;
2046 netmask
= &fd
->fd_remote_netmask
;
2053 * Apply netmask onto specified address.
2055 V6_MASK_COPY(*addr
, *netmask
, *addr
);
2056 if (version
== IPV4_VERSION
) {
2057 ipaddr_t v4addr
= V4_PART_OF_V6((*addr
));
2058 ipaddr_t v4mask
= V4_PART_OF_V6((*netmask
));
2060 if (v4addr
== 0 || v4mask
== 0)
2062 flent
->fe_match
= flow_ip_v4_match
;
2064 if (IN6_IS_ADDR_UNSPECIFIED(addr
) ||
2065 IN6_IS_ADDR_UNSPECIFIED(netmask
))
2067 flent
->fe_match
= flow_ip_v6_match
;
2073 flow_ip_proto_hash_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
2075 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2077 return (fd
->fd_protocol
% ft
->ft_size
);
2081 flow_ip_hash_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
2083 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2086 * DSField flents are arranged as a single list.
2088 if ((fd
->fd_mask
& FLOW_IP_DSFIELD
) != 0)
2092 * IP addr flents are hashed into two lists, v4 or v6.
2094 ASSERT(ft
->ft_size
>= 2);
2095 return ((fd
->fd_ipversion
== IPV4_VERSION
) ? 0 : 1);
2100 flow_ip_proto_match_fe(flow_tab_t
*ft
, flow_entry_t
*f1
, flow_entry_t
*f2
)
2102 flow_desc_t
*fd1
= &f1
->fe_flow_desc
, *fd2
= &f2
->fe_flow_desc
;
2104 return (fd1
->fd_protocol
== fd2
->fd_protocol
);
2109 flow_ip_match_fe(flow_tab_t
*ft
, flow_entry_t
*f1
, flow_entry_t
*f2
)
2111 flow_desc_t
*fd1
= &f1
->fe_flow_desc
, *fd2
= &f2
->fe_flow_desc
;
2112 in6_addr_t
*a1
, *m1
, *a2
, *m2
;
2114 ASSERT(fd1
->fd_mask
== fd2
->fd_mask
);
2115 if (fd1
->fd_mask
== FLOW_IP_DSFIELD
) {
2116 return (fd1
->fd_dsfield
== fd2
->fd_dsfield
&&
2117 fd1
->fd_dsfield_mask
== fd2
->fd_dsfield_mask
);
2121 * flow_ip_accept_fe() already validated the version.
2123 ASSERT((fd1
->fd_mask
& FLOW_IP_VERSION
) != 0);
2124 if (fd1
->fd_ipversion
!= fd2
->fd_ipversion
)
2127 switch (fd1
->fd_mask
& ~FLOW_IP_VERSION
) {
2129 a1
= &fd1
->fd_local_addr
;
2130 m1
= &fd1
->fd_local_netmask
;
2131 a2
= &fd2
->fd_local_addr
;
2132 m2
= &fd2
->fd_local_netmask
;
2134 case FLOW_IP_REMOTE
:
2135 a1
= &fd1
->fd_remote_addr
;
2136 m1
= &fd1
->fd_remote_netmask
;
2137 a2
= &fd2
->fd_remote_addr
;
2138 m2
= &fd2
->fd_remote_netmask
;
2142 * This is unreachable given the checks in
2143 * flow_ip_accept_fe().
2148 if (fd1
->fd_ipversion
== IPV4_VERSION
) {
2149 return (V4_PART_OF_V6((*a1
)) == V4_PART_OF_V6((*a2
)) &&
2150 V4_PART_OF_V6((*m1
)) == V4_PART_OF_V6((*m2
)));
2153 return (IN6_ARE_ADDR_EQUAL(a1
, a2
) &&
2154 IN6_ARE_ADDR_EQUAL(m1
, m2
));
/*
 * flow_ip_mask2plen: convert an IPv6 netmask to a prefix length.
 * Walks the four 32-bit mask words from least- to most-significant,
 * starting the length at IPV6_ABITS and using ffs() on the host-order
 * word to find the lowest set bit. NOTE(review): extraction-garbled
 * span, tokens kept verbatim; the zero-word handling, loop tail and the
 * final return (original lines 2167-2176) are not visible in this chunk
 * -- confirm against the full source before editing.
 */
2159 flow_ip_mask2plen(in6_addr_t
*v6mask
)
2162 int plen
= IPV6_ABITS
;
2165 for (i
= 3; i
>= 0; i
--) {
2166 if (v6mask
->s6_addr32
[i
] == 0) {
2170 bits
= ffs(ntohl(v6mask
->s6_addr32
[i
])) - 1;
2180 flow_ip_insert_fe(flow_tab_t
*ft
, flow_entry_t
**headp
,
2181 flow_entry_t
*flent
)
2183 flow_entry_t
**p
= headp
;
2184 flow_desc_t
*fd0
, *fd
;
2188 ASSERT(MAC_PERIM_HELD((mac_handle_t
)ft
->ft_mip
));
2191 * No special ordering needed for dsfield.
2193 fd0
= &flent
->fe_flow_desc
;
2194 if ((fd0
->fd_mask
& FLOW_IP_DSFIELD
) != 0) {
2196 ASSERT(flent
->fe_next
== NULL
);
2197 flent
->fe_next
= *p
;
2204 * IP address flows are arranged in descending prefix length order.
2206 m0
= ((fd0
->fd_mask
& FLOW_IP_LOCAL
) != 0) ?
2207 &fd0
->fd_local_netmask
: &fd0
->fd_remote_netmask
;
2208 plen0
= flow_ip_mask2plen(m0
);
2211 for (; *p
!= NULL
; p
= &(*p
)->fe_next
) {
2212 fd
= &(*p
)->fe_flow_desc
;
2215 * Normally a dsfield flent shouldn't end up on the same
2216 * list as an IP address because flow tables are (for now)
2217 * disjoint. If we decide to support both IP and dsfield
2218 * in the same table in the future, this check will allow
2221 if ((fd
->fd_mask
& FLOW_IP_DSFIELD
) != 0)
2225 * We also allow for the mixing of local and remote address
2226 * flents within one list.
2228 m
= ((fd
->fd_mask
& FLOW_IP_LOCAL
) != 0) ?
2229 &fd
->fd_local_netmask
: &fd
->fd_remote_netmask
;
2230 plen
= flow_ip_mask2plen(m
);
2236 ASSERT(flent
->fe_next
== NULL
);
2237 flent
->fe_next
= *p
;
2244 * Transport layer protocol and port matching functions.
2249 flow_transport_lport_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
2251 flow_l3info_t
*l3info
= &s
->fs_l3info
;
2252 flow_l4info_t
*l4info
= &s
->fs_l4info
;
2253 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2255 return (fd
->fd_protocol
== l3info
->l3_protocol
&&
2256 fd
->fd_local_port
== l4info
->l4_hash_port
);
2261 flow_transport_rport_match(flow_tab_t
*ft
, flow_entry_t
*flent
, flow_state_t
*s
)
2263 flow_l3info_t
*l3info
= &s
->fs_l3info
;
2264 flow_l4info_t
*l4info
= &s
->fs_l4info
;
2265 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2267 return (fd
->fd_protocol
== l3info
->l3_protocol
&&
2268 fd
->fd_remote_port
== l4info
->l4_hash_port
);
2272 * Transport hash function.
2273 * Since we only support either local or remote port flows,
2274 * we only need to extract one of the ports to be used for
2278 flow_transport_hash(flow_tab_t
*ft
, flow_state_t
*s
)
2280 flow_l3info_t
*l3info
= &s
->fs_l3info
;
2281 flow_l4info_t
*l4info
= &s
->fs_l4info
;
2282 uint8_t proto
= l3info
->l3_protocol
;
2283 boolean_t dst_or_src
;
2285 if ((ft
->ft_mask
& FLOW_ULP_PORT_LOCAL
) != 0) {
2286 dst_or_src
= ((s
->fs_flags
& FLOW_INBOUND
) != 0);
2288 dst_or_src
= ((s
->fs_flags
& FLOW_OUTBOUND
) != 0);
2291 l4info
->l4_hash_port
= dst_or_src
? l4info
->l4_dst_port
:
2292 l4info
->l4_src_port
;
2294 return ((l4info
->l4_hash_port
^ (proto
<< 4)) % ft
->ft_size
);
2298 * Unlike other accept() functions above, we do not need to get the header
2299 * size because this is our highest layer so far. If we want to do support
2300 * other higher layer protocols, we would need to save the l4_hdrsize
2301 * in the code below.
2306 flow_transport_accept(flow_tab_t
*ft
, flow_state_t
*s
)
2308 flow_l3info_t
*l3info
= &s
->fs_l3info
;
2309 flow_l4info_t
*l4info
= &s
->fs_l4info
;
2310 uint8_t proto
= l3info
->l3_protocol
;
2313 l4_start
= l3info
->l3_start
+ l3info
->l3_hdrsize
;
2316 * Adjust start pointer if we're at the end of an mblk.
2318 CHECK_AND_ADJUST_START_PTR(s
, l4_start
);
2320 l4info
->l4_start
= l4_start
;
2321 if (!OK_32PTR(l4_start
))
2324 if (l3info
->l3_fragmented
== B_TRUE
)
2329 struct tcphdr
*tcph
= (struct tcphdr
*)l4_start
;
2331 if (PKT_TOO_SMALL(s
, l4_start
+ sizeof (*tcph
)))
2334 l4info
->l4_src_port
= tcph
->th_sport
;
2335 l4info
->l4_dst_port
= tcph
->th_dport
;
2339 struct udphdr
*udph
= (struct udphdr
*)l4_start
;
2341 if (PKT_TOO_SMALL(s
, l4_start
+ sizeof (*udph
)))
2344 l4info
->l4_src_port
= udph
->uh_sport
;
2345 l4info
->l4_dst_port
= udph
->uh_dport
;
2348 case IPPROTO_SCTP
: {
2349 sctp_hdr_t
*sctph
= (sctp_hdr_t
*)l4_start
;
2351 if (PKT_TOO_SMALL(s
, l4_start
+ sizeof (*sctph
)))
2354 l4info
->l4_src_port
= sctph
->sh_sport
;
2355 l4info
->l4_dst_port
= sctph
->sh_dport
;
2366 * Validates transport flow entry.
2367 * The protocol field must be present.
2372 flow_transport_accept_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
2374 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2375 flow_mask_t mask
= fd
->fd_mask
;
2377 if ((mask
& FLOW_IP_PROTOCOL
) == 0)
2380 switch (fd
->fd_protocol
) {
2389 switch (mask
& ~FLOW_IP_PROTOCOL
) {
2390 case FLOW_ULP_PORT_LOCAL
:
2391 if (fd
->fd_local_port
== 0)
2394 flent
->fe_match
= flow_transport_lport_match
;
2396 case FLOW_ULP_PORT_REMOTE
:
2397 if (fd
->fd_remote_port
== 0)
2400 flent
->fe_match
= flow_transport_rport_match
;
2404 * transport-only flows conflicts with our table type.
2406 return (EOPNOTSUPP
);
2415 flow_transport_hash_fe(flow_tab_t
*ft
, flow_entry_t
*flent
)
2417 flow_desc_t
*fd
= &flent
->fe_flow_desc
;
2420 port
= ((fd
->fd_mask
& FLOW_ULP_PORT_LOCAL
) != 0) ?
2421 fd
->fd_local_port
: fd
->fd_remote_port
;
2423 return ((port
^ (fd
->fd_protocol
<< 4)) % ft
->ft_size
);
2428 flow_transport_match_fe(flow_tab_t
*ft
, flow_entry_t
*f1
, flow_entry_t
*f2
)
2430 flow_desc_t
*fd1
= &f1
->fe_flow_desc
, *fd2
= &f2
->fe_flow_desc
;
2432 if (fd1
->fd_protocol
!= fd2
->fd_protocol
)
2435 if ((fd1
->fd_mask
& FLOW_ULP_PORT_LOCAL
) != 0)
2436 return (fd1
->fd_local_port
== fd2
->fd_local_port
);
2438 if ((fd1
->fd_mask
& FLOW_ULP_PORT_REMOTE
) != 0)
2439 return (fd1
->fd_remote_port
== fd2
->fd_remote_port
);
/*
 * Per-table-type operation vectors. NOTE(review): extraction-garbled
 * span; tokens kept verbatim. Several initializer members (accept_fe /
 * hash_fe / match_fe / hash entries for flow_l2_ops and flow_ip_ops)
 * are not visible in this chunk -- confirm member order against the
 * flow_ops_t definition in the full source before editing.
 */
2444 static flow_ops_t flow_l2_ops
= {
2448 flow_generic_insert_fe
,
/*
 * IP-address tables: layer-2 accept feeds layer-3 accept.
 */
2453 static flow_ops_t flow_ip_ops
= {
2459 {flow_l2_accept
, flow_ip_accept
}
/*
 * Protocol tables share the generic head-insert (no ordering needed).
 */
2462 static flow_ops_t flow_ip_proto_ops
= {
2463 flow_ip_proto_accept_fe
,
2464 flow_ip_proto_hash_fe
,
2465 flow_ip_proto_match_fe
,
2466 flow_generic_insert_fe
,
2468 {flow_l2_accept
, flow_ip_accept
}
/*
 * Transport tables add a third accept stage for the L4 header.
 */
2471 static flow_ops_t flow_transport_ops
= {
2472 flow_transport_accept_fe
,
2473 flow_transport_hash_fe
,
2474 flow_transport_match_fe
,
2475 flow_generic_insert_fe
,
2476 flow_transport_hash
,
2477 {flow_l2_accept
, flow_ip_accept
, flow_transport_accept
}
/*
 * Legal flow masks mapped to their ops vector and hash-table size.
 */
2480 static flow_tab_info_t flow_tab_info_list
[] = {
2481 {&flow_ip_ops
, FLOW_IP_VERSION
| FLOW_IP_LOCAL
, 2},
2482 {&flow_ip_ops
, FLOW_IP_VERSION
| FLOW_IP_REMOTE
, 2},
2483 {&flow_ip_ops
, FLOW_IP_DSFIELD
, 1},
2484 {&flow_ip_proto_ops
, FLOW_IP_PROTOCOL
, 256},
2485 {&flow_transport_ops
, FLOW_IP_PROTOCOL
| FLOW_ULP_PORT_LOCAL
, 1024},
2486 {&flow_transport_ops
, FLOW_IP_PROTOCOL
| FLOW_ULP_PORT_REMOTE
, 1024}
/* Number of entries in flow_tab_info_list. */
2489 #define FLOW_MAX_TAB_INFO \
2490 ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t))
2492 static flow_tab_info_t
*
2493 mac_flow_tab_info_get(flow_mask_t mask
)
2497 for (i
= 0; i
< FLOW_MAX_TAB_INFO
; i
++) {
2498 if (mask
== flow_tab_info_list
[i
].fti_mask
)
2499 return (&flow_tab_info_list
[i
]);