4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 #include <sys/systm.h>
29 #include <sys/socket.h>
30 #include <sys/strsubr.h>
31 #include <sys/strsun.h>
32 #include <netinet/in.h>
33 #include <ipp/ipgpc/classifier.h>
37 #include <inet/ipp_common.h>
39 /* Implementation file for classifier used in ipgpc module */
42 * CHECK_MATCH_STATUS(match_status, slctrs_srchd, selector_mask)
44 * determines the result of the selector search and what action needs to be taken next.
46 * if a NORMAL_MATCH occurs, business as usual NORMAL_MATCH
47 * if the selector was not searched because only DONTCARE keys are loaded,
48 * the selector is marked as not being searched
49 * otherwise, memory error occurred or no matches were found, classify()
50 * should return the error match status immediately
52 #define CHECK_MATCH_STATUS(match_status, slctrs_srchd, selector_mask) \
53 (((match_status) == NORMAL_MATCH) ? \
55 (((match_status) == DONTCARE_ONLY_MATCH) ? \
56 (*(slctrs_srchd) ^= (selector_mask), NORMAL_MATCH) : \
59 /* used to determine if an action instance already exists */
60 boolean_t ipgpc_action_exist
= B_FALSE
;
61 int ipgpc_debug
= 0; /* IPGPC debugging level */
64 static int common_classify(ipgpc_packet_t
*, ht_match_t
*, uint16_t *);
65 static void update_stats(int, uint_t
);
66 static int bestmatch(ht_match_t
*, uint16_t);
67 static void get_port_info(ipgpc_packet_t
*, void *, int, mblk_t
*);
70 * common_classify(packet, fid_table, slctrs_srchd)
72 * searches each of the common selectors
73 * - returns NORMAL_MATCH on success; otherwise returns the failing match status (NO_MATCHES or a memory error)
76 common_classify(ipgpc_packet_t
*packet
, ht_match_t
*fid_table
,
77 uint16_t *slctrs_srchd
)
81 /* Find on packet direction */
83 ipgpc_findfilters(IPGPC_TABLE_DIR
, packet
->direction
, fid_table
);
84 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
85 ipgpc_table_list
[DIR_IDX
].info
.mask
) != NORMAL_MATCH
) {
86 return (match_status
);
89 /* Find on IF_INDEX of packet */
91 ipgpc_findfilters(IPGPC_TABLE_IF
, packet
->if_index
, fid_table
);
92 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
93 ipgpc_table_list
[IF_IDX
].info
.mask
) != NORMAL_MATCH
) {
94 return (match_status
);
97 /* Find on DS field */
99 ipgpc_findfilters(IPGPC_BA_DSID
, packet
->dsfield
, fid_table
);
100 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
101 ipgpc_ds_table_id
.info
.mask
) != NORMAL_MATCH
) {
102 return (match_status
);
105 /* Find on UID of packet */
107 ipgpc_findfilters(IPGPC_TABLE_UID
, packet
->uid
, fid_table
);
108 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
109 ipgpc_table_list
[UID_IDX
].info
.mask
) != NORMAL_MATCH
) {
110 return (match_status
);
113 /* Find on PROJID of packet */
115 ipgpc_findfilters(IPGPC_TABLE_PROJID
, packet
->projid
, fid_table
);
116 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
117 ipgpc_table_list
[PROJID_IDX
].info
.mask
) != NORMAL_MATCH
) {
118 return (match_status
);
121 /* Find on IP Protocol field */
122 if (packet
->proto
> 0) {
123 match_status
= ipgpc_findfilters(IPGPC_TABLE_PROTOID
,
124 packet
->proto
, fid_table
);
125 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
126 ipgpc_table_list
[PROTOID_IDX
].info
.mask
)
128 return (match_status
);
132 *slctrs_srchd
^= ipgpc_table_list
[PROTOID_IDX
].info
.mask
;
135 /* Find on IP Source Port field */
136 if (packet
->sport
> 0) {
137 match_status
= ipgpc_findfilters(IPGPC_TRIE_SPORTID
,
138 packet
->sport
, fid_table
);
139 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
140 ipgpc_trie_list
[IPGPC_TRIE_SPORTID
].info
.mask
)
142 return (match_status
);
146 *slctrs_srchd
^= ipgpc_trie_list
[IPGPC_TRIE_SPORTID
].info
.mask
;
149 /* Find on IP Destination Port field */
150 if (packet
->dport
> 0) {
151 match_status
= ipgpc_findfilters(IPGPC_TRIE_DPORTID
,
152 packet
->dport
, fid_table
);
153 if (CHECK_MATCH_STATUS(match_status
, slctrs_srchd
,
154 ipgpc_trie_list
[IPGPC_TRIE_DPORTID
].info
.mask
)
156 return (match_status
);
160 *slctrs_srchd
^= ipgpc_trie_list
[IPGPC_TRIE_DPORTID
].info
.mask
;
162 return (NORMAL_MATCH
);
166 * update_stats(class_id, nbytes)
168 * if ipgpc_gather_stats == TRUE
169 * updates the statistics for the class identified by the input class_id
170 * and the global ipgpc kstats
171 * updates the last time the class was matched with the current hrtime value,
172 * number of packets and number of bytes with nbytes
175 update_stats(int class_id
, uint_t nbytes
)
177 if (ipgpc_gather_stats
) {
178 /* update global stats */
179 BUMP_STATS(ipgpc_npackets
);
180 UPDATE_STATS(ipgpc_nbytes
, nbytes
);
181 if (ipgpc_cid_list
[class_id
].aclass
.gather_stats
) {
182 /* update per class stats */
183 SET_STATS(ipgpc_cid_list
[class_id
].stats
.last_match
,
185 BUMP_STATS(ipgpc_cid_list
[class_id
].stats
.npackets
);
186 UPDATE_STATS(ipgpc_cid_list
[class_id
].stats
.nbytes
,
193 * FREE_FID_TABLE(fid_table, p, q, i)
195 * searches fid_table for dynamically allocated memory and frees it
198 #define FREE_FID_TABLE(fid_table, p, q, i) \
199 /* free all allocated memory in fid_table */ \
200 for (i = 0; i < HASH_SIZE; ++i) { \
201 if (fid_table[i].next != NULL) { \
202 p = fid_table[i].next; \
203 while (p != NULL) { \
206 kmem_cache_free(ht_match_cache, q); \
213 * ipgpc_classify(af, packet)
215 * The function that drives the packet classification algorithm. Given an
216 * address family (either AF_INET or AF_INET6) the input packet structure
217 * is matched against all the selector structures. For each search of
218 * a selector structure, all matched filters are collected. Once all
219 * selectors are searched, the best match of all matched filters is
220 * determined. Finally, the class associated with the best matching filter
221 * is returned. If no filters were matched, the default class is returned.
222 * If a memory error occurred, NULL is returned.
225 ipgpc_classify(int af
, ipgpc_packet_t
*packet
)
228 uint16_t slctrs_srchd
;
230 ht_match_t fid_table
[HASH_SIZE
];
235 if (ipgpc_num_fltrs
== 0) {
236 /* zero filters are loaded, return default class */
237 update_stats(ipgpc_def_class_id
, packet
->len
);
239 * no need to free fid_table, since zero selectors were
240 * searched and dynamic memory wasn't allocated.
242 return (&ipgpc_cid_list
[ipgpc_def_class_id
].aclass
);
246 slctrs_srchd
= ALL_MATCH_MASK
;
247 bzero(fid_table
, sizeof (ht_match_t
) * HASH_SIZE
);
249 /* first search all address family independent selectors */
250 rc
= common_classify(packet
, fid_table
, &slctrs_srchd
);
251 if (rc
!= NORMAL_MATCH
) {
252 /* free all dynamic allocated memory */
253 FREE_FID_TABLE(fid_table
, p
, q
, i
);
254 if (rc
== NO_MATCHES
) {
255 update_stats(ipgpc_def_class_id
, packet
->len
);
256 return (&ipgpc_cid_list
[ipgpc_def_class_id
].aclass
);
257 } else { /* memory error */
262 switch (af
) { /* switch off of address family */
264 /* Find on IPv4 Source Address field */
265 match_status
= ipgpc_findfilters(IPGPC_TRIE_SADDRID
,
266 V4_PART_OF_V6(packet
->saddr
), fid_table
);
267 if (CHECK_MATCH_STATUS(match_status
, &slctrs_srchd
,
268 ipgpc_trie_list
[IPGPC_TRIE_SADDRID
].info
.mask
)
270 /* free all dynamic allocated memory */
271 FREE_FID_TABLE(fid_table
, p
, q
, i
);
272 if (match_status
== NO_MATCHES
) {
273 update_stats(ipgpc_def_class_id
, packet
->len
);
274 return (&ipgpc_cid_list
[ipgpc_def_class_id
].
276 } else { /* memory error */
280 /* Find on IPv4 Destination Address field */
281 match_status
= ipgpc_findfilters(IPGPC_TRIE_DADDRID
,
282 V4_PART_OF_V6(packet
->daddr
), fid_table
);
283 if (CHECK_MATCH_STATUS(match_status
, &slctrs_srchd
,
284 ipgpc_trie_list
[IPGPC_TRIE_DADDRID
].info
.mask
)
286 /* free all dynamic allocated memory */
287 FREE_FID_TABLE(fid_table
, p
, q
, i
);
288 if (match_status
== NO_MATCHES
) {
289 update_stats(ipgpc_def_class_id
, packet
->len
);
290 return (&ipgpc_cid_list
[ipgpc_def_class_id
].
292 } else { /* memory error */
298 /* Find on IPv6 Source Address field */
299 match_status
= ipgpc_findfilters6(IPGPC_TRIE_SADDRID6
,
300 packet
->saddr
, fid_table
);
301 if (CHECK_MATCH_STATUS(match_status
, &slctrs_srchd
,
302 ipgpc_trie_list
[IPGPC_TRIE_SADDRID6
].info
.mask
)
304 /* free all dynamic allocated memory */
305 FREE_FID_TABLE(fid_table
, p
, q
, i
);
306 if (match_status
== NO_MATCHES
) {
307 update_stats(ipgpc_def_class_id
, packet
->len
);
308 return (&ipgpc_cid_list
[ipgpc_def_class_id
].
310 } else { /* memory error */
314 /* Find on IPv6 Destination Address field */
315 match_status
= ipgpc_findfilters6(IPGPC_TRIE_DADDRID6
,
316 packet
->daddr
, fid_table
);
317 if (CHECK_MATCH_STATUS(match_status
, &slctrs_srchd
,
318 ipgpc_trie_list
[IPGPC_TRIE_DADDRID6
].info
.mask
)
320 /* free all dynamic allocated memory */
321 FREE_FID_TABLE(fid_table
, p
, q
, i
);
322 if (match_status
== NO_MATCHES
) {
323 update_stats(ipgpc_def_class_id
, packet
->len
);
324 return (&ipgpc_cid_list
[ipgpc_def_class_id
].
332 ipgpc0dbg(("ipgpc_classify(): Unknown Address Family"));
333 /* free all dynamic allocated memory */
334 FREE_FID_TABLE(fid_table
, p
, q
, i
);
338 /* zero selectors were searched, return default */
339 if (slctrs_srchd
== 0) {
341 * no need to free fid_table, since zero selectors were
342 * searched and dynamic memory wasn't allocated
344 update_stats(ipgpc_def_class_id
, packet
->len
);
345 return (&ipgpc_cid_list
[ipgpc_def_class_id
].aclass
);
348 /* Perform best match search */
349 class_id
= bestmatch(fid_table
, slctrs_srchd
);
350 /* free all dynamic allocated memory */
351 FREE_FID_TABLE(fid_table
, p
, q
, i
);
353 update_stats(class_id
, packet
->len
);
354 return (&ipgpc_cid_list
[class_id
].aclass
);
358 * bestmatch(fid_table, bestmask)
360 * determines the best matching filter in fid_table which matches the criteria
361 * described below and returns the class id
364 bestmatch(ht_match_t
*fid_table
, uint16_t bestmask
)
375 for (i
= 0; i
< HASH_SIZE
; ++i
) {
376 if (fid_table
[i
].key
== 0) {
379 for (item
= &fid_table
[i
]; item
!= NULL
; item
= item
->next
) {
382 * 1. Matches in all selectors searched
383 * 2. highest priority of filters that meet 1.
384 * 3. best precedence of filters that meet 2
385 * with the same priority
387 if ((key
= item
->key
) == 0) {
390 if (ipgpc_fid_list
[key
].info
<= 0) {
395 * check to see if fid has been inserted into a
396 * selector structure we did not search
397 * if so, then this filter is not a valid match
398 * and bestmatch() should continue
399 * the test below will == 0 if, for every selector, either:
400 * - a selector has been searched and this filter
401 * either describes don't care or has inserted a
402 * value into this selector structure
403 * - a selector has not been searched and this filter
404 * has described don't care for this selector
406 if (((~bestmask
) & ipgpc_fid_list
[key
].insert_map
)
412 * tests to see if the map of selectors that
413 * were matched, equals the map of selectors
414 * structures this filter inserts into
416 if (item
->match_map
!= ipgpc_fid_list
[key
].insert_map
) {
420 if (bestmatch
== -1) { /* first matching filter */
421 /* this filter becomes the bestmatch */
423 ipgpc_fid_list
[key
].filter
.priority
;
425 ipgpc_fid_list
[key
].filter
.precedence
;
426 best_prio
= ((uint64_t)temp_prio
<< 32) |
427 (uint64_t)~temp_prec
;
433 * calculate the real priority by combining priority and precedence
437 ((uint64_t)ipgpc_fid_list
[key
].filter
.priority
439 (uint64_t)~ipgpc_fid_list
[key
].filter
.precedence
;
441 /* check to see if this is the new bestmatch */
442 if (real_prio
> best_prio
) {
444 ipgpc3dbg(("bestmatch: filter %s " \
445 "REJECTED because of better priority %d" \
446 " and/or precedence %d",
447 ipgpc_fid_list
[oldbm
].filter
.filter_name
,
448 ipgpc_fid_list
[oldbm
].filter
.priority
,
449 ipgpc_fid_list
[oldbm
].filter
.precedence
));
450 best_prio
= real_prio
;
453 ipgpc3dbg(("bestmatch: filter %s " \
454 "REJECTED because of beter priority %d" \
455 " and/or precedence %d",
456 ipgpc_fid_list
[key
].filter
.filter_name
,
457 ipgpc_fid_list
[key
].filter
.priority
,
458 ipgpc_fid_list
[key
].filter
.precedence
));
462 if (bestmatch
== -1) { /* no best matches were found */
463 ipgpc3dbg(("bestmatch: No filters ACCEPTED"));
464 return (ipgpc_def_class_id
);
466 ipgpc3dbg(("bestmatch: filter %s ACCEPTED with priority %d " \
468 ipgpc_fid_list
[bestmatch
].filter
.filter_name
,
469 ipgpc_fid_list
[bestmatch
].filter
.priority
,
470 ipgpc_fid_list
[bestmatch
].filter
.precedence
));
471 return (ipgpc_fid_list
[bestmatch
].class_id
);
476 * get_port_info(packet, iph, af, mp)
478 * Gets the source and destination ports from the ULP header, if present.
479 * If this is a fragment, don't try to get the port information even if this
480 * is the first fragment. The reason being we won't have this information
481 * in subsequent fragments and may end up classifying the first fragment
482 * differently than others. This is not desired.
483 * For IPv6 packets, step through the extension headers, if present, in
484 * order to get to the ULP header.
487 get_port_info(ipgpc_packet_t
*packet
, void *iph
, int af
, mblk_t
*mp
)
494 ipha_t
*ipha
= (ipha_t
*)iph
;
496 u2
= ntohs(ipha
->ipha_fragment_offset_and_flags
);
497 u1
= u2
& (IPH_MF
| IPH_OFFSET
);
501 iplen
= (ipha
->ipha_version_and_hdr_length
& 0xF) << 2;
502 up
= (uint16_t *)(mp
->b_rptr
+ iplen
);
503 packet
->sport
= (uint16_t)*up
++;
504 packet
->dport
= (uint16_t)*up
;
505 } else { /* AF_INET6 */
506 uint_t length
= IPV6_HDR_LEN
;
507 ip6_t
*ip6h
= (ip6_t
*)iph
;
509 uint8_t *nexthdrp
, *whereptr
, *endptr
;
514 whereptr
= ((uint8_t *)&ip6h
[1]);
516 nexthdrp
= &ip6h
->ip6_nxt
;
517 while (whereptr
< endptr
) {
519 case IPPROTO_HOPOPTS
:
520 hbhhdr
= (ip6_hbh_t
*)whereptr
;
521 ehdrlen
= 8 * (hbhhdr
->ip6h_len
+ 1);
522 if ((uchar_t
*)hbhhdr
+ ehdrlen
> endptr
)
524 nexthdrp
= &hbhhdr
->ip6h_nxt
;
526 case IPPROTO_DSTOPTS
:
527 desthdr
= (ip6_dest_t
*)whereptr
;
528 ehdrlen
= 8 * (desthdr
->ip6d_len
+ 1);
529 if ((uchar_t
*)desthdr
+ ehdrlen
> endptr
)
531 nexthdrp
= &desthdr
->ip6d_nxt
;
533 case IPPROTO_ROUTING
:
534 rthdr
= (ip6_rthdr_t
*)whereptr
;
535 ehdrlen
= 8 * (rthdr
->ip6r_len
+ 1);
536 if ((uchar_t
*)rthdr
+ ehdrlen
> endptr
)
538 nexthdrp
= &rthdr
->ip6r_nxt
;
540 case IPPROTO_FRAGMENT
:
546 * Verify we have at least ICMP_MIN_TP_HDR_LEN
547 * bytes of the ULP's header to get the port info.
550 if (((uchar_t
*)ip6h
+ length
+
551 ICMP_MIN_TP_HDR_LEN
) > endptr
) {
554 /* Get the protocol and the ports */
555 packet
->proto
= *nexthdrp
;
556 up
= (uint16_t *)((uchar_t
*)ip6h
+ length
);
557 packet
->sport
= (uint16_t)*up
++;
558 packet
->dport
= (uint16_t)*up
;
565 packet
->proto
= *nexthdrp
;
578 * find_ids(packet, mp)
580 * attempt to discern the uid and projid of the originator of a packet by
581 * looking at the dblks making up the packet - yeuch!
583 * We do it by skipping any fragments with a credp of NULL (originated in
584 * kernel), taking the first value that isn't NULL to be the credp for the
585 * whole packet. We also suck the projid from the same fragment.
588 find_ids(ipgpc_packet_t
*packet
, mblk_t
*mp
)
592 cr
= msg_getcred(mp
, NULL
);
594 packet
->uid
= crgetuid(cr
);
595 packet
->projid
= crgetprojid(cr
);
597 packet
->uid
= (uid_t
)-1;
603 * parse_packet(packet, mp)
605 * parses the given message block into a ipgpc_packet_t structure
608 parse_packet(ipgpc_packet_t
*packet
, mblk_t
*mp
)
612 /* parse message block for IP header and ports */
613 ipha
= (ipha_t
*)mp
->b_rptr
; /* get ip header */
614 V4_PART_OF_V6(packet
->saddr
) = (int32_t)ipha
->ipha_src
;
615 V4_PART_OF_V6(packet
->daddr
) = (int32_t)ipha
->ipha_dst
;
616 packet
->dsfield
= ipha
->ipha_type_of_service
;
617 packet
->proto
= ipha
->ipha_protocol
;
620 find_ids(packet
, mp
);
621 packet
->len
= msgdsize(mp
);
622 /* parse out TCP/UDP ports, if appropriate */
623 if ((packet
->proto
== IPPROTO_TCP
) || (packet
->proto
== IPPROTO_UDP
) ||
624 (packet
->proto
== IPPROTO_SCTP
)) {
625 get_port_info(packet
, ipha
, AF_INET
, mp
);
630 * parse_packet6(packet, mp)
632 * parses the message block into a ipgpc_packet_t structure for IPv6 traffic
635 parse_packet6(ipgpc_packet_t
*packet
, mblk_t
*mp
)
637 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
639 /* parse message block for IP header and ports */
640 bcopy(ip6h
->ip6_src
.s6_addr32
, packet
->saddr
.s6_addr32
,
641 sizeof (ip6h
->ip6_src
.s6_addr32
));
642 bcopy(ip6h
->ip6_dst
.s6_addr32
, packet
->daddr
.s6_addr32
,
643 sizeof (ip6h
->ip6_dst
.s6_addr32
));
644 /* Will be (re-)assigned in get_port_info */
645 packet
->proto
= ip6h
->ip6_nxt
;
646 packet
->dsfield
= __IPV6_TCLASS_FROM_FLOW(ip6h
->ip6_vcf
);
647 find_ids(packet
, mp
);
648 packet
->len
= msgdsize(mp
);
651 /* Need to pullup everything. */
652 if (mp
->b_cont
!= NULL
) {
653 if (!pullupmsg(mp
, -1)) {
654 ipgpc0dbg(("parse_packet6(): pullup error, can't " \
658 ip6h
= (ip6_t
*)mp
->b_rptr
;
660 get_port_info(packet
, ip6h
, AF_INET6
, mp
);
665 * print_packet(af, packet)
667 * prints the contents of the packet structure for specified address family
670 print_packet(int af
, ipgpc_packet_t
*pkt
)
672 char saddrbuf
[INET6_ADDRSTRLEN
];
673 char daddrbuf
[INET6_ADDRSTRLEN
];
676 (void) inet_ntop(af
, &V4_PART_OF_V6(pkt
->saddr
), saddrbuf
,
678 (void) inet_ntop(af
, &V4_PART_OF_V6(pkt
->daddr
), daddrbuf
,
681 ipgpc4dbg(("print_packet: saddr = %s, daddr = %s, sport = %u" \
682 ", dport = %u, proto = %u, dsfield = %x, uid = %d," \
683 " if_index = %d, projid = %d, direction = %d", saddrbuf
,
684 daddrbuf
, ntohs(pkt
->sport
), ntohs(pkt
->dport
), pkt
->proto
,
685 pkt
->dsfield
, pkt
->uid
, pkt
->if_index
,
686 pkt
->projid
, pkt
->direction
));
687 } else if (af
== AF_INET6
) {
688 (void) inet_ntop(af
, pkt
->saddr
.s6_addr32
, saddrbuf
,
690 (void) inet_ntop(af
, pkt
->daddr
.s6_addr32
, daddrbuf
,
693 ipgpc4dbg(("print_packet: saddr = %s, daddr = %s, sport = %u" \
694 ", dport = %u, proto = %u, dsfield = %x, uid = %d," \
695 " if_index = %d, projid = %d, direction = %d", saddrbuf
,
696 daddrbuf
, ntohs(pkt
->sport
), ntohs(pkt
->dport
), pkt
->proto
,
697 pkt
->dsfield
, pkt
->uid
, pkt
->if_index
,
698 pkt
->projid
, pkt
->direction
));
701 #endif /* IPGPC_DEBUG */