1 #include <linux/kernel.h>
2 #include <linux/skbuff.h>
3 #include <linux/export.h>
5 #include <linux/ipv6.h>
6 #include <linux/if_vlan.h>
9 #include <linux/igmp.h>
10 #include <linux/icmp.h>
11 #include <linux/sctp.h>
12 #include <linux/dccp.h>
13 #include <linux/if_tunnel.h>
14 #include <linux/if_pppox.h>
15 #include <linux/ppp_defs.h>
16 #include <linux/stddef.h>
17 #include <linux/if_ether.h>
18 #include <linux/mpls.h>
19 #include <net/flow_dissector.h>
20 #include <scsi/fc/fc_fcoe.h>
22 static bool skb_flow_dissector_uses_key(struct flow_dissector
*flow_dissector
,
23 enum flow_dissector_key_id key_id
)
25 return flow_dissector
->used_keys
& (1 << key_id
);
28 static void skb_flow_dissector_set_key(struct flow_dissector
*flow_dissector
,
29 enum flow_dissector_key_id key_id
)
31 flow_dissector
->used_keys
|= (1 << key_id
);
34 static void *skb_flow_dissector_target(struct flow_dissector
*flow_dissector
,
35 enum flow_dissector_key_id key_id
,
36 void *target_container
)
38 return ((char *) target_container
) + flow_dissector
->offset
[key_id
];
41 void skb_flow_dissector_init(struct flow_dissector
*flow_dissector
,
42 const struct flow_dissector_key
*key
,
43 unsigned int key_count
)
47 memset(flow_dissector
, 0, sizeof(*flow_dissector
));
49 for (i
= 0; i
< key_count
; i
++, key
++) {
50 /* User should make sure that every key target offset is withing
51 * boundaries of unsigned short.
53 BUG_ON(key
->offset
> USHRT_MAX
);
54 BUG_ON(skb_flow_dissector_uses_key(flow_dissector
,
57 skb_flow_dissector_set_key(flow_dissector
, key
->key_id
);
58 flow_dissector
->offset
[key
->key_id
] = key
->offset
;
61 /* Ensure that the dissector always includes control and basic key.
62 * That way we are able to avoid handling lack of these in fast path.
64 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector
,
65 FLOW_DISSECTOR_KEY_CONTROL
));
66 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector
,
67 FLOW_DISSECTOR_KEY_BASIC
));
69 EXPORT_SYMBOL(skb_flow_dissector_init
);
/*
 * __skb_flow_get_ports(): the visible lines compute
 * poff = proto_ports_offset(ip_proto) and then read sizeof(_ports) bytes at
 * offset thoff + poff through __skb_header_pointer(), using _ports as the
 * on-stack copy buffer.
 *
 * NOTE(review): the leading numbers on each line are listing residue, and
 * they skip (82 -> 85 -> 89 -> 93 -> 95/96 -> 103).  The end of the parameter
 * list (data and hlen are used below but not declared in the visible
 * signature), the braces, the rest of the NULL-@data fallback and every
 * return statement are therefore absent from this listing.  Restore them
 * from the pristine file before building; do not infer them from here.
 */
72 * __skb_flow_get_ports - extract the upper layer ports and return them
73 * @skb: sk_buff to extract the ports from
74 * @thoff: transport header offset
75 * @ip_proto: protocol for which to get port offset
76 * @data: raw buffer pointer to the packet, if NULL use skb->data
77 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
79 * The function will try to retrieve the ports at offset thoff + poff where poff
80 * is the protocol port offset returned from proto_ports_offset
82 __be32
__skb_flow_get_ports(const struct sk_buff
*skb
, int thoff
, u8 ip_proto
,
85 int poff
= proto_ports_offset(ip_proto
);
89 hlen
= skb_headlen(skb
);
93 __be32
*ports
, _ports
;
95 ports
= __skb_header_pointer(skb
, thoff
+ poff
,
96 sizeof(_ports
), data
, hlen
, &_ports
);
103 EXPORT_SYMBOL(__skb_flow_get_ports
);
/*
 * __skb_flow_dissect(): walks the packet headers starting at nhoff and
 * stores the pieces selected by flow_dissector into target_container.
 *
 * What the visible lines establish:
 *  - proto, nhoff and hlen are loaded from the skb (skb->protocol,
 *    skb_network_offset(), skb_headlen());
 *  - the control and basic key targets are looked up first; the code relies
 *    on skb_flow_dissector_init() having required both keys;
 *  - with FLOW_DISSECTOR_KEY_ETH_ADDRS in use, sizeof(*key_eth_addrs) bytes
 *    are copied starting at the Ethernet header's h_dest;
 *  - ETH_P_IP: rejects a missing header or ihl < 5, advances nhoff by
 *    ihl * 4, takes ip_proto from iph->protocol, tests ip_is_fragment(),
 *    and copies sizeof(key_addrs->v4addrs) bytes starting at iph->saddr;
 *  - ETH_P_IPV6: takes ip_proto from nexthdr, advances nhoff by
 *    sizeof(struct ipv6hdr), copies the addresses starting at iph->saddr
 *    and stores ntohl(flow_label) when the flow-label key is in use;
 *  - ETH_P_8021AD / ETH_P_8021Q: stores skb_vlan_tag_get_id(skb) as the
 *    VLAN id, continues with h_vlan_encapsulated_proto and advances nhoff
 *    by sizeof(*vlan);
 *  - ETH_P_PPP_SES: advances nhoff by PPPOE_SES_HLEN;
 *  - ETH_P_TIPC: stores n_proto, thoff and tipcaddrs.srcnode;
 *  - ETH_P_MPLS_UC / ETH_P_MPLS_MC: when the first label is
 *    MPLS_LABEL_ENTROPY, stores hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
 *  - ETH_P_FCOE: thoff becomes nhoff + FCOE_HEADER_LEN;
 *  - GRE: GRE_VERSION/GRE_ROUTING, GRE_CSUM, GRE_KEY and GRE_SEQ flags are
 *    examined, the key id is stored when requested, and an ETH_P_TEB
 *    payload has its inner Ethernet header skipped;
 *  - the NEXTHDR_ROUTING lines read a two byte option header, take
 *    ip_proto from opthdr[0] and advance nhoff by (opthdr[1] + 1) << 3;
 *  - at the end n_proto, ip_proto and thoff (truncated to u16) are stored
 *    and, with FLOW_DISSECTOR_KEY_PORTS in use, the ports come from
 *    __skb_flow_get_ports().
 *
 * NOTE(review): the leading number on every line is listing residue and the
 * numbering has gaps: braces, labels, most declarations (_iph, _hdr, _keyid,
 * ip_proto, flow_label, ...), the switch statements themselves and every
 * return/break/goto are absent.  The comments opened on the lines numbered
 * 141 and 148 are never closed in this listing.  Restore the function from
 * the pristine file rather than inferring the missing control flow.
 *
 * NOTE(review): the token "ð" in the memcpy() that fills key_eth_addrs looks
 * like a mis-decoded "&eth" (the local declared two statements earlier) -
 * confirm against the pristine file.
 *
 * NOTE(review): the parameter description says skb "can be NULL if the rest
 * are specified", yet eth_hdr(skb) and skb_vlan_tag_get_id(skb) appear here
 * with no visible NULL check - confirm that callers passing a raw buffer
 * never enable the ETH_ADDRS or VLANID keys.
 */
106 * __skb_flow_dissect - extract the flow_keys struct and return it
107 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
108 * @flow_dissector: list of keys to dissect
109 * @target_container: target structure to put dissected values into
110 * @data: raw buffer pointer to the packet, if NULL use skb->data
111 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
112 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
113 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
115 * The function will try to retrieve individual keys into target specified
116 * by flow_dissector from either the skbuff or a raw buffer specified by the
119 * Caller must take care of zeroing target container memory.
121 bool __skb_flow_dissect(const struct sk_buff
*skb
,
122 struct flow_dissector
*flow_dissector
,
123 void *target_container
,
124 void *data
, __be16 proto
, int nhoff
, int hlen
)
126 struct flow_dissector_key_control
*key_control
;
127 struct flow_dissector_key_basic
*key_basic
;
128 struct flow_dissector_key_addrs
*key_addrs
;
129 struct flow_dissector_key_ports
*key_ports
;
130 struct flow_dissector_key_tags
*key_tags
;
131 struct flow_dissector_key_keyid
*key_keyid
;
136 proto
= skb
->protocol
;
137 nhoff
= skb_network_offset(skb
);
138 hlen
= skb_headlen(skb
);
141 /* It is ensured by skb_flow_dissector_init() that control key will
144 key_control
= skb_flow_dissector_target(flow_dissector
,
145 FLOW_DISSECTOR_KEY_CONTROL
,
148 /* It is ensured by skb_flow_dissector_init() that basic key will
151 key_basic
= skb_flow_dissector_target(flow_dissector
,
152 FLOW_DISSECTOR_KEY_BASIC
,
155 if (skb_flow_dissector_uses_key(flow_dissector
,
156 FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
157 struct ethhdr
*eth
= eth_hdr(skb
);
158 struct flow_dissector_key_eth_addrs
*key_eth_addrs
;
160 key_eth_addrs
= skb_flow_dissector_target(flow_dissector
,
161 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
163 memcpy(key_eth_addrs
, ð
->h_dest
, sizeof(*key_eth_addrs
));
168 case htons(ETH_P_IP
): {
169 const struct iphdr
*iph
;
172 iph
= __skb_header_pointer(skb
, nhoff
, sizeof(_iph
), data
, hlen
, &_iph
);
173 if (!iph
|| iph
->ihl
< 5)
175 nhoff
+= iph
->ihl
* 4;
177 ip_proto
= iph
->protocol
;
178 if (ip_is_fragment(iph
))
181 if (!skb_flow_dissector_uses_key(flow_dissector
,
182 FLOW_DISSECTOR_KEY_IPV4_ADDRS
))
185 key_addrs
= skb_flow_dissector_target(flow_dissector
,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS
, target_container
);
187 memcpy(&key_addrs
->v4addrs
, &iph
->saddr
,
188 sizeof(key_addrs
->v4addrs
));
189 key_control
->addr_type
= FLOW_DISSECTOR_KEY_IPV4_ADDRS
;
192 case htons(ETH_P_IPV6
): {
193 const struct ipv6hdr
*iph
;
198 iph
= __skb_header_pointer(skb
, nhoff
, sizeof(_iph
), data
, hlen
, &_iph
);
202 ip_proto
= iph
->nexthdr
;
203 nhoff
+= sizeof(struct ipv6hdr
);
205 if (skb_flow_dissector_uses_key(flow_dissector
,
206 FLOW_DISSECTOR_KEY_IPV6_ADDRS
)) {
207 struct flow_dissector_key_ipv6_addrs
*key_ipv6_addrs
;
209 key_ipv6_addrs
= skb_flow_dissector_target(flow_dissector
,
210 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
213 memcpy(key_ipv6_addrs
, &iph
->saddr
, sizeof(*key_ipv6_addrs
));
214 key_control
->addr_type
= FLOW_DISSECTOR_KEY_IPV6_ADDRS
;
217 flow_label
= ip6_flowlabel(iph
);
219 if (skb_flow_dissector_uses_key(flow_dissector
,
220 FLOW_DISSECTOR_KEY_FLOW_LABEL
)) {
221 key_tags
= skb_flow_dissector_target(flow_dissector
,
222 FLOW_DISSECTOR_KEY_FLOW_LABEL
,
224 key_tags
->flow_label
= ntohl(flow_label
);
230 case htons(ETH_P_8021AD
):
231 case htons(ETH_P_8021Q
): {
232 const struct vlan_hdr
*vlan
;
233 struct vlan_hdr _vlan
;
235 vlan
= __skb_header_pointer(skb
, nhoff
, sizeof(_vlan
), data
, hlen
, &_vlan
);
239 if (skb_flow_dissector_uses_key(flow_dissector
,
240 FLOW_DISSECTOR_KEY_VLANID
)) {
241 key_tags
= skb_flow_dissector_target(flow_dissector
,
242 FLOW_DISSECTOR_KEY_VLANID
,
245 key_tags
->vlan_id
= skb_vlan_tag_get_id(skb
);
248 proto
= vlan
->h_vlan_encapsulated_proto
;
249 nhoff
+= sizeof(*vlan
);
252 case htons(ETH_P_PPP_SES
): {
254 struct pppoe_hdr hdr
;
257 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
261 nhoff
+= PPPOE_SES_HLEN
;
265 case htons(PPP_IPV6
):
271 case htons(ETH_P_TIPC
): {
276 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
279 key_basic
->n_proto
= proto
;
280 key_control
->thoff
= (u16
)nhoff
;
282 if (skb_flow_dissector_uses_key(flow_dissector
,
283 FLOW_DISSECTOR_KEY_TIPC_ADDRS
)) {
284 key_addrs
= skb_flow_dissector_target(flow_dissector
,
285 FLOW_DISSECTOR_KEY_TIPC_ADDRS
,
287 key_addrs
->tipcaddrs
.srcnode
= hdr
->srcnode
;
288 key_control
->addr_type
= FLOW_DISSECTOR_KEY_TIPC_ADDRS
;
293 case htons(ETH_P_MPLS_UC
):
294 case htons(ETH_P_MPLS_MC
): {
295 struct mpls_label
*hdr
, _hdr
[2];
297 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
,
302 if ((ntohl(hdr
[0].entry
) & MPLS_LS_LABEL_MASK
) >>
303 MPLS_LS_LABEL_SHIFT
== MPLS_LABEL_ENTROPY
) {
304 if (skb_flow_dissector_uses_key(flow_dissector
,
305 FLOW_DISSECTOR_KEY_MPLS_ENTROPY
)) {
306 key_keyid
= skb_flow_dissector_target(flow_dissector
,
307 FLOW_DISSECTOR_KEY_MPLS_ENTROPY
,
309 key_keyid
->keyid
= hdr
[1].entry
&
310 htonl(MPLS_LS_LABEL_MASK
);
313 key_basic
->n_proto
= proto
;
314 key_basic
->ip_proto
= ip_proto
;
315 key_control
->thoff
= (u16
)nhoff
;
323 case htons(ETH_P_FCOE
):
324 key_control
->thoff
= (u16
)(nhoff
+ FCOE_HEADER_LEN
);
338 hdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_hdr
), data
, hlen
, &_hdr
);
342 * Only look inside GRE if version zero and no
345 if (hdr
->flags
& (GRE_VERSION
| GRE_ROUTING
))
350 if (hdr
->flags
& GRE_CSUM
)
352 if (hdr
->flags
& GRE_KEY
) {
356 keyid
= __skb_header_pointer(skb
, nhoff
, sizeof(_keyid
),
357 data
, hlen
, &_keyid
);
362 if (skb_flow_dissector_uses_key(flow_dissector
,
363 FLOW_DISSECTOR_KEY_GRE_KEYID
)) {
364 key_keyid
= skb_flow_dissector_target(flow_dissector
,
365 FLOW_DISSECTOR_KEY_GRE_KEYID
,
367 key_keyid
->keyid
= *keyid
;
371 if (hdr
->flags
& GRE_SEQ
)
373 if (proto
== htons(ETH_P_TEB
)) {
374 const struct ethhdr
*eth
;
377 eth
= __skb_header_pointer(skb
, nhoff
,
382 proto
= eth
->h_proto
;
383 nhoff
+= sizeof(*eth
);
388 case NEXTHDR_ROUTING
:
390 u8 _opthdr
[2], *opthdr
;
392 if (proto
!= htons(ETH_P_IPV6
))
395 opthdr
= __skb_header_pointer(skb
, nhoff
, sizeof(_opthdr
),
396 data
, hlen
, &_opthdr
);
400 ip_proto
= opthdr
[0];
401 nhoff
+= (opthdr
[1] + 1) << 3;
406 proto
= htons(ETH_P_IP
);
409 proto
= htons(ETH_P_IPV6
);
412 proto
= htons(ETH_P_MPLS_UC
);
418 key_basic
->n_proto
= proto
;
419 key_basic
->ip_proto
= ip_proto
;
420 key_control
->thoff
= (u16
)nhoff
;
422 if (skb_flow_dissector_uses_key(flow_dissector
,
423 FLOW_DISSECTOR_KEY_PORTS
)) {
424 key_ports
= skb_flow_dissector_target(flow_dissector
,
425 FLOW_DISSECTOR_KEY_PORTS
,
427 key_ports
->ports
= __skb_flow_get_ports(skb
, nhoff
, ip_proto
,
433 EXPORT_SYMBOL(__skb_flow_dissect
);
435 static u32 hashrnd __read_mostly
;
436 static __always_inline
void __flow_hash_secret_init(void)
438 net_get_random_once(&hashrnd
, sizeof(hashrnd
));
441 static __always_inline u32
__flow_hash_words(u32
*words
, u32 length
, u32 keyval
)
443 return jhash2(words
, length
, keyval
);
446 static inline void *flow_keys_hash_start(struct flow_keys
*flow
)
448 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET
% sizeof(u32
));
449 return (void *)flow
+ FLOW_KEYS_HASH_OFFSET
;
452 static inline size_t flow_keys_hash_length(struct flow_keys
*flow
)
454 size_t diff
= FLOW_KEYS_HASH_OFFSET
+ sizeof(flow
->addrs
);
455 BUILD_BUG_ON((sizeof(*flow
) - FLOW_KEYS_HASH_OFFSET
) % sizeof(u32
));
456 BUILD_BUG_ON(offsetof(typeof(*flow
), addrs
) !=
457 sizeof(*flow
) - sizeof(flow
->addrs
));
459 switch (flow
->control
.addr_type
) {
460 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
461 diff
-= sizeof(flow
->addrs
.v4addrs
);
463 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
464 diff
-= sizeof(flow
->addrs
.v6addrs
);
466 case FLOW_DISSECTOR_KEY_TIPC_ADDRS
:
467 diff
-= sizeof(flow
->addrs
.tipcaddrs
);
470 return (sizeof(*flow
) - diff
) / sizeof(u32
);
473 __be32
flow_get_u32_src(const struct flow_keys
*flow
)
475 switch (flow
->control
.addr_type
) {
476 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
477 return flow
->addrs
.v4addrs
.src
;
478 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
479 return (__force __be32
)ipv6_addr_hash(
480 &flow
->addrs
.v6addrs
.src
);
481 case FLOW_DISSECTOR_KEY_TIPC_ADDRS
:
482 return flow
->addrs
.tipcaddrs
.srcnode
;
487 EXPORT_SYMBOL(flow_get_u32_src
);
489 __be32
flow_get_u32_dst(const struct flow_keys
*flow
)
491 switch (flow
->control
.addr_type
) {
492 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
493 return flow
->addrs
.v4addrs
.dst
;
494 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
495 return (__force __be32
)ipv6_addr_hash(
496 &flow
->addrs
.v6addrs
.dst
);
501 EXPORT_SYMBOL(flow_get_u32_dst
);
/*
 * __flow_hash_consistentify(): conditionally swaps the source and
 * destination halves of @keys in place.
 *
 * IPv4: addr_diff is dst - src computed on the raw u32 values; when the
 * visible condition holds, v4addrs.src/dst and ports.src/dst are swapped.
 * IPv6: addr_diff is the memcmp() of dst against src; when the condition
 * holds, the four s6_addr32 words and the ports are swapped.
 *
 * NOTE(review): the leading numbers are listing residue.  The numbers 504-506
 * (opening brace and the declarations of addr_diff and i) are absent, and so
 * is one line in each condition (512 and 524) between "(addr_diff < 0) ||"
 * and the port comparison.  Presumably that line ties the port comparison
 * to equal addresses - confirm against the pristine file.  The closing
 * braces and break statements are absent as well.
 */
503 static inline void __flow_hash_consistentify(struct flow_keys
*keys
)
507 switch (keys
->control
.addr_type
) {
508 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
509 addr_diff
= (__force u32
)keys
->addrs
.v4addrs
.dst
-
510 (__force u32
)keys
->addrs
.v4addrs
.src
;
511 if ((addr_diff
< 0) ||
513 ((__force u16
)keys
->ports
.dst
<
514 (__force u16
)keys
->ports
.src
))) {
515 swap(keys
->addrs
.v4addrs
.src
, keys
->addrs
.v4addrs
.dst
);
516 swap(keys
->ports
.src
, keys
->ports
.dst
);
519 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
520 addr_diff
= memcmp(&keys
->addrs
.v6addrs
.dst
,
521 &keys
->addrs
.v6addrs
.src
,
522 sizeof(keys
->addrs
.v6addrs
.dst
));
523 if ((addr_diff
< 0) ||
525 ((__force u16
)keys
->ports
.dst
<
526 (__force u16
)keys
->ports
.src
))) {
527 for (i
= 0; i
< 4; i
++)
528 swap(keys
->addrs
.v6addrs
.src
.s6_addr32
[i
],
529 keys
->addrs
.v6addrs
.dst
.s6_addr32
[i
]);
530 swap(keys
->ports
.src
, keys
->ports
.dst
);
/*
 * __flow_hash_from_keys(): first runs __flow_hash_consistentify() on @keys
 * (which may reorder them in place), then hashes the words from
 * flow_keys_hash_start() over flow_keys_hash_length() words with @keyval as
 * the initial value.
 *
 * NOTE(review): the leading numbers are listing residue.  The declaration of
 * hash and everything after the hash computation (numbers 544-548,
 * including the return statement) are absent from this listing - restore
 * them from the pristine file.
 */
536 static inline u32
__flow_hash_from_keys(struct flow_keys
*keys
, u32 keyval
)
540 __flow_hash_consistentify(keys
);
542 hash
= __flow_hash_words((u32
*)flow_keys_hash_start(keys
),
543 flow_keys_hash_length(keys
), keyval
);
550 u32
flow_hash_from_keys(struct flow_keys
*keys
)
552 __flow_hash_secret_init();
553 return __flow_hash_from_keys(keys
, hashrnd
);
555 EXPORT_SYMBOL(flow_hash_from_keys
);
557 static inline u32
___skb_get_hash(const struct sk_buff
*skb
,
558 struct flow_keys
*keys
, u32 keyval
)
560 if (!skb_flow_dissect_flow_keys(skb
, keys
))
563 return __flow_hash_from_keys(keys
, keyval
);
/*
 * Private layout that make_flow_keys_digest() overlays on a
 * struct flow_keys_digest.  That function writes the members n_proto,
 * ip_proto, ports, src and dst.
 *
 * NOTE(review): only the opening line of the definition is present in this
 * listing (numbers 567-573 are absent), so the member types, their order and
 * any padding cannot be confirmed from here.
 */
566 struct _flow_keys_digest_data
{
575 void make_flow_keys_digest(struct flow_keys_digest
*digest
,
576 const struct flow_keys
*flow
)
578 struct _flow_keys_digest_data
*data
=
579 (struct _flow_keys_digest_data
*)digest
;
581 BUILD_BUG_ON(sizeof(*data
) > sizeof(*digest
));
583 memset(digest
, 0, sizeof(*digest
));
585 data
->n_proto
= flow
->basic
.n_proto
;
586 data
->ip_proto
= flow
->basic
.ip_proto
;
587 data
->ports
= flow
->ports
.ports
;
588 data
->src
= flow
->addrs
.v4addrs
.src
;
589 data
->dst
= flow
->addrs
.v4addrs
.dst
;
591 EXPORT_SYMBOL(make_flow_keys_digest
);
/*
 * __skb_get_hash(): the visible lines initialise hashrnd, compute
 * hash = ___skb_get_hash(skb, &keys, hashrnd) and then test
 * keys.ports.ports.
 *
 * NOTE(review): the leading numbers are listing residue.  The declaration of
 * hash, the handling of a zero hash (numbers 610-611) and the statements
 * that store the result into the skb (613-616) are absent from this
 * listing; the description below says what they are meant to achieve, but
 * restore the code itself from the pristine file.
 */
594 * __skb_get_hash: calculate a flow hash
595 * @skb: sk_buff to calculate flow hash from
597 * This function calculates a flow hash based on src/dst addresses
598 * and src/dst port numbers. Sets hash in skb to non-zero hash value
599 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
600 * if hash is a canonical 4-tuple hash over transport ports.
602 void __skb_get_hash(struct sk_buff
*skb
)
604 struct flow_keys keys
;
607 __flow_hash_secret_init();
609 hash
= ___skb_get_hash(skb
, &keys
, hashrnd
);
612 if (keys
.ports
.ports
)
617 EXPORT_SYMBOL(__skb_get_hash
);
619 __u32
skb_get_hash_perturb(const struct sk_buff
*skb
, u32 perturb
)
621 struct flow_keys keys
;
623 return ___skb_get_hash(skb
, &keys
, perturb
);
625 EXPORT_SYMBOL(skb_get_hash_perturb
);
/*
 * __skb_get_poff(): starts from keys->control.thoff and adds the size of
 * the transport header selected by keys->basic.ip_proto.
 *
 * Visible behaviour: for the branch that reads doff, one byte at poff + 12
 * is fetched through __skb_header_pointer() and poff grows by the larger of
 * sizeof(struct tcphdr) and (*doff & 0xF0) >> 2; the remaining branches add
 * the fixed sizes of struct udphdr, icmphdr, icmp6hdr, igmphdr, dccp_hdr
 * and sctphdr.
 *
 * NOTE(review): the leading numbers are listing residue.  Apart from
 * IPPROTO_UDPLITE every case label is absent, as are the declarations of
 * doff/_doff, the rest of the __skb_header_pointer() call, the break
 * statements and the return - restore them from the pristine file.  The
 * comment opened on the line numbered 650 is never closed in this listing.
 */
627 u32
__skb_get_poff(const struct sk_buff
*skb
, void *data
,
628 const struct flow_keys
*keys
, int hlen
)
630 u32 poff
= keys
->control
.thoff
;
632 switch (keys
->basic
.ip_proto
) {
634 /* access doff as u8 to avoid unaligned access */
638 doff
= __skb_header_pointer(skb
, poff
+ 12, sizeof(_doff
),
643 poff
+= max_t(u32
, sizeof(struct tcphdr
), (*doff
& 0xF0) >> 2);
647 case IPPROTO_UDPLITE
:
648 poff
+= sizeof(struct udphdr
);
650 /* For the rest, we do not really care about header
651 * extensions at this point for now.
654 poff
+= sizeof(struct icmphdr
);
657 poff
+= sizeof(struct icmp6hdr
);
660 poff
+= sizeof(struct igmphdr
);
663 poff
+= sizeof(struct dccp_hdr
);
666 poff
+= sizeof(struct sctphdr
);
674 * skb_get_poff - get the offset to the payload
675 * @skb: sk_buff to get the payload offset from
677 * The function will get the offset to the payload as far as it could
678 * be dissected. The main user is currently BPF, so that we can dynamically
679 * truncate packets without needing to push actual payload to the user
680 * space and can analyze headers only, instead.
682 u32
skb_get_poff(const struct sk_buff
*skb
)
684 struct flow_keys keys
;
686 if (!skb_flow_dissect_flow_keys(skb
, &keys
))
689 return __skb_get_poff(skb
, skb
->data
, &keys
, skb_headlen(skb
));
692 static const struct flow_dissector_key flow_keys_dissector_keys
[] = {
694 .key_id
= FLOW_DISSECTOR_KEY_CONTROL
,
695 .offset
= offsetof(struct flow_keys
, control
),
698 .key_id
= FLOW_DISSECTOR_KEY_BASIC
,
699 .offset
= offsetof(struct flow_keys
, basic
),
702 .key_id
= FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
703 .offset
= offsetof(struct flow_keys
, addrs
.v4addrs
),
706 .key_id
= FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
707 .offset
= offsetof(struct flow_keys
, addrs
.v6addrs
),
710 .key_id
= FLOW_DISSECTOR_KEY_TIPC_ADDRS
,
711 .offset
= offsetof(struct flow_keys
, addrs
.tipcaddrs
),
714 .key_id
= FLOW_DISSECTOR_KEY_PORTS
,
715 .offset
= offsetof(struct flow_keys
, ports
),
718 .key_id
= FLOW_DISSECTOR_KEY_VLANID
,
719 .offset
= offsetof(struct flow_keys
, tags
),
722 .key_id
= FLOW_DISSECTOR_KEY_FLOW_LABEL
,
723 .offset
= offsetof(struct flow_keys
, tags
),
726 .key_id
= FLOW_DISSECTOR_KEY_GRE_KEYID
,
727 .offset
= offsetof(struct flow_keys
, keyid
),
731 static const struct flow_dissector_key flow_keys_buf_dissector_keys
[] = {
733 .key_id
= FLOW_DISSECTOR_KEY_CONTROL
,
734 .offset
= offsetof(struct flow_keys
, control
),
737 .key_id
= FLOW_DISSECTOR_KEY_BASIC
,
738 .offset
= offsetof(struct flow_keys
, basic
),
742 struct flow_dissector flow_keys_dissector __read_mostly
;
743 EXPORT_SYMBOL(flow_keys_dissector
);
745 struct flow_dissector flow_keys_buf_dissector __read_mostly
;
747 static int __init
init_default_flow_dissectors(void)
749 skb_flow_dissector_init(&flow_keys_dissector
,
750 flow_keys_dissector_keys
,
751 ARRAY_SIZE(flow_keys_dissector_keys
));
752 skb_flow_dissector_init(&flow_keys_buf_dissector
,
753 flow_keys_buf_dissector_keys
,
754 ARRAY_SIZE(flow_keys_buf_dissector_keys
));
758 late_initcall_sync(init_default_flow_dissectors
);