// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2013 Google Inc.
 * Author: Willem de Bruijn (willemb@google.com)
 *
 * A basic test of packet socket fanout behavior.
 *
 * Control:
 * - create fanout fails as expected with illegal flag combinations
 * - join   fanout fails as expected with diverging types or flags
 *
 * Datapath:
 *   Open a pair of packet sockets and a pair of INET sockets, send a known
 *   number of packets across the two INET sockets and count the number of
 *   packets enqueued onto the two packet sockets.
 *
 *   The test currently runs for
 *   - PACKET_FANOUT_HASH
 *   - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
 *   - PACKET_FANOUT_LB
 *   - PACKET_FANOUT_CPU
 *   - PACKET_FANOUT_ROLLOVER
 *   - PACKET_FANOUT_CBPF
 *   - PACKET_FANOUT_EBPF
 *
 * Todo:
 * - functionality: PACKET_FANOUT_FLAG_DEFRAG
 */
#define _GNU_SOURCE		/* for sched_setaffinity */

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>	/* for __NR_bpf */
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "psock_lib.h"
#include "../kselftest.h"
/* Number of frames (and of blocks) requested for each packet socket RX ring. */
#define RING_NUM_FRAMES			20

/*
 * When non-zero, sock_fanout_open() configures fanout through
 * struct fanout_args and passes this value as max_num_members.
 * When zero, the plain 32-bit PACKET_FANOUT option value is used instead.
 */
static uint32_t cfg_max_num_members;
63 static void loopback_set_up_down(int state_up
)
65 struct ifreq ifreq
= {};
68 fd
= socket(AF_PACKET
, SOCK_RAW
, 0);
70 perror("socket loopback");
73 strcpy(ifreq
.ifr_name
, "lo");
74 err
= ioctl(fd
, SIOCGIFFLAGS
, &ifreq
);
76 perror("SIOCGIFFLAGS");
79 if (state_up
!= !!(ifreq
.ifr_flags
& IFF_UP
)) {
80 ifreq
.ifr_flags
^= IFF_UP
;
81 err
= ioctl(fd
, SIOCSIFFLAGS
, &ifreq
);
83 perror("SIOCSIFFLAGS");
90 /* Open a socket in a given fanout mode.
91 * @return -1 if mode is bad, a valid socket otherwise */
92 static int sock_fanout_open(uint16_t typeflags
, uint16_t group_id
)
94 struct sockaddr_ll addr
= {0};
95 struct fanout_args args
;
98 fd
= socket(PF_PACKET
, SOCK_RAW
, 0);
100 perror("socket packet");
104 pair_udp_setfilter(fd
);
106 addr
.sll_family
= AF_PACKET
;
107 addr
.sll_protocol
= htons(ETH_P_IP
);
108 addr
.sll_ifindex
= if_nametoindex("lo");
109 if (addr
.sll_ifindex
== 0) {
110 perror("if_nametoindex");
113 if (bind(fd
, (void *) &addr
, sizeof(addr
))) {
114 perror("bind packet");
118 if (cfg_max_num_members
) {
120 args
.type_flags
= typeflags
;
121 args
.max_num_members
= cfg_max_num_members
;
122 err
= setsockopt(fd
, SOL_PACKET
, PACKET_FANOUT
, &args
,
125 val
= (((int) typeflags
) << 16) | group_id
;
126 err
= setsockopt(fd
, SOL_PACKET
, PACKET_FANOUT
, &val
,
131 perror("close packet");
/*
 * Attach a classic BPF program as fanout data (PACKET_FANOUT_DATA) on @fd.
 *
 * The two-instruction program loads the byte at absolute packet offset 80
 * into A and returns A. Exits the test with status 1 if setsockopt() fails.
 */
static void sock_fanout_set_cbpf(int fd)
{
	struct sock_filter bpf_filter[] = {
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),	      /* ldb [80] */
		BPF_STMT(BPF_RET | BPF_A, 0),		      /* ret A */
	};
	struct sock_fprog bpf_prog = {
		.len	= ARRAY_SIZE(bpf_filter),
		.filter	= bpf_filter,
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
		       sizeof(bpf_prog))) {
		perror("fanout data cbpf");
		exit(1);
	}
}
/*
 * Read back the PACKET_FANOUT option of @fd and split it into its halves.
 *
 * @typeflags: receives the upper 16 bits of the option value
 * @group_id:  receives the lower 16 bits of the option value
 *
 * Exits the test with status 1 if getsockopt() fails.
 */
static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
{
	/* Unsigned so that the right shift below is well defined. */
	unsigned int sockopt = 0;
	socklen_t sockopt_len = sizeof(sockopt);

	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &sockopt, &sockopt_len)) {
		perror("failed to getsockopt");
		exit(1);
	}

	*typeflags = sockopt >> 16;
	*group_id = sockopt & 0xffff;
}
172 static void sock_fanout_set_ebpf(int fd
)
174 static char log_buf
[65536];
176 const int len_off
= __builtin_offsetof(struct __sk_buff
, len
);
177 struct bpf_insn prog
[] = {
178 { BPF_ALU64
| BPF_MOV
| BPF_X
, 6, 1, 0, 0 },
179 { BPF_LDX
| BPF_W
| BPF_MEM
, 0, 6, len_off
, 0 },
180 { BPF_JMP
| BPF_JGE
| BPF_K
, 0, 0, 1, DATA_LEN
},
181 { BPF_JMP
| BPF_JA
| BPF_K
, 0, 0, 4, 0 },
182 { BPF_LD
| BPF_B
| BPF_ABS
, 0, 0, 0, 0x50 },
183 { BPF_JMP
| BPF_JEQ
| BPF_K
, 0, 0, 2, DATA_CHAR
},
184 { BPF_JMP
| BPF_JEQ
| BPF_K
, 0, 0, 1, DATA_CHAR_1
},
185 { BPF_ALU
| BPF_MOV
| BPF_K
, 0, 0, 0, 0 },
186 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
191 memset(&attr
, 0, sizeof(attr
));
192 attr
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
193 attr
.insns
= (unsigned long) prog
;
194 attr
.insn_cnt
= ARRAY_SIZE(prog
);
195 attr
.license
= (unsigned long) "GPL";
196 attr
.log_buf
= (unsigned long) log_buf
;
197 attr
.log_size
= sizeof(log_buf
);
200 pfd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &attr
, sizeof(attr
));
203 fprintf(stderr
, "bpf verifier:\n%s\n", log_buf
);
207 if (setsockopt(fd
, SOL_PACKET
, PACKET_FANOUT_DATA
, &pfd
, sizeof(pfd
))) {
208 perror("fanout data ebpf");
213 perror("close ebpf");
218 static char *sock_fanout_open_ring(int fd
)
220 struct tpacket_req req
= {
221 .tp_block_size
= getpagesize(),
222 .tp_frame_size
= getpagesize(),
223 .tp_block_nr
= RING_NUM_FRAMES
,
224 .tp_frame_nr
= RING_NUM_FRAMES
,
227 int val
= TPACKET_V2
;
229 if (setsockopt(fd
, SOL_PACKET
, PACKET_VERSION
, (void *) &val
,
231 perror("packetsock ring setsockopt version");
234 if (setsockopt(fd
, SOL_PACKET
, PACKET_RX_RING
, (void *) &req
,
236 perror("packetsock ring setsockopt");
240 ring
= mmap(0, req
.tp_block_size
* req
.tp_block_nr
,
241 PROT_READ
| PROT_WRITE
, MAP_SHARED
, fd
, 0);
242 if (ring
== MAP_FAILED
) {
243 perror("packetsock ring mmap");
250 static int sock_fanout_read_ring(int fd
, void *ring
)
252 struct tpacket2_hdr
*header
= ring
;
255 while (count
< RING_NUM_FRAMES
&& header
->tp_status
& TP_STATUS_USER
) {
257 header
= ring
+ (count
* getpagesize());
/*
 * Count the queued frames on both rings and compare them with @expect.
 *
 * @fds:    the two packet socket fds
 * @rings:  the two mapped rings, in the same order as @fds
 * @expect: the two expected queue lengths
 *
 * The counts match if they equal @expect in either order.
 *
 * @return 0 on a match, 1 (after printing a warning) otherwise.
 */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	int ret[2];
	int in_order, swapped;

	ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
	ret[1] = sock_fanout_read_ring(fds[1], rings[1]);

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		ret[0], ret[1], expect[0], expect[1]);

	in_order = (ret[0] == expect[0] && ret[1] == expect[1]);
	swapped = (ret[0] == expect[1] && ret[1] == expect[0]);
	if (!in_order && !swapped) {
		fprintf(stderr, "warning: incorrect queue lengths\n");
		return 1;
	}

	return 0;
}
/*
 * Test that creating/joining a fanout group fails for an unbound socket
 * without a specified protocol.
 *
 * fd0 is a raw packet socket opened with protocol 0 and never bound. Both
 * creating group 1 on it and joining group 1 after a properly opened socket
 * has created it must be rejected. Exits with status 1 otherwise.
 */
static void test_unbound_fanout(void)
{
	int val, fd0, fd1, err;

	fprintf(stderr, "test: unbound fanout\n");
	fd0 = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd0 < 0) {
		perror("socket packet");
		exit(1);
	}

	/* Try to create a new fanout group. Should fail. */
	val = (PACKET_FANOUT_HASH << 16) | 1;
	err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
	if (!err) {
		fprintf(stderr, "ERROR: unbound socket fanout create\n");
		exit(1);
	}

	fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1);
	if (fd1 == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	/* Try to join an existing fanout group. Should fail. */
	err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
	if (!err) {
		fprintf(stderr, "ERROR: unbound socket fanout join\n");
		exit(1);
	}

	close(fd0);
	close(fd1);
}
/*
 * Test illegal mode + flag combination: PACKET_FANOUT_ROLLOVER as the mode
 * together with PACKET_FANOUT_FLAG_ROLLOVER must be rejected.
 * Exits with status 1 if the socket opens anyway.
 */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd != -1) {
		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
		exit(1);
	}
}
/*
 * Test illegal group with different modes or flags.
 *
 * @toggle: when non-zero, loopback is taken down after the group has been
 *          created and brought back up before the sockets are closed.
 *
 * A HASH group with id 0 is created; joining it with the DEFRAG flag, with
 * the ROLLOVER flag, or in CPU mode must fail, while joining with identical
 * type and flags must succeed. Exits with status 1 on any deviation.
 */
static void test_control_group(int toggle)
{
	int fds[2];

	if (toggle)
		fprintf(stderr, "test: control multiple sockets with link down toggle\n");
	else
		fprintf(stderr, "test: control multiple sockets\n");

	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (fds[0] == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}
	if (toggle)
		loopback_set_up_down(0);
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
		exit(1);
	}
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
		exit(1);
	}
	if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong mode\n");
		exit(1);
	}
	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (fds[1] == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}
	if (toggle)
		loopback_set_up_down(1);
	if (close(fds[1]) || close(fds[0])) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
373 /* Test illegal max_num_members values */
374 static void test_control_group_max_num_members(void)
378 fprintf(stderr
, "test: control multiple sockets, max_num_members\n");
380 /* expected failure on greater than PACKET_FANOUT_MAX */
381 cfg_max_num_members
= (1 << 16) + 1;
382 if (sock_fanout_open(PACKET_FANOUT_HASH
, 0) != -1) {
383 fprintf(stderr
, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
387 cfg_max_num_members
= 256;
388 fds
[0] = sock_fanout_open(PACKET_FANOUT_HASH
, 0);
390 fprintf(stderr
, "ERROR: failed open\n");
394 /* expected failure on joining group with different max_num_members */
395 cfg_max_num_members
= 257;
396 if (sock_fanout_open(PACKET_FANOUT_HASH
, 0) != -1) {
397 fprintf(stderr
, "ERROR: set different max_num_members\n");
401 /* success on joining group with same max_num_members */
402 cfg_max_num_members
= 256;
403 fds
[1] = sock_fanout_open(PACKET_FANOUT_HASH
, 0);
405 fprintf(stderr
, "ERROR: failed to join group\n");
409 /* success on joining group with max_num_members unspecified */
410 cfg_max_num_members
= 0;
411 fds
[2] = sock_fanout_open(PACKET_FANOUT_HASH
, 0);
413 fprintf(stderr
, "ERROR: failed to join group\n");
417 if (close(fds
[2]) || close(fds
[1]) || close(fds
[0])) {
418 fprintf(stderr
, "ERROR: closing sockets\n");
/*
 * Test creating unique fanout group ids.
 *
 * A HASH group is created with PACKET_FANOUT_FLAG_UNIQUEID and its id read
 * back; the type/flags read back must equal PACKET_FANOUT_HASH. Joining that
 * id in CPU mode must fail and joining it in HASH mode must succeed. A second
 * UNIQUEID group must be creatable, and requesting UNIQUEID together with an
 * explicit group id must fail. Exits with status 1 on any deviation.
 */
static void test_unique_fanout_group_ids(void)
{
	int fds[3];
	uint16_t typeflags, first_group_id, second_group_id;

	fprintf(stderr, "test: unique ids\n");

	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (fds[0] == -1) {
		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
	if (typeflags != PACKET_FANOUT_HASH) {
		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
		exit(1);
	}

	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong type.\n");
		exit(1);
	}

	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
	if (fds[1] == -1) {
		fprintf(stderr,
			"ERROR: failed to join previously created group.\n");
		exit(1);
	}

	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (fds[2] == -1) {
		fprintf(stderr,
			"ERROR: failed to create a second unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
			     second_group_id) != -1) {
		fprintf(stderr,
			"ERROR: specified a group id when requesting unique id\n");
		exit(1);
	}

	if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
478 static int test_datapath(uint16_t typeflags
, int port_off
,
479 const int expect1
[], const int expect2
[])
481 const int expect0
[] = { 0, 0 };
483 uint8_t type
= typeflags
& 0xFF;
484 int fds
[2], fds_udp
[2][2], ret
;
486 fprintf(stderr
, "\ntest: datapath 0x%hx ports %hu,%hu\n",
487 typeflags
, (uint16_t)PORT_BASE
,
488 (uint16_t)(PORT_BASE
+ port_off
));
490 fds
[0] = sock_fanout_open(typeflags
, 0);
491 fds
[1] = sock_fanout_open(typeflags
, 0);
492 if (fds
[0] == -1 || fds
[1] == -1) {
493 fprintf(stderr
, "ERROR: failed open\n");
496 if (type
== PACKET_FANOUT_CBPF
)
497 sock_fanout_set_cbpf(fds
[0]);
498 else if (type
== PACKET_FANOUT_EBPF
)
499 sock_fanout_set_ebpf(fds
[0]);
501 rings
[0] = sock_fanout_open_ring(fds
[0]);
502 rings
[1] = sock_fanout_open_ring(fds
[1]);
503 pair_udp_open(fds_udp
[0], PORT_BASE
);
504 pair_udp_open(fds_udp
[1], PORT_BASE
+ port_off
);
505 sock_fanout_read(fds
, rings
, expect0
);
507 /* Send data, but not enough to overflow a queue */
508 pair_udp_send(fds_udp
[0], 15);
509 pair_udp_send_char(fds_udp
[1], 5, DATA_CHAR_1
);
510 ret
= sock_fanout_read(fds
, rings
, expect1
);
512 /* Send more data, overflow the queue */
513 pair_udp_send_char(fds_udp
[0], 15, DATA_CHAR_1
);
514 /* TODO: ensure consistent order between expect1 and expect2 */
515 ret
|= sock_fanout_read(fds
, rings
, expect2
);
517 if (munmap(rings
[1], RING_NUM_FRAMES
* getpagesize()) ||
518 munmap(rings
[0], RING_NUM_FRAMES
* getpagesize())) {
519 fprintf(stderr
, "close rings\n");
522 if (close(fds_udp
[1][1]) || close(fds_udp
[1][0]) ||
523 close(fds_udp
[0][1]) || close(fds_udp
[0][0]) ||
524 close(fds
[1]) || close(fds
[0])) {
525 fprintf(stderr
, "close datapath\n");
/*
 * Pin the calling process to a single CPU.
 *
 * @cpuid: index of the CPU to run on
 *
 * @return 0 on success, 1 if sched_setaffinity() failed with EINVAL.
 * Any other failure prints a message and terminates the test with exit(1).
 */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpuid, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask)) {
		if (errno != EINVAL) {
			fprintf(stderr, "setaffinity %d\n", cpuid);
			exit(1);
		}
		return 1;
	}

	return 0;
}
549 int main(int argc
, char **argv
)
551 const int expect_hash
[2][2] = { { 15, 5 }, { 20, 5 } };
552 const int expect_hash_rb
[2][2] = { { 15, 5 }, { 20, 15 } };
553 const int expect_lb
[2][2] = { { 10, 10 }, { 18, 17 } };
554 const int expect_rb
[2][2] = { { 15, 5 }, { 20, 15 } };
555 const int expect_cpu0
[2][2] = { { 20, 0 }, { 20, 0 } };
556 const int expect_cpu1
[2][2] = { { 0, 20 }, { 0, 20 } };
557 const int expect_bpf
[2][2] = { { 15, 5 }, { 15, 20 } };
558 const int expect_uniqueid
[2][2] = { { 20, 20}, { 20, 20 } };
559 int port_off
= 2, tries
= 20, ret
;
561 test_unbound_fanout();
562 test_control_single();
563 test_control_group(0);
564 test_control_group(1);
565 test_control_group_max_num_members();
566 test_unique_fanout_group_ids();
568 /* PACKET_FANOUT_MAX */
569 cfg_max_num_members
= 1 << 16;
570 /* find a set of ports that do not collide onto the same socket */
571 ret
= test_datapath(PACKET_FANOUT_HASH
, port_off
,
572 expect_hash
[0], expect_hash
[1]);
574 fprintf(stderr
, "info: trying alternate ports (%d)\n", tries
);
575 ret
= test_datapath(PACKET_FANOUT_HASH
, ++port_off
,
576 expect_hash
[0], expect_hash
[1]);
578 fprintf(stderr
, "too many collisions\n");
583 ret
|= test_datapath(PACKET_FANOUT_HASH
| PACKET_FANOUT_FLAG_ROLLOVER
,
584 port_off
, expect_hash_rb
[0], expect_hash_rb
[1]);
585 ret
|= test_datapath(PACKET_FANOUT_LB
,
586 port_off
, expect_lb
[0], expect_lb
[1]);
587 ret
|= test_datapath(PACKET_FANOUT_ROLLOVER
,
588 port_off
, expect_rb
[0], expect_rb
[1]);
590 ret
|= test_datapath(PACKET_FANOUT_CBPF
,
591 port_off
, expect_bpf
[0], expect_bpf
[1]);
592 ret
|= test_datapath(PACKET_FANOUT_EBPF
,
593 port_off
, expect_bpf
[0], expect_bpf
[1]);
596 ret
|= test_datapath(PACKET_FANOUT_CPU
, port_off
,
597 expect_cpu0
[0], expect_cpu0
[1]);
598 if (!set_cpuaffinity(1))
599 /* TODO: test that choice alternates with previous */
600 ret
|= test_datapath(PACKET_FANOUT_CPU
, port_off
,
601 expect_cpu1
[0], expect_cpu1
[1]);
603 ret
|= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID
, port_off
,
604 expect_uniqueid
[0], expect_uniqueid
[1]);
609 printf("OK. All tests passed\n");