/*
 * Test functionality of BPF filters for SO_REUSEPORT.  The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result.  These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
/* Number of elements in a true array (not valid on pointers/parameters). */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
/*
 * Parameters for one test run: how the receive group is built and how the
 * sender addresses it.  Passed by value to every test helper.
 */
struct test_params {
	int recv_family;	/* address family of the receiving sockets */
	int send_family;	/* address family of the sending socket */
	int protocol;		/* socket type given to socket(): SOCK_DGRAM or SOCK_STREAM */
	size_t recv_socks;	/* number of sockets in the receive group */
	uint16_t recv_port;	/* port the receive group binds to (host byte order) */
	uint16_t send_port_min;	/* first source port used by the sender (host byte order) */
};
/*
 * Address length handed to bind()/sendto() for every address in this file.
 * All addresses are allocated as a full struct sockaddr_storage, so the same
 * size is valid regardless of address family.
 */
static size_t sockaddr_size(void)
{
	return sizeof(struct sockaddr_storage);
}
/*
 * Allocate a zeroed sockaddr_storage holding the wildcard ("any") address of
 * the given family and the given port.
 *
 * @family: AF_INET or AF_INET6; any other value terminates the program.
 * @port:   port in host byte order (converted with htons()).
 *
 * Returns a heap pointer the caller must free().  Allocation failure
 * terminates the program via error().
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc gives the zero fill the old malloc+memset pair provided. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Same as new_any_sockaddr(), but the address part is the loopback address
 * of the given family instead of the wildcard address.
 *
 * Returns a heap pointer the caller must free().  An unsupported family
 * terminates the program.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* Family and port are already filled in; only the address changes. */
	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_addr = in6addr_loopback;
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return addr;
}
/*
 * Load an extended-BPF socket-filter program that returns
 * (first 32-bit word of the packet) % mod, and attach it to @fd with
 * SO_ATTACH_REUSEPORT_EBPF.
 *
 * @fd:  socket that has SO_REUSEPORT set.
 * @mod: modulus baked into the program as an immediate.
 *
 * Any failure terminates the program; a load failure prints the verifier log.
 */
static void attach_ebpf(int fd, uint16_t mod)
{
	static char bpf_log_buf[65536];
	static const char bpf_license[] = "GPL";

	int bpf_fd;
	const struct bpf_insn prog[] = {
		/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
		{ BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
		/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
		{ BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
		/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
		{ BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
		/* BPF_EXIT_INSN() */
		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insn_cnt = ARRAY_SIZE(prog);
	/* Go through uintptr_t so the pointer->u64 cast is clean on 32-bit. */
	attr.insns = (uint64_t)(uintptr_t)prog;
	attr.license = (uint64_t)(uintptr_t)bpf_license;
	attr.log_buf = (uint64_t)(uintptr_t)bpf_log_buf;
	attr.log_size = sizeof(bpf_log_buf);
	/* A non-zero level is required for the kernel to fill log_buf. */
	attr.log_level = 1;
	attr.kern_version = 0;

	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (bpf_fd < 0)
		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
		       sizeof(bpf_fd)))
		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");

	/* The socket holds its own reference; drop ours. */
	close(bpf_fd);
}
132 static void attach_cbpf(int fd
, uint16_t mod
)
134 struct sock_filter code
[] = {
135 /* A = (uint32_t)skb[0] */
136 { BPF_LD
| BPF_W
| BPF_ABS
, 0, 0, 0 },
138 { BPF_ALU
| BPF_MOD
, 0, 0, mod
},
140 { BPF_RET
| BPF_A
, 0, 0, 0 },
142 struct sock_fprog p
= {
143 .len
= ARRAY_SIZE(code
),
147 if (setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &p
, sizeof(p
)))
148 error(1, errno
, "failed to set SO_ATTACH_REUSEPORT_CBPF");
151 static void build_recv_group(const struct test_params p
, int fd
[], uint16_t mod
,
152 void (*attach_bpf
)(int, uint16_t))
154 struct sockaddr
* const addr
=
155 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
158 for (i
= 0; i
< p
.recv_socks
; ++i
) {
159 fd
[i
] = socket(p
.recv_family
, p
.protocol
, 0);
161 error(1, errno
, "failed to create recv %d", i
);
164 if (setsockopt(fd
[i
], SOL_SOCKET
, SO_REUSEPORT
, &opt
,
166 error(1, errno
, "failed to set SO_REUSEPORT on %d", i
);
169 attach_bpf(fd
[i
], mod
);
171 if (bind(fd
[i
], addr
, sockaddr_size()))
172 error(1, errno
, "failed to bind recv socket %d", i
);
174 if (p
.protocol
== SOCK_STREAM
) {
176 if (setsockopt(fd
[i
], SOL_TCP
, TCP_FASTOPEN
, &opt
,
179 "failed to set TCP_FASTOPEN on %d", i
);
180 if (listen(fd
[i
], p
.recv_socks
* 10))
181 error(1, errno
, "failed to listen on socket");
187 static void send_from(struct test_params p
, uint16_t sport
, char *buf
,
190 struct sockaddr
* const saddr
= new_any_sockaddr(p
.send_family
, sport
);
191 struct sockaddr
* const daddr
=
192 new_loopback_sockaddr(p
.send_family
, p
.recv_port
);
193 const int fd
= socket(p
.send_family
, p
.protocol
, 0);
196 error(1, errno
, "failed to create send socket");
198 if (bind(fd
, saddr
, sockaddr_size()))
199 error(1, errno
, "failed to bind send socket");
201 if (sendto(fd
, buf
, len
, MSG_FASTOPEN
, daddr
, sockaddr_size()) < 0)
202 error(1, errno
, "failed to send message");
209 static void test_recv_order(const struct test_params p
, int fd
[], int mod
)
211 char recv_buf
[8], send_buf
[8];
213 struct iovec recv_io
= { recv_buf
, 8 };
214 struct epoll_event ev
;
215 int epfd
, conn
, i
, sport
, expected
;
216 uint32_t data
, ndata
;
218 epfd
= epoll_create(1);
220 error(1, errno
, "failed to create epoll");
221 for (i
= 0; i
< p
.recv_socks
; ++i
) {
224 if (epoll_ctl(epfd
, EPOLL_CTL_ADD
, fd
[i
], &ev
))
225 error(1, errno
, "failed to register sock %d epoll", i
);
228 memset(&msg
, 0, sizeof(msg
));
229 msg
.msg_iov
= &recv_io
;
232 for (data
= 0; data
< p
.recv_socks
* 2; ++data
) {
233 sport
= p
.send_port_min
+ data
;
235 memcpy(send_buf
, &ndata
, sizeof(ndata
));
236 send_from(p
, sport
, send_buf
, sizeof(ndata
));
238 i
= epoll_wait(epfd
, &ev
, 1, -1);
240 error(1, errno
, "epoll wait failed");
242 if (p
.protocol
== SOCK_STREAM
) {
243 conn
= accept(ev
.data
.fd
, NULL
, NULL
);
245 error(1, errno
, "error accepting");
246 i
= recvmsg(conn
, &msg
, 0);
249 i
= recvmsg(ev
.data
.fd
, &msg
, 0);
252 error(1, errno
, "recvmsg error");
253 if (i
!= sizeof(ndata
))
254 error(1, 0, "expected size %zd got %d",
257 for (i
= 0; i
< p
.recv_socks
; ++i
)
258 if (ev
.data
.fd
== fd
[i
])
260 memcpy(&ndata
, recv_buf
, sizeof(ndata
));
261 fprintf(stderr
, "Socket %d: %d\n", i
, ntohl(ndata
));
263 expected
= (sport
% mod
);
265 error(1, 0, "expected socket %d", expected
);
269 static void test_reuseport_ebpf(struct test_params p
)
271 int i
, fd
[p
.recv_socks
];
273 fprintf(stderr
, "Testing EBPF mod %zd...\n", p
.recv_socks
);
274 build_recv_group(p
, fd
, p
.recv_socks
, attach_ebpf
);
275 test_recv_order(p
, fd
, p
.recv_socks
);
277 p
.send_port_min
+= p
.recv_socks
* 2;
278 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
279 attach_ebpf(fd
[0], p
.recv_socks
/ 2);
280 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
282 for (i
= 0; i
< p
.recv_socks
; ++i
)
286 static void test_reuseport_cbpf(struct test_params p
)
288 int i
, fd
[p
.recv_socks
];
290 fprintf(stderr
, "Testing CBPF mod %zd...\n", p
.recv_socks
);
291 build_recv_group(p
, fd
, p
.recv_socks
, attach_cbpf
);
292 test_recv_order(p
, fd
, p
.recv_socks
);
294 p
.send_port_min
+= p
.recv_socks
* 2;
295 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
296 attach_cbpf(fd
[0], p
.recv_socks
/ 2);
297 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
299 for (i
= 0; i
< p
.recv_socks
; ++i
)
303 static void test_extra_filter(const struct test_params p
)
305 struct sockaddr
* const addr
=
306 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
309 fprintf(stderr
, "Testing too many filters...\n");
310 fd1
= socket(p
.recv_family
, p
.protocol
, 0);
312 error(1, errno
, "failed to create socket 1");
313 fd2
= socket(p
.recv_family
, p
.protocol
, 0);
315 error(1, errno
, "failed to create socket 2");
318 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
319 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
320 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
321 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
323 attach_ebpf(fd1
, 10);
324 attach_ebpf(fd2
, 10);
326 if (bind(fd1
, addr
, sockaddr_size()))
327 error(1, errno
, "failed to bind recv socket 1");
329 if (!bind(fd2
, addr
, sockaddr_size()) && errno
!= EADDRINUSE
)
330 error(1, errno
, "bind socket 2 should fail with EADDRINUSE");
335 static void test_filter_no_reuseport(const struct test_params p
)
337 struct sockaddr
* const addr
=
338 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
339 const char bpf_license
[] = "GPL";
340 struct bpf_insn ecode
[] = {
341 { BPF_ALU64
| BPF_MOV
| BPF_K
, BPF_REG_0
, 0, 0, 10 },
342 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
344 struct sock_filter ccode
[] = {{ BPF_RET
| BPF_A
, 0, 0, 0 }};
345 union bpf_attr eprog
;
346 struct sock_fprog cprog
;
349 fprintf(stderr
, "Testing filters on non-SO_REUSEPORT socket...\n");
351 memset(&eprog
, 0, sizeof(eprog
));
352 eprog
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
353 eprog
.insn_cnt
= ARRAY_SIZE(ecode
);
354 eprog
.insns
= (uint64_t)ecode
;
355 eprog
.license
= (uint64_t)bpf_license
;
356 eprog
.kern_version
= 0;
358 memset(&cprog
, 0, sizeof(cprog
));
359 cprog
.len
= ARRAY_SIZE(ccode
);
360 cprog
.filter
= ccode
;
363 bpf_fd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &eprog
, sizeof(eprog
));
365 error(1, errno
, "ebpf error");
366 fd
= socket(p
.recv_family
, p
.protocol
, 0);
368 error(1, errno
, "failed to create socket 1");
370 if (bind(fd
, addr
, sockaddr_size()))
371 error(1, errno
, "failed to bind recv socket 1");
374 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_EBPF
, &bpf_fd
,
375 sizeof(bpf_fd
)) || errno
!= EINVAL
)
376 error(1, errno
, "setsockopt should have returned EINVAL");
379 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &cprog
,
380 sizeof(cprog
)) || errno
!= EINVAL
)
381 error(1, errno
, "setsockopt should have returned EINVAL");
386 static void test_filter_without_bind(void)
388 int fd1
, fd2
, opt
= 1;
390 fprintf(stderr
, "Testing filter add without bind...\n");
391 fd1
= socket(AF_INET
, SOCK_DGRAM
, 0);
393 error(1, errno
, "failed to create socket 1");
394 fd2
= socket(AF_INET
, SOCK_DGRAM
, 0);
396 error(1, errno
, "failed to create socket 2");
397 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
398 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
399 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
400 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
402 attach_ebpf(fd1
, 10);
403 attach_cbpf(fd2
, 10);
/*
 * Make sure the tcp_fastopen sysctl has both the client (0x1) and server
 * (0x2) bits set, rewriting it if necessary.  Existing extra bits are kept.
 *
 * Any failure to open, read or write the sysctl terminates the program.
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
	int rw_mask = 3;  /* bit 1: client side; bit-2 server side */
	int val, size;
	char buf[16];
	ssize_t nread;

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator; read() does not add one. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = (int)strtol(buf, NULL, 10);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, 16, "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
438 fprintf(stderr
, "---- IPv4 UDP ----\n");
439 /* NOTE: UDP socket lookups traverse a different code path when there
440 * are > 10 sockets in a group. Run the bpf test through both paths.
442 test_reuseport_ebpf((struct test_params
) {
443 .recv_family
= AF_INET
,
444 .send_family
= AF_INET
,
445 .protocol
= SOCK_DGRAM
,
448 .send_port_min
= 9000});
449 test_reuseport_ebpf((struct test_params
) {
450 .recv_family
= AF_INET
,
451 .send_family
= AF_INET
,
452 .protocol
= SOCK_DGRAM
,
455 .send_port_min
= 9000});
456 test_reuseport_cbpf((struct test_params
) {
457 .recv_family
= AF_INET
,
458 .send_family
= AF_INET
,
459 .protocol
= SOCK_DGRAM
,
462 .send_port_min
= 9020});
463 test_reuseport_cbpf((struct test_params
) {
464 .recv_family
= AF_INET
,
465 .send_family
= AF_INET
,
466 .protocol
= SOCK_DGRAM
,
469 .send_port_min
= 9020});
470 test_extra_filter((struct test_params
) {
471 .recv_family
= AF_INET
,
472 .protocol
= SOCK_DGRAM
,
474 test_filter_no_reuseport((struct test_params
) {
475 .recv_family
= AF_INET
,
476 .protocol
= SOCK_DGRAM
,
479 fprintf(stderr
, "---- IPv6 UDP ----\n");
480 test_reuseport_ebpf((struct test_params
) {
481 .recv_family
= AF_INET6
,
482 .send_family
= AF_INET6
,
483 .protocol
= SOCK_DGRAM
,
486 .send_port_min
= 9040});
487 test_reuseport_ebpf((struct test_params
) {
488 .recv_family
= AF_INET6
,
489 .send_family
= AF_INET6
,
490 .protocol
= SOCK_DGRAM
,
493 .send_port_min
= 9040});
494 test_reuseport_cbpf((struct test_params
) {
495 .recv_family
= AF_INET6
,
496 .send_family
= AF_INET6
,
497 .protocol
= SOCK_DGRAM
,
500 .send_port_min
= 9060});
501 test_reuseport_cbpf((struct test_params
) {
502 .recv_family
= AF_INET6
,
503 .send_family
= AF_INET6
,
504 .protocol
= SOCK_DGRAM
,
507 .send_port_min
= 9060});
508 test_extra_filter((struct test_params
) {
509 .recv_family
= AF_INET6
,
510 .protocol
= SOCK_DGRAM
,
512 test_filter_no_reuseport((struct test_params
) {
513 .recv_family
= AF_INET6
,
514 .protocol
= SOCK_DGRAM
,
517 fprintf(stderr
, "---- IPv6 UDP w/ mapped IPv4 ----\n");
518 test_reuseport_ebpf((struct test_params
) {
519 .recv_family
= AF_INET6
,
520 .send_family
= AF_INET
,
521 .protocol
= SOCK_DGRAM
,
524 .send_port_min
= 9080});
525 test_reuseport_ebpf((struct test_params
) {
526 .recv_family
= AF_INET6
,
527 .send_family
= AF_INET
,
528 .protocol
= SOCK_DGRAM
,
531 .send_port_min
= 9080});
532 test_reuseport_cbpf((struct test_params
) {
533 .recv_family
= AF_INET6
,
534 .send_family
= AF_INET
,
535 .protocol
= SOCK_DGRAM
,
538 .send_port_min
= 9100});
539 test_reuseport_cbpf((struct test_params
) {
540 .recv_family
= AF_INET6
,
541 .send_family
= AF_INET
,
542 .protocol
= SOCK_DGRAM
,
545 .send_port_min
= 9100});
547 /* TCP fastopen is required for the TCP tests */
549 fprintf(stderr
, "---- IPv4 TCP ----\n");
550 test_reuseport_ebpf((struct test_params
) {
551 .recv_family
= AF_INET
,
552 .send_family
= AF_INET
,
553 .protocol
= SOCK_STREAM
,
556 .send_port_min
= 9120});
557 test_reuseport_cbpf((struct test_params
) {
558 .recv_family
= AF_INET
,
559 .send_family
= AF_INET
,
560 .protocol
= SOCK_STREAM
,
563 .send_port_min
= 9160});
564 test_extra_filter((struct test_params
) {
565 .recv_family
= AF_INET
,
566 .protocol
= SOCK_STREAM
,
568 test_filter_no_reuseport((struct test_params
) {
569 .recv_family
= AF_INET
,
570 .protocol
= SOCK_STREAM
,
573 fprintf(stderr
, "---- IPv6 TCP ----\n");
574 test_reuseport_ebpf((struct test_params
) {
575 .recv_family
= AF_INET6
,
576 .send_family
= AF_INET6
,
577 .protocol
= SOCK_STREAM
,
580 .send_port_min
= 9200});
581 test_reuseport_cbpf((struct test_params
) {
582 .recv_family
= AF_INET6
,
583 .send_family
= AF_INET6
,
584 .protocol
= SOCK_STREAM
,
587 .send_port_min
= 9240});
588 test_extra_filter((struct test_params
) {
589 .recv_family
= AF_INET6
,
590 .protocol
= SOCK_STREAM
,
592 test_filter_no_reuseport((struct test_params
) {
593 .recv_family
= AF_INET6
,
594 .protocol
= SOCK_STREAM
,
597 fprintf(stderr
, "---- IPv6 TCP w/ mapped IPv4 ----\n");
598 test_reuseport_ebpf((struct test_params
) {
599 .recv_family
= AF_INET6
,
600 .send_family
= AF_INET
,
601 .protocol
= SOCK_STREAM
,
604 .send_port_min
= 9320});
605 test_reuseport_cbpf((struct test_params
) {
606 .recv_family
= AF_INET6
,
607 .send_family
= AF_INET
,
608 .protocol
= SOCK_STREAM
,
611 .send_port_min
= 9360});
613 test_filter_without_bind();
615 fprintf(stderr
, "SUCCESS\n");