2 * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
3 * a BPF program (both classic and extended) to read the first word from an
4 * incoming packet (expected to be in network byte-order), calculate a modulus
5 * of that number, and then dispatch the packet to the Nth socket using the
6 * result. These tests are run for each supported address family and protocol.
7 * Additionally, a few edge cases in the implementation are tested.
13 #include <linux/bpf.h>
14 #include <linux/filter.h>
15 #include <linux/unistd.h>
16 #include <netinet/in.h>
17 #include <netinet/tcp.h>
21 #include <sys/epoll.h>
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/resource.h>
28 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
37 uint16_t send_port_min
;
/*
 * Address length handed to bind() and sendto() throughout this file: always
 * the size of the generic struct sockaddr_storage container.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
/*
 * Allocate a zeroed sockaddr_storage holding the wildcard ("any") address of
 * the requested family together with the given port.
 *
 * family: AF_INET or AF_INET6; any other value terminates the test through
 *         error().
 * port:   port number in host byte order (converted with htons() here).
 *
 * Returns a heap-allocated address owned by the caller, who must free() it.
 * An allocation failure terminates the test instead of dereferencing NULL.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zero-fills and lets an out-of-memory result be detected */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate socket address");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Build an address with new_any_sockaddr() and then replace its address
 * field with the loopback address of the family (INADDR_LOOPBACK for
 * AF_INET, in6addr_loopback for AF_INET6).  The port is left as set by
 * new_any_sockaddr().  An unsupported family terminates the test through
 * error().  The caller owns the returned allocation.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *v4 = (struct sockaddr_in *)addr;

		v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)addr;

		v6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}

	return addr;
}
94 static void attach_ebpf(int fd
, uint16_t mod
)
96 static char bpf_log_buf
[65536];
97 static const char bpf_license
[] = "GPL";
100 const struct bpf_insn prog
[] = {
101 /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
102 { BPF_ALU64
| BPF_MOV
| BPF_X
, BPF_REG_6
, BPF_REG_1
, 0, 0 },
103 /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
104 { BPF_LD
| BPF_ABS
| BPF_W
, 0, 0, 0, 0 },
105 /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
106 { BPF_ALU64
| BPF_MOD
| BPF_K
, BPF_REG_0
, 0, 0, mod
},
107 /* BPF_EXIT_INSN() */
108 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
112 memset(&attr
, 0, sizeof(attr
));
113 attr
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
114 attr
.insn_cnt
= ARRAY_SIZE(prog
);
115 attr
.insns
= (unsigned long) &prog
;
116 attr
.license
= (unsigned long) &bpf_license
;
117 attr
.log_buf
= (unsigned long) &bpf_log_buf
;
118 attr
.log_size
= sizeof(bpf_log_buf
);
120 attr
.kern_version
= 0;
122 bpf_fd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &attr
, sizeof(attr
));
124 error(1, errno
, "ebpf error. log:\n%s\n", bpf_log_buf
);
126 if (setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_EBPF
, &bpf_fd
,
128 error(1, errno
, "failed to set SO_ATTACH_REUSEPORT_EBPF");
133 static void attach_cbpf(int fd
, uint16_t mod
)
135 struct sock_filter code
[] = {
136 /* A = (uint32_t)skb[0] */
137 { BPF_LD
| BPF_W
| BPF_ABS
, 0, 0, 0 },
139 { BPF_ALU
| BPF_MOD
, 0, 0, mod
},
141 { BPF_RET
| BPF_A
, 0, 0, 0 },
143 struct sock_fprog p
= {
144 .len
= ARRAY_SIZE(code
),
148 if (setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &p
, sizeof(p
)))
149 error(1, errno
, "failed to set SO_ATTACH_REUSEPORT_CBPF");
/*
 * Create the group of receive sockets used by the steering tests.
 *
 * p.recv_socks sockets of p.recv_family / p.protocol are opened and stored
 * in fd[].  Each one gets SO_REUSEPORT set and is bound to the same wildcard
 * address on p.recv_port.  attach_bpf is the callback that installs the
 * steering program with modulus mod on a socket of the group.  For
 * SOCK_STREAM the sockets additionally get TCP_FASTOPEN set and are put into
 * the listening state with a backlog of p.recv_socks * 10.  Every failing
 * step aborts the test through error().
 *
 * NOTE(review): the assignments to opt that precede each setsockopt(), and
 * any condition guarding the attach_bpf() call, are not visible in this
 * excerpt - confirm them against the full file.
 */
152 static void build_recv_group(const struct test_params p
, int fd
[], uint16_t mod
,
153 void (*attach_bpf
)(int, uint16_t))
155 struct sockaddr
* const addr
=
156 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
159 for (i
= 0; i
< p
.recv_socks
; ++i
) {
160 fd
[i
] = socket(p
.recv_family
, p
.protocol
, 0);
162 error(1, errno
, "failed to create recv %d", i
);
165 if (setsockopt(fd
[i
], SOL_SOCKET
, SO_REUSEPORT
, &opt
,
167 error(1, errno
, "failed to set SO_REUSEPORT on %d", i
);
170 attach_bpf(fd
[i
], mod
);
172 if (bind(fd
[i
], addr
, sockaddr_size()))
173 error(1, errno
, "failed to bind recv socket %d", i
);
175 if (p
.protocol
== SOCK_STREAM
) {
177 if (setsockopt(fd
[i
], SOL_TCP
, TCP_FASTOPEN
, &opt
,
180 "failed to set TCP_FASTOPEN on %d", i
);
181 if (listen(fd
[i
], p
.recv_socks
* 10))
182 error(1, errno
, "failed to listen on socket");
/*
 * Send one message of len bytes from buf to the receive group.
 *
 * A new socket of p.send_family / p.protocol is created on every call,
 * marked SO_REUSEADDR and bound to source port sport on the wildcard
 * address.  The message is then sent with sendto() and MSG_FASTOPEN to the
 * loopback address on p.recv_port.  Every failing step aborts the test
 * through error().
 *
 * NOTE(review): no release of fd, saddr or daddr is visible in this
 * excerpt; the function runs once per message, so confirm that the socket
 * is closed and both addresses are freed.
 */
188 static void send_from(struct test_params p
, uint16_t sport
, char *buf
,
191 struct sockaddr
* const saddr
= new_any_sockaddr(p
.send_family
, sport
);
192 struct sockaddr
* const daddr
=
193 new_loopback_sockaddr(p
.send_family
, p
.recv_port
);
194 const int fd
= socket(p
.send_family
, p
.protocol
, 0), one
= 1;
197 error(1, errno
, "failed to create send socket");
199 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &one
, sizeof(one
)))
200 error(1, errno
, "failed to set reuseaddr");
202 if (bind(fd
, saddr
, sockaddr_size()))
203 error(1, errno
, "failed to bind send socket");
205 if (sendto(fd
, buf
, len
, MSG_FASTOPEN
, daddr
, sockaddr_size()) < 0)
206 error(1, errno
, "failed to send message");
/*
 * Drive p.recv_socks * 2 messages through the receive group in fd[] and
 * check which member of the group receives each of them.
 *
 * All group sockets are registered with one epoll instance.  Message number
 * data is sent by send_from() from source port p.send_port_min + data and
 * carries a payload of sizeof(ndata) bytes copied from ndata.  After
 * epoll_wait() reports a ready socket the payload is read with recvmsg() -
 * from the accept()ed connection for SOCK_STREAM, from the ready socket
 * itself otherwise - and must be exactly sizeof(ndata) bytes long.  The
 * position of the ready socket within fd[] is looked up and printed, and
 * expected = sport % mod is the index reported in the failure message.
 * Every failing step aborts the test through error().
 *
 * NOTE(review): the steering programs in this file derive their result from
 * the first word of the packet, while expected is derived from the source
 * port; the two agree only while p.send_port_min is a multiple of mod -
 * confirm this is intended.
 * NOTE(review): the "expected size %zd" message looks like it prints a
 * sizeof value; if the argument is a size_t, "%zu" is the matching
 * conversion - confirm against the argument list, which is not visible here.
 */
213 static void test_recv_order(const struct test_params p
, int fd
[], int mod
)
215 char recv_buf
[8], send_buf
[8];
217 struct iovec recv_io
= { recv_buf
, 8 };
218 struct epoll_event ev
;
219 int epfd
, conn
, i
, sport
, expected
;
220 uint32_t data
, ndata
;
222 epfd
= epoll_create(1);
224 error(1, errno
, "failed to create epoll");
225 for (i
= 0; i
< p
.recv_socks
; ++i
) {
228 if (epoll_ctl(epfd
, EPOLL_CTL_ADD
, fd
[i
], &ev
))
229 error(1, errno
, "failed to register sock %d epoll", i
);
232 memset(&msg
, 0, sizeof(msg
));
233 msg
.msg_iov
= &recv_io
;
236 for (data
= 0; data
< p
.recv_socks
* 2; ++data
) {
237 sport
= p
.send_port_min
+ data
;
239 memcpy(send_buf
, &ndata
, sizeof(ndata
));
240 send_from(p
, sport
, send_buf
, sizeof(ndata
));
242 i
= epoll_wait(epfd
, &ev
, 1, -1);
244 error(1, errno
, "epoll wait failed");
246 if (p
.protocol
== SOCK_STREAM
) {
247 conn
= accept(ev
.data
.fd
, NULL
, NULL
);
249 error(1, errno
, "error accepting");
250 i
= recvmsg(conn
, &msg
, 0);
253 i
= recvmsg(ev
.data
.fd
, &msg
, 0);
256 error(1, errno
, "recvmsg error");
257 if (i
!= sizeof(ndata
))
258 error(1, 0, "expected size %zd got %d",
261 for (i
= 0; i
< p
.recv_socks
; ++i
)
262 if (ev
.data
.fd
== fd
[i
])
264 memcpy(&ndata
, recv_buf
, sizeof(ndata
));
265 fprintf(stderr
, "Socket %d: %d\n", i
, ntohl(ndata
));
267 expected
= (sport
% mod
);
269 error(1, 0, "expected socket %d", expected
);
273 static void test_reuseport_ebpf(struct test_params p
)
275 int i
, fd
[p
.recv_socks
];
277 fprintf(stderr
, "Testing EBPF mod %zd...\n", p
.recv_socks
);
278 build_recv_group(p
, fd
, p
.recv_socks
, attach_ebpf
);
279 test_recv_order(p
, fd
, p
.recv_socks
);
281 p
.send_port_min
+= p
.recv_socks
* 2;
282 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
283 attach_ebpf(fd
[0], p
.recv_socks
/ 2);
284 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
286 for (i
= 0; i
< p
.recv_socks
; ++i
)
290 static void test_reuseport_cbpf(struct test_params p
)
292 int i
, fd
[p
.recv_socks
];
294 fprintf(stderr
, "Testing CBPF mod %zd...\n", p
.recv_socks
);
295 build_recv_group(p
, fd
, p
.recv_socks
, attach_cbpf
);
296 test_recv_order(p
, fd
, p
.recv_socks
);
298 p
.send_port_min
+= p
.recv_socks
* 2;
299 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
300 attach_cbpf(fd
[0], p
.recv_socks
/ 2);
301 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
303 for (i
= 0; i
< p
.recv_socks
; ++i
)
307 static void test_extra_filter(const struct test_params p
)
309 struct sockaddr
* const addr
=
310 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
313 fprintf(stderr
, "Testing too many filters...\n");
314 fd1
= socket(p
.recv_family
, p
.protocol
, 0);
316 error(1, errno
, "failed to create socket 1");
317 fd2
= socket(p
.recv_family
, p
.protocol
, 0);
319 error(1, errno
, "failed to create socket 2");
322 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
323 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
324 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
325 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
327 attach_ebpf(fd1
, 10);
328 attach_ebpf(fd2
, 10);
330 if (bind(fd1
, addr
, sockaddr_size()))
331 error(1, errno
, "failed to bind recv socket 1");
333 if (!bind(fd2
, addr
, sockaddr_size()) && errno
!= EADDRINUSE
)
334 error(1, errno
, "bind socket 2 should fail with EADDRINUSE");
339 static void test_filter_no_reuseport(const struct test_params p
)
341 struct sockaddr
* const addr
=
342 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
343 const char bpf_license
[] = "GPL";
344 struct bpf_insn ecode
[] = {
345 { BPF_ALU64
| BPF_MOV
| BPF_K
, BPF_REG_0
, 0, 0, 10 },
346 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
348 struct sock_filter ccode
[] = {{ BPF_RET
| BPF_A
, 0, 0, 0 }};
349 union bpf_attr eprog
;
350 struct sock_fprog cprog
;
353 fprintf(stderr
, "Testing filters on non-SO_REUSEPORT socket...\n");
355 memset(&eprog
, 0, sizeof(eprog
));
356 eprog
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
357 eprog
.insn_cnt
= ARRAY_SIZE(ecode
);
358 eprog
.insns
= (unsigned long) &ecode
;
359 eprog
.license
= (unsigned long) &bpf_license
;
360 eprog
.kern_version
= 0;
362 memset(&cprog
, 0, sizeof(cprog
));
363 cprog
.len
= ARRAY_SIZE(ccode
);
364 cprog
.filter
= ccode
;
367 bpf_fd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &eprog
, sizeof(eprog
));
369 error(1, errno
, "ebpf error");
370 fd
= socket(p
.recv_family
, p
.protocol
, 0);
372 error(1, errno
, "failed to create socket 1");
374 if (bind(fd
, addr
, sockaddr_size()))
375 error(1, errno
, "failed to bind recv socket 1");
378 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_EBPF
, &bpf_fd
,
379 sizeof(bpf_fd
)) || errno
!= EINVAL
)
380 error(1, errno
, "setsockopt should have returned EINVAL");
383 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &cprog
,
384 sizeof(cprog
)) || errno
!= EINVAL
)
385 error(1, errno
, "setsockopt should have returned EINVAL");
390 static void test_filter_without_bind(void)
392 int fd1
, fd2
, opt
= 1;
394 fprintf(stderr
, "Testing filter add without bind...\n");
395 fd1
= socket(AF_INET
, SOCK_DGRAM
, 0);
397 error(1, errno
, "failed to create socket 1");
398 fd2
= socket(AF_INET
, SOCK_DGRAM
, 0);
400 error(1, errno
, "failed to create socket 2");
401 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
402 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
403 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
404 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
406 attach_ebpf(fd1
, 10);
407 attach_cbpf(fd2
, 10);
/*
 * Make sure the tcp_fastopen sysctl has both bits of rw_mask (value 3) set.
 *
 * The current setting is read from /proc/sys/net/ipv4/tcp_fastopen.  Only
 * when (val & rw_mask) != rw_mask is the file opened again with O_RDWR and
 * a decimal value written back.  Every failing open/read/write aborts the
 * test through error().
 *
 * NOTE(review): how val is derived from buf, and how it is changed before
 * being written back, is not visible in this excerpt.  Also confirm that
 * buf is NUL-terminated before it is parsed (read() does not terminate it)
 * and that both descriptors are closed.
 */
413 void enable_fastopen(void)
415 int fd
= open("/proc/sys/net/ipv4/tcp_fastopen", 0);
416 int rw_mask
= 3; /* bit 1: client side; bit-2 server side */
421 error(1, errno
, "Unable to open tcp_fastopen sysctl");
422 if (read(fd
, buf
, sizeof(buf
)) <= 0)
423 error(1, errno
, "Unable to read tcp_fastopen sysctl");
427 if ((val
& rw_mask
) != rw_mask
) {
428 fd
= open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR
);
431 "Unable to open tcp_fastopen sysctl for writing");
433 size
= snprintf(buf
, 16, "%d", val
);
434 if (write(fd
, buf
, size
) <= 0)
435 error(1, errno
, "Unable to write tcp_fastopen sysctl");
/* RLIMIT_MEMLOCK as found at start-up; put back by main_dtor(). */
static struct rlimit rlim_old;
/* Non-zero once rlim_old holds a value that was really read by getrlimit(). */
static int rlim_old_valid;

/*
 * Add 1 MiB to a resource limit without wrapping around.  An unlimited value
 * stays unlimited, and a value too close to the top of the range saturates
 * to RLIM_INFINITY (this relies on RLIM_INFINITY being the largest rlim_t
 * value, as it is on Linux).
 */
static rlim_t memlock_raise(rlim_t limit)
{
	const rlim_t extra = 1UL << 20;

	if (limit == RLIM_INFINITY || limit > RLIM_INFINITY - extra)
		return RLIM_INFINITY;
	return limit + extra;
}

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and, unless the
 * soft limit is already unlimited, try to raise both limits by 1 MiB
 * (presumably so that loading the BPF programs is not refused for lack of
 * locked-memory quota - TODO confirm).  Raising is best effort: a refused
 * setrlimit() is ignored, as before.
 */
static __attribute__((constructor)) void main_ctor(void)
{
	struct rlimit rlim_new;

	/* without a valid snapshot there is nothing to raise or restore */
	if (getrlimit(RLIMIT_MEMLOCK, &rlim_old))
		return;
	rlim_old_valid = 1;

	if (rlim_old.rlim_cur == RLIM_INFINITY)
		return;

	/*
	 * The hard limit may be unlimited while the soft limit is not; a plain
	 * addition would wrap it to a tiny value below the new soft limit.
	 */
	rlim_new.rlim_cur = memlock_raise(rlim_old.rlim_cur);
	rlim_new.rlim_max = memlock_raise(rlim_old.rlim_max);
	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
}

/*
 * Runs after main(): put RLIMIT_MEMLOCK back to the value saved by
 * main_ctor(), provided a value was saved.
 */
static __attribute__((destructor)) void main_dtor(void)
{
	if (rlim_old_valid)
		setrlimit(RLIMIT_MEMLOCK, &rlim_old);
}
462 fprintf(stderr
, "---- IPv4 UDP ----\n");
463 /* NOTE: UDP socket lookups traverse a different code path when there
464 * are > 10 sockets in a group. Run the bpf test through both paths.
466 test_reuseport_ebpf((struct test_params
) {
467 .recv_family
= AF_INET
,
468 .send_family
= AF_INET
,
469 .protocol
= SOCK_DGRAM
,
472 .send_port_min
= 9000});
473 test_reuseport_ebpf((struct test_params
) {
474 .recv_family
= AF_INET
,
475 .send_family
= AF_INET
,
476 .protocol
= SOCK_DGRAM
,
479 .send_port_min
= 9000});
480 test_reuseport_cbpf((struct test_params
) {
481 .recv_family
= AF_INET
,
482 .send_family
= AF_INET
,
483 .protocol
= SOCK_DGRAM
,
486 .send_port_min
= 9020});
487 test_reuseport_cbpf((struct test_params
) {
488 .recv_family
= AF_INET
,
489 .send_family
= AF_INET
,
490 .protocol
= SOCK_DGRAM
,
493 .send_port_min
= 9020});
494 test_extra_filter((struct test_params
) {
495 .recv_family
= AF_INET
,
496 .protocol
= SOCK_DGRAM
,
498 test_filter_no_reuseport((struct test_params
) {
499 .recv_family
= AF_INET
,
500 .protocol
= SOCK_DGRAM
,
503 fprintf(stderr
, "---- IPv6 UDP ----\n");
504 test_reuseport_ebpf((struct test_params
) {
505 .recv_family
= AF_INET6
,
506 .send_family
= AF_INET6
,
507 .protocol
= SOCK_DGRAM
,
510 .send_port_min
= 9040});
511 test_reuseport_ebpf((struct test_params
) {
512 .recv_family
= AF_INET6
,
513 .send_family
= AF_INET6
,
514 .protocol
= SOCK_DGRAM
,
517 .send_port_min
= 9040});
518 test_reuseport_cbpf((struct test_params
) {
519 .recv_family
= AF_INET6
,
520 .send_family
= AF_INET6
,
521 .protocol
= SOCK_DGRAM
,
524 .send_port_min
= 9060});
525 test_reuseport_cbpf((struct test_params
) {
526 .recv_family
= AF_INET6
,
527 .send_family
= AF_INET6
,
528 .protocol
= SOCK_DGRAM
,
531 .send_port_min
= 9060});
532 test_extra_filter((struct test_params
) {
533 .recv_family
= AF_INET6
,
534 .protocol
= SOCK_DGRAM
,
536 test_filter_no_reuseport((struct test_params
) {
537 .recv_family
= AF_INET6
,
538 .protocol
= SOCK_DGRAM
,
541 fprintf(stderr
, "---- IPv6 UDP w/ mapped IPv4 ----\n");
542 test_reuseport_ebpf((struct test_params
) {
543 .recv_family
= AF_INET6
,
544 .send_family
= AF_INET
,
545 .protocol
= SOCK_DGRAM
,
548 .send_port_min
= 9080});
549 test_reuseport_ebpf((struct test_params
) {
550 .recv_family
= AF_INET6
,
551 .send_family
= AF_INET
,
552 .protocol
= SOCK_DGRAM
,
555 .send_port_min
= 9080});
556 test_reuseport_cbpf((struct test_params
) {
557 .recv_family
= AF_INET6
,
558 .send_family
= AF_INET
,
559 .protocol
= SOCK_DGRAM
,
562 .send_port_min
= 9100});
563 test_reuseport_cbpf((struct test_params
) {
564 .recv_family
= AF_INET6
,
565 .send_family
= AF_INET
,
566 .protocol
= SOCK_DGRAM
,
569 .send_port_min
= 9100});
571 /* TCP fastopen is required for the TCP tests */
573 fprintf(stderr
, "---- IPv4 TCP ----\n");
574 test_reuseport_ebpf((struct test_params
) {
575 .recv_family
= AF_INET
,
576 .send_family
= AF_INET
,
577 .protocol
= SOCK_STREAM
,
580 .send_port_min
= 9120});
581 test_reuseport_cbpf((struct test_params
) {
582 .recv_family
= AF_INET
,
583 .send_family
= AF_INET
,
584 .protocol
= SOCK_STREAM
,
587 .send_port_min
= 9160});
588 test_extra_filter((struct test_params
) {
589 .recv_family
= AF_INET
,
590 .protocol
= SOCK_STREAM
,
592 test_filter_no_reuseport((struct test_params
) {
593 .recv_family
= AF_INET
,
594 .protocol
= SOCK_STREAM
,
597 fprintf(stderr
, "---- IPv6 TCP ----\n");
598 test_reuseport_ebpf((struct test_params
) {
599 .recv_family
= AF_INET6
,
600 .send_family
= AF_INET6
,
601 .protocol
= SOCK_STREAM
,
604 .send_port_min
= 9200});
605 test_reuseport_cbpf((struct test_params
) {
606 .recv_family
= AF_INET6
,
607 .send_family
= AF_INET6
,
608 .protocol
= SOCK_STREAM
,
611 .send_port_min
= 9240});
612 test_extra_filter((struct test_params
) {
613 .recv_family
= AF_INET6
,
614 .protocol
= SOCK_STREAM
,
616 test_filter_no_reuseport((struct test_params
) {
617 .recv_family
= AF_INET6
,
618 .protocol
= SOCK_STREAM
,
621 fprintf(stderr
, "---- IPv6 TCP w/ mapped IPv4 ----\n");
622 test_reuseport_ebpf((struct test_params
) {
623 .recv_family
= AF_INET6
,
624 .send_family
= AF_INET
,
625 .protocol
= SOCK_STREAM
,
628 .send_port_min
= 9320});
629 test_reuseport_cbpf((struct test_params
) {
630 .recv_family
= AF_INET6
,
631 .send_family
= AF_INET
,
632 .protocol
= SOCK_STREAM
,
635 .send_port_min
= 9360});
637 test_filter_without_bind();
639 fprintf(stderr
, "SUCCESS\n");