/*
 * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result. These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
13 #include <linux/bpf.h>
14 #include <linux/filter.h>
15 #include <linux/unistd.h>
16 #include <netinet/in.h>
17 #include <netinet/tcp.h>
21 #include <sys/epoll.h>
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/resource.h>
27 #include "../kselftest.h"
35 uint16_t send_port_min
;
/*
 * Address length passed to bind()/sendto() throughout this file.
 *
 * Addresses are allocated as a full struct sockaddr_storage (see
 * new_any_sockaddr()), so one size is used for every address family.
 */
static size_t sockaddr_size(void)
{
	return sizeof(struct sockaddr_storage);
}
/*
 * Allocate a zero-filled sockaddr_storage and fill it in as the wildcard
 * ("any") address of @family with @port.
 *
 * @family: AF_INET or AF_INET6; anything else terminates via error().
 * @port:   port in host byte order (stored in network byte order).
 *
 * Returns a heap buffer owned by the caller, who must free() it.
 * Terminates the process via error() if the allocation fails.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() gives the zeroed storage and lets us catch OOM up front */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Like new_any_sockaddr(), but the address part is the loopback address of
 * @family instead of the wildcard address.
 *
 * @family: AF_INET or AF_INET6; anything else terminates via error().
 * @port:   port in host byte order.
 *
 * Returns a heap buffer owned by the caller, who must free() it.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* Family and port are already set; only the address is overridden. */
	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_addr = in6addr_loopback;
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return addr;
}
92 static void attach_ebpf(int fd
, uint16_t mod
)
94 static char bpf_log_buf
[65536];
95 static const char bpf_license
[] = "GPL";
98 const struct bpf_insn prog
[] = {
99 /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
100 { BPF_ALU64
| BPF_MOV
| BPF_X
, BPF_REG_6
, BPF_REG_1
, 0, 0 },
101 /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
102 { BPF_LD
| BPF_ABS
| BPF_W
, 0, 0, 0, 0 },
103 /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
104 { BPF_ALU64
| BPF_MOD
| BPF_K
, BPF_REG_0
, 0, 0, mod
},
105 /* BPF_EXIT_INSN() */
106 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
110 memset(&attr
, 0, sizeof(attr
));
111 attr
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
112 attr
.insn_cnt
= ARRAY_SIZE(prog
);
113 attr
.insns
= (unsigned long) &prog
;
114 attr
.license
= (unsigned long) &bpf_license
;
115 attr
.log_buf
= (unsigned long) &bpf_log_buf
;
116 attr
.log_size
= sizeof(bpf_log_buf
);
118 attr
.kern_version
= 0;
120 bpf_fd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &attr
, sizeof(attr
));
122 error(1, errno
, "ebpf error. log:\n%s\n", bpf_log_buf
);
124 if (setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_EBPF
, &bpf_fd
,
126 error(1, errno
, "failed to set SO_ATTACH_REUSEPORT_EBPF");
131 static void attach_cbpf(int fd
, uint16_t mod
)
133 struct sock_filter code
[] = {
134 /* A = (uint32_t)skb[0] */
135 { BPF_LD
| BPF_W
| BPF_ABS
, 0, 0, 0 },
137 { BPF_ALU
| BPF_MOD
, 0, 0, mod
},
139 { BPF_RET
| BPF_A
, 0, 0, 0 },
141 struct sock_fprog p
= {
142 .len
= ARRAY_SIZE(code
),
146 if (setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &p
, sizeof(p
)))
147 error(1, errno
, "failed to set SO_ATTACH_REUSEPORT_CBPF");
150 static void build_recv_group(const struct test_params p
, int fd
[], uint16_t mod
,
151 void (*attach_bpf
)(int, uint16_t))
153 struct sockaddr
* const addr
=
154 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
157 for (i
= 0; i
< p
.recv_socks
; ++i
) {
158 fd
[i
] = socket(p
.recv_family
, p
.protocol
, 0);
160 error(1, errno
, "failed to create recv %d", i
);
163 if (setsockopt(fd
[i
], SOL_SOCKET
, SO_REUSEPORT
, &opt
,
165 error(1, errno
, "failed to set SO_REUSEPORT on %d", i
);
168 attach_bpf(fd
[i
], mod
);
170 if (bind(fd
[i
], addr
, sockaddr_size()))
171 error(1, errno
, "failed to bind recv socket %d", i
);
173 if (p
.protocol
== SOCK_STREAM
) {
175 if (setsockopt(fd
[i
], SOL_TCP
, TCP_FASTOPEN
, &opt
,
178 "failed to set TCP_FASTOPEN on %d", i
);
179 if (listen(fd
[i
], p
.recv_socks
* 10))
180 error(1, errno
, "failed to listen on socket");
186 static void send_from(struct test_params p
, uint16_t sport
, char *buf
,
189 struct sockaddr
* const saddr
= new_any_sockaddr(p
.send_family
, sport
);
190 struct sockaddr
* const daddr
=
191 new_loopback_sockaddr(p
.send_family
, p
.recv_port
);
192 const int fd
= socket(p
.send_family
, p
.protocol
, 0), one
= 1;
195 error(1, errno
, "failed to create send socket");
197 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &one
, sizeof(one
)))
198 error(1, errno
, "failed to set reuseaddr");
200 if (bind(fd
, saddr
, sockaddr_size()))
201 error(1, errno
, "failed to bind send socket");
203 if (sendto(fd
, buf
, len
, MSG_FASTOPEN
, daddr
, sockaddr_size()) < 0)
204 error(1, errno
, "failed to send message");
211 static void test_recv_order(const struct test_params p
, int fd
[], int mod
)
213 char recv_buf
[8], send_buf
[8];
215 struct iovec recv_io
= { recv_buf
, 8 };
216 struct epoll_event ev
;
217 int epfd
, conn
, i
, sport
, expected
;
218 uint32_t data
, ndata
;
220 epfd
= epoll_create(1);
222 error(1, errno
, "failed to create epoll");
223 for (i
= 0; i
< p
.recv_socks
; ++i
) {
226 if (epoll_ctl(epfd
, EPOLL_CTL_ADD
, fd
[i
], &ev
))
227 error(1, errno
, "failed to register sock %d epoll", i
);
230 memset(&msg
, 0, sizeof(msg
));
231 msg
.msg_iov
= &recv_io
;
234 for (data
= 0; data
< p
.recv_socks
* 2; ++data
) {
235 sport
= p
.send_port_min
+ data
;
237 memcpy(send_buf
, &ndata
, sizeof(ndata
));
238 send_from(p
, sport
, send_buf
, sizeof(ndata
));
240 i
= epoll_wait(epfd
, &ev
, 1, -1);
242 error(1, errno
, "epoll wait failed");
244 if (p
.protocol
== SOCK_STREAM
) {
245 conn
= accept(ev
.data
.fd
, NULL
, NULL
);
247 error(1, errno
, "error accepting");
248 i
= recvmsg(conn
, &msg
, 0);
251 i
= recvmsg(ev
.data
.fd
, &msg
, 0);
254 error(1, errno
, "recvmsg error");
255 if (i
!= sizeof(ndata
))
256 error(1, 0, "expected size %zd got %d",
259 for (i
= 0; i
< p
.recv_socks
; ++i
)
260 if (ev
.data
.fd
== fd
[i
])
262 memcpy(&ndata
, recv_buf
, sizeof(ndata
));
263 fprintf(stderr
, "Socket %d: %d\n", i
, ntohl(ndata
));
265 expected
= (sport
% mod
);
267 error(1, 0, "expected socket %d", expected
);
271 static void test_reuseport_ebpf(struct test_params p
)
273 int i
, fd
[p
.recv_socks
];
275 fprintf(stderr
, "Testing EBPF mod %zd...\n", p
.recv_socks
);
276 build_recv_group(p
, fd
, p
.recv_socks
, attach_ebpf
);
277 test_recv_order(p
, fd
, p
.recv_socks
);
279 p
.send_port_min
+= p
.recv_socks
* 2;
280 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
281 attach_ebpf(fd
[0], p
.recv_socks
/ 2);
282 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
284 for (i
= 0; i
< p
.recv_socks
; ++i
)
288 static void test_reuseport_cbpf(struct test_params p
)
290 int i
, fd
[p
.recv_socks
];
292 fprintf(stderr
, "Testing CBPF mod %zd...\n", p
.recv_socks
);
293 build_recv_group(p
, fd
, p
.recv_socks
, attach_cbpf
);
294 test_recv_order(p
, fd
, p
.recv_socks
);
296 p
.send_port_min
+= p
.recv_socks
* 2;
297 fprintf(stderr
, "Reprograming, testing mod %zd...\n", p
.recv_socks
/ 2);
298 attach_cbpf(fd
[0], p
.recv_socks
/ 2);
299 test_recv_order(p
, fd
, p
.recv_socks
/ 2);
301 for (i
= 0; i
< p
.recv_socks
; ++i
)
305 static void test_extra_filter(const struct test_params p
)
307 struct sockaddr
* const addr
=
308 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
311 fprintf(stderr
, "Testing too many filters...\n");
312 fd1
= socket(p
.recv_family
, p
.protocol
, 0);
314 error(1, errno
, "failed to create socket 1");
315 fd2
= socket(p
.recv_family
, p
.protocol
, 0);
317 error(1, errno
, "failed to create socket 2");
320 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
321 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
322 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
323 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
325 attach_ebpf(fd1
, 10);
326 attach_ebpf(fd2
, 10);
328 if (bind(fd1
, addr
, sockaddr_size()))
329 error(1, errno
, "failed to bind recv socket 1");
331 if (!bind(fd2
, addr
, sockaddr_size()) || errno
!= EADDRINUSE
)
332 error(1, errno
, "bind socket 2 should fail with EADDRINUSE");
337 static void test_filter_no_reuseport(const struct test_params p
)
339 struct sockaddr
* const addr
=
340 new_any_sockaddr(p
.recv_family
, p
.recv_port
);
341 const char bpf_license
[] = "GPL";
342 struct bpf_insn ecode
[] = {
343 { BPF_ALU64
| BPF_MOV
| BPF_K
, BPF_REG_0
, 0, 0, 10 },
344 { BPF_JMP
| BPF_EXIT
, 0, 0, 0, 0 }
346 struct sock_filter ccode
[] = {{ BPF_RET
| BPF_A
, 0, 0, 0 }};
347 union bpf_attr eprog
;
348 struct sock_fprog cprog
;
351 fprintf(stderr
, "Testing filters on non-SO_REUSEPORT socket...\n");
353 memset(&eprog
, 0, sizeof(eprog
));
354 eprog
.prog_type
= BPF_PROG_TYPE_SOCKET_FILTER
;
355 eprog
.insn_cnt
= ARRAY_SIZE(ecode
);
356 eprog
.insns
= (unsigned long) &ecode
;
357 eprog
.license
= (unsigned long) &bpf_license
;
358 eprog
.kern_version
= 0;
360 memset(&cprog
, 0, sizeof(cprog
));
361 cprog
.len
= ARRAY_SIZE(ccode
);
362 cprog
.filter
= ccode
;
365 bpf_fd
= syscall(__NR_bpf
, BPF_PROG_LOAD
, &eprog
, sizeof(eprog
));
367 error(1, errno
, "ebpf error");
368 fd
= socket(p
.recv_family
, p
.protocol
, 0);
370 error(1, errno
, "failed to create socket 1");
372 if (bind(fd
, addr
, sockaddr_size()))
373 error(1, errno
, "failed to bind recv socket 1");
376 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_EBPF
, &bpf_fd
,
377 sizeof(bpf_fd
)) || errno
!= EINVAL
)
378 error(1, errno
, "setsockopt should have returned EINVAL");
381 if (!setsockopt(fd
, SOL_SOCKET
, SO_ATTACH_REUSEPORT_CBPF
, &cprog
,
382 sizeof(cprog
)) || errno
!= EINVAL
)
383 error(1, errno
, "setsockopt should have returned EINVAL");
388 static void test_filter_without_bind(void)
390 int fd1
, fd2
, opt
= 1;
392 fprintf(stderr
, "Testing filter add without bind...\n");
393 fd1
= socket(AF_INET
, SOCK_DGRAM
, 0);
395 error(1, errno
, "failed to create socket 1");
396 fd2
= socket(AF_INET
, SOCK_DGRAM
, 0);
398 error(1, errno
, "failed to create socket 2");
399 if (setsockopt(fd1
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
400 error(1, errno
, "failed to set SO_REUSEPORT on socket 1");
401 if (setsockopt(fd2
, SOL_SOCKET
, SO_REUSEPORT
, &opt
, sizeof(opt
)))
402 error(1, errno
, "failed to set SO_REUSEPORT on socket 2");
404 attach_ebpf(fd1
, 10);
405 attach_cbpf(fd2
, 10);
/*
 * Make sure TCP Fast Open is enabled for both client and server use by
 * reading /proc/sys/net/ipv4/tcp_fastopen and, if either of the two low
 * bits is clear, writing the value back with both bits set.
 *
 * Terminates via error() if the sysctl cannot be opened, read or written.
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
	int rw_mask = 3;  /* 0x1: client side; 0x2: server side */
	int val, size;
	char buf[16];
	ssize_t nread;

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator so the buffer can be parsed safely. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		/* Keep whatever other bits were set; only add the two we need. */
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
/* RLIMIT_MEMLOCK as it was at startup; restored by the destructor. */
static struct rlimit rlim_old;

/*
 * Return @limit raised by @extra, leaving RLIM_INFINITY untouched and
 * saturating to RLIM_INFINITY instead of wrapping around.
 */
static rlim_t memlock_bump(rlim_t limit, rlim_t extra)
{
	if (limit == RLIM_INFINITY || extra > RLIM_INFINITY - limit)
		return RLIM_INFINITY;
	return limit + extra;
}

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and, unless the
 * soft limit is already unlimited, try to raise both limits by 1 MiB.
 * NOTE(review): presumably needed so that loading BPF programs is not
 * refused for exceeding the locked-memory limit -- confirm.
 *
 * The result of setrlimit() is deliberately not checked: raising the limit
 * is best effort and the tests report their own failures.
 */
static __attribute__((constructor)) void main_ctor(void)
{
	getrlimit(RLIMIT_MEMLOCK, &rlim_old);

	if (rlim_old.rlim_cur != RLIM_INFINITY) {
		struct rlimit rlim_new;

		/*
		 * A finite soft limit may come with an unlimited hard limit;
		 * adding to RLIM_INFINITY would wrap to a tiny hard limit
		 * below the new soft limit and make setrlimit() fail.
		 */
		rlim_new.rlim_cur = memlock_bump(rlim_old.rlim_cur, 1UL << 20);
		rlim_new.rlim_max = memlock_bump(rlim_old.rlim_max, 1UL << 20);
		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
	}
}
453 static __attribute__((destructor
)) void main_dtor(void)
455 setrlimit(RLIMIT_MEMLOCK
, &rlim_old
);
460 fprintf(stderr
, "---- IPv4 UDP ----\n");
461 /* NOTE: UDP socket lookups traverse a different code path when there
462 * are > 10 sockets in a group. Run the bpf test through both paths.
464 test_reuseport_ebpf((struct test_params
) {
465 .recv_family
= AF_INET
,
466 .send_family
= AF_INET
,
467 .protocol
= SOCK_DGRAM
,
470 .send_port_min
= 9000});
471 test_reuseport_ebpf((struct test_params
) {
472 .recv_family
= AF_INET
,
473 .send_family
= AF_INET
,
474 .protocol
= SOCK_DGRAM
,
477 .send_port_min
= 9000});
478 test_reuseport_cbpf((struct test_params
) {
479 .recv_family
= AF_INET
,
480 .send_family
= AF_INET
,
481 .protocol
= SOCK_DGRAM
,
484 .send_port_min
= 9020});
485 test_reuseport_cbpf((struct test_params
) {
486 .recv_family
= AF_INET
,
487 .send_family
= AF_INET
,
488 .protocol
= SOCK_DGRAM
,
491 .send_port_min
= 9020});
492 test_extra_filter((struct test_params
) {
493 .recv_family
= AF_INET
,
494 .protocol
= SOCK_DGRAM
,
496 test_filter_no_reuseport((struct test_params
) {
497 .recv_family
= AF_INET
,
498 .protocol
= SOCK_DGRAM
,
501 fprintf(stderr
, "---- IPv6 UDP ----\n");
502 test_reuseport_ebpf((struct test_params
) {
503 .recv_family
= AF_INET6
,
504 .send_family
= AF_INET6
,
505 .protocol
= SOCK_DGRAM
,
508 .send_port_min
= 9040});
509 test_reuseport_ebpf((struct test_params
) {
510 .recv_family
= AF_INET6
,
511 .send_family
= AF_INET6
,
512 .protocol
= SOCK_DGRAM
,
515 .send_port_min
= 9040});
516 test_reuseport_cbpf((struct test_params
) {
517 .recv_family
= AF_INET6
,
518 .send_family
= AF_INET6
,
519 .protocol
= SOCK_DGRAM
,
522 .send_port_min
= 9060});
523 test_reuseport_cbpf((struct test_params
) {
524 .recv_family
= AF_INET6
,
525 .send_family
= AF_INET6
,
526 .protocol
= SOCK_DGRAM
,
529 .send_port_min
= 9060});
530 test_extra_filter((struct test_params
) {
531 .recv_family
= AF_INET6
,
532 .protocol
= SOCK_DGRAM
,
534 test_filter_no_reuseport((struct test_params
) {
535 .recv_family
= AF_INET6
,
536 .protocol
= SOCK_DGRAM
,
539 fprintf(stderr
, "---- IPv6 UDP w/ mapped IPv4 ----\n");
540 test_reuseport_ebpf((struct test_params
) {
541 .recv_family
= AF_INET6
,
542 .send_family
= AF_INET
,
543 .protocol
= SOCK_DGRAM
,
546 .send_port_min
= 9080});
547 test_reuseport_ebpf((struct test_params
) {
548 .recv_family
= AF_INET6
,
549 .send_family
= AF_INET
,
550 .protocol
= SOCK_DGRAM
,
553 .send_port_min
= 9080});
554 test_reuseport_cbpf((struct test_params
) {
555 .recv_family
= AF_INET6
,
556 .send_family
= AF_INET
,
557 .protocol
= SOCK_DGRAM
,
560 .send_port_min
= 9100});
561 test_reuseport_cbpf((struct test_params
) {
562 .recv_family
= AF_INET6
,
563 .send_family
= AF_INET
,
564 .protocol
= SOCK_DGRAM
,
567 .send_port_min
= 9100});
569 /* TCP fastopen is required for the TCP tests */
571 fprintf(stderr
, "---- IPv4 TCP ----\n");
572 test_reuseport_ebpf((struct test_params
) {
573 .recv_family
= AF_INET
,
574 .send_family
= AF_INET
,
575 .protocol
= SOCK_STREAM
,
578 .send_port_min
= 9120});
579 test_reuseport_cbpf((struct test_params
) {
580 .recv_family
= AF_INET
,
581 .send_family
= AF_INET
,
582 .protocol
= SOCK_STREAM
,
585 .send_port_min
= 9160});
586 test_extra_filter((struct test_params
) {
587 .recv_family
= AF_INET
,
588 .protocol
= SOCK_STREAM
,
590 test_filter_no_reuseport((struct test_params
) {
591 .recv_family
= AF_INET
,
592 .protocol
= SOCK_STREAM
,
595 fprintf(stderr
, "---- IPv6 TCP ----\n");
596 test_reuseport_ebpf((struct test_params
) {
597 .recv_family
= AF_INET6
,
598 .send_family
= AF_INET6
,
599 .protocol
= SOCK_STREAM
,
602 .send_port_min
= 9200});
603 test_reuseport_cbpf((struct test_params
) {
604 .recv_family
= AF_INET6
,
605 .send_family
= AF_INET6
,
606 .protocol
= SOCK_STREAM
,
609 .send_port_min
= 9240});
610 test_extra_filter((struct test_params
) {
611 .recv_family
= AF_INET6
,
612 .protocol
= SOCK_STREAM
,
614 test_filter_no_reuseport((struct test_params
) {
615 .recv_family
= AF_INET6
,
616 .protocol
= SOCK_STREAM
,
619 fprintf(stderr
, "---- IPv6 TCP w/ mapped IPv4 ----\n");
620 test_reuseport_ebpf((struct test_params
) {
621 .recv_family
= AF_INET6
,
622 .send_family
= AF_INET
,
623 .protocol
= SOCK_STREAM
,
626 .send_port_min
= 9320});
627 test_reuseport_cbpf((struct test_params
) {
628 .recv_family
= AF_INET6
,
629 .send_family
= AF_INET
,
630 .protocol
= SOCK_STREAM
,
633 .send_port_min
= 9360});
635 test_filter_without_bind();
637 fprintf(stderr
, "SUCCESS\n");