Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cris-mirror.git] / tools / testing / selftests / net / reuseport_bpf.c
blobcad14cd0ea922f839d61ca8f78c8e73c9fdb89f5
/*
 * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result. These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/resource.h>
#include <unistd.h>
/* Number of elements in a true array (not a pointer). */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
/* Parameters describing one test run; passed by value to the test helpers. */
struct test_params {
	int recv_family;	/* address family of the receiving sockets */
	int send_family;	/* address family of the sending socket */
	int protocol;		/* socket type passed to socket(), e.g. SOCK_DGRAM */
	size_t recv_socks;	/* number of sockets in the receive group */
	uint16_t recv_port;	/* port the receive group binds to */
	uint16_t send_port_min;	/* first source port used by the senders */
};
/*
 * Size of the address buffers handed to bind()/sendto(). Every address in
 * this file is allocated as a struct sockaddr_storage, so that size is used
 * regardless of family.
 */
static size_t sockaddr_size(void)
{
	return sizeof(struct sockaddr_storage);
}
/*
 * Allocate a zeroed sockaddr_storage holding the wildcard ("any") address of
 * the given family with the given port (host byte order on input).
 *
 * Supports AF_INET and AF_INET6; any other family terminates the test via
 * error(). The caller owns the returned buffer and must free() it.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	addr = malloc(sizeof(*addr));
	/* Fail loudly instead of letting memset() dereference NULL. */
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");
	memset(addr, 0, sizeof(*addr));

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Like new_any_sockaddr(), but the address part is set to the loopback
 * address of the given family. The caller must free() the result.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* Family and port are already filled in; only swap the address. */
	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_addr = in6addr_loopback;
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return addr;
}
94 static void attach_ebpf(int fd, uint16_t mod)
96 static char bpf_log_buf[65536];
97 static const char bpf_license[] = "GPL";
99 int bpf_fd;
100 const struct bpf_insn prog[] = {
101 /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
102 { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
103 /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
104 { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
105 /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
106 { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
107 /* BPF_EXIT_INSN() */
108 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
110 union bpf_attr attr;
112 memset(&attr, 0, sizeof(attr));
113 attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
114 attr.insn_cnt = ARRAY_SIZE(prog);
115 attr.insns = (unsigned long) &prog;
116 attr.license = (unsigned long) &bpf_license;
117 attr.log_buf = (unsigned long) &bpf_log_buf;
118 attr.log_size = sizeof(bpf_log_buf);
119 attr.log_level = 1;
120 attr.kern_version = 0;
122 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
123 if (bpf_fd < 0)
124 error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
126 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
127 sizeof(bpf_fd)))
128 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
130 close(bpf_fd);
133 static void attach_cbpf(int fd, uint16_t mod)
135 struct sock_filter code[] = {
136 /* A = (uint32_t)skb[0] */
137 { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
138 /* A = A % mod */
139 { BPF_ALU | BPF_MOD, 0, 0, mod },
140 /* return A */
141 { BPF_RET | BPF_A, 0, 0, 0 },
143 struct sock_fprog p = {
144 .len = ARRAY_SIZE(code),
145 .filter = code,
148 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
149 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
152 static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
153 void (*attach_bpf)(int, uint16_t))
155 struct sockaddr * const addr =
156 new_any_sockaddr(p.recv_family, p.recv_port);
157 int i, opt;
159 for (i = 0; i < p.recv_socks; ++i) {
160 fd[i] = socket(p.recv_family, p.protocol, 0);
161 if (fd[i] < 0)
162 error(1, errno, "failed to create recv %d", i);
164 opt = 1;
165 if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
166 sizeof(opt)))
167 error(1, errno, "failed to set SO_REUSEPORT on %d", i);
169 if (i == 0)
170 attach_bpf(fd[i], mod);
172 if (bind(fd[i], addr, sockaddr_size()))
173 error(1, errno, "failed to bind recv socket %d", i);
175 if (p.protocol == SOCK_STREAM) {
176 opt = 4;
177 if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
178 sizeof(opt)))
179 error(1, errno,
180 "failed to set TCP_FASTOPEN on %d", i);
181 if (listen(fd[i], p.recv_socks * 10))
182 error(1, errno, "failed to listen on socket");
185 free(addr);
188 static void send_from(struct test_params p, uint16_t sport, char *buf,
189 size_t len)
191 struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
192 struct sockaddr * const daddr =
193 new_loopback_sockaddr(p.send_family, p.recv_port);
194 const int fd = socket(p.send_family, p.protocol, 0), one = 1;
196 if (fd < 0)
197 error(1, errno, "failed to create send socket");
199 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
200 error(1, errno, "failed to set reuseaddr");
202 if (bind(fd, saddr, sockaddr_size()))
203 error(1, errno, "failed to bind send socket");
205 if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
206 error(1, errno, "failed to send message");
208 close(fd);
209 free(saddr);
210 free(daddr);
213 static void test_recv_order(const struct test_params p, int fd[], int mod)
215 char recv_buf[8], send_buf[8];
216 struct msghdr msg;
217 struct iovec recv_io = { recv_buf, 8 };
218 struct epoll_event ev;
219 int epfd, conn, i, sport, expected;
220 uint32_t data, ndata;
222 epfd = epoll_create(1);
223 if (epfd < 0)
224 error(1, errno, "failed to create epoll");
225 for (i = 0; i < p.recv_socks; ++i) {
226 ev.events = EPOLLIN;
227 ev.data.fd = fd[i];
228 if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
229 error(1, errno, "failed to register sock %d epoll", i);
232 memset(&msg, 0, sizeof(msg));
233 msg.msg_iov = &recv_io;
234 msg.msg_iovlen = 1;
236 for (data = 0; data < p.recv_socks * 2; ++data) {
237 sport = p.send_port_min + data;
238 ndata = htonl(data);
239 memcpy(send_buf, &ndata, sizeof(ndata));
240 send_from(p, sport, send_buf, sizeof(ndata));
242 i = epoll_wait(epfd, &ev, 1, -1);
243 if (i < 0)
244 error(1, errno, "epoll wait failed");
246 if (p.protocol == SOCK_STREAM) {
247 conn = accept(ev.data.fd, NULL, NULL);
248 if (conn < 0)
249 error(1, errno, "error accepting");
250 i = recvmsg(conn, &msg, 0);
251 close(conn);
252 } else {
253 i = recvmsg(ev.data.fd, &msg, 0);
255 if (i < 0)
256 error(1, errno, "recvmsg error");
257 if (i != sizeof(ndata))
258 error(1, 0, "expected size %zd got %d",
259 sizeof(ndata), i);
261 for (i = 0; i < p.recv_socks; ++i)
262 if (ev.data.fd == fd[i])
263 break;
264 memcpy(&ndata, recv_buf, sizeof(ndata));
265 fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
267 expected = (sport % mod);
268 if (i != expected)
269 error(1, 0, "expected socket %d", expected);
273 static void test_reuseport_ebpf(struct test_params p)
275 int i, fd[p.recv_socks];
277 fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
278 build_recv_group(p, fd, p.recv_socks, attach_ebpf);
279 test_recv_order(p, fd, p.recv_socks);
281 p.send_port_min += p.recv_socks * 2;
282 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
283 attach_ebpf(fd[0], p.recv_socks / 2);
284 test_recv_order(p, fd, p.recv_socks / 2);
286 for (i = 0; i < p.recv_socks; ++i)
287 close(fd[i]);
290 static void test_reuseport_cbpf(struct test_params p)
292 int i, fd[p.recv_socks];
294 fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
295 build_recv_group(p, fd, p.recv_socks, attach_cbpf);
296 test_recv_order(p, fd, p.recv_socks);
298 p.send_port_min += p.recv_socks * 2;
299 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
300 attach_cbpf(fd[0], p.recv_socks / 2);
301 test_recv_order(p, fd, p.recv_socks / 2);
303 for (i = 0; i < p.recv_socks; ++i)
304 close(fd[i]);
307 static void test_extra_filter(const struct test_params p)
309 struct sockaddr * const addr =
310 new_any_sockaddr(p.recv_family, p.recv_port);
311 int fd1, fd2, opt;
313 fprintf(stderr, "Testing too many filters...\n");
314 fd1 = socket(p.recv_family, p.protocol, 0);
315 if (fd1 < 0)
316 error(1, errno, "failed to create socket 1");
317 fd2 = socket(p.recv_family, p.protocol, 0);
318 if (fd2 < 0)
319 error(1, errno, "failed to create socket 2");
321 opt = 1;
322 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
323 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
324 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
325 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
327 attach_ebpf(fd1, 10);
328 attach_ebpf(fd2, 10);
330 if (bind(fd1, addr, sockaddr_size()))
331 error(1, errno, "failed to bind recv socket 1");
333 if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
334 error(1, errno, "bind socket 2 should fail with EADDRINUSE");
336 free(addr);
339 static void test_filter_no_reuseport(const struct test_params p)
341 struct sockaddr * const addr =
342 new_any_sockaddr(p.recv_family, p.recv_port);
343 const char bpf_license[] = "GPL";
344 struct bpf_insn ecode[] = {
345 { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
346 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
348 struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
349 union bpf_attr eprog;
350 struct sock_fprog cprog;
351 int fd, bpf_fd;
353 fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
355 memset(&eprog, 0, sizeof(eprog));
356 eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
357 eprog.insn_cnt = ARRAY_SIZE(ecode);
358 eprog.insns = (unsigned long) &ecode;
359 eprog.license = (unsigned long) &bpf_license;
360 eprog.kern_version = 0;
362 memset(&cprog, 0, sizeof(cprog));
363 cprog.len = ARRAY_SIZE(ccode);
364 cprog.filter = ccode;
367 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
368 if (bpf_fd < 0)
369 error(1, errno, "ebpf error");
370 fd = socket(p.recv_family, p.protocol, 0);
371 if (fd < 0)
372 error(1, errno, "failed to create socket 1");
374 if (bind(fd, addr, sockaddr_size()))
375 error(1, errno, "failed to bind recv socket 1");
377 errno = 0;
378 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
379 sizeof(bpf_fd)) || errno != EINVAL)
380 error(1, errno, "setsockopt should have returned EINVAL");
382 errno = 0;
383 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
384 sizeof(cprog)) || errno != EINVAL)
385 error(1, errno, "setsockopt should have returned EINVAL");
387 free(addr);
390 static void test_filter_without_bind(void)
392 int fd1, fd2, opt = 1;
394 fprintf(stderr, "Testing filter add without bind...\n");
395 fd1 = socket(AF_INET, SOCK_DGRAM, 0);
396 if (fd1 < 0)
397 error(1, errno, "failed to create socket 1");
398 fd2 = socket(AF_INET, SOCK_DGRAM, 0);
399 if (fd2 < 0)
400 error(1, errno, "failed to create socket 2");
401 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
402 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
403 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
404 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
406 attach_ebpf(fd1, 10);
407 attach_cbpf(fd2, 10);
409 close(fd1);
410 close(fd2);
/*
 * Make sure both the client and server bits of the tcp_fastopen sysctl are
 * set. Reads /proc/sys/net/ipv4/tcp_fastopen and, if either bit of rw_mask
 * is missing, writes the value back with both bits set.
 * Any I/O failure terminates the test.
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
	int rw_mask = 3;  /* bit 1: client side; bit-2 server side */
	int val, size;
	ssize_t nread;
	char buf[16];

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator: read() does not NUL-terminate. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
/* RLIMIT_MEMLOCK as found at startup, and the raised value used for the run. */
static struct rlimit rlim_old, rlim_new;

/*
 * Add extra to limit without wrapping around.
 * Assumes RLIM_INFINITY is the largest rlim_t value (true on Linux/glibc).
 */
static rlim_t bump_rlimit(rlim_t limit, rlim_t extra)
{
	if (limit == RLIM_INFINITY || limit > RLIM_INFINITY - extra)
		return RLIM_INFINITY;
	return limit + extra;
}

/*
 * Runs before main(): raise the soft and hard RLIMIT_MEMLOCK limits by 1 MiB.
 * The previous limits are kept in rlim_old. An unlimited (RLIM_INFINITY)
 * limit stays unlimited instead of wrapping to a small value.
 * The results of getrlimit()/setrlimit() are not checked (best effort).
 */
static __attribute__((constructor)) void main_ctor(void)
{
	const rlim_t extra = 1UL << 20;

	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
	rlim_new.rlim_cur = bump_rlimit(rlim_old.rlim_cur, extra);
	rlim_new.rlim_max = bump_rlimit(rlim_old.rlim_max, extra);
	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
}
450 static __attribute__((destructor)) void main_dtor(void)
452 setrlimit(RLIMIT_MEMLOCK, &rlim_old);
455 int main(void)
457 fprintf(stderr, "---- IPv4 UDP ----\n");
458 /* NOTE: UDP socket lookups traverse a different code path when there
459 * are > 10 sockets in a group. Run the bpf test through both paths.
461 test_reuseport_ebpf((struct test_params) {
462 .recv_family = AF_INET,
463 .send_family = AF_INET,
464 .protocol = SOCK_DGRAM,
465 .recv_socks = 10,
466 .recv_port = 8000,
467 .send_port_min = 9000});
468 test_reuseport_ebpf((struct test_params) {
469 .recv_family = AF_INET,
470 .send_family = AF_INET,
471 .protocol = SOCK_DGRAM,
472 .recv_socks = 20,
473 .recv_port = 8000,
474 .send_port_min = 9000});
475 test_reuseport_cbpf((struct test_params) {
476 .recv_family = AF_INET,
477 .send_family = AF_INET,
478 .protocol = SOCK_DGRAM,
479 .recv_socks = 10,
480 .recv_port = 8001,
481 .send_port_min = 9020});
482 test_reuseport_cbpf((struct test_params) {
483 .recv_family = AF_INET,
484 .send_family = AF_INET,
485 .protocol = SOCK_DGRAM,
486 .recv_socks = 20,
487 .recv_port = 8001,
488 .send_port_min = 9020});
489 test_extra_filter((struct test_params) {
490 .recv_family = AF_INET,
491 .protocol = SOCK_DGRAM,
492 .recv_port = 8002});
493 test_filter_no_reuseport((struct test_params) {
494 .recv_family = AF_INET,
495 .protocol = SOCK_DGRAM,
496 .recv_port = 8008});
498 fprintf(stderr, "---- IPv6 UDP ----\n");
499 test_reuseport_ebpf((struct test_params) {
500 .recv_family = AF_INET6,
501 .send_family = AF_INET6,
502 .protocol = SOCK_DGRAM,
503 .recv_socks = 10,
504 .recv_port = 8003,
505 .send_port_min = 9040});
506 test_reuseport_ebpf((struct test_params) {
507 .recv_family = AF_INET6,
508 .send_family = AF_INET6,
509 .protocol = SOCK_DGRAM,
510 .recv_socks = 20,
511 .recv_port = 8003,
512 .send_port_min = 9040});
513 test_reuseport_cbpf((struct test_params) {
514 .recv_family = AF_INET6,
515 .send_family = AF_INET6,
516 .protocol = SOCK_DGRAM,
517 .recv_socks = 10,
518 .recv_port = 8004,
519 .send_port_min = 9060});
520 test_reuseport_cbpf((struct test_params) {
521 .recv_family = AF_INET6,
522 .send_family = AF_INET6,
523 .protocol = SOCK_DGRAM,
524 .recv_socks = 20,
525 .recv_port = 8004,
526 .send_port_min = 9060});
527 test_extra_filter((struct test_params) {
528 .recv_family = AF_INET6,
529 .protocol = SOCK_DGRAM,
530 .recv_port = 8005});
531 test_filter_no_reuseport((struct test_params) {
532 .recv_family = AF_INET6,
533 .protocol = SOCK_DGRAM,
534 .recv_port = 8009});
536 fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
537 test_reuseport_ebpf((struct test_params) {
538 .recv_family = AF_INET6,
539 .send_family = AF_INET,
540 .protocol = SOCK_DGRAM,
541 .recv_socks = 20,
542 .recv_port = 8006,
543 .send_port_min = 9080});
544 test_reuseport_ebpf((struct test_params) {
545 .recv_family = AF_INET6,
546 .send_family = AF_INET,
547 .protocol = SOCK_DGRAM,
548 .recv_socks = 10,
549 .recv_port = 8006,
550 .send_port_min = 9080});
551 test_reuseport_cbpf((struct test_params) {
552 .recv_family = AF_INET6,
553 .send_family = AF_INET,
554 .protocol = SOCK_DGRAM,
555 .recv_socks = 10,
556 .recv_port = 8007,
557 .send_port_min = 9100});
558 test_reuseport_cbpf((struct test_params) {
559 .recv_family = AF_INET6,
560 .send_family = AF_INET,
561 .protocol = SOCK_DGRAM,
562 .recv_socks = 20,
563 .recv_port = 8007,
564 .send_port_min = 9100});
566 /* TCP fastopen is required for the TCP tests */
567 enable_fastopen();
568 fprintf(stderr, "---- IPv4 TCP ----\n");
569 test_reuseport_ebpf((struct test_params) {
570 .recv_family = AF_INET,
571 .send_family = AF_INET,
572 .protocol = SOCK_STREAM,
573 .recv_socks = 10,
574 .recv_port = 8008,
575 .send_port_min = 9120});
576 test_reuseport_cbpf((struct test_params) {
577 .recv_family = AF_INET,
578 .send_family = AF_INET,
579 .protocol = SOCK_STREAM,
580 .recv_socks = 10,
581 .recv_port = 8009,
582 .send_port_min = 9160});
583 test_extra_filter((struct test_params) {
584 .recv_family = AF_INET,
585 .protocol = SOCK_STREAM,
586 .recv_port = 8010});
587 test_filter_no_reuseport((struct test_params) {
588 .recv_family = AF_INET,
589 .protocol = SOCK_STREAM,
590 .recv_port = 8011});
592 fprintf(stderr, "---- IPv6 TCP ----\n");
593 test_reuseport_ebpf((struct test_params) {
594 .recv_family = AF_INET6,
595 .send_family = AF_INET6,
596 .protocol = SOCK_STREAM,
597 .recv_socks = 10,
598 .recv_port = 8012,
599 .send_port_min = 9200});
600 test_reuseport_cbpf((struct test_params) {
601 .recv_family = AF_INET6,
602 .send_family = AF_INET6,
603 .protocol = SOCK_STREAM,
604 .recv_socks = 10,
605 .recv_port = 8013,
606 .send_port_min = 9240});
607 test_extra_filter((struct test_params) {
608 .recv_family = AF_INET6,
609 .protocol = SOCK_STREAM,
610 .recv_port = 8014});
611 test_filter_no_reuseport((struct test_params) {
612 .recv_family = AF_INET6,
613 .protocol = SOCK_STREAM,
614 .recv_port = 8015});
616 fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
617 test_reuseport_ebpf((struct test_params) {
618 .recv_family = AF_INET6,
619 .send_family = AF_INET,
620 .protocol = SOCK_STREAM,
621 .recv_socks = 10,
622 .recv_port = 8016,
623 .send_port_min = 9320});
624 test_reuseport_cbpf((struct test_params) {
625 .recv_family = AF_INET6,
626 .send_family = AF_INET,
627 .protocol = SOCK_STREAM,
628 .recv_socks = 10,
629 .recv_port = 8017,
630 .send_port_min = 9360});
632 test_filter_without_bind();
634 fprintf(stderr, "SUCCESS\n");
635 return 0;