treewide: remove redundant IS_ERR() before error code check
[linux/fpc-iii.git] / samples / bpf / xdpsock_user.c
blob0b5acd7223062aaf98d553552246efebfd30bd84
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017 - 2018 Intel Corporation. */
4 #include <asm/barrier.h>
5 #include <errno.h>
6 #include <getopt.h>
7 #include <libgen.h>
8 #include <linux/bpf.h>
9 #include <linux/compiler.h>
10 #include <linux/if_link.h>
11 #include <linux/if_xdp.h>
12 #include <linux/if_ether.h>
13 #include <linux/ip.h>
14 #include <linux/udp.h>
15 #include <arpa/inet.h>
16 #include <locale.h>
17 #include <net/ethernet.h>
18 #include <net/if.h>
19 #include <poll.h>
20 #include <pthread.h>
21 #include <signal.h>
22 #include <stdbool.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/resource.h>
28 #include <sys/socket.h>
29 #include <sys/types.h>
30 #include <time.h>
31 #include <unistd.h>
33 #include <bpf/libbpf.h>
34 #include <bpf/xsk.h>
35 #include <bpf/bpf.h>
36 #include "xdpsock.h"
38 #ifndef SOL_XDP
39 #define SOL_XDP 283
40 #endif
42 #ifndef AF_XDP
43 #define AF_XDP 44
44 #endif
46 #ifndef PF_XDP
47 #define PF_XDP AF_XDP
48 #endif
50 #define NUM_FRAMES (4 * 1024)
51 #define MIN_PKT_SIZE 64
53 #define DEBUG_HEXDUMP 0
55 typedef __u64 u64;
56 typedef __u32 u32;
57 typedef __u16 u16;
58 typedef __u8 u8;
60 static unsigned long prev_time;
/* Benchmark selected on the command line (-r, -t or -l). */
enum benchmark_type {
	BENCH_RXDROP = 0,
	BENCH_TXONLY = 1,
	BENCH_L2FWD = 2,
};
68 static enum benchmark_type opt_bench = BENCH_RXDROP;
69 static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
70 static const char *opt_if = "";
71 static int opt_ifindex;
72 static int opt_queue;
73 static unsigned long opt_duration;
74 static unsigned long start_time;
75 static bool benchmark_done;
76 static u32 opt_batch_size = 64;
77 static int opt_pkt_count;
78 static u16 opt_pkt_size = MIN_PKT_SIZE;
79 static u32 opt_pkt_fill_pattern = 0x12345678;
80 static int opt_poll;
81 static int opt_interval = 1;
82 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
83 static u32 opt_umem_flags;
84 static int opt_unaligned_chunks;
85 static int opt_mmap_flags;
86 static u32 opt_xdp_bind_flags;
87 static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
88 static int opt_timeout = 1000;
89 static bool opt_need_wakeup = true;
90 static u32 opt_num_xsks = 1;
91 static u32 prog_id;
93 struct xsk_umem_info {
94 struct xsk_ring_prod fq;
95 struct xsk_ring_cons cq;
96 struct xsk_umem *umem;
97 void *buffer;
100 struct xsk_socket_info {
101 struct xsk_ring_cons rx;
102 struct xsk_ring_prod tx;
103 struct xsk_umem_info *umem;
104 struct xsk_socket *xsk;
105 unsigned long rx_npkts;
106 unsigned long tx_npkts;
107 unsigned long prev_rx_npkts;
108 unsigned long prev_tx_npkts;
109 u32 outstanding_tx;
112 static int num_socks;
113 struct xsk_socket_info *xsks[MAX_SOCKS];
115 static unsigned long get_nsecs(void)
117 struct timespec ts;
119 clock_gettime(CLOCK_MONOTONIC, &ts);
120 return ts.tv_sec * 1000000000UL + ts.tv_nsec;
123 static void print_benchmark(bool running)
125 const char *bench_str = "INVALID";
127 if (opt_bench == BENCH_RXDROP)
128 bench_str = "rxdrop";
129 else if (opt_bench == BENCH_TXONLY)
130 bench_str = "txonly";
131 else if (opt_bench == BENCH_L2FWD)
132 bench_str = "l2fwd";
134 printf("%s:%d %s ", opt_if, opt_queue, bench_str);
135 if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
136 printf("xdp-skb ");
137 else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
138 printf("xdp-drv ");
139 else
140 printf(" ");
142 if (opt_poll)
143 printf("poll() ");
145 if (running) {
146 printf("running...");
147 fflush(stdout);
151 static void dump_stats(void)
153 unsigned long now = get_nsecs();
154 long dt = now - prev_time;
155 int i;
157 prev_time = now;
159 for (i = 0; i < num_socks && xsks[i]; i++) {
160 char *fmt = "%-15s %'-11.0f %'-11lu\n";
161 double rx_pps, tx_pps;
163 rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
164 1000000000. / dt;
165 tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
166 1000000000. / dt;
168 printf("\n sock%d@", i);
169 print_benchmark(false);
170 printf("\n");
172 printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
173 dt / 1000000000.);
174 printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
175 printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
177 xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
178 xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
182 static bool is_benchmark_done(void)
184 if (opt_duration > 0) {
185 unsigned long dt = (get_nsecs() - start_time);
187 if (dt >= opt_duration)
188 benchmark_done = true;
190 return benchmark_done;
193 static void *poller(void *arg)
195 (void)arg;
196 while (!is_benchmark_done()) {
197 sleep(opt_interval);
198 dump_stats();
201 return NULL;
204 static void remove_xdp_program(void)
206 u32 curr_prog_id = 0;
208 if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
209 printf("bpf_get_link_xdp_id failed\n");
210 exit(EXIT_FAILURE);
212 if (prog_id == curr_prog_id)
213 bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
214 else if (!curr_prog_id)
215 printf("couldn't find a prog id on a given interface\n");
216 else
217 printf("program on interface changed, not removing\n");
220 static void int_exit(int sig)
222 benchmark_done = true;
225 static void xdpsock_cleanup(void)
227 struct xsk_umem *umem = xsks[0]->umem->umem;
228 int i;
230 dump_stats();
231 for (i = 0; i < num_socks; i++)
232 xsk_socket__delete(xsks[i]->xsk);
233 (void)xsk_umem__delete(umem);
234 remove_xdp_program();
/*
 * Report a fatal error and terminate.
 *
 * Prints the source location and the errno-style code @error (with its
 * strerror() text) to stderr, dumps the statistics, detaches the XDP
 * program and exits with EXIT_FAILURE. Use via exit_with_error().
 */
static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	const char *reason = strerror(error);

	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, reason);
	dump_stats();
	remove_xdp_program();
	exit(EXIT_FAILURE);
}
/* Call __exit_with_error() with the caller's file, function and line. */
#define exit_with_error(error) \
	__exit_with_error(error, __FILE__, __func__, __LINE__)

/* Exchange the source and destination MAC of the Ethernet header at @data. */
static void swap_mac_addresses(void *data)
{
	struct ether_header *hdr = (struct ether_header *)data;
	struct ether_addr *src = (struct ether_addr *)&hdr->ether_shost;
	struct ether_addr *dst = (struct ether_addr *)&hdr->ether_dhost;
	struct ether_addr saved_src = *src;

	*src = *dst;
	*dst = saved_src;
}
261 static void hex_dump(void *pkt, size_t length, u64 addr)
263 const unsigned char *address = (unsigned char *)pkt;
264 const unsigned char *line = address;
265 size_t line_size = 32;
266 unsigned char c;
267 char buf[32];
268 int i = 0;
270 if (!DEBUG_HEXDUMP)
271 return;
273 sprintf(buf, "addr=%llu", addr);
274 printf("length = %zu\n", length);
275 printf("%s | ", buf);
276 while (length-- > 0) {
277 printf("%02X ", *address++);
278 if (!(++i % line_size) || (length == 0 && i % line_size)) {
279 if (length == 0) {
280 while (i++ % line_size)
281 printf("__ ");
283 printf(" | "); /* right close */
284 while (line < address) {
285 c = *line++;
286 printf("%c", (c < 33 || c == 255) ? 0x2E : c);
288 printf("\n");
289 if (length > 0)
290 printf("%s | ", buf);
293 printf("\n");
296 static void *memset32_htonl(void *dest, u32 val, u32 size)
298 u32 *ptr = (u32 *)dest;
299 int i;
301 val = htonl(val);
303 for (i = 0; i < (size & (~0x3)); i += 4)
304 ptr[i >> 2] = val;
306 for (; i < size; i++)
307 ((char *)dest)[i] = ((char *)&val)[i & 3];
309 return dest;
/*
 * Fold a 32-bit one's-complement sum into 16 bits.
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline unsigned short from32to16(unsigned int x)
{
	/* high half + low half: at most 17 significant bits */
	unsigned int folded = (x >> 16) + (x & 0xffff);

	/* add the possible carry back in */
	folded = (folded >> 16) + (folded & 0xffff);
	return folded;
}
/*
 * Compute the 16-bit one's-complement sum of @len bytes at @buff.
 * Returns 0 when @len <= 0.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 *
 * Byte order is taken from the compiler's __BYTE_ORDER__ rather than
 * __LITTLE_ENDIAN: the C library's <endian.h> defines __LITTLE_ENDIAN
 * on every target, so an #ifdef on it is always true in userspace.
 * Without __BYTE_ORDER__ the little-endian path is used.
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	unsigned int result = 0;
	int odd;

	if (len <= 0)
		goto out;

	/* Consume a leading byte so the rest starts on an even address. */
	odd = 1 & (unsigned long)buff;
	if (odd) {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		result = *buff;
#else
		result += (*buff << 8);
#endif
		len--;
		buff++;
	}
	if (len >= 2) {
		/* Consume one 16-bit word to reach 4-byte alignment. */
		if (2 & (unsigned long)buff) {
			result += *(unsigned short *)buff;
			len -= 2;
			buff += 2;
		}
		if (len >= 4) {
			const unsigned char *end = buff +
						   ((unsigned int)len & ~3);
			unsigned int carry = 0;

			/* Sum 32-bit words, carrying overflow into the next add. */
			do {
				unsigned int w = *(unsigned int *)buff;

				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);
			} while (buff < end);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *)buff;
			buff += 2;
		}
	}
	if (len & 1) {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		result += (*buff << 8);
#else
		result += *buff;
#endif
	}
	result = from32to16(result);
	/* An odd start shifted every byte by one lane; swap them back. */
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}
386 __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
389 * This is a version of ip_compute_csum() optimized for IP headers,
390 * which always checksum on 4 octet boundaries.
391 * This function code has been taken from
392 * Linux kernel lib/checksum.c
394 __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
396 return (__force __sum16)~do_csum(iph, ihl * 4);
400 * Fold a partial checksum
401 * This function code has been taken from
402 * Linux kernel include/asm-generic/checksum.h
404 static inline __sum16 csum_fold(__wsum csum)
406 u32 sum = (__force u32)csum;
408 sum = (sum & 0xffff) + (sum >> 16);
409 sum = (sum & 0xffff) + (sum >> 16);
410 return (__force __sum16)~sum;
414 * This function code has been taken from
415 * Linux kernel lib/checksum.c
417 static inline u32 from64to32(u64 x)
419 /* add up 32-bit and 32-bit for 32+c bit */
420 x = (x & 0xffffffff) + (x >> 32);
421 /* add up carry.. */
422 x = (x & 0xffffffff) + (x >> 32);
423 return (u32)x;
426 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
427 __u32 len, __u8 proto, __wsum sum);
430 * This function code has been taken from
431 * Linux kernel lib/checksum.c
433 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
434 __u32 len, __u8 proto, __wsum sum)
436 unsigned long long s = (__force u32)sum;
438 s += (__force u32)saddr;
439 s += (__force u32)daddr;
440 #ifdef __BIG_ENDIAN__
441 s += proto + len;
442 #else
443 s += (proto + len) << 8;
444 #endif
445 return (__force __wsum)from64to32(s);
449 * This function has been taken from
450 * Linux kernel include/asm-generic/checksum.h
452 static inline __sum16
453 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
454 __u8 proto, __wsum sum)
456 return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
459 static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
460 u8 proto, u16 *udp_pkt)
462 u32 csum = 0;
463 u32 cnt = 0;
465 /* udp hdr and data */
466 for (; cnt < len; cnt += 2)
467 csum += udp_pkt[cnt >> 1];
469 return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
472 #define ETH_FCS_SIZE 4
474 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
475 sizeof(struct udphdr))
477 #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
478 #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
479 #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
480 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
482 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
484 static void gen_eth_hdr_data(void)
486 struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
487 sizeof(struct ethhdr) +
488 sizeof(struct iphdr));
489 struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
490 sizeof(struct ethhdr));
491 struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
493 /* ethernet header */
494 memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
495 memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
496 eth_hdr->h_proto = htons(ETH_P_IP);
498 /* IP header */
499 ip_hdr->version = IPVERSION;
500 ip_hdr->ihl = 0x5; /* 20 byte header */
501 ip_hdr->tos = 0x0;
502 ip_hdr->tot_len = htons(IP_PKT_SIZE);
503 ip_hdr->id = 0;
504 ip_hdr->frag_off = 0;
505 ip_hdr->ttl = IPDEFTTL;
506 ip_hdr->protocol = IPPROTO_UDP;
507 ip_hdr->saddr = htonl(0x0a0a0a10);
508 ip_hdr->daddr = htonl(0x0a0a0a20);
510 /* IP header checksum */
511 ip_hdr->check = 0;
512 ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
514 /* UDP header */
515 udp_hdr->source = htons(0x1000);
516 udp_hdr->dest = htons(0x1000);
517 udp_hdr->len = htons(UDP_PKT_SIZE);
519 /* UDP data */
520 memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
521 UDP_PKT_DATA_SIZE);
523 /* UDP header checksum */
524 udp_hdr->check = 0;
525 udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
526 IPPROTO_UDP, (u16 *)udp_hdr);
529 static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
531 memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
532 PKT_SIZE);
535 static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
537 struct xsk_umem_info *umem;
538 struct xsk_umem_config cfg = {
539 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
540 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
541 .frame_size = opt_xsk_frame_size,
542 .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
543 .flags = opt_umem_flags
545 int ret;
547 umem = calloc(1, sizeof(*umem));
548 if (!umem)
549 exit_with_error(errno);
551 ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
552 &cfg);
553 if (ret)
554 exit_with_error(-ret);
556 umem->buffer = buffer;
557 return umem;
560 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
562 int ret, i;
563 u32 idx;
565 ret = xsk_ring_prod__reserve(&umem->fq,
566 XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
567 if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
568 exit_with_error(-ret);
569 for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
570 *xsk_ring_prod__fill_addr(&umem->fq, idx++) =
571 i * opt_xsk_frame_size;
572 xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
575 static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
576 bool rx, bool tx)
578 struct xsk_socket_config cfg;
579 struct xsk_socket_info *xsk;
580 struct xsk_ring_cons *rxr;
581 struct xsk_ring_prod *txr;
582 int ret;
584 xsk = calloc(1, sizeof(*xsk));
585 if (!xsk)
586 exit_with_error(errno);
588 xsk->umem = umem;
589 cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
590 cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
591 if (opt_num_xsks > 1)
592 cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
593 else
594 cfg.libbpf_flags = 0;
595 cfg.xdp_flags = opt_xdp_flags;
596 cfg.bind_flags = opt_xdp_bind_flags;
598 rxr = rx ? &xsk->rx : NULL;
599 txr = tx ? &xsk->tx : NULL;
600 ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
601 rxr, txr, &cfg);
602 if (ret)
603 exit_with_error(-ret);
605 ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
606 if (ret)
607 exit_with_error(-ret);
609 return xsk;
/* Long option table; short letters match the getopt_long() string. */
static struct option long_options[] = {
	{"rxdrop",		no_argument,		0, 'r'},
	{"txonly",		no_argument,		0, 't'},
	{"l2fwd",		no_argument,		0, 'l'},
	{"interface",		required_argument,	0, 'i'},
	{"queue",		required_argument,	0, 'q'},
	{"poll",		no_argument,		0, 'p'},
	{"xdp-skb",		no_argument,		0, 'S'},
	{"xdp-native",		no_argument,		0, 'N'},
	{"interval",		required_argument,	0, 'n'},
	{"zero-copy",		no_argument,		0, 'z'},
	{"copy",		no_argument,		0, 'c'},
	{"frame-size",		required_argument,	0, 'f'},
	{"no-need-wakeup",	no_argument,		0, 'm'},
	{"unaligned",		no_argument,		0, 'u'},
	{"shared-umem",		no_argument,		0, 'M'},
	{"force",		no_argument,		0, 'F'},
	{"duration",		required_argument,	0, 'd'},
	{"batch-size",		required_argument,	0, 'b'},
	{"tx-pkt-count",	required_argument,	0, 'C'},
	{"tx-pkt-size",		required_argument,	0, 's'},
	{"tx-pkt-pattern",	required_argument,	0, 'P'},
	{0, 0, 0, 0}
};
637 static void usage(const char *prog)
639 const char *str =
640 " Usage: %s [OPTIONS]\n"
641 " Options:\n"
642 " -r, --rxdrop Discard all incoming packets (default)\n"
643 " -t, --txonly Only send packets\n"
644 " -l, --l2fwd MAC swap L2 forwarding\n"
645 " -i, --interface=n Run on interface n\n"
646 " -q, --queue=n Use queue n (default 0)\n"
647 " -p, --poll Use poll syscall\n"
648 " -S, --xdp-skb=n Use XDP skb-mod\n"
649 " -N, --xdp-native=n Enforce XDP native mode\n"
650 " -n, --interval=n Specify statistics update interval (default 1 sec).\n"
651 " -z, --zero-copy Force zero-copy mode.\n"
652 " -c, --copy Force copy mode.\n"
653 " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
654 " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n"
655 " -u, --unaligned Enable unaligned chunk placement\n"
656 " -M, --shared-umem Enable XDP_SHARED_UMEM\n"
657 " -F, --force Force loading the XDP prog\n"
658 " -d, --duration=n Duration in secs to run command.\n"
659 " Default: forever.\n"
660 " -b, --batch-size=n Batch size for sending or receiving\n"
661 " packets. Default: %d\n"
662 " -C, --tx-pkt-count=n Number of packets to send.\n"
663 " Default: Continuous packets.\n"
664 " -s, --tx-pkt-size=n Transmit packet size.\n"
665 " (Default: %d bytes)\n"
666 " Min size: %d, Max size %d.\n"
667 " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
668 "\n";
669 fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
670 opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
671 XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
673 exit(EXIT_FAILURE);
676 static void parse_command_line(int argc, char **argv)
678 int option_index, c;
680 opterr = 0;
682 for (;;) {
683 c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:",
684 long_options, &option_index);
685 if (c == -1)
686 break;
688 switch (c) {
689 case 'r':
690 opt_bench = BENCH_RXDROP;
691 break;
692 case 't':
693 opt_bench = BENCH_TXONLY;
694 break;
695 case 'l':
696 opt_bench = BENCH_L2FWD;
697 break;
698 case 'i':
699 opt_if = optarg;
700 break;
701 case 'q':
702 opt_queue = atoi(optarg);
703 break;
704 case 'p':
705 opt_poll = 1;
706 break;
707 case 'S':
708 opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
709 opt_xdp_bind_flags |= XDP_COPY;
710 break;
711 case 'N':
712 /* default, set below */
713 break;
714 case 'n':
715 opt_interval = atoi(optarg);
716 break;
717 case 'z':
718 opt_xdp_bind_flags |= XDP_ZEROCOPY;
719 break;
720 case 'c':
721 opt_xdp_bind_flags |= XDP_COPY;
722 break;
723 case 'u':
724 opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
725 opt_unaligned_chunks = 1;
726 opt_mmap_flags = MAP_HUGETLB;
727 break;
728 case 'F':
729 opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
730 break;
731 case 'f':
732 opt_xsk_frame_size = atoi(optarg);
733 break;
734 case 'm':
735 opt_need_wakeup = false;
736 opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
737 break;
738 case 'M':
739 opt_num_xsks = MAX_SOCKS;
740 break;
741 case 'd':
742 opt_duration = atoi(optarg);
743 opt_duration *= 1000000000;
744 break;
745 case 'b':
746 opt_batch_size = atoi(optarg);
747 break;
748 case 'C':
749 opt_pkt_count = atoi(optarg);
750 break;
751 case 's':
752 opt_pkt_size = atoi(optarg);
753 if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
754 opt_pkt_size < MIN_PKT_SIZE) {
755 fprintf(stderr,
756 "ERROR: Invalid frame size %d\n",
757 opt_pkt_size);
758 usage(basename(argv[0]));
760 break;
761 case 'P':
762 opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
763 break;
764 default:
765 usage(basename(argv[0]));
769 if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
770 opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
772 opt_ifindex = if_nametoindex(opt_if);
773 if (!opt_ifindex) {
774 fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
775 opt_if);
776 usage(basename(argv[0]));
779 if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
780 !opt_unaligned_chunks) {
781 fprintf(stderr, "--frame-size=%d is not a power of two\n",
782 opt_xsk_frame_size);
783 usage(basename(argv[0]));
787 static void kick_tx(struct xsk_socket_info *xsk)
789 int ret;
791 ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
792 if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
793 return;
794 exit_with_error(errno);
797 static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
798 struct pollfd *fds)
800 struct xsk_umem_info *umem = xsk->umem;
801 u32 idx_cq = 0, idx_fq = 0;
802 unsigned int rcvd;
803 size_t ndescs;
805 if (!xsk->outstanding_tx)
806 return;
808 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
809 kick_tx(xsk);
811 ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
812 xsk->outstanding_tx;
814 /* re-add completed Tx buffers */
815 rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
816 if (rcvd > 0) {
817 unsigned int i;
818 int ret;
820 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
821 while (ret != rcvd) {
822 if (ret < 0)
823 exit_with_error(-ret);
824 if (xsk_ring_prod__needs_wakeup(&umem->fq))
825 ret = poll(fds, num_socks, opt_timeout);
826 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
829 for (i = 0; i < rcvd; i++)
830 *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
831 *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
833 xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
834 xsk_ring_cons__release(&xsk->umem->cq, rcvd);
835 xsk->outstanding_tx -= rcvd;
836 xsk->tx_npkts += rcvd;
840 static inline void complete_tx_only(struct xsk_socket_info *xsk,
841 int batch_size)
843 unsigned int rcvd;
844 u32 idx;
846 if (!xsk->outstanding_tx)
847 return;
849 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
850 kick_tx(xsk);
852 rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
853 if (rcvd > 0) {
854 xsk_ring_cons__release(&xsk->umem->cq, rcvd);
855 xsk->outstanding_tx -= rcvd;
856 xsk->tx_npkts += rcvd;
860 static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
862 unsigned int rcvd, i;
863 u32 idx_rx = 0, idx_fq = 0;
864 int ret;
866 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
867 if (!rcvd) {
868 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
869 ret = poll(fds, num_socks, opt_timeout);
870 return;
873 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
874 while (ret != rcvd) {
875 if (ret < 0)
876 exit_with_error(-ret);
877 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
878 ret = poll(fds, num_socks, opt_timeout);
879 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
882 for (i = 0; i < rcvd; i++) {
883 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
884 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
885 u64 orig = xsk_umem__extract_addr(addr);
887 addr = xsk_umem__add_offset_to_addr(addr);
888 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
890 hex_dump(pkt, len, addr);
891 *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
894 xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
895 xsk_ring_cons__release(&xsk->rx, rcvd);
896 xsk->rx_npkts += rcvd;
899 static void rx_drop_all(void)
901 struct pollfd fds[MAX_SOCKS] = {};
902 int i, ret;
904 for (i = 0; i < num_socks; i++) {
905 fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
906 fds[i].events = POLLIN;
909 for (;;) {
910 if (opt_poll) {
911 ret = poll(fds, num_socks, opt_timeout);
912 if (ret <= 0)
913 continue;
916 for (i = 0; i < num_socks; i++)
917 rx_drop(xsks[i], fds);
919 if (benchmark_done)
920 break;
924 static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
926 u32 idx;
927 unsigned int i;
929 while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
930 batch_size) {
931 complete_tx_only(xsk, batch_size);
934 for (i = 0; i < batch_size; i++) {
935 struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
936 idx + i);
937 tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
938 tx_desc->len = PKT_SIZE;
941 xsk_ring_prod__submit(&xsk->tx, batch_size);
942 xsk->outstanding_tx += batch_size;
943 frame_nb += batch_size;
944 frame_nb %= NUM_FRAMES;
945 complete_tx_only(xsk, batch_size);
948 static inline int get_batch_size(int pkt_cnt)
950 if (!opt_pkt_count)
951 return opt_batch_size;
953 if (pkt_cnt + opt_batch_size <= opt_pkt_count)
954 return opt_batch_size;
956 return opt_pkt_count - pkt_cnt;
959 static void complete_tx_only_all(void)
961 bool pending;
962 int i;
964 do {
965 pending = false;
966 for (i = 0; i < num_socks; i++) {
967 if (xsks[i]->outstanding_tx) {
968 complete_tx_only(xsks[i], opt_batch_size);
969 pending = !!xsks[i]->outstanding_tx;
972 } while (pending);
975 static void tx_only_all(void)
977 struct pollfd fds[MAX_SOCKS] = {};
978 u32 frame_nb[MAX_SOCKS] = {};
979 int pkt_cnt = 0;
980 int i, ret;
982 for (i = 0; i < num_socks; i++) {
983 fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
984 fds[0].events = POLLOUT;
987 while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
988 int batch_size = get_batch_size(pkt_cnt);
990 if (opt_poll) {
991 ret = poll(fds, num_socks, opt_timeout);
992 if (ret <= 0)
993 continue;
995 if (!(fds[0].revents & POLLOUT))
996 continue;
999 for (i = 0; i < num_socks; i++)
1000 tx_only(xsks[i], frame_nb[i], batch_size);
1002 pkt_cnt += batch_size;
1004 if (benchmark_done)
1005 break;
1008 if (opt_pkt_count)
1009 complete_tx_only_all();
1012 static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
1014 unsigned int rcvd, i;
1015 u32 idx_rx = 0, idx_tx = 0;
1016 int ret;
1018 complete_tx_l2fwd(xsk, fds);
1020 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
1021 if (!rcvd) {
1022 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
1023 ret = poll(fds, num_socks, opt_timeout);
1024 return;
1027 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
1028 while (ret != rcvd) {
1029 if (ret < 0)
1030 exit_with_error(-ret);
1031 if (xsk_ring_prod__needs_wakeup(&xsk->tx))
1032 kick_tx(xsk);
1033 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
1036 for (i = 0; i < rcvd; i++) {
1037 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
1038 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
1039 u64 orig = addr;
1041 addr = xsk_umem__add_offset_to_addr(addr);
1042 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
1044 swap_mac_addresses(pkt);
1046 hex_dump(pkt, len, addr);
1047 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
1048 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
1051 xsk_ring_prod__submit(&xsk->tx, rcvd);
1052 xsk_ring_cons__release(&xsk->rx, rcvd);
1054 xsk->rx_npkts += rcvd;
1055 xsk->outstanding_tx += rcvd;
1058 static void l2fwd_all(void)
1060 struct pollfd fds[MAX_SOCKS] = {};
1061 int i, ret;
1063 for (i = 0; i < num_socks; i++) {
1064 fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
1065 fds[i].events = POLLOUT | POLLIN;
1068 for (;;) {
1069 if (opt_poll) {
1070 ret = poll(fds, num_socks, opt_timeout);
1071 if (ret <= 0)
1072 continue;
1075 for (i = 0; i < num_socks; i++)
1076 l2fwd(xsks[i], fds);
1078 if (benchmark_done)
1079 break;
1083 static void load_xdp_program(char **argv, struct bpf_object **obj)
1085 struct bpf_prog_load_attr prog_load_attr = {
1086 .prog_type = BPF_PROG_TYPE_XDP,
1088 char xdp_filename[256];
1089 int prog_fd;
1091 snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
1092 prog_load_attr.file = xdp_filename;
1094 if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
1095 exit(EXIT_FAILURE);
1096 if (prog_fd < 0) {
1097 fprintf(stderr, "ERROR: no program found: %s\n",
1098 strerror(prog_fd));
1099 exit(EXIT_FAILURE);
1102 if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
1103 fprintf(stderr, "ERROR: link set xdp fd failed\n");
1104 exit(EXIT_FAILURE);
1108 static void enter_xsks_into_map(struct bpf_object *obj)
1110 struct bpf_map *map;
1111 int i, xsks_map;
1113 map = bpf_object__find_map_by_name(obj, "xsks_map");
1114 xsks_map = bpf_map__fd(map);
1115 if (xsks_map < 0) {
1116 fprintf(stderr, "ERROR: no xsks map found: %s\n",
1117 strerror(xsks_map));
1118 exit(EXIT_FAILURE);
1121 for (i = 0; i < num_socks; i++) {
1122 int fd = xsk_socket__fd(xsks[i]->xsk);
1123 int key, ret;
1125 key = i;
1126 ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
1127 if (ret) {
1128 fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
1129 exit(EXIT_FAILURE);
1134 int main(int argc, char **argv)
1136 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
1137 bool rx = false, tx = false;
1138 struct xsk_umem_info *umem;
1139 struct bpf_object *obj;
1140 pthread_t pt;
1141 int i, ret;
1142 void *bufs;
1144 parse_command_line(argc, argv);
1146 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
1147 fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
1148 strerror(errno));
1149 exit(EXIT_FAILURE);
1152 if (opt_num_xsks > 1)
1153 load_xdp_program(argv, &obj);
1155 /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
1156 bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
1157 PROT_READ | PROT_WRITE,
1158 MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
1159 if (bufs == MAP_FAILED) {
1160 printf("ERROR: mmap failed\n");
1161 exit(EXIT_FAILURE);
1164 /* Create sockets... */
1165 umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
1166 if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
1167 rx = true;
1168 xsk_populate_fill_ring(umem);
1170 if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
1171 tx = true;
1172 for (i = 0; i < opt_num_xsks; i++)
1173 xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
1175 if (opt_bench == BENCH_TXONLY) {
1176 gen_eth_hdr_data();
1178 for (i = 0; i < NUM_FRAMES; i++)
1179 gen_eth_frame(umem, i * opt_xsk_frame_size);
1182 if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
1183 enter_xsks_into_map(obj);
1185 signal(SIGINT, int_exit);
1186 signal(SIGTERM, int_exit);
1187 signal(SIGABRT, int_exit);
1189 setlocale(LC_ALL, "");
1191 ret = pthread_create(&pt, NULL, poller, NULL);
1192 if (ret)
1193 exit_with_error(ret);
1195 prev_time = get_nsecs();
1196 start_time = prev_time;
1198 if (opt_bench == BENCH_RXDROP)
1199 rx_drop_all();
1200 else if (opt_bench == BENCH_TXONLY)
1201 tx_only_all();
1202 else
1203 l2fwd_all();
1205 benchmark_done = true;
1207 pthread_join(pt, NULL);
1209 xdpsock_cleanup();
1211 return 0;