Linux 4.19.133
[linux/fpc-iii.git] / samples / bpf / xdp_monitor_user.c
blobdd558cbb23094b723b09e09f1b03e075e6e0e7d2
1 /* SPDX-License-Identifier: GPL-2.0
2 * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
3 */
/* Short tool description; shown by usage() and as the stats_poll() banner. */
static const char *__doc__ = "XDP monitor tool, based on tracepoints\n";
/* Notice printed by stats_poll() when only redirect errors are tracked. */
static const char *__doc_err_only__ =
	" NOTICE: Only tracking XDP redirect errors\n"
	" Enable TX success stats via '--stats'\n"
	" (which comes with a per packet processing overhead)\n";
14 #include <errno.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <stdbool.h>
18 #include <stdint.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include <unistd.h>
22 #include <locale.h>
24 #include <sys/resource.h>
25 #include <getopt.h>
26 #include <net/if.h>
27 #include <time.h>
29 #include <bpf/bpf.h>
30 #include "bpf_load.h"
31 #include "bpf_util.h"
/* Non-zero: stats_poll() prints the tool banner and the first two map names. */
static int verbose = 1;

/* Set by -D/--debug: main() dumps the loaded prog/map/event descriptors. */
static bool debug;
/*
 * Command line options. Every entry leaves .flag NULL, so getopt_long()
 * returns .val, which doubles as the short option letter.
 */
static const struct option long_options[] = {
	{ .name = "help",  .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "debug", .has_arg = no_argument,       .flag = NULL, .val = 'D' },
	{ .name = "stats", .has_arg = no_argument,       .flag = NULL, .val = 'S' },
	{ .name = "sec",   .has_arg = required_argument, .flag = NULL, .val = 's' },
	{ .name = NULL,    .has_arg = 0,                 .flag = NULL, .val = 0 }
};
/*
 * The C standard only provides EXIT_SUCCESS(0) and EXIT_FAILURE(1);
 * this extra exit status is used when a memory allocation fails.
 */
enum { EXIT_FAIL_MEM = 5 };
47 static void usage(char *argv[])
49 int i;
50 printf("\nDOCUMENTATION:\n%s\n", __doc__);
51 printf("\n");
52 printf(" Usage: %s (options-see-below)\n",
53 argv[0]);
54 printf(" Listing options:\n");
55 for (i = 0; long_options[i].name != 0; i++) {
56 printf(" --%-15s", long_options[i].name);
57 if (long_options[i].flag != NULL)
58 printf(" flag (internal value:%d)",
59 *long_options[i].flag);
60 else
61 printf("short-option: -%c",
62 long_options[i].val);
63 printf("\n");
65 printf("\n");
68 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */
69 static __u64 gettime(void)
71 struct timespec t;
72 int res;
74 res = clock_gettime(CLOCK_MONOTONIC, &t);
75 if (res < 0) {
76 fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
77 exit(EXIT_FAILURE);
79 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
/* Redirect outcome classes; also the index into stats_record.xdp_redirect[] */
enum {
	REDIR_SUCCESS = 0,
	REDIR_ERROR = 1,
};
#define REDIR_RES_MAX 2

/* Printable name for each redirect outcome, indexed by the enum above */
static const char *redir_names[REDIR_RES_MAX] = {
	[REDIR_SUCCESS]	= "Success",
	[REDIR_ERROR]	= "Error",
};

/*
 * Map a redirect outcome to its printable name.
 *
 * @err: REDIR_SUCCESS or REDIR_ERROR.
 *
 * Returns the name, or NULL when @err is outside [0, REDIR_RES_MAX).
 * Negative values are rejected explicitly so they can never index
 * before the start of redir_names[].
 */
static const char *err2str(int err)
{
	if (err >= 0 && err < REDIR_RES_MAX)
		return redir_names[err];
	return NULL;
}
97 /* enum xdp_action */
98 #define XDP_UNKNOWN XDP_REDIRECT + 1
99 #define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
100 static const char *xdp_action_names[XDP_ACTION_MAX] = {
101 [XDP_ABORTED] = "XDP_ABORTED",
102 [XDP_DROP] = "XDP_DROP",
103 [XDP_PASS] = "XDP_PASS",
104 [XDP_TX] = "XDP_TX",
105 [XDP_REDIRECT] = "XDP_REDIRECT",
106 [XDP_UNKNOWN] = "XDP_UNKNOWN",
108 static const char *action2str(int action)
110 if (action < XDP_ACTION_MAX)
111 return xdp_action_names[action];
112 return NULL;
115 /* Common stats data record shared with _kern.c */
116 struct datarec {
117 __u64 processed;
118 __u64 dropped;
119 __u64 info;
120 __u64 err;
122 #define MAX_CPUS 64
124 /* Userspace structs for collection of stats from maps */
125 struct record {
126 __u64 timestamp;
127 struct datarec total;
128 struct datarec *cpu;
130 struct u64rec {
131 __u64 processed;
133 struct record_u64 {
134 /* record for _kern side __u64 values */
135 __u64 timestamp;
136 struct u64rec total;
137 struct u64rec *cpu;
140 struct stats_record {
141 struct record_u64 xdp_redirect[REDIR_RES_MAX];
142 struct record_u64 xdp_exception[XDP_ACTION_MAX];
143 struct record xdp_cpumap_kthread;
144 struct record xdp_cpumap_enqueue[MAX_CPUS];
145 struct record xdp_devmap_xmit;
148 static bool map_collect_record(int fd, __u32 key, struct record *rec)
150 /* For percpu maps, userspace gets a value per possible CPU */
151 unsigned int nr_cpus = bpf_num_possible_cpus();
152 struct datarec values[nr_cpus];
153 __u64 sum_processed = 0;
154 __u64 sum_dropped = 0;
155 __u64 sum_info = 0;
156 __u64 sum_err = 0;
157 int i;
159 if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
160 fprintf(stderr,
161 "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
162 return false;
164 /* Get time as close as possible to reading map contents */
165 rec->timestamp = gettime();
167 /* Record and sum values from each CPU */
168 for (i = 0; i < nr_cpus; i++) {
169 rec->cpu[i].processed = values[i].processed;
170 sum_processed += values[i].processed;
171 rec->cpu[i].dropped = values[i].dropped;
172 sum_dropped += values[i].dropped;
173 rec->cpu[i].info = values[i].info;
174 sum_info += values[i].info;
175 rec->cpu[i].err = values[i].err;
176 sum_err += values[i].err;
178 rec->total.processed = sum_processed;
179 rec->total.dropped = sum_dropped;
180 rec->total.info = sum_info;
181 rec->total.err = sum_err;
182 return true;
185 static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
187 /* For percpu maps, userspace gets a value per possible CPU */
188 unsigned int nr_cpus = bpf_num_possible_cpus();
189 struct u64rec values[nr_cpus];
190 __u64 sum_total = 0;
191 int i;
193 if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
194 fprintf(stderr,
195 "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
196 return false;
198 /* Get time as close as possible to reading map contents */
199 rec->timestamp = gettime();
201 /* Record and sum values from each CPU */
202 for (i = 0; i < nr_cpus; i++) {
203 rec->cpu[i].processed = values[i].processed;
204 sum_total += values[i].processed;
206 rec->total.processed = sum_total;
207 return true;
210 static double calc_period(struct record *r, struct record *p)
212 double period_ = 0;
213 __u64 period = 0;
215 period = r->timestamp - p->timestamp;
216 if (period > 0)
217 period_ = ((double) period / NANOSEC_PER_SEC);
219 return period_;
222 static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
224 double period_ = 0;
225 __u64 period = 0;
227 period = r->timestamp - p->timestamp;
228 if (period > 0)
229 period_ = ((double) period / NANOSEC_PER_SEC);
231 return period_;
234 static double calc_pps(struct datarec *r, struct datarec *p, double period)
236 __u64 packets = 0;
237 double pps = 0;
239 if (period > 0) {
240 packets = r->processed - p->processed;
241 pps = packets / period;
243 return pps;
246 static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
248 __u64 packets = 0;
249 double pps = 0;
251 if (period > 0) {
252 packets = r->processed - p->processed;
253 pps = packets / period;
255 return pps;
258 static double calc_drop(struct datarec *r, struct datarec *p, double period)
260 __u64 packets = 0;
261 double pps = 0;
263 if (period > 0) {
264 packets = r->dropped - p->dropped;
265 pps = packets / period;
267 return pps;
270 static double calc_info(struct datarec *r, struct datarec *p, double period)
272 __u64 packets = 0;
273 double pps = 0;
275 if (period > 0) {
276 packets = r->info - p->info;
277 pps = packets / period;
279 return pps;
282 static double calc_err(struct datarec *r, struct datarec *p, double period)
284 __u64 packets = 0;
285 double pps = 0;
287 if (period > 0) {
288 packets = r->err - p->err;
289 pps = packets / period;
291 return pps;
294 static void stats_print(struct stats_record *stats_rec,
295 struct stats_record *stats_prev,
296 bool err_only)
298 unsigned int nr_cpus = bpf_num_possible_cpus();
299 int rec_i = 0, i, to_cpu;
300 double t = 0, pps = 0;
302 /* Header */
303 printf("%-15s %-7s %-12s %-12s %-9s\n",
304 "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
306 /* tracepoint: xdp:xdp_redirect_* */
307 if (err_only)
308 rec_i = REDIR_ERROR;
310 for (; rec_i < REDIR_RES_MAX; rec_i++) {
311 struct record_u64 *rec, *prev;
312 char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
313 char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
315 rec = &stats_rec->xdp_redirect[rec_i];
316 prev = &stats_prev->xdp_redirect[rec_i];
317 t = calc_period_u64(rec, prev);
319 for (i = 0; i < nr_cpus; i++) {
320 struct u64rec *r = &rec->cpu[i];
321 struct u64rec *p = &prev->cpu[i];
323 pps = calc_pps_u64(r, p, t);
324 if (pps > 0)
325 printf(fmt1, "XDP_REDIRECT", i,
326 rec_i ? 0.0: pps, rec_i ? pps : 0.0,
327 err2str(rec_i));
329 pps = calc_pps_u64(&rec->total, &prev->total, t);
330 printf(fmt2, "XDP_REDIRECT", "total",
331 rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
334 /* tracepoint: xdp:xdp_exception */
335 for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
336 struct record_u64 *rec, *prev;
337 char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
338 char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
340 rec = &stats_rec->xdp_exception[rec_i];
341 prev = &stats_prev->xdp_exception[rec_i];
342 t = calc_period_u64(rec, prev);
344 for (i = 0; i < nr_cpus; i++) {
345 struct u64rec *r = &rec->cpu[i];
346 struct u64rec *p = &prev->cpu[i];
348 pps = calc_pps_u64(r, p, t);
349 if (pps > 0)
350 printf(fmt1, "Exception", i,
351 0.0, pps, action2str(rec_i));
353 pps = calc_pps_u64(&rec->total, &prev->total, t);
354 if (pps > 0)
355 printf(fmt2, "Exception", "total",
356 0.0, pps, action2str(rec_i));
359 /* cpumap enqueue stats */
360 for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
361 char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
362 char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
363 struct record *rec, *prev;
364 char *info_str = "";
365 double drop, info;
367 rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
368 prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
369 t = calc_period(rec, prev);
370 for (i = 0; i < nr_cpus; i++) {
371 struct datarec *r = &rec->cpu[i];
372 struct datarec *p = &prev->cpu[i];
374 pps = calc_pps(r, p, t);
375 drop = calc_drop(r, p, t);
376 info = calc_info(r, p, t);
377 if (info > 0) {
378 info_str = "bulk-average";
379 info = pps / info; /* calc average bulk size */
381 if (pps > 0)
382 printf(fmt1, "cpumap-enqueue",
383 i, to_cpu, pps, drop, info, info_str);
385 pps = calc_pps(&rec->total, &prev->total, t);
386 if (pps > 0) {
387 drop = calc_drop(&rec->total, &prev->total, t);
388 info = calc_info(&rec->total, &prev->total, t);
389 if (info > 0) {
390 info_str = "bulk-average";
391 info = pps / info; /* calc average bulk size */
393 printf(fmt2, "cpumap-enqueue",
394 "sum", to_cpu, pps, drop, info, info_str);
398 /* cpumap kthread stats */
400 char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
401 char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
402 struct record *rec, *prev;
403 double drop, info;
404 char *i_str = "";
406 rec = &stats_rec->xdp_cpumap_kthread;
407 prev = &stats_prev->xdp_cpumap_kthread;
408 t = calc_period(rec, prev);
409 for (i = 0; i < nr_cpus; i++) {
410 struct datarec *r = &rec->cpu[i];
411 struct datarec *p = &prev->cpu[i];
413 pps = calc_pps(r, p, t);
414 drop = calc_drop(r, p, t);
415 info = calc_info(r, p, t);
416 if (info > 0)
417 i_str = "sched";
418 if (pps > 0 || drop > 0)
419 printf(fmt1, "cpumap-kthread",
420 i, pps, drop, info, i_str);
422 pps = calc_pps(&rec->total, &prev->total, t);
423 drop = calc_drop(&rec->total, &prev->total, t);
424 info = calc_info(&rec->total, &prev->total, t);
425 if (info > 0)
426 i_str = "sched-sum";
427 printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
430 /* devmap ndo_xdp_xmit stats */
432 char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
433 char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
434 struct record *rec, *prev;
435 double drop, info, err;
436 char *i_str = "";
437 char *err_str = "";
439 rec = &stats_rec->xdp_devmap_xmit;
440 prev = &stats_prev->xdp_devmap_xmit;
441 t = calc_period(rec, prev);
442 for (i = 0; i < nr_cpus; i++) {
443 struct datarec *r = &rec->cpu[i];
444 struct datarec *p = &prev->cpu[i];
446 pps = calc_pps(r, p, t);
447 drop = calc_drop(r, p, t);
448 info = calc_info(r, p, t);
449 err = calc_err(r, p, t);
450 if (info > 0) {
451 i_str = "bulk-average";
452 info = (pps+drop) / info; /* calc avg bulk */
454 if (err > 0)
455 err_str = "drv-err";
456 if (pps > 0 || drop > 0)
457 printf(fmt1, "devmap-xmit",
458 i, pps, drop, info, i_str, err_str);
460 pps = calc_pps(&rec->total, &prev->total, t);
461 drop = calc_drop(&rec->total, &prev->total, t);
462 info = calc_info(&rec->total, &prev->total, t);
463 err = calc_err(&rec->total, &prev->total, t);
464 if (info > 0) {
465 i_str = "bulk-average";
466 info = (pps+drop) / info; /* calc avg bulk */
468 if (err > 0)
469 err_str = "drv-err";
470 printf(fmt2, "devmap-xmit", "total", pps, drop,
471 info, i_str, err_str);
474 printf("\n");
477 static bool stats_collect(struct stats_record *rec)
479 int fd;
480 int i;
482 /* TODO: Detect if someone unloaded the perf event_fd's, as
483 * this can happen by someone running perf-record -e
486 fd = map_data[0].fd; /* map0: redirect_err_cnt */
487 for (i = 0; i < REDIR_RES_MAX; i++)
488 map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
490 fd = map_data[1].fd; /* map1: exception_cnt */
491 for (i = 0; i < XDP_ACTION_MAX; i++) {
492 map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
495 fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
496 for (i = 0; i < MAX_CPUS; i++)
497 map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
499 fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
500 map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
502 fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
503 map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
505 return true;
508 static void *alloc_rec_per_cpu(int record_size)
510 unsigned int nr_cpus = bpf_num_possible_cpus();
511 void *array;
512 size_t size;
514 size = record_size * nr_cpus;
515 array = malloc(size);
516 memset(array, 0, size);
517 if (!array) {
518 fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
519 exit(EXIT_FAIL_MEM);
521 return array;
524 static struct stats_record *alloc_stats_record(void)
526 struct stats_record *rec;
527 int rec_sz;
528 int i;
530 /* Alloc main stats_record structure */
531 rec = malloc(sizeof(*rec));
532 memset(rec, 0, sizeof(*rec));
533 if (!rec) {
534 fprintf(stderr, "Mem alloc error\n");
535 exit(EXIT_FAIL_MEM);
538 /* Alloc stats stored per CPU for each record */
539 rec_sz = sizeof(struct u64rec);
540 for (i = 0; i < REDIR_RES_MAX; i++)
541 rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
543 for (i = 0; i < XDP_ACTION_MAX; i++)
544 rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
546 rec_sz = sizeof(struct datarec);
547 rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
548 rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
550 for (i = 0; i < MAX_CPUS; i++)
551 rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
553 return rec;
556 static void free_stats_record(struct stats_record *r)
558 int i;
560 for (i = 0; i < REDIR_RES_MAX; i++)
561 free(r->xdp_redirect[i].cpu);
563 for (i = 0; i < XDP_ACTION_MAX; i++)
564 free(r->xdp_exception[i].cpu);
566 free(r->xdp_cpumap_kthread.cpu);
567 free(r->xdp_devmap_xmit.cpu);
569 for (i = 0; i < MAX_CPUS; i++)
570 free(r->xdp_cpumap_enqueue[i].cpu);
572 free(r);
/* Pointer swap trick: exchange the two record pointers, not their contents */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *old_a = *a;

	*a = *b;
	*b = old_a;
}
585 static void stats_poll(int interval, bool err_only)
587 struct stats_record *rec, *prev;
589 rec = alloc_stats_record();
590 prev = alloc_stats_record();
591 stats_collect(rec);
593 if (err_only)
594 printf("\n%s\n", __doc_err_only__);
596 /* Trick to pretty printf with thousands separators use %' */
597 setlocale(LC_NUMERIC, "en_US");
599 /* Header */
600 if (verbose)
601 printf("\n%s", __doc__);
603 /* TODO Need more advanced stats on error types */
604 if (verbose) {
605 printf(" - Stats map0: %s\n", map_data[0].name);
606 printf(" - Stats map1: %s\n", map_data[1].name);
607 printf("\n");
609 fflush(stdout);
611 while (1) {
612 swap(&prev, &rec);
613 stats_collect(rec);
614 stats_print(rec, prev, err_only);
615 fflush(stdout);
616 sleep(interval);
619 free_stats_record(rec);
620 free_stats_record(prev);
623 static void print_bpf_prog_info(void)
625 int i;
627 /* Prog info */
628 printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
629 for (i = 0; i < prog_cnt; i++) {
630 printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
633 /* Maps info */
634 printf("Loaded BPF prog have %d map(s)\n", map_data_count);
635 for (i = 0; i < map_data_count; i++) {
636 char *name = map_data[i].name;
637 int fd = map_data[i].fd;
639 printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
642 /* Event info */
643 printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
644 for (i = 0; i < prog_cnt; i++) {
645 if (event_fd[i] != -1)
646 printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
650 int main(int argc, char **argv)
652 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
653 int longindex = 0, opt;
654 int ret = EXIT_SUCCESS;
655 char bpf_obj_file[256];
657 /* Default settings: */
658 bool errors_only = true;
659 int interval = 2;
661 snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
663 /* Parse commands line args */
664 while ((opt = getopt_long(argc, argv, "hDSs:",
665 long_options, &longindex)) != -1) {
666 switch (opt) {
667 case 'D':
668 debug = true;
669 break;
670 case 'S':
671 errors_only = false;
672 break;
673 case 's':
674 interval = atoi(optarg);
675 break;
676 case 'h':
677 default:
678 usage(argv);
679 return EXIT_FAILURE;
683 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
684 perror("setrlimit(RLIMIT_MEMLOCK)");
685 return EXIT_FAILURE;
688 if (load_bpf_file(bpf_obj_file)) {
689 printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
690 return EXIT_FAILURE;
692 if (!prog_fd[0]) {
693 printf("ERROR - load_bpf_file: %s\n", strerror(errno));
694 return EXIT_FAILURE;
697 if (debug) {
698 print_bpf_prog_info();
701 /* Unload/stop tracepoint event by closing fd's */
702 if (errors_only) {
703 /* The prog_fd[i] and event_fd[i] depend on the
704 * order the functions was defined in _kern.c
706 close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
707 close(prog_fd[2]); /* func: trace_xdp_redirect */
708 close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
709 close(prog_fd[3]); /* func: trace_xdp_redirect_map */
712 stats_poll(interval, errors_only);
714 return ret;