1 // SPDX-License-Identifier: GPL-2.0
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
17 #include <sys/epoll.h>
18 #include <sys/ioctl.h>
19 #include <sys/socket.h>
20 #include <sys/types.h>
22 #include <linux/genetlink.h>
23 #include <linux/netlink.h>
25 #include "netdev-user.h"
/* The below ifdef blob is required because:
 *
 * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
 *   systems with older glibcs will not have them available. However,
 *   sys/epoll.h does include the type definition for epoll_data, which is
 *   needed by the user program (e.g. epoll_event.data.fd)
 *
 * - linux/eventpoll.h does not define the epoll_data type, it is simply an
 *   opaque __u64. It does, however, include the ioctl definition.
 *
 * Including both headers is impossible (types would be redefined), so this
 * file takes sys/epoll.h and supplies the missing definitions itself.
 *
 * Someday, when glibc is globally up to date, the blob below can be removed.
 */
#if !defined(EPOLL_IOC_TYPE)
/* Argument of the EPIOCSPARAMS / EPIOCGPARAMS ioctls defined below. */
struct epoll_params {
	uint32_t busy_poll_usecs;
	uint16_t busy_poll_budget;
	uint8_t prefer_busy_poll;

	/* pad the struct to a multiple of 64bits */
	uint8_t __pad;
};

#define EPOLL_IOC_TYPE 0x8A
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
/* listener and output settings, filled in by parse_opts() */
static uint32_t cfg_port = 8000;
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
static char *cfg_outfile;
static int cfg_max_events = 8;
static int cfg_ifindex;

/* busy poll params */
static uint32_t cfg_busy_poll_usecs;
static uint32_t cfg_busy_poll_budget;
static uint32_t cfg_prefer_busy_poll;

/* values handed to the NAPI set request in setup_queue() */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;
/* Print the command line synopsis for @filepath and exit with status 1.
 *
 * NOTE(review): the reviewed listing only shows the signature and the format
 * string; the surrounding error() call is restored to match how every other
 * fatal path in this file reports - confirm against the tree.
 */
static void usage(const char *filepath)
{
	error(1, 0,
	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
	      filepath);
}
80 static void parse_opts(int argc
, char **argv
)
88 while ((c
= getopt(argc
, argv
, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
91 cfg_busy_poll_usecs
= strtoul(optarg
, NULL
, 0);
92 if (cfg_busy_poll_usecs
== ULONG_MAX
||
93 cfg_busy_poll_usecs
> UINT32_MAX
)
94 error(1, ERANGE
, "busy_poll_usecs too large");
97 cfg_prefer_busy_poll
= strtoul(optarg
, NULL
, 0);
98 if (cfg_prefer_busy_poll
== ULONG_MAX
||
99 cfg_prefer_busy_poll
> 1)
101 "prefer busy poll should be 0 or 1");
104 cfg_busy_poll_budget
= strtoul(optarg
, NULL
, 0);
105 if (cfg_busy_poll_budget
== ULONG_MAX
||
106 cfg_busy_poll_budget
> UINT16_MAX
)
108 "busy poll budget must be [0, UINT16_MAX]");
111 cfg_port
= strtoul(optarg
, NULL
, 0);
112 if (cfg_port
> UINT16_MAX
)
113 error(1, ERANGE
, "port must be <= 65535");
116 ret
= inet_aton(optarg
, &cfg_bind_addr
);
119 "bind address %s invalid", optarg
);
122 cfg_outfile
= strdup(optarg
);
124 error(1, 0, "outfile invalid");
127 cfg_max_events
= strtol(optarg
, NULL
, 0);
129 if (cfg_max_events
== LONG_MIN
||
130 cfg_max_events
== LONG_MAX
||
133 "max events must be > 0 and < LONG_MAX");
136 cfg_defer_hard_irqs
= strtoul(optarg
, NULL
, 0);
138 if (cfg_defer_hard_irqs
== ULONG_MAX
||
139 cfg_defer_hard_irqs
> INT32_MAX
)
141 "defer_hard_irqs must be <= INT32_MAX");
144 cfg_gro_flush_timeout
= strtoull(optarg
, NULL
, 0);
146 if (cfg_gro_flush_timeout
== ULLONG_MAX
)
148 "gro_flush_timeout must be < ULLONG_MAX");
151 cfg_irq_suspend_timeout
= strtoull(optarg
, NULL
, 0);
153 if (cfg_irq_suspend_timeout
== ULLONG_MAX
)
155 "irq_suspend_timeout must be < ULLONG_MAX");
158 cfg_ifindex
= strtoul(optarg
, NULL
, 0);
159 if (cfg_ifindex
== ULONG_MAX
)
161 "ifindex must be < ULONG_MAX");
/* Register @fd with the epoll instance @epfd for the event mask @events.
 *
 * The fd is also stored in the event's user data, which is how the poll loop
 * identifies the source of an event (epoll_event.data.fd). The event is fully
 * initialised before being handed to the kernel. Exits through error() if
 * the registration fails.
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event ev = {
		.events = events,
		.data.fd = fd,
	};

	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
		error(1, errno, "epoll_ctl add fd: %d", fd);
}
/* Switch @sockfd to non-blocking mode, keeping its other file status flags.
 *
 * Exits through error() if the flags cannot be read or updated. The F_GETFL
 * result is checked so that a failure (-1) is never OR-ed into the new flags.
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	if (flags == -1)
		error(1, errno, "unable to get socket flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}
/* Write all @buflen bytes of @buf to @fd.
 *
 * write() may accept fewer bytes than requested, so the call is repeated on
 * the unwritten tail until nothing remains. A write interrupted by a signal
 * (EINTR) is retried; any other failure terminates the program through
 * error(). A @buflen of zero or less writes nothing.
 */
static void write_chunk(int fd, char *buf, ssize_t buflen)
{
	ssize_t remaining = buflen;
	char *pos = buf;
	ssize_t written;

	while (remaining > 0) {
		written = write(fd, pos, remaining);
		if (written == -1) {
			if (errno == EINTR)
				continue;
			error(1, errno, "unable to write data to outfile");
		}

		pos += written;
		remaining -= written;
	}
}
211 static void setup_queue(void)
213 struct netdev_napi_get_list
*napi_list
= NULL
;
214 struct netdev_napi_get_req_dump
*req
= NULL
;
215 struct netdev_napi_set_req
*set_req
= NULL
;
217 struct ynl_error yerr
;
220 ys
= ynl_sock_create(&ynl_netdev_family
, &yerr
);
222 error(1, 0, "YNL: %s", yerr
.msg
);
224 req
= netdev_napi_get_req_dump_alloc();
225 netdev_napi_get_req_dump_set_ifindex(req
, cfg_ifindex
);
226 napi_list
= netdev_napi_get_dump(ys
, req
);
228 /* assume there is 1 NAPI configured and take the first */
229 if (napi_list
->obj
._present
.id
)
230 napi_id
= napi_list
->obj
.id
;
232 error(1, 0, "napi ID not present?");
234 set_req
= netdev_napi_set_req_alloc();
235 netdev_napi_set_req_set_id(set_req
, napi_id
);
236 netdev_napi_set_req_set_defer_hard_irqs(set_req
, cfg_defer_hard_irqs
);
237 netdev_napi_set_req_set_gro_flush_timeout(set_req
,
238 cfg_gro_flush_timeout
);
239 netdev_napi_set_req_set_irq_suspend_timeout(set_req
,
240 cfg_irq_suspend_timeout
);
242 if (netdev_napi_set(ys
, set_req
))
243 error(1, 0, "can't set NAPI params: %s\n", yerr
.msg
);
245 netdev_napi_get_list_free(napi_list
);
246 netdev_napi_get_req_dump_free(req
);
247 netdev_napi_set_req_free(set_req
);
248 ynl_sock_destroy(ys
);
/* run_poller - receive TCP data through a busy-poll enabled epoll instance
 * and copy it into cfg_outfile.
 *
 * Steps visible in this listing:
 *  - open cfg_outfile with O_WRONLY | O_CREAT, mode 0644 (fatal on failure);
 *  - create a TCP socket, enable SO_REUSEADDR, bind it to
 *    cfg_bind_addr:cfg_port and listen with a backlog of 1. A bind() failure
 *    is reported with error(0, ...): status 0 makes error() print and return,
 *    so execution continues to listen();
 *  - create an epoll instance (its return value is not checked before use)
 *    and apply cfg_busy_poll_usecs, cfg_busy_poll_budget and
 *    cfg_prefer_busy_poll to it with the EPIOCSPARAMS ioctl (fatal on
 *    failure);
 *  - register the listening socket edge-triggered for EPOLLIN | EPOLLOUT;
 *  - for each event returned by epoll_wait() (at most cfg_max_events per
 *    call, no timeout): an event on the listening socket accepts a
 *    connection and registers it; an EPOLLIN event reads from the fd into
 *    buf and hands the data to write_chunk(); EPOLLRDHUP or EPOLLHUP removes
 *    the fd from the epoll set and closes it.
 *
 * The events array is a variable length array sized by cfg_max_events.
 *
 * NOTE(review): this listing omits a number of lines (the declarations of
 * outfile_fd, sockfd, epfd, nfds, conn, val, readlen, buf and i, the loop
 * constructs, and parts of several statements), so the control flow between
 * the visible statements cannot be fully confirmed from here.
 */
251 static void run_poller(void)
253 struct epoll_event events
[cfg_max_events
];
254 struct epoll_params epoll_params
= {0};
255 struct sockaddr_in server_addr
;
264 outfile_fd
= open(cfg_outfile
, O_WRONLY
| O_CREAT
, 0644);
265 if (outfile_fd
== -1)
266 error(1, errno
, "unable to open outfile: %s", cfg_outfile
);
268 sockfd
= socket(AF_INET
, SOCK_STREAM
, IPPROTO_TCP
);
270 error(1, errno
, "unable to create listen socket");
272 server_addr
.sin_family
= AF_INET
;
273 server_addr
.sin_port
= htons(cfg_port
);
274 server_addr
.sin_addr
= cfg_bind_addr
;
276 /* these values are range checked during parse_opts, so casting is safe
279 epoll_params
.busy_poll_usecs
= cfg_busy_poll_usecs
;
280 epoll_params
.busy_poll_budget
= (uint16_t)cfg_busy_poll_budget
;
281 epoll_params
.prefer_busy_poll
= (uint8_t)cfg_prefer_busy_poll
;
282 epoll_params
.__pad
= 0;
285 if (setsockopt(sockfd
, SOL_SOCKET
, SO_REUSEADDR
, &val
, sizeof(val
)))
286 error(1, errno
, "poller setsockopt reuseaddr");
290 if (bind(sockfd
, (struct sockaddr
*)&server_addr
,
291 sizeof(struct sockaddr_in
)))
292 error(0, errno
, "poller bind to port: %d\n", cfg_port
);
294 if (listen(sockfd
, 1))
295 error(1, errno
, "poller listen");
297 epfd
= epoll_create1(0);
298 if (ioctl(epfd
, EPIOCSPARAMS
, &epoll_params
) == -1)
299 error(1, errno
, "unable to set busy poll params");
301 epoll_ctl_add(epfd
, sockfd
, EPOLLIN
| EPOLLOUT
| EPOLLET
);
304 nfds
= epoll_wait(epfd
, events
, cfg_max_events
, -1);
305 for (i
= 0; i
< nfds
; i
++) {
306 if (events
[i
].data
.fd
== sockfd
) {
307 conn
= accept(sockfd
, NULL
, NULL
);
310 "accepting incoming connection failed");
313 epoll_ctl_add(epfd
, conn
,
314 EPOLLIN
| EPOLLET
| EPOLLRDHUP
|
316 } else if (events
[i
].events
& EPOLLIN
) {
318 readlen
= read(events
[i
].data
.fd
, buf
,
321 write_chunk(outfile_fd
, buf
,
327 /* spurious event ? */
329 if (events
[i
].events
& (EPOLLRDHUP
| EPOLLHUP
)) {
330 epoll_ctl(epfd
, EPOLL_CTL_DEL
,
331 events
[i
].data
.fd
, NULL
);
332 close(events
[i
].data
.fd
);
340 int main(int argc
, char *argv
[])
342 parse_opts(argc
, argv
);