1 // SPDX-License-Identifier: GPL-2.0
20 #include <sys/socket.h>
21 #include <sys/types.h>
25 #include <netinet/in.h>
27 #include <linux/tcp.h>
/*
 * Address family requested for the sockets created by this program; it is
 * copied into addrinfo hints and compared against the family reported for
 * the subflow. IPv4 unless changed elsewhere.
 * NOTE(review): presumably switched by the "-6" option; the option handling
 * body is not visible here, confirm.
 */
static int pf = AF_INET;
32 #define IPPROTO_MPTCP 262
41 __u8 mptcpi_add_addr_signal
;
42 __u8 mptcpi_add_addr_accepted
;
43 __u8 mptcpi_subflows_max
;
44 __u8 mptcpi_add_addr_signal_max
;
45 __u8 mptcpi_add_addr_accepted_max
;
48 __u64 mptcpi_write_seq
;
51 __u8 mptcpi_local_addr_used
;
52 __u8 mptcpi_local_addr_max
;
53 __u8 mptcpi_csum_enabled
;
54 __u32 mptcpi_retransmits
;
55 __u64 mptcpi_bytes_retrans
;
56 __u64 mptcpi_bytes_sent
;
57 __u64 mptcpi_bytes_received
;
58 __u64 mptcpi_bytes_acked
;
/*
 * Bookkeeping header exchanged with the kernel by the SOL_MPTCP
 * getsockopt() queries in this file. Callers fill in the two "user" sizes
 * and leave the kernel-owned fields zeroed.
 */
struct mptcp_subflow_data {
	/* size of this structure in userspace */
	__u32 size_subflow_data;
	/* must be 0, set by kernel */
	__u32 num_subflows;
	/* must be 0, set by kernel */
	__u32 size_kernel;
	/* size of one element in data[] */
	__u32 size_user;
} __attribute__((aligned(8)));
68 struct mptcp_subflow_addrs
{
70 __kernel_sa_family_t sa_family
;
71 struct sockaddr sa_local
;
72 struct sockaddr_in sin_local
;
73 struct sockaddr_in6 sin6_local
;
74 struct __kernel_sockaddr_storage ss_local
;
77 struct sockaddr sa_remote
;
78 struct sockaddr_in sin_remote
;
79 struct sockaddr_in6 sin6_remote
;
80 struct __kernel_sockaddr_storage ss_remote
;
85 #define MPTCP_TCPINFO 2
86 #define MPTCP_SUBFLOW_ADDRS 3
89 #ifndef MPTCP_FULL_INFO
90 struct mptcp_subflow_info
{
92 struct mptcp_subflow_addrs addrs
;
95 struct mptcp_full_info
{
96 __u32 size_tcpinfo_kernel
; /* must be 0, set by kernel */
97 __u32 size_tcpinfo_user
;
98 __u32 size_sfinfo_kernel
; /* must be 0, set by kernel */
99 __u32 size_sfinfo_user
;
100 __u32 num_subflows
; /* must be 0, set by kernel (real subflow count) */
101 __u32 size_arrays_user
; /* max subflows that userspace is interested in;
102 * the buffers at subflow_info/tcp_info
103 * are respectively at least:
104 * size_arrays * size_sfinfo_user
105 * size_arrays * size_tcpinfo_user
108 __aligned_u64 subflow_info
;
109 __aligned_u64 tcp_info
;
110 struct mptcp_info mptcp_info
;
113 #define MPTCP_FULL_INFO 4
117 struct mptcp_info mi
;
118 struct mptcp_info last_sample
;
119 struct tcp_info tcp_info
;
120 struct mptcp_subflow_addrs addrs
;
121 uint64_t mptcpi_rcv_delta
;
122 uint64_t tcpi_rcv_delta
;
123 bool pkt_stats_avail
;
/*
 * Yields the smaller of the two operands (the second one when they compare
 * equal). Each operand may be evaluated twice, so pass only expressions
 * without side effects.
 */
#define MIN(a, b) ((b) > (a) ? (a) : (b))
130 static void die_perror(const char *msg
)
136 static void die_usage(int r
)
138 fprintf(stderr
, "Usage: mptcp_sockopt [-6]\n");
142 static void xerror(const char *fmt
, ...)
147 vfprintf(stderr
, fmt
, ap
);
/*
 * Map an error code returned by getaddrinfo() to a printable message.
 *
 * For EAI_SYSTEM the text is derived from the current errno via strerror();
 * every other code is handed to gai_strerror(). The returned string is owned
 * by the C library and must not be freed.
 */
static const char *getxinfo_strerr(int err)
{
	return err == EAI_SYSTEM ? strerror(errno) : gai_strerror(err);
}
161 static void xgetaddrinfo(const char *node
, const char *service
,
162 const struct addrinfo
*hints
,
163 struct addrinfo
**res
)
165 int err
= getaddrinfo(node
, service
, hints
, res
);
168 const char *errstr
= getxinfo_strerr(err
);
170 fprintf(stderr
, "Fatal: getaddrinfo(%s:%s): %s\n",
171 node
? node
: "", service
? service
: "", errstr
);
176 static int sock_listen_mptcp(const char * const listenaddr
,
177 const char * const port
)
180 struct addrinfo hints
= {
181 .ai_protocol
= IPPROTO_TCP
,
182 .ai_socktype
= SOCK_STREAM
,
183 .ai_flags
= AI_PASSIVE
| AI_NUMERICHOST
186 hints
.ai_family
= pf
;
188 struct addrinfo
*a
, *addr
;
191 xgetaddrinfo(listenaddr
, port
, &hints
, &addr
);
192 hints
.ai_family
= pf
;
194 for (a
= addr
; a
; a
= a
->ai_next
) {
195 sock
= socket(a
->ai_family
, a
->ai_socktype
, IPPROTO_MPTCP
);
199 if (-1 == setsockopt(sock
, SOL_SOCKET
, SO_REUSEADDR
, &one
,
201 perror("setsockopt");
203 if (bind(sock
, a
->ai_addr
, a
->ai_addrlen
) == 0)
214 xerror("could not create listen socket");
216 if (listen(sock
, 20))
217 die_perror("listen");
222 static int sock_connect_mptcp(const char * const remoteaddr
,
223 const char * const port
, int proto
)
225 struct addrinfo hints
= {
226 .ai_protocol
= IPPROTO_TCP
,
227 .ai_socktype
= SOCK_STREAM
,
229 struct addrinfo
*a
, *addr
;
232 hints
.ai_family
= pf
;
234 xgetaddrinfo(remoteaddr
, port
, &hints
, &addr
);
235 for (a
= addr
; a
; a
= a
->ai_next
) {
236 sock
= socket(a
->ai_family
, a
->ai_socktype
, proto
);
240 if (connect(sock
, a
->ai_addr
, a
->ai_addrlen
) == 0)
243 die_perror("connect");
247 xerror("could not create connect socket");
253 static void parse_opts(int argc
, char **argv
)
257 while ((c
= getopt(argc
, argv
, "h6")) != -1) {
272 static void do_getsockopt_bogus_sf_data(int fd
, int optname
)
274 struct mptcp_subflow_data good_data
;
276 struct mptcp_subflow_data d
;
279 socklen_t olen
, _olen
;
282 memset(&bd
, 0, sizeof(bd
));
283 memset(&good_data
, 0, sizeof(good_data
));
285 olen
= sizeof(good_data
);
286 good_data
.size_subflow_data
= olen
;
288 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
289 assert(ret
< 0); /* 0 size_subflow_data */
290 assert(olen
== sizeof(good_data
));
294 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
296 assert(olen
== sizeof(good_data
));
297 assert(bd
.d
.num_subflows
== 1);
298 assert(bd
.d
.size_kernel
> 0);
299 assert(bd
.d
.size_user
== 0);
302 _olen
= rand() % olen
;
304 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
305 assert(ret
< 0); /* bogus olen */
306 assert(olen
== _olen
); /* must be unchanged */
309 olen
= sizeof(good_data
);
310 bd
.d
.size_kernel
= 1;
311 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
312 assert(ret
< 0); /* size_kernel not 0 */
315 olen
= sizeof(good_data
);
316 bd
.d
.num_subflows
= 1;
317 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
318 assert(ret
< 0); /* num_subflows not 0 */
320 /* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */
323 bd
.d
.size_subflow_data
= sizeof(bd
);
325 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &olen
);
328 /* olen must be truncated to real data size filled by kernel: */
329 assert(olen
== sizeof(good_data
));
331 assert(bd
.d
.size_subflow_data
== sizeof(bd
));
334 bd
.d
.size_subflow_data
+= 1;
336 olen
= bd
.d
.size_subflow_data
+ 1;
339 ret
= getsockopt(fd
, SOL_MPTCP
, optname
, &bd
, &_olen
);
342 /* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */
343 assert(olen
== _olen
);
345 assert(bd
.d
.size_subflow_data
== sizeof(good_data
) + 1);
346 assert(bd
.buf
[0] == 0);
349 static void do_getsockopt_mptcp_info(struct so_state
*s
, int fd
, size_t w
)
356 ret
= getsockopt(fd
, SOL_MPTCP
, MPTCP_INFO
, &i
, &olen
);
359 die_perror("getsockopt MPTCP_INFO");
361 s
->pkt_stats_avail
= olen
>= sizeof(i
);
364 if (s
->mi
.mptcpi_write_seq
== 0)
367 assert(s
->mi
.mptcpi_write_seq
+ w
== i
.mptcpi_write_seq
);
369 s
->mptcpi_rcv_delta
= i
.mptcpi_rcv_nxt
- s
->mi
.mptcpi_rcv_nxt
;
372 static void do_getsockopt_tcp_info(struct so_state
*s
, int fd
, size_t r
, size_t w
)
375 struct mptcp_subflow_data d
;
376 struct tcp_info ti
[2];
382 memset(&ti
, 0, sizeof(ti
));
384 ti
.d
.size_subflow_data
= sizeof(struct mptcp_subflow_data
);
385 ti
.d
.size_user
= sizeof(struct tcp_info
);
388 ret
= getsockopt(fd
, SOL_MPTCP
, MPTCP_TCPINFO
, &ti
, &olen
);
390 xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)");
392 assert(olen
<= sizeof(ti
));
393 assert(ti
.d
.size_kernel
> 0);
394 assert(ti
.d
.size_user
==
395 MIN(ti
.d
.size_kernel
, sizeof(struct tcp_info
)));
396 assert(ti
.d
.num_subflows
== 1);
398 assert(olen
> (socklen_t
)sizeof(struct mptcp_subflow_data
));
399 olen
-= sizeof(struct mptcp_subflow_data
);
400 assert(olen
== ti
.d
.size_user
);
402 s
->tcp_info
= ti
.ti
[0];
404 if (ti
.ti
[0].tcpi_bytes_sent
== w
&&
405 ti
.ti
[0].tcpi_bytes_received
== r
)
408 if (r
== 0 && ti
.ti
[0].tcpi_bytes_sent
== w
&&
409 ti
.ti
[0].tcpi_bytes_received
) {
410 s
->tcpi_rcv_delta
= ti
.ti
[0].tcpi_bytes_received
;
414 /* wait and repeat, might be that tx is still ongoing */
416 } while (tries
-- > 0);
418 xerror("tcpi_bytes_sent %" PRIu64
", want %zu. tcpi_bytes_received %" PRIu64
", want %zu",
419 ti
.ti
[0].tcpi_bytes_sent
, w
, ti
.ti
[0].tcpi_bytes_received
, r
);
422 do_getsockopt_bogus_sf_data(fd
, MPTCP_TCPINFO
);
425 static void do_getsockopt_subflow_addrs(struct so_state
*s
, int fd
)
427 struct sockaddr_storage remote
, local
;
428 socklen_t olen
, rlen
, llen
;
431 struct mptcp_subflow_data d
;
432 struct mptcp_subflow_addrs addr
[2];
435 memset(&addrs
, 0, sizeof(addrs
));
436 memset(&local
, 0, sizeof(local
));
437 memset(&remote
, 0, sizeof(remote
));
439 addrs
.d
.size_subflow_data
= sizeof(struct mptcp_subflow_data
);
440 addrs
.d
.size_user
= sizeof(struct mptcp_subflow_addrs
);
441 olen
= sizeof(addrs
);
443 ret
= getsockopt(fd
, SOL_MPTCP
, MPTCP_SUBFLOW_ADDRS
, &addrs
, &olen
);
445 die_perror("getsockopt MPTCP_SUBFLOW_ADDRS");
447 assert(olen
<= sizeof(addrs
));
448 assert(addrs
.d
.size_kernel
> 0);
449 assert(addrs
.d
.size_user
==
450 MIN(addrs
.d
.size_kernel
, sizeof(struct mptcp_subflow_addrs
)));
451 assert(addrs
.d
.num_subflows
== 1);
453 assert(olen
> (socklen_t
)sizeof(struct mptcp_subflow_data
));
454 olen
-= sizeof(struct mptcp_subflow_data
);
455 assert(olen
== addrs
.d
.size_user
);
457 llen
= sizeof(local
);
458 ret
= getsockname(fd
, (struct sockaddr
*)&local
, &llen
);
460 die_perror("getsockname");
461 rlen
= sizeof(remote
);
462 ret
= getpeername(fd
, (struct sockaddr
*)&remote
, &rlen
);
464 die_perror("getpeername");
467 assert(rlen
== llen
);
469 assert(remote
.ss_family
== local
.ss_family
);
471 assert(memcmp(&local
, &addrs
.addr
[0].ss_local
, sizeof(local
)) == 0);
472 assert(memcmp(&remote
, &addrs
.addr
[0].ss_remote
, sizeof(remote
)) == 0);
473 s
->addrs
= addrs
.addr
[0];
475 memset(&addrs
, 0, sizeof(addrs
));
477 addrs
.d
.size_subflow_data
= sizeof(struct mptcp_subflow_data
);
478 addrs
.d
.size_user
= sizeof(sa_family_t
);
479 olen
= sizeof(addrs
.d
) + sizeof(sa_family_t
);
481 ret
= getsockopt(fd
, SOL_MPTCP
, MPTCP_SUBFLOW_ADDRS
, &addrs
, &olen
);
483 assert(olen
== sizeof(addrs
.d
) + sizeof(sa_family_t
));
485 assert(addrs
.addr
[0].sa_family
== pf
);
486 assert(addrs
.addr
[0].sa_family
== local
.ss_family
);
488 assert(memcmp(&local
, &addrs
.addr
[0].ss_local
, sizeof(local
)) != 0);
489 assert(memcmp(&remote
, &addrs
.addr
[0].ss_remote
, sizeof(remote
)) != 0);
491 do_getsockopt_bogus_sf_data(fd
, MPTCP_SUBFLOW_ADDRS
);
494 static void do_getsockopt_mptcp_full_info(struct so_state
*s
, int fd
)
496 size_t data_size
= sizeof(struct mptcp_full_info
);
497 struct mptcp_subflow_info sfinfo
[2];
498 struct tcp_info tcp_info
[2];
499 struct mptcp_full_info mfi
;
503 memset(&mfi
, 0, data_size
);
504 memset(tcp_info
, 0, sizeof(tcp_info
));
505 memset(sfinfo
, 0, sizeof(sfinfo
));
507 mfi
.size_tcpinfo_user
= sizeof(struct tcp_info
);
508 mfi
.size_sfinfo_user
= sizeof(struct mptcp_subflow_info
);
509 mfi
.size_arrays_user
= 2;
510 mfi
.subflow_info
= (unsigned long)&sfinfo
[0];
511 mfi
.tcp_info
= (unsigned long)&tcp_info
[0];
514 ret
= getsockopt(fd
, SOL_MPTCP
, MPTCP_FULL_INFO
, &mfi
, &olen
);
516 if (errno
== EOPNOTSUPP
) {
517 perror("MPTCP_FULL_INFO test skipped");
520 xerror("getsockopt MPTCP_FULL_INFO");
523 assert(olen
<= data_size
);
524 assert(mfi
.size_tcpinfo_kernel
> 0);
525 assert(mfi
.size_tcpinfo_user
==
526 MIN(mfi
.size_tcpinfo_kernel
, sizeof(struct tcp_info
)));
527 assert(mfi
.size_sfinfo_kernel
> 0);
528 assert(mfi
.size_sfinfo_user
==
529 MIN(mfi
.size_sfinfo_kernel
, sizeof(struct mptcp_subflow_info
)));
530 assert(mfi
.num_subflows
== 1);
532 /* Tolerate future extension to mptcp_info struct and running newer
533 * test on top of older kernel.
534 * Anyway any kernel supporting MPTCP_FULL_INFO must at least include
535 * the following in mptcp_info.
537 assert(olen
> (socklen_t
)__builtin_offsetof(struct mptcp_full_info
, tcp_info
));
538 assert(mfi
.mptcp_info
.mptcpi_subflows
== 0);
539 assert(mfi
.mptcp_info
.mptcpi_bytes_sent
== s
->last_sample
.mptcpi_bytes_sent
);
540 assert(mfi
.mptcp_info
.mptcpi_bytes_received
== s
->last_sample
.mptcpi_bytes_received
);
542 assert(sfinfo
[0].id
== 1);
543 assert(tcp_info
[0].tcpi_bytes_sent
== s
->tcp_info
.tcpi_bytes_sent
);
544 assert(tcp_info
[0].tcpi_bytes_received
== s
->tcp_info
.tcpi_bytes_received
);
545 assert(!memcmp(&sfinfo
->addrs
, &s
->addrs
, sizeof(struct mptcp_subflow_addrs
)));
548 static void do_getsockopts(struct so_state
*s
, int fd
, size_t r
, size_t w
)
550 do_getsockopt_mptcp_info(s
, fd
, w
);
552 do_getsockopt_tcp_info(s
, fd
, r
, w
);
554 do_getsockopt_subflow_addrs(s
, fd
);
557 do_getsockopt_mptcp_full_info(s
, fd
);
560 static void connect_one_server(int fd
, int pipefd
)
562 char buf
[4096], buf2
[4096];
563 size_t len
, i
, total
;
568 memset(&s
, 0, sizeof(s
));
570 len
= rand() % (sizeof(buf
) - 1);
575 for (i
= 0; i
< len
; i
++) {
576 buf
[i
] = rand() % 26;
582 do_getsockopts(&s
, fd
, 0, 0);
584 /* un-block server */
585 ret
= read(pipefd
, buf2
, 4);
589 assert(strncmp(buf2
, "xmit", 4) == 0);
591 ret
= write(fd
, buf
, len
);
595 if (ret
!= (ssize_t
)len
)
596 xerror("short write");
600 ret
= read(fd
, buf2
+ total
, sizeof(buf2
) - total
);
609 } while (total
< len
);
612 xerror("total %lu, len %lu eof %d\n", total
, len
, eof
);
614 if (memcmp(buf
, buf2
, len
))
615 xerror("data corruption");
617 if (s
.tcpi_rcv_delta
)
618 assert(s
.tcpi_rcv_delta
<= total
);
620 do_getsockopts(&s
, fd
, ret
, ret
);
623 total
+= 1; /* sequence advances due to FIN */
625 assert(s
.mptcpi_rcv_delta
== (uint64_t)total
);
629 static void process_one_client(int fd
, int pipefd
)
631 ssize_t ret
, ret2
, ret3
;
635 memset(&s
, 0, sizeof(s
));
636 do_getsockopts(&s
, fd
, 0, 0);
638 ret
= write(pipefd
, "xmit", 4);
641 ret
= read(fd
, buf
, sizeof(buf
));
645 assert(s
.mptcpi_rcv_delta
<= (uint64_t)ret
);
647 if (s
.tcpi_rcv_delta
)
648 assert(s
.tcpi_rcv_delta
== (uint64_t)ret
);
650 ret2
= write(fd
, buf
, ret
);
654 /* wait for hangup */
655 ret3
= read(fd
, buf
, 1);
657 xerror("expected EOF, got %lu", ret3
);
659 do_getsockopts(&s
, fd
, ret
, ret2
);
660 if (s
.mptcpi_rcv_delta
!= (uint64_t)ret
+ 1)
661 xerror("mptcpi_rcv_delta %" PRIu64
", expect %" PRIu64
, s
.mptcpi_rcv_delta
, ret
+ 1, s
.mptcpi_rcv_delta
- ret
);
663 /* be nice when running on top of older kernel */
664 if (s
.pkt_stats_avail
) {
665 if (s
.last_sample
.mptcpi_bytes_sent
!= ret2
)
666 xerror("mptcpi_bytes_sent %" PRIu64
", expect %" PRIu64
,
667 s
.last_sample
.mptcpi_bytes_sent
, ret2
,
668 s
.last_sample
.mptcpi_bytes_sent
- ret2
);
669 if (s
.last_sample
.mptcpi_bytes_received
!= ret
)
670 xerror("mptcpi_bytes_received %" PRIu64
", expect %" PRIu64
,
671 s
.last_sample
.mptcpi_bytes_received
, ret
,
672 s
.last_sample
.mptcpi_bytes_received
- ret
);
673 if (s
.last_sample
.mptcpi_bytes_acked
!= ret
)
674 xerror("mptcpi_bytes_acked %" PRIu64
", expect %" PRIu64
,
675 s
.last_sample
.mptcpi_bytes_acked
, ret2
,
676 s
.last_sample
.mptcpi_bytes_acked
- ret2
);
682 static int xaccept(int s
)
684 int fd
= accept(s
, NULL
, 0);
687 die_perror("accept");
692 static int server(int pipefd
)
698 fd
= sock_listen_mptcp("127.0.0.1", "15432");
701 fd
= sock_listen_mptcp("::1", "15432");
704 xerror("Unknown pf %d\n", pf
);
708 r
= write(pipefd
, "conn", 4);
714 process_one_client(r
, pipefd
);
719 static void test_ip_tos_sockopt(int fd
)
721 uint8_t tos_in
, tos_out
;
725 tos_in
= rand() & 0xfc;
726 r
= setsockopt(fd
, SOL_IP
, IP_TOS
, &tos_in
, sizeof(tos_out
));
728 die_perror("setsockopt IP_TOS");
732 r
= getsockopt(fd
, SOL_IP
, IP_TOS
, &tos_out
, &s
);
734 die_perror("getsockopt IP_TOS");
736 if (tos_in
!= tos_out
)
737 xerror("tos %x != %x socklen_t %d\n", tos_in
, tos_out
, s
);
740 xerror("tos should be 1 byte");
743 r
= getsockopt(fd
, SOL_IP
, IP_TOS
, &tos_out
, &s
);
745 die_perror("getsockopt IP_TOS 0");
747 xerror("expect socklen_t == 0");
750 r
= getsockopt(fd
, SOL_IP
, IP_TOS
, &tos_out
, &s
);
751 if (r
!= -1 && errno
!= EINVAL
)
752 die_perror("getsockopt IP_TOS did not indicate -EINVAL");
754 xerror("expect socklen_t == -1");
757 static int client(int pipefd
)
765 fd
= sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP
);
768 fd
= sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP
);
771 xerror("Unknown pf %d\n", pf
);
774 test_ip_tos_sockopt(fd
);
776 connect_one_server(fd
, pipefd
);
781 static pid_t
xfork(void)
791 static int rcheck(int wstatus
, const char *what
)
793 if (WIFEXITED(wstatus
)) {
794 if (WEXITSTATUS(wstatus
) == 0)
796 fprintf(stderr
, "%s exited, status=%d\n", what
, WEXITSTATUS(wstatus
));
797 return WEXITSTATUS(wstatus
);
798 } else if (WIFSIGNALED(wstatus
)) {
799 xerror("%s killed by signal %d\n", what
, WTERMSIG(wstatus
));
800 } else if (WIFSTOPPED(wstatus
)) {
801 xerror("%s stopped by signal %d\n", what
, WSTOPSIG(wstatus
));
807 static void init_rng(void)
809 int fd
= open("/dev/urandom", O_RDONLY
);
816 ret
= read(fd
, &foo
, sizeof(foo
));
817 assert(ret
== sizeof(foo
));
826 int main(int argc
, char *argv
[])
832 parse_opts(argc
, argv
);
842 return server(pipefds
[1]);
846 /* wait until server bound a socket */
847 e1
= read(pipefds
[0], &e1
, 4);
852 return client(pipefds
[0]);
856 ret
= waitpid(s
, &wstatus
, 0);
858 die_perror("waitpid");
859 e1
= rcheck(wstatus
, "server");
860 ret
= waitpid(c
, &wstatus
, 0);
862 die_perror("waitpid");
863 e2
= rcheck(wstatus
, "client");