2 mtr -- a network diagnostic tool
3 Copyright (C) 2016 Matt Kimball
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #include "protocols.h"
34 Implementation notes (or "Why this uses a worker thread")
36 Having done my time debugging various race conditions over the
37 last twenty-plus years as a software developer, both of my own
38 creation and discovered in the code of others, I almost always
39 try to structure my code to be single-threaded. However,
40 I think in this case, the ICMP service thread is unavoidable.
42 I would have liked to avoid multithreading entirely, but here are the constraints which make that impossible:
45 a) mtr was originally a Unix program which used "raw sockets".
46 b) In order to port mtr to Windows, Cygwin is used to get a
47 Unix-like environment.
48 c) You can't use a raw socket to receive an ICMP reply on Windows.
49 However, Windows provides a separate API in the form of
50 ICMP.DLL for sending and receiving ICMP messages.
51 d) The ICMP API works asynchronously, and requires completion
52 through an asynchronous procedure call ("APC").
53 e) APCs are only delivered during blocking Win32 operations
54 which are flagged as "alertable." This prevents apps from
55 having APCs execute unexpectedly during an I/O operation.
56 f) Cygwin's implementation of POSIX functions does all I/O
57 through non-alertable I/O operations. This is reasonable
58 because APCs don't exist in the POSIX API.
59 g) Cygwin implements Unix-style signals at the application level,
60 since the Windows kernel doesn't have them. We want our
61 program to respond to SIGTERM and SIGKILL, at least.
62 h) Cygwin's signal implementation will deliver signals during
63 blocking I/O functions in the Cygwin library, but won't
64 respond to signals if the signal is sent while the application
65 is in a blocking Windows API call which Cygwin is not aware of.
66 i) Since we want to both send/receive ICMP probes and also respond
67 to Unix-style signals, we require two threads: one which
68 uses Cygwin's POSIX-style blocking I/O and can respond to
69 signals, and one which uses alertable waits through Win32 blocking calls.
72 The solution is to have the main thread using select() as the
73 blocking operation in its loop, and also to have an ICMP service
74 thread using WaitForSingleObjectEx() as its blocking operation.
75 The main thread will respond to signals. The ICMP service thread
76 will run the APCs completing ICMP.DLL requests.
78 These two threads communicate through a pair of pipes. One pipe
79 sends requests from the main thread to the ICMP service thread,
80 and another pipe sends the requests back as they complete.
82 We use the Cygwin pipe() to create the pipes, but in the ICMP
83 service thread we use the Win32 HANDLE that corresponds to the
84 receiving end of the input pipe to wait for ICMP requests.
88 static DWORD WINAPI
icmp_service_thread(LPVOID param
);
90 /* Windows doesn't require any initialization at a privileged level */
91 void init_net_state_privileged(
92 struct net_state_t
*net_state
)
97 Convenience similar to error(), but for reporting Windows
98 error codes instead of errno codes.
100 void error_win(int exit_code
, int win_error
, const char *str
) {
101 fprintf(stderr
, "%s (code %d)\n", str
, win_error
);
105 /* Open the ICMP.DLL interface and start the ICMP service thread */
107 struct net_state_t
*net_state
)
110 int in_pipe
[2], out_pipe
[2];
113 memset(net_state
, 0, sizeof(struct net_state_t
));
115 net_state
->platform
.icmp4
= IcmpCreateFile();
116 net_state
->platform
.icmp6
= Icmp6CreateFile();
118 if (net_state
->platform
.icmp4
== INVALID_HANDLE_VALUE
119 && net_state
->platform
.icmp6
== INVALID_HANDLE_VALUE
) {
121 error_win(EXIT_FAILURE
, GetLastError(), "Failure opening ICMP");
123 net_state
->platform
.ip4_socket_raw
= false;
124 net_state
->platform
.ip6_socket_raw
= false;
127 We need a pipe for communication with the ICMP thread in each direction.
130 if (pipe(in_pipe
) == -1 || pipe(out_pipe
) == -1) {
131 error(EXIT_FAILURE
, errno
, "Failure creating thread pipe");
134 net_state
->platform
.thread_in_pipe_read
= in_pipe
[0];
135 net_state
->platform
.thread_in_pipe_write
= in_pipe
[1];
136 net_state
->platform
.thread_out_pipe_read
= out_pipe
[0];
137 net_state
->platform
.thread_out_pipe_write
= out_pipe
[1];
139 InitializeCriticalSection(&net_state
->platform
.pending_request_cs
);
140 net_state
->platform
.pending_request_count
= 0;
141 net_state
->platform
.pending_request_event
=
142 CreateEvent(NULL
, TRUE
, FALSE
, NULL
);
144 if (net_state
->platform
.pending_request_event
== NULL
) {
145 error(EXIT_FAILURE
, errno
, "Failure creating request event");
149 The read on the out pipe needs to be nonblocking because
150 it will only occasionally be checked in the main thread.
152 err
= fcntl(out_pipe
[0], F_SETFL
, O_NONBLOCK
);
156 "Failure setting pipe to non-blocking");
159 /* Spin up the ICMP service thread */
160 thread
= CreateThread(
161 NULL
, 0, icmp_service_thread
, net_state
, 0, NULL
);
163 if (thread
== NULL
) {
165 EXIT_FAILURE
, GetLastError(),
166 "Failure creating ICMP service thread");
171 If we succeeded at opening the ICMP file handle, we can
172 assume that the corresponding IP protocol version is supported.
174 bool is_ip_version_supported(
175 struct net_state_t
*net_state
,
178 if (ip_version
== 4) {
179 return (net_state
->platform
.icmp4
!= INVALID_HANDLE_VALUE
);
180 } else if (ip_version
== 6) {
181 return (net_state
->platform
.icmp6
!= INVALID_HANDLE_VALUE
);
187 /* On Windows, we only support ICMP probes */
188 bool is_protocol_supported(
189 struct net_state_t
* net_state
,
192 if (protocol
== IPPROTO_ICMP
) {
199 /* Set the back pointer to the net_state when a probe is allocated */
200 void platform_alloc_probe(
201 struct net_state_t
*net_state
,
202 struct probe_t
*probe
)
204 probe
->platform
.net_state
= net_state
;
207 /* Free the reply buffer when the probe is freed */
208 void platform_free_probe(
209 struct probe_t
*probe
)
213 /* Report a windows error code using a platform-independent error string */
215 void report_win_error(
219 /* It could be that we got no reply because of timeout */
220 if (err
== IP_REQ_TIMED_OUT
|| err
== IP_SOURCE_QUENCH
) {
221 printf("%d no-reply\n", command_token
);
222 } else if (err
== ERROR_INVALID_NETNAME
) {
223 printf("%d address-not-available\n", command_token
);
224 } else if (err
== ERROR_INVALID_PARAMETER
) {
225 printf("%d invalid-argument\n", command_token
);
227 printf("%d unexpected-error winerror %d\n", command_token
, err
);
232 After we have the result of an ICMP probe on the ICMP service
233 thread, this is used to send the result back to the main thread
234 for probe result reporting.
237 void queue_thread_result(struct icmp_thread_request_t
*request
)
241 /* Pass ownership of the request back through the result pipe */
243 request
->net_state
->platform
.thread_out_pipe_write
,
245 sizeof(struct icmp_thread_request_t
*));
246 if (byte_count
== -1) {
249 "failure writing to probe result queue");
254 The overlapped I/O style completion routine to be called by
255 Windows during an alertable wait when an ICMP probe has
256 completed, either by reply, or by ICMP.DLL timeout.
259 void WINAPI
on_icmp_reply(
261 PIO_STATUS_BLOCK status
,
264 struct icmp_thread_request_t
*request
=
265 (struct icmp_thread_request_t
*) context
;
267 int round_trip_us
= 0;
269 int reply_status
= 0;
270 struct sockaddr_storage remote_addr
;
271 struct sockaddr_in
*remote_addr4
;
272 struct sockaddr_in6
*remote_addr6
;
273 ICMP_ECHO_REPLY
*reply4
;
274 ICMPV6_ECHO_REPLY
*reply6
;
276 if (request
->ip_version
== 6) {
277 reply6
= request
->reply6
;
278 reply_count
= Icmp6ParseReplies(reply6
, sizeof(ICMPV6_ECHO_REPLY
));
280 if (reply_count
> 0) {
281 reply_status
= reply6
->Status
;
283 /* Unfortunately, ICMP.DLL only has millisecond precision */
284 round_trip_us
= reply6
->RoundTripTime
* 1000;
286 remote_addr6
= (struct sockaddr_in6
*) &remote_addr
;
287 remote_addr6
->sin6_family
= AF_INET6
;
288 remote_addr6
->sin6_port
= 0;
289 remote_addr6
->sin6_flowinfo
= 0;
290 memcpy(&remote_addr6
->sin6_addr
, reply6
->Address
.sin6_addr
,
291 sizeof(struct in6_addr
));
292 remote_addr6
->sin6_scope_id
= 0;
295 reply4
= request
->reply4
;
296 reply_count
= IcmpParseReplies(reply4
, sizeof(ICMP_ECHO_REPLY
));
298 if (reply_count
> 0) {
299 reply_status
= reply4
->Status
;
301 /* Unfortunately, ICMP.DLL only has millisecond precision */
302 round_trip_us
= reply4
->RoundTripTime
* 1000;
304 remote_addr4
= (struct sockaddr_in
*) &remote_addr
;
305 remote_addr4
->sin_family
= AF_INET
;
306 remote_addr4
->sin_port
= 0;
307 remote_addr4
->sin_addr
.s_addr
= reply4
->Address
;
311 if (reply_count
== 0) {
312 reply_status
= GetLastError();
316 if (reply_status
== IP_SUCCESS
) {
317 icmp_type
= ICMP_ECHOREPLY
;
318 } else if (reply_status
== IP_TTL_EXPIRED_TRANSIT
319 || reply_status
== IP_TTL_EXPIRED_REASSEM
) {
321 icmp_type
= ICMP_TIME_EXCEEDED
;
322 } else if (reply_status
== IP_DEST_HOST_UNREACHABLE
323 || reply_status
== IP_DEST_PORT_UNREACHABLE
324 || reply_status
== IP_DEST_PROT_UNREACHABLE
325 || reply_status
== IP_DEST_NET_UNREACHABLE
326 || reply_status
== IP_DEST_UNREACHABLE
327 || reply_status
== IP_DEST_NO_ROUTE
328 || reply_status
== IP_BAD_ROUTE
329 || reply_status
== IP_BAD_DESTINATION
) {
331 icmp_type
= ICMP_DEST_UNREACH
;
334 request
->icmp_type
= icmp_type
;
335 request
->reply_status
= reply_status
;
336 request
->remote_addr
= remote_addr
;
337 request
->round_trip_us
= round_trip_us
;
338 queue_thread_result(request
);
341 /* Use ICMP.DLL's send echo support to send a probe */
343 void icmp_send_probe(
344 struct icmp_thread_request_t
*request
,
348 IP_OPTION_INFORMATION option
;
353 struct sockaddr_in
*dest_sockaddr4
;
354 struct sockaddr_in6
*src_sockaddr6
;
355 struct sockaddr_in6
*dest_sockaddr6
;
357 if (request
->timeout
> 0) {
358 timeout
= 1000 * request
->timeout
;
361 IcmpSendEcho2 will fail with an invalid-argument error on a timeout of
362 zero. Our Unix implementation allows it. Bump up the timeout to a nonzero value.
368 memset(&option
, 0, sizeof(IP_OPTION_INFORMATION
));
369 option
.Ttl
= request
->ttl
;
371 if (request
->ip_version
== 6) {
372 reply_size
= sizeof(ICMPV6_ECHO_REPLY
) + payload_size
;
374 reply_size
= sizeof(ICMP_ECHO_REPLY
) + payload_size
;
377 request
->reply4
= malloc(reply_size
);
378 if (request
->reply4
== NULL
) {
379 error(EXIT_FAILURE
, errno
, "failure to allocate reply buffer");
382 if (request
->ip_version
== 6) {
383 src_sockaddr6
= (struct sockaddr_in6
*) &request
->src_sockaddr
;
384 dest_sockaddr6
= (struct sockaddr_in6
*) &request
->dest_sockaddr
;
386 send_result
= Icmp6SendEcho2(request
->net_state
->platform
.icmp6
,
388 (FARPROC
) on_icmp_reply
,
390 src_sockaddr6
, dest_sockaddr6
,
391 payload
, payload_size
, &option
,
393 reply_size
, timeout
);
395 dest_sockaddr4
= (struct sockaddr_in
*) &request
->dest_sockaddr
;
397 send_result
= IcmpSendEcho2(request
->net_state
->platform
.icmp4
,
399 (FARPROC
) on_icmp_reply
,
401 dest_sockaddr4
->sin_addr
.s_addr
,
402 payload
, payload_size
, &option
,
404 reply_size
, timeout
);
407 if (send_result
== 0) {
408 err
= GetLastError();
411 ERROR_IO_PENDING is expected when the probe is sent.
412 Other errors indicate the probe wasn't sent, and should
413 be reported in the main thread.
415 if (err
!= ERROR_IO_PENDING
) {
416 request
->icmp_type
= -1;
417 request
->reply_status
= err
;
418 queue_thread_result(request
);
423 /* Fill the payload of the packet as specified by the probe parameters */
426 const struct icmp_thread_request_t
*request
,
428 int payload_buffer_size
)
433 if (request
->ip_version
== 6) {
435 sizeof(struct IP6Header
) + sizeof(struct ICMPHeader
);
436 } else if (request
->ip_version
== 4) {
437 ip_icmp_size
= sizeof(struct IPHeader
) + sizeof(struct ICMPHeader
);
443 payload_size
= request
->packet_size
- ip_icmp_size
;
444 if (payload_size
< 0) {
448 if (payload_size
> payload_buffer_size
) {
453 memset(payload
, request
->bit_pattern
, payload_size
);
459 We've received a probe request from the main thread, so
460 fill out a payload buffer and then send the probe.
463 void icmp_handle_probe_request(struct icmp_thread_request_t
*request
)
465 char payload
[PACKET_BUFFER_SIZE
];
468 payload_size
= fill_payload(request
, payload
, PACKET_BUFFER_SIZE
);
469 if (payload_size
< 0) {
470 error(EXIT_FAILURE
, errno
, "Error constructing packet");
473 icmp_send_probe(request
, payload
, payload_size
);
477 Write the next thread request to the request pipe.
478 Update the count of pending requests and set the event
479 indicating that requests are present.
482 void send_thread_request(
483 struct net_state_t
*net_state
,
484 struct icmp_thread_request_t
*request
)
488 net_state
->platform
.thread_in_pipe_write
,
490 sizeof(struct icmp_thread_request_t
*));
492 if (byte_count
== -1) {
495 "failure writing to probe request queue");
498 EnterCriticalSection(&net_state
->platform
.pending_request_cs
);
500 net_state
->platform
.pending_request_count
++;
501 SetEvent(net_state
->platform
.pending_request_event
);
503 LeaveCriticalSection(&net_state
->platform
.pending_request_cs
);
507 Read the next thread request from the pipe, if any are pending.
508 If it is the last request in the queue, reset the pending request event.
511 If no requests are pending, return NULL.
514 struct icmp_thread_request_t
*receive_thread_request(
515 struct net_state_t
*net_state
)
517 struct icmp_thread_request_t
*request
;
519 bool pending_request
;
521 EnterCriticalSection(&net_state
->platform
.pending_request_cs
);
523 if (net_state
->platform
.pending_request_count
> 0) {
524 pending_request
= true;
525 net_state
->platform
.pending_request_count
--;
526 if (net_state
->platform
.pending_request_count
== 0) {
527 ResetEvent(net_state
->platform
.pending_request_event
);
530 pending_request
= false;
533 LeaveCriticalSection(&net_state
->platform
.pending_request_cs
);
535 if (!pending_request
) {
540 net_state
->platform
.thread_in_pipe_read
,
542 sizeof(struct icmp_thread_request_t
*));
544 if (byte_count
== -1) {
548 "failure reading probe request queue");
551 assert(byte_count
== sizeof(struct icmp_thread_request_t
*));
557 The main loop of the ICMP service thread. The loop starts
558 an overlapped read on the incoming request pipe, then waits
559 in an alertable wait for that read to complete. Because
560 the wait is alertable, ICMP probes can complete through APCs during that wait.
564 DWORD WINAPI
icmp_service_thread(LPVOID param
) {
565 struct net_state_t
*net_state
;
566 struct icmp_thread_request_t
*request
;
568 net_state
= (struct net_state_t
*)param
;
570 request
= receive_thread_request(net_state
);
571 if (request
!= NULL
) {
572 /* Start the new probe from the request */
573 icmp_handle_probe_request(request
);
576 Wait for either a request to be queued or for
577 an APC which completes an ICMP probe.
579 WaitForSingleObjectEx(
580 net_state
->platform
.pending_request_event
,
590 When we are on the main thread and need the ICMP service thread
591 to start a new probe, this is used to pass the request for the
592 new probe to the service thread.
595 void queue_thread_request(
596 struct net_state_t
*net_state
,
597 struct probe_t
*probe
,
598 const struct probe_param_t
*param
,
599 struct sockaddr_storage
*dest_sockaddr
,
600 struct sockaddr_storage
*src_sockaddr
)
602 struct icmp_thread_request_t
*request
;
604 request
= malloc(sizeof(struct icmp_thread_request_t
));
605 if (request
== NULL
) {
606 error(EXIT_FAILURE
, errno
, "failure to allocate request");
608 memset(request
, 0, sizeof(struct icmp_thread_request_t
));
610 request
->ip_version
= param
->ip_version
;
611 request
->ttl
= param
->ttl
;
612 request
->timeout
= param
->timeout
;
613 request
->packet_size
= param
->packet_size
;
614 request
->bit_pattern
= param
->bit_pattern
;
616 request
->net_state
= net_state
;
617 request
->probe
= probe
;
618 request
->dest_sockaddr
= *dest_sockaddr
;
619 request
->src_sockaddr
= *src_sockaddr
;
622 The ownership of the request is passed to the ICMP thread through the request pipe.
625 send_thread_request(net_state
, request
);
628 /* Decode the probe parameters and send a probe */
630 struct net_state_t
*net_state
,
631 const struct probe_param_t
*param
)
633 struct probe_t
*probe
;
634 struct sockaddr_storage dest_sockaddr
;
635 struct sockaddr_storage src_sockaddr
;
637 if (resolve_probe_addresses(net_state
, param
, &dest_sockaddr
,
639 printf("%d invalid-argument\n", param
->command_token
);
643 probe
= alloc_probe(net_state
, param
->command_token
);
645 printf("%d probes-exhausted\n", param
->command_token
);
649 probe
->platform
.ip_version
= param
->ip_version
;
651 queue_thread_request(
652 net_state
, probe
, param
, &dest_sockaddr
, &src_sockaddr
);
656 After we've received the result from the ICMP service thread,
657 report either the probe status, or any Windows error we
658 encountered while attempting to send the probe.
661 void complete_icmp_result(struct icmp_thread_request_t
*request
)
663 struct net_state_t
*net_state
;
664 struct probe_t
*probe
;
667 We can de-const the net_state and probe, since we are back on the main thread.
670 net_state
= (struct net_state_t
*)request
->net_state
;
671 probe
= (struct probe_t
*)request
->probe
;
673 if (request
->icmp_type
!= -1) {
674 /* Record probe result */
675 respond_to_probe(net_state
, probe
,
676 request
->icmp_type
, &request
->remote_addr
,
677 request
->round_trip_us
, 0, NULL
);
679 report_win_error(probe
->token
, request
->reply_status
);
680 free_probe(net_state
, probe
);
685 Read the status of completed probes from the ICMP service
686 thread, if any have completed.
688 void receive_replies(
689 struct net_state_t
*net_state
)
692 struct icmp_thread_request_t
*request
;
695 net_state
->platform
.thread_out_pipe_read
,
697 sizeof(struct icmp_thread_request_t
*));
699 if (read_count
== -1) {
701 EINTR and EAGAIN can occur under normal conditions, and
702 should be retried. We will retry on the next iteration.
705 if (errno
== EINTR
|| errno
== EAGAIN
) {
709 error(EXIT_FAILURE
, errno
, "thread result pipe read error");
712 assert(read_count
== sizeof(struct icmp_thread_request_t
*));
713 complete_icmp_result(request
);
715 if (request
->reply4
) {
716 free(request
->reply4
);
717 request
->reply4
= NULL
;
723 On Windows, an implementation of check_probe_timeouts is unnecessary because
724 timeouts are managed by ICMP.DLL, including a call to the I/O completion
725 routine when the time fully expires.
727 void check_probe_timeouts(
728 struct net_state_t
*net_state
)
733 As in the case of check_probe_timeouts, getting the next probe timeout is
734 unnecessary under Windows, as ICMP.DLL manages timeouts for us.
736 bool get_next_probe_timeout(
737 const struct net_state_t
*net_state
,
738 struct timeval
*timeout
)