ctdb-common: Make the argument to ctdb_sys_have_ip() const
[samba4-gss.git] / ctdb / common / system_socket.c
blob6fe4c719f00296948c90f21a80f911767c206d60
1 /*
2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Marc Dequènes (Duck) 2009
7 Copyright (C) Volker Lendecke 2012
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "replace.h"
/*
 * Use BSD struct tcphdr field names for portability.  Modern glibc
 * makes them available by default via <netinet/tcp.h> but older glibc
 * requires __FAVOR_BSD to be defined.
 *
 * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
 * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
 * set.  Including "replace.h" above causes <features.h> to be
 * indirectly included and this will not set __FAVOR_BSD because
 * _GNU_SOURCE is set in Samba's "config.h" (which is included by
 * "replace.h").
 *
 * Therefore, set __FAVOR_BSD by hand below.
 */
39 #define __FAVOR_BSD 1
40 #include "system/network.h"
42 #ifdef HAVE_NETINET_IF_ETHER_H
43 #include <netinet/if_ether.h>
44 #endif
45 #ifdef HAVE_NETINET_IP6_H
46 #include <netinet/ip6.h>
47 #endif
48 #ifdef HAVE_NETINET_ICMP6_H
49 #include <netinet/icmp6.h>
50 #endif
51 #ifdef HAVE_LINUX_IF_PACKET_H
52 #include <linux/if_packet.h>
53 #endif
55 #ifndef ETHERTYPE_IP6
56 #define ETHERTYPE_IP6 0x86dd
57 #endif
59 #include <talloc.h>
61 #include "lib/util/debug.h"
62 #include "lib/util/blocking.h"
64 #include "protocol/protocol.h"
65 #include "protocol/protocol_util.h"
67 #include "common/logging.h"
68 #include "common/system_socket.h"
/*
 * uint16 checksum for n bytes
 *
 * Sums data as a sequence of 16-bit words in network (big-endian)
 * byte order.  An odd trailing byte is treated as the high-order byte
 * of a final word that is padded with zero.  The sum is neither
 * folded nor complemented; callers do that.
 */
static uint32_t uint16_checksum(const uint8_t *data, size_t n)
{
	uint32_t sum = 0;

	while (n >= 2) {
		sum += ((uint32_t)data[0] << 8) | (uint32_t)data[1];
		data += 2;
		n -= 2;
	}

	if (n == 1) {
		/*
		 * Pad on the right.  Computing this with ntohs(*data)
		 * only gives the padded value on little-endian hosts.
		 */
		sum += (uint32_t)data[0] << 8;
	}

	return sum;
}
/*
 * Holds the list of local interface addresses obtained from
 * getifaddrs().
 */
struct ctdb_sys_local_ips_context {
	/* Head of the getifaddrs() list, NULL once it has been freed */
	struct ifaddrs *ifa;
};
94 static int ctdb_sys_local_ips_destructor(
95 struct ctdb_sys_local_ips_context *ips_ctx)
97 freeifaddrs(ips_ctx->ifa);
98 ips_ctx->ifa = NULL;
100 return 0;
103 int ctdb_sys_local_ips_init(TALLOC_CTX *ctx,
104 struct ctdb_sys_local_ips_context **ips_ctx)
106 struct ctdb_sys_local_ips_context *t = NULL;
107 int ret = 0;
109 t = talloc(ctx, struct ctdb_sys_local_ips_context);
110 if (t == NULL) {
111 return ENOMEM;
114 ret = getifaddrs(&t->ifa);
115 if (ret != 0) {
116 ret = errno;
117 talloc_free(t);
118 return ret;
121 talloc_set_destructor(t, ctdb_sys_local_ips_destructor);
122 *ips_ctx = t;
124 return ret;
127 bool ctdb_sys_local_ip_check(const struct ctdb_sys_local_ips_context *ips_ctx,
128 const ctdb_sock_addr *addr)
130 struct ifaddrs *ifa = NULL;
131 int ret;
133 for (ifa = ips_ctx->ifa; ifa != NULL; ifa = ifa->ifa_next) {
134 ctdb_sock_addr sock_addr;
135 bool match;
137 if (ifa->ifa_addr == NULL)
138 continue;
140 /* Ignore non-IPv4/IPv6 interfaces */
141 switch (ifa->ifa_addr->sa_family) {
142 case AF_INET:
143 case AF_INET6:
144 break;
145 default:
146 continue;
149 ret = ctdb_sock_addr_from_sockaddr(ifa->ifa_addr, &sock_addr);
150 if (ret != 0) {
151 return false;
154 match = ctdb_sock_addr_same_ip(&sock_addr, addr);
155 if (match) {
156 return true;
160 return false;
164 * See if the given IP is currently on an interface
166 bool ctdb_sys_have_ip(const ctdb_sock_addr *_addr)
168 int s;
169 int ret;
170 ctdb_sock_addr __addr = *_addr;
171 ctdb_sock_addr *addr = &__addr;
172 socklen_t addrlen = 0;
174 switch (addr->sa.sa_family) {
175 case AF_INET:
176 addr->ip.sin_port = 0;
177 addrlen = sizeof(struct sockaddr_in);
178 break;
179 case AF_INET6:
180 addr->ip6.sin6_port = 0;
181 addrlen = sizeof(struct sockaddr_in6);
182 break;
185 s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
186 if (s == -1) {
187 return false;
190 ret = bind(s, (struct sockaddr *)addr, addrlen);
192 close(s);
193 return ret == 0;
197 * simple TCP checksum - assumes data is multiple of 2 bytes long
199 static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
201 uint32_t sum = uint16_checksum(data, n);
202 uint16_t sum2;
204 sum += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
205 sum += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
206 sum += ip->ip_p + n;
207 sum = (sum & 0xFFFF) + (sum >> 16);
208 sum = (sum & 0xFFFF) + (sum >> 16);
209 sum2 = htons(sum);
210 sum2 = ~sum2;
211 if (sum2 == 0) {
212 return 0xFFFF;
214 return sum2;
217 static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
219 uint16_t phdr[3];
220 uint32_t sum = 0;
221 uint16_t sum2;
222 uint32_t len;
224 sum += uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
225 sum += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
227 len = htonl(n);
228 phdr[0] = len & UINT16_MAX;
229 phdr[1] = (len >> 16) & UINT16_MAX;
230 /* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
231 phdr[2] = htons(ip6->ip6_nxt);
232 sum += uint16_checksum((uint8_t *)phdr, sizeof(phdr));
234 sum += uint16_checksum(data, n);
236 sum = (sum & 0xFFFF) + (sum >> 16);
237 sum = (sum & 0xFFFF) + (sum >> 16);
238 sum2 = htons(sum);
239 sum2 = ~sum2;
240 if (sum2 == 0) {
241 return 0xFFFF;
243 return sum2;
/*
 * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
 */
250 #ifdef HAVE_PACKETSOCKET
/*
 * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
 * packets
 */

/*
 * Size of the headers making up each packet.  The expansions are
 * parenthesised so that they can be used safely inside larger
 * expressions.
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
			 sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
			    sizeof(struct ip6_hdr) + \
			    sizeof(struct nd_neighbor_advert) + \
			    sizeof(struct nd_opt_hdr) + \
			    sizeof(struct ether_addr))

/* Buffer sizes: the packet size, but never less than 64 bytes */
#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)

#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
270 static int arp_build(uint8_t *buffer,
271 size_t buflen,
272 const struct sockaddr_in *addr,
273 const struct ether_addr *hwaddr,
274 bool reply,
275 struct ether_addr **ether_dhost,
276 size_t *len)
278 size_t l = ARP_BUFFER_SIZE;
279 struct ether_header *eh;
280 struct ether_arp *ea;
281 struct arphdr *ah;
283 if (addr->sin_family != AF_INET) {
284 return EINVAL;
287 if (buflen < l) {
288 return EMSGSIZE;
291 memset(buffer, 0 , l);
293 eh = (struct ether_header *)buffer;
294 memset(eh->ether_dhost, 0xff, ETH_ALEN);
295 memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
296 eh->ether_type = htons(ETHERTYPE_ARP);
298 ea = (struct ether_arp *)(buffer + sizeof(struct ether_header));
299 ah = &ea->ea_hdr;
300 ah->ar_hrd = htons(ARPHRD_ETHER);
301 ah->ar_pro = htons(ETH_P_IP);
302 ah->ar_hln = ETH_ALEN;
303 ah->ar_pln = sizeof(ea->arp_spa);
305 if (! reply) {
306 ah->ar_op = htons(ARPOP_REQUEST);
307 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
308 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
309 memset(ea->arp_tha, 0, ETH_ALEN);
310 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
311 } else {
312 ah->ar_op = htons(ARPOP_REPLY);
313 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
314 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
315 memcpy(ea->arp_tha, hwaddr, ETH_ALEN);
316 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
319 *ether_dhost = (struct ether_addr *)eh->ether_dhost;
320 *len = l;
321 return 0;
324 static int ip6_na_build(uint8_t *buffer,
325 size_t buflen,
326 const struct sockaddr_in6 *addr,
327 const struct ether_addr *hwaddr,
328 struct ether_addr **ether_dhost,
329 size_t *len)
331 size_t l = IP6_NA_BUFFER_SIZE;
332 struct ether_header *eh;
333 struct ip6_hdr *ip6;
334 struct nd_neighbor_advert *nd_na;
335 struct nd_opt_hdr *nd_oh;
336 struct ether_addr *ea;
337 int ret;
339 if (addr->sin6_family != AF_INET6) {
340 return EINVAL;
343 if (buflen < l) {
344 return EMSGSIZE;
347 memset(buffer, 0 , l);
349 eh = (struct ether_header *)buffer;
351 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
352 * section 7) - note memset 0 above!
354 eh->ether_dhost[0] = 0x33;
355 eh->ether_dhost[1] = 0x33;
356 eh->ether_dhost[5] = 0x01;
357 memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
358 eh->ether_type = htons(ETHERTYPE_IP6);
360 ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
361 ip6->ip6_vfc = 6 << 4;
362 ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
363 sizeof(struct nd_opt_hdr) +
364 ETH_ALEN);
365 ip6->ip6_nxt = IPPROTO_ICMPV6;
366 ip6->ip6_hlim = 255;
367 ip6->ip6_src = addr->sin6_addr;
368 /* all-nodes multicast */
370 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
371 if (ret != 1) {
372 return EIO;
375 nd_na = (struct nd_neighbor_advert *)(buffer +
376 sizeof(struct ether_header) +
377 sizeof(struct ip6_hdr));
378 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
379 nd_na->nd_na_code = 0;
380 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
381 nd_na->nd_na_target = addr->sin6_addr;
383 /* Option: Target link-layer address */
384 nd_oh = (struct nd_opt_hdr *)(buffer +
385 sizeof(struct ether_header) +
386 sizeof(struct ip6_hdr) +
387 sizeof(struct nd_neighbor_advert));
388 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
389 nd_oh->nd_opt_len = 1; /* multiple of 8 octets */
391 ea = (struct ether_addr *)(buffer +
392 sizeof(struct ether_header) +
393 sizeof(struct ip6_hdr) +
394 sizeof(struct nd_neighbor_advert) +
395 sizeof(struct nd_opt_hdr));
396 memcpy(ea, hwaddr, ETH_ALEN);
398 nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na,
399 ntohs(ip6->ip6_plen),
400 ip6);
402 *ether_dhost = (struct ether_addr *)eh->ether_dhost;
403 *len = l;
404 return 0;
407 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
409 int s;
410 struct sockaddr_ll sall = {0};
411 struct ifreq if_hwaddr = {
412 .ifr_ifru = {
413 .ifru_flags = 0
416 uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
417 struct ifreq ifr = {
418 .ifr_ifru = {
419 .ifru_flags = 0
422 struct ether_addr *hwaddr = NULL;
423 struct ether_addr *ether_dhost = NULL;
424 size_t len = 0;
425 int ret = 0;
427 s = socket(AF_PACKET, SOCK_RAW, 0);
428 if (s == -1) {
429 ret = errno;
430 DBG_ERR("Failed to open raw socket\n");
431 return ret;
433 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
435 /* Find interface */
436 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
437 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
438 ret = errno;
439 DBG_ERR("Interface '%s' not found\n", iface);
440 goto fail;
443 /* Get MAC address */
444 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
445 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
446 if ( ret < 0 ) {
447 ret = errno;
448 DBG_ERR("ioctl failed\n");
449 goto fail;
451 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
452 ret = 0;
453 D_DEBUG("Ignoring loopback arp request\n");
454 goto fail;
456 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
457 ret = EINVAL;
458 DBG_ERR("Not an ethernet address family (0x%x)\n",
459 if_hwaddr.ifr_hwaddr.sa_family);
460 goto fail;;
463 /* Set up most of destination address structure */
464 sall.sll_family = AF_PACKET;
465 sall.sll_halen = sizeof(struct ether_addr);
466 sall.sll_protocol = htons(ETH_P_ALL);
467 sall.sll_ifindex = ifr.ifr_ifindex;
469 /* For clarity */
470 hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
472 switch (addr->ip.sin_family) {
473 case AF_INET:
474 /* Send gratuitous ARP */
475 ret = arp_build(buffer,
476 sizeof(buffer),
477 &addr->ip,
478 hwaddr,
479 false,
480 &ether_dhost,
481 &len);
482 if (ret != 0) {
483 DBG_ERR("Failed to build ARP request\n");
484 goto fail;
487 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
489 ret = sendto(s,
490 buffer,
491 len,
493 (struct sockaddr *)&sall,
494 sizeof(sall));
495 if (ret < 0 ) {
496 ret = errno;
497 DBG_ERR("Failed sendto\n");
498 goto fail;
501 /* Send unsolicited ARP reply */
502 ret = arp_build(buffer,
503 sizeof(buffer),
504 &addr->ip,
505 hwaddr,
506 true,
507 &ether_dhost,
508 &len);
509 if (ret != 0) {
510 DBG_ERR("Failed to build ARP reply\n");
511 goto fail;
514 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
516 ret = sendto(s,
517 buffer,
518 len,
520 (struct sockaddr *)&sall,
521 sizeof(sall));
522 if (ret < 0 ) {
523 ret = errno;
524 DBG_ERR("Failed sendto\n");
525 goto fail;
528 close(s);
529 break;
531 case AF_INET6:
532 ret = ip6_na_build(buffer,
533 sizeof(buffer),
534 &addr->ip6,
535 hwaddr,
536 &ether_dhost,
537 &len);
538 if (ret != 0) {
539 DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
540 goto fail;
543 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
545 ret = sendto(s,
546 buffer,
547 len,
549 (struct sockaddr *)&sall,
550 sizeof(sall));
551 if (ret < 0 ) {
552 ret = errno;
553 DBG_ERR("Failed sendto\n");
554 goto fail;
557 close(s);
558 break;
560 default:
561 ret = EINVAL;
562 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
563 addr->ip.sin_family);
564 goto fail;
567 return 0;
569 fail:
570 close(s);
571 return ret;
574 #else /* HAVE_PACKETSOCKET */
576 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
578 /* Not implemented */
579 return ENOSYS;
582 #endif /* HAVE_PACKETSOCKET */
/*
 * Size of an IPv4/IPv6 header followed by a TCP header with no
 * options.  The expansions are parenthesised so that they can be used
 * safely inside larger expressions.
 */
#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
			     sizeof(struct tcphdr))

#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
			     sizeof(struct tcphdr))
591 static int tcp4_build(uint8_t *buf,
592 size_t buflen,
593 const struct sockaddr_in *src,
594 const struct sockaddr_in *dst,
595 uint32_t seq,
596 uint32_t ack,
597 int rst,
598 size_t *len)
600 size_t l = IP4_TCP_BUFFER_SIZE;
601 struct {
602 struct ip ip;
603 struct tcphdr tcp;
604 } *ip4pkt;
606 if (l != sizeof(*ip4pkt)) {
607 return EMSGSIZE;
610 if (buflen < l) {
611 return EMSGSIZE;
614 ip4pkt = (void *)buf;
615 memset(ip4pkt, 0, l);
617 ip4pkt->ip.ip_v = 4;
618 ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
619 ip4pkt->ip.ip_len = htons(sizeof(ip4pkt));
620 ip4pkt->ip.ip_ttl = 255;
621 ip4pkt->ip.ip_p = IPPROTO_TCP;
622 ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
623 ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
624 ip4pkt->ip.ip_sum = 0;
626 ip4pkt->tcp.th_sport = src->sin_port;
627 ip4pkt->tcp.th_dport = dst->sin_port;
628 ip4pkt->tcp.th_seq = seq;
629 ip4pkt->tcp.th_ack = ack;
630 ip4pkt->tcp.th_flags = 0;
631 ip4pkt->tcp.th_flags |= TH_ACK;
632 if (rst) {
633 ip4pkt->tcp.th_flags |= TH_RST;
635 ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
636 /* this makes it easier to spot in a sniffer */
637 ip4pkt->tcp.th_win = htons(1234);
638 ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
639 sizeof(ip4pkt->tcp),
640 &ip4pkt->ip);
642 *len = l;
643 return 0;
646 static int tcp6_build(uint8_t *buf,
647 size_t buflen,
648 const struct sockaddr_in6 *src,
649 const struct sockaddr_in6 *dst,
650 uint32_t seq,
651 uint32_t ack,
652 int rst,
653 size_t *len)
655 size_t l = IP6_TCP_BUFFER_SIZE;
656 struct {
657 struct ip6_hdr ip6;
658 struct tcphdr tcp;
659 } *ip6pkt;
661 if (l != sizeof(*ip6pkt)) {
662 return EMSGSIZE;
665 if (buflen < l) {
666 return EMSGSIZE;
669 ip6pkt = (void *)buf;
670 memset(ip6pkt, 0, l);
672 ip6pkt->ip6.ip6_vfc = 6 << 4;
673 ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
674 ip6pkt->ip6.ip6_nxt = IPPROTO_TCP;
675 ip6pkt->ip6.ip6_hlim = 64;
676 ip6pkt->ip6.ip6_src = src->sin6_addr;
677 ip6pkt->ip6.ip6_dst = dst->sin6_addr;
679 ip6pkt->tcp.th_sport = src->sin6_port;
680 ip6pkt->tcp.th_dport = dst->sin6_port;
681 ip6pkt->tcp.th_seq = seq;
682 ip6pkt->tcp.th_ack = ack;
683 ip6pkt->tcp.th_flags = 0;
684 ip6pkt->tcp.th_flags |= TH_ACK;
685 if (rst) {
686 ip6pkt->tcp.th_flags |= TH_RST;
688 ip6pkt->tcp.th_off = sizeof(ip6pkt->tcp)/sizeof(uint32_t);
689 /* this makes it easier to spot in a sniffer */
690 ip6pkt->tcp.th_win = htons(1234);
691 ip6pkt->tcp.th_sum = ip6_checksum((uint8_t *)&ip6pkt->tcp,
692 sizeof(ip6pkt->tcp),
693 &ip6pkt->ip6);
695 *len = l;
696 return 0;
700 * Send tcp segment from the specified IP/port to the specified
701 * destination IP/port.
703 * This is used to trigger the receiving host into sending its own ACK,
704 * which should trigger early detection of TCP reset by the client
705 * after IP takeover
707 * This can also be used to send RST segments (if rst is true) and also
708 * if correct seq and ack numbers are provided.
710 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
711 const ctdb_sock_addr *src,
712 uint32_t seq,
713 uint32_t ack,
714 int rst)
716 uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
717 size_t len = 0;
718 int ret;
719 int s;
720 uint32_t one = 1;
721 struct sockaddr_in6 tmpdest = { 0 };
722 int saved_errno;
724 switch (src->ip.sin_family) {
725 case AF_INET:
726 ret = tcp4_build(buf,
727 sizeof(buf),
728 &src->ip,
729 &dest->ip,
730 seq,
731 ack,
732 rst,
733 &len);
734 if (ret != 0) {
735 DBG_ERR("Failed to build TCP packet (%d)\n", ret);
736 return ret;
739 /* open a raw socket to send this segment from */
740 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
741 if (s == -1) {
742 DBG_ERR("Failed to open raw socket (%s)\n",
743 strerror(errno));
744 return -1;
747 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
748 if (ret != 0) {
749 DBG_ERR("Failed to setup IP headers (%s)\n",
750 strerror(errno));
751 close(s);
752 return -1;
755 ret = sendto(s,
756 buf,
757 len,
759 (const struct sockaddr *)&dest->ip,
760 sizeof(dest->ip));
761 saved_errno = errno;
762 close(s);
763 if (ret == -1) {
764 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
765 return -1;
767 if ((size_t)ret != len) {
768 DBG_ERR("Failed sendto - didn't send full packet\n");
769 return -1;
771 break;
773 case AF_INET6:
774 ret = tcp6_build(buf,
775 sizeof(buf),
776 &src->ip6,
777 &dest->ip6,
778 seq,
779 ack,
780 rst,
781 &len);
782 if (ret != 0) {
783 DBG_ERR("Failed to build TCP packet (%d)\n", ret);
784 return ret;
787 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
788 if (s == -1) {
789 DBG_ERR("Failed to open sending socket\n");
790 return -1;
794 * sendto() on an IPv6 raw socket requires the port to
795 * be either 0 or a protocol value
797 tmpdest = dest->ip6;
798 tmpdest.sin6_port = 0;
800 ret = sendto(s,
801 buf,
802 len,
804 (const struct sockaddr *)&tmpdest,
805 sizeof(tmpdest));
806 saved_errno = errno;
807 close(s);
808 if (ret == -1) {
809 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
810 return -1;
812 if ((size_t)ret != len) {
813 DBG_ERR("Failed sendto - didn't send full packet\n");
814 return -1;
816 break;
818 default:
819 DBG_ERR("Not an ipv4/v6 address\n");
820 return -1;
823 return 0;
/*
 * Extract addresses, ports, sequence numbers and, optionally, the RST
 * flag and window from the IPv4 TCP packet of length pktlen at ip_pkt.
 *
 * Ports, ack_seq, seq and window are copied from the packet without
 * byte-order conversion.  rst and window may be NULL if not wanted.
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short and
 * ENOMSG if it is not an unfragmented IPv4 TCP packet.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *iph = NULL;
	const struct tcphdr *tcph = NULL;
	size_t ip_hdr_len;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}

	iph = (const struct ip *)ip_pkt;

	/* IPv4 only */
	if (iph->ip_v != 4) {
		return ENOMSG;
	}
	/* Don't look at fragments */
	if ((ntohs(iph->ip_off) & 0x1fff) != 0) {
		return ENOMSG;
	}
	/* TCP only */
	if (iph->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/* Ensure there is enough of the packet to gather required fields */
	ip_hdr_len = iph->ip_hl * sizeof(uint32_t);
	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
		return EMSGSIZE;
	}

	tcph = (const struct tcphdr *)(ip_pkt + ip_hdr_len);

	src->sin_family = AF_INET;
	src->sin_addr.s_addr = iph->ip_src.s_addr;
	src->sin_port = tcph->th_sport;

	dst->sin_family = AF_INET;
	dst->sin_addr.s_addr = iph->ip_dst.s_addr;
	dst->sin_port = tcph->th_dport;

	*ack_seq = tcph->th_ack;
	*seq = tcph->th_seq;
	if (window != NULL) {
		*window = tcph->th_win;
	}
	if (rst != NULL) {
		*rst = tcph->th_flags & TH_RST;
	}

	return 0;
}
/*
 * Extract addresses, ports, sequence numbers and, optionally, the RST
 * flag and window from the IPv6 TCP packet of length pktlen at ip_pkt.
 *
 * Ports, ack_seq, seq and window are copied from the packet without
 * byte-order conversion.  rst and window may be NULL if not wanted.
 * The TCP header is expected directly after the IPv6 header.
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short and
 * ENOMSG if it is not an IPv6 packet with TCP as the next header.
 */
static int tcp6_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in6 *src,
			struct sockaddr_in6 *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const size_t needed = sizeof(struct ip6_hdr) +
			      offsetof(struct tcphdr, th_sum);
	const struct ip6_hdr *ip6h = NULL;
	const struct tcphdr *tcph = NULL;

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < needed) {
		return EMSGSIZE;
	}

	ip6h = (const struct ip6_hdr *)ip_pkt;

	/* IPv6 only */
	if ((ip6h->ip6_vfc >> 4) != 6) {
		return ENOMSG;
	}

	/* TCP only */
	if (ip6h->ip6_nxt != IPPROTO_TCP) {
		return ENOMSG;
	}

	tcph = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));

	src->sin6_family = AF_INET6;
	src->sin6_port = tcph->th_sport;
	src->sin6_addr = ip6h->ip6_src;

	dst->sin6_family = AF_INET6;
	dst->sin6_port = tcph->th_dport;
	dst->sin6_addr = ip6h->ip6_dst;

	*ack_seq = tcph->th_ack;
	*seq = tcph->th_seq;
	if (window != NULL) {
		*window = tcph->th_win;
	}
	if (rst != NULL) {
		*rst = tcph->th_flags & TH_RST;
	}

	return 0;
}
/*
 * Packet capture
 *
 * If AF_PACKET is available then use a raw socket otherwise use pcap.
 * wscript has checked to make sure that pcap is available if needed.
 */
943 #if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
946 * This function is used to open a raw socket to capture from
948 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
950 int s, ret;
952 /* Open a socket to capture all traffic */
953 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
954 if (s == -1) {
955 DBG_ERR("Failed to open raw socket\n");
956 return -1;
959 DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", s);
961 ret = set_blocking(s, false);
962 if (ret != 0) {
963 DBG_ERR("Failed to set socket non-blocking (%s)\n",
964 strerror(errno));
965 close(s);
966 return -1;
969 set_close_on_exec(s);
971 return s;
/*
 * This function is used to do any additional cleanup required when closing
 * a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * Always returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	/* Nothing was allocated when the raw socket was opened */
	return 0;
}
986 * called when the raw socket becomes readable
988 int ctdb_sys_read_tcp_packet(int s, void *private_data,
989 ctdb_sock_addr *src,
990 ctdb_sock_addr *dst,
991 uint32_t *ack_seq,
992 uint32_t *seq,
993 int *rst,
994 uint16_t *window)
996 ssize_t nread;
997 uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
998 struct ether_header *eth;
999 int ret;
1001 nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
1002 if (nread == -1) {
1003 return errno;
1005 if ((size_t)nread < sizeof(*eth)) {
1006 return EMSGSIZE;
1009 ZERO_STRUCTP(src);
1010 ZERO_STRUCTP(dst);
1012 /* Ethernet */
1013 eth = (struct ether_header *)pkt;
1015 /* we want either IPv4 or IPv6 */
1016 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
1017 ret = tcp4_extract(pkt + sizeof(struct ether_header),
1018 (size_t)nread - sizeof(struct ether_header),
1019 &src->ip,
1020 &dst->ip,
1021 ack_seq,
1022 seq,
1023 rst,
1024 window);
1025 return ret;
1027 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
1028 ret = tcp6_extract(pkt + sizeof(struct ether_header),
1029 (size_t)nread - sizeof(struct ether_header),
1030 &src->ip6,
1031 &dst->ip6,
1032 ack_seq,
1033 seq,
1034 rst,
1035 window);
1036 return ret;
1039 return ENOMSG;
1042 #else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
1044 #include <pcap.h>
/*
 * Assume this exists if pcap.h exists - it has been around for a
 * while
 */
1050 #include <pcap/sll.h>
1052 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
1054 char errbuf[PCAP_ERRBUF_SIZE];
1055 pcap_t *pt;
1056 int pcap_packet_type;
1057 const char *t = NULL;
1058 int fd;
1059 int ret;
1061 pt = pcap_create(iface, errbuf);
1062 if (pt == NULL) {
1063 DBG_ERR("Failed to open pcap capture device %s (%s)\n",
1064 iface,
1065 errbuf);
1066 return -1;
1069 * pcap isn't very clear about defaults...
1071 ret = pcap_set_snaplen(pt, 100);
1072 if (ret < 0) {
1073 DBG_ERR("Failed to set snaplen for pcap capture\n");
1074 goto fail;
1076 ret = pcap_set_promisc(pt, 0);
1077 if (ret < 0) {
1078 DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
1079 goto fail;
1081 ret = pcap_set_timeout(pt, 0);
1082 if (ret < 0) {
1083 DBG_ERR("Failed to set timeout for pcap capture\n");
1084 goto fail;
1086 #ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
1087 ret = pcap_set_immediate_mode(pt, 1);
1088 if (ret < 0) {
1089 DBG_ERR("Failed to set immediate mode for pcap capture\n");
1090 goto fail;
1092 #endif
1093 ret = pcap_activate(pt);
1094 if (ret < 0) {
1095 DBG_ERR("Failed to activate pcap capture\n");
1096 goto fail;
1099 pcap_packet_type = pcap_datalink(pt);
1100 switch (pcap_packet_type) {
1101 case DLT_EN10MB:
1102 t = "DLT_EN10MB";
1103 break;
1104 case DLT_LINUX_SLL:
1105 t = "DLT_LINUX_SLL";
1106 break;
1107 #ifdef DLT_LINUX_SLL2
1108 case DLT_LINUX_SLL2:
1109 t = "DLT_LINUX_SLL2";
1110 break;
1111 #endif /* DLT_LINUX_SLL2 */
1112 default:
1113 DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
1114 goto fail;
1117 fd = pcap_get_selectable_fd(pt);
1118 DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
1120 fd);
1122 *((pcap_t **)private_data) = pt;
1123 return fd;
1125 fail:
1126 pcap_close(pt);
1127 return -1;
1130 int ctdb_sys_close_capture_socket(void *private_data)
1132 pcap_t *pt = (pcap_t *)private_data;
1133 pcap_close(pt);
1134 return 0;
1137 int ctdb_sys_read_tcp_packet(int s,
1138 void *private_data,
1139 ctdb_sock_addr *src,
1140 ctdb_sock_addr *dst,
1141 uint32_t *ack_seq,
1142 uint32_t *seq,
1143 int *rst,
1144 uint16_t *window)
1146 int ret;
1147 struct pcap_pkthdr pkthdr;
1148 const u_char *buffer;
1149 pcap_t *pt = (pcap_t *)private_data;
1150 int pcap_packet_type;
1151 uint16_t ether_type;
1152 size_t ll_hdr_len;
1154 buffer=pcap_next(pt, &pkthdr);
1155 if (buffer==NULL) {
1156 return ENOMSG;
1159 ZERO_STRUCTP(src);
1160 ZERO_STRUCTP(dst);
1162 pcap_packet_type = pcap_datalink(pt);
1163 switch (pcap_packet_type) {
1164 case DLT_EN10MB: {
1165 const struct ether_header *eth =
1166 (const struct ether_header *)buffer;
1167 ether_type = ntohs(eth->ether_type);
1168 ll_hdr_len = sizeof(struct ether_header);
1169 break;
1171 case DLT_LINUX_SLL: {
1172 const struct sll_header *sll =
1173 (const struct sll_header *)buffer;
1174 uint16_t arphrd_type = ntohs(sll->sll_hatype);
1175 switch (arphrd_type) {
1176 case ARPHRD_ETHER:
1177 case ARPHRD_INFINIBAND:
1178 break;
1179 default:
1180 DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
1181 arphrd_type);
1182 return EPROTONOSUPPORT;
1184 ether_type = ntohs(sll->sll_protocol);
1185 ll_hdr_len = SLL_HDR_LEN;
1186 break;
1188 #ifdef DLT_LINUX_SLL2
1189 case DLT_LINUX_SLL2: {
1190 const struct sll2_header *sll2 =
1191 (const struct sll2_header *)buffer;
1192 uint16_t arphrd_type = ntohs(sll2->sll2_hatype);
1193 switch (arphrd_type) {
1194 case ARPHRD_ETHER:
1195 case ARPHRD_INFINIBAND:
1196 break;
1197 default:
1198 DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
1199 arphrd_type);
1200 return EPROTONOSUPPORT;
1202 ether_type = ntohs(sll2->sll2_protocol);
1203 ll_hdr_len = SLL2_HDR_LEN;
1204 break;
1206 #endif /* DLT_LINUX_SLL2 */
1207 default:
1208 DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
1209 return EPROTONOSUPPORT;
1212 switch (ether_type) {
1213 case ETHERTYPE_IP:
1214 ret = tcp4_extract(buffer + ll_hdr_len,
1215 (size_t)pkthdr.caplen - ll_hdr_len,
1216 &src->ip,
1217 &dst->ip,
1218 ack_seq,
1219 seq,
1220 rst,
1221 window);
1222 break;
1223 case ETHERTYPE_IP6:
1224 ret = tcp6_extract(buffer + ll_hdr_len,
1225 (size_t)pkthdr.caplen - ll_hdr_len,
1226 &src->ip6,
1227 &dst->ip6,
1228 ack_seq,
1229 seq,
1230 rst,
1231 window);
1232 break;
1233 case ETHERTYPE_ARP:
1234 /* Silently ignore ARP packets */
1235 return EPROTO;
1236 default:
1237 DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
1238 return EPROTO;
1241 return ret;
1244 #endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */