drsuapi.idl: fix source_dsa spelling
[samba4-gss.git] / ctdb / common / system_socket.c
blobb4275b1fc6460a71e3a9ad7ee2c634b5c02c922d
1 /*
2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Marc Dequènes (Duck) 2009
7 Copyright (C) Volker Lendecke 2012
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "replace.h"
26 * Use BSD struct tcphdr field names for portability. Modern glibc
27 * makes them available by default via <netinet/tcp.h> but older glibc
28 * requires __FAVOR_BSD to be defined.
30 * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
31 * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
32 * set. Including "replace.h" above causes <features.h> to be
33 * indirectly included and this will not set __FAVOR_BSD because
34 * _GNU_SOURCE is set in Samba's "config.h" (which is included by
35 * "replace.h").
37 * Therefore, set __FAVOR_BSD by hand below.
39 #define __FAVOR_BSD 1
40 #include "system/network.h"
42 #ifdef HAVE_NETINET_IF_ETHER_H
43 #include <netinet/if_ether.h>
44 #endif
45 #ifdef HAVE_NETINET_IP6_H
46 #include <netinet/ip6.h>
47 #endif
48 #ifdef HAVE_NETINET_ICMP6_H
49 #include <netinet/icmp6.h>
50 #endif
51 #ifdef HAVE_LINUX_IF_PACKET_H
52 #include <linux/if_packet.h>
53 #endif
55 #ifndef ETHERTYPE_IP6
56 #define ETHERTYPE_IP6 0x86dd
57 #endif
59 #include <talloc.h>
61 #include "lib/util/debug.h"
62 #include "lib/util/blocking.h"
64 #include "protocol/protocol.h"
65 #include "protocol/protocol_util.h"
67 #include "common/logging.h"
68 #include "common/system_socket.h"
/*
 * Sum n bytes of data as big-endian (network order) 16-bit words
 *
 * The result is the raw 32-bit accumulator: carries are not folded and
 * the sum is not complemented - callers do that.  If n is odd then the
 * final byte is treated as the high-order byte of a word padded with a
 * zero byte, as in the Internet checksum algorithm (RFC 1071).
 */
static uint32_t uint16_checksum(uint8_t *data, size_t n)
{
	uint32_t sum = 0;
	uint16_t value;

	while (n >= 2) {
		/* memcpy() avoids an unaligned 16-bit load */
		memcpy(&value, data, 2);
		sum += (uint32_t)ntohs(value);
		data += 2;
		n -= 2;
	}
	if (n == 1) {
		/*
		 * Shift explicitly instead of using ntohs(): ntohs()
		 * is a no-op on big-endian hosts and would leave the
		 * byte in the low-order half there.
		 */
		sum += (uint32_t)(*data) << 8;
	}
	return sum;
}
/*
 * Holds the interface address list obtained from getifaddrs() so that
 * it can be queried repeatedly and released via a talloc destructor
 */
struct ctdb_sys_local_ips_context {
	struct ifaddrs *ifa;	/* head of the getifaddrs() list */
};

/*
 * Destructor: release the getifaddrs() list and clear the stale
 * pointer.  Always returns 0.
 */
static int ctdb_sys_local_ips_destructor(
	struct ctdb_sys_local_ips_context *ips_ctx)
{
	struct ifaddrs *list = ips_ctx->ifa;

	ips_ctx->ifa = NULL;
	freeifaddrs(list);

	return 0;
}
103 int ctdb_sys_local_ips_init(TALLOC_CTX *ctx,
104 struct ctdb_sys_local_ips_context **ips_ctx)
106 struct ctdb_sys_local_ips_context *t = NULL;
107 int ret = 0;
109 t = talloc(ctx, struct ctdb_sys_local_ips_context);
110 if (t == NULL) {
111 return ENOMEM;
114 ret = getifaddrs(&t->ifa);
115 if (ret != 0) {
116 ret = errno;
117 talloc_free(t);
118 return ret;
121 talloc_set_destructor(t, ctdb_sys_local_ips_destructor);
122 *ips_ctx = t;
124 return ret;
127 bool ctdb_sys_local_ip_check(const struct ctdb_sys_local_ips_context *ips_ctx,
128 const ctdb_sock_addr *addr)
130 struct ifaddrs *ifa = NULL;
131 int ret;
133 for (ifa = ips_ctx->ifa; ifa != NULL; ifa = ifa->ifa_next) {
134 ctdb_sock_addr sock_addr;
135 bool match;
137 if (ifa->ifa_addr == NULL)
138 continue;
140 /* Ignore non-IPv4/IPv6 interfaces */
141 switch (ifa->ifa_addr->sa_family) {
142 case AF_INET:
143 case AF_INET6:
144 break;
145 default:
146 continue;
149 ret = ctdb_sock_addr_from_sockaddr(ifa->ifa_addr, &sock_addr);
150 if (ret != 0) {
151 return false;
154 match = ctdb_sock_addr_same_ip(&sock_addr, addr);
155 if (match) {
156 return true;
160 return false;
163 bool ctdb_sys_bind_ip_check(const ctdb_sock_addr *_addr)
165 int s;
166 int ret;
167 ctdb_sock_addr __addr = *_addr;
168 ctdb_sock_addr *addr = &__addr;
169 socklen_t addrlen = 0;
171 switch (addr->sa.sa_family) {
172 case AF_INET:
173 addr->ip.sin_port = 0;
174 addrlen = sizeof(struct sockaddr_in);
175 break;
176 case AF_INET6:
177 addr->ip6.sin6_port = 0;
178 addrlen = sizeof(struct sockaddr_in6);
179 break;
182 s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
183 if (s == -1) {
184 return false;
187 ret = bind(s, (struct sockaddr *)addr, addrlen);
189 close(s);
190 return ret == 0;
194 * See if the given IP is currently on an interface
196 bool ctdb_sys_have_ip(const ctdb_sock_addr *addr)
198 struct ctdb_sys_local_ips_context *ips_ctx = NULL;
199 bool have_ip;
200 int ret;
202 ret = ctdb_sys_local_ips_init(NULL, &ips_ctx);
203 if (ret != 0) {
204 DBG_DEBUG("Failed to get local addresses, depending on bind\n");
205 have_ip = ctdb_sys_bind_ip_check(addr);
206 return have_ip;
209 have_ip = ctdb_sys_local_ip_check(ips_ctx, addr);
210 talloc_free(ips_ctx);
212 return have_ip;
216 * simple TCP checksum - assumes data is multiple of 2 bytes long
218 static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
220 uint32_t sum = uint16_checksum(data, n);
221 uint16_t sum2;
223 sum += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
224 sum += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
225 sum += ip->ip_p + n;
226 sum = (sum & 0xFFFF) + (sum >> 16);
227 sum = (sum & 0xFFFF) + (sum >> 16);
228 sum2 = htons(sum);
229 sum2 = ~sum2;
230 if (sum2 == 0) {
231 return 0xFFFF;
233 return sum2;
/*
 * Upper-layer checksum for IPv6
 *
 * Sums the n bytes at data together with the pseudo-header values taken
 * from ip6 (source address, destination address, n as a 32-bit length
 * and the next header value), folds the carries and returns the
 * complement in network byte order.  A result of 0 is returned as
 * 0xFFFF.
 */
static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint32_t seglen = (uint32_t)n;
	uint32_t acc = 0;
	uint16_t cksum;

	acc += uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);

	/*
	 * The 32-bit length contributes its two 16-bit halves to the
	 * sum.  The order in which they are added is irrelevant, so
	 * this needs no byte swapping.
	 */
	acc += (seglen >> 16) + (seglen & UINT16_MAX);
	/* ip6_nxt is only 8 bits, so it is added as the low-order byte */
	acc += ip6->ip6_nxt;

	acc += uint16_checksum(data, n);

	/* Fold twice: the first fold can itself produce a carry */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	cksum = (uint16_t)~htons((uint16_t)acc);

	return (cksum == 0) ? 0xFFFF : cksum;
}
266 * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
269 #ifdef HAVE_PACKETSOCKET
/*
 * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
 * packets
 */

/*
 * Packet sizes.  Each expansion is fully parenthesised so that the
 * macro behaves as a single value wherever it is used.
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
			 sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
			    sizeof(struct ip6_hdr) + \
			    sizeof(struct nd_neighbor_advert) + \
			    sizeof(struct nd_opt_hdr) + \
			    sizeof(struct ether_addr))

/* Buffers are never smaller than 64 bytes */
#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)

#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
289 static int arp_build(uint8_t *buffer,
290 size_t buflen,
291 const struct sockaddr_in *addr,
292 const struct ether_addr *hwaddr,
293 bool reply,
294 struct ether_addr **ether_dhost,
295 size_t *len)
297 size_t l = ARP_BUFFER_SIZE;
298 struct ether_header *eh;
299 struct ether_arp *ea;
300 struct arphdr *ah;
302 if (addr->sin_family != AF_INET) {
303 return EINVAL;
306 if (buflen < l) {
307 return EMSGSIZE;
310 memset(buffer, 0 , l);
312 eh = (struct ether_header *)buffer;
313 memset(eh->ether_dhost, 0xff, ETH_ALEN);
314 memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
315 eh->ether_type = htons(ETHERTYPE_ARP);
317 ea = (struct ether_arp *)(buffer + sizeof(struct ether_header));
318 ah = &ea->ea_hdr;
319 ah->ar_hrd = htons(ARPHRD_ETHER);
320 ah->ar_pro = htons(ETH_P_IP);
321 ah->ar_hln = ETH_ALEN;
322 ah->ar_pln = sizeof(ea->arp_spa);
324 if (! reply) {
325 ah->ar_op = htons(ARPOP_REQUEST);
326 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
327 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
328 memset(ea->arp_tha, 0, ETH_ALEN);
329 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
330 } else {
331 ah->ar_op = htons(ARPOP_REPLY);
332 memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
333 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
334 memcpy(ea->arp_tha, hwaddr, ETH_ALEN);
335 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
338 *ether_dhost = (struct ether_addr *)eh->ether_dhost;
339 *len = l;
340 return 0;
343 static int ip6_na_build(uint8_t *buffer,
344 size_t buflen,
345 const struct sockaddr_in6 *addr,
346 const struct ether_addr *hwaddr,
347 struct ether_addr **ether_dhost,
348 size_t *len)
350 size_t l = IP6_NA_BUFFER_SIZE;
351 struct ether_header *eh;
352 struct ip6_hdr *ip6;
353 struct nd_neighbor_advert *nd_na;
354 struct nd_opt_hdr *nd_oh;
355 struct ether_addr *ea;
356 int ret;
358 if (addr->sin6_family != AF_INET6) {
359 return EINVAL;
362 if (buflen < l) {
363 return EMSGSIZE;
366 memset(buffer, 0 , l);
368 eh = (struct ether_header *)buffer;
370 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
371 * section 7) - note memset 0 above!
373 eh->ether_dhost[0] = 0x33;
374 eh->ether_dhost[1] = 0x33;
375 eh->ether_dhost[5] = 0x01;
376 memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
377 eh->ether_type = htons(ETHERTYPE_IP6);
379 ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
380 ip6->ip6_vfc = 6 << 4;
381 ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
382 sizeof(struct nd_opt_hdr) +
383 ETH_ALEN);
384 ip6->ip6_nxt = IPPROTO_ICMPV6;
385 ip6->ip6_hlim = 255;
386 ip6->ip6_src = addr->sin6_addr;
387 /* all-nodes multicast */
389 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
390 if (ret != 1) {
391 return EIO;
394 nd_na = (struct nd_neighbor_advert *)(buffer +
395 sizeof(struct ether_header) +
396 sizeof(struct ip6_hdr));
397 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
398 nd_na->nd_na_code = 0;
399 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
400 nd_na->nd_na_target = addr->sin6_addr;
402 /* Option: Target link-layer address */
403 nd_oh = (struct nd_opt_hdr *)(buffer +
404 sizeof(struct ether_header) +
405 sizeof(struct ip6_hdr) +
406 sizeof(struct nd_neighbor_advert));
407 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
408 nd_oh->nd_opt_len = 1; /* multiple of 8 octets */
410 ea = (struct ether_addr *)(buffer +
411 sizeof(struct ether_header) +
412 sizeof(struct ip6_hdr) +
413 sizeof(struct nd_neighbor_advert) +
414 sizeof(struct nd_opt_hdr));
415 memcpy(ea, hwaddr, ETH_ALEN);
417 nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na,
418 ntohs(ip6->ip6_plen),
419 ip6);
421 *ether_dhost = (struct ether_addr *)eh->ether_dhost;
422 *len = l;
423 return 0;
426 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
428 int s;
429 struct sockaddr_ll sall = {0};
430 struct ifreq if_hwaddr = {
431 .ifr_ifru = {
432 .ifru_flags = 0
435 uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
436 struct ifreq ifr = {
437 .ifr_ifru = {
438 .ifru_flags = 0
441 struct ether_addr *hwaddr = NULL;
442 struct ether_addr *ether_dhost = NULL;
443 size_t len = 0;
444 int ret = 0;
446 s = socket(AF_PACKET, SOCK_RAW, 0);
447 if (s == -1) {
448 ret = errno;
449 DBG_ERR("Failed to open raw socket\n");
450 return ret;
452 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
454 /* Find interface */
455 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
456 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
457 ret = errno;
458 DBG_ERR("Interface '%s' not found\n", iface);
459 goto fail;
462 /* Get MAC address */
463 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
464 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
465 if ( ret < 0 ) {
466 ret = errno;
467 DBG_ERR("ioctl failed\n");
468 goto fail;
470 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
471 ret = 0;
472 D_DEBUG("Ignoring loopback arp request\n");
473 goto fail;
475 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
476 ret = EINVAL;
477 DBG_ERR("Not an ethernet address family (0x%x)\n",
478 if_hwaddr.ifr_hwaddr.sa_family);
479 goto fail;;
482 /* Set up most of destination address structure */
483 sall.sll_family = AF_PACKET;
484 sall.sll_halen = sizeof(struct ether_addr);
485 sall.sll_protocol = htons(ETH_P_ALL);
486 sall.sll_ifindex = ifr.ifr_ifindex;
488 /* For clarity */
489 hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
491 switch (addr->ip.sin_family) {
492 case AF_INET:
493 /* Send gratuitous ARP */
494 ret = arp_build(buffer,
495 sizeof(buffer),
496 &addr->ip,
497 hwaddr,
498 false,
499 &ether_dhost,
500 &len);
501 if (ret != 0) {
502 DBG_ERR("Failed to build ARP request\n");
503 goto fail;
506 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
508 ret = sendto(s,
509 buffer,
510 len,
512 (struct sockaddr *)&sall,
513 sizeof(sall));
514 if (ret < 0 ) {
515 ret = errno;
516 DBG_ERR("Failed sendto\n");
517 goto fail;
520 /* Send unsolicited ARP reply */
521 ret = arp_build(buffer,
522 sizeof(buffer),
523 &addr->ip,
524 hwaddr,
525 true,
526 &ether_dhost,
527 &len);
528 if (ret != 0) {
529 DBG_ERR("Failed to build ARP reply\n");
530 goto fail;
533 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
535 ret = sendto(s,
536 buffer,
537 len,
539 (struct sockaddr *)&sall,
540 sizeof(sall));
541 if (ret < 0 ) {
542 ret = errno;
543 DBG_ERR("Failed sendto\n");
544 goto fail;
547 close(s);
548 break;
550 case AF_INET6:
551 ret = ip6_na_build(buffer,
552 sizeof(buffer),
553 &addr->ip6,
554 hwaddr,
555 &ether_dhost,
556 &len);
557 if (ret != 0) {
558 DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
559 goto fail;
562 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
564 ret = sendto(s,
565 buffer,
566 len,
568 (struct sockaddr *)&sall,
569 sizeof(sall));
570 if (ret < 0 ) {
571 ret = errno;
572 DBG_ERR("Failed sendto\n");
573 goto fail;
576 close(s);
577 break;
579 default:
580 ret = EINVAL;
581 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
582 addr->ip.sin_family);
583 goto fail;
586 return 0;
588 fail:
589 close(s);
590 return ret;
593 #else /* HAVE_PACKETSOCKET */
595 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
597 /* Not implemented */
598 return ENOSYS;
601 #endif /* HAVE_PACKETSOCKET */
604 #define IP4_TCP_BUFFER_SIZE sizeof(struct ip) + \
605 sizeof(struct tcphdr)
607 #define IP6_TCP_BUFFER_SIZE sizeof(struct ip6_hdr) + \
608 sizeof(struct tcphdr)
610 static int tcp4_build(uint8_t *buf,
611 size_t buflen,
612 const struct sockaddr_in *src,
613 const struct sockaddr_in *dst,
614 uint32_t seq,
615 uint32_t ack,
616 int rst,
617 size_t *len)
619 size_t l = IP4_TCP_BUFFER_SIZE;
620 struct {
621 struct ip ip;
622 struct tcphdr tcp;
623 } *ip4pkt;
625 if (l != sizeof(*ip4pkt)) {
626 return EMSGSIZE;
629 if (buflen < l) {
630 return EMSGSIZE;
633 ip4pkt = (void *)buf;
634 memset(ip4pkt, 0, l);
636 ip4pkt->ip.ip_v = 4;
637 ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
638 ip4pkt->ip.ip_len = htons(sizeof(ip4pkt));
639 ip4pkt->ip.ip_ttl = 255;
640 ip4pkt->ip.ip_p = IPPROTO_TCP;
641 ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
642 ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
643 ip4pkt->ip.ip_sum = 0;
645 ip4pkt->tcp.th_sport = src->sin_port;
646 ip4pkt->tcp.th_dport = dst->sin_port;
647 ip4pkt->tcp.th_seq = seq;
648 ip4pkt->tcp.th_ack = ack;
649 ip4pkt->tcp.th_flags = 0;
650 ip4pkt->tcp.th_flags |= TH_ACK;
651 if (rst) {
652 ip4pkt->tcp.th_flags |= TH_RST;
654 ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
655 /* this makes it easier to spot in a sniffer */
656 ip4pkt->tcp.th_win = htons(1234);
657 ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
658 sizeof(ip4pkt->tcp),
659 &ip4pkt->ip);
661 *len = l;
662 return 0;
665 static int tcp6_build(uint8_t *buf,
666 size_t buflen,
667 const struct sockaddr_in6 *src,
668 const struct sockaddr_in6 *dst,
669 uint32_t seq,
670 uint32_t ack,
671 int rst,
672 size_t *len)
674 size_t l = IP6_TCP_BUFFER_SIZE;
675 struct {
676 struct ip6_hdr ip6;
677 struct tcphdr tcp;
678 } *ip6pkt;
680 if (l != sizeof(*ip6pkt)) {
681 return EMSGSIZE;
684 if (buflen < l) {
685 return EMSGSIZE;
688 ip6pkt = (void *)buf;
689 memset(ip6pkt, 0, l);
691 ip6pkt->ip6.ip6_vfc = 6 << 4;
692 ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
693 ip6pkt->ip6.ip6_nxt = IPPROTO_TCP;
694 ip6pkt->ip6.ip6_hlim = 64;
695 ip6pkt->ip6.ip6_src = src->sin6_addr;
696 ip6pkt->ip6.ip6_dst = dst->sin6_addr;
698 ip6pkt->tcp.th_sport = src->sin6_port;
699 ip6pkt->tcp.th_dport = dst->sin6_port;
700 ip6pkt->tcp.th_seq = seq;
701 ip6pkt->tcp.th_ack = ack;
702 ip6pkt->tcp.th_flags = 0;
703 ip6pkt->tcp.th_flags |= TH_ACK;
704 if (rst) {
705 ip6pkt->tcp.th_flags |= TH_RST;
707 ip6pkt->tcp.th_off = sizeof(ip6pkt->tcp)/sizeof(uint32_t);
708 /* this makes it easier to spot in a sniffer */
709 ip6pkt->tcp.th_win = htons(1234);
710 ip6pkt->tcp.th_sum = ip6_checksum((uint8_t *)&ip6pkt->tcp,
711 sizeof(ip6pkt->tcp),
712 &ip6pkt->ip6);
714 *len = l;
715 return 0;
719 * Send tcp segment from the specified IP/port to the specified
720 * destination IP/port.
722 * This is used to trigger the receiving host into sending its own ACK,
723 * which should trigger early detection of TCP reset by the client
724 * after IP takeover
726 * This can also be used to send RST segments (if rst is true) and also
727 * if correct seq and ack numbers are provided.
729 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
730 const ctdb_sock_addr *src,
731 uint32_t seq,
732 uint32_t ack,
733 int rst)
735 uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
736 size_t len = 0;
737 int ret;
738 int s;
739 uint32_t one = 1;
740 struct sockaddr_in6 tmpdest = { 0 };
741 int saved_errno;
743 switch (src->ip.sin_family) {
744 case AF_INET:
745 ret = tcp4_build(buf,
746 sizeof(buf),
747 &src->ip,
748 &dest->ip,
749 seq,
750 ack,
751 rst,
752 &len);
753 if (ret != 0) {
754 DBG_ERR("Failed to build TCP packet (%d)\n", ret);
755 return ret;
758 /* open a raw socket to send this segment from */
759 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
760 if (s == -1) {
761 DBG_ERR("Failed to open raw socket (%s)\n",
762 strerror(errno));
763 return -1;
766 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
767 if (ret != 0) {
768 DBG_ERR("Failed to setup IP headers (%s)\n",
769 strerror(errno));
770 close(s);
771 return -1;
774 ret = sendto(s,
775 buf,
776 len,
778 (const struct sockaddr *)&dest->ip,
779 sizeof(dest->ip));
780 saved_errno = errno;
781 close(s);
782 if (ret == -1) {
783 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
784 return -1;
786 if ((size_t)ret != len) {
787 DBG_ERR("Failed sendto - didn't send full packet\n");
788 return -1;
790 break;
792 case AF_INET6:
793 ret = tcp6_build(buf,
794 sizeof(buf),
795 &src->ip6,
796 &dest->ip6,
797 seq,
798 ack,
799 rst,
800 &len);
801 if (ret != 0) {
802 DBG_ERR("Failed to build TCP packet (%d)\n", ret);
803 return ret;
806 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
807 if (s == -1) {
808 DBG_ERR("Failed to open sending socket\n");
809 return -1;
813 * sendto() on an IPv6 raw socket requires the port to
814 * be either 0 or a protocol value
816 tmpdest = dest->ip6;
817 tmpdest.sin6_port = 0;
819 ret = sendto(s,
820 buf,
821 len,
823 (const struct sockaddr *)&tmpdest,
824 sizeof(tmpdest));
825 saved_errno = errno;
826 close(s);
827 if (ret == -1) {
828 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
829 return -1;
831 if ((size_t)ret != len) {
832 DBG_ERR("Failed sendto - didn't send full packet\n");
833 return -1;
835 break;
837 default:
838 DBG_ERR("Not an ipv4/v6 address\n");
839 return -1;
842 return 0;
/*
 * Extract TCP connection details from an IPv4 packet
 *
 * ip_pkt points at the start of the IP header and pktlen is the number
 * of bytes available there.  On success src and dst receive the
 * addresses and ports, and *ack_seq, *seq and (if not NULL) *window
 * receive the corresponding TCP header fields.  All of these are
 * copied as found in the packet, with no byte-order conversion.  If
 * rst is not NULL then *rst is non-zero if the RST flag is set.
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short and ENOMSG
 * if the packet is not an unfragmented IPv4 TCP packet with a valid
 * header length.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *ip;
	const struct tcphdr *tcp;
	size_t ip_hdr_len;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}

	ip = (const struct ip *)ip_pkt;

	/* IPv4 only */
	if (ip->ip_v != 4) {
		return ENOMSG;
	}
	/* Don't look at fragments */
	if ((ntohs(ip->ip_off)&0x1fff) != 0) {
		return ENOMSG;
	}
	/* TCP only */
	if (ip->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/*
	 * A header length below the fixed header size is malformed.
	 * Without this check the TCP header would be read from inside
	 * the IP header.
	 */
	ip_hdr_len = ip->ip_hl * sizeof(uint32_t);
	if (ip_hdr_len < sizeof(struct ip)) {
		return ENOMSG;
	}

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
		return EMSGSIZE;
	}

	tcp = (const struct tcphdr *)(ip_pkt + ip_hdr_len);

	src->sin_family      = AF_INET;
	src->sin_addr.s_addr = ip->ip_src.s_addr;
	src->sin_port        = tcp->th_sport;

	dst->sin_family      = AF_INET;
	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
	dst->sin_port        = tcp->th_dport;

	*ack_seq = tcp->th_ack;
	*seq     = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
/*
 * Extract TCP connection details from an IPv6 packet
 *
 * ip_pkt points at the start of the IPv6 header and pktlen is the
 * number of bytes available there.  The TCP header is expected
 * directly after the fixed IPv6 header.  On success src and dst
 * receive the addresses and ports, and *ack_seq, *seq and (if not
 * NULL) *window receive the corresponding TCP header fields.  All of
 * these are copied as found in the packet, with no byte-order
 * conversion.  If rst is not NULL then *rst is non-zero if the RST
 * flag is set.
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short and ENOMSG
 * if the packet is not IPv6 or its next header is not TCP.
 */
static int tcp6_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in6 *src,
			struct sockaddr_in6 *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	/* Fixed IPv6 header plus the TCP header up to the checksum */
	const size_t needed = sizeof(struct ip6_hdr) +
			      offsetof(struct tcphdr, th_sum);
	const struct ip6_hdr *hdr6;
	const struct tcphdr *hdr_tcp;

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < needed) {
		return EMSGSIZE;
	}

	hdr6 = (const struct ip6_hdr *)ip_pkt;

	/* IPv6 only */
	if ((hdr6->ip6_vfc >> 4) != 6) {
		return ENOMSG;
	}

	/* TCP only */
	if (hdr6->ip6_nxt != IPPROTO_TCP) {
		return ENOMSG;
	}

	hdr_tcp = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));

	/* Source */
	src->sin6_family = AF_INET6;
	src->sin6_addr = hdr6->ip6_src;
	src->sin6_port = hdr_tcp->th_sport;

	/* Destination */
	dst->sin6_family = AF_INET6;
	dst->sin6_addr = hdr6->ip6_dst;
	dst->sin6_port = hdr_tcp->th_dport;

	*seq = hdr_tcp->th_seq;
	*ack_seq = hdr_tcp->th_ack;
	if (rst != NULL) {
		*rst = hdr_tcp->th_flags & TH_RST;
	}
	if (window != NULL) {
		*window = hdr_tcp->th_win;
	}

	return 0;
}
956 * Packet capture
958 * If AF_PACKET is available then use a raw socket otherwise use pcap.
959 * wscript has checked to make sure that pcap is available if needed.
962 #if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
965 * This function is used to open a raw socket to capture from
967 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
969 int s, ret;
971 /* Open a socket to capture all traffic */
972 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
973 if (s == -1) {
974 DBG_ERR("Failed to open raw socket\n");
975 return -1;
978 DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", s);
980 ret = set_blocking(s, false);
981 if (ret != 0) {
982 DBG_ERR("Failed to set socket non-blocking (%s)\n",
983 strerror(errno));
984 close(s);
985 return -1;
988 set_close_on_exec(s);
990 return s;
/*
 * This function is used to do any additional cleanup required when closing
 * a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * There is no extra state to release for a raw socket, so this always
 * returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	/* Nothing to clean up */
	return 0;
}
1005 * called when the raw socket becomes readable
1007 int ctdb_sys_read_tcp_packet(int s, void *private_data,
1008 ctdb_sock_addr *src,
1009 ctdb_sock_addr *dst,
1010 uint32_t *ack_seq,
1011 uint32_t *seq,
1012 int *rst,
1013 uint16_t *window)
1015 ssize_t nread;
1016 uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
1017 struct ether_header *eth;
1018 int ret;
1020 nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
1021 if (nread == -1) {
1022 return errno;
1024 if ((size_t)nread < sizeof(*eth)) {
1025 return EMSGSIZE;
1028 ZERO_STRUCTP(src);
1029 ZERO_STRUCTP(dst);
1031 /* Ethernet */
1032 eth = (struct ether_header *)pkt;
1034 /* we want either IPv4 or IPv6 */
1035 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
1036 ret = tcp4_extract(pkt + sizeof(struct ether_header),
1037 (size_t)nread - sizeof(struct ether_header),
1038 &src->ip,
1039 &dst->ip,
1040 ack_seq,
1041 seq,
1042 rst,
1043 window);
1044 return ret;
1046 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
1047 ret = tcp6_extract(pkt + sizeof(struct ether_header),
1048 (size_t)nread - sizeof(struct ether_header),
1049 &src->ip6,
1050 &dst->ip6,
1051 ack_seq,
1052 seq,
1053 rst,
1054 window);
1055 return ret;
1058 return ENOMSG;
1061 #else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
1063 #include <pcap.h>
1066 * Assume this exists if pcap.h exists - it has been around for a
1067 * while
1069 #include <pcap/sll.h>
1071 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
1073 char errbuf[PCAP_ERRBUF_SIZE];
1074 pcap_t *pt;
1075 int pcap_packet_type;
1076 const char *t = NULL;
1077 int fd;
1078 int ret;
1080 pt = pcap_create(iface, errbuf);
1081 if (pt == NULL) {
1082 DBG_ERR("Failed to open pcap capture device %s (%s)\n",
1083 iface,
1084 errbuf);
1085 return -1;
1088 * pcap isn't very clear about defaults...
1090 ret = pcap_set_snaplen(pt, 100);
1091 if (ret < 0) {
1092 DBG_ERR("Failed to set snaplen for pcap capture\n");
1093 goto fail;
1095 ret = pcap_set_promisc(pt, 0);
1096 if (ret < 0) {
1097 DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
1098 goto fail;
1100 ret = pcap_set_timeout(pt, 0);
1101 if (ret < 0) {
1102 DBG_ERR("Failed to set timeout for pcap capture\n");
1103 goto fail;
1105 #ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
1106 ret = pcap_set_immediate_mode(pt, 1);
1107 if (ret < 0) {
1108 DBG_ERR("Failed to set immediate mode for pcap capture\n");
1109 goto fail;
1111 #endif
1112 ret = pcap_activate(pt);
1113 if (ret < 0) {
1114 DBG_ERR("Failed to activate pcap capture\n");
1115 goto fail;
1118 pcap_packet_type = pcap_datalink(pt);
1119 switch (pcap_packet_type) {
1120 case DLT_EN10MB:
1121 t = "DLT_EN10MB";
1122 break;
1123 case DLT_LINUX_SLL:
1124 t = "DLT_LINUX_SLL";
1125 break;
1126 #ifdef DLT_LINUX_SLL2
1127 case DLT_LINUX_SLL2:
1128 t = "DLT_LINUX_SLL2";
1129 break;
1130 #endif /* DLT_LINUX_SLL2 */
1131 default:
1132 DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
1133 goto fail;
1136 fd = pcap_get_selectable_fd(pt);
1137 DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
1139 fd);
1141 *((pcap_t **)private_data) = pt;
1142 return fd;
1144 fail:
1145 pcap_close(pt);
1146 return -1;
1149 int ctdb_sys_close_capture_socket(void *private_data)
1151 pcap_t *pt = (pcap_t *)private_data;
1152 pcap_close(pt);
1153 return 0;
1156 int ctdb_sys_read_tcp_packet(int s,
1157 void *private_data,
1158 ctdb_sock_addr *src,
1159 ctdb_sock_addr *dst,
1160 uint32_t *ack_seq,
1161 uint32_t *seq,
1162 int *rst,
1163 uint16_t *window)
1165 int ret;
1166 struct pcap_pkthdr pkthdr;
1167 const u_char *buffer;
1168 pcap_t *pt = (pcap_t *)private_data;
1169 int pcap_packet_type;
1170 uint16_t ether_type;
1171 size_t ll_hdr_len;
1173 buffer=pcap_next(pt, &pkthdr);
1174 if (buffer==NULL) {
1175 return ENOMSG;
1178 ZERO_STRUCTP(src);
1179 ZERO_STRUCTP(dst);
1181 pcap_packet_type = pcap_datalink(pt);
1182 switch (pcap_packet_type) {
1183 case DLT_EN10MB: {
1184 const struct ether_header *eth =
1185 (const struct ether_header *)buffer;
1186 ether_type = ntohs(eth->ether_type);
1187 ll_hdr_len = sizeof(struct ether_header);
1188 break;
1190 case DLT_LINUX_SLL: {
1191 const struct sll_header *sll =
1192 (const struct sll_header *)buffer;
1193 uint16_t arphrd_type = ntohs(sll->sll_hatype);
1194 switch (arphrd_type) {
1195 case ARPHRD_ETHER:
1196 case ARPHRD_INFINIBAND:
1197 break;
1198 default:
1199 DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
1200 arphrd_type);
1201 return EPROTONOSUPPORT;
1203 ether_type = ntohs(sll->sll_protocol);
1204 ll_hdr_len = SLL_HDR_LEN;
1205 break;
1207 #ifdef DLT_LINUX_SLL2
1208 case DLT_LINUX_SLL2: {
1209 const struct sll2_header *sll2 =
1210 (const struct sll2_header *)buffer;
1211 uint16_t arphrd_type = ntohs(sll2->sll2_hatype);
1212 switch (arphrd_type) {
1213 case ARPHRD_ETHER:
1214 case ARPHRD_INFINIBAND:
1215 break;
1216 default:
1217 DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
1218 arphrd_type);
1219 return EPROTONOSUPPORT;
1221 ether_type = ntohs(sll2->sll2_protocol);
1222 ll_hdr_len = SLL2_HDR_LEN;
1223 break;
1225 #endif /* DLT_LINUX_SLL2 */
1226 default:
1227 DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
1228 return EPROTONOSUPPORT;
1231 switch (ether_type) {
1232 case ETHERTYPE_IP:
1233 ret = tcp4_extract(buffer + ll_hdr_len,
1234 (size_t)pkthdr.caplen - ll_hdr_len,
1235 &src->ip,
1236 &dst->ip,
1237 ack_seq,
1238 seq,
1239 rst,
1240 window);
1241 break;
1242 case ETHERTYPE_IP6:
1243 ret = tcp6_extract(buffer + ll_hdr_len,
1244 (size_t)pkthdr.caplen - ll_hdr_len,
1245 &src->ip6,
1246 &dst->ip6,
1247 ack_seq,
1248 seq,
1249 rst,
1250 window);
1251 break;
1252 case ETHERTYPE_ARP:
1253 /* Silently ignore ARP packets */
1254 return EPROTO;
1255 default:
1256 DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
1257 return EPROTO;
1260 return ret;
1263 #endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */