ctdb-server: Use find_public_ip_vnn() in a couple of extra places
[samba4-gss.git] / ctdb / server / ctdb_takeover.c
blobbb021a29ebfd6c711d90b669a1f69390363e376c
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_file.h"
34 #include "lib/util/sys_rw.h"
35 #include "lib/util/util_process.h"
37 #include "protocol/protocol_util.h"
39 #include "ctdb_private.h"
40 #include "ctdb_client.h"
42 #include "common/reqid.h"
43 #include "common/system.h"
44 #include "common/system_socket.h"
45 #include "common/common.h"
46 #include "common/logging.h"
47 #include "common/path.h"
49 #include "conf/ctdb_config.h"
51 #include "server/ipalloc.h"
53 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
55 #define CTDB_ARP_INTERVAL 1
56 #define CTDB_ARP_REPEAT 3
/* A local network interface that public addresses can be hosted on */
struct ctdb_interface {
	/* Doubly-linked list pointers (list head is ctdb->ifaces) */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* Interface name */
	const char *name;
	/* Interfaces with link_up == false are not chosen for addresses */
	bool link_up;
	/* Number of public addresses currently assigned to this interface */
	uint32_t references;
};
/* One entry in a public address's list of candidate interfaces */
struct vnn_interface {
	/* Doubly-linked list pointers (list head is ctdb_vnn.ifaces) */
	struct vnn_interface *prev;
	struct vnn_interface *next;
	/* The interface this entry refers to */
	struct ctdb_interface *iface;
};
70 /* state associated with a public ip address */
71 struct ctdb_vnn {
72 struct ctdb_vnn *prev, *next;
74 struct ctdb_interface *iface;
75 struct vnn_interface *ifaces;
76 ctdb_sock_addr public_address;
77 uint8_t public_netmask_bits;
78 const char *name;
81 * The node number that is serving this public address - set
82 * to CTDB_UNKNOWN_PNN if no node is serving it
84 uint32_t pnn;
86 /* List of clients to tickle for this public address */
87 struct ctdb_tcp_array *tcp_array;
89 /* whether we need to update the other nodes with changes to our list
90 of connected clients */
91 bool tcp_update_needed;
93 /* a context to hang sending gratious arp events off */
94 TALLOC_CTX *takeover_ctx;
96 /* Set to true any time an update to this VNN is in flight.
97 This helps to avoid races. */
98 bool update_in_flight;
100 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
101 * address then this flag is set. It will be deleted in the
102 * release IP callback. */
103 bool delete_pending;
106 static const char *iface_string(const struct ctdb_interface *iface)
108 return (iface != NULL ? iface->name : "__none__");
111 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
113 return iface_string(vnn->iface);
116 static const char *ctdb_vnn_address_string(const struct ctdb_vnn *vnn)
118 return vnn->name;
121 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
122 const char *iface);
124 static struct ctdb_interface *
125 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
127 struct ctdb_interface *i;
129 if (strlen(iface) > CTDB_IFACE_SIZE) {
130 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
131 return NULL;
134 /* Verify that we don't have an entry for this ip yet */
135 i = ctdb_find_iface(ctdb, iface);
136 if (i != NULL) {
137 return i;
140 /* create a new structure for this interface */
141 i = talloc_zero(ctdb, struct ctdb_interface);
142 if (i == NULL) {
143 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
144 return NULL;
146 i->name = talloc_strdup(i, iface);
147 if (i->name == NULL) {
148 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
149 talloc_free(i);
150 return NULL;
153 i->link_up = true;
155 DLIST_ADD(ctdb->ifaces, i);
157 return i;
160 static bool vnn_has_interface(struct ctdb_vnn *vnn,
161 const struct ctdb_interface *iface)
163 struct vnn_interface *i;
165 for (i = vnn->ifaces; i != NULL; i = i->next) {
166 if (iface == i->iface) {
167 return true;
171 return false;
174 /* If any interfaces now have no possible IPs then delete them. This
175 * implementation is naive (i.e. simple) rather than clever
176 * (i.e. complex). Given that this is run on delip and that operation
177 * is rare, this doesn't need to be efficient - it needs to be
178 * foolproof. One alternative is reference counting, where the logic
179 * is distributed and can, therefore, be broken in multiple places.
180 * Another alternative is to build a red-black tree of interfaces that
181 * can have addresses (by walking ctdb->vnn once) and then walking
182 * ctdb->ifaces once and deleting those not in the tree. Let's go to
183 * one of those if the naive implementation causes problems... :-)
185 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
186 struct ctdb_vnn *vnn)
188 struct ctdb_interface *i, *next;
190 /* For each interface, check if there's an IP using it. */
191 for (i = ctdb->ifaces; i != NULL; i = next) {
192 struct ctdb_vnn *tv;
193 bool found;
194 next = i->next;
196 /* Only consider interfaces named in the given VNN. */
197 if (!vnn_has_interface(vnn, i)) {
198 continue;
201 /* Search for a vnn with this interface. */
202 found = false;
203 for (tv=ctdb->vnn; tv; tv=tv->next) {
204 if (vnn_has_interface(tv, i)) {
205 found = true;
206 break;
210 if (!found) {
211 /* None of the VNNs are using this interface. */
212 DLIST_REMOVE(ctdb->ifaces, i);
213 talloc_free(i);
219 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
220 const char *iface)
222 struct ctdb_interface *i;
224 for (i=ctdb->ifaces;i;i=i->next) {
225 if (strcmp(i->name, iface) == 0) {
226 return i;
230 return NULL;
233 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
234 struct ctdb_vnn *vnn)
236 struct vnn_interface *i;
237 struct ctdb_interface *cur = NULL;
238 struct ctdb_interface *best = NULL;
240 for (i = vnn->ifaces; i != NULL; i = i->next) {
242 cur = i->iface;
244 if (!cur->link_up) {
245 continue;
248 if (best == NULL) {
249 best = cur;
250 continue;
253 if (cur->references < best->references) {
254 best = cur;
255 continue;
259 return best;
262 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
263 struct ctdb_vnn *vnn)
265 struct ctdb_interface *best = NULL;
267 if (vnn->iface) {
268 DBG_INFO("public address '%s' still assigned to iface '%s'\n",
269 ctdb_vnn_address_string(vnn),
270 ctdb_vnn_iface_string(vnn));
271 return 0;
274 best = ctdb_vnn_best_iface(ctdb, vnn);
275 if (best == NULL) {
276 DBG_ERR("public address '%s' cannot assign to iface any iface\n",
277 ctdb_vnn_address_string(vnn));
278 return -1;
281 vnn->iface = best;
282 best->references++;
283 vnn->pnn = ctdb->pnn;
285 DBG_INFO("public address '%s' now assigned to iface '%s' refs[%d]\n",
286 ctdb_vnn_address_string(vnn),
287 ctdb_vnn_iface_string(vnn),
288 best->references);
289 return 0;
292 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
293 struct ctdb_vnn *vnn)
295 DBG_INFO("public address '%s' "
296 "now unassigned (old iface '%s' refs[%d])\n",
297 ctdb_vnn_address_string(vnn),
298 ctdb_vnn_iface_string(vnn),
299 vnn->iface != NULL ? vnn->iface->references : 0);
300 if (vnn->iface) {
301 vnn->iface->references--;
303 vnn->iface = NULL;
304 if (vnn->pnn == ctdb->pnn) {
305 vnn->pnn = CTDB_UNKNOWN_PNN;
309 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
310 struct ctdb_vnn *vnn)
312 uint32_t flags;
313 struct vnn_interface *i;
315 /* Nodes that are not RUNNING can not host IPs */
316 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
317 return false;
320 flags = ctdb->nodes[ctdb->pnn]->flags;
321 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
322 return false;
325 if (vnn->delete_pending) {
326 return false;
329 if (vnn->iface && vnn->iface->link_up) {
330 return true;
333 for (i = vnn->ifaces; i != NULL; i = i->next) {
334 if (i->iface->link_up) {
335 return true;
339 return false;
342 struct ctdb_takeover_arp {
343 struct ctdb_context *ctdb;
344 uint32_t count;
345 ctdb_sock_addr addr;
346 struct ctdb_tcp_array *tcparray;
347 struct ctdb_vnn *vnn;
352 lists of tcp endpoints
354 struct ctdb_tcp_list {
355 struct ctdb_tcp_list *prev, *next;
356 struct ctdb_client *client;
357 struct ctdb_connection connection;
361 send a gratuitous arp
363 static void ctdb_control_send_arp(struct tevent_context *ev,
364 struct tevent_timer *te,
365 struct timeval t, void *private_data)
367 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
368 struct ctdb_takeover_arp);
369 int ret;
370 struct ctdb_tcp_array *tcparray;
371 const char *iface;
373 /* IP address might have been released between sends */
374 if (arp->vnn->iface == NULL) {
375 DBG_INFO("Cancelling ARP send for released IP %s\n",
376 ctdb_vnn_address_string(arp->vnn));
377 talloc_free(arp);
378 return;
381 iface = ctdb_vnn_iface_string(arp->vnn);
382 ret = ctdb_sys_send_arp(&arp->addr, iface);
383 if (ret != 0) {
384 DBG_ERR("Failed to send ARP on interface %s: %s\n",
385 iface, strerror(ret));
388 tcparray = arp->tcparray;
389 if (tcparray) {
390 unsigned int i;
392 for (i=0;i<tcparray->num;i++) {
393 struct ctdb_connection *tcon;
394 char buf[128];
396 tcon = &tcparray->connections[i];
397 ret = ctdb_connection_to_buf(buf,
398 sizeof(buf),
399 tcon,
400 false,
401 " -> ");
402 if (ret != 0) {
403 strlcpy(buf, "UNKNOWN", sizeof(buf));
405 D_INFO("Send TCP tickle ACK: %s\n", buf);
406 ret = ctdb_sys_send_tcp(
407 &tcon->src,
408 &tcon->dst,
409 0, 0, 0);
410 if (ret != 0) {
411 DBG_ERR("Failed to send TCP tickle ACK: %s\n",
412 buf);
417 arp->count++;
419 if (arp->count == CTDB_ARP_REPEAT) {
420 talloc_free(arp);
421 return;
424 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
425 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
426 ctdb_control_send_arp, arp);
429 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
430 struct ctdb_vnn *vnn)
432 struct ctdb_takeover_arp *arp;
433 struct ctdb_tcp_array *tcparray;
435 if (!vnn->takeover_ctx) {
436 vnn->takeover_ctx = talloc_new(vnn);
437 if (!vnn->takeover_ctx) {
438 return -1;
442 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
443 if (!arp) {
444 return -1;
447 arp->ctdb = ctdb;
448 arp->addr = vnn->public_address;
449 arp->vnn = vnn;
451 tcparray = vnn->tcp_array;
452 if (tcparray) {
453 /* add all of the known tcp connections for this IP to the
454 list of tcp connections to send tickle acks for */
455 arp->tcparray = talloc_steal(arp, tcparray);
457 vnn->tcp_array = NULL;
458 vnn->tcp_update_needed = true;
461 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
462 timeval_zero(), ctdb_control_send_arp, arp);
464 return 0;
/* State carried from ctdb_do_takeip() to its event script callback */
struct ctdb_do_takeip_state {
	/* Control to reply to once the takeip event has finished */
	struct ctdb_req_control_old *c;
	/* Public address being taken over */
	struct ctdb_vnn *vnn;
};
473 called when takeip event finishes
475 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
476 void *private_data)
478 struct ctdb_do_takeip_state *state =
479 talloc_get_type(private_data, struct ctdb_do_takeip_state);
480 int32_t ret;
481 TDB_DATA data;
483 if (status != 0) {
484 if (status == -ETIMEDOUT) {
485 ctdb_ban_self(ctdb);
487 DBG_ERR("Failed to takeover IP %s on interface %s\n",
488 ctdb_vnn_address_string(state->vnn),
489 ctdb_vnn_iface_string(state->vnn));
490 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
492 talloc_free(state);
493 return;
496 if (ctdb->do_checkpublicip) {
498 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
499 if (ret != 0) {
500 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
501 talloc_free(state);
502 return;
507 data.dptr = (uint8_t *)discard_const(
508 ctdb_vnn_address_string(state->vnn));
509 data.dsize = strlen((char *)data.dptr) + 1;
510 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
512 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
515 /* the control succeeded */
516 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
517 talloc_free(state);
518 return;
521 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
523 state->vnn->update_in_flight = false;
524 return 0;
528 take over an ip address
530 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
531 struct ctdb_req_control_old *c,
532 struct ctdb_vnn *vnn)
534 int ret;
535 struct ctdb_do_takeip_state *state;
537 if (vnn->update_in_flight) {
538 D_NOTICE("Takeover of IP %s/%u rejected "
539 "update for this IP already in flight\n",
540 ctdb_vnn_address_string(vnn),
541 vnn->public_netmask_bits);
542 return -1;
545 ret = ctdb_vnn_assign_iface(ctdb, vnn);
546 if (ret != 0) {
547 D_ERR("Takeover of IP %s/%u failed to "
548 "assign a usable interface\n",
549 ctdb_vnn_address_string(vnn),
550 vnn->public_netmask_bits);
551 return -1;
554 state = talloc(vnn, struct ctdb_do_takeip_state);
555 CTDB_NO_MEMORY(ctdb, state);
557 state->c = NULL;
558 state->vnn = vnn;
560 vnn->update_in_flight = true;
561 talloc_set_destructor(state, ctdb_takeip_destructor);
563 D_NOTICE("Takeover of IP %s/%u on interface %s\n",
564 ctdb_vnn_address_string(vnn),
565 vnn->public_netmask_bits,
566 ctdb_vnn_iface_string(vnn));
568 ret = ctdb_event_script_callback(ctdb,
569 state,
570 ctdb_do_takeip_callback,
571 state,
572 CTDB_EVENT_TAKE_IP,
573 "%s %s %u",
574 ctdb_vnn_iface_string(vnn),
575 ctdb_vnn_address_string(vnn),
576 vnn->public_netmask_bits);
578 if (ret != 0) {
579 DBG_ERR("Failed to takeover IP %s on interface %s\n",
580 ctdb_vnn_address_string(vnn),
581 ctdb_vnn_iface_string(vnn));
582 talloc_free(state);
583 return -1;
586 state->c = talloc_steal(ctdb, c);
587 return 0;
/* State carried from ctdb_do_updateip() to its event script callback */
struct ctdb_do_updateip_state {
	/* Control to reply to once the updateip event has finished */
	struct ctdb_req_control_old *c;
	/* Interface the address was assigned to before the update */
	struct ctdb_interface *old;
	/* Public address being moved */
	struct ctdb_vnn *vnn;
};
597 called when updateip event finishes
599 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
600 void *private_data)
602 struct ctdb_do_updateip_state *state =
603 talloc_get_type(private_data, struct ctdb_do_updateip_state);
605 if (status != 0) {
606 if (status == -ETIMEDOUT) {
607 ctdb_ban_self(ctdb);
609 D_ERR("Failed update of IP %s from interface %s to %s\n",
610 ctdb_vnn_address_string(state->vnn),
611 iface_string(state->old),
612 ctdb_vnn_iface_string(state->vnn));
615 * All we can do is reset the old interface
616 * and let the next run fix it
618 ctdb_vnn_unassign_iface(ctdb, state->vnn);
619 state->vnn->iface = state->old;
620 state->vnn->iface->references++;
622 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
623 talloc_free(state);
624 return;
627 /* the control succeeded */
628 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
629 talloc_free(state);
630 return;
633 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
635 state->vnn->update_in_flight = false;
636 return 0;
640 update (move) an ip address
642 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
643 struct ctdb_req_control_old *c,
644 struct ctdb_vnn *vnn)
646 int ret;
647 struct ctdb_do_updateip_state *state;
648 struct ctdb_interface *old = vnn->iface;
649 const char *old_name = iface_string(old);
650 const char *new_name;
652 if (vnn->update_in_flight) {
653 D_NOTICE("Update of IP %s/%u rejected "
654 "update for this IP already in flight\n",
655 ctdb_vnn_address_string(vnn),
656 vnn->public_netmask_bits);
657 return -1;
660 ctdb_vnn_unassign_iface(ctdb, vnn);
661 ret = ctdb_vnn_assign_iface(ctdb, vnn);
662 if (ret != 0) {
663 D_ERR("Update of IP %s/%u failed to "
664 "assign a usable interface (old iface '%s')\n",
665 ctdb_vnn_address_string(vnn),
666 vnn->public_netmask_bits,
667 old_name);
668 return -1;
671 if (old == vnn->iface) {
672 /* A benign update from one interface onto itself.
673 * no need to run the eventscripts in this case, just return
674 * success.
676 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
677 return 0;
680 state = talloc(vnn, struct ctdb_do_updateip_state);
681 CTDB_NO_MEMORY(ctdb, state);
683 state->c = NULL;
684 state->old = old;
685 state->vnn = vnn;
687 vnn->update_in_flight = true;
688 talloc_set_destructor(state, ctdb_updateip_destructor);
690 new_name = ctdb_vnn_iface_string(vnn);
691 D_NOTICE("Update of IP %s/%u from "
692 "interface %s to %s\n",
693 ctdb_vnn_address_string(vnn),
694 vnn->public_netmask_bits,
695 old_name,
696 new_name);
698 ret = ctdb_event_script_callback(ctdb,
699 state,
700 ctdb_do_updateip_callback,
701 state,
702 CTDB_EVENT_UPDATE_IP,
703 "%s %s %s %u",
704 old_name,
705 new_name,
706 ctdb_vnn_address_string(vnn),
707 vnn->public_netmask_bits);
708 if (ret != 0) {
709 D_ERR("Failed update IP %s from interface %s to %s\n",
710 ctdb_vnn_address_string(vnn),
711 old_name,
712 new_name);
713 talloc_free(state);
714 return -1;
717 state->c = talloc_steal(ctdb, c);
718 return 0;
722 * Find vnn that has public IP addr, return NULL if not found
724 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb,
725 ctdb_sock_addr *addr)
727 struct ctdb_vnn *vnn = NULL;
729 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
730 if (ctdb_same_ip(&vnn->public_address, addr)) {
731 return vnn;
735 return NULL;
739 take over an ip address
741 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
742 struct ctdb_req_control_old *c,
743 TDB_DATA indata,
744 bool *async_reply)
746 int ret;
747 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
748 struct ctdb_vnn *vnn;
749 bool have_ip = false;
750 bool do_updateip = false;
751 bool do_takeip = false;
752 struct ctdb_interface *best_iface = NULL;
754 if (pip->pnn != ctdb->pnn) {
755 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
756 "with pnn %d, but we're node %d\n",
757 ctdb_addr_to_str(&pip->addr),
758 pip->pnn, ctdb->pnn));
759 return -1;
762 /* update out vnn list */
763 vnn = find_public_ip_vnn(ctdb, &pip->addr);
764 if (vnn == NULL) {
765 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
766 ctdb_addr_to_str(&pip->addr)));
767 return 0;
770 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
771 have_ip = ctdb_sys_have_ip(&pip->addr);
773 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
774 if (best_iface == NULL) {
775 D_ERR("takeoverip of IP %s/%u failed to find"
776 "a usable interface (old %s, have_ip %d)\n",
777 ctdb_vnn_address_string(vnn),
778 vnn->public_netmask_bits,
779 ctdb_vnn_iface_string(vnn),
780 have_ip);
781 return -1;
784 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
785 DBG_ERR("takeoverip of IP %s is known to the kernel, "
786 "and we have it on iface[%s], "
787 "but it was assigned to node %d"
788 "and we are node %d, banning ourself\n",
789 ctdb_vnn_address_string(vnn),
790 ctdb_vnn_iface_string(vnn),
791 vnn->pnn,
792 ctdb->pnn);
793 ctdb_ban_self(ctdb);
794 return -1;
797 if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
798 /* This will cause connections to be reset and
799 * reestablished. However, this is a very unusual
800 * situation and doing this will completely repair the
801 * inconsistency in the VNN.
803 DBG_WARNING(
804 "Doing updateip for IP %s already on an interface\n",
805 ctdb_vnn_address_string(vnn));
806 do_updateip = true;
809 if (vnn->iface) {
810 if (vnn->iface != best_iface) {
811 if (!vnn->iface->link_up) {
812 do_updateip = true;
813 } else if (vnn->iface->references > (best_iface->references + 1)) {
814 /* only move when the rebalance gains something */
815 do_updateip = true;
820 if (!have_ip) {
821 if (do_updateip) {
822 ctdb_vnn_unassign_iface(ctdb, vnn);
823 do_updateip = false;
825 do_takeip = true;
828 if (do_takeip) {
829 ret = ctdb_do_takeip(ctdb, c, vnn);
830 if (ret != 0) {
831 return -1;
833 } else if (do_updateip) {
834 ret = ctdb_do_updateip(ctdb, c, vnn);
835 if (ret != 0) {
836 return -1;
838 } else {
840 * The interface is up and the kernel known the ip
841 * => do nothing
843 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
844 ctdb_addr_to_str(&pip->addr),
845 vnn->public_netmask_bits,
846 ctdb_vnn_iface_string(vnn)));
847 return 0;
850 /* tell ctdb_control.c that we will be replying asynchronously */
851 *async_reply = true;
853 return 0;
856 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
858 DLIST_REMOVE(ctdb->vnn, vnn);
859 ctdb_vnn_unassign_iface(ctdb, vnn);
860 ctdb_remove_orphaned_ifaces(ctdb, vnn);
861 talloc_free(vnn);
864 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
865 struct ctdb_vnn *vnn,
866 ctdb_sock_addr *addr)
868 TDB_DATA data;
870 /* Send a message to all clients of this node telling them
871 * that the cluster has been reconfigured and they should
872 * close any connections on this IP address
874 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
875 data.dsize = strlen((char *)data.dptr)+1;
876 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
877 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
879 ctdb_vnn_unassign_iface(ctdb, vnn);
881 /* Process the IP if it has been marked for deletion */
882 if (vnn->delete_pending) {
883 do_delete_ip(ctdb, vnn);
884 return NULL;
887 return vnn;
890 struct release_ip_callback_state {
891 struct ctdb_req_control_old *c;
892 ctdb_sock_addr *addr;
893 struct ctdb_vnn *vnn;
894 uint32_t target_pnn;
898 called when releaseip event finishes
900 static void release_ip_callback(struct ctdb_context *ctdb, int status,
901 void *private_data)
903 struct release_ip_callback_state *state =
904 talloc_get_type(private_data, struct release_ip_callback_state);
906 if (status == -ETIMEDOUT) {
907 ctdb_ban_self(ctdb);
910 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
911 if (ctdb_sys_have_ip(state->addr)) {
912 DEBUG(DEBUG_ERR,
913 ("IP %s still hosted during release IP callback, failing\n",
914 ctdb_addr_to_str(state->addr)));
915 ctdb_request_control_reply(ctdb, state->c,
916 NULL, -1, NULL);
917 talloc_free(state);
918 return;
922 state->vnn->pnn = state->target_pnn;
923 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
925 /* the control succeeded */
926 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
927 talloc_free(state);
930 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
932 if (state->vnn != NULL) {
933 state->vnn->update_in_flight = false;
935 return 0;
939 release an ip address
941 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
942 struct ctdb_req_control_old *c,
943 TDB_DATA indata,
944 bool *async_reply)
946 int ret;
947 struct release_ip_callback_state *state;
948 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
949 struct ctdb_vnn *vnn;
950 const char *iface;
952 /* update our vnn list */
953 vnn = find_public_ip_vnn(ctdb, &pip->addr);
954 if (vnn == NULL) {
955 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
956 ctdb_addr_to_str(&pip->addr)));
957 return 0;
960 /* stop any previous arps */
961 talloc_free(vnn->takeover_ctx);
962 vnn->takeover_ctx = NULL;
964 /* RELEASE_IP controls are sent to all nodes that should not
965 * be hosting a particular IP. This serves 2 purposes. The
966 * first is to help resolve any inconsistencies. If a node
967 * does unexpectedly host an IP then it will be released. The
968 * 2nd is to use a "redundant release" to tell non-takeover
969 * nodes where an IP is moving to. This is how "ctdb ip" can
970 * report the (likely) location of an IP by only asking the
971 * local node. Redundant releases need to update the PNN but
972 * are otherwise ignored.
974 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
975 if (!ctdb_sys_have_ip(&pip->addr)) {
976 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits,
979 ctdb_vnn_iface_string(vnn)));
980 vnn->pnn = pip->pnn;
981 ctdb_vnn_unassign_iface(ctdb, vnn);
982 return 0;
984 } else {
985 if (vnn->iface == NULL) {
986 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
987 ctdb_addr_to_str(&pip->addr),
988 vnn->public_netmask_bits));
989 vnn->pnn = pip->pnn;
990 return 0;
994 /* There is a potential race between take_ip and us because we
995 * update the VNN via a callback that run when the
996 * eventscripts have been run. Avoid the race by allowing one
997 * update to be in flight at a time.
999 if (vnn->update_in_flight) {
1000 D_NOTICE("Release of IP %s/%u rejected "
1001 "update for this IP already in flight\n",
1002 ctdb_vnn_address_string(vnn),
1003 vnn->public_netmask_bits);
1004 return -1;
1007 iface = ctdb_vnn_iface_string(vnn);
1009 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
1010 ctdb_addr_to_str(&pip->addr),
1011 vnn->public_netmask_bits,
1012 iface,
1013 pip->pnn));
1015 state = talloc(ctdb, struct release_ip_callback_state);
1016 if (state == NULL) {
1017 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1018 __FILE__, __LINE__);
1019 return -1;
1022 state->c = NULL;
1023 state->addr = talloc(state, ctdb_sock_addr);
1024 if (state->addr == NULL) {
1025 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1026 __FILE__, __LINE__);
1027 talloc_free(state);
1028 return -1;
1030 *state->addr = pip->addr;
1031 state->target_pnn = pip->pnn;
1032 state->vnn = vnn;
1034 vnn->update_in_flight = true;
1035 talloc_set_destructor(state, ctdb_releaseip_destructor);
1037 ret = ctdb_event_script_callback(ctdb,
1038 state, release_ip_callback, state,
1039 CTDB_EVENT_RELEASE_IP,
1040 "%s %s %u",
1041 iface,
1042 ctdb_addr_to_str(&pip->addr),
1043 vnn->public_netmask_bits);
1044 if (ret != 0) {
1045 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1046 ctdb_addr_to_str(&pip->addr),
1047 ctdb_vnn_iface_string(vnn)));
1048 talloc_free(state);
1049 return -1;
1052 /* tell the control that we will be reply asynchronously */
1053 *async_reply = true;
1054 state->c = talloc_steal(state, c);
1055 return 0;
1058 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1059 ctdb_sock_addr *addr,
1060 unsigned mask, const char *ifaces)
1062 struct ctdb_vnn *vnn;
1063 char *tmp;
1064 const char *iface;
1066 /* Verify that we don't have an entry for this IP yet */
1067 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1068 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1069 D_ERR("Duplicate public IP address '%s'\n",
1070 ctdb_addr_to_str(addr));
1071 return -1;
1075 /* Create a new VNN structure for this IP address */
1076 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1077 if (vnn == NULL) {
1078 DBG_ERR("Memory allocation error\n");
1079 return -1;
1082 vnn->name = ctdb_sock_addr_to_string(vnn, addr, false);
1083 if (vnn->name == NULL) {
1084 DBG_ERR("Memory allocation error\n");
1085 talloc_free(vnn);
1086 return -1;
1089 tmp = talloc_strdup(vnn, ifaces);
1090 if (tmp == NULL) {
1091 DBG_ERR("Memory allocation error\n");
1092 talloc_free(vnn);
1093 return -1;
1095 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1096 struct vnn_interface *vnn_iface;
1097 struct ctdb_interface *i;
1099 if (!ctdb_sys_check_iface_exists(iface)) {
1100 D_ERR("Unknown interface %s for public address %s\n",
1101 iface,
1102 ctdb_vnn_address_string(vnn));
1103 talloc_free(vnn);
1104 return -1;
1107 i = ctdb_add_local_iface(ctdb, iface);
1108 if (i == NULL) {
1109 D_ERR("Failed to add interface '%s' "
1110 "for public address %s\n",
1111 iface,
1112 ctdb_vnn_address_string(vnn));
1113 talloc_free(vnn);
1114 return -1;
1117 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1118 if (vnn_iface == NULL) {
1119 DBG_ERR("Memory allocation error\n");
1120 talloc_free(vnn);
1121 return -1;
1124 vnn_iface->iface = i;
1125 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1127 talloc_free(tmp);
1128 vnn->public_address = *addr;
1129 vnn->public_netmask_bits = mask;
1130 vnn->pnn = -1;
1132 DLIST_ADD(ctdb->vnn, vnn);
1134 return 0;
1138 setup the public address lists from a file
1140 int ctdb_set_public_addresses(struct ctdb_context *ctdb)
1142 bool ok;
1143 char **lines;
1144 int nlines;
1145 int i;
1147 /* If no public addresses file given then try the default */
1148 if (ctdb->public_addresses_file == NULL) {
1149 ctdb->public_addresses_file = path_etcdir_append(
1150 ctdb, "public_addresses");
1151 if (ctdb->public_addresses_file == NULL) {
1152 DBG_ERR("Out of memory\n");
1153 return -1;
1157 /* If the file doesn't exist then warn and do nothing */
1158 ok = file_exist(ctdb->public_addresses_file);
1159 if (!ok) {
1160 D_WARNING("Not loading public addresses, no file %s\n",
1161 ctdb->public_addresses_file);
1162 return 0;
1165 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1166 if (lines == NULL) {
1167 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1168 return -1;
1170 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1171 nlines--;
1174 for (i=0;i<nlines;i++) {
1175 unsigned mask;
1176 ctdb_sock_addr addr;
1177 const char *addrstr;
1178 const char *ifaces;
1179 char *tok, *line;
1180 int ret;
1182 line = lines[i];
1183 while ((*line == ' ') || (*line == '\t')) {
1184 line++;
1186 if (*line == '#') {
1187 continue;
1189 if (strcmp(line, "") == 0) {
1190 continue;
1192 tok = strtok(line, " \t");
1193 addrstr = tok;
1195 tok = strtok(NULL, " \t");
1196 if (tok == NULL) {
1197 D_ERR("No interface specified at line %u "
1198 "of public addresses file\n", i+1);
1199 talloc_free(lines);
1200 return -1;
1202 ifaces = tok;
1204 if (addrstr == NULL) {
1205 D_ERR("Badly formed line %u in public address list\n",
1206 i+1);
1207 talloc_free(lines);
1208 return -1;
1211 ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
1212 if (ret != 0) {
1213 D_ERR("Badly formed line %u in public address list\n",
1214 i+1);
1215 talloc_free(lines);
1216 return -1;
1219 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
1220 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1221 talloc_free(lines);
1222 return -1;
1227 D_NOTICE("Loaded public addresses from %s\n",
1228 ctdb->public_addresses_file);
1230 talloc_free(lines);
1231 return 0;
1235 destroy a ctdb_tcp_list structure
1237 static int ctdb_tcp_list_destructor(struct ctdb_tcp_list *tcp)
1239 struct ctdb_client *client = tcp->client;
1240 struct ctdb_connection *conn = &tcp->connection;
1241 char conn_str[132] = { 0, };
1242 int ret;
1244 ret = ctdb_connection_to_buf(conn_str,
1245 sizeof(conn_str),
1246 conn,
1247 false,
1248 " -> ");
1249 if (ret != 0) {
1250 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1253 D_DEBUG("removing client TCP connection %s "
1254 "(client_id %u pid %d)\n",
1255 conn_str, client->client_id, client->pid);
1257 DLIST_REMOVE(client->tcp_list, tcp);
1260 * We don't call ctdb_remove_connection(vnn, conn) here
1261 * as we want the caller to decide if it's called
1262 * directly (local only) or indirectly via a
1263 * CTDB_CONTROL_TCP_REMOVE broadcast
1266 return 0;
1270 called by a client to inform us of a TCP connection that it is managing
1271 that should tickled with an ACK when IP takeover is done
1273 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1274 TDB_DATA indata)
1276 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1277 struct ctdb_connection *tcp_sock = NULL;
1278 struct ctdb_tcp_list *tcp;
1279 struct ctdb_connection t;
1280 int ret;
1281 TDB_DATA data;
1282 struct ctdb_vnn *vnn;
1283 char conn_str[132] = { 0, };
1285 /* If we don't have public IPs, tickles are useless */
1286 if (ctdb->vnn == NULL) {
1287 return 0;
1290 tcp_sock = (struct ctdb_connection *)indata.dptr;
1292 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1293 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1295 ret = ctdb_connection_to_buf(conn_str,
1296 sizeof(conn_str),
1297 tcp_sock,
1298 false,
1299 " -> ");
1300 if (ret != 0) {
1301 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1304 vnn = find_public_ip_vnn(ctdb, &tcp_sock->dst);
1305 if (vnn == NULL) {
1306 D_ERR("Could not register TCP connection %s - "
1307 "not a public address (client_id %u pid %u)\n",
1308 conn_str, client_id, client->pid);
1309 return 0;
1312 if (vnn->pnn != ctdb->pnn) {
1313 D_ERR("Attempt to register tcp client for IP %s we don't hold - "
1314 "failing (client_id %u pid %u)\n",
1315 ctdb_addr_to_str(&tcp_sock->dst),
1316 client_id, client->pid);
1317 /* failing this call will tell smbd to die */
1318 return -1;
1321 tcp = talloc(client, struct ctdb_tcp_list);
1322 CTDB_NO_MEMORY(ctdb, tcp);
1323 tcp->client = client;
1325 tcp->connection.src = tcp_sock->src;
1326 tcp->connection.dst = tcp_sock->dst;
1328 DLIST_ADD(client->tcp_list, tcp);
1329 talloc_set_destructor(tcp, ctdb_tcp_list_destructor);
1331 t.src = tcp_sock->src;
1332 t.dst = tcp_sock->dst;
1334 data.dptr = (uint8_t *)&t;
1335 data.dsize = sizeof(t);
1337 D_INFO("Registered TCP connection %s (client_id %u pid %u)\n",
1338 conn_str, client_id, client->pid);
1340 /* tell all nodes about this tcp connection */
1341 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1342 CTDB_CONTROL_TCP_ADD,
1343 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1344 if (ret != 0) {
1345 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1346 return -1;
1349 return 0;
1352 static bool ctdb_client_remove_tcp(struct ctdb_client *client,
1353 const struct ctdb_connection *conn)
1355 struct ctdb_tcp_list *tcp = NULL;
1356 struct ctdb_tcp_list *tcp_next = NULL;
1357 bool found = false;
1359 for (tcp = client->tcp_list; tcp != NULL; tcp = tcp_next) {
1360 bool same;
1362 tcp_next = tcp->next;
1364 same = ctdb_connection_same(conn, &tcp->connection);
1365 if (!same) {
1366 continue;
1369 TALLOC_FREE(tcp);
1370 found = true;
1373 return found;
1377 called by a client to inform us of a TCP connection that was disconnected
1379 int32_t ctdb_control_tcp_client_disconnected(struct ctdb_context *ctdb,
1380 uint32_t client_id,
1381 TDB_DATA indata)
1383 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1384 struct ctdb_connection *tcp_sock = NULL;
1385 int ret;
1386 TDB_DATA data;
1387 char conn_str[132] = { 0, };
1388 bool found = false;
1390 tcp_sock = (struct ctdb_connection *)indata.dptr;
1392 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1393 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1395 ret = ctdb_connection_to_buf(conn_str,
1396 sizeof(conn_str),
1397 tcp_sock,
1398 false,
1399 " -> ");
1400 if (ret != 0) {
1401 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1404 found = ctdb_client_remove_tcp(client, tcp_sock);
1405 if (!found) {
1406 DBG_DEBUG("TCP connection %s not found "
1407 "(client_id %u pid %u).\n",
1408 conn_str, client_id, client->pid);
1409 return 0;
1412 D_INFO("deregistered TCP connection %s "
1413 "(client_id %u pid %u)\n",
1414 conn_str, client_id, client->pid);
1416 data.dptr = (uint8_t *)tcp_sock;
1417 data.dsize = sizeof(*tcp_sock);
1419 /* tell all nodes about this tcp connection is gone */
1420 ret = ctdb_daemon_send_control(ctdb,
1421 CTDB_BROADCAST_CONNECTED,
1423 CTDB_CONTROL_TCP_REMOVE,
1425 CTDB_CTRL_FLAG_NOREPLY,
1426 data,
1427 NULL,
1428 NULL);
1429 if (ret != 0) {
1430 DBG_ERR("Failed to send CTDB_CONTROL_TCP_REMOVE: %s\n",
1431 conn_str);
1432 return -1;
1435 return 0;
1439 called by a client to inform us of a TCP connection was passed to a different
1440 "client" (typically with multichannel to another smbd process).
1442 int32_t ctdb_control_tcp_client_passed(struct ctdb_context *ctdb,
1443 uint32_t client_id,
1444 TDB_DATA indata)
1446 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1447 struct ctdb_connection *tcp_sock = NULL;
1448 int ret;
1449 char conn_str[132] = { 0, };
1450 bool found = false;
1452 tcp_sock = (struct ctdb_connection *)indata.dptr;
1454 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1455 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1457 ret = ctdb_connection_to_buf(conn_str,
1458 sizeof(conn_str),
1459 tcp_sock,
1460 false,
1461 " -> ");
1462 if (ret != 0) {
1463 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1466 found = ctdb_client_remove_tcp(client, tcp_sock);
1467 if (!found) {
1468 DBG_DEBUG("TCP connection from %s not found "
1469 "(client_id %u pid %u).\n",
1470 conn_str, client_id, client->pid);
1471 return 0;
1474 D_INFO("TCP connection from %s "
1475 "(client_id %u pid %u) passed to another client\n",
1476 conn_str, client_id, client->pid);
1479 * We don't call CTDB_CONTROL_TCP_REMOVE
1480 * nor ctdb_remove_connection() as the connection
1481 * is still alive, but handled by another client
1484 return 0;
1488 find a tcp address on a list
1490 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1491 struct ctdb_connection *tcp)
1493 unsigned int i;
1495 if (array == NULL) {
1496 return NULL;
1499 for (i=0;i<array->num;i++) {
1500 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1501 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1502 return &array->connections[i];
1505 return NULL;
1511 called by a daemon to inform us of a TCP connection that one of its
1512 clients managing that should tickled with an ACK when IP takeover is
1513 done
1515 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1517 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1518 struct ctdb_tcp_array *tcparray;
1519 struct ctdb_connection tcp;
1520 struct ctdb_vnn *vnn;
1522 /* If we don't have public IPs, tickles are useless */
1523 if (ctdb->vnn == NULL) {
1524 return 0;
1527 vnn = find_public_ip_vnn(ctdb, &p->dst);
1528 if (vnn == NULL) {
1529 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1530 ctdb_addr_to_str(&p->dst)));
1532 return -1;
1536 tcparray = vnn->tcp_array;
1538 /* If this is the first tickle */
1539 if (tcparray == NULL) {
1540 tcparray = talloc(vnn, struct ctdb_tcp_array);
1541 CTDB_NO_MEMORY(ctdb, tcparray);
1542 vnn->tcp_array = tcparray;
1544 tcparray->num = 0;
1545 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1546 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1548 tcparray->connections[tcparray->num].src = p->src;
1549 tcparray->connections[tcparray->num].dst = p->dst;
1550 tcparray->num++;
1552 if (tcp_update_needed) {
1553 vnn->tcp_update_needed = true;
1555 return 0;
1559 /* Do we already have this tickle ?*/
1560 tcp.src = p->src;
1561 tcp.dst = p->dst;
1562 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1563 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1564 ctdb_addr_to_str(&tcp.dst),
1565 ntohs(tcp.dst.ip.sin_port),
1566 vnn->pnn));
1567 return 0;
1570 /* A new tickle, we must add it to the array */
1571 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1572 struct ctdb_connection,
1573 tcparray->num+1);
1574 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1576 tcparray->connections[tcparray->num].src = p->src;
1577 tcparray->connections[tcparray->num].dst = p->dst;
1578 tcparray->num++;
1580 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1581 ctdb_addr_to_str(&tcp.dst),
1582 ntohs(tcp.dst.ip.sin_port),
1583 vnn->pnn));
1585 if (tcp_update_needed) {
1586 vnn->tcp_update_needed = true;
1589 return 0;
1593 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1595 struct ctdb_connection *tcpp;
1597 if (vnn == NULL) {
1598 return;
1601 /* if the array is empty we can't remove it
1602 and we don't need to do anything
1604 if (vnn->tcp_array == NULL) {
1605 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1606 ctdb_addr_to_str(&conn->dst),
1607 ntohs(conn->dst.ip.sin_port)));
1608 return;
1612 /* See if we know this connection
1613 if we don't know this connection then we don't need to do anything
1615 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1616 if (tcpp == NULL) {
1617 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1618 ctdb_addr_to_str(&conn->dst),
1619 ntohs(conn->dst.ip.sin_port)));
1620 return;
1624 /* We need to remove this entry from the array.
1625 Instead of allocating a new array and copying data to it
1626 we cheat and just copy the last entry in the existing array
1627 to the entry that is to be removed and just shring the
1628 ->num field
1630 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1631 vnn->tcp_array->num--;
1633 /* If we deleted the last entry we also need to remove the entire array
1635 if (vnn->tcp_array->num == 0) {
1636 talloc_free(vnn->tcp_array);
1637 vnn->tcp_array = NULL;
1640 vnn->tcp_update_needed = true;
1642 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1643 ctdb_addr_to_str(&conn->src),
1644 ntohs(conn->src.ip.sin_port)));
1649 called by a daemon to inform us of a TCP connection that one of its
1650 clients used are no longer needed in the tickle database
1652 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1654 struct ctdb_vnn *vnn;
1655 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1657 /* If we don't have public IPs, tickles are useless */
1658 if (ctdb->vnn == NULL) {
1659 return 0;
1662 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1663 if (vnn == NULL) {
1664 DEBUG(DEBUG_ERR,
1665 (__location__ " unable to find public address %s\n",
1666 ctdb_addr_to_str(&conn->dst)));
1667 return 0;
1670 ctdb_remove_connection(vnn, conn);
1672 return 0;
1676 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1677 bool force);
1680 Called when another daemon starts - causes all tickles for all
1681 public addresses we are serving to be sent to the new node on the
1682 next check. This actually causes the tickles to be sent to the
1683 other node immediately. In case there is an error, the periodic
1684 timer will send the updates on timer event. This is simple and
1685 doesn't require careful error handling.
1687 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1689 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1690 (unsigned long) pnn));
1692 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1693 return 0;
1698 called when a client structure goes away - hook to remove
1699 elements from the tcp_list in all daemons
1701 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1703 while (client->tcp_list) {
1704 struct ctdb_vnn *vnn;
1705 struct ctdb_tcp_list *tcp = client->tcp_list;
1706 struct ctdb_connection *conn = &tcp->connection;
1708 vnn = find_public_ip_vnn(client->ctdb,
1709 &conn->dst);
1711 /* If the IP address is hosted on this node then
1712 * remove the connection. */
1713 if (vnn != NULL && vnn->pnn == client->ctdb->pnn) {
1714 ctdb_remove_connection(vnn, conn);
1717 /* Otherwise this function has been called because the
1718 * server IP address has been released to another node
1719 * and the client has exited. This means that we
1720 * should not delete the connection information. The
1721 * takeover node processes connections too. */
1724 * The destructor removes from the list
1726 TALLOC_FREE(tcp);
1731 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1733 struct ctdb_vnn *vnn, *next;
1734 int count = 0;
1736 if (ctdb_config.failover_disabled == 1) {
1737 return;
1740 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1741 bool have_ip;
1742 int ret;
1744 /* vnn can be freed below in release_ip_post() */
1745 next = vnn->next;
1747 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1748 ctdb_vnn_unassign_iface(ctdb, vnn);
1749 continue;
1752 /* Don't allow multiple releases at once. Some code,
1753 * particularly ctdb_tickle_sentenced_connections() is
1754 * not re-entrant */
1755 if (vnn->update_in_flight) {
1756 DBG_WARNING(
1757 "Not releasing IP %s/%u on interface %s, "
1758 "an update is already in progress\n",
1759 ctdb_vnn_address_string(vnn),
1760 vnn->public_netmask_bits,
1761 ctdb_vnn_iface_string(vnn));
1762 continue;
1764 vnn->update_in_flight = true;
1766 D_INFO("Release of IP %s/%u on interface %s node:-1\n",
1767 ctdb_vnn_address_string(vnn),
1768 vnn->public_netmask_bits,
1769 ctdb_vnn_iface_string(vnn));
1772 * releaseip timeouts are converted to success, or IP
1773 * might be released but releaseip event failed (due
1774 * to failure of script after 10.interface), so try
1775 * hard to correctly report failures...
1777 ret = ctdb_event_script_args(
1778 ctdb,
1779 CTDB_EVENT_RELEASE_IP,
1780 "%s %s %u",
1781 ctdb_vnn_iface_string(vnn),
1782 ctdb_vnn_address_string(vnn),
1783 vnn->public_netmask_bits);
1784 have_ip = ctdb_sys_have_ip(&vnn->public_address);
1785 if (have_ip) {
1786 if (ret != 0) {
1787 DBG_ERR("Error releasing IP %s\n",
1788 ctdb_vnn_address_string(vnn));
1789 } else {
1790 DBG_ERR("IP %s not released (timed out?)\n",
1791 ctdb_vnn_address_string(vnn));
1793 vnn->update_in_flight = false;
1794 continue;
1796 if (ret != 0) {
1797 DBG_ERR("Error releasing IP %s (but IP is gone!)\n",
1798 ctdb_vnn_address_string(vnn));
1799 vnn->update_in_flight = false;
1800 continue;
1803 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1804 if (vnn != NULL) {
1805 vnn->update_in_flight = false;
1807 count++;
1810 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1815 get list of public IPs
1817 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1818 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1820 int i, num, len;
1821 struct ctdb_public_ip_list_old *ips;
1822 struct ctdb_vnn *vnn;
1823 bool only_available = false;
1825 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1826 only_available = true;
1829 /* count how many public ip structures we have */
1830 num = 0;
1831 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1832 num++;
1835 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1836 num*sizeof(struct ctdb_public_ip);
1837 ips = talloc_zero_size(outdata, len);
1838 CTDB_NO_MEMORY(ctdb, ips);
1840 i = 0;
1841 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1842 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1843 continue;
1845 ips->ips[i].pnn = vnn->pnn;
1846 ips->ips[i].addr = vnn->public_address;
1847 i++;
1849 ips->num = i;
1850 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1851 i*sizeof(struct ctdb_public_ip);
1853 outdata->dsize = len;
1854 outdata->dptr = (uint8_t *)ips;
1856 return 0;
1860 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1861 struct ctdb_req_control_old *c,
1862 TDB_DATA indata,
1863 TDB_DATA *outdata)
1865 int i, num, len;
1866 ctdb_sock_addr *addr;
1867 struct ctdb_public_ip_info_old *info;
1868 struct ctdb_vnn *vnn;
1869 struct vnn_interface *iface;
1871 addr = (ctdb_sock_addr *)indata.dptr;
1873 vnn = find_public_ip_vnn(ctdb, addr);
1874 if (vnn == NULL) {
1875 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1876 "'%s'not a public address\n",
1877 ctdb_addr_to_str(addr)));
1878 return -1;
1881 /* count how many public ip structures we have */
1882 num = 0;
1883 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1884 num++;
1887 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1888 num*sizeof(struct ctdb_iface);
1889 info = talloc_zero_size(outdata, len);
1890 CTDB_NO_MEMORY(ctdb, info);
1892 info->ip.addr = vnn->public_address;
1893 info->ip.pnn = vnn->pnn;
1894 info->active_idx = 0xFFFFFFFF;
1896 i = 0;
1897 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1898 struct ctdb_interface *cur;
1900 cur = iface->iface;
1901 if (vnn->iface == cur) {
1902 info->active_idx = i;
1904 strncpy(info->ifaces[i].name, cur->name,
1905 sizeof(info->ifaces[i].name));
1906 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1907 info->ifaces[i].link_state = cur->link_up;
1908 info->ifaces[i].references = cur->references;
1910 i++;
1912 info->num = i;
1913 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1914 i*sizeof(struct ctdb_iface);
1916 outdata->dsize = len;
1917 outdata->dptr = (uint8_t *)info;
1919 return 0;
1922 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1923 struct ctdb_req_control_old *c,
1924 TDB_DATA *outdata)
1926 int i, num, len;
1927 struct ctdb_iface_list_old *ifaces;
1928 struct ctdb_interface *cur;
1930 /* count how many public ip structures we have */
1931 num = 0;
1932 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1933 num++;
1936 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1937 num*sizeof(struct ctdb_iface);
1938 ifaces = talloc_zero_size(outdata, len);
1939 CTDB_NO_MEMORY(ctdb, ifaces);
1941 i = 0;
1942 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1943 strncpy(ifaces->ifaces[i].name, cur->name,
1944 sizeof(ifaces->ifaces[i].name));
1945 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1946 ifaces->ifaces[i].link_state = cur->link_up;
1947 ifaces->ifaces[i].references = cur->references;
1948 i++;
1950 ifaces->num = i;
1951 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1952 i*sizeof(struct ctdb_iface);
1954 outdata->dsize = len;
1955 outdata->dptr = (uint8_t *)ifaces;
1957 return 0;
1960 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1961 struct ctdb_req_control_old *c,
1962 TDB_DATA indata)
1964 struct ctdb_iface *info;
1965 struct ctdb_interface *iface;
1966 bool link_up = false;
1968 info = (struct ctdb_iface *)indata.dptr;
1970 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1971 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1972 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1973 len, len, info->name));
1974 return -1;
1977 switch (info->link_state) {
1978 case 0:
1979 link_up = false;
1980 break;
1981 case 1:
1982 link_up = true;
1983 break;
1984 default:
1985 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1986 (unsigned int)info->link_state));
1987 return -1;
1990 if (info->references != 0) {
1991 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1992 (unsigned int)info->references));
1993 return -1;
1996 iface = ctdb_find_iface(ctdb, info->name);
1997 if (iface == NULL) {
1998 return -1;
2001 if (link_up == iface->link_up) {
2002 return 0;
2005 DEBUG(DEBUG_ERR,
2006 ("iface[%s] has changed it's link status %s => %s\n",
2007 iface->name,
2008 iface->link_up?"up":"down",
2009 link_up?"up":"down"));
2011 iface->link_up = link_up;
2012 return 0;
2017 called by a daemon to inform us of the entire list of TCP tickles for
2018 a particular public address.
2019 this control should only be sent by the node that is currently serving
2020 that public address.
2022 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2024 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2025 struct ctdb_tcp_array *tcparray;
2026 struct ctdb_vnn *vnn;
2028 /* We must at least have tickles.num or else we can't verify the size
2029 of the received data blob
2031 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2032 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2033 return -1;
2036 /* verify that the size of data matches what we expect */
2037 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2038 + sizeof(struct ctdb_connection) * list->num) {
2039 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2040 return -1;
2043 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2044 ctdb_addr_to_str(&list->addr)));
2046 vnn = find_public_ip_vnn(ctdb, &list->addr);
2047 if (vnn == NULL) {
2048 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2049 ctdb_addr_to_str(&list->addr)));
2051 return 1;
2054 if (vnn->pnn == ctdb->pnn) {
2055 DEBUG(DEBUG_INFO,
2056 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2057 ctdb_addr_to_str(&list->addr)));
2058 return 0;
2061 /* remove any old ticklelist we might have */
2062 talloc_free(vnn->tcp_array);
2063 vnn->tcp_array = NULL;
2065 tcparray = talloc(vnn, struct ctdb_tcp_array);
2066 CTDB_NO_MEMORY(ctdb, tcparray);
2068 tcparray->num = list->num;
2070 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2071 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2073 memcpy(tcparray->connections, &list->connections[0],
2074 sizeof(struct ctdb_connection)*tcparray->num);
2076 /* We now have a new fresh tickle list array for this vnn */
2077 vnn->tcp_array = tcparray;
2079 return 0;
2083 called to return the full list of tickles for the puclic address associated
2084 with the provided vnn
2086 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2088 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2089 struct ctdb_tickle_list_old *list;
2090 struct ctdb_tcp_array *tcparray;
2091 unsigned int num, i;
2092 struct ctdb_vnn *vnn;
2093 unsigned port;
2095 vnn = find_public_ip_vnn(ctdb, addr);
2096 if (vnn == NULL) {
2097 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2098 ctdb_addr_to_str(addr)));
2100 return 1;
2103 port = ctdb_addr_to_port(addr);
2105 tcparray = vnn->tcp_array;
2106 num = 0;
2107 if (tcparray != NULL) {
2108 if (port == 0) {
2109 /* All connections */
2110 num = tcparray->num;
2111 } else {
2112 /* Count connections for port */
2113 for (i = 0; i < tcparray->num; i++) {
2114 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2115 num++;
2121 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2122 + sizeof(struct ctdb_connection) * num;
2124 outdata->dptr = talloc_size(outdata, outdata->dsize);
2125 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2126 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2128 list->addr = *addr;
2129 list->num = num;
2131 if (num == 0) {
2132 return 0;
2135 num = 0;
2136 for (i = 0; i < tcparray->num; i++) {
2137 if (port == 0 || \
2138 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2139 list->connections[num] = tcparray->connections[i];
2140 num++;
2144 return 0;
2149 set the list of all tcp tickles for a public address
2151 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2152 ctdb_sock_addr *addr,
2153 struct ctdb_tcp_array *tcparray)
2155 int ret, num;
2156 TDB_DATA data;
2157 struct ctdb_tickle_list_old *list;
2159 if (tcparray) {
2160 num = tcparray->num;
2161 } else {
2162 num = 0;
2165 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2166 sizeof(struct ctdb_connection) * num;
2167 data.dptr = talloc_size(ctdb, data.dsize);
2168 CTDB_NO_MEMORY(ctdb, data.dptr);
2170 list = (struct ctdb_tickle_list_old *)data.dptr;
2171 list->addr = *addr;
2172 list->num = num;
2173 if (tcparray) {
2174 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2177 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2178 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2179 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2180 if (ret != 0) {
2181 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2182 return -1;
2185 talloc_free(data.dptr);
2187 return ret;
2190 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2191 bool force)
2193 struct ctdb_vnn *vnn;
2194 int ret;
2196 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2197 /* we only send out updates for public addresses that
2198 we have taken over
2200 if (ctdb->pnn != vnn->pnn) {
2201 continue;
2204 /* We only send out the updates if we need to */
2205 if (!force && !vnn->tcp_update_needed) {
2206 continue;
2209 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2210 &vnn->public_address,
2211 vnn->tcp_array);
2212 if (ret != 0) {
2213 D_ERR("Failed to send the tickle update for ip %s\n",
2214 ctdb_vnn_address_string(vnn));
2215 vnn->tcp_update_needed = true;
2216 } else {
2217 D_INFO("Sent tickle update for ip %s\n",
2218 ctdb_vnn_address_string(vnn));
2219 vnn->tcp_update_needed = false;
2226 perform tickle updates if required
2228 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2229 struct tevent_timer *te,
2230 struct timeval t, void *private_data)
2232 struct ctdb_context *ctdb = talloc_get_type(
2233 private_data, struct ctdb_context);
2235 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2237 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2238 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2239 ctdb_update_tcp_tickles, ctdb);
2243 start periodic update of tcp tickles
2245 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2247 ctdb->tickle_update_context = talloc_new(ctdb);
2249 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2250 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2251 ctdb_update_tcp_tickles, ctdb);
2257 struct control_gratious_arp {
2258 struct ctdb_context *ctdb;
2259 ctdb_sock_addr addr;
2260 const char *iface;
2261 int count;
2265 send a control_gratuitous arp
2267 static void send_gratious_arp(struct tevent_context *ev,
2268 struct tevent_timer *te,
2269 struct timeval t, void *private_data)
2271 int ret;
2272 struct control_gratious_arp *arp = talloc_get_type(private_data,
2273 struct control_gratious_arp);
2275 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2276 if (ret != 0) {
2277 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2278 arp->iface, strerror(ret));
2282 arp->count++;
2283 if (arp->count == CTDB_ARP_REPEAT) {
2284 talloc_free(arp);
2285 return;
2288 tevent_add_timer(arp->ctdb->ev, arp,
2289 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2290 send_gratious_arp, arp);
2295 send a gratious arp
2297 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2299 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2300 struct control_gratious_arp *arp;
2302 /* verify the size of indata */
2303 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2304 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2305 (unsigned)indata.dsize,
2306 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2307 return -1;
2309 if (indata.dsize !=
2310 ( offsetof(struct ctdb_addr_info_old, iface)
2311 + gratious_arp->len ) ){
2313 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2314 "but should be %u bytes\n",
2315 (unsigned)indata.dsize,
2316 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2317 return -1;
2321 arp = talloc(ctdb, struct control_gratious_arp);
2322 CTDB_NO_MEMORY(ctdb, arp);
2324 arp->ctdb = ctdb;
2325 arp->addr = gratious_arp->addr;
2326 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2327 CTDB_NO_MEMORY(ctdb, arp->iface);
2328 arp->count = 0;
2330 tevent_add_timer(arp->ctdb->ev, arp,
2331 timeval_zero(), send_gratious_arp, arp);
2333 return 0;
2336 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2338 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2339 int ret;
2341 /* verify the size of indata */
2342 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2343 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2344 return -1;
2346 if (indata.dsize !=
2347 ( offsetof(struct ctdb_addr_info_old, iface)
2348 + pub->len ) ){
2350 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2351 "but should be %u bytes\n",
2352 (unsigned)indata.dsize,
2353 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2354 return -1;
2357 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2359 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2361 if (ret != 0) {
2362 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2363 return -1;
2366 return 0;
2369 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2371 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2372 struct ctdb_vnn *vnn;
2374 /* verify the size of indata */
2375 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2376 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2377 return -1;
2379 if (indata.dsize !=
2380 ( offsetof(struct ctdb_addr_info_old, iface)
2381 + pub->len ) ){
2383 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2384 "but should be %u bytes\n",
2385 (unsigned)indata.dsize,
2386 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2387 return -1;
2390 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2392 vnn = find_public_ip_vnn(ctdb, &pub->addr);
2393 if (vnn == NULL) {
2394 D_ERR("Delete IP of unknown public IP address %s\n",
2395 ctdb_addr_to_str(&pub->addr));
2396 return -1;
2399 if (vnn->pnn == ctdb->pnn) {
2401 * This IP is currently being hosted. Defer the
2402 * deletion until the next takeover run. "ctdb
2403 * reloadips" will always cause a takeover run. "ctdb
2404 * delip" will now need an explicit "ctdb
2405 * ipreallocated" afterwards.
2407 vnn->delete_pending = true;
2408 } else {
2410 * This IP is not hosted on the current node so just
2411 * delete it now.
2413 do_delete_ip(ctdb, vnn);
2416 return 0;
/* State handed to the "ipreallocated" event callback */
struct ipreallocated_callback_state {
	/* Request to answer once the event script has finished */
	struct ctdb_req_control_old *c;
};
2424 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2425 int status, void *p)
2427 struct ipreallocated_callback_state *state =
2428 talloc_get_type(p, struct ipreallocated_callback_state);
2429 TDB_DATA data = { .dsize = 0, };
2431 if (status != 0) {
2432 DEBUG(DEBUG_ERR,
2433 (" \"ipreallocated\" event script failed (status %d)\n",
2434 status));
2435 if (status == -ETIMEDOUT) {
2436 ctdb_ban_self(ctdb);
2440 D_INFO("Sending IPREALLOCATED message\n");
2441 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_IPREALLOCATED, data);
2443 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2444 talloc_free(state);
2447 /* A control to run the ipreallocated event */
2448 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2449 struct ctdb_req_control_old *c,
2450 bool *async_reply)
2452 int ret;
2453 struct ipreallocated_callback_state *state;
2455 state = talloc(ctdb, struct ipreallocated_callback_state);
2456 CTDB_NO_MEMORY(ctdb, state);
2458 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2460 ret = ctdb_event_script_callback(ctdb, state,
2461 ctdb_ipreallocated_callback, state,
2462 CTDB_EVENT_IPREALLOCATED,
2463 "%s", "");
2465 if (ret != 0) {
2466 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2467 talloc_free(state);
2468 return -1;
2471 /* tell the control that we will be reply asynchronously */
2472 state->c = talloc_steal(state, c);
2473 *async_reply = true;
2475 return 0;
/* State handed to the "startipreallocate" event callback */
struct start_ipreallocate_callback_state {
	/* Request to answer once the event script has finished */
	struct ctdb_req_control_old *c;
};
2483 static void ctdb_start_ipreallocate_callback(struct ctdb_context *ctdb,
2484 int status, void *p)
2486 struct start_ipreallocate_callback_state *state = talloc_get_type_abort(
2487 p, struct start_ipreallocate_callback_state);
2488 TDB_DATA data = { .dsize = 0, };
2490 if (status != 0) {
2491 D_ERR("\"startipreallocate\" event failed (status %d)\n",
2492 status);
2493 if (status == -ETIMEDOUT) {
2494 ctdb_ban_self(ctdb);
2498 D_INFO("Sending START_IPREALLOCATE message\n");
2499 ctdb_daemon_send_message(ctdb,
2500 ctdb->pnn,
2501 CTDB_SRVID_START_IPREALLOCATE,
2502 data);
2504 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2505 talloc_free(state);
2508 /* A control to run the startipreallocate event */
2509 int32_t ctdb_control_start_ipreallocate(struct ctdb_context *ctdb,
2510 struct ctdb_req_control_old *c,
2511 bool *async_reply)
2513 int ret;
2514 struct start_ipreallocate_callback_state *state;
2516 /* Nodes that are not RUNNING can not host IPs */
2517 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
2518 DBG_INFO("Skipping \"startipreallocate\" event, not RUNNING\n");
2519 return 0;
2522 state = talloc(ctdb, struct start_ipreallocate_callback_state);
2523 if (state == NULL) {
2524 DBG_ERR("Memory allocation error\n");
2525 return -1;
2528 DBG_INFO("Running \"startipreallocate\" event\n");
2530 ret = ctdb_event_script_callback(ctdb,
2531 state,
2532 ctdb_start_ipreallocate_callback,
2533 state,
2534 CTDB_EVENT_START_IPREALLOCATE,
2535 "%s",
2536 "");
2538 if (ret != 0) {
2539 D_ERR("Failed to run \"startipreallocate\" event \n");
2540 talloc_free(state);
2541 return -1;
2544 /* tell the control that we will be reply asynchronously */
2545 state->c = talloc_steal(state, c);
2546 *async_reply = true;
2548 return 0;
/*
 * Tracks one in-flight "reload public IPs" request: the forked child
 * doing the work, the pipe it reports on and the control to answer.
 */
struct ctdb_reloadips_handle {
	/* Daemon context that owns this handle */
	struct ctdb_context *ctdb;
	/* Pending control; answered by the destructor when non-NULL */
	struct ctdb_req_control_old *c;
	/* Status sent in the reply; starts at -1, set from the child's byte */
	int status;
	/* Pipe: the parent reads fd[0], the child writes fd[1] */
	int fd[2];
	/* PID of the forked child; killed by the destructor */
	pid_t child;
	/* Read event on fd[0] */
	struct tevent_fd *fde;
};
2561 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2563 if (h == h->ctdb->reload_ips) {
2564 h->ctdb->reload_ips = NULL;
2566 if (h->c != NULL) {
2567 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2568 h->c = NULL;
2570 ctdb_kill(h->ctdb, h->child, SIGKILL);
2571 return 0;
2574 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2575 struct tevent_timer *te,
2576 struct timeval t, void *private_data)
2578 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2580 talloc_free(h);
2583 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2584 struct tevent_fd *fde,
2585 uint16_t flags, void *private_data)
2587 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2589 char res;
2590 int ret;
2592 ret = sys_read(h->fd[0], &res, 1);
2593 if (ret < 1 || res != 0) {
2594 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2595 res = 1;
2597 h->status = res;
2599 talloc_free(h);
2602 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2604 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2605 struct ctdb_public_ip_list_old *ips;
2606 struct ctdb_vnn *vnn;
2607 struct client_async_data *async_data;
2608 struct timeval timeout;
2609 TDB_DATA data;
2610 struct ctdb_client_control_state *state;
2611 bool first_add;
2612 unsigned int i;
2613 int ret;
2615 CTDB_NO_MEMORY(ctdb, mem_ctx);
2617 /* Read IPs from local node */
2618 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2619 CTDB_CURRENT_NODE, mem_ctx, &ips);
2620 if (ret != 0) {
2621 DEBUG(DEBUG_ERR,
2622 ("Unable to fetch public IPs from local node\n"));
2623 talloc_free(mem_ctx);
2624 return -1;
2627 /* Read IPs file - this is safe since this is a child process */
2628 ctdb->vnn = NULL;
2629 if (ctdb_set_public_addresses(ctdb) != 0) {
2630 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2631 talloc_free(mem_ctx);
2632 return -1;
2635 async_data = talloc_zero(mem_ctx, struct client_async_data);
2636 CTDB_NO_MEMORY(ctdb, async_data);
2638 /* Compare IPs between node and file for IPs to be deleted */
2639 for (i = 0; i < ips->num; i++) {
2640 struct ctdb_addr_info_old *pub = NULL;
2642 vnn = find_public_ip_vnn(ctdb, &ips->ips[i].addr);
2643 if (vnn != NULL) {
2644 /* IP is still in file */
2645 continue;
2649 * Delete IP ips->ips[i]
2652 D_NOTICE("IP %s no longer configured, deleting it\n",
2653 ctdb_addr_to_str(&ips->ips[i].addr));
2655 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2656 CTDB_NO_MEMORY(ctdb, pub);
2658 pub->addr = ips->ips[i].addr;
2659 pub->mask = 0;
2660 pub->len = 0;
2662 timeout = TAKEOVER_TIMEOUT();
2664 data.dsize = offsetof(struct ctdb_addr_info_old,
2665 iface) + pub->len;
2666 data.dptr = (uint8_t *)pub;
2668 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2669 CTDB_CONTROL_DEL_PUBLIC_IP,
2670 0, data, async_data,
2671 &timeout, NULL);
2672 if (state == NULL) {
2673 DBG_ERR("Failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n");
2674 goto failed;
2677 ctdb_client_async_add(async_data, state);
2680 /* Compare IPs between node and file for IPs to be added */
2681 first_add = true;
2682 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2683 for (i = 0; i < ips->num; i++) {
2684 if (ctdb_same_ip(&vnn->public_address,
2685 &ips->ips[i].addr)) {
2686 /* IP already on node */
2687 break;
2690 if (i == ips->num) {
2691 /* Add IP ips->ips[i] */
2692 struct ctdb_addr_info_old *pub;
2693 const char *ifaces = NULL;
2694 uint32_t len;
2695 struct vnn_interface *iface = NULL;
2697 D_NOTICE("New IP %s configured, adding it\n",
2698 ctdb_vnn_address_string(vnn));
2699 if (first_add) {
2700 uint32_t pnn = ctdb_get_pnn(ctdb);
2702 data.dsize = sizeof(pnn);
2703 data.dptr = (uint8_t *)&pnn;
2705 ret = ctdb_client_send_message(
2706 ctdb,
2707 CTDB_BROADCAST_CONNECTED,
2708 CTDB_SRVID_REBALANCE_NODE,
2709 data);
2710 if (ret != 0) {
2711 DEBUG(DEBUG_WARNING,
2712 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2715 first_add = false;
2718 ifaces = vnn->ifaces->iface->name;
2719 iface = vnn->ifaces->next;
2720 while (iface != NULL) {
2721 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2722 iface->iface->name);
2723 iface = iface->next;
2726 len = strlen(ifaces) + 1;
2727 pub = talloc_zero_size(mem_ctx,
2728 offsetof(struct ctdb_addr_info_old, iface) + len);
2729 CTDB_NO_MEMORY(ctdb, pub);
2731 pub->addr = vnn->public_address;
2732 pub->mask = vnn->public_netmask_bits;
2733 pub->len = len;
2734 memcpy(&pub->iface[0], ifaces, pub->len);
2736 timeout = TAKEOVER_TIMEOUT();
2738 data.dsize = offsetof(struct ctdb_addr_info_old,
2739 iface) + pub->len;
2740 data.dptr = (uint8_t *)pub;
2742 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2743 CTDB_CONTROL_ADD_PUBLIC_IP,
2744 0, data, async_data,
2745 &timeout, NULL);
2746 if (state == NULL) {
2747 DEBUG(DEBUG_ERR,
2748 (__location__
2749 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2750 goto failed;
2753 ctdb_client_async_add(async_data, state);
2757 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2758 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2759 goto failed;
2762 talloc_free(mem_ctx);
2763 return 0;
2765 failed:
2766 talloc_free(mem_ctx);
2767 return -1;
2770 /* This control is sent to force the node to re-read the public addresses file
2771 and drop any addresses we should nnot longer host, and add new addresses
2772 that we are now able to host
2774 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2776 struct ctdb_reloadips_handle *h;
2777 pid_t parent = getpid();
2779 if (ctdb->reload_ips != NULL) {
2780 talloc_free(ctdb->reload_ips);
2781 ctdb->reload_ips = NULL;
2784 h = talloc(ctdb, struct ctdb_reloadips_handle);
2785 CTDB_NO_MEMORY(ctdb, h);
2786 h->ctdb = ctdb;
2787 h->c = NULL;
2788 h->status = -1;
2790 if (pipe(h->fd) == -1) {
2791 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2792 talloc_free(h);
2793 return -1;
2796 h->child = ctdb_fork(ctdb);
2797 if (h->child == (pid_t)-1) {
2798 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2799 close(h->fd[0]);
2800 close(h->fd[1]);
2801 talloc_free(h);
2802 return -1;
2805 /* child process */
2806 if (h->child == 0) {
2807 signed char res = 0;
2809 close(h->fd[0]);
2811 prctl_set_comment("ctdb_reloadips");
2812 if (switch_from_server_to_client(ctdb) != 0) {
2813 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2814 res = -1;
2815 } else {
2816 res = ctdb_reloadips_child(ctdb);
2817 if (res != 0) {
2818 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2822 sys_write(h->fd[1], &res, 1);
2823 ctdb_wait_for_process_to_exit(parent);
2824 _exit(0);
2827 h->c = talloc_steal(h, c);
2829 close(h->fd[1]);
2830 set_close_on_exec(h->fd[0]);
2832 talloc_set_destructor(h, ctdb_reloadips_destructor);
2835 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2836 ctdb_reloadips_child_handler, (void *)h);
2837 tevent_fd_set_auto_close(h->fde);
2839 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2840 ctdb_reloadips_timeout_event, h);
2842 /* we reply later */
2843 *async_reply = true;
2844 return 0;