wbclient: Fix a typo
[samba4-gss.git] / ctdb / server / ctdb_takeover.c
blob0f7ac756497e4b3a907f1e0e7eb84b1ccecaa3ff
1 /*
2 ctdb ip takeover code
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
27 #include <talloc.h>
28 #include <tevent.h>
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_file.h"
34 #include "lib/util/sys_rw.h"
35 #include "lib/util/util_process.h"
37 #include "protocol/protocol_util.h"
39 #include "ctdb_private.h"
40 #include "ctdb_client.h"
42 #include "common/reqid.h"
43 #include "common/system.h"
44 #include "common/system_socket.h"
45 #include "common/common.h"
46 #include "common/logging.h"
48 #include "server/ctdb_config.h"
50 #include "server/ipalloc.h"
/*
 * Timeout built from the takeover_timeout tunable via
 * timeval_current_ofs().  Requires a variable named "ctdb" to be in
 * scope at the point of use.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument of the timer offset used between repeated ARP sends */
#define CTDB_ARP_INTERVAL 1
/* Number of send rounds after which the repeating ARP timer is freed */
#define CTDB_ARP_REPEAT 3
/* A local network interface, kept on the ctdb->ifaces list */
struct ctdb_interface {
	struct ctdb_interface *prev, *next;
	const char *name;
	bool link_up;
	/* incremented when a VNN is assigned here, decremented on unassign */
	uint32_t references;
};
/* One entry in a VNN's list of interfaces (ctdb_vnn.ifaces) */
struct vnn_interface {
	struct vnn_interface *prev, *next;
	struct ctdb_interface *iface;
};
/* state associated with a public ip address */
struct ctdb_vnn {
	struct ctdb_vnn *prev, *next;

	/* interface the address is currently assigned to, NULL if none */
	struct ctdb_interface *iface;
	/* interfaces listed for this address when it was added */
	struct vnn_interface *ifaces;
	ctdb_sock_addr public_address;
	uint8_t public_netmask_bits;

	/*
	 * The node number that is serving this public address - set
	 * to CTDB_UNKNOWN_PNN if no node is serving it
	 */
	uint32_t pnn;

	/* List of clients to tickle for this public address */
	struct ctdb_tcp_array *tcp_array;

	/* whether we need to update the other nodes with changes to our list
	   of connected clients */
	bool tcp_update_needed;

	/* a context to hang sending gratuitous arp events off */
	TALLOC_CTX *takeover_ctx;

	/* Set to true any time an update to this VNN is in flight.
	   This helps to avoid races. */
	bool update_in_flight;

	/* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
	 * address then this flag is set.  It will be deleted in the
	 * release IP callback. */
	bool delete_pending;
};
104 static const char *iface_string(const struct ctdb_interface *iface)
106 return (iface != NULL ? iface->name : "__none__");
109 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
111 return iface_string(vnn->iface);
114 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
115 const char *iface);
117 static struct ctdb_interface *
118 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
120 struct ctdb_interface *i;
122 if (strlen(iface) > CTDB_IFACE_SIZE) {
123 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
124 return NULL;
127 /* Verify that we don't have an entry for this ip yet */
128 i = ctdb_find_iface(ctdb, iface);
129 if (i != NULL) {
130 return i;
133 /* create a new structure for this interface */
134 i = talloc_zero(ctdb, struct ctdb_interface);
135 if (i == NULL) {
136 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
137 return NULL;
139 i->name = talloc_strdup(i, iface);
140 if (i->name == NULL) {
141 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
142 talloc_free(i);
143 return NULL;
146 i->link_up = true;
148 DLIST_ADD(ctdb->ifaces, i);
150 return i;
153 static bool vnn_has_interface(struct ctdb_vnn *vnn,
154 const struct ctdb_interface *iface)
156 struct vnn_interface *i;
158 for (i = vnn->ifaces; i != NULL; i = i->next) {
159 if (iface == i->iface) {
160 return true;
164 return false;
167 /* If any interfaces now have no possible IPs then delete them. This
168 * implementation is naive (i.e. simple) rather than clever
169 * (i.e. complex). Given that this is run on delip and that operation
170 * is rare, this doesn't need to be efficient - it needs to be
171 * foolproof. One alternative is reference counting, where the logic
172 * is distributed and can, therefore, be broken in multiple places.
173 * Another alternative is to build a red-black tree of interfaces that
174 * can have addresses (by walking ctdb->vnn once) and then walking
175 * ctdb->ifaces once and deleting those not in the tree. Let's go to
176 * one of those if the naive implementation causes problems... :-)
178 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
179 struct ctdb_vnn *vnn)
181 struct ctdb_interface *i, *next;
183 /* For each interface, check if there's an IP using it. */
184 for (i = ctdb->ifaces; i != NULL; i = next) {
185 struct ctdb_vnn *tv;
186 bool found;
187 next = i->next;
189 /* Only consider interfaces named in the given VNN. */
190 if (!vnn_has_interface(vnn, i)) {
191 continue;
194 /* Search for a vnn with this interface. */
195 found = false;
196 for (tv=ctdb->vnn; tv; tv=tv->next) {
197 if (vnn_has_interface(tv, i)) {
198 found = true;
199 break;
203 if (!found) {
204 /* None of the VNNs are using this interface. */
205 DLIST_REMOVE(ctdb->ifaces, i);
206 talloc_free(i);
212 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
213 const char *iface)
215 struct ctdb_interface *i;
217 for (i=ctdb->ifaces;i;i=i->next) {
218 if (strcmp(i->name, iface) == 0) {
219 return i;
223 return NULL;
226 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
227 struct ctdb_vnn *vnn)
229 struct vnn_interface *i;
230 struct ctdb_interface *cur = NULL;
231 struct ctdb_interface *best = NULL;
233 for (i = vnn->ifaces; i != NULL; i = i->next) {
235 cur = i->iface;
237 if (!cur->link_up) {
238 continue;
241 if (best == NULL) {
242 best = cur;
243 continue;
246 if (cur->references < best->references) {
247 best = cur;
248 continue;
252 return best;
255 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
256 struct ctdb_vnn *vnn)
258 struct ctdb_interface *best = NULL;
260 if (vnn->iface) {
261 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
262 "still assigned to iface '%s'\n",
263 ctdb_addr_to_str(&vnn->public_address),
264 ctdb_vnn_iface_string(vnn)));
265 return 0;
268 best = ctdb_vnn_best_iface(ctdb, vnn);
269 if (best == NULL) {
270 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
271 "cannot assign to iface any iface\n",
272 ctdb_addr_to_str(&vnn->public_address)));
273 return -1;
276 vnn->iface = best;
277 best->references++;
278 vnn->pnn = ctdb->pnn;
280 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
281 "now assigned to iface '%s' refs[%d]\n",
282 ctdb_addr_to_str(&vnn->public_address),
283 ctdb_vnn_iface_string(vnn),
284 best->references));
285 return 0;
288 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
289 struct ctdb_vnn *vnn)
291 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
292 "now unassigned (old iface '%s' refs[%d])\n",
293 ctdb_addr_to_str(&vnn->public_address),
294 ctdb_vnn_iface_string(vnn),
295 vnn->iface?vnn->iface->references:0));
296 if (vnn->iface) {
297 vnn->iface->references--;
299 vnn->iface = NULL;
300 if (vnn->pnn == ctdb->pnn) {
301 vnn->pnn = CTDB_UNKNOWN_PNN;
305 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
306 struct ctdb_vnn *vnn)
308 uint32_t flags;
309 struct vnn_interface *i;
311 /* Nodes that are not RUNNING can not host IPs */
312 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
313 return false;
316 flags = ctdb->nodes[ctdb->pnn]->flags;
317 if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) {
318 return false;
321 if (vnn->delete_pending) {
322 return false;
325 if (vnn->iface && vnn->iface->link_up) {
326 return true;
329 for (i = vnn->ifaces; i != NULL; i = i->next) {
330 if (i->iface->link_up) {
331 return true;
335 return false;
/* State for the repeating gratuitous ARP / TCP tickle timer of one VNN */
struct ctdb_takeover_arp {
	struct ctdb_context *ctdb;
	/* number of send rounds completed so far */
	uint32_t count;
	/* address to announce, copied from the VNN's public address */
	ctdb_sock_addr addr;
	/* connections to tickle, taken over from the VNN; may be NULL */
	struct ctdb_tcp_array *tcparray;
	struct ctdb_vnn *vnn;
};
/*
  lists of tcp endpoints
 */
struct ctdb_tcp_list {
	struct ctdb_tcp_list *prev, *next;
	/* client that registered the connection; the entry is on its tcp_list */
	struct ctdb_client *client;
	struct ctdb_connection connection;
};
357 send a gratuitous arp
359 static void ctdb_control_send_arp(struct tevent_context *ev,
360 struct tevent_timer *te,
361 struct timeval t, void *private_data)
363 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
364 struct ctdb_takeover_arp);
365 int ret;
366 struct ctdb_tcp_array *tcparray;
367 const char *iface;
369 /* IP address might have been released between sends */
370 if (arp->vnn->iface == NULL) {
371 DBG_INFO("Cancelling ARP send for released IP %s\n",
372 ctdb_addr_to_str(&arp->vnn->public_address));
373 talloc_free(arp);
374 return;
377 iface = ctdb_vnn_iface_string(arp->vnn);
378 ret = ctdb_sys_send_arp(&arp->addr, iface);
379 if (ret != 0) {
380 DBG_ERR("Failed to send ARP on interface %s: %s\n",
381 iface, strerror(ret));
384 tcparray = arp->tcparray;
385 if (tcparray) {
386 unsigned int i;
388 for (i=0;i<tcparray->num;i++) {
389 struct ctdb_connection *tcon;
390 char buf[128];
392 tcon = &tcparray->connections[i];
393 ret = ctdb_connection_to_buf(buf,
394 sizeof(buf),
395 tcon,
396 false,
397 " -> ");
398 if (ret != 0) {
399 strlcpy(buf, "UNKNOWN", sizeof(buf));
401 D_INFO("Send TCP tickle ACK: %s\n", buf);
402 ret = ctdb_sys_send_tcp(
403 &tcon->src,
404 &tcon->dst,
405 0, 0, 0);
406 if (ret != 0) {
407 DBG_ERR("Failed to send TCP tickle ACK: %s\n",
408 buf);
413 arp->count++;
415 if (arp->count == CTDB_ARP_REPEAT) {
416 talloc_free(arp);
417 return;
420 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
421 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
422 ctdb_control_send_arp, arp);
425 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
426 struct ctdb_vnn *vnn)
428 struct ctdb_takeover_arp *arp;
429 struct ctdb_tcp_array *tcparray;
431 if (!vnn->takeover_ctx) {
432 vnn->takeover_ctx = talloc_new(vnn);
433 if (!vnn->takeover_ctx) {
434 return -1;
438 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
439 if (!arp) {
440 return -1;
443 arp->ctdb = ctdb;
444 arp->addr = vnn->public_address;
445 arp->vnn = vnn;
447 tcparray = vnn->tcp_array;
448 if (tcparray) {
449 /* add all of the known tcp connections for this IP to the
450 list of tcp connections to send tickle acks for */
451 arp->tcparray = talloc_steal(arp, tcparray);
453 vnn->tcp_array = NULL;
454 vnn->tcp_update_needed = true;
457 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
458 timeval_zero(), ctdb_control_send_arp, arp);
460 return 0;
/* State carried from ctdb_do_takeip() to its event script callback */
struct ctdb_do_takeip_state {
	/* control request to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	struct ctdb_vnn *vnn;
};
469 called when takeip event finishes
471 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
472 void *private_data)
474 struct ctdb_do_takeip_state *state =
475 talloc_get_type(private_data, struct ctdb_do_takeip_state);
476 int32_t ret;
477 TDB_DATA data;
479 if (status != 0) {
480 if (status == -ETIMEDOUT) {
481 ctdb_ban_self(ctdb);
483 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
484 ctdb_addr_to_str(&state->vnn->public_address),
485 ctdb_vnn_iface_string(state->vnn)));
486 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
488 talloc_free(state);
489 return;
492 if (ctdb->do_checkpublicip) {
494 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
495 if (ret != 0) {
496 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
497 talloc_free(state);
498 return;
503 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
504 data.dsize = strlen((char *)data.dptr) + 1;
505 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
507 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
510 /* the control succeeded */
511 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
512 talloc_free(state);
513 return;
516 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
518 state->vnn->update_in_flight = false;
519 return 0;
523 take over an ip address
525 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
526 struct ctdb_req_control_old *c,
527 struct ctdb_vnn *vnn)
529 int ret;
530 struct ctdb_do_takeip_state *state;
532 if (vnn->update_in_flight) {
533 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
534 "update for this IP already in flight\n",
535 ctdb_addr_to_str(&vnn->public_address),
536 vnn->public_netmask_bits));
537 return -1;
540 ret = ctdb_vnn_assign_iface(ctdb, vnn);
541 if (ret != 0) {
542 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
543 "assign a usable interface\n",
544 ctdb_addr_to_str(&vnn->public_address),
545 vnn->public_netmask_bits));
546 return -1;
549 state = talloc(vnn, struct ctdb_do_takeip_state);
550 CTDB_NO_MEMORY(ctdb, state);
552 state->c = NULL;
553 state->vnn = vnn;
555 vnn->update_in_flight = true;
556 talloc_set_destructor(state, ctdb_takeip_destructor);
558 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
559 ctdb_addr_to_str(&vnn->public_address),
560 vnn->public_netmask_bits,
561 ctdb_vnn_iface_string(vnn)));
563 ret = ctdb_event_script_callback(ctdb,
564 state,
565 ctdb_do_takeip_callback,
566 state,
567 CTDB_EVENT_TAKE_IP,
568 "%s %s %u",
569 ctdb_vnn_iface_string(vnn),
570 ctdb_addr_to_str(&vnn->public_address),
571 vnn->public_netmask_bits);
573 if (ret != 0) {
574 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
575 ctdb_addr_to_str(&vnn->public_address),
576 ctdb_vnn_iface_string(vnn)));
577 talloc_free(state);
578 return -1;
581 state->c = talloc_steal(ctdb, c);
582 return 0;
/* State carried from ctdb_do_updateip() to its event script callback */
struct ctdb_do_updateip_state {
	/* control request to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the update; may be NULL */
	struct ctdb_interface *old;
	struct ctdb_vnn *vnn;
};
592 called when updateip event finishes
594 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
595 void *private_data)
597 struct ctdb_do_updateip_state *state =
598 talloc_get_type(private_data, struct ctdb_do_updateip_state);
600 if (status != 0) {
601 if (status == -ETIMEDOUT) {
602 ctdb_ban_self(ctdb);
604 DEBUG(DEBUG_ERR,
605 ("Failed update of IP %s from interface %s to %s\n",
606 ctdb_addr_to_str(&state->vnn->public_address),
607 iface_string(state->old),
608 ctdb_vnn_iface_string(state->vnn)));
611 * All we can do is reset the old interface
612 * and let the next run fix it
614 ctdb_vnn_unassign_iface(ctdb, state->vnn);
615 state->vnn->iface = state->old;
616 state->vnn->iface->references++;
618 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
619 talloc_free(state);
620 return;
623 /* the control succeeded */
624 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
625 talloc_free(state);
626 return;
629 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
631 state->vnn->update_in_flight = false;
632 return 0;
636 update (move) an ip address
638 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
639 struct ctdb_req_control_old *c,
640 struct ctdb_vnn *vnn)
642 int ret;
643 struct ctdb_do_updateip_state *state;
644 struct ctdb_interface *old = vnn->iface;
645 const char *old_name = iface_string(old);
646 const char *new_name;
648 if (vnn->update_in_flight) {
649 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
650 "update for this IP already in flight\n",
651 ctdb_addr_to_str(&vnn->public_address),
652 vnn->public_netmask_bits));
653 return -1;
656 ctdb_vnn_unassign_iface(ctdb, vnn);
657 ret = ctdb_vnn_assign_iface(ctdb, vnn);
658 if (ret != 0) {
659 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
660 "assign a usable interface (old iface '%s')\n",
661 ctdb_addr_to_str(&vnn->public_address),
662 vnn->public_netmask_bits,
663 old_name));
664 return -1;
667 if (old == vnn->iface) {
668 /* A benign update from one interface onto itself.
669 * no need to run the eventscripts in this case, just return
670 * success.
672 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
673 return 0;
676 state = talloc(vnn, struct ctdb_do_updateip_state);
677 CTDB_NO_MEMORY(ctdb, state);
679 state->c = NULL;
680 state->old = old;
681 state->vnn = vnn;
683 vnn->update_in_flight = true;
684 talloc_set_destructor(state, ctdb_updateip_destructor);
686 new_name = ctdb_vnn_iface_string(vnn);
687 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
688 "interface %s to %s\n",
689 ctdb_addr_to_str(&vnn->public_address),
690 vnn->public_netmask_bits,
691 old_name,
692 new_name));
694 ret = ctdb_event_script_callback(ctdb,
695 state,
696 ctdb_do_updateip_callback,
697 state,
698 CTDB_EVENT_UPDATE_IP,
699 "%s %s %s %u",
700 old_name,
701 new_name,
702 ctdb_addr_to_str(&vnn->public_address),
703 vnn->public_netmask_bits);
704 if (ret != 0) {
705 DEBUG(DEBUG_ERR,
706 ("Failed update IP %s from interface %s to %s\n",
707 ctdb_addr_to_str(&vnn->public_address),
708 old_name, new_name));
709 talloc_free(state);
710 return -1;
713 state->c = talloc_steal(ctdb, c);
714 return 0;
718 Find the vnn of the node that has a public ip address
719 returns -1 if the address is not known as a public address
721 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
723 struct ctdb_vnn *vnn;
725 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
726 if (ctdb_same_ip(&vnn->public_address, addr)) {
727 return vnn;
731 return NULL;
735 take over an ip address
737 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
738 struct ctdb_req_control_old *c,
739 TDB_DATA indata,
740 bool *async_reply)
742 int ret;
743 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
744 struct ctdb_vnn *vnn;
745 bool have_ip = false;
746 bool do_updateip = false;
747 bool do_takeip = false;
748 struct ctdb_interface *best_iface = NULL;
750 if (pip->pnn != ctdb->pnn) {
751 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
752 "with pnn %d, but we're node %d\n",
753 ctdb_addr_to_str(&pip->addr),
754 pip->pnn, ctdb->pnn));
755 return -1;
758 /* update out vnn list */
759 vnn = find_public_ip_vnn(ctdb, &pip->addr);
760 if (vnn == NULL) {
761 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
762 ctdb_addr_to_str(&pip->addr)));
763 return 0;
766 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
767 have_ip = ctdb_sys_have_ip(&pip->addr);
769 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
770 if (best_iface == NULL) {
771 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
772 "a usable interface (old %s, have_ip %d)\n",
773 ctdb_addr_to_str(&vnn->public_address),
774 vnn->public_netmask_bits,
775 ctdb_vnn_iface_string(vnn),
776 have_ip));
777 return -1;
780 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) {
781 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
782 "and we have it on iface[%s], but it was assigned to node %d"
783 "and we are node %d, banning ourself\n",
784 ctdb_addr_to_str(&vnn->public_address),
785 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
786 ctdb_ban_self(ctdb);
787 return -1;
790 if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) {
791 /* This will cause connections to be reset and
792 * reestablished. However, this is a very unusual
793 * situation and doing this will completely repair the
794 * inconsistency in the VNN.
796 DEBUG(DEBUG_WARNING,
797 (__location__
798 " Doing updateip for IP %s already on an interface\n",
799 ctdb_addr_to_str(&vnn->public_address)));
800 do_updateip = true;
803 if (vnn->iface) {
804 if (vnn->iface != best_iface) {
805 if (!vnn->iface->link_up) {
806 do_updateip = true;
807 } else if (vnn->iface->references > (best_iface->references + 1)) {
808 /* only move when the rebalance gains something */
809 do_updateip = true;
814 if (!have_ip) {
815 if (do_updateip) {
816 ctdb_vnn_unassign_iface(ctdb, vnn);
817 do_updateip = false;
819 do_takeip = true;
822 if (do_takeip) {
823 ret = ctdb_do_takeip(ctdb, c, vnn);
824 if (ret != 0) {
825 return -1;
827 } else if (do_updateip) {
828 ret = ctdb_do_updateip(ctdb, c, vnn);
829 if (ret != 0) {
830 return -1;
832 } else {
834 * The interface is up and the kernel known the ip
835 * => do nothing
837 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
838 ctdb_addr_to_str(&pip->addr),
839 vnn->public_netmask_bits,
840 ctdb_vnn_iface_string(vnn)));
841 return 0;
844 /* tell ctdb_control.c that we will be replying asynchronously */
845 *async_reply = true;
847 return 0;
/*
 * Remove a public address from this node: unlink the VNN, drop its
 * interface assignment, delete interfaces no other VNN lists, and
 * free the VNN.
 */
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
	/* Unlink first so the orphan scan below does not count this VNN */
	DLIST_REMOVE(ctdb->vnn, vnn);
	ctdb_vnn_unassign_iface(ctdb, vnn);
	ctdb_remove_orphaned_ifaces(ctdb, vnn);
	talloc_free(vnn);
}
858 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
859 struct ctdb_vnn *vnn,
860 ctdb_sock_addr *addr)
862 TDB_DATA data;
864 /* Send a message to all clients of this node telling them
865 * that the cluster has been reconfigured and they should
866 * close any connections on this IP address
868 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
869 data.dsize = strlen((char *)data.dptr)+1;
870 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
871 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
873 ctdb_vnn_unassign_iface(ctdb, vnn);
875 /* Process the IP if it has been marked for deletion */
876 if (vnn->delete_pending) {
877 do_delete_ip(ctdb, vnn);
878 return NULL;
881 return vnn;
/* State carried from ctdb_control_release_ip() to release_ip_callback() */
struct release_ip_callback_state {
	/* control request to reply to when the event finishes */
	struct ctdb_req_control_old *c;
	/* copy of the address being released */
	ctdb_sock_addr *addr;
	/* set to NULL by the callback if the VNN gets deleted */
	struct ctdb_vnn *vnn;
	/* node number recorded in the VNN once the release succeeds */
	uint32_t target_pnn;
};
892 called when releaseip event finishes
894 static void release_ip_callback(struct ctdb_context *ctdb, int status,
895 void *private_data)
897 struct release_ip_callback_state *state =
898 talloc_get_type(private_data, struct release_ip_callback_state);
900 if (status == -ETIMEDOUT) {
901 ctdb_ban_self(ctdb);
904 if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
905 if (ctdb_sys_have_ip(state->addr)) {
906 DEBUG(DEBUG_ERR,
907 ("IP %s still hosted during release IP callback, failing\n",
908 ctdb_addr_to_str(state->addr)));
909 ctdb_request_control_reply(ctdb, state->c,
910 NULL, -1, NULL);
911 talloc_free(state);
912 return;
916 state->vnn->pnn = state->target_pnn;
917 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
919 /* the control succeeded */
920 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
921 talloc_free(state);
924 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
926 if (state->vnn != NULL) {
927 state->vnn->update_in_flight = false;
929 return 0;
/*
  release an ip address

  Handler for the release-IP control.  indata carries a struct
  ctdb_public_ip naming the address and the node it is moving to.

  Redundant releases (address not held) only update the recorded
  node.  Otherwise the RELEASE_IP event is started and the reply is
  sent from release_ip_callback().

  Returns 0 on success and -1 on failure; *async_reply is set to
  true when the reply will be sent from the callback.
 */
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
				struct ctdb_req_control_old *c,
				TDB_DATA indata,
				bool *async_reply)
{
	int ret;
	struct release_ip_callback_state *state;
	struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
	struct ctdb_vnn *vnn;
	const char *iface;

	/* update our vnn list */
	vnn = find_public_ip_vnn(ctdb, &pip->addr);
	if (vnn == NULL) {
		DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
				  ctdb_addr_to_str(&pip->addr)));
		return 0;
	}

	/* stop any previous arps */
	talloc_free(vnn->takeover_ctx);
	vnn->takeover_ctx = NULL;

	/* RELEASE_IP controls are sent to all nodes that should not
	 * be hosting a particular IP.  This serves 2 purposes.  The
	 * first is to help resolve any inconsistencies.  If a node
	 * does unexpectedly host an IP then it will be released.  The
	 * 2nd is to use a "redundant release" to tell non-takeover
	 * nodes where an IP is moving to.  This is how "ctdb ip" can
	 * report the (likely) location of an IP by only asking the
	 * local node.  Redundant releases need to update the PNN but
	 * are otherwise ignored.
	 */
	if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) {
		if (!ctdb_sys_have_ip(&pip->addr)) {
			DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
				ctdb_addr_to_str(&pip->addr),
				vnn->public_netmask_bits,
				ctdb_vnn_iface_string(vnn)));
			vnn->pnn = pip->pnn;
			ctdb_vnn_unassign_iface(ctdb, vnn);
			return 0;
		}
	} else {
		/* Without the kernel check, "no interface assigned" means not held */
		if (vnn->iface == NULL) {
			DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
					   ctdb_addr_to_str(&pip->addr),
					   vnn->public_netmask_bits));
			vnn->pnn = pip->pnn;
			return 0;
		}
	}

	/* There is a potential race between take_ip and us because we
	 * update the VNN via a callback that runs when the
	 * eventscripts have been run.  Avoid the race by allowing one
	 * update to be in flight at a time.
	 */
	if (vnn->update_in_flight) {
		DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
				    "update for this IP already in flight\n",
				    ctdb_addr_to_str(&vnn->public_address),
				    vnn->public_netmask_bits));
		return -1;
	}

	iface = ctdb_vnn_iface_string(vnn);

	DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
		ctdb_addr_to_str(&pip->addr),
		vnn->public_netmask_bits,
		iface,
		pip->pnn));

	state = talloc(ctdb, struct release_ip_callback_state);
	if (state == NULL) {
		ctdb_set_error(ctdb, "Out of memory at %s:%d",
			       __FILE__, __LINE__);
		return -1;
	}

	/* The request is only taken over once the event has been started */
	state->c = NULL;
	state->addr = talloc(state, ctdb_sock_addr);
	if (state->addr == NULL) {
		ctdb_set_error(ctdb, "Out of memory at %s:%d",
			       __FILE__, __LINE__);
		talloc_free(state);
		return -1;
	}
	*state->addr = pip->addr;
	state->target_pnn = pip->pnn;
	state->vnn   = vnn;

	/* Cleared again by the destructor when the state is freed */
	vnn->update_in_flight = true;
	talloc_set_destructor(state, ctdb_releaseip_destructor);

	ret = ctdb_event_script_callback(ctdb,
					 state, release_ip_callback, state,
					 CTDB_EVENT_RELEASE_IP,
					 "%s %s %u",
					 iface,
					 ctdb_addr_to_str(&pip->addr),
					 vnn->public_netmask_bits);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
			ctdb_addr_to_str(&pip->addr),
			ctdb_vnn_iface_string(vnn)));
		talloc_free(state);
		return -1;
	}

	/* tell the control that we will be replying asynchronously */
	*async_reply = true;
	state->c = talloc_steal(state, c);
	return 0;
}
1052 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1053 ctdb_sock_addr *addr,
1054 unsigned mask, const char *ifaces,
1055 bool check_address)
1057 struct ctdb_vnn *vnn;
1058 char *tmp;
1059 const char *iface;
1061 /* Verify that we don't have an entry for this IP yet */
1062 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1063 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1064 D_ERR("Duplicate public IP address '%s'\n",
1065 ctdb_addr_to_str(addr));
1066 return -1;
1070 /* Create a new VNN structure for this IP address */
1071 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1072 if (vnn == NULL) {
1073 DBG_ERR("Memory allocation error\n");
1074 return -1;
1076 tmp = talloc_strdup(vnn, ifaces);
1077 if (tmp == NULL) {
1078 DBG_ERR("Memory allocation error\n");
1079 talloc_free(vnn);
1080 return -1;
1082 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1083 struct vnn_interface *vnn_iface;
1084 struct ctdb_interface *i;
1086 if (!ctdb_sys_check_iface_exists(iface)) {
1087 D_ERR("Unknown interface %s for public address %s\n",
1088 iface,
1089 ctdb_addr_to_str(addr));
1090 talloc_free(vnn);
1091 return -1;
1094 i = ctdb_add_local_iface(ctdb, iface);
1095 if (i == NULL) {
1096 D_ERR("Failed to add interface '%s' "
1097 "for public address %s\n",
1098 iface,
1099 ctdb_addr_to_str(addr));
1100 talloc_free(vnn);
1101 return -1;
1104 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1105 if (vnn_iface == NULL) {
1106 DBG_ERR("Memory allocation error\n");
1107 talloc_free(vnn);
1108 return -1;
1111 vnn_iface->iface = i;
1112 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1114 talloc_free(tmp);
1115 vnn->public_address = *addr;
1116 vnn->public_netmask_bits = mask;
1117 vnn->pnn = -1;
1119 DLIST_ADD(ctdb->vnn, vnn);
1121 return 0;
/*
  setup the public address lists from a file

  Uses ctdb->public_addresses_file, defaulting to
  $CTDB_BASE/public_addresses when it is not set.  A missing file is
  not an error.  Each non-comment, non-empty line must contain an
  address (parsed by ctdb_sock_addr_mask_from_string()) followed by
  an interface list.

  Returns 0 on success (including "no file") and -1 on any error.
 */
int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
{
	bool ok;
	char **lines;
	int nlines;
	int i;

	/* If no public addresses file given then try the default */
	if (ctdb->public_addresses_file == NULL) {
		const char *b = getenv("CTDB_BASE");
		if (b == NULL) {
			DBG_ERR("CTDB_BASE not set\n");
			return -1;
		}
		ctdb->public_addresses_file = talloc_asprintf(
					ctdb, "%s/%s", b, "public_addresses");
		if (ctdb->public_addresses_file == NULL) {
			DBG_ERR("Out of memory\n");
			return -1;
		}
	}

	/* If the file doesn't exist then warn and do nothing */
	ok = file_exist(ctdb->public_addresses_file);
	if (!ok) {
		D_WARNING("Not loading public addresses, no file %s\n",
			  ctdb->public_addresses_file);
		return 0;
	}

	lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
	if (lines == NULL) {
		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
		return -1;
	}
	/* Ignore empty lines at the end of the file */
	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
		nlines--;
	}

	for (i=0;i<nlines;i++) {
		unsigned mask;
		ctdb_sock_addr addr;
		const char *addrstr;
		const char *ifaces;
		char *tok, *line;
		int ret;

		/* Skip leading whitespace, then comment and empty lines */
		line = lines[i];
		while ((*line == ' ') || (*line == '\t')) {
			line++;
		}
		if (*line == '#') {
			continue;
		}
		if (strcmp(line, "") == 0) {
			continue;
		}
		/* First field: the address string, second: the interface list */
		tok = strtok(line, " \t");
		addrstr = tok;

		tok = strtok(NULL, " \t");
		if (tok == NULL) {
			D_ERR("No interface specified at line %u "
			      "of public addresses file\n", i+1);
			talloc_free(lines);
			return -1;
		}
		ifaces = tok;

		if (addrstr == NULL) {
			D_ERR("Badly formed line %u in public address list\n",
			      i+1);
			talloc_free(lines);
			return -1;
		}

		ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask);
		if (ret != 0) {
			D_ERR("Badly formed line %u in public address list\n",
			      i+1);
			talloc_free(lines);
			return -1;
		}

		if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
			DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
			talloc_free(lines);
			return -1;
		}
	}


	D_NOTICE("Loaded public addresses from %s\n",
		 ctdb->public_addresses_file);

	talloc_free(lines);
	return 0;
}
1227 destroy a ctdb_tcp_list structure
1229 static int ctdb_tcp_list_destructor(struct ctdb_tcp_list *tcp)
1231 struct ctdb_client *client = tcp->client;
1232 struct ctdb_connection *conn = &tcp->connection;
1233 char conn_str[132] = { 0, };
1234 int ret;
1236 ret = ctdb_connection_to_buf(conn_str,
1237 sizeof(conn_str),
1238 conn,
1239 false,
1240 " -> ");
1241 if (ret != 0) {
1242 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1245 D_DEBUG("removing client TCP connection %s "
1246 "(client_id %u pid %d)\n",
1247 conn_str, client->client_id, client->pid);
1249 DLIST_REMOVE(client->tcp_list, tcp);
1252 * We don't call ctdb_remove_connection(vnn, conn) here
1253 * as we want the caller to decide if it's called
1254 * directly (local only) or indirectly via a
1255 * CTDB_CONTROL_TCP_REMOVE broadcast
1258 return 0;
1262 called by a client to inform us of a TCP connection that it is managing
1263 that should tickled with an ACK when IP takeover is done
1265 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1266 TDB_DATA indata)
1268 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1269 struct ctdb_connection *tcp_sock = NULL;
1270 struct ctdb_tcp_list *tcp;
1271 struct ctdb_connection t;
1272 int ret;
1273 TDB_DATA data;
1274 struct ctdb_vnn *vnn;
1275 char conn_str[132] = { 0, };
1277 /* If we don't have public IPs, tickles are useless */
1278 if (ctdb->vnn == NULL) {
1279 return 0;
1282 tcp_sock = (struct ctdb_connection *)indata.dptr;
1284 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1285 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1287 ret = ctdb_connection_to_buf(conn_str,
1288 sizeof(conn_str),
1289 tcp_sock,
1290 false,
1291 " -> ");
1292 if (ret != 0) {
1293 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1296 vnn = find_public_ip_vnn(ctdb, &tcp_sock->dst);
1297 if (vnn == NULL) {
1298 D_ERR("Could not register TCP connection %s - "
1299 "not a public address (client_id %u pid %u)\n",
1300 conn_str, client_id, client->pid);
1301 return 0;
1304 if (vnn->pnn != ctdb->pnn) {
1305 D_ERR("Attempt to register tcp client for IP %s we don't hold - "
1306 "failing (client_id %u pid %u)\n",
1307 ctdb_addr_to_str(&tcp_sock->dst),
1308 client_id, client->pid);
1309 /* failing this call will tell smbd to die */
1310 return -1;
1313 tcp = talloc(client, struct ctdb_tcp_list);
1314 CTDB_NO_MEMORY(ctdb, tcp);
1315 tcp->client = client;
1317 tcp->connection.src = tcp_sock->src;
1318 tcp->connection.dst = tcp_sock->dst;
1320 DLIST_ADD(client->tcp_list, tcp);
1321 talloc_set_destructor(tcp, ctdb_tcp_list_destructor);
1323 t.src = tcp_sock->src;
1324 t.dst = tcp_sock->dst;
1326 data.dptr = (uint8_t *)&t;
1327 data.dsize = sizeof(t);
1329 D_INFO("Registered TCP connection %s (client_id %u pid %u)\n",
1330 conn_str, client_id, client->pid);
1332 /* tell all nodes about this tcp connection */
1333 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1334 CTDB_CONTROL_TCP_ADD,
1335 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1336 if (ret != 0) {
1337 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1338 return -1;
1341 return 0;
1344 static bool ctdb_client_remove_tcp(struct ctdb_client *client,
1345 const struct ctdb_connection *conn)
1347 struct ctdb_tcp_list *tcp = NULL;
1348 struct ctdb_tcp_list *tcp_next = NULL;
1349 bool found = false;
1351 for (tcp = client->tcp_list; tcp != NULL; tcp = tcp_next) {
1352 bool same;
1354 tcp_next = tcp->next;
1356 same = ctdb_connection_same(conn, &tcp->connection);
1357 if (!same) {
1358 continue;
1361 TALLOC_FREE(tcp);
1362 found = true;
1365 return found;
1369 called by a client to inform us of a TCP connection that was disconnected
1371 int32_t ctdb_control_tcp_client_disconnected(struct ctdb_context *ctdb,
1372 uint32_t client_id,
1373 TDB_DATA indata)
1375 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1376 struct ctdb_connection *tcp_sock = NULL;
1377 int ret;
1378 TDB_DATA data;
1379 char conn_str[132] = { 0, };
1380 bool found = false;
1382 tcp_sock = (struct ctdb_connection *)indata.dptr;
1384 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1385 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1387 ret = ctdb_connection_to_buf(conn_str,
1388 sizeof(conn_str),
1389 tcp_sock,
1390 false,
1391 " -> ");
1392 if (ret != 0) {
1393 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1396 found = ctdb_client_remove_tcp(client, tcp_sock);
1397 if (!found) {
1398 DBG_DEBUG("TCP connection %s not found "
1399 "(client_id %u pid %u).\n",
1400 conn_str, client_id, client->pid);
1401 return 0;
1404 D_INFO("deregistered TCP connection %s "
1405 "(client_id %u pid %u)\n",
1406 conn_str, client_id, client->pid);
1408 data.dptr = (uint8_t *)tcp_sock;
1409 data.dsize = sizeof(*tcp_sock);
1411 /* tell all nodes about this tcp connection is gone */
1412 ret = ctdb_daemon_send_control(ctdb,
1413 CTDB_BROADCAST_CONNECTED,
1415 CTDB_CONTROL_TCP_REMOVE,
1417 CTDB_CTRL_FLAG_NOREPLY,
1418 data,
1419 NULL,
1420 NULL);
1421 if (ret != 0) {
1422 DBG_ERR("Failed to send CTDB_CONTROL_TCP_REMOVE: %s\n",
1423 conn_str);
1424 return -1;
1427 return 0;
1431 called by a client to inform us of a TCP connection was passed to a different
1432 "client" (typically with multichannel to another smbd process).
1434 int32_t ctdb_control_tcp_client_passed(struct ctdb_context *ctdb,
1435 uint32_t client_id,
1436 TDB_DATA indata)
1438 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1439 struct ctdb_connection *tcp_sock = NULL;
1440 int ret;
1441 char conn_str[132] = { 0, };
1442 bool found = false;
1444 tcp_sock = (struct ctdb_connection *)indata.dptr;
1446 ctdb_canonicalize_ip_inplace(&tcp_sock->src);
1447 ctdb_canonicalize_ip_inplace(&tcp_sock->dst);
1449 ret = ctdb_connection_to_buf(conn_str,
1450 sizeof(conn_str),
1451 tcp_sock,
1452 false,
1453 " -> ");
1454 if (ret != 0) {
1455 strlcpy(conn_str, "UNKNOWN", sizeof(conn_str));
1458 found = ctdb_client_remove_tcp(client, tcp_sock);
1459 if (!found) {
1460 DBG_DEBUG("TCP connection from %s not found "
1461 "(client_id %u pid %u).\n",
1462 conn_str, client_id, client->pid);
1463 return 0;
1466 D_INFO("TCP connection from %s "
1467 "(client_id %u pid %u) passed to another client\n",
1468 conn_str, client_id, client->pid);
1471 * We don't call CTDB_CONTROL_TCP_REMOVE
1472 * nor ctdb_remove_connection() as the connection
1473 * is still alive, but handled by another client
1476 return 0;
1480 find a tcp address on a list
1482 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1483 struct ctdb_connection *tcp)
1485 unsigned int i;
1487 if (array == NULL) {
1488 return NULL;
1491 for (i=0;i<array->num;i++) {
1492 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1493 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1494 return &array->connections[i];
1497 return NULL;
1503 called by a daemon to inform us of a TCP connection that one of its
1504 clients managing that should tickled with an ACK when IP takeover is
1505 done
1507 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1509 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1510 struct ctdb_tcp_array *tcparray;
1511 struct ctdb_connection tcp;
1512 struct ctdb_vnn *vnn;
1514 /* If we don't have public IPs, tickles are useless */
1515 if (ctdb->vnn == NULL) {
1516 return 0;
1519 vnn = find_public_ip_vnn(ctdb, &p->dst);
1520 if (vnn == NULL) {
1521 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1522 ctdb_addr_to_str(&p->dst)));
1524 return -1;
1528 tcparray = vnn->tcp_array;
1530 /* If this is the first tickle */
1531 if (tcparray == NULL) {
1532 tcparray = talloc(vnn, struct ctdb_tcp_array);
1533 CTDB_NO_MEMORY(ctdb, tcparray);
1534 vnn->tcp_array = tcparray;
1536 tcparray->num = 0;
1537 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1538 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1540 tcparray->connections[tcparray->num].src = p->src;
1541 tcparray->connections[tcparray->num].dst = p->dst;
1542 tcparray->num++;
1544 if (tcp_update_needed) {
1545 vnn->tcp_update_needed = true;
1547 return 0;
1551 /* Do we already have this tickle ?*/
1552 tcp.src = p->src;
1553 tcp.dst = p->dst;
1554 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1555 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1556 ctdb_addr_to_str(&tcp.dst),
1557 ntohs(tcp.dst.ip.sin_port),
1558 vnn->pnn));
1559 return 0;
1562 /* A new tickle, we must add it to the array */
1563 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1564 struct ctdb_connection,
1565 tcparray->num+1);
1566 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1568 tcparray->connections[tcparray->num].src = p->src;
1569 tcparray->connections[tcparray->num].dst = p->dst;
1570 tcparray->num++;
1572 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1573 ctdb_addr_to_str(&tcp.dst),
1574 ntohs(tcp.dst.ip.sin_port),
1575 vnn->pnn));
1577 if (tcp_update_needed) {
1578 vnn->tcp_update_needed = true;
1581 return 0;
1585 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1587 struct ctdb_connection *tcpp;
1589 if (vnn == NULL) {
1590 return;
1593 /* if the array is empty we can't remove it
1594 and we don't need to do anything
1596 if (vnn->tcp_array == NULL) {
1597 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1598 ctdb_addr_to_str(&conn->dst),
1599 ntohs(conn->dst.ip.sin_port)));
1600 return;
1604 /* See if we know this connection
1605 if we don't know this connection then we don't need to do anything
1607 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1608 if (tcpp == NULL) {
1609 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1610 ctdb_addr_to_str(&conn->dst),
1611 ntohs(conn->dst.ip.sin_port)));
1612 return;
1616 /* We need to remove this entry from the array.
1617 Instead of allocating a new array and copying data to it
1618 we cheat and just copy the last entry in the existing array
1619 to the entry that is to be removed and just shring the
1620 ->num field
1622 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1623 vnn->tcp_array->num--;
1625 /* If we deleted the last entry we also need to remove the entire array
1627 if (vnn->tcp_array->num == 0) {
1628 talloc_free(vnn->tcp_array);
1629 vnn->tcp_array = NULL;
1632 vnn->tcp_update_needed = true;
1634 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1635 ctdb_addr_to_str(&conn->src),
1636 ntohs(conn->src.ip.sin_port)));
1641 called by a daemon to inform us of a TCP connection that one of its
1642 clients used are no longer needed in the tickle database
1644 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1646 struct ctdb_vnn *vnn;
1647 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1649 /* If we don't have public IPs, tickles are useless */
1650 if (ctdb->vnn == NULL) {
1651 return 0;
1654 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1655 if (vnn == NULL) {
1656 DEBUG(DEBUG_ERR,
1657 (__location__ " unable to find public address %s\n",
1658 ctdb_addr_to_str(&conn->dst)));
1659 return 0;
1662 ctdb_remove_connection(vnn, conn);
1664 return 0;
1668 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1669 bool force);
1672 Called when another daemon starts - causes all tickles for all
1673 public addresses we are serving to be sent to the new node on the
1674 next check. This actually causes the tickles to be sent to the
1675 other node immediately. In case there is an error, the periodic
1676 timer will send the updates on timer event. This is simple and
1677 doesn't require careful error handling.
1679 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1681 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1682 (unsigned long) pnn));
1684 ctdb_send_set_tcp_tickles_for_all(ctdb, true);
1685 return 0;
1690 called when a client structure goes away - hook to remove
1691 elements from the tcp_list in all daemons
1693 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1695 while (client->tcp_list) {
1696 struct ctdb_vnn *vnn;
1697 struct ctdb_tcp_list *tcp = client->tcp_list;
1698 struct ctdb_connection *conn = &tcp->connection;
1700 vnn = find_public_ip_vnn(client->ctdb,
1701 &conn->dst);
1703 /* If the IP address is hosted on this node then
1704 * remove the connection. */
1705 if (vnn != NULL && vnn->pnn == client->ctdb->pnn) {
1706 ctdb_remove_connection(vnn, conn);
1709 /* Otherwise this function has been called because the
1710 * server IP address has been released to another node
1711 * and the client has exited. This means that we
1712 * should not delete the connection information. The
1713 * takeover node processes connections too. */
1716 * The destructor removes from the list
1718 TALLOC_FREE(tcp);
1723 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1725 struct ctdb_vnn *vnn, *next;
1726 int count = 0;
1728 if (ctdb_config.failover_disabled == 1) {
1729 return;
1732 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1733 /* vnn can be freed below in release_ip_post() */
1734 next = vnn->next;
1736 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1737 ctdb_vnn_unassign_iface(ctdb, vnn);
1738 continue;
1741 /* Don't allow multiple releases at once. Some code,
1742 * particularly ctdb_tickle_sentenced_connections() is
1743 * not re-entrant */
1744 if (vnn->update_in_flight) {
1745 DEBUG(DEBUG_WARNING,
1746 (__location__
1747 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1748 ctdb_addr_to_str(&vnn->public_address),
1749 vnn->public_netmask_bits,
1750 ctdb_vnn_iface_string(vnn)));
1751 continue;
1753 vnn->update_in_flight = true;
1755 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1756 ctdb_addr_to_str(&vnn->public_address),
1757 vnn->public_netmask_bits,
1758 ctdb_vnn_iface_string(vnn)));
1760 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1761 ctdb_vnn_iface_string(vnn),
1762 ctdb_addr_to_str(&vnn->public_address),
1763 vnn->public_netmask_bits);
1764 /* releaseip timeouts are converted to success, so to
1765 * detect failures just check if the IP address is
1766 * still there...
1768 if (ctdb_sys_have_ip(&vnn->public_address)) {
1769 DEBUG(DEBUG_ERR,
1770 (__location__
1771 " IP address %s not released\n",
1772 ctdb_addr_to_str(&vnn->public_address)));
1773 vnn->update_in_flight = false;
1774 continue;
1777 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1778 if (vnn != NULL) {
1779 vnn->update_in_flight = false;
1781 count++;
1784 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1789 get list of public IPs
1791 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1792 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1794 int i, num, len;
1795 struct ctdb_public_ip_list_old *ips;
1796 struct ctdb_vnn *vnn;
1797 bool only_available = false;
1799 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1800 only_available = true;
1803 /* count how many public ip structures we have */
1804 num = 0;
1805 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1806 num++;
1809 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1810 num*sizeof(struct ctdb_public_ip);
1811 ips = talloc_zero_size(outdata, len);
1812 CTDB_NO_MEMORY(ctdb, ips);
1814 i = 0;
1815 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1816 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1817 continue;
1819 ips->ips[i].pnn = vnn->pnn;
1820 ips->ips[i].addr = vnn->public_address;
1821 i++;
1823 ips->num = i;
1824 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1825 i*sizeof(struct ctdb_public_ip);
1827 outdata->dsize = len;
1828 outdata->dptr = (uint8_t *)ips;
1830 return 0;
1834 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1835 struct ctdb_req_control_old *c,
1836 TDB_DATA indata,
1837 TDB_DATA *outdata)
1839 int i, num, len;
1840 ctdb_sock_addr *addr;
1841 struct ctdb_public_ip_info_old *info;
1842 struct ctdb_vnn *vnn;
1843 struct vnn_interface *iface;
1845 addr = (ctdb_sock_addr *)indata.dptr;
1847 vnn = find_public_ip_vnn(ctdb, addr);
1848 if (vnn == NULL) {
1849 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1850 "'%s'not a public address\n",
1851 ctdb_addr_to_str(addr)));
1852 return -1;
1855 /* count how many public ip structures we have */
1856 num = 0;
1857 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1858 num++;
1861 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1862 num*sizeof(struct ctdb_iface);
1863 info = talloc_zero_size(outdata, len);
1864 CTDB_NO_MEMORY(ctdb, info);
1866 info->ip.addr = vnn->public_address;
1867 info->ip.pnn = vnn->pnn;
1868 info->active_idx = 0xFFFFFFFF;
1870 i = 0;
1871 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1872 struct ctdb_interface *cur;
1874 cur = iface->iface;
1875 if (vnn->iface == cur) {
1876 info->active_idx = i;
1878 strncpy(info->ifaces[i].name, cur->name,
1879 sizeof(info->ifaces[i].name));
1880 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1881 info->ifaces[i].link_state = cur->link_up;
1882 info->ifaces[i].references = cur->references;
1884 i++;
1886 info->num = i;
1887 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1888 i*sizeof(struct ctdb_iface);
1890 outdata->dsize = len;
1891 outdata->dptr = (uint8_t *)info;
1893 return 0;
1896 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1897 struct ctdb_req_control_old *c,
1898 TDB_DATA *outdata)
1900 int i, num, len;
1901 struct ctdb_iface_list_old *ifaces;
1902 struct ctdb_interface *cur;
1904 /* count how many public ip structures we have */
1905 num = 0;
1906 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1907 num++;
1910 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1911 num*sizeof(struct ctdb_iface);
1912 ifaces = talloc_zero_size(outdata, len);
1913 CTDB_NO_MEMORY(ctdb, ifaces);
1915 i = 0;
1916 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1917 strncpy(ifaces->ifaces[i].name, cur->name,
1918 sizeof(ifaces->ifaces[i].name));
1919 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1920 ifaces->ifaces[i].link_state = cur->link_up;
1921 ifaces->ifaces[i].references = cur->references;
1922 i++;
1924 ifaces->num = i;
1925 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1926 i*sizeof(struct ctdb_iface);
1928 outdata->dsize = len;
1929 outdata->dptr = (uint8_t *)ifaces;
1931 return 0;
1934 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1935 struct ctdb_req_control_old *c,
1936 TDB_DATA indata)
1938 struct ctdb_iface *info;
1939 struct ctdb_interface *iface;
1940 bool link_up = false;
1942 info = (struct ctdb_iface *)indata.dptr;
1944 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1945 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1946 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1947 len, len, info->name));
1948 return -1;
1951 switch (info->link_state) {
1952 case 0:
1953 link_up = false;
1954 break;
1955 case 1:
1956 link_up = true;
1957 break;
1958 default:
1959 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1960 (unsigned int)info->link_state));
1961 return -1;
1964 if (info->references != 0) {
1965 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1966 (unsigned int)info->references));
1967 return -1;
1970 iface = ctdb_find_iface(ctdb, info->name);
1971 if (iface == NULL) {
1972 return -1;
1975 if (link_up == iface->link_up) {
1976 return 0;
1979 DEBUG(DEBUG_ERR,
1980 ("iface[%s] has changed it's link status %s => %s\n",
1981 iface->name,
1982 iface->link_up?"up":"down",
1983 link_up?"up":"down"));
1985 iface->link_up = link_up;
1986 return 0;
1991 called by a daemon to inform us of the entire list of TCP tickles for
1992 a particular public address.
1993 this control should only be sent by the node that is currently serving
1994 that public address.
1996 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1998 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1999 struct ctdb_tcp_array *tcparray;
2000 struct ctdb_vnn *vnn;
2002 /* We must at least have tickles.num or else we can't verify the size
2003 of the received data blob
2005 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2006 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2007 return -1;
2010 /* verify that the size of data matches what we expect */
2011 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2012 + sizeof(struct ctdb_connection) * list->num) {
2013 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2014 return -1;
2017 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2018 ctdb_addr_to_str(&list->addr)));
2020 vnn = find_public_ip_vnn(ctdb, &list->addr);
2021 if (vnn == NULL) {
2022 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2023 ctdb_addr_to_str(&list->addr)));
2025 return 1;
2028 if (vnn->pnn == ctdb->pnn) {
2029 DEBUG(DEBUG_INFO,
2030 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2031 ctdb_addr_to_str(&list->addr)));
2032 return 0;
2035 /* remove any old ticklelist we might have */
2036 talloc_free(vnn->tcp_array);
2037 vnn->tcp_array = NULL;
2039 tcparray = talloc(vnn, struct ctdb_tcp_array);
2040 CTDB_NO_MEMORY(ctdb, tcparray);
2042 tcparray->num = list->num;
2044 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2045 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2047 memcpy(tcparray->connections, &list->connections[0],
2048 sizeof(struct ctdb_connection)*tcparray->num);
2050 /* We now have a new fresh tickle list array for this vnn */
2051 vnn->tcp_array = tcparray;
2053 return 0;
2057 called to return the full list of tickles for the puclic address associated
2058 with the provided vnn
2060 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2062 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2063 struct ctdb_tickle_list_old *list;
2064 struct ctdb_tcp_array *tcparray;
2065 unsigned int num, i;
2066 struct ctdb_vnn *vnn;
2067 unsigned port;
2069 vnn = find_public_ip_vnn(ctdb, addr);
2070 if (vnn == NULL) {
2071 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2072 ctdb_addr_to_str(addr)));
2074 return 1;
2077 port = ctdb_addr_to_port(addr);
2079 tcparray = vnn->tcp_array;
2080 num = 0;
2081 if (tcparray != NULL) {
2082 if (port == 0) {
2083 /* All connections */
2084 num = tcparray->num;
2085 } else {
2086 /* Count connections for port */
2087 for (i = 0; i < tcparray->num; i++) {
2088 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2089 num++;
2095 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2096 + sizeof(struct ctdb_connection) * num;
2098 outdata->dptr = talloc_size(outdata, outdata->dsize);
2099 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2100 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2102 list->addr = *addr;
2103 list->num = num;
2105 if (num == 0) {
2106 return 0;
2109 num = 0;
2110 for (i = 0; i < tcparray->num; i++) {
2111 if (port == 0 || \
2112 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2113 list->connections[num] = tcparray->connections[i];
2114 num++;
2118 return 0;
2123 set the list of all tcp tickles for a public address
2125 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2126 ctdb_sock_addr *addr,
2127 struct ctdb_tcp_array *tcparray)
2129 int ret, num;
2130 TDB_DATA data;
2131 struct ctdb_tickle_list_old *list;
2133 if (tcparray) {
2134 num = tcparray->num;
2135 } else {
2136 num = 0;
2139 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2140 sizeof(struct ctdb_connection) * num;
2141 data.dptr = talloc_size(ctdb, data.dsize);
2142 CTDB_NO_MEMORY(ctdb, data.dptr);
2144 list = (struct ctdb_tickle_list_old *)data.dptr;
2145 list->addr = *addr;
2146 list->num = num;
2147 if (tcparray) {
2148 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2151 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2152 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2153 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2154 if (ret != 0) {
2155 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2156 return -1;
2159 talloc_free(data.dptr);
2161 return ret;
2164 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
2165 bool force)
2167 struct ctdb_vnn *vnn;
2168 int ret;
2170 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2171 /* we only send out updates for public addresses that
2172 we have taken over
2174 if (ctdb->pnn != vnn->pnn) {
2175 continue;
2178 /* We only send out the updates if we need to */
2179 if (!force && !vnn->tcp_update_needed) {
2180 continue;
2183 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2184 &vnn->public_address,
2185 vnn->tcp_array);
2186 if (ret != 0) {
2187 D_ERR("Failed to send the tickle update for ip %s\n",
2188 ctdb_addr_to_str(&vnn->public_address));
2189 vnn->tcp_update_needed = true;
2190 } else {
2191 D_INFO("Sent tickle update for ip %s\n",
2192 ctdb_addr_to_str(&vnn->public_address));
2193 vnn->tcp_update_needed = false;
2200 perform tickle updates if required
2202 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2203 struct tevent_timer *te,
2204 struct timeval t, void *private_data)
2206 struct ctdb_context *ctdb = talloc_get_type(
2207 private_data, struct ctdb_context);
2209 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2211 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2212 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2213 ctdb_update_tcp_tickles, ctdb);
2217 start periodic update of tcp tickles
2219 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2221 ctdb->tickle_update_context = talloc_new(ctdb);
2223 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2224 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2225 ctdb_update_tcp_tickles, ctdb);
2231 struct control_gratious_arp {
2232 struct ctdb_context *ctdb;
2233 ctdb_sock_addr addr;
2234 const char *iface;
2235 int count;
2239 send a control_gratuitous arp
2241 static void send_gratious_arp(struct tevent_context *ev,
2242 struct tevent_timer *te,
2243 struct timeval t, void *private_data)
2245 int ret;
2246 struct control_gratious_arp *arp = talloc_get_type(private_data,
2247 struct control_gratious_arp);
2249 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2250 if (ret != 0) {
2251 DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n",
2252 arp->iface, strerror(ret));
2256 arp->count++;
2257 if (arp->count == CTDB_ARP_REPEAT) {
2258 talloc_free(arp);
2259 return;
2262 tevent_add_timer(arp->ctdb->ev, arp,
2263 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2264 send_gratious_arp, arp);
2269 send a gratious arp
2271 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2273 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2274 struct control_gratious_arp *arp;
2276 /* verify the size of indata */
2277 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2278 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2279 (unsigned)indata.dsize,
2280 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2281 return -1;
2283 if (indata.dsize !=
2284 ( offsetof(struct ctdb_addr_info_old, iface)
2285 + gratious_arp->len ) ){
2287 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2288 "but should be %u bytes\n",
2289 (unsigned)indata.dsize,
2290 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2291 return -1;
2295 arp = talloc(ctdb, struct control_gratious_arp);
2296 CTDB_NO_MEMORY(ctdb, arp);
2298 arp->ctdb = ctdb;
2299 arp->addr = gratious_arp->addr;
2300 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2301 CTDB_NO_MEMORY(ctdb, arp->iface);
2302 arp->count = 0;
2304 tevent_add_timer(arp->ctdb->ev, arp,
2305 timeval_zero(), send_gratious_arp, arp);
2307 return 0;
2310 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2312 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2313 int ret;
2315 /* verify the size of indata */
2316 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2317 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2318 return -1;
2320 if (indata.dsize !=
2321 ( offsetof(struct ctdb_addr_info_old, iface)
2322 + pub->len ) ){
2324 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2325 "but should be %u bytes\n",
2326 (unsigned)indata.dsize,
2327 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2328 return -1;
2331 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2333 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2335 if (ret != 0) {
2336 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2337 return -1;
2340 return 0;
2343 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2345 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2346 struct ctdb_vnn *vnn;
2348 /* verify the size of indata */
2349 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2350 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2351 return -1;
2353 if (indata.dsize !=
2354 ( offsetof(struct ctdb_addr_info_old, iface)
2355 + pub->len ) ){
2357 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2358 "but should be %u bytes\n",
2359 (unsigned)indata.dsize,
2360 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2361 return -1;
2364 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2366 /* walk over all public addresses until we find a match */
2367 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2368 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2369 if (vnn->pnn == ctdb->pnn) {
2370 /* This IP is currently being hosted.
2371 * Defer the deletion until the next
2372 * takeover run. "ctdb reloadips" will
2373 * always cause a takeover run. "ctdb
2374 * delip" will now need an explicit
2375 * "ctdb ipreallocated" afterwards. */
2376 vnn->delete_pending = true;
2377 } else {
2378 /* This IP is not hosted on the
2379 * current node so just delete it
2380 * now. */
2381 do_delete_ip(ctdb, vnn);
2384 return 0;
2388 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2389 ctdb_addr_to_str(&pub->addr)));
2390 return -1;
/* State handed to ctdb_ipreallocated_callback() */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* control to reply to when the event finishes */
};
2398 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2399 int status, void *p)
2401 struct ipreallocated_callback_state *state =
2402 talloc_get_type(p, struct ipreallocated_callback_state);
2403 TDB_DATA data = { .dsize = 0, };
2405 if (status != 0) {
2406 DEBUG(DEBUG_ERR,
2407 (" \"ipreallocated\" event script failed (status %d)\n",
2408 status));
2409 if (status == -ETIMEDOUT) {
2410 ctdb_ban_self(ctdb);
2414 D_INFO("Sending IPREALLOCATED message\n");
2415 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_IPREALLOCATED, data);
2417 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2418 talloc_free(state);
2421 /* A control to run the ipreallocated event */
2422 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2423 struct ctdb_req_control_old *c,
2424 bool *async_reply)
2426 int ret;
2427 struct ipreallocated_callback_state *state;
2429 state = talloc(ctdb, struct ipreallocated_callback_state);
2430 CTDB_NO_MEMORY(ctdb, state);
2432 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2434 ret = ctdb_event_script_callback(ctdb, state,
2435 ctdb_ipreallocated_callback, state,
2436 CTDB_EVENT_IPREALLOCATED,
2437 "%s", "");
2439 if (ret != 0) {
2440 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2441 talloc_free(state);
2442 return -1;
2445 /* tell the control that we will be reply asynchronously */
2446 state->c = talloc_steal(state, c);
2447 *async_reply = true;
2449 return 0;
/* State handed to ctdb_start_ipreallocate_callback() */
struct start_ipreallocate_callback_state {
	struct ctdb_req_control_old *c;	/* control to reply to when the event finishes */
};
2457 static void ctdb_start_ipreallocate_callback(struct ctdb_context *ctdb,
2458 int status, void *p)
2460 struct start_ipreallocate_callback_state *state = talloc_get_type_abort(
2461 p, struct start_ipreallocate_callback_state);
2462 TDB_DATA data = { .dsize = 0, };
2464 if (status != 0) {
2465 D_ERR("\"startipreallocate\" event failed (status %d)\n",
2466 status);
2467 if (status == -ETIMEDOUT) {
2468 ctdb_ban_self(ctdb);
2472 D_INFO("Sending START_IPREALLOCATE message\n");
2473 ctdb_daemon_send_message(ctdb,
2474 ctdb->pnn,
2475 CTDB_SRVID_START_IPREALLOCATE,
2476 data);
2478 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2479 talloc_free(state);
2482 /* A control to run the startipreallocate event */
2483 int32_t ctdb_control_start_ipreallocate(struct ctdb_context *ctdb,
2484 struct ctdb_req_control_old *c,
2485 bool *async_reply)
2487 int ret;
2488 struct start_ipreallocate_callback_state *state;
2490 /* Nodes that are not RUNNING can not host IPs */
2491 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
2492 DBG_INFO("Skipping \"startipreallocate\" event, not RUNNING\n");
2493 return 0;
2496 state = talloc(ctdb, struct start_ipreallocate_callback_state);
2497 if (state == NULL) {
2498 DBG_ERR("Memory allocation error\n");
2499 return -1;
2502 DBG_INFO("Running \"startipreallocate\" event\n");
2504 ret = ctdb_event_script_callback(ctdb,
2505 state,
2506 ctdb_start_ipreallocate_callback,
2507 state,
2508 CTDB_EVENT_START_IPREALLOCATE,
2509 "%s",
2510 "");
2512 if (ret != 0) {
2513 D_ERR("Failed to run \"startipreallocate\" event \n");
2514 talloc_free(state);
2515 return -1;
2518 /* tell the control that we will be reply asynchronously */
2519 state->c = talloc_steal(state, c);
2520 *async_reply = true;
2522 return 0;
/* Bookkeeping for one in-flight "reload public IPs" request */
struct ctdb_reloadips_handle {
	/* Daemon context the request belongs to */
	struct ctdb_context *ctdb;
	/* Pending control request, answered from the destructor */
	struct ctdb_req_control_old *c;
	/* Status sent back in the control reply */
	int status;
	/* Pipe: the child writes its result to fd[1], the parent reads fd[0] */
	int fd[2];
	/* PID of the forked child doing the reload */
	pid_t child;
	/* tevent handler watching fd[0] for the child's result */
	struct tevent_fd *fde;
};
2535 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2537 if (h == h->ctdb->reload_ips) {
2538 h->ctdb->reload_ips = NULL;
2540 if (h->c != NULL) {
2541 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2542 h->c = NULL;
2544 ctdb_kill(h->ctdb, h->child, SIGKILL);
2545 return 0;
2548 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2549 struct tevent_timer *te,
2550 struct timeval t, void *private_data)
2552 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2554 talloc_free(h);
2557 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2558 struct tevent_fd *fde,
2559 uint16_t flags, void *private_data)
2561 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2563 char res;
2564 int ret;
2566 ret = sys_read(h->fd[0], &res, 1);
2567 if (ret < 1 || res != 0) {
2568 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2569 res = 1;
2571 h->status = res;
2573 talloc_free(h);
2576 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2578 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2579 struct ctdb_public_ip_list_old *ips;
2580 struct ctdb_vnn *vnn;
2581 struct client_async_data *async_data;
2582 struct timeval timeout;
2583 TDB_DATA data;
2584 struct ctdb_client_control_state *state;
2585 bool first_add;
2586 unsigned int i;
2587 int ret;
2589 CTDB_NO_MEMORY(ctdb, mem_ctx);
2591 /* Read IPs from local node */
2592 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2593 CTDB_CURRENT_NODE, mem_ctx, &ips);
2594 if (ret != 0) {
2595 DEBUG(DEBUG_ERR,
2596 ("Unable to fetch public IPs from local node\n"));
2597 talloc_free(mem_ctx);
2598 return -1;
2601 /* Read IPs file - this is safe since this is a child process */
2602 ctdb->vnn = NULL;
2603 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2604 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2605 talloc_free(mem_ctx);
2606 return -1;
2609 async_data = talloc_zero(mem_ctx, struct client_async_data);
2610 CTDB_NO_MEMORY(ctdb, async_data);
2612 /* Compare IPs between node and file for IPs to be deleted */
2613 for (i = 0; i < ips->num; i++) {
2614 /* */
2615 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2616 if (ctdb_same_ip(&vnn->public_address,
2617 &ips->ips[i].addr)) {
2618 /* IP is still in file */
2619 break;
2623 if (vnn == NULL) {
2624 /* Delete IP ips->ips[i] */
2625 struct ctdb_addr_info_old *pub;
2627 DEBUG(DEBUG_NOTICE,
2628 ("IP %s no longer configured, deleting it\n",
2629 ctdb_addr_to_str(&ips->ips[i].addr)));
2631 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2632 CTDB_NO_MEMORY(ctdb, pub);
2634 pub->addr = ips->ips[i].addr;
2635 pub->mask = 0;
2636 pub->len = 0;
2638 timeout = TAKEOVER_TIMEOUT();
2640 data.dsize = offsetof(struct ctdb_addr_info_old,
2641 iface) + pub->len;
2642 data.dptr = (uint8_t *)pub;
2644 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2645 CTDB_CONTROL_DEL_PUBLIC_IP,
2646 0, data, async_data,
2647 &timeout, NULL);
2648 if (state == NULL) {
2649 DEBUG(DEBUG_ERR,
2650 (__location__
2651 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2652 goto failed;
2655 ctdb_client_async_add(async_data, state);
2659 /* Compare IPs between node and file for IPs to be added */
2660 first_add = true;
2661 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2662 for (i = 0; i < ips->num; i++) {
2663 if (ctdb_same_ip(&vnn->public_address,
2664 &ips->ips[i].addr)) {
2665 /* IP already on node */
2666 break;
2669 if (i == ips->num) {
2670 /* Add IP ips->ips[i] */
2671 struct ctdb_addr_info_old *pub;
2672 const char *ifaces = NULL;
2673 uint32_t len;
2674 struct vnn_interface *iface = NULL;
2676 DEBUG(DEBUG_NOTICE,
2677 ("New IP %s configured, adding it\n",
2678 ctdb_addr_to_str(&vnn->public_address)));
2679 if (first_add) {
2680 uint32_t pnn = ctdb_get_pnn(ctdb);
2682 data.dsize = sizeof(pnn);
2683 data.dptr = (uint8_t *)&pnn;
2685 ret = ctdb_client_send_message(
2686 ctdb,
2687 CTDB_BROADCAST_CONNECTED,
2688 CTDB_SRVID_REBALANCE_NODE,
2689 data);
2690 if (ret != 0) {
2691 DEBUG(DEBUG_WARNING,
2692 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2695 first_add = false;
2698 ifaces = vnn->ifaces->iface->name;
2699 iface = vnn->ifaces->next;
2700 while (iface != NULL) {
2701 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2702 iface->iface->name);
2703 iface = iface->next;
2706 len = strlen(ifaces) + 1;
2707 pub = talloc_zero_size(mem_ctx,
2708 offsetof(struct ctdb_addr_info_old, iface) + len);
2709 CTDB_NO_MEMORY(ctdb, pub);
2711 pub->addr = vnn->public_address;
2712 pub->mask = vnn->public_netmask_bits;
2713 pub->len = len;
2714 memcpy(&pub->iface[0], ifaces, pub->len);
2716 timeout = TAKEOVER_TIMEOUT();
2718 data.dsize = offsetof(struct ctdb_addr_info_old,
2719 iface) + pub->len;
2720 data.dptr = (uint8_t *)pub;
2722 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2723 CTDB_CONTROL_ADD_PUBLIC_IP,
2724 0, data, async_data,
2725 &timeout, NULL);
2726 if (state == NULL) {
2727 DEBUG(DEBUG_ERR,
2728 (__location__
2729 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2730 goto failed;
2733 ctdb_client_async_add(async_data, state);
2737 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2738 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2739 goto failed;
2742 talloc_free(mem_ctx);
2743 return 0;
2745 failed:
2746 talloc_free(mem_ctx);
2747 return -1;
2750 /* This control is sent to force the node to re-read the public addresses file
2751 and drop any addresses we should nnot longer host, and add new addresses
2752 that we are now able to host
2754 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2756 struct ctdb_reloadips_handle *h;
2757 pid_t parent = getpid();
2759 if (ctdb->reload_ips != NULL) {
2760 talloc_free(ctdb->reload_ips);
2761 ctdb->reload_ips = NULL;
2764 h = talloc(ctdb, struct ctdb_reloadips_handle);
2765 CTDB_NO_MEMORY(ctdb, h);
2766 h->ctdb = ctdb;
2767 h->c = NULL;
2768 h->status = -1;
2770 if (pipe(h->fd) == -1) {
2771 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2772 talloc_free(h);
2773 return -1;
2776 h->child = ctdb_fork(ctdb);
2777 if (h->child == (pid_t)-1) {
2778 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2779 close(h->fd[0]);
2780 close(h->fd[1]);
2781 talloc_free(h);
2782 return -1;
2785 /* child process */
2786 if (h->child == 0) {
2787 signed char res = 0;
2789 close(h->fd[0]);
2791 prctl_set_comment("ctdb_reloadips");
2792 if (switch_from_server_to_client(ctdb) != 0) {
2793 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2794 res = -1;
2795 } else {
2796 res = ctdb_reloadips_child(ctdb);
2797 if (res != 0) {
2798 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2802 sys_write(h->fd[1], &res, 1);
2803 ctdb_wait_for_process_to_exit(parent);
2804 _exit(0);
2807 h->c = talloc_steal(h, c);
2809 close(h->fd[1]);
2810 set_close_on_exec(h->fd[0]);
2812 talloc_set_destructor(h, ctdb_reloadips_destructor);
2815 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2816 ctdb_reloadips_child_handler, (void *)h);
2817 tevent_fd_set_auto_close(h->fde);
2819 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2820 ctdb_reloadips_timeout_event, h);
2822 /* we reply later */
2823 *async_reply = true;
2824 return 0;