s3:utils: Fix 'Usage:' for 'net ads enctypes'
[samba4-gss.git] / ctdb / server / ctdb_daemon.c
blob46bc324ae8795dd5488007ddd4accf038fcd13f9
1 /*
2 ctdb daemon code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "system/time.h"
26 #include <talloc.h>
27 /* Allow use of deprecated function tevent_loop_allow_nesting() */
28 #define TEVENT_DEPRECATED
29 #include <tevent.h>
30 #include <tdb.h>
32 #include "lib/tdb_wrap/tdb_wrap.h"
33 #include "lib/util/dlinklist.h"
34 #include "lib/util/debug.h"
35 #include "lib/util/time.h"
36 #include "lib/util/blocking.h"
37 #include "lib/util/become_daemon.h"
39 #include "version.h"
40 #include "ctdb_private.h"
41 #include "ctdb_client.h"
43 #include "protocol/protocol.h"
44 #include "protocol/protocol_api.h"
46 #include "common/rb_tree.h"
47 #include "common/reqid.h"
48 #include "common/system.h"
49 #include "common/common.h"
50 #include "common/logging.h"
51 #include "common/pidfile.h"
52 #include "common/sock_io.h"
54 #include "conf/node.h"
/*
 * Per-client PID tracking record, linked into ctdb->client_pids so the
 * daemon can map a process ID back to its client connection.
 */
struct ctdb_client_pid_list {
	struct ctdb_client_pid_list *next, *prev; /* dlinklist linkage */
	struct ctdb_context *ctdb;   /* owning daemon context */
	pid_t pid;                   /* peer pid of the connected client */
	struct ctdb_client *client;  /* the client connection itself */
};
/* Path of the daemon's PID file; set up elsewhere before startup */
const char *ctdbd_pidfile = NULL;
/* Context holding the created PID file so it is cleaned up on exit */
static struct pidfile_context *ctdbd_pidfile_ctx = NULL;

static void daemon_incoming_packet(void *, struct ctdb_req_header *);

/* PID of the main ctdbd process; used to keep forked children from
 * running main-process-only exit logic (see print_exit_message()) */
static pid_t __ctdbd_pid;
70 static void print_exit_message(void)
72 if (getpid() == __ctdbd_pid) {
73 DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
75 /* Wait a second to allow pending log messages to be flushed */
76 sleep(1);
#ifdef HAVE_GETRUSAGE

/*
 * Snapshot of CPU accounting taken by ctdb_cpu_check_threshold();
 * utilisation is computed from the difference of two snapshots.
 */
struct cpu_check_threshold_data {
	unsigned short percent;   /* utilisation over the last interval, % */
	struct timeval timeofday; /* wall-clock time of the snapshot */
	struct timeval ru_time;   /* cumulative user+system CPU time */
};
/*
 * Periodic self-check of the daemon's CPU utilisation.
 *
 * Every 60s, computes (user+system CPU time) / (wall-clock time) since
 * the previous invocation.  Logs a warning when utilisation reaches the
 * threshold (default 90%, overridable for testing via the
 * CTDB_TEST_CPU_USAGE_THRESHOLD environment variable) and again when it
 * drops back below it.  Always reschedules itself at the end.
 */
static void ctdb_cpu_check_threshold(struct tevent_context *ev,
				     struct tevent_timer *te,
				     struct timeval tv,
				     void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type_abort(
		private_data, struct ctdb_context);
	uint32_t interval = 60;	/* seconds between checks */

	/* static: threshold is parsed once; prev carries state between runs */
	static unsigned short threshold = 0;
	static struct cpu_check_threshold_data prev = {
		.percent = 0,
		.timeofday = { .tv_sec = 0 },
		.ru_time = { .tv_sec = 0 },
	};

	struct rusage usage;
	struct cpu_check_threshold_data curr = {
		.percent = 0,
	};
	int64_t ru_time_diff, timeofday_diff;
	bool first;
	int ret;

	/*
	 * Cache the threshold so that we don't waste time checking
	 * the environment variable every time
	 */
	if (threshold == 0) {
		const char *t;

		threshold = 90;

		t = getenv("CTDB_TEST_CPU_USAGE_THRESHOLD");
		if (t != NULL) {
			int th;

			th = atoi(t);
			if (th <= 0 || th > 100) {
				DBG_WARNING("Failed to parse env var: %s\n", t);
			} else {
				threshold = th;
			}
		}
	}

	ret = getrusage(RUSAGE_SELF, &usage);
	if (ret != 0) {
		DBG_WARNING("rusage() failed: %d\n", ret);
		goto next;	/* skip update of prev, just reschedule */
	}

	/* Sum the system and user CPU usage */
	curr.ru_time = timeval_sum(&usage.ru_utime, &usage.ru_stime);

	curr.timeofday = tv;

	first = timeval_is_zero(&prev.timeofday);
	if (first) {
		/* No previous values recorded so no calculation to do */
		goto done;
	}

	timeofday_diff = usec_time_diff(&curr.timeofday, &prev.timeofday);
	if (timeofday_diff <= 0) {
		/*
		 * Time went backwards or didn't progress so no (sane)
		 * calculation can be done
		 */
		goto done;
	}

	ru_time_diff = usec_time_diff(&curr.ru_time, &prev.ru_time);

	curr.percent = ru_time_diff * 100 / timeofday_diff;

	if (curr.percent >= threshold) {
		/* Log only if the utilisation changes */
		if (curr.percent != prev.percent) {
			D_WARNING("WARNING: CPU utilisation %hu%% >= "
				  "threshold (%hu%%)\n",
				  curr.percent,
				  threshold);
		}
	} else {
		/* Log if the utilisation falls below the threshold */
		if (prev.percent >= threshold) {
			D_WARNING("WARNING: CPU utilisation %hu%% < "
				  "threshold (%hu%%)\n",
				  curr.percent,
				  threshold);
		}
	}

done:
	prev = curr;

next:
	/* self-rescheduling timer */
	tevent_add_timer(ctdb->ev, ctdb,
			 timeval_current_ofs(interval, 0),
			 ctdb_cpu_check_threshold,
			 ctdb);
}
192 static void ctdb_start_cpu_check_threshold(struct ctdb_context *ctdb)
194 tevent_add_timer(ctdb->ev, ctdb,
195 timeval_current(),
196 ctdb_cpu_check_threshold,
197 ctdb);
199 #endif /* HAVE_GETRUSAGE */
201 static void ctdb_time_tick(struct tevent_context *ev, struct tevent_timer *te,
202 struct timeval t, void *private_data)
204 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
206 if (getpid() != ctdb->ctdbd_pid) {
207 return;
210 tevent_add_timer(ctdb->ev, ctdb,
211 timeval_current_ofs(1, 0),
212 ctdb_time_tick, ctdb);
/* Used to trigger a dummy event once per second, to make
 * detection of hangs more reliable.
 */
static void ctdb_start_time_tickd(struct ctdb_context *ctdb)
{
	/* first tick fires in one second; ctdb_time_tick() reschedules */
	tevent_add_timer(ctdb->ev, ctdb,
			 timeval_current_ofs(1, 0),
			 ctdb_time_tick, ctdb);
}
/* Kick off all of the daemon's recurring maintenance events. */
static void ctdb_start_periodic_events(struct ctdb_context *ctdb)
{
	/* start monitoring for connected/disconnected nodes */
	ctdb_start_keepalive(ctdb);

	/* start periodic update of tcp tickle lists */
	ctdb_start_tcp_tickle_update(ctdb);

	/* start listening for recovery daemon pings */
	ctdb_control_recd_ping(ctdb);

	/* start listening to timer ticks */
	ctdb_start_time_tickd(ctdb);

#ifdef HAVE_GETRUSAGE
	ctdb_start_cpu_check_threshold(ctdb);
#endif /* HAVE_GETRUSAGE */
}
244 static void ignore_signal(int signum)
246 struct sigaction act;
248 memset(&act, 0, sizeof(act));
250 act.sa_handler = SIG_IGN;
251 sigemptyset(&act.sa_mask);
252 sigaddset(&act.sa_mask, signum);
253 sigaction(signum, &act, NULL);
258 send a packet to a client
260 static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
262 CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent);
263 if (hdr->operation == CTDB_REQ_MESSAGE) {
264 if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) {
265 DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n"));
266 talloc_free(client);
267 return -1;
270 return ctdb_queue_send(client->queue, (uint8_t *)hdr, hdr->length);
/*
  message handler for when we are in daemon mode. This redirects the message
  to the right client
 */
static void daemon_message_handler(uint64_t srvid, TDB_DATA data,
				   void *private_data)
{
	struct ctdb_client *client = talloc_get_type(private_data, struct ctdb_client);
	struct ctdb_req_message_old *r;
	int len;

	/* construct a message to send to the client containing the data */
	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
	r = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_MESSAGE,
			       len, struct ctdb_req_message_old);
	CTDB_NO_MEMORY_VOID(client->ctdb, r);

	talloc_set_name_const(r, "req_message packet");

	r->srvid = srvid;
	r->datalen = data.dsize;
	memcpy(&r->data[0], data.dptr, data.dsize);

	/* the queue copies the packet, so it can be freed afterwards */
	daemon_queue_send(client, &r->hdr);

	talloc_free(r);
}
/*
  this is called when the ctdb daemon received a ctdb request to
  set the srvid from the client
 */
int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
{
	struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
	int res;

	if (client == NULL) {
		DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_register_message_handler\n"));
		return -1;
	}

	/* future messages for srvid will be forwarded to this client */
	res = srvid_register(ctdb->srv, client, srvid, daemon_message_handler,
			     client);
	if (res != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to register handler %llu in daemon\n",
				 (unsigned long long)srvid));
	} else {
		DEBUG(DEBUG_INFO,(__location__ " Registered message handler for srvid=%llu\n",
				  (unsigned long long)srvid));
	}

	return res;
}
327 this is called when the ctdb daemon received a ctdb request to
328 remove a srvid from the client
330 int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
332 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
333 if (client == NULL) {
334 DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_deregister_message_handler\n"));
335 return -1;
337 return srvid_deregister(ctdb->srv, srvid, client);
/* Forward a tunnel message to the local client that registered for
 * this tunnel_id: copy the incoming packet into a freshly allocated
 * CTDB_REQ_TUNNEL packet and queue it to the client. */
void daemon_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
			   void *private_data)
{
	struct ctdb_client *client =
		talloc_get_type_abort(private_data, struct ctdb_client);
	struct ctdb_req_tunnel_old *c, *pkt;
	size_t len;

	/* data holds a complete serialized tunnel packet */
	pkt = (struct ctdb_req_tunnel_old *)data.dptr;

	len = offsetof(struct ctdb_req_tunnel_old, data) + pkt->datalen;
	c = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_TUNNEL,
			       len, struct ctdb_req_tunnel_old);
	if (c == NULL) {
		DEBUG(DEBUG_ERR, ("Memory error in daemon_tunnel_handler\n"));
		return;
	}

	talloc_set_name_const(c, "req_tunnel packet");

	c->tunnel_id = tunnel_id;
	c->flags = pkt->flags;
	c->datalen = pkt->datalen;
	memcpy(c->data, pkt->data, pkt->datalen);

	/* the queue copies the packet, so free our copy afterwards */
	daemon_queue_send(client, &c->hdr);

	talloc_free(c);
}
/*
  destroy a ctdb_client
 */
static int ctdb_client_destructor(struct ctdb_client *client)
{
	struct ctdb_db_context *ctdb_db;

	ctdb_takeover_client_destructor_hook(client);
	reqid_remove(client->ctdb->idr, client->client_id);
	client->ctdb->num_clients--;

	/* a client dying mid-update would leave the cluster inconsistent,
	 * so force a recovery in that case */
	if (client->num_persistent_updates != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
		client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}
	ctdb_db = find_ctdb_db(client->ctdb, client->db_id);
	if (ctdb_db) {
		DEBUG(DEBUG_ERR, (__location__ " client exit while transaction "
				  "commit active. Forcing recovery.\n"));
		client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

		/*
		 * trans3 transaction state:
		 *
		 * The destructor sets the pointer to NULL.
		 */
		talloc_free(ctdb_db->persistent_state);
	}

	return 0;
}
/*
  this is called when the ctdb daemon received a ctdb request message
  from a local client over the unix domain socket
 */
static void daemon_request_message_from_client(struct ctdb_client *client,
					       struct ctdb_req_message_old *c)
{
	TDB_DATA data;
	int res;

	/* CTDB_CURRENT_NODE is resolved to our own pnn before routing */
	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
		c->hdr.destnode = ctdb_get_pnn(client->ctdb);
	}

	/* maybe the message is for another client on this node */
	if (ctdb_get_pnn(client->ctdb)==c->hdr.destnode) {
		ctdb_request_message(client->ctdb, (struct ctdb_req_header *)c);
		return;
	}

	/* its for a remote node */
	data.dptr = &c->data[0];
	data.dsize = c->datalen;
	res = ctdb_daemon_send_message(client->ctdb, c->hdr.destnode,
				       c->srvid, data);
	if (res != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n",
				 c->hdr.destnode));
	}
}
/* Per-call bookkeeping carried from daemon_request_call_from_client()
 * to daemon_call_from_client_callback(). */
struct daemon_call_state {
	struct ctdb_client *client;	/* client the reply goes back to */
	uint32_t reqid;			/* request id echoed in the reply */
	struct ctdb_call *call;
	struct timeval start_time;	/* for call latency accounting */

	/* readonly request ? */
	uint32_t readonly_fetch;	/* 1 if FETCH was remapped to
					 * FETCH_WITH_HEADER */
	uint32_t client_callid;		/* call id the client originally sent */
};
/*
  complete a call from a client

  Builds a CTDB_REPLY_CALL packet from the finished call state and
  queues it back to the originating client.  If the client had asked
  for a plain FETCH that was remapped to FETCH_WITH_HEADER, the extra
  ltdb header is stripped from the reply data first.
 */
static void daemon_call_from_client_callback(struct ctdb_call_state *state)
{
	struct daemon_call_state *dstate = talloc_get_type(state->async.private_data,
							   struct daemon_call_state);
	struct ctdb_reply_call_old *r;
	int res;
	uint32_t length;
	struct ctdb_client *client = dstate->client;
	struct ctdb_db_context *ctdb_db = state->ctdb_db;

	/* reparent so dstate (and the call) survive the state teardown */
	talloc_steal(client, dstate);
	talloc_steal(dstate, dstate->call);

	res = ctdb_daemon_call_recv(state, dstate->call);
	if (res != 0) {
		DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);

		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time);
		return;
	}

	length = offsetof(struct ctdb_reply_call_old, data) + dstate->call->reply_data.dsize;
	/* If the client asked for readonly FETCH, we remapped this to
	   FETCH_WITH_HEADER when calling the daemon. So we must
	   strip the extra header off the reply data before passing
	   it back to the client.
	*/
	if (dstate->readonly_fetch
	    && dstate->client_callid == CTDB_FETCH_FUNC) {
		length -= sizeof(struct ctdb_ltdb_header);
	}

	r = ctdbd_allocate_pkt(client->ctdb, dstate, CTDB_REPLY_CALL,
			       length, struct ctdb_reply_call_old);
	if (r == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time);
		return;
	}
	r->hdr.reqid = dstate->reqid;
	r->status = dstate->call->status;

	if (dstate->readonly_fetch
	    && dstate->client_callid == CTDB_FETCH_FUNC) {
		/* client only asked for a FETCH so we must strip off
		   the extra ctdb_ltdb header
		*/
		r->datalen = dstate->call->reply_data.dsize - sizeof(struct ctdb_ltdb_header);
		memcpy(&r->data[0], dstate->call->reply_data.dptr + sizeof(struct ctdb_ltdb_header), r->datalen);
	} else {
		r->datalen = dstate->call->reply_data.dsize;
		memcpy(&r->data[0], dstate->call->reply_data.dptr, r->datalen);
	}

	res = daemon_queue_send(client, &r->hdr);
	if (res == -1) {
		/* client is dead - return immediately */
		return;
	}
	if (res != 0) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
	}
	CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time);
	CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
	talloc_free(dstate);
}
/* Identifies the originating client by id (not pointer) so a requeued
 * packet can detect that the client has since disconnected. */
struct ctdb_daemon_packet_wrap {
	struct ctdb_context *ctdb;
	uint32_t client_id;
};
/*
  a wrapper to catch disconnected clients
 */
static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr)
{
	struct ctdb_client *client;
	struct ctdb_daemon_packet_wrap *w = talloc_get_type(p,
							    struct ctdb_daemon_packet_wrap);
	if (w == NULL) {
		DEBUG(DEBUG_CRIT,(__location__ " Bad packet type '%s'\n", talloc_get_name(p)));
		return;
	}

	/* look the client up by id; it may have gone away while the
	 * packet was queued */
	client = reqid_find(w->ctdb->idr, w->client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
				 w->client_id));
		talloc_free(w);
		return;
	}
	talloc_free(w);

	/* process it */
	daemon_incoming_packet(client, hdr);
}
/* One deferred duplicate fetch request, queued until the in-flight
 * fetch for the same key completes. */
struct ctdb_deferred_fetch_call {
	struct ctdb_deferred_fetch_call *next, *prev;
	struct ctdb_req_call_old *c;		/* the deferred request */
	struct ctdb_daemon_packet_wrap *w;	/* origin client id + ctdb */
};

/* FIFO of deferred calls for one key; freeing it (destructor) requeues
 * every pending call. */
struct ctdb_deferred_fetch_queue {
	struct ctdb_deferred_fetch_call *deferred_calls;
};

/* Pairs a deferred call with its (still connected) client for the
 * zero-timeout requeue event. */
struct ctdb_deferred_requeue {
	struct ctdb_deferred_fetch_call *dfc;
	struct ctdb_client *client;
};
564 /* called from a timer event and starts reprocessing the deferred call.*/
565 static void reprocess_deferred_call(struct tevent_context *ev,
566 struct tevent_timer *te,
567 struct timeval t, void *private_data)
569 struct ctdb_deferred_requeue *dfr = (struct ctdb_deferred_requeue *)private_data;
570 struct ctdb_client *client = dfr->client;
572 talloc_steal(client, dfr->dfc->c);
573 daemon_incoming_packet(client, (struct ctdb_req_header *)dfr->dfc->c);
574 talloc_free(dfr);
/* the referral context is destroyed either after a timeout or when the initial
   fetch-lock has finished.
   at this stage, immediately start reprocessing the queued up deferred
   calls so they get reprocessed immediately (and since we are dmaster at
   this stage, trigger the waiting smbd processes to pick up and acquire the
   record right away.
*/
static int deferred_fetch_queue_destructor(struct ctdb_deferred_fetch_queue *dfq)
{
	/* need to reprocess the packets from the queue explicitly instead of
	   just using a normal destructor since we need to
	   call the clients in the same order as the requests queued up
	*/
	while (dfq->deferred_calls != NULL) {
		struct ctdb_client *client;
		struct ctdb_deferred_fetch_call *dfc = dfq->deferred_calls;
		struct ctdb_deferred_requeue *dfr;

		DLIST_REMOVE(dfq->deferred_calls, dfc);

		/* the client may have disconnected while its call was
		 * deferred; drop those silently */
		client = reqid_find(dfc->w->ctdb->idr, dfc->w->client_id, struct ctdb_client);
		if (client == NULL) {
			DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
					 dfc->w->client_id));
			continue;
		}

		/* process it by pushing it back onto the eventloop */
		dfr = talloc(client, struct ctdb_deferred_requeue);
		if (dfr == NULL) {
			DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch requeue structure\n"));
			continue;
		}

		dfr->dfc = talloc_steal(dfr, dfc);
		dfr->client = client;

		/* zero timeout: fires on the next event-loop iteration */
		tevent_add_timer(dfc->w->ctdb->ev, client, timeval_zero(),
				 reprocess_deferred_call, dfr);
	}

	return 0;
}
622 /* insert the new deferral context into the rb tree.
623 there should never be a pre-existing context here, but check for it
624 warn and destroy the previous context if there is already a deferral context
625 for this key.
627 static void *insert_dfq_callback(void *parm, void *data)
629 if (data) {
630 DEBUG(DEBUG_ERR,("Already have DFQ registered. Free old %p and create new %p\n", data, parm));
631 talloc_free(data);
633 return parm;
/* if the original fetch-lock did not complete within a reasonable time,
   free the context and context for all deferred requests to cause them to be
   re-inserted into the event system.
*/
static void dfq_timeout(struct tevent_context *ev, struct tevent_timer *te,
			struct timeval t, void *private_data)
{
	/* freeing the queue runs deferred_fetch_queue_destructor(),
	 * which requeues all the deferred calls */
	talloc_free(private_data);
}
/* This function is used in the local daemon to register a KEY in a database
   for being "fetched"
   While the remote fetch is in-flight, any further attempts to re-fetch the
   same record will be deferred until the fetch completes.
*/
static int setup_deferred_fetch_locks(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
{
	uint32_t *k;
	struct ctdb_deferred_fetch_queue *dfq;

	k = ctdb_key_to_idkey(call, call->key);
	if (k == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
		return -1;
	}

	dfq = talloc(call, struct ctdb_deferred_fetch_queue);
	if (dfq == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch queue structure\n"));
		talloc_free(k);
		return -1;
	}
	dfq->deferred_calls = NULL;

	/* register the (empty) queue under this key; the callback warns
	 * about and replaces any stale pre-existing queue */
	trbt_insertarray32_callback(ctdb_db->deferred_fetch, k[0], &k[0], insert_dfq_callback, dfq);

	/* freeing dfq (timeout or fetch completion) requeues deferrals */
	talloc_set_destructor(dfq, deferred_fetch_queue_destructor);

	/* If the fetch hasn't completed in 30 seconds, just tear it all down
	   and let it try again as the events are reissued */
	tevent_add_timer(ctdb_db->ctdb->ev, dfq, timeval_current_ofs(30, 0),
			 dfq_timeout, dfq);

	talloc_free(k);
	return 0;
}
/* check if this is a duplicate request to a fetch already in-flight
   if it is, make this call deferred to be reprocessed later when
   the in-flight fetch completes.

   Returns 0 when the call was deferred, -1 when no fetch is in flight
   for this key (or on allocation failure) and the caller should
   process the call normally.
*/
static int requeue_duplicate_fetch(struct ctdb_db_context *ctdb_db, struct ctdb_client *client, TDB_DATA key, struct ctdb_req_call_old *c)
{
	uint32_t *k;
	struct ctdb_deferred_fetch_queue *dfq;
	struct ctdb_deferred_fetch_call *dfc;

	k = ctdb_key_to_idkey(c, key);
	if (k == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
		return -1;
	}

	/* no queue registered means no fetch in flight for this key */
	dfq = trbt_lookuparray32(ctdb_db->deferred_fetch, k[0], &k[0]);
	if (dfq == NULL) {
		talloc_free(k);
		return -1;
	}

	talloc_free(k);

	dfc = talloc(dfq, struct ctdb_deferred_fetch_call);
	if (dfc == NULL) {
		DEBUG(DEBUG_ERR, ("Failed to allocate deferred fetch call structure\n"));
		return -1;
	}

	dfc->w = talloc(dfc, struct ctdb_daemon_packet_wrap);
	if (dfc->w == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch daemon packet wrap structure\n"));
		talloc_free(dfc);
		return -1;
	}

	/* take ownership of the request packet; it is replayed later */
	dfc->c = talloc_steal(dfc, c);
	dfc->w->ctdb = ctdb_db->ctdb;
	dfc->w->client_id = client->client_id;

	DLIST_ADD_END(dfq->deferred_calls, dfc);

	return 0;
}
/*
  this is called when the ctdb daemon received a ctdb request call
  from a local client over the unix domain socket

  Looks up the database, takes the record lock (possibly requeueing the
  packet until the lock is available), handles read-only delegation
  revocation, collapses duplicate fetches, and finally dispatches the
  call either locally (we are dmaster) or to the remote dmaster.
 */
static void daemon_request_call_from_client(struct ctdb_client *client,
					    struct ctdb_req_call_old *c)
{
	struct ctdb_call_state *state;
	struct ctdb_db_context *ctdb_db;
	struct daemon_call_state *dstate;
	struct ctdb_call *call;
	struct ctdb_ltdb_header header;
	TDB_DATA key, data;
	int ret;
	struct ctdb_context *ctdb = client->ctdb;
	struct ctdb_daemon_packet_wrap *w;

	CTDB_INCREMENT_STAT(ctdb, total_calls);
	CTDB_INCREMENT_STAT(ctdb, pending_calls);

	ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
	if (!ctdb_db) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x\n",
				  c->db_id));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	if (ctdb_db->unhealthy_reason) {
		/*
		 * this is just a warning, as the tdb should be empty anyway,
		 * and only persistent databases can be unhealthy, which doesn't
		 * use this code patch
		 */
		DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
				     ctdb_db->db_name, ctdb_db->unhealthy_reason));
	}

	key.dptr = c->data;
	key.dsize = c->keylen;

	w = talloc(ctdb, struct ctdb_daemon_packet_wrap);
	CTDB_NO_MEMORY_VOID(ctdb, w);

	w->ctdb = ctdb;
	w->client_id = client->client_id;

	/* ret == -2 means the lock is busy and the packet was requeued;
	 * the wrapper (owning w) will re-run it later */
	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header,
					   (struct ctdb_req_header *)c, &data,
					   daemon_incoming_packet_wrap, w, true);
	if (ret == -2) {
		/* will retry later */
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	talloc_free(w);

	if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	/* check if this fetch request is a duplicate for a
	   request we already have in flight. If so defer it until
	   the first request completes.
	*/
	if (ctdb->tunable.fetch_collapse == 1) {
		if (requeue_duplicate_fetch(ctdb_db, client, key, c) == 0) {
			ret = ctdb_ltdb_unlock(ctdb_db, key);
			if (ret != 0) {
				DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
			}
			CTDB_DECREMENT_STAT(ctdb, pending_calls);
			talloc_free(data.dptr);
			return;
		}
	}

	/* Dont do READONLY if we don't have a tracking database */
	if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db_readonly(ctdb_db)) {
		c->flags &= ~CTDB_WANT_READONLY;
	}

	/* a completed revoke: clear the read-only flags and drop the
	 * tracking record */
	if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) {
		header.flags &= ~CTDB_REC_RO_FLAGS;
		CTDB_INCREMENT_STAT(ctdb, total_ro_revokes);
		CTDB_INCREMENT_DB_STAT(ctdb_db, db_ro_revokes);
		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag");
		}
		/* and clear out the tracking data */
		if (tdb_delete(ctdb_db->rottdb, key) != 0) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to clear out trackingdb record\n"));
		}
	}

	/* if we are revoking, we must defer all other calls until the revoke
	 * had completed.
	 */
	if (header.flags & CTDB_REC_RO_REVOKING_READONLY) {
		talloc_free(data.dptr);
		ret = ctdb_ltdb_unlock(ctdb_db, key);

		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
		}
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	/* we are dmaster, the client wants a writable copy, and read-only
	 * delegations exist: start revoking them and defer this call */
	if ((header.dmaster == ctdb->pnn)
	    && (!(c->flags & CTDB_WANT_READONLY))
	    && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) {
		header.flags |= CTDB_REC_RO_REVOKING_READONLY;
		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
		}
		ret = ctdb_ltdb_unlock(ctdb_db, key);

		if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to start record revoke");
		}
		talloc_free(data.dptr);

		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
		}

		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	dstate = talloc(client, struct daemon_call_state);
	if (dstate == NULL) {
		ret = ctdb_ltdb_unlock(ctdb_db, key);
		if (ret != 0) {
			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
		}

		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}
	dstate->start_time = timeval_current();
	dstate->client = client;
	dstate->reqid = c->hdr.reqid;
	talloc_steal(dstate, data.dptr);

	call = dstate->call = talloc_zero(dstate, struct ctdb_call);
	if (call == NULL) {
		ret = ctdb_ltdb_unlock(ctdb_db, key);
		if (ret != 0) {
			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
		}

		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time);
		return;
	}

	dstate->readonly_fetch = 0;
	call->call_id = c->callid;
	call->key = key;
	call->call_data.dptr = c->data + c->keylen;
	call->call_data.dsize = c->calldatalen;
	call->flags = c->flags;

	if (c->flags & CTDB_WANT_READONLY) {
		/* client wants readonly record, so translate this into a
		   fetch with header. remember what the client asked for
		   so we can remap the reply back to the proper format for
		   the client in the reply
		 */
		dstate->client_callid = call->call_id;
		call->call_id = CTDB_FETCH_WITH_HEADER_FUNC;
		dstate->readonly_fetch = 1;
	}

	if (header.dmaster == ctdb->pnn) {
		state = ctdb_call_local_send(ctdb_db, call, &header, &data);
	} else {
		state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
		if (ctdb->tunable.fetch_collapse == 1) {
			/* This request triggered a remote fetch-lock.
			   set up a deferral for this key so any additional
			   fetch-locks are deferred until the current one
			   finishes.
			 */
			setup_deferred_fetch_locks(ctdb_db, call);
		}
	}

	ret = ctdb_ltdb_unlock(ctdb_db, key);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
	}

	if (state == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time);
		return;
	}
	talloc_steal(state, dstate);
	talloc_steal(client, state);

	/* completion is handled in daemon_call_from_client_callback() */
	state->async.fn = daemon_call_from_client_callback;
	state->async.private_data = dstate;
}
946 static void daemon_request_control_from_client(struct ctdb_client *client,
947 struct ctdb_req_control_old *c);
948 static void daemon_request_tunnel_from_client(struct ctdb_client *client,
949 struct ctdb_req_tunnel_old *c);
/* data contains a packet from the client: validate magic/version and
 * dispatch on the operation code. */
static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr)
{
	struct ctdb_client *client = talloc_get_type(p, struct ctdb_client);
	TALLOC_CTX *tmp_ctx;
	struct ctdb_context *ctdb = client->ctdb;

	/* place the packet as a child of a tmp_ctx. We then use
	   talloc_free() below to free it. If any of the calls want
	   to keep it, then they will steal it somewhere else, and the
	   talloc_free() will be a no-op */
	tmp_ctx = talloc_new(client);
	talloc_steal(tmp_ctx, hdr);

	if (hdr->ctdb_magic != CTDB_MAGIC) {
		ctdb_set_error(client->ctdb, "Non CTDB packet rejected in daemon\n");
		goto done;
	}

	if (hdr->ctdb_version != CTDB_PROTOCOL) {
		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
		goto done;
	}

	switch (hdr->operation) {
	case CTDB_REQ_CALL:
		CTDB_INCREMENT_STAT(ctdb, client.req_call);
		daemon_request_call_from_client(client, (struct ctdb_req_call_old *)hdr);
		break;

	case CTDB_REQ_MESSAGE:
		CTDB_INCREMENT_STAT(ctdb, client.req_message);
		daemon_request_message_from_client(client, (struct ctdb_req_message_old *)hdr);
		break;

	case CTDB_REQ_CONTROL:
		CTDB_INCREMENT_STAT(ctdb, client.req_control);
		daemon_request_control_from_client(client, (struct ctdb_req_control_old *)hdr);
		break;

	case CTDB_REQ_TUNNEL:
		CTDB_INCREMENT_STAT(ctdb, client.req_tunnel);
		daemon_request_tunnel_from_client(client, (struct ctdb_req_tunnel_old *)hdr);
		break;

	default:
		DEBUG(DEBUG_CRIT,(__location__ " daemon: unrecognized operation %u\n",
				  hdr->operation));
	}

done:
	talloc_free(tmp_ctx);
}
/*
  called when the daemon gets a incoming packet
 */
static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
{
	struct ctdb_client *client = talloc_get_type(args, struct ctdb_client);
	struct ctdb_req_header *hdr;

	/* cnt == 0 means EOF: the client hung up */
	if (cnt == 0) {
		talloc_free(client);
		return;
	}

	CTDB_INCREMENT_STAT(client->ctdb, client_packets_recv);

	if (cnt < sizeof(*hdr)) {
		/* NOTE(review): this path returns without freeing 'data',
		 * unlike err_out below — presumably a leak on malformed
		 * input; confirm ownership contract of the read callback */
		ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n",
			       (unsigned)cnt);
		return;
	}
	hdr = (struct ctdb_req_header *)data;

	if (hdr->ctdb_magic != CTDB_MAGIC) {
		ctdb_set_error(client->ctdb, "Non CTDB packet rejected\n");
		goto err_out;
	}

	if (hdr->ctdb_version != CTDB_PROTOCOL) {
		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
		goto err_out;
	}

	DEBUG(DEBUG_DEBUG,(__location__ " client request %u of type %u length %u from "
		 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
		 hdr->srcnode, hdr->destnode));

	/* it is the responsibility of the incoming packet function to free 'data' */
	daemon_incoming_packet(client, hdr);
	return;

err_out:
	TALLOC_FREE(data);
}
1050 static int ctdb_clientpid_destructor(struct ctdb_client_pid_list *client_pid)
1052 if (client_pid->ctdb->client_pids != NULL) {
1053 DLIST_REMOVE(client_pid->ctdb->client_pids, client_pid);
1056 return 0;
/* Allocate a new client id for 'client', guaranteeing it is neither 0
 * nor REQID_INVALID.  Returns 0 on success, EINVAL when the id space is
 * exhausted. */
static int get_new_client_id(struct reqid_context *idr,
			     struct ctdb_client *client,
			     uint32_t *out)
{
	uint32_t client_id;

	client_id = reqid_new(idr, client);
	/*
	 * Some places in the code (e.g. ctdb_control_db_attach(),
	 * ctdb_control_db_detach()) assign a special meaning to
	 * client_id 0.  The assumption is that if client_id is 0 then
	 * the control has come from another daemon.  Therefore, we
	 * should never return client_id == 0.
	 */
	if (client_id == 0) {
		/*
		 * Don't leak ID 0.  This is safe because the ID keeps
		 * increasing.  A test will be added to ensure that
		 * this doesn't change.
		 */
		reqid_remove(idr, 0);

		client_id = reqid_new(idr, client);
	}

	if (client_id == REQID_INVALID) {
		return EINVAL;
	}

	if (client_id == 0) {
		/* Every other ID must have been used and we can't use 0 */
		reqid_remove(idr, 0);
		return EINVAL;
	}

	*out = client_id;
	return 0;
}
1098 static void ctdb_accept_client(struct tevent_context *ev,
1099 struct tevent_fd *fde, uint16_t flags,
1100 void *private_data)
1102 struct sockaddr_un addr;
1103 socklen_t len;
1104 int fd;
1105 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1106 struct ctdb_client *client;
1107 struct ctdb_client_pid_list *client_pid;
1108 pid_t peer_pid = 0;
1109 int ret;
1111 memset(&addr, 0, sizeof(addr));
1112 len = sizeof(addr);
1113 fd = accept(ctdb->daemon.sd, (struct sockaddr *)&addr, &len);
1114 if (fd == -1) {
1115 return;
1117 smb_set_close_on_exec(fd);
1119 ret = set_blocking(fd, false);
1120 if (ret != 0) {
1121 DEBUG(DEBUG_ERR,
1122 (__location__
1123 " failed to set socket non-blocking (%s)\n",
1124 strerror(errno)));
1125 close(fd);
1126 return;
1129 set_close_on_exec(fd);
1131 DEBUG(DEBUG_DEBUG,(__location__ " Created SOCKET FD:%d to connected child\n", fd));
1133 client = talloc_zero(ctdb, struct ctdb_client);
1134 if (ctdb_get_peer_pid(fd, &peer_pid) == 0) {
1135 DEBUG(DEBUG_INFO,("Connected client with pid:%u\n", (unsigned)peer_pid));
1138 client->ctdb = ctdb;
1139 client->fd = fd;
1141 ret = get_new_client_id(ctdb->idr, client, &client->client_id);
1142 if (ret != 0) {
1143 DBG_ERR("Unable to get client ID (%d)\n", ret);
1144 close(fd);
1145 talloc_free(client);
1146 return;
1149 client->pid = peer_pid;
1151 client_pid = talloc(client, struct ctdb_client_pid_list);
1152 if (client_pid == NULL) {
1153 DEBUG(DEBUG_ERR,("Failed to allocate client pid structure\n"));
1154 close(fd);
1155 talloc_free(client);
1156 return;
1158 client_pid->ctdb = ctdb;
1159 client_pid->pid = peer_pid;
1160 client_pid->client = client;
1162 DLIST_ADD(ctdb->client_pids, client_pid);
1164 client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT,
1165 ctdb_daemon_read_cb, client,
1166 "client-%u", client->pid);
1168 talloc_set_destructor(client, ctdb_client_destructor);
1169 talloc_set_destructor(client_pid, ctdb_clientpid_destructor);
1170 ctdb->num_clients++;
1176 * Create a unix domain socket, bind it, secure it and listen. Return
1177 * the file descriptor for the socket.
1179 static int ux_socket_bind(struct ctdb_context *ctdb, bool test_mode_enabled)
1181 struct sockaddr_un addr = { .sun_family = AF_UNIX };
1182 int ret;
1184 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
1185 if (ctdb->daemon.sd == -1) {
1186 return -1;
1189 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
1191 if (! sock_clean(ctdb->daemon.name)) {
1192 return -1;
1195 set_close_on_exec(ctdb->daemon.sd);
1197 ret = set_blocking(ctdb->daemon.sd, false);
1198 if (ret != 0) {
1199 DBG_ERR("Failed to set socket non-blocking (%s)\n",
1200 strerror(errno));
1201 goto failed;
1204 ret = bind(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr));
1205 if (ret == -1) {
1206 D_ERR("Unable to bind on ctdb socket '%s'\n", ctdb->daemon.name);
1207 goto failed;
1210 if (!test_mode_enabled) {
1211 ret = chown(ctdb->daemon.name, geteuid(), getegid());
1212 if (ret != 0 && !test_mode_enabled) {
1213 D_ERR("Unable to secure (chown) ctdb socket '%s'\n",
1214 ctdb->daemon.name);
1215 goto failed;
1219 ret = chmod(ctdb->daemon.name, 0700);
1220 if (ret != 0) {
1221 D_ERR("Unable to secure (chmod) ctdb socket '%s'\n",
1222 ctdb->daemon.name);
1223 goto failed;
1227 ret = listen(ctdb->daemon.sd, 100);
1228 if (ret != 0) {
1229 D_ERR("Unable to listen on ctdb socket '%s'\n",
1230 ctdb->daemon.name);
1231 goto failed;
1234 D_NOTICE("Listening to ctdb socket %s\n", ctdb->daemon.name);
1235 return 0;
1237 failed:
1238 close(ctdb->daemon.sd);
1239 ctdb->daemon.sd = -1;
1240 return -1;
1243 struct ctdb_node *ctdb_find_node(struct ctdb_context *ctdb, uint32_t pnn)
1245 struct ctdb_node *node = NULL;
1246 unsigned int i;
1248 if (pnn == CTDB_CURRENT_NODE) {
1249 pnn = ctdb->pnn;
1252 /* Always found: PNN correctly set just before this is called */
1253 for (i = 0; i < ctdb->num_nodes; i++) {
1254 node = ctdb->nodes[i];
1255 if (pnn == node->pnn) {
1256 return node;
1260 return NULL;
1263 static void initialise_node_flags (struct ctdb_context *ctdb)
1265 struct ctdb_node *node = NULL;
1267 node = ctdb_find_node(ctdb, CTDB_CURRENT_NODE);
1269 * PNN correctly set just before this is called so always
1270 * found but keep static analysers happy...
1272 if (node == NULL) {
1273 DBG_ERR("Unable to find current node\n");
1274 return;
1277 node->flags &= ~NODE_FLAGS_DISCONNECTED;
1279 /* do we start out in DISABLED mode? */
1280 if (ctdb->start_as_disabled != 0) {
1281 D_ERR("This node is configured to start in DISABLED state\n");
1282 node->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
1284 /* do we start out in STOPPED mode? */
1285 if (ctdb->start_as_stopped != 0) {
1286 D_ERR("This node is configured to start in STOPPED state\n");
1287 node->flags |= NODE_FLAGS_STOPPED;
/*
 * Completion callback for the "setup" event.
 *
 * A failed setup event is fatal.  On success the "setup" notification
 * script is run, the recovery daemon is started (failure to start it
 * exits the daemon), periodic events are enabled, and
 * ctdb_wait_for_first_recovery() is invoked.
 */
static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
				      void *private_data)
{
	if (status != 0) {
		ctdb_die(ctdb, "Failed to run setup event");
	}
	ctdb_run_notification_script(ctdb, "setup");

	/* Start the recovery daemon */
	if (ctdb_start_recoverd(ctdb) != 0) {
		DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n"));
		exit(11);
	}

	ctdb_start_periodic_events(ctdb);

	ctdb_wait_for_first_recovery(ctdb);
}
/* Timestamps bracketing the tevent wait, used by ctdb_tevent_trace()
 * to detect both long-running event handlers and long periods with no
 * events at all. */
static struct timeval tevent_before_wait_ts;
static struct timeval tevent_after_wait_ts;
1313 static void ctdb_tevent_trace_init(void)
1315 struct timeval now;
1317 now = timeval_current();
1319 tevent_before_wait_ts = now;
1320 tevent_after_wait_ts = now;
1323 static void ctdb_tevent_trace(enum tevent_trace_point tp,
1324 void *private_data)
1326 struct timeval diff;
1327 struct timeval now;
1328 struct ctdb_context *ctdb =
1329 talloc_get_type(private_data, struct ctdb_context);
1331 if (getpid() != ctdb->ctdbd_pid) {
1332 return;
1335 now = timeval_current();
1337 switch (tp) {
1338 case TEVENT_TRACE_BEFORE_WAIT:
1339 diff = tevent_timeval_until(&tevent_after_wait_ts, &now);
1340 if (diff.tv_sec > 3) {
1341 DEBUG(DEBUG_ERR,
1342 ("Handling event took %ld seconds!\n",
1343 (long)diff.tv_sec));
1345 tevent_before_wait_ts = now;
1346 break;
1348 case TEVENT_TRACE_AFTER_WAIT:
1349 diff = tevent_timeval_until(&tevent_before_wait_ts, &now);
1350 if (diff.tv_sec > 3) {
1351 DEBUG(DEBUG_ERR,
1352 ("No event for %ld seconds!\n",
1353 (long)diff.tv_sec));
1355 tevent_after_wait_ts = now;
1356 break;
1358 default:
1359 /* Do nothing for future tevent trace points */ ;
/* atexit() handler: release the pidfile context, which removes the
 * PID file on normal daemon exit */
static void ctdb_remove_pidfile(void)
{
	TALLOC_FREE(ctdbd_pidfile_ctx);
}
1368 static void ctdb_create_pidfile(TALLOC_CTX *mem_ctx)
1370 if (ctdbd_pidfile != NULL) {
1371 int ret = pidfile_context_create(mem_ctx, ctdbd_pidfile,
1372 &ctdbd_pidfile_ctx);
1373 if (ret != 0) {
1374 DEBUG(DEBUG_ERR,
1375 ("Failed to create PID file %s\n",
1376 ctdbd_pidfile));
1377 exit(11);
1380 DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile));
1381 atexit(ctdb_remove_pidfile);
1385 static void ctdb_initialise_vnn_map(struct ctdb_context *ctdb)
1387 unsigned int i, j, count;
1389 /* initialize the vnn mapping table, skipping any deleted nodes */
1390 ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
1391 CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map);
1393 count = 0;
1394 for (i = 0; i < ctdb->num_nodes; i++) {
1395 if ((ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) == 0) {
1396 count++;
1400 ctdb->vnn_map->generation = INVALID_GENERATION;
1401 ctdb->vnn_map->size = count;
1402 ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
1403 CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map->map);
1405 for(i=0, j=0; i < ctdb->vnn_map->size; i++) {
1406 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1407 continue;
1409 ctdb->vnn_map->map[j] = i;
1410 j++;
/*
 * Determine this node's PNN from its node address.  Fatal if the
 * address has not been set or is not found in the nodes list.
 */
static void ctdb_set_my_pnn(struct ctdb_context *ctdb)
{
	if (ctdb->address == NULL) {
		ctdb_fatal(ctdb,
			   "Can not determine PNN - node address is not set\n");
	}

	ctdb->pnn = ctdb_ip_to_pnn(ctdb, ctdb->address);
	if (ctdb->pnn == CTDB_UNKNOWN_PNN) {
		ctdb_fatal(ctdb,
			   "Can not determine PNN - unknown node address\n");
	}

	D_NOTICE("PNN is %u\n", ctdb->pnn);
}
1430 static void stdin_handler(struct tevent_context *ev,
1431 struct tevent_fd *fde,
1432 uint16_t flags,
1433 void *private_data)
1435 struct ctdb_context *ctdb = talloc_get_type_abort(
1436 private_data, struct ctdb_context);
1437 ssize_t nread;
1438 char c;
1440 nread = read(STDIN_FILENO, &c, 1);
1441 if (nread != 1) {
1442 D_ERR("stdin closed, exiting\n");
1443 talloc_free(fde);
1444 ctdb_shutdown_sequence(ctdb, EPIPE);
1448 static int setup_stdin_handler(struct ctdb_context *ctdb)
1450 struct tevent_fd *fde;
1451 struct stat st;
1452 int ret;
1454 ret = fstat(STDIN_FILENO, &st);
1455 if (ret != 0) {
1456 /* Problem with stdin, ignore... */
1457 DBG_INFO("Can't fstat() stdin\n");
1458 return 0;
1461 if (!S_ISFIFO(st.st_mode)) {
1462 DBG_INFO("Not a pipe...\n");
1463 return 0;
1466 fde = tevent_add_fd(ctdb->ev,
1467 ctdb,
1468 STDIN_FILENO,
1469 TEVENT_FD_READ,
1470 stdin_handler,
1471 ctdb);
1472 if (fde == NULL) {
1473 return ENOMEM;
1476 DBG_INFO("Set up stdin handler\n");
1477 return 0;
1480 static void fork_only(void)
1482 pid_t pid;
1484 pid = fork();
1485 if (pid == -1) {
1486 D_ERR("Fork failed (errno=%d)\n", errno);
1487 exit(1);
1490 if (pid != 0) {
1491 /* Parent simply exits... */
1492 exit(0);
/* SIGHUP hook: forward the signal to the recovery daemon (if one is
 * running) and reopen the log files */
static void sighup_hook(void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type_abort(private_data,
							  struct ctdb_context);

	if (ctdb->recoverd_pid > 0) {
		kill(ctdb->recoverd_pid, SIGHUP);
	}
	ctdb_event_reopen_logs(ctdb);
}
/*
  start the protocol going as a daemon

  Performs the full, strictly ordered startup sequence: daemonise,
  bind the client socket, set up event/signal handling, run the
  "init" event, initialise the transport, freeze the databases and
  finally enter the event loop.  Most failures are fatal (exit).
 */
int ctdb_start_daemon(struct ctdb_context *ctdb,
		      bool interactive,
		      bool test_mode_enabled)
{
	bool status;
	int ret;
	struct tevent_fd *fde;

	/* Fork if not interactive */
	if (!interactive) {
		if (test_mode_enabled) {
			/* Keep stdin open */
			fork_only();
		} else {
			/* Fork, close stdin, start a session */
			become_daemon(true, false, false);
		}
	}

	ignore_signal(SIGPIPE);
	ignore_signal(SIGUSR1);

	ctdb->ctdbd_pid = getpid();
	DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",
			  SAMBA_VERSION_STRING, ctdb->ctdbd_pid));
	ctdb_create_pidfile(ctdb);

	/* create a unix domain stream socket to listen to */
	ret = ux_socket_bind(ctdb, test_mode_enabled);
	if (ret != 0) {
		D_ERR("Cannot continue. Exiting!\n");
		exit(10);
	}

	/* Make sure we log something when the daemon terminates.
	 * This must be the first exit handler to run (so the last to
	 * be registered).
	 */
	__ctdbd_pid = getpid();
	atexit(print_exit_message);

	if (ctdb->do_setsched) {
		/* try to set us up as realtime */
		if (!set_scheduler()) {
			exit(1);
		}
		DEBUG(DEBUG_NOTICE, ("Set real-time scheduler priority\n"));
	}

	ctdb->ev = tevent_context_init(NULL);
	if (ctdb->ev == NULL) {
		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
		exit(1);
	}
	tevent_loop_allow_nesting(ctdb->ev);
	ctdb_tevent_trace_init();
	tevent_set_trace_callback(ctdb->ev, ctdb_tevent_trace, ctdb);

	/* Reopen logs (and poke recoverd) on SIGHUP */
	status = logging_setup_sighup_handler(ctdb->ev,
					      ctdb,
					      sighup_hook,
					      ctdb);
	if (!status) {
		D_ERR("Failed to set up signal handler for SIGHUP\n");
		exit(1);
	}

	/* set up a handler to pick up sigchld */
	if (ctdb_init_sigchld(ctdb) == NULL) {
		DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
		exit(1);
	}

	if (!interactive) {
		ctdb_set_child_logging(ctdb);
	}

	/* Exit if stdin is closed */
	if (test_mode_enabled) {
		ret = setup_stdin_handler(ctdb);
		if (ret != 0) {
			DBG_ERR("Failed to setup stdin handler\n");
			exit(1);
		}
	}

	TALLOC_FREE(ctdb->srv);
	if (srvid_init(ctdb, &ctdb->srv) != 0) {
		DEBUG(DEBUG_CRIT,("Failed to setup message srvid context\n"));
		exit(1);
	}

	TALLOC_FREE(ctdb->tunnels);
	if (srvid_init(ctdb, &ctdb->tunnels) != 0) {
		DEBUG(DEBUG_ERR, ("Failed to setup tunnels context\n"));
		exit(1);
	}

	/* initialize statistics collection */
	ctdb_statistics_init(ctdb);

	/* force initial recovery for election */
	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

	if (ctdb_start_eventd(ctdb) != 0) {
		DEBUG(DEBUG_ERR, ("Failed to start event daemon\n"));
		exit(1);
	}

	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT);
	ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
	if (ret != 0) {
		ctdb_die(ctdb, "Failed to run init event\n");
	}
	ctdb_run_notification_script(ctdb, "init");

	if (strcmp(ctdb->transport, "tcp") == 0) {
		ret = ctdb_tcp_init(ctdb);
	}
#ifdef USE_INFINIBAND
	if (strcmp(ctdb->transport, "ib") == 0) {
		ret = ctdb_ibw_init(ctdb);
	}
#endif
	if (ret != 0) {
		DEBUG(DEBUG_ERR,("Failed to initialise transport '%s'\n", ctdb->transport));
		return -1;
	}

	if (ctdb->methods == NULL) {
		DEBUG(DEBUG_ALERT,(__location__ " Can not initialize transport. ctdb->methods is NULL\n"));
		ctdb_fatal(ctdb, "transport is unavailable. can not initialize.");
	}

	/* Initialise the transport. This sets the node address if it
	 * was not set via the command-line. */
	if (ctdb->methods->initialise(ctdb) != 0) {
		ctdb_fatal(ctdb, "transport failed to initialise");
	}

	ctdb_set_my_pnn(ctdb);

	initialise_node_flags(ctdb);

	ret = ctdb_set_public_addresses(ctdb);
	if (ret == -1) {
		D_ERR("Unable to setup public IP addresses\n");
		exit(1);
	}

	ctdb_initialise_vnn_map(ctdb);

	/* attach to existing databases */
	if (ctdb_attach_databases(ctdb) != 0) {
		ctdb_fatal(ctdb, "Failed to attach to databases\n");
	}

	/* start frozen, then let the first election sort things out */
	if (!ctdb_blocking_freeze(ctdb)) {
		ctdb_fatal(ctdb, "Failed to get initial freeze\n");
	}

	/* now start accepting clients, only can do this once frozen */
	fde = tevent_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd, TEVENT_FD_READ,
			    ctdb_accept_client, ctdb);
	if (fde == NULL) {
		ctdb_fatal(ctdb, "Failed to add daemon socket to event loop");
	}
	tevent_fd_set_auto_close(fde);

	/* Start the transport */
	if (ctdb->methods->start(ctdb) != 0) {
		DEBUG(DEBUG_ALERT,("transport failed to start!\n"));
		ctdb_fatal(ctdb, "transport failed to start");
	}

	/* Recovery daemon and timed events are started from the
	 * callback, only after the setup event completes
	 * successfully.
	 */
	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SETUP);
	ret = ctdb_event_script_callback(ctdb,
					 ctdb,
					 ctdb_setup_event_callback,
					 ctdb,
					 CTDB_EVENT_SETUP,
					 "%s",
					 "");
	if (ret != 0) {
		DEBUG(DEBUG_CRIT,("Failed to set up 'setup' event\n"));
		exit(1);
	}

	lockdown_memory(ctdb->valgrinding);

	/* go into a wait loop to allow other nodes to complete */
	tevent_loop_wait(ctdb->ev);

	DEBUG(DEBUG_CRIT,("event_loop_wait() returned. this should not happen\n"));
	exit(1);
}
1713 allocate a packet for use in daemon<->daemon communication
1715 struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
1716 TALLOC_CTX *mem_ctx,
1717 enum ctdb_operation operation,
1718 size_t length, size_t slength,
1719 const char *type)
1721 int size;
1722 struct ctdb_req_header *hdr;
1724 length = MAX(length, slength);
1725 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
1727 if (ctdb->methods == NULL) {
1728 DEBUG(DEBUG_INFO,(__location__ " Unable to allocate transport packet for operation %u of length %u. Transport is DOWN.\n",
1729 operation, (unsigned)length));
1730 return NULL;
1733 hdr = (struct ctdb_req_header *)ctdb->methods->allocate_pkt(mem_ctx, size);
1734 if (hdr == NULL) {
1735 DEBUG(DEBUG_ERR,("Unable to allocate transport packet for operation %u of length %u\n",
1736 operation, (unsigned)length));
1737 return NULL;
1739 talloc_set_name_const(hdr, type);
1740 memset(hdr, 0, slength);
1741 hdr->length = length;
1742 hdr->operation = operation;
1743 hdr->ctdb_magic = CTDB_MAGIC;
1744 hdr->ctdb_version = CTDB_PROTOCOL;
1745 hdr->generation = ctdb->vnn_map->generation;
1746 hdr->srcnode = ctdb->pnn;
1748 return hdr;
/*
 * State for a control being forwarded on behalf of a local client.
 * While the destination node is known, the state is linked into that
 * node's pending_controls list so outstanding controls can be failed
 * if the node disconnects (see ctdb_daemon_cancel_controls()).
 */
struct daemon_control_state {
	struct daemon_control_state *next, *prev;
	struct ctdb_client *client;	/* client awaiting the reply */
	struct ctdb_req_control_old *c;	/* original request (owned by state) */
	uint32_t reqid;			/* client's request id, echoed in reply */
	struct ctdb_node *node;		/* destination node, or NULL */
};
/*
  callback when a control reply comes in

  Builds a CTDB_REPLY_CONTROL packet carrying status, reply data and
  (optionally) an error string, and queues it to the waiting client.
 */
static void daemon_control_callback(struct ctdb_context *ctdb,
				    int32_t status, TDB_DATA data,
				    const char *errormsg,
				    void *private_data)
{
	struct daemon_control_state *state = talloc_get_type(private_data,
					     struct daemon_control_state);
	struct ctdb_client *client = state->client;
	struct ctdb_reply_control_old *r;
	size_t len;
	int ret;

	/* construct a message to send to the client containing the data */
	len = offsetof(struct ctdb_reply_control_old, data) + data.dsize;
	if (errormsg) {
		len += strlen(errormsg);
	}
	/* r is allocated as a child of state, so freeing state below
	 * releases it too */
	r = ctdbd_allocate_pkt(ctdb, state, CTDB_REPLY_CONTROL, len,
			       struct ctdb_reply_control_old);
	CTDB_NO_MEMORY_VOID(ctdb, r);

	r->hdr.reqid = state->reqid;
	r->status = status;
	r->datalen = data.dsize;
	r->errorlen = 0;
	memcpy(&r->data[0], data.dptr, data.dsize);
	if (errormsg) {
		/* the error string is appended after the reply data,
		 * without a NUL terminator; the receiver uses errorlen */
		r->errorlen = strlen(errormsg);
		memcpy(&r->data[r->datalen], errormsg, r->errorlen);
	}

	ret = daemon_queue_send(client, &r->hdr);
	if (ret != -1) {
		/* reply queued, state is no longer needed.
		 * NOTE(review): on send failure state is kept and is
		 * presumably released only when the client goes away -
		 * confirm this is intentional. */
		talloc_free(state);
	}
}
1800 fail all pending controls to a disconnected node
1802 void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
1804 struct daemon_control_state *state;
1805 while ((state = node->pending_controls)) {
1806 DLIST_REMOVE(node->pending_controls, state);
1807 daemon_control_callback(ctdb, (uint32_t)-1, tdb_null,
1808 "node is disconnected", state);
1813 destroy a daemon_control_state
1815 static int daemon_control_destructor(struct daemon_control_state *state)
1817 if (state->node) {
1818 DLIST_REMOVE(state->node->pending_controls, state);
1820 return 0;
1824 this is called when the ctdb daemon received a ctdb request control
1825 from a local client over the unix domain socket
1827 static void daemon_request_control_from_client(struct ctdb_client *client,
1828 struct ctdb_req_control_old *c)
1830 TDB_DATA data;
1831 int res;
1832 struct daemon_control_state *state;
1833 TALLOC_CTX *tmp_ctx = talloc_new(client);
1835 if (c->hdr.destnode == CTDB_CURRENT_NODE) {
1836 c->hdr.destnode = client->ctdb->pnn;
1839 state = talloc(client, struct daemon_control_state);
1840 CTDB_NO_MEMORY_VOID(client->ctdb, state);
1842 state->client = client;
1843 state->c = talloc_steal(state, c);
1844 state->reqid = c->hdr.reqid;
1845 if (ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
1846 state->node = client->ctdb->nodes[c->hdr.destnode];
1847 DLIST_ADD(state->node->pending_controls, state);
1848 } else {
1849 state->node = NULL;
1852 talloc_set_destructor(state, daemon_control_destructor);
1854 if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
1855 talloc_steal(tmp_ctx, state);
1858 data.dptr = &c->data[0];
1859 data.dsize = c->datalen;
1860 res = ctdb_daemon_send_control(client->ctdb, c->hdr.destnode,
1861 c->srvid, c->opcode, client->client_id,
1862 c->flags,
1863 data, daemon_control_callback,
1864 state);
1865 if (res != 0) {
1866 DEBUG(DEBUG_ERR,(__location__ " Failed to send control to remote node %u\n",
1867 c->hdr.destnode));
1870 talloc_free(tmp_ctx);
1873 static void daemon_request_tunnel_from_client(struct ctdb_client *client,
1874 struct ctdb_req_tunnel_old *c)
1876 TDB_DATA data;
1877 int ret;
1879 if (! ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
1880 DEBUG(DEBUG_ERR, ("Invalid destination 0x%x\n",
1881 c->hdr.destnode));
1882 return;
1885 ret = srvid_exists(client->ctdb->tunnels, c->tunnel_id, NULL);
1886 if (ret != 0) {
1887 DEBUG(DEBUG_ERR,
1888 ("tunnel id 0x%"PRIx64" not registered, dropping pkt\n",
1889 c->tunnel_id));
1890 return;
1893 data = (TDB_DATA) {
1894 .dsize = c->datalen,
1895 .dptr = &c->data[0],
1898 ret = ctdb_daemon_send_tunnel(client->ctdb, c->hdr.destnode,
1899 c->tunnel_id, c->flags, data);
1900 if (ret != 0) {
1901 DEBUG(DEBUG_ERR, ("Failed to set tunnel to remote note %u\n",
1902 c->hdr.destnode));
1907 register a call function
1909 int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
1910 ctdb_fn_t fn, int id)
1912 struct ctdb_registered_call *call;
1913 struct ctdb_db_context *ctdb_db;
1915 ctdb_db = find_ctdb_db(ctdb, db_id);
1916 if (ctdb_db == NULL) {
1917 return -1;
1920 call = talloc(ctdb_db, struct ctdb_registered_call);
1921 call->fn = fn;
1922 call->id = id;
1924 DLIST_ADD(ctdb_db->calls, call);
1925 return 0;
/*
  this local messaging handler is ugly, but is needed to prevent
  recursion in ctdb_send_message() when the destination node is the
  same as the source node
 */
struct ctdb_local_message {
	struct ctdb_context *ctdb;	/* daemon context */
	uint64_t srvid;			/* destination server id */
	TDB_DATA data;			/* private copy of the payload */
};
1941 static void ctdb_local_message_trigger(struct tevent_context *ev,
1942 struct tevent_timer *te,
1943 struct timeval t, void *private_data)
1945 struct ctdb_local_message *m = talloc_get_type(
1946 private_data, struct ctdb_local_message);
1948 srvid_dispatch(m->ctdb->srv, m->srvid, CTDB_SRVID_ALL, m->data);
1949 talloc_free(m);
1952 static int ctdb_local_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
1954 struct ctdb_local_message *m;
1955 m = talloc(ctdb, struct ctdb_local_message);
1956 CTDB_NO_MEMORY(ctdb, m);
1958 m->ctdb = ctdb;
1959 m->srvid = srvid;
1960 m->data = data;
1961 m->data.dptr = talloc_memdup(m, m->data.dptr, m->data.dsize);
1962 if (m->data.dptr == NULL) {
1963 talloc_free(m);
1964 return -1;
1967 /* this needs to be done as an event to prevent recursion */
1968 tevent_add_timer(ctdb->ev, m, timeval_zero(),
1969 ctdb_local_message_trigger, m);
1970 return 0;
/*
  send a ctdb message

  Messages addressed to this node are short-circuited through
  ctdb_local_message(); everything else goes out as a
  CTDB_REQ_MESSAGE packet via the transport.  Returns 0 on success,
  -1 if the transport is down or allocation fails.
 */
int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
			     uint64_t srvid, TDB_DATA data)
{
	struct ctdb_req_message_old *r;
	int len;

	if (ctdb->methods == NULL) {
		DEBUG(DEBUG_INFO,(__location__ " Failed to send message. Transport is DOWN\n"));
		return -1;
	}

	/* see if this is a message to ourselves */
	if (pnn == ctdb->pnn) {
		return ctdb_local_message(ctdb, srvid, data);
	}

	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_MESSAGE, len,
				    struct ctdb_req_message_old);
	CTDB_NO_MEMORY(ctdb, r);

	r->hdr.destnode = pnn;
	r->srvid = srvid;
	r->datalen = data.dsize;
	memcpy(&r->data[0], data.dptr, data.dsize);

	/* NOTE(review): r is freed right after queueing - presumably
	 * ctdb_queue_packet() copies or takes over the packet;
	 * confirm before relying on r after this call */
	ctdb_queue_packet(ctdb, &r->hdr);

	talloc_free(r);
	return 0;
}
/*
 * A "deregistration notification" registered by a client: when the
 * client goes away, a message with the stored srvid/data is broadcast
 * to all connected nodes (see ctdb_client_notify_destructor()).
 */
struct ctdb_client_notify_list {
	struct ctdb_client_notify_list *next, *prev;
	struct ctdb_context *ctdb;	/* daemon context */
	uint64_t srvid;			/* srvid the message is sent to */
	TDB_DATA data;			/* payload, owned by this entry */
};
2018 static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
2020 int ret;
2022 DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));
2024 ret = ctdb_daemon_send_message(nl->ctdb,
2025 CTDB_BROADCAST_CONNECTED,
2026 nl->srvid,
2027 nl->data);
2028 if (ret != 0) {
2029 DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
2032 return 0;
2035 int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
2037 struct ctdb_notify_data_old *notify = (struct ctdb_notify_data_old *)indata.dptr;
2038 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2039 struct ctdb_client_notify_list *nl;
2041 DEBUG(DEBUG_INFO,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
2043 if (indata.dsize < offsetof(struct ctdb_notify_data_old, notify_data)) {
2044 DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
2045 return -1;
2048 if (indata.dsize != (notify->len + offsetof(struct ctdb_notify_data_old, notify_data))) {
2049 DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_notify_data_old, notify_data))));
2050 return -1;
2054 if (client == NULL) {
2055 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
2056 return -1;
2059 for(nl=client->notify; nl; nl=nl->next) {
2060 if (nl->srvid == notify->srvid) {
2061 break;
2064 if (nl != NULL) {
2065 DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
2066 return -1;
2069 nl = talloc(client, struct ctdb_client_notify_list);
2070 CTDB_NO_MEMORY(ctdb, nl);
2071 nl->ctdb = ctdb;
2072 nl->srvid = notify->srvid;
2073 nl->data.dsize = notify->len;
2074 nl->data.dptr = talloc_memdup(nl, notify->notify_data,
2075 nl->data.dsize);
2076 CTDB_NO_MEMORY(ctdb, nl->data.dptr);
2078 DLIST_ADD(client->notify, nl);
2079 talloc_set_destructor(nl, ctdb_client_notify_destructor);
2081 return 0;
2084 int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
2086 uint64_t srvid = *(uint64_t *)indata.dptr;
2087 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2088 struct ctdb_client_notify_list *nl;
2090 DEBUG(DEBUG_INFO,("Deregister srvid %llu for client %d\n", (unsigned long long)srvid, client_id));
2092 if (client == NULL) {
2093 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
2094 return -1;
2097 for(nl=client->notify; nl; nl=nl->next) {
2098 if (nl->srvid == srvid) {
2099 break;
2102 if (nl == NULL) {
2103 DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)srvid));
2104 return -1;
2107 DLIST_REMOVE(client->notify, nl);
2108 talloc_set_destructor(nl, NULL);
2109 talloc_free(nl);
2111 return 0;
2114 struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb, pid_t pid)
2116 struct ctdb_client_pid_list *client_pid;
2118 for (client_pid = ctdb->client_pids; client_pid; client_pid=client_pid->next) {
2119 if (client_pid->pid == pid) {
2120 return client_pid->client;
2123 return NULL;
2127 /* This control is used by samba when probing if a process (of a samba daemon)
2128 exists on the node.
2129 Samba does this when it needs/wants to check if a subrecord in one of the
2130 databases is still valid, or if it is stale and can be removed.
2131 If the node is in unhealthy or stopped state we just kill of the samba
2132 process holding this sub-record and return to the calling samba that
2133 the process does not exist.
2134 This allows us to forcefully recall subrecords registered by samba processes
2135 on banned and stopped nodes.
2137 int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid)
2139 struct ctdb_client *client;
2141 client = ctdb_find_client_by_pid(ctdb, pid);
2142 if (client == NULL) {
2143 return -1;
2146 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
2147 DEBUG(DEBUG_NOTICE,
2148 ("Killing client with pid:%d on banned/stopped node\n",
2149 (int)pid));
2150 talloc_free(client);
2151 return -1;
2154 return kill(pid, 0);
2157 int32_t ctdb_control_check_pid_srvid(struct ctdb_context *ctdb,
2158 TDB_DATA indata)
2160 struct ctdb_client_pid_list *client_pid;
2161 pid_t pid;
2162 uint64_t srvid;
2163 int ret;
2165 pid = *(pid_t *)indata.dptr;
2166 srvid = *(uint64_t *)(indata.dptr + sizeof(pid_t));
2168 for (client_pid = ctdb->client_pids;
2169 client_pid != NULL;
2170 client_pid = client_pid->next) {
2171 if (client_pid->pid == pid) {
2172 ret = srvid_exists(ctdb->srv, srvid,
2173 client_pid->client);
2174 if (ret == 0) {
2175 return 0;
2180 return -1;
2183 int ctdb_control_getnodesfile(struct ctdb_context *ctdb,
2184 uint32_t opcode,
2185 TDB_DATA indata,
2186 TDB_DATA *outdata)
2188 struct ctdb_node_map *node_map = NULL;
2189 size_t len;
2190 uint8_t *buf = NULL;
2191 size_t npush = 0;
2192 int ret = -1;
2194 CHECK_CONTROL_DATA_SIZE(0);
2196 node_map = ctdb_read_nodes(ctdb, ctdb->nodes_source);
2197 if (node_map == NULL) {
2198 D_ERR("Failed to read nodes file\n");
2199 return -1;
2202 len = ctdb_node_map_len(node_map);
2203 buf = talloc_size(ctdb, len);
2204 if (buf == NULL) {
2205 goto done;
2208 ctdb_node_map_push(node_map, buf, &npush);
2209 if (len != npush) {
2210 talloc_free(buf);
2211 goto done;
2214 outdata->dptr = buf;
2215 outdata->dsize = len;
2216 ret = 0;
2217 done:
2218 talloc_free(node_map);
2219 return ret;
/*
 * Orderly daemon shutdown: stop the recovery daemon, keepalives and
 * monitoring, run the "shutdown" event, stop eventd, shut down the
 * transport and exit with the given code.  Re-entry while already
 * shutting down is ignored.  This function does not return.
 */
void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code)
{
	if (ctdb->runstate == CTDB_RUNSTATE_SHUTDOWN) {
		DEBUG(DEBUG_NOTICE,("Already shutting down so will not proceed.\n"));
		return;
	}

	DEBUG(DEBUG_ERR,("Shutdown sequence commencing.\n"));
	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SHUTDOWN);
	ctdb_stop_recoverd(ctdb);
	ctdb_stop_keepalive(ctdb);
	ctdb_stop_monitoring(ctdb);
	/* the "shutdown" event must run before eventd itself is stopped */
	ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
	ctdb_stop_eventd(ctdb);
	if (ctdb->methods != NULL && ctdb->methods->shutdown != NULL) {
		ctdb->methods->shutdown(ctdb);
	}

	DEBUG(DEBUG_ERR,("Shutdown sequence complete, exiting.\n"));
	exit(exit_code);
}
/* When forking the main daemon and the child process needs to connect
 * back to the daemon as a client process, this function can be used
 * to change the ctdb context from daemon into client mode.  The child
 * process must be created using ctdb_fork() and not fork() -
 * ctdb_fork() does some necessary housekeeping.
 *
 * Returns 0 on success, -1 if the client connection fails.  A failed
 * tevent context allocation exits the process.
 */
int switch_from_server_to_client(struct ctdb_context *ctdb)
{
	int ret;

	/* the listening socket belongs to the parent daemon; a client
	 * must not hold it open */
	if (ctdb->daemon.sd != -1) {
		close(ctdb->daemon.sd);
		ctdb->daemon.sd = -1;
	}

	/* get a new event context */
	ctdb->ev = tevent_context_init(ctdb);
	if (ctdb->ev == NULL) {
		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
		exit(1);
	}
	tevent_loop_allow_nesting(ctdb->ev);

	/* Connect to main CTDB daemon */
	ret = ctdb_socket_connect(ctdb);
	if (ret != 0) {
		DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
		return -1;
	}

	/* as a client we are now allowed to issue controls */
	ctdb->can_send_controls = true;

	return 0;
}