/*
 * File: ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
 */
/*
   CTDB mutex helper using Ceph librados locks

   Copyright (C) David Disseldorp 2016-2020

   Based on ctdb_mutex_fcntl_helper.c, which is:
   Copyright (C) Martin Schwenke 2015

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
23 #include "replace.h"
25 #include "tevent.h"
26 #include "talloc.h"
27 #include "rados/librados.h"
/* RADOS advisory lock identifiers: one well-known name/cookie per cluster */
#define CTDB_MUTEX_CEPH_LOCK_NAME	"ctdb_reclock_mutex"
#define CTDB_MUTEX_CEPH_LOCK_COOKIE	CTDB_MUTEX_CEPH_LOCK_NAME
#define CTDB_MUTEX_CEPH_LOCK_DESC	"CTDB cluster lock"
/*
 * During failover it may take up to <lock duration> seconds before the
 * newly elected recovery master can obtain the lock.
 */
#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT	10

/* single-character status codes written to stdout for the ctdb parent */
#define CTDB_MUTEX_STATUS_HOLDING "0"
#define CTDB_MUTEX_STATUS_CONTENDED "1"
#define CTDB_MUTEX_STATUS_TIMEOUT "2"
#define CTDB_MUTEX_STATUS_ERROR "3"
/* argv[0], cached at startup for use in error messages */
static char *progname = NULL;

/* Print command-line usage to stderr. */
static void usage(void)
{
	fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
			"<RADOS pool> <RADOS object> "
			"[lock duration secs] [-n RADOS namespace]\n",
		progname);
}
53 static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
54 const char *ceph_auth_name,
55 const char *pool_name,
56 const char *namespace,
57 rados_t *_ceph_cluster,
58 rados_ioctx_t *_ioctx)
60 rados_t ceph_cluster = NULL;
61 rados_ioctx_t ioctx = NULL;
62 int ret;
64 ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
65 if (ret < 0) {
66 fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
67 " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
68 strerror(-ret));
69 return ret;
72 /* path=NULL tells librados to use default locations */
73 ret = rados_conf_read_file(ceph_cluster, NULL);
74 if (ret < 0) {
75 fprintf(stderr, "%s: failed to parse Ceph cluster config"
76 " - (%s)\n", progname, strerror(-ret));
77 rados_shutdown(ceph_cluster);
78 return ret;
81 ret = rados_connect(ceph_cluster);
82 if (ret < 0) {
83 fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
84 " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
85 strerror(-ret));
86 rados_shutdown(ceph_cluster);
87 return ret;
91 ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
92 if (ret < 0) {
93 fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
94 " - (%s)\n", progname, pool_name, strerror(-ret));
95 rados_shutdown(ceph_cluster);
96 return ret;
99 if (namespace != NULL) {
100 rados_ioctx_set_namespace(ioctx, namespace);
103 *_ceph_cluster = ceph_cluster;
104 *_ioctx = ioctx;
106 return 0;
109 static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
110 const char *oid,
111 uint64_t lock_duration_s,
112 uint8_t flags)
114 int ret;
115 struct timeval tv = { lock_duration_s, 0 };
117 ret = rados_lock_exclusive(ioctx, oid,
118 CTDB_MUTEX_CEPH_LOCK_NAME,
119 CTDB_MUTEX_CEPH_LOCK_COOKIE,
120 CTDB_MUTEX_CEPH_LOCK_DESC,
121 lock_duration_s == 0 ? NULL : &tv,
122 flags);
123 if ((ret == -EEXIST) || (ret == -EBUSY)) {
124 /* lock contention */
125 return ret;
126 } else if (ret < 0) {
127 /* unexpected failure */
128 fprintf(stderr,
129 "%s: Failed to get lock on RADOS object '%s' - (%s)\n",
130 progname, oid, strerror(-ret));
131 return ret;
134 /* lock obtained */
135 return 0;
138 static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
139 const char *oid)
141 int ret;
143 ret = rados_unlock(ioctx, oid,
144 CTDB_MUTEX_CEPH_LOCK_NAME,
145 CTDB_MUTEX_CEPH_LOCK_COOKIE);
146 if (ret < 0) {
147 fprintf(stderr,
148 "%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
149 progname, oid, strerror(-ret));
150 return ret;
153 return 0;
/*
 * All helper state; allocated once in main() and released via its talloc
 * destructor (which also drops the lock and shuts down librados).
 */
struct ctdb_mutex_rados_state {
	bool holding_mutex;		/* true once the RADOS lock is held */
	const char *ceph_cluster_name;	/* argv[1]: Ceph cluster name */
	const char *ceph_auth_name;	/* argv[2]: Ceph user for auth */
	const char *pool_name;		/* argv[3]: RADOS pool */
	const char *namespace;		/* optional "-n" RADOS namespace */
	const char *object;		/* argv[4]: RADOS lock object */
	uint64_t lock_duration_s;	/* lock expiry; 0 = never expires */
	int ppid;			/* parent pid, polled for liveness */
	struct tevent_context *ev;
	struct tevent_signal *sigterm_ev;
	struct tevent_signal *sigint_ev;
	struct tevent_timer *ppid_timer_ev;	/* parent liveness poll */
	struct tevent_timer *renew_timer_ev;	/* periodic lock renewal */
	rados_t ceph_cluster;
	rados_ioctx_t ioctx;
};
/*
 * SIGTERM/SIGINT handler: release the mutex (via the cmr_state talloc
 * destructor) and exit.  Exits non-zero if the signal arrived before the
 * mutex was actually held, which should not normally happen.
 */
static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
					struct tevent_signal *se,
					int signum,
					int count,
					void *siginfo,
					void *private_data)
{
	struct ctdb_mutex_rados_state *cmr_state = private_data;
	int ret = 0;

	if (!cmr_state->holding_mutex) {
		fprintf(stderr, "Sigterm callback invoked without mutex!\n");
		ret = -EINVAL;
	}

	/* destructor drops the RADOS lock and shuts down librados */
	talloc_free(cmr_state);
	exit(ret ? 1 : 0);
}
/*
 * Periodic (5s) parent-liveness check.  While the parent is alive the
 * timer re-arms itself; once the parent has exited the helper releases
 * the mutex (via the talloc destructor) and exits.
 */
static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev,
					   struct tevent_timer *te,
					   struct timeval current_time,
					   void *private_data)
{
	struct ctdb_mutex_rados_state *cmr_state = private_data;
	int ret = 0;

	if (!cmr_state->holding_mutex) {
		fprintf(stderr, "Timer callback invoked without mutex!\n");
		ret = -EINVAL;
		goto err_ctx_cleanup;
	}

	/* kill(pid, 0) probes existence; ESRCH means the parent is gone */
	if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) {
		/* parent still around, keep waiting */
		cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev,
						       cmr_state,
					tevent_timeval_current_ofs(5, 0),
					      ctdb_mutex_rados_ppid_timer_cb,
						       cmr_state);
		if (cmr_state->ppid_timer_ev == NULL) {
			fprintf(stderr, "Failed to create timer event\n");
			/* rely on signal cb */
		}
		return;
	}

	/* parent ended, drop lock (via destructor) and exit */
err_ctx_cleanup:
	talloc_free(cmr_state);
	exit(ret ? 1 : 0);
}
#define USECS_IN_SEC 1000000

/*
 * Periodic lock-renewal callback, armed at half the lock duration.
 * Re-obtains the lock with LIBRADOS_LOCK_FLAG_RENEW so that the expiry
 * clock restarts.  Any failure here is fatal: the lock may already have
 * been lost, so drop state (via destructor) and exit non-zero.
 */
static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev,
						 struct tevent_timer *te,
						 struct timeval current_time,
						 void *private_data)
{
	struct ctdb_mutex_rados_state *cmr_state = private_data;
	struct timeval tv;
	int ret;

	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
				    cmr_state->lock_duration_s,
				    LIBRADOS_LOCK_FLAG_RENEW);
	if (ret == -EBUSY) {
		/* should never get -EEXIST on renewal */
		fprintf(stderr, "Lock contention during renew: %d\n", ret);
		goto err_ctx_cleanup;
	} else if (ret < 0) {
		fprintf(stderr, "Lock renew failed\n");
		goto err_ctx_cleanup;
	}

	/* re-arm at half the lock duration; usecs avoid rounding to zero */
	tv = tevent_timeval_current_ofs(0,
			cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
	cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
						     cmr_state,
						     tv,
					ctdb_mutex_rados_lock_renew_timer_cb,
						     cmr_state);
	if (cmr_state->renew_timer_ev == NULL) {
		fprintf(stderr, "Failed to create timer event\n");
		goto err_ctx_cleanup;
	}

	return;

err_ctx_cleanup:
	/* drop lock (via destructor) and exit */
	talloc_free(cmr_state);
	exit(1);
}
/*
 * talloc destructor for the helper state.  Order matters: drop the RADOS
 * lock first (needs a live ioctx), then destroy the ioctx, then shut the
 * cluster handle down.  Always returns 0 so talloc_free() proceeds.
 */
static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state)
{
	if (cmr_state->holding_mutex) {
		ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
	}
	if (cmr_state->ioctx != NULL) {
		rados_ioctx_destroy(cmr_state->ioctx);
	}
	if (cmr_state->ceph_cluster != NULL) {
		rados_shutdown(cmr_state->ceph_cluster);
	}
	return 0;
}
284 /* register this host+service with ceph-mgr for visibility */
285 static int ctdb_mutex_rados_mgr_reg(rados_t ceph_cluster)
287 int ret;
288 uint64_t instance_guid;
289 char id_buf[128];
291 instance_guid = rados_get_instance_id(ceph_cluster);
292 ret = snprintf(id_buf, sizeof(id_buf), "%s:0x%016llx",
293 "ctdb_mutex_ceph_rados_helper",
294 (unsigned long long)instance_guid);
295 if (ret < 0 || ret >= sizeof(id_buf)) {
296 fprintf(stderr, "Ceph instance name too long\n");
297 return -ENAMETOOLONG;
300 ret = rados_service_register(ceph_cluster, "ctdb", id_buf, "");
301 if (ret < 0) {
302 fprintf(stderr, "failed to register service with ceph-mgr\n");
303 return ret;
306 return 0;
/*
 * Entry point.  Parses arguments, connects to Ceph, takes the cluster
 * lock on the given RADOS object and then sits in a tevent loop:
 * - a 5s timer polls the parent (ctdb) for liveness,
 * - SIGTERM/SIGINT release the lock and exit,
 * - an optional renewal timer re-obtains the lock before it expires.
 * Status is reported to the parent as a single character on stdout
 * (see CTDB_MUTEX_STATUS_*).
 */
int main(int argc, char *argv[])
{
	int ret;
	int opt;
	struct ctdb_mutex_rados_state *cmr_state;

	progname = argv[0];

	/* four mandatory positional arguments */
	if (argc < 5) {
		usage();
		ret = -EINVAL;
		goto err_out;
	}

	/* unbuffered stdout: status bytes must reach the parent immediately */
	ret = setvbuf(stdout, NULL, _IONBF, 0);
	if (ret != 0) {
		fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
	}

	cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
	if (cmr_state == NULL) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_out;
	}

	/* destructor drops the lock and shuts librados down on any exit */
	talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy);
	cmr_state->ceph_cluster_name = argv[1];
	cmr_state->ceph_auth_name = argv[2];
	cmr_state->pool_name = argv[3];
	cmr_state->object = argv[4];

	/* option parsing starts after the positional arguments */
	optind = 5;
	while ((opt = getopt(argc, argv, "n:")) != -1) {
		switch(opt) {
		case 'n':
			cmr_state->namespace = optarg;
			break;
		default:
			usage();
			ret = -EINVAL;
			goto err_ctx_cleanup;
		}
	}

	if (argv[optind] != NULL) {
		/* optional lock duration provided */
		char *endptr = NULL;
		cmr_state->lock_duration_s = strtoull(argv[optind], &endptr, 0);
		if ((endptr == argv[optind]) || (*endptr != '\0')) {
			fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
			ret = -EINVAL;
			goto err_ctx_cleanup;
		}
		if (argv[++optind] != NULL) {
			/* incorrect count or format for optional arguments */
			usage();
			ret = -EINVAL;
			goto err_ctx_cleanup;
		}
	} else {
		cmr_state->lock_duration_s
			= CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT;
	}

	cmr_state->ppid = getppid();
	if (cmr_state->ppid == 1) {
		/*
		 * The original parent is gone and the process has
		 * been reparented to init. This can happen if the
		 * helper is started just as the parent is killed
		 * during shutdown. The error message doesn't need to
		 * be stellar, since there won't be anything around to
		 * capture and log it...
		 */
		fprintf(stderr, "%s: PPID == 1\n", progname);
		ret = -EPIPE;
		goto err_ctx_cleanup;
	}

	cmr_state->ev = tevent_context_init(cmr_state);
	if (cmr_state->ev == NULL) {
		fprintf(stderr, "tevent_context_init failed\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_ctx_cleanup;
	}

	/* wait for sigterm */
	cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
						  ctdb_mutex_rados_sigterm_cb,
						  cmr_state);
	if (cmr_state->sigterm_ev == NULL) {
		fprintf(stderr, "Failed to create term signal event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_ctx_cleanup;
	}

	/* SIGINT shares the SIGTERM handler */
	cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGINT, 0,
						 ctdb_mutex_rados_sigterm_cb,
						 cmr_state);
	if (cmr_state->sigint_ev == NULL) {
		fprintf(stderr, "Failed to create int signal event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_ctx_cleanup;
	}

	/* periodically check parent */
	cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
					tevent_timeval_current_ofs(5, 0),
					ctdb_mutex_rados_ppid_timer_cb,
						    cmr_state);
	if (cmr_state->ppid_timer_ev == NULL) {
		fprintf(stderr, "Failed to create timer event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_ctx_cleanup;
	}

	ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
					  cmr_state->ceph_auth_name,
					  cmr_state->pool_name,
					  cmr_state->namespace,
					  &cmr_state->ceph_cluster,
					  &cmr_state->ioctx);
	if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_ctx_cleanup;
	}

	ret = ctdb_mutex_rados_mgr_reg(cmr_state->ceph_cluster);
	if (ret < 0) {
		fprintf(stderr, "Failed to register with ceph-mgr\n");
		/* ignore: ceph-mgr service registration is informational */
	}

	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
				    cmr_state->lock_duration_s,
				    0);	/* no lock flags on initial acquisition */
	if ((ret == -EEXIST) || (ret == -EBUSY)) {
		fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
		goto err_ctx_cleanup;
	} else if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_ctx_cleanup;
	}
	cmr_state->holding_mutex = true;

	if (cmr_state->lock_duration_s != 0) {
		/*
		 * renew (reobtain) the lock, using a period of half the lock
		 * duration. Convert to usecs to avoid rounding.
		 */
		struct timeval tv = tevent_timeval_current_ofs(0,
			       cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
		cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
							     cmr_state,
							     tv,
					ctdb_mutex_rados_lock_renew_timer_cb,
							     cmr_state);
		if (cmr_state->renew_timer_ev == NULL) {
			fprintf(stderr, "Failed to create timer event\n");
			fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
			ret = -ENOMEM;
			goto err_ctx_cleanup;
		}
	}

	/* tell the parent we hold the lock */
	fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);

	/* wait for the signal / timer events to do their work */
	ret = tevent_loop_wait(cmr_state->ev);
	if (ret < 0) {
		goto err_ctx_cleanup;
	}

err_ctx_cleanup:
	talloc_free(cmr_state);
err_out:
	return ret ? 1 : 0;
}