4 * Copyright (C) 2009 Red Hat, Inc. All rights reserved.
6 * This file is part of LVM2.
8 * This copyrighted material is made available to anyone wishing to use,
9 * modify, copy, or redistribute it subject to the terms and conditions
10 * of the GNU Lesser General Public License v.2.1.
12 * You should have received a copy of the GNU Lesser General Public License
13 * along with this program; if not, write to the Free Software Foundation,
14 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 * This provides the interface between clvmd and corosync/DLM as the cluster and lock manager.
23 #define _FILE_OFFSET_BITS 64
25 #include <configure.h>
27 #include <sys/types.h>
28 #include <sys/utsname.h>
29 #include <sys/ioctl.h>
30 #include <sys/socket.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
48 #include <libdevmapper.h>
50 #include <corosync/corotypes.h>
51 #include <corosync/cpg.h>
52 #include <corosync/quorum.h>
53 #include <corosync/confdb.h>
57 #include "lvm-logging.h"
59 #include "clvmd-comms.h"
60 #include "lvm-functions.h"
63 /* Name of the DLM lockspace that holds clvmd's LV & VG locks */
64 #define LOCKSPACE_NAME "clvmd"
66 static void corosync_cpg_deliver_callback (cpg_handle_t handle
,
67 const struct cpg_name
*groupName
,
/*
 * Forward declarations.
 *
 * corosync_cpg_confchg_callback is installed as .cpg_confchg_fn in
 * corosync_cpg_callbacks.  It receives the current CPG member list plus the
 * lists of nodes that left and joined, each with its entry count.
 *
 * _cluster_closedown is installed as .cluster_closedown in
 * _cluster_corosync_ops.
 */
72 static void corosync_cpg_confchg_callback(cpg_handle_t handle
,
73 const struct cpg_name
*groupName
,
74 const struct cpg_address
*member_list
, size_t member_list_entries
,
75 const struct cpg_address
*left_list
, size_t left_list_entries
,
76 const struct cpg_address
*joined_list
, size_t joined_list_entries
);
77 static void _cluster_closedown(void);
79 /* Hash list of nodes in the cluster */
/* Keyed by the binary node ID (COROSYNC_CSID_LEN bytes); values are
   struct node_info pointers.  Created in _init_cluster(). */
80 static struct dm_hash_table
*node_hash
;
82 /* Number of active nodes */
/* NOTE(review): the comment above does not describe our_nodeid; the counter
   it refers to (presumably num_nodes) is not declared in the visible lines. */
/* Node ID of the local node; _get_our_csid() copies it out as our csid. */
84 static unsigned int our_nodeid
;
/* Client passed to process_message() for messages delivered through CPG. */
86 static struct local_client
*cluster_client
;
88 /* Corosync handles */
/* CPG handle: initialised by cpg_initialize(), used for join, dispatch,
   multicast and finalize. */
89 static cpg_handle_t cpg_handle
;
/* Quorum handle: initialised by quorum_initialize(), queried by
   quorum_getquorate(). */
90 static quorum_handle_t quorum_handle
;
/* DLM lockspace obtained from dlm_create_lockspace() or, when it already
   exists (EEXIST), dlm_open_lockspace(). */
93 static dlm_lshandle_t
*lockspace
;
/* Name of the CPG group joined via cpg_join(); set to "clvmd". */
95 static struct cpg_name cpg_group_name
;
97 /* Corosync callback structs */
98 cpg_callbacks_t corosync_cpg_callbacks
= {
99 .cpg_deliver_fn
= corosync_cpg_deliver_callback
,
100 .cpg_confchg_fn
= corosync_cpg_confchg_callback
,
103 quorum_callbacks_t quorum_callbacks
= {
104 .quorum_notify_fn
= NULL
,
109 enum {NODE_UNKNOWN
, NODE_DOWN
, NODE_UP
, NODE_CLVMD
} state
;
114 /* Set errno to something approximating the right value and return 0 or -1 */
115 static int cs_to_errno(cs_error_t err
)
133 case CS_ERR_TRY_AGAIN
:
136 case CS_ERR_INVALID_PARAM
:
139 case CS_ERR_NO_MEMORY
:
142 case CS_ERR_BAD_HANDLE
:
151 case CS_ERR_NOT_EXIST
:
154 case CS_ERR_NAME_TOO_LONG
:
155 errno
= ENAMETOOLONG
;
160 case CS_ERR_NO_SPACE
:
163 case CS_ERR_INTERRUPT
:
166 case CS_ERR_NAME_NOT_FOUND
:
169 case CS_ERR_NO_RESOURCES
:
172 case CS_ERR_NOT_SUPPORTED
:
175 case CS_ERR_BAD_OPERATION
:
178 case CS_ERR_FAILED_OPERATION
:
181 case CS_ERR_MESSAGE_ERROR
:
184 case CS_ERR_QUEUE_FULL
:
187 case CS_ERR_QUEUE_NOT_AVAILABLE
:
190 case CS_ERR_BAD_FLAGS
:
196 case CS_ERR_NO_SECTIONS
:
206 static char *print_corosync_csid(const char *csid
)
208 static char buf
[128];
211 memcpy(&id
, csid
, sizeof(int));
212 sprintf(buf
, "%d", id
);
216 static void corosync_cpg_deliver_callback (cpg_handle_t handle
,
217 const struct cpg_name
*groupName
,
225 memcpy(&target_nodeid
, msg
, COROSYNC_CSID_LEN
);
227 DEBUGLOG("%u got message from nodeid %d for %d. len %zd\n",
228 our_nodeid
, nodeid
, target_nodeid
, msg_len
-4);
230 if (nodeid
!= our_nodeid
)
231 if (target_nodeid
== our_nodeid
|| target_nodeid
== 0)
232 process_message(cluster_client
, (char *)msg
+COROSYNC_CSID_LEN
,
233 msg_len
-COROSYNC_CSID_LEN
, (char*)&nodeid
);
236 static void corosync_cpg_confchg_callback(cpg_handle_t handle
,
237 const struct cpg_name
*groupName
,
238 const struct cpg_address
*member_list
, size_t member_list_entries
,
239 const struct cpg_address
*left_list
, size_t left_list_entries
,
240 const struct cpg_address
*joined_list
, size_t joined_list_entries
)
243 struct node_info
*ninfo
;
245 DEBUGLOG("confchg callback. %zd joined, %zd left, %zd members\n",
246 joined_list_entries
, left_list_entries
, member_list_entries
);
248 for (i
=0; i
<joined_list_entries
; i
++) {
249 ninfo
= dm_hash_lookup_binary(node_hash
,
250 (char *)&joined_list
[i
].nodeid
,
253 ninfo
= malloc(sizeof(struct node_info
));
258 ninfo
->nodeid
= joined_list
[i
].nodeid
;
259 dm_hash_insert_binary(node_hash
,
260 (char *)&ninfo
->nodeid
,
261 COROSYNC_CSID_LEN
, ninfo
);
264 ninfo
->state
= NODE_CLVMD
;
267 for (i
=0; i
<left_list_entries
; i
++) {
268 ninfo
= dm_hash_lookup_binary(node_hash
,
269 (char *)&left_list
[i
].nodeid
,
272 ninfo
->state
= NODE_DOWN
;
275 for (i
=0; i
<member_list_entries
; i
++) {
276 if (member_list
[i
].nodeid
== 0) continue;
277 ninfo
= dm_hash_lookup_binary(node_hash
,
278 (char *)&member_list
[i
].nodeid
,
281 ninfo
= malloc(sizeof(struct node_info
));
286 ninfo
->nodeid
= member_list
[i
].nodeid
;
287 dm_hash_insert_binary(node_hash
,
288 (char *)&ninfo
->nodeid
,
289 COROSYNC_CSID_LEN
, ninfo
);
292 ninfo
->state
= NODE_CLVMD
;
295 num_nodes
= member_list_entries
;
298 static int _init_cluster(void)
302 node_hash
= dm_hash_create(100);
304 err
= cpg_initialize(&cpg_handle
,
305 &corosync_cpg_callbacks
);
307 syslog(LOG_ERR
, "Cannot initialise Corosync CPG service: %d",
309 DEBUGLOG("Cannot initialise Corosync CPG service: %d", err
);
310 return cs_to_errno(err
);
313 err
= quorum_initialize(&quorum_handle
,
316 syslog(LOG_ERR
, "Cannot initialise Corosync quorum service: %d",
318 DEBUGLOG("Cannot initialise Corosync quorum service: %d", err
);
319 return cs_to_errno(err
);
323 /* Create a lockspace for LV & VG locks to live in */
324 lockspace
= dlm_create_lockspace(LOCKSPACE_NAME
, 0600);
326 if (errno
== EEXIST
) {
327 lockspace
= dlm_open_lockspace(LOCKSPACE_NAME
);
330 syslog(LOG_ERR
, "Unable to create lockspace for CLVM: %m");
331 quorum_finalize(quorum_handle
);
335 dlm_ls_pthread_init(lockspace
);
336 DEBUGLOG("DLM initialisation complete\n");
338 /* Connect to the clvmd group */
339 strcpy((char *)cpg_group_name
.value
, "clvmd");
340 cpg_group_name
.length
= strlen((char *)cpg_group_name
.value
);
341 err
= cpg_join(cpg_handle
, &cpg_group_name
);
343 cpg_finalize(cpg_handle
);
344 quorum_finalize(quorum_handle
);
345 dlm_release_lockspace(LOCKSPACE_NAME
, lockspace
, 1);
346 syslog(LOG_ERR
, "Cannot join clvmd process group");
347 DEBUGLOG("Cannot join clvmd process group: %d\n", err
);
348 return cs_to_errno(err
);
351 err
= cpg_local_get(cpg_handle
,
354 cpg_finalize(cpg_handle
);
355 quorum_finalize(quorum_handle
);
356 dlm_release_lockspace(LOCKSPACE_NAME
, lockspace
, 1);
357 syslog(LOG_ERR
, "Cannot get local node id\n");
358 return cs_to_errno(err
);
360 DEBUGLOG("Our local node id is %d\n", our_nodeid
);
362 DEBUGLOG("Connected to Corosync\n");
367 static void _cluster_closedown(void)
369 DEBUGLOG("cluster_closedown\n");
372 dlm_release_lockspace(LOCKSPACE_NAME
, lockspace
, 1);
373 cpg_finalize(cpg_handle
);
374 quorum_finalize(quorum_handle
);
377 static void _get_our_csid(char *csid
)
379 memcpy(csid
, &our_nodeid
, sizeof(int));
382 /* Corosync doesn't really have node names so we
383 just use the node ID in hex instead */
384 static int _csid_from_name(char *csid
, const char *name
)
387 struct node_info
*ninfo
;
389 if (sscanf(name
, "%x", &nodeid
) == 1) {
390 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, COROSYNC_CSID_LEN
);
397 static int _name_from_csid(const char *csid
, char *name
)
399 struct node_info
*ninfo
;
401 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, COROSYNC_CSID_LEN
);
404 sprintf(name
, "UNKNOWN %s", print_corosync_csid(csid
));
408 sprintf(name
, "%x", ninfo
->nodeid
);
412 static int _get_num_nodes()
414 DEBUGLOG("num_nodes = %d\n", num_nodes
);
418 /* Node is now known to be running a clvmd */
419 static void _add_up_node(const char *csid
)
421 struct node_info
*ninfo
;
423 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, COROSYNC_CSID_LEN
);
425 DEBUGLOG("corosync_add_up_node no node_hash entry for csid %s\n",
426 print_corosync_csid(csid
));
430 DEBUGLOG("corosync_add_up_node %d\n", ninfo
->nodeid
);
432 ninfo
->state
= NODE_CLVMD
;
437 /* Call a callback for each node, so the caller knows whether it's up or down */
438 static int _cluster_do_node_callback(struct local_client
*master_client
,
439 void (*callback
)(struct local_client
*,
440 const char *csid
, int node_up
))
442 struct dm_hash_node
*hn
;
443 struct node_info
*ninfo
;
446 dm_hash_iterate(hn
, node_hash
)
448 char csid
[COROSYNC_CSID_LEN
];
450 ninfo
= dm_hash_get_data(node_hash
, hn
);
451 memcpy(csid
, dm_hash_get_key(node_hash
, hn
), COROSYNC_CSID_LEN
);
453 DEBUGLOG("down_callback. node %d, state = %d\n", ninfo
->nodeid
,
456 if (ninfo
->state
!= NODE_DOWN
)
457 callback(master_client
, csid
, ninfo
->state
== NODE_CLVMD
);
458 if (ninfo
->state
!= NODE_CLVMD
)
465 static int _lock_resource(const char *resource
, int mode
, int flags
, int *lockid
)
467 struct dlm_lksb lksb
;
470 DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource
, flags
, mode
);
472 if (flags
& LKF_CONVERT
)
473 lksb
.sb_lkid
= *lockid
;
475 err
= dlm_ls_lock_wait(lockspace
,
486 DEBUGLOG("dlm_ls_lock returned %d\n", errno
);
489 if (lksb
.sb_status
!= 0)
491 DEBUGLOG("dlm_ls_lock returns lksb.sb_status %d\n", lksb
.sb_status
);
492 errno
= lksb
.sb_status
;
496 DEBUGLOG("lock_resource returning %d, lock_id=%x\n", err
, lksb
.sb_lkid
);
498 *lockid
= lksb
.sb_lkid
;
504 static int _unlock_resource(const char *resource
, int lockid
)
506 struct dlm_lksb lksb
;
509 DEBUGLOG("unlock_resource: %s lockid: %x\n", resource
, lockid
);
510 lksb
.sb_lkid
= lockid
;
512 err
= dlm_ls_unlock_wait(lockspace
,
518 DEBUGLOG("Unlock returned %d\n", err
);
521 if (lksb
.sb_status
!= EUNLOCK
)
523 DEBUGLOG("dlm_ls_unlock_wait returns lksb.sb_status: %d\n", lksb
.sb_status
);
524 errno
= lksb
.sb_status
;
532 static int _is_quorate()
535 if (quorum_getquorate(quorum_handle
, &quorate
) == CS_OK
)
541 static int _get_main_cluster_fd(void)
545 cpg_fd_get(cpg_handle
, &select_fd
);
549 static int _cluster_fd_callback(struct local_client
*fd
, char *buf
, int len
,
551 struct local_client
**new_client
)
555 cpg_dispatch(cpg_handle
, CS_DISPATCH_ONE
);
559 static int _cluster_send_message(const void *buf
, int msglen
, const char *csid
,
567 memcpy(&target_node
, csid
, COROSYNC_CSID_LEN
);
571 iov
[0].iov_base
= &target_node
;
572 iov
[0].iov_len
= sizeof(int);
573 iov
[1].iov_base
= (char *)buf
;
574 iov
[1].iov_len
= msglen
;
576 err
= cpg_mcast_joined(cpg_handle
, CPG_TYPE_AGREED
, iov
, 2);
577 return cs_to_errno(err
);
581 * We are not necessarily connected to a Red Hat Cluster system,
582 * but if we are, this returns the cluster name from cluster.conf.
583 * I've used confdb rather than ccs to reduce the inter-package
584 * dependencies as well as to allow people to set a cluster name
585 * for themselves even if they are not running on RH cluster.
587 static int _get_cluster_name(char *buf
, int buflen
)
589 confdb_handle_t handle
;
591 size_t namelen
= buflen
;
592 hdb_handle_t cluster_handle
;
593 confdb_callbacks_t callbacks
= {
594 .confdb_key_change_notify_fn
= NULL
,
595 .confdb_object_create_change_notify_fn
= NULL
,
596 .confdb_object_delete_change_notify_fn
= NULL
599 /* This is a default in case everything else fails */
600 strncpy(buf
, "Corosync", buflen
);
602 /* Look for a cluster name in confdb */
603 result
= confdb_initialize (&handle
, &callbacks
);
607 result
= confdb_object_find_start(handle
, OBJECT_PARENT_HANDLE
);
611 result
= confdb_object_find(handle
, OBJECT_PARENT_HANDLE
, (void *)"cluster", strlen("cluster"), &cluster_handle
);
615 result
= confdb_key_get(handle
, cluster_handle
, (void *)"name", strlen("name"), buf
, &namelen
);
622 confdb_finalize(handle
);
626 static struct cluster_ops _cluster_corosync_ops
= {
627 .cluster_init_completed
= NULL
,
628 .cluster_send_message
= _cluster_send_message
,
629 .name_from_csid
= _name_from_csid
,
630 .csid_from_name
= _csid_from_name
,
631 .get_num_nodes
= _get_num_nodes
,
632 .cluster_fd_callback
= _cluster_fd_callback
,
633 .get_main_cluster_fd
= _get_main_cluster_fd
,
634 .cluster_do_node_callback
= _cluster_do_node_callback
,
635 .is_quorate
= _is_quorate
,
636 .get_our_csid
= _get_our_csid
,
637 .add_up_node
= _add_up_node
,
638 .reread_config
= NULL
,
639 .cluster_closedown
= _cluster_closedown
,
640 .get_cluster_name
= _get_cluster_name
,
641 .sync_lock
= _lock_resource
,
642 .sync_unlock
= _unlock_resource
,
645 struct cluster_ops
*init_corosync_cluster(void)
647 if (!_init_cluster())
648 return &_cluster_corosync_ops
;