4 * Copyright (C) 2007-2009 Red Hat, Inc. All rights reserved.
6 * This file is part of LVM2.
8 * This copyrighted material is made available to anyone wishing to use,
9 * modify, copy, or redistribute it subject to the terms and conditions
10 * of the GNU Lesser General Public License v.2.1.
12 * You should have received a copy of the GNU Lesser General Public License
13 * along with this program; if not, write to the Free Software Foundation,
14 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 * This provides the interface between clvmd and OpenAIS as the cluster
23 #define _FILE_OFFSET_BITS 64
25 #include <configure.h>
27 #include <sys/types.h>
28 #include <sys/utsname.h>
29 #include <sys/ioctl.h>
30 #include <sys/socket.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
48 #include <libdevmapper.h>
50 #include <openais/saAis.h>
51 #include <openais/saLck.h>
53 #include <corosync/corotypes.h>
54 #include <corosync/cpg.h>
57 #include "lvm-logging.h"
59 #include "clvmd-comms.h"
60 #include "lvm-functions.h"
63 /* Timeout value for several openais calls */
66 static void openais_cpg_deliver_callback (cpg_handle_t handle
,
67 const struct cpg_name
*groupName
,
72 static void openais_cpg_confchg_callback(cpg_handle_t handle
,
73 const struct cpg_name
*groupName
,
74 const struct cpg_address
*member_list
, size_t member_list_entries
,
75 const struct cpg_address
*left_list
, size_t left_list_entries
,
76 const struct cpg_address
*joined_list
, size_t joined_list_entries
);
78 static void _cluster_closedown(void);
80 /* Hash list of nodes in the cluster */
81 static struct dm_hash_table
*node_hash
;
83 /* For associating lock IDs & resource handles */
84 static struct dm_hash_table
*lock_hash
;
86 /* Number of active nodes */
88 static unsigned int our_nodeid
;
90 static struct local_client
*cluster_client
;
93 static cpg_handle_t cpg_handle
;
94 static SaLckHandleT lck_handle
;
96 static struct cpg_name cpg_group_name
;
98 /* Openais callback structs */
99 cpg_callbacks_t openais_cpg_callbacks
= {
100 .cpg_deliver_fn
= openais_cpg_deliver_callback
,
101 .cpg_confchg_fn
= openais_cpg_confchg_callback
,
106 enum {NODE_UNKNOWN
, NODE_DOWN
, NODE_UP
, NODE_CLVMD
} state
;
112 SaLckResourceHandleT res_handle
;
113 SaLckLockIdT lock_id
;
117 /* Set errno to something approximating the right value and return 0 or -1 */
118 static int ais_to_errno(SaAisErrorT err
)
124 case SA_AIS_ERR_LIBRARY
:
127 case SA_AIS_ERR_VERSION
:
130 case SA_AIS_ERR_INIT
:
133 case SA_AIS_ERR_TIMEOUT
:
136 case SA_AIS_ERR_TRY_AGAIN
:
139 case SA_AIS_ERR_INVALID_PARAM
:
142 case SA_AIS_ERR_NO_MEMORY
:
145 case SA_AIS_ERR_BAD_HANDLE
:
148 case SA_AIS_ERR_BUSY
:
151 case SA_AIS_ERR_ACCESS
:
154 case SA_AIS_ERR_NOT_EXIST
:
157 case SA_AIS_ERR_NAME_TOO_LONG
:
158 errno
= ENAMETOOLONG
;
160 case SA_AIS_ERR_EXIST
:
163 case SA_AIS_ERR_NO_SPACE
:
166 case SA_AIS_ERR_INTERRUPT
:
169 case SA_AIS_ERR_NAME_NOT_FOUND
:
172 case SA_AIS_ERR_NO_RESOURCES
:
175 case SA_AIS_ERR_NOT_SUPPORTED
:
178 case SA_AIS_ERR_BAD_OPERATION
:
181 case SA_AIS_ERR_FAILED_OPERATION
:
184 case SA_AIS_ERR_MESSAGE_ERROR
:
187 case SA_AIS_ERR_QUEUE_FULL
:
190 case SA_AIS_ERR_QUEUE_NOT_AVAILABLE
:
193 case SA_AIS_ERR_BAD_FLAGS
:
196 case SA_AIS_ERR_TOO_BIG
:
199 case SA_AIS_ERR_NO_SECTIONS
:
/*
 * Render a binary csid as a decimal string, for use in log messages.
 *
 * The first sizeof(int) bytes of csid are copied into an int and formatted
 * with "%d".  The text is written to a static buffer, so the returned
 * pointer is overwritten by the next call and the function is not
 * reentrant.
 */
static char *print_openais_csid(const char *csid)
{
	static char buf[128];
	int id;

	/* csid is a raw byte buffer; copy rather than cast to avoid alignment traps */
	memcpy(&id, csid, sizeof(id));
	snprintf(buf, sizeof(buf), "%d", id);

	return buf;
}
219 static int add_internal_client(int fd
, fd_callback_t callback
)
221 struct local_client
*client
;
223 DEBUGLOG("Add_internal_client, fd = %d\n", fd
);
225 client
= malloc(sizeof(struct local_client
));
228 DEBUGLOG("malloc failed\n");
232 memset(client
, 0, sizeof(struct local_client
));
234 client
->type
= CLUSTER_INTERNAL
;
235 client
->callback
= callback
;
238 /* Set Close-on-exec */
239 fcntl(fd
, F_SETFD
, 1);
244 static void openais_cpg_deliver_callback (cpg_handle_t handle
,
245 const struct cpg_name
*groupName
,
253 memcpy(&target_nodeid
, msg
, OPENAIS_CSID_LEN
);
255 DEBUGLOG("%u got message from nodeid %d for %d. len %d\n",
256 our_nodeid
, nodeid
, target_nodeid
, msg_len
-4);
258 if (nodeid
!= our_nodeid
)
259 if (target_nodeid
== our_nodeid
|| target_nodeid
== 0)
260 process_message(cluster_client
, (char *)msg
+OPENAIS_CSID_LEN
,
261 msg_len
-OPENAIS_CSID_LEN
, (char*)&nodeid
);
264 static void openais_cpg_confchg_callback(cpg_handle_t handle
,
265 const struct cpg_name
*groupName
,
266 const struct cpg_address
*member_list
, size_t member_list_entries
,
267 const struct cpg_address
*left_list
, size_t left_list_entries
,
268 const struct cpg_address
*joined_list
, size_t joined_list_entries
)
271 struct node_info
*ninfo
;
273 DEBUGLOG("confchg callback. %d joined, %d left, %d members\n",
274 joined_list_entries
, left_list_entries
, member_list_entries
);
276 for (i
=0; i
<joined_list_entries
; i
++) {
277 ninfo
= dm_hash_lookup_binary(node_hash
,
278 (char *)&joined_list
[i
].nodeid
,
281 ninfo
= malloc(sizeof(struct node_info
));
286 ninfo
->nodeid
= joined_list
[i
].nodeid
;
287 dm_hash_insert_binary(node_hash
,
288 (char *)&ninfo
->nodeid
,
289 OPENAIS_CSID_LEN
, ninfo
);
292 ninfo
->state
= NODE_CLVMD
;
295 for (i
=0; i
<left_list_entries
; i
++) {
296 ninfo
= dm_hash_lookup_binary(node_hash
,
297 (char *)&left_list
[i
].nodeid
,
300 ninfo
->state
= NODE_DOWN
;
303 for (i
=0; i
<member_list_entries
; i
++) {
304 if (member_list
[i
].nodeid
== 0) continue;
305 ninfo
= dm_hash_lookup_binary(node_hash
,
306 (char *)&member_list
[i
].nodeid
,
309 ninfo
= malloc(sizeof(struct node_info
));
314 ninfo
->nodeid
= member_list
[i
].nodeid
;
315 dm_hash_insert_binary(node_hash
,
316 (char *)&ninfo
->nodeid
,
317 OPENAIS_CSID_LEN
, ninfo
);
320 ninfo
->state
= NODE_CLVMD
;
323 num_nodes
= member_list_entries
;
326 static int lck_dispatch(struct local_client
*client
, char *buf
, int len
,
327 const char *csid
, struct local_client
**new_client
)
330 saLckDispatch(lck_handle
, SA_DISPATCH_ONE
);
334 static int _init_cluster(void)
337 SaVersionT ver
= { 'B', 1, 1 };
340 node_hash
= dm_hash_create(100);
341 lock_hash
= dm_hash_create(10);
343 err
= cpg_initialize(&cpg_handle
,
344 &openais_cpg_callbacks
);
345 if (err
!= SA_AIS_OK
) {
346 syslog(LOG_ERR
, "Cannot initialise OpenAIS CPG service: %d",
348 DEBUGLOG("Cannot initialise OpenAIS CPG service: %d", err
);
349 return ais_to_errno(err
);
352 err
= saLckInitialize(&lck_handle
,
355 if (err
!= SA_AIS_OK
) {
356 cpg_initialize(&cpg_handle
, &openais_cpg_callbacks
);
357 syslog(LOG_ERR
, "Cannot initialise OpenAIS lock service: %d",
359 DEBUGLOG("Cannot initialise OpenAIS lock service: %d\n\n", err
);
360 return ais_to_errno(err
);
363 /* Connect to the clvmd group */
364 strcpy((char *)cpg_group_name
.value
, "clvmd");
365 cpg_group_name
.length
= strlen((char *)cpg_group_name
.value
);
366 err
= cpg_join(cpg_handle
, &cpg_group_name
);
367 if (err
!= SA_AIS_OK
) {
368 cpg_finalize(cpg_handle
);
369 saLckFinalize(lck_handle
);
370 syslog(LOG_ERR
, "Cannot join clvmd process group");
371 DEBUGLOG("Cannot join clvmd process group: %d\n", err
);
372 return ais_to_errno(err
);
375 err
= cpg_local_get(cpg_handle
,
377 if (err
!= SA_AIS_OK
) {
378 cpg_finalize(cpg_handle
);
379 saLckFinalize(lck_handle
);
380 syslog(LOG_ERR
, "Cannot get local node id\n");
381 return ais_to_errno(err
);
383 DEBUGLOG("Our local node id is %d\n", our_nodeid
);
385 saLckSelectionObjectGet(lck_handle
, (SaSelectionObjectT
*)&select_fd
);
386 add_internal_client(select_fd
, lck_dispatch
);
388 DEBUGLOG("Connected to OpenAIS\n");
393 static void _cluster_closedown(void)
395 DEBUGLOG("cluster_closedown\n");
398 saLckFinalize(lck_handle
);
399 cpg_finalize(cpg_handle
);
402 static void _get_our_csid(char *csid
)
404 memcpy(csid
, &our_nodeid
, sizeof(int));
407 /* OpenAIS doesn't really have node names so we
408 just use the node ID in hex instead */
/*
 * Parses `name` as a hexadecimal node ID and, if that succeeds, looks a
 * node up in node_hash.
 *
 * NOTE(review): the lookup below is keyed on `csid`, which is this
 * function's non-const output buffer, rather than on the `nodeid` that was
 * just parsed from `name`; nothing visible here writes `csid` first.  It
 * looks like nodeid should be copied into csid before the lookup -- confirm
 * against the callers before changing.
 */
409 static int _csid_from_name(char *csid
, const char *name
)
412 struct node_info
*ninfo
;
414 if (sscanf(name
, "%x", &nodeid
) == 1) {
415 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, OPENAIS_CSID_LEN
);
422 static int _name_from_csid(const char *csid
, char *name
)
424 struct node_info
*ninfo
;
426 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, OPENAIS_CSID_LEN
);
429 sprintf(name
, "UNKNOWN %s", print_openais_csid(csid
));
433 sprintf(name
, "%x", ninfo
->nodeid
);
437 static int _get_num_nodes()
439 DEBUGLOG("num_nodes = %d\n", num_nodes
);
443 /* Node is now known to be running a clvmd */
444 static void _add_up_node(const char *csid
)
446 struct node_info
*ninfo
;
448 ninfo
= dm_hash_lookup_binary(node_hash
, csid
, OPENAIS_CSID_LEN
);
450 DEBUGLOG("openais_add_up_node no node_hash entry for csid %s\n",
451 print_openais_csid(csid
));
455 DEBUGLOG("openais_add_up_node %d\n", ninfo
->nodeid
);
457 ninfo
->state
= NODE_CLVMD
;
462 /* Call a callback for each node, so the caller knows whether it's up or down */
463 static int _cluster_do_node_callback(struct local_client
*master_client
,
464 void (*callback
)(struct local_client
*,
465 const char *csid
, int node_up
))
467 struct dm_hash_node
*hn
;
468 struct node_info
*ninfo
;
471 dm_hash_iterate(hn
, node_hash
)
473 char csid
[OPENAIS_CSID_LEN
];
475 ninfo
= dm_hash_get_data(node_hash
, hn
);
476 memcpy(csid
, dm_hash_get_key(node_hash
, hn
), OPENAIS_CSID_LEN
);
478 DEBUGLOG("down_callback. node %d, state = %d\n", ninfo
->nodeid
,
481 if (ninfo
->state
!= NODE_DOWN
)
482 callback(master_client
, csid
, ninfo
->state
== NODE_CLVMD
);
483 if (ninfo
->state
!= NODE_CLVMD
)
/*
 * Take an OpenAIS lock on the resource named `resource`.
 *
 * Opens the lock resource with SA_LCK_RESOURCE_CREATE, requests the lock,
 * and then records the lock id and resource handle in lock_hash under the
 * resource name so that a later unlock can find them.  The value returned
 * on each visible exit path is ais_to_errno() of the last service call.
 */
490 static int _lock_resource(char *resource
, int mode
, int flags
, int *lockid
)
492 struct lock_info
*linfo
;
493 SaLckResourceHandleT res_handle
;
495 SaLckLockIdT lock_id
;
496 SaLckLockStatusT lockStatus
;
498 /* This needs to be converted from DLM/LVM2 value for OpenAIS LCK */
499 if (flags
& LCK_NONBLOCK
) flags
= SA_LCK_LOCK_NO_QUEUE
;
501 linfo
= malloc(sizeof(struct lock_info
));
505 DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource
, flags
, mode
);
/* The recorded name length includes the terminating NUL (strlen + 1). */
507 linfo
->lock_name
.length
= strlen(resource
)+1;
/*
 * NOTE(review): unbounded copy -- the capacity of lock_name.value is not
 * visible here; confirm resource names can never exceed it.
 */
508 strcpy((char *)linfo
->lock_name
.value
, resource
);
510 err
= saLckResourceOpen(lck_handle
, &linfo
->lock_name
,
511 SA_LCK_RESOURCE_CREATE
, TIMEOUT
, &res_handle
);
512 if (err
!= SA_AIS_OK
)
514 DEBUGLOG("ResourceOpen returned %d\n", err
);
516 return ais_to_errno(err
);
519 err
= saLckResourceLock(
/*
 * NOTE(review): this failure path is taken only when BOTH the call failed
 * AND the status is not GRANTED.  A successful call that was not granted
 * falls through and is recorded as held; and if the call itself fails,
 * lockStatus may never have been written.  Presumably `||` was intended --
 * confirm against the lock service API before changing.
 */
527 if (err
!= SA_AIS_OK
&& lockStatus
!= SA_LCK_LOCK_GRANTED
)
530 saLckResourceClose(res_handle
);
531 return ais_to_errno(err
);
534 /* Wait for it to complete */
536 DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err
,
/* Remember what is needed to unlock and close this resource later. */
539 linfo
->lock_id
= lock_id
;
540 linfo
->res_handle
= res_handle
;
542 dm_hash_insert(lock_hash
, resource
, linfo
);
544 return ais_to_errno(err
);
548 static int _unlock_resource(char *resource
, int lockid
)
551 struct lock_info
*linfo
;
553 DEBUGLOG("unlock_resource %s\n", resource
);
554 linfo
= dm_hash_lookup(lock_hash
, resource
);
558 DEBUGLOG("unlock_resource: lockid: %llx\n", linfo
->lock_id
);
559 err
= saLckResourceUnlock(linfo
->lock_id
, SA_TIME_END
);
560 if (err
!= SA_AIS_OK
)
562 DEBUGLOG("Unlock returned %d\n", err
);
563 return ais_to_errno(err
);
566 /* Release the resource */
567 dm_hash_remove(lock_hash
, resource
);
568 saLckResourceClose(linfo
->res_handle
);
571 return ais_to_errno(err
);
574 static int _sync_lock(const char *resource
, int mode
, int flags
, int *lockid
)
577 char lock1
[strlen(resource
)+3];
578 char lock2
[strlen(resource
)+3];
580 snprintf(lock1
, sizeof(lock1
), "%s-1", resource
);
581 snprintf(lock2
, sizeof(lock2
), "%s-2", resource
);
586 status
= _lock_resource(lock1
, SA_LCK_EX_LOCK_MODE
, flags
, lockid
);
590 /* If we can't get this lock too then bail out */
591 status
= _lock_resource(lock2
, SA_LCK_EX_LOCK_MODE
, LCK_NONBLOCK
,
593 if (status
== SA_LCK_LOCK_NOT_QUEUED
)
595 _unlock_resource(lock1
, *lockid
);
603 status
= _lock_resource(lock1
, SA_LCK_PR_LOCK_MODE
, flags
, lockid
);
606 _unlock_resource(lock2
, *lockid
);
610 status
= _lock_resource(lock2
, SA_LCK_EX_LOCK_MODE
, flags
, lockid
);
613 _unlock_resource(lock1
, *lockid
);
626 static int _sync_unlock(const char *resource
, int lockid
)
629 char lock1
[strlen(resource
)+3];
630 char lock2
[strlen(resource
)+3];
632 snprintf(lock1
, sizeof(lock1
), "%s-1", resource
);
633 snprintf(lock2
, sizeof(lock2
), "%s-2", resource
);
635 _unlock_resource(lock1
, lockid
);
636 _unlock_resource(lock2
, lockid
);
/*
 * We are always quorate !
 *
 * This backend performs no quorum check of its own, so the answer is
 * unconditionally "yes" (non-zero).
 */
static int _is_quorate(void)
{
	return 1;
}
647 static int _get_main_cluster_fd(void)
651 cpg_fd_get(cpg_handle
, &select_fd
);
655 static int _cluster_fd_callback(struct local_client
*fd
, char *buf
, int len
,
657 struct local_client
**new_client
)
661 cpg_dispatch(cpg_handle
, SA_DISPATCH_ONE
);
665 static int _cluster_send_message(const void *buf
, int msglen
, const char *csid
,
673 memcpy(&target_node
, csid
, OPENAIS_CSID_LEN
);
677 iov
[0].iov_base
= &target_node
;
678 iov
[0].iov_len
= sizeof(int);
679 iov
[1].iov_base
= (char *)buf
;
680 iov
[1].iov_len
= msglen
;
682 err
= cpg_mcast_joined(cpg_handle
, CPG_TYPE_AGREED
, iov
, 2);
683 return ais_to_errno(err
);
/*
 * We don't have a cluster name to report here, so a fixed placeholder
 * ("OpenAIS") is copied into buf instead.
 *
 * buf    - destination buffer
 * buflen - size of buf in bytes; nothing is written when it is <= 0
 *
 * The result is always NUL-terminated and is truncated if buf is too
 * small.  Always returns 0.
 */
static int _get_cluster_name(char *buf, int buflen)
{
	if (buf && buflen > 0) {
		strncpy(buf, "OpenAIS", (size_t)buflen);
		/* strncpy does not terminate when the source fills the buffer */
		buf[buflen - 1] = '\0';
	}

	return 0;
}
693 static struct cluster_ops _cluster_openais_ops
= {
694 .cluster_init_completed
= NULL
,
695 .cluster_send_message
= _cluster_send_message
,
696 .name_from_csid
= _name_from_csid
,
697 .csid_from_name
= _csid_from_name
,
698 .get_num_nodes
= _get_num_nodes
,
699 .cluster_fd_callback
= _cluster_fd_callback
,
700 .get_main_cluster_fd
= _get_main_cluster_fd
,
701 .cluster_do_node_callback
= _cluster_do_node_callback
,
702 .is_quorate
= _is_quorate
,
703 .get_our_csid
= _get_our_csid
,
704 .add_up_node
= _add_up_node
,
705 .reread_config
= NULL
,
706 .cluster_closedown
= _cluster_closedown
,
707 .get_cluster_name
= _get_cluster_name
,
708 .sync_lock
= _sync_lock
,
709 .sync_unlock
= _sync_unlock
,
712 struct cluster_ops
*init_openais_cluster(void)
714 if (!_init_cluster())
715 return &_cluster_openais_ops
;