4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
7 * This file is part of LVM2.
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * Locking functions for LVM.
20 * The main purpose of this part of the library is to serialise LVM
21 * management operations across a cluster.
26 #include "lvm-string.h"
28 #include "locking_types.h"
29 #include "toolcontext.h"
33 #include <sys/socket.h>
37 #ifndef CLUSTER_LOCKING_INTERNAL
/*
 * Prototypes for the public locking entry points. They follow the
 * "#ifndef CLUSTER_LOCKING_INTERNAL" line, so presumably they are only
 * declared for the non-internal build (the matching #endif is not
 * visible in this view).
 */
/* Issue a cluster lock request for 'resource'; 'flags' carries the
 * lock scope and lock type bits. */
38 int lock_resource(struct cmd_context
*cmd
, const char *resource
, uint32_t flags
);
/* Ask the cluster which lock mode is held on 'resource'; the result is
 * written through 'mode'. */
39 int query_resource(const char *resource
, int *mode
);
/* Close the connection to clvmd if one is open. */
40 void locking_end(void);
/* Open the connection to clvmd and OR LCK_PRE_MEMLOCK and LCK_CLUSTERED
 * into *flags. */
41 int locking_init(int type
, struct config_tree
*cf
, uint32_t *flags
);
44 typedef struct lvm_response
{
52 * This gets stuck at the start of memory we allocate so we
53 * can sanity-check it at deallocation time
55 #define LVM_SIGNATURE 0x434C564D
58 * NOTE: the LVMD uses the socket FD as the client ID, this means
59 * that any client that calls fork() will inherit the context of
/* Descriptor of the local socket connected to clvmd; -1 while no
 * connection is open (callers test for -1 before opening it). */
62 static int _clvmd_sock
= -1;
64 /* FIXME Install SIGPIPE handler? */
66 /* Open connection to the Cluster Manager daemon */
/*
 * Create a PF_UNIX stream socket and connect it to the clvmd socket
 * path CLVMD_SOCKNAME. Both failure paths log the errno text.
 * The return statements are not visible in this view; callers compare
 * the result with -1 to detect failure.
 */
67 static int _open_local_sock(void)
70 struct sockaddr_un sockaddr
;
72 /* Open local socket */
73 if ((local_socket
= socket(PF_UNIX
, SOCK_STREAM
, 0)) < 0) {
74 log_error("Local socket creation failed: %s", strerror(errno
));
78 memset(&sockaddr
, 0, sizeof(sockaddr
));
/* Copies sizeof(CLVMD_SOCKNAME) bytes, i.e. the terminating NUL too if
 * the macro is a string literal (TODO confirm).
 * NOTE(review): no check against sizeof(sockaddr.sun_path) is visible. */
79 memcpy(sockaddr
.sun_path
, CLVMD_SOCKNAME
, sizeof(CLVMD_SOCKNAME
));
81 sockaddr
.sun_family
= AF_UNIX
;
83 if (connect(local_socket
,(struct sockaddr
*) &sockaddr
,
/* errno is captured first, presumably because the logging and close()
 * calls that follow may overwrite it. */
85 int saved_errno
= errno
;
87 log_error("connect() failed on local socket: %s",
89 if (close(local_socket
))
99 /* Send a request and return the status */
/*
 * Write the request in 'inbuf' ('inlen' bytes) to the clvmd socket, then
 * read the reply: first a struct clvm_header into the on-stack 'outbuf',
 * then the argument bytes into a dm_malloc()ed buffer of
 * (header bytes read + arglen) bytes that is handed back via *retbuf.
 * A non-zero status in the reply header is copied into errno.
 *
 * NOTE(review): several lines of this function (the EINTR handling, the
 * error returns, the allocation check and how 'off' advances) are not
 * visible in this view; the notes below describe only what the visible
 * statements do.
 */
100 static int _send_request(char *inbuf
, int inlen
, char **retbuf
)
/* Scratch buffer for the reply header, viewed through 'outheader'. */
102 char outbuf
[PIPE_BUF
] __attribute((aligned(8)));
103 struct clvm_header
*outheader
= (struct clvm_header
*) outbuf
;
109 /* Send it to CLVMD */
/* Anything other than a full write of 'inlen' bytes enters the error
 * branch; an interrupted write (EINTR) is singled out inside it. */
111 if ( (err
= write(_clvmd_sock
, inbuf
, inlen
)) != inlen
) {
112 if (err
== -1 && errno
== EINTR
)
114 log_error("Error writing data to clvmd: %s", strerror(errno
));
118 /* Get the response */
/* NOTE(review): only len < 0 and (presumably) len == 0 are handled in the
 * visible lines; a short read of the header is not visibly detected. */
120 if ((len
= read(_clvmd_sock
, outbuf
, sizeof(struct clvm_header
))) < 0) {
123 log_error("Error reading data from clvmd: %s", strerror(errno
));
128 log_error("EOF reading CLVMD");
133 /* Allocate buffer */
/* NOTE(review): arglen comes straight from the daemon's header; no range
 * check on it is visible before it sizes the allocation. */
134 buflen
= len
+ outheader
->arglen
;
135 *retbuf
= dm_malloc(buflen
);
141 /* Copy the header */
142 memcpy(*retbuf
, outbuf
, len
);
/* From here on 'outheader' refers to the heap copy, not to 'outbuf'. */
143 outheader
= (struct clvm_header
*) *retbuf
;
145 /* Read the returned values */
146 off
= 1; /* we've already read the first byte */
/* Keep reading argument bytes at args + off until arglen is covered or a
 * read returns <= 0. */
147 while (off
<= outheader
->arglen
&& len
> 0) {
148 len
= read(_clvmd_sock
, outheader
->args
+ off
,
149 buflen
- off
- offsetof(struct clvm_header
, args
));
154 /* Was it an error ? */
155 if (outheader
->status
!= 0) {
156 errno
= outheader
->status
;
158 /* Only return an error here if there are no node-specific
159 errors present in the message that might have more detail */
160 if (!(outheader
->flags
& CLVMD_FLAG_NODEERRS
)) {
161 log_error("cluster request failed: %s", strerror(errno
));
170 /* Build the structure header and parse-out wildcard node names */
171 /* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
/*
 * Fill in a request header: store the command in head->cmd and translate
 * the node name. The name "*" leaves head->node empty; "." leaves it
 * empty and sets CLVMD_FLAG_LOCAL; otherwise (presumably in an else
 * branch that is not visible here) the name is copied with strcpy().
 *
 * NOTE(review): the strcpy() is unbounded. _cluster_request sizes its
 * buffer from strlen(node), so the copy relies on the caller's sizing.
 * The last parameter(s) and several statements are not visible in this
 * view.
 */
172 static void _build_header(struct clvm_header
*head
, int clvmd_cmd
, const char *node
,
175 head
->cmd
= clvmd_cmd
;
183 * Allow a couple of special node names:
185 * "." for the local node only
187 if (strcmp(node
, "*") == 0) {
188 head
->node
[0] = '\0';
189 } else if (strcmp(node
, ".") == 0) {
190 head
->node
[0] = '\0';
191 head
->flags
= CLVMD_FLAG_LOCAL
;
193 strcpy(head
->node
, node
);
/* This second clearing of head->node presumably belongs to a branch whose
 * condition is not visible in this view. */
195 head
->node
[0] = '\0';
199 * Send a message to one (or all) node(s) in the cluster and wait for replies
/*
 * Build a request (header, node name, then 'len' bytes of 'data'), send
 * it with _send_request() and unpack the reply into a dm_malloc()ed
 * array of lvm_response_t returned through *response, with the entry
 * count stored in *num.
 *
 * Each reply record is walked as: NUL-terminated node name, an int
 * status, then a NUL-terminated response string.
 *
 * NOTE(review): loop headers, error returns and the release of the raw
 * reply buffer are not visible in this view.
 */
201 static int _cluster_request(char clvmd_cmd
, const char *node
, void *data
, int len
,
202 lvm_response_t
** response
, int *num
)
/* Variable-length stack buffer: header + payload + node name + NUL. */
204 char outbuf
[sizeof(struct clvm_header
) + len
+ strlen(node
) + 1] __attribute((aligned(8)));
209 int num_responses
= 0;
210 struct clvm_header
*head
= (struct clvm_header
*) outbuf
;
211 lvm_response_t
*rarray
;
/* Open the clvmd connection on first use. */
215 if (_clvmd_sock
== -1)
216 _clvmd_sock
= _open_local_sock();
218 if (_clvmd_sock
== -1)
221 _build_header(head
, clvmd_cmd
, node
, len
);
/* The payload is placed immediately after the node name's NUL. */
222 memcpy(head
->node
+ strlen(head
->node
) + 1, data
, len
);
224 status
= _send_request(outbuf
, sizeof(struct clvm_header
) +
225 strlen(head
->node
) + len
, &retbuf
);
229 /* Count the number of responses we got */
230 head
= (struct clvm_header
*) retbuf
;
/* Skip one record: node name, int status, response string. */
234 inptr
+= strlen(inptr
) + 1;
235 inptr
+= sizeof(int);
236 inptr
+= strlen(inptr
) + 1;
240 * Allocate response array.
241 * With an extra pair of INTs on the front to sanity
242 * check the pointer when we are given it back to free
/* NOTE(review): the allocation below is exactly
 * sizeof(lvm_response_t) * num_responses; no extra leading INTs are
 * reserved in the visible code, so the remark above looks stale. */
244 *response
= dm_malloc(sizeof(lvm_response_t
) * num_responses
);
253 /* Unpack the response into an lvm_response_t array */
/* NOTE(review): unbounded copy of the node name taken from the reply; the
 * size of the 'node' member is not visible here. */
257 strcpy(rarray
[i
].node
, inptr
);
258 inptr
+= strlen(inptr
) + 1;
/* memcpy is used for the status; presumably because inptr may not be
 * int-aligned at this point. */
260 memcpy(&rarray
[i
].status
, inptr
, sizeof(int));
261 inptr
+= sizeof(int);
263 rarray
[i
].response
= dm_malloc(strlen(inptr
) + 1);
264 if (rarray
[i
].response
== NULL
) {
265 /* Free up everything else and return error */
/* NOTE(review): the loop variable is j but the call frees
 * rarray[i].response, which is the entry that just failed to allocate
 * (NULL). The earlier entries rarray[0..i-1] are never released here;
 * this looks like it should be rarray[j].response. */
267 for (j
= 0; j
< i
; j
++)
268 dm_free(rarray
[i
].response
);
275 strcpy(rarray
[i
].response
, inptr
);
276 rarray
[i
].len
= strlen(inptr
);
277 inptr
+= strlen(inptr
) + 1;
280 *num
= num_responses
;
290 /* Free reply array */
/*
 * Release the per-entry response strings of an array of 'num'
 * lvm_response_t entries.
 * Whether the array itself is also freed, and what is returned, is not
 * visible in this view.
 */
291 static int _cluster_free_request(lvm_response_t
* response
, int num
)
295 for (i
= 0; i
< num
; i
++) {
296 dm_free(response
[i
].response
);
/*
 * Pack a lock request for 'name' into an argument buffer (two flag bytes
 * followed by the NUL-terminated name), send it with _cluster_request()
 * and report per-node failures from the replies.
 *
 * For every reply with a non-zero status an error is logged and errno is
 * set to that status; the reply array is then released with
 * _cluster_free_request().
 *
 * NOTE(review): the declaration/allocation of 'args', the statement
 * controlled by the node-selection test, the use of 'saved_errno' and
 * the return value are not visible in this view.
 */
304 static int _lock_for_cluster(struct cmd_context
*cmd
, unsigned char clvmd_cmd
,
305 uint32_t flags
, const char *name
)
310 const char *node
= "";
312 int saved_errno
= errno
;
313 lvm_response_t
*response
= NULL
;
/* 2 flag bytes + name + terminating NUL. */
318 len
= strlen(name
) + 3;
320 strcpy(args
+ 2, name
);
322 args
[0] = flags
& 0x7F; /* Maskoff lock flags */
323 args
[1] = flags
& 0xC0; /* Bitmap flags */
/* Additional mode bits are ORed into the second flag byte. */
325 if (mirror_in_sync())
326 args
[1] |= LCK_MIRROR_NOSYNC_MODE
;
328 if (dmeventd_monitor_mode())
329 args
[1] |= LCK_DMEVENTD_MONITOR_MODE
;
331 if (cmd
->partial_activation
)
332 args
[1] |= LCK_PARTIAL_MODE
;
335 * VG locks are just that: locks, and have no side effects
336 * so we only need to do them on the local node because all
337 * locks are cluster-wide.
338 * Also, if the lock is exclusive it makes no sense to try to
339 * acquire it on all nodes, so just do that on the local node too.
340 * One exception, is that P_ locks /do/ get distributed across
341 * the cluster because they might have side-effects.
/* True when the name does not start with "P_" and at least one of: VG
 * lock command, exclusive lock type, LCK_LOCAL set, LCK_CLUSTER_VG not
 * set. */
343 if (strncmp(name
, "P_", 2) &&
344 (clvmd_cmd
== CLVMD_CMD_LOCK_VG
||
345 (flags
& LCK_TYPE_MASK
) == LCK_EXCL
||
346 (flags
& LCK_LOCAL
) ||
347 !(flags
& LCK_CLUSTER_VG
)))
350 status
= _cluster_request(clvmd_cmd
, node
, args
, len
,
351 &response
, &num_responses
);
353 /* If any nodes were down then display them and return an error */
354 for (i
= 0; i
< num_responses
; i
++) {
355 if (response
[i
].status
== EHOSTDOWN
) {
356 log_error("clvmd not running on node %s",
359 errno
= response
[i
].status
;
360 } else if (response
[i
].status
) {
/* Prefer the node's own message text; fall back to strerror() of the
 * status when the text is empty. */
361 log_error("Error locking on node %s: %s",
363 response
[i
].response
[0] ?
364 response
[i
].response
:
365 strerror(response
[i
].status
));
367 errno
= response
[i
].status
;
372 _cluster_free_request(response
, num_responses
);
378 /* API entry point for LVM */
379 #ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Lock entry point. Two signatures are visible: a static _lock_resource
 * after "#ifdef CLUSTER_LOCKING_INTERNAL" and a public lock_resource
 * (the #else/#endif lines between them are not visible in this view).
 *
 * The scope bits of 'flags' select the clvmd command and the lock name:
 *  - in the branch that sets CLVMD_CMD_LOCK_VG the name is built as
 *    "P_..." or "V_..." and flags are reduced to the lock type bits;
 *    flags == LCK_VG_BACKUP is sent directly as CLVMD_CMD_VG_BACKUP;
 *  - in the branch that sets CLVMD_CMD_LOCK_LV the resource is copied
 *    unchanged and bit 0x20 (HOLD, per the comment) is cleared.
 * The request is then passed to _lock_for_cluster().
 *
 * NOTE(review): case labels, the lock_type strings and the error returns
 * are not visible in this view.
 */
380 static int _lock_resource(struct cmd_context
*cmd
, const char *resource
,
383 int lock_resource(struct cmd_context
*cmd
, const char *resource
, uint32_t flags
)
386 char lockname
[PATH_MAX
];
388 const char *lock_scope
;
389 const char *lock_type
= "";
/* NOTE(review): this is the only visible length guard for the strcpy()
 * into 'lockname' further down; assert() is compiled out when NDEBUG is
 * defined. */
391 assert(strlen(resource
) < sizeof(lockname
));
394 switch (flags
& LCK_SCOPE_MASK
) {
396 if (flags
== LCK_VG_BACKUP
) {
397 log_very_verbose("Requesting backup of VG metadata for %s",
399 return _lock_for_cluster(cmd
, CLVMD_CMD_VG_BACKUP
,
400 LCK_CLUSTER_VG
, resource
);
403 /* If the VG name is empty then lock the unused PVs */
/* The test is on a leading '#' in the resource name or the LCK_CACHE
 * flag. */
404 if (*resource
== '#' || (flags
& LCK_CACHE
))
405 dm_snprintf(lockname
, sizeof(lockname
), "P_%s",
408 dm_snprintf(lockname
, sizeof(lockname
), "V_%s",
412 clvmd_cmd
= CLVMD_CMD_LOCK_VG
;
413 flags
&= LCK_TYPE_MASK
;
417 clvmd_cmd
= CLVMD_CMD_LOCK_LV
;
418 strcpy(lockname
, resource
);
420 flags
&= 0xffdf; /* Mask off HOLD flag */
/* NOTE(review): 'flags' is uint32_t, so "%d" is given an unsigned
 * argument here; the lock-type message below uses "%u". */
424 log_error("Unrecognised lock scope: %d",
425 flags
& LCK_SCOPE_MASK
);
429 switch(flags
& LCK_TYPE_MASK
) {
449 log_error("Unrecognised lock type: %u",
450 flags
& LCK_TYPE_MASK
);
/* One-letter markers: B when LCK_NONBLOCK is clear, H/L/C when LCK_HOLD,
 * LCK_LOCAL or LCK_CLUSTER_VG is set. */
454 log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope
, lockname
,
456 flags
& LCK_NONBLOCK
? "" : "B",
457 flags
& LCK_HOLD
? "H" : "",
458 flags
& LCK_LOCAL
? "L" : "",
459 flags
& LCK_CLUSTER_VG
? "C" : "",
462 /* Send a message to the cluster manager */
463 return _lock_for_cluster(cmd
, clvmd_cmd
, flags
, lockname
);
/*
 * Map a lock-mode string taken from a clvmd reply ("EX", "CR", "PR") to
 * an integer lock type. The returned constants and the first test are
 * not visible in this view.
 *
 * NOTE(review): strcmp() returns 0 when the strings are equal, so each
 * "else if (strcmp(response, ...))" below is true when the reply does
 * NOT match that string. If the hidden lines return the mode named in
 * the test, the checks are inverted and should read !strcmp(...) or
 * strcmp(...) == 0; confirm against the full function.
 */
466 static int decode_lock_type(const char *response
)
470 else if (strcmp(response
, "EX"))
472 else if (strcmp(response
, "CR"))
474 else if (strcmp(response
, "PR"))
481 #ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Lock query entry point. Two signatures are visible: a static
 * _query_resource after "#ifdef CLUSTER_LOCKING_INTERNAL" and a public
 * query_resource (the #else/#endif lines are not visible in this view).
 *
 * Sends CLVMD_CMD_LOCK_QUERY for 'resource' and scans the per-node
 * replies, raising *mode to the largest value decode_lock_type() yields
 * for any reply. The reply array is released with
 * _cluster_free_request().
 *
 * NOTE(review): the declaration/allocation of 'args', the value stored
 * in args[0], the initial value of *mode, the statements controlled by
 * the two skip tests in the loop and the return value are not visible
 * in this view.
 */
482 static int _query_resource(const char *resource
, int *mode
)
484 int query_resource(const char *resource
, int *mode
)
487 int i
, status
, len
, num_responses
, saved_errno
;
488 const char *node
= "";
490 lvm_response_t
*response
= NULL
;
/* 2 flag bytes + resource name + terminating NUL. */
493 len
= strlen(resource
) + 3;
495 strcpy(args
+ 2, resource
);
498 args
[1] = LCK_CLUSTER_VG
;
500 status
= _cluster_request(CLVMD_CMD_LOCK_QUERY
, node
, args
, len
,
501 &response
, &num_responses
);
503 for (i
= 0; i
< num_responses
; i
++) {
/* Replies from nodes that are down, and replies with an empty response
 * string, are tested for separately (presumably to skip them). */
504 if (response
[i
].status
== EHOSTDOWN
)
507 if (!response
[i
].response
[0])
511 * All nodes should use CR, or exactly one node
512 * should held EX. (PR is obsolete)
513 * If two nodes node reports different locks,
514 * something is broken - just return more important mode.
/* Keep the numerically largest decoded mode; the reply is decoded twice,
 * once for the comparison and once for the assignment. */
516 if (decode_lock_type(response
[i
].response
) > *mode
)
517 *mode
= decode_lock_type(response
[i
].response
);
519 log_debug("Lock held for %s, node %s : %s", resource
,
520 response
[i
].node
, response
[i
].response
);
523 _cluster_free_request(response
, num_responses
);
529 #ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Shut down cluster locking: if the clvmd socket is open (not -1) it is
 * closed; the statement run when close() fails is not visible in this
 * view. Two signatures are visible (static _locking_end and public
 * locking_end); the preprocessor lines selecting between them are not.
 */
530 static void _locking_end(void)
532 void locking_end(void)
535 if (_clvmd_sock
!= -1 && close(_clvmd_sock
))
541 #ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Drop the current clvmd connection and open a fresh one with
 * _open_local_sock(). Two signatures are visible (static _reset_locking
 * and public reset_locking); the preprocessor lines selecting between
 * them are not visible in this view.
 *
 * NOTE(review): close() is called on _clvmd_sock without the "!= -1"
 * guard that locking_end() applies, so it may be handed -1 when no
 * connection is open.
 */
542 static void _reset_locking(void)
544 void reset_locking(void)
547 if (close(_clvmd_sock
))
550 _clvmd_sock
= _open_local_sock();
551 if (_clvmd_sock
== -1)
555 #ifdef CLUSTER_LOCKING_INTERNAL
/*
 * Initialise the internal cluster locking type: install the static
 * lock/query/end/reset handlers into 'locking', set its flags to
 * LCK_PRE_MEMLOCK | LCK_CLUSTERED and open the clvmd connection.
 * 'cmd' is not used in the visible lines. The return statements are not
 * visible in this view; a result of -1 from _open_local_sock() is
 * tested as the failure case.
 */
556 int init_cluster_locking(struct locking_type
*locking
, struct cmd_context
*cmd
)
558 locking
->lock_resource
= _lock_resource
;
559 locking
->query_resource
= _query_resource
;
560 locking
->fin_locking
= _locking_end
;
561 locking
->reset_locking
= _reset_locking
;
562 locking
->flags
= LCK_PRE_MEMLOCK
| LCK_CLUSTERED
;
564 _clvmd_sock
= _open_local_sock();
565 if (_clvmd_sock
== -1)
/*
 * Initialise cluster locking: open the clvmd connection and OR
 * LCK_PRE_MEMLOCK and LCK_CLUSTERED into *flags.
 * 'type' and 'cf' are not used in the visible lines. The return
 * statements are not visible in this view; a result of -1 from
 * _open_local_sock() is tested as the failure case.
 */
571 int locking_init(int type
, struct config_tree
*cf
, uint32_t *flags
)
573 _clvmd_sock
= _open_local_sock();
574 if (_clvmd_sock
== -1)
577 /* Ask LVM to lock memory before calling us */
578 *flags
|= LCK_PRE_MEMLOCK
;
579 *flags
|= LCK_CLUSTERED
;