/*
   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_state_change.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices_done(struct netlink_callback *cb);
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_connections_done(struct netlink_callback *cb);
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
#include <linux/drbd_genl_api.h>
#include <linux/genl_magic_func.h>

static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */

DEFINE_MUTEX(notification_mutex);

/* used by blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		pr_err("error sending genl reply\n");
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 * reason it could fail was no space in skb, and there are 4k available. */
static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
	if (!info || !info[0])

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	err = nla_put_string(skb, T_info_text, info);
		nla_nest_cancel(skb, nla);
	nla_nest_end(skb, nla);
/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 *
 * At this point, we still rely on the global genl_lock().
 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 * to add additional synchronization against object destruction/modification.
 */
#define DRBD_ADM_NEED_MINOR	1
#define DRBD_ADM_NEED_RESOURCE	2
#define DRBD_ADM_NEED_CONNECTION 4
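/* Added usage sketch (editor's note, not part of the original file): a .doit
 * handler is assumed to follow the prepare / check / finish pattern, e.g.
 *
 *	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *	if (!adm_ctx.reply_skb)
 *		return retcode;
 *	if (retcode == NO_ERROR)
 *		... operate on adm_ctx.device / adm_ctx.resource ...
 *	drbd_adm_finish(&adm_ctx, info, retcode);
 *	return 0;
 *
 * compare drbd_adm_set_role() further below. */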
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
	struct sk_buff *skb, struct genl_info *info, unsigned flags)
	struct drbd_genlmsghdr *d_in = info->userhdr;
	const u8 cmd = info->genlhdr->cmd;

	memset(adm_ctx, 0, sizeof(*adm_ctx));

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))

	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx->reply_skb) {

	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* put of a few bytes into a fresh skb of >= 4k will always succeed. */
	if (!adm_ctx->reply_dh) {

	adm_ctx->reply_dh->minor = d_in->minor;
	adm_ctx->reply_dh->ret_code = NO_ERROR;

	adm_ctx->volume = VOLUME_UNSPECIFIED;
	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx->reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);

		/* and assign stuff to the adm_ctx */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
			adm_ctx->volume = nla_get_u32(nla);
		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
			adm_ctx->resource_name = nla_data(nla);
		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
		if ((adm_ctx->my_addr &&
		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
		    (adm_ctx->peer_addr &&
		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {

	adm_ctx->minor = d_in->minor;
	adm_ctx->device = minor_to_device(d_in->minor);

	/* We are protected by the global genl_lock().
	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
	 * so make sure this object stays around. */
		kref_get(&adm_ctx->device->kref);

	if (adm_ctx->resource_name) {
		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);

	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
		return ERR_MINOR_INVALID;
	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
		if (adm_ctx->resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;

	if (flags & DRBD_ADM_NEED_CONNECTION) {
		if (adm_ctx->resource) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
			return ERR_INVALID_REQUEST;
		if (adm_ctx->device) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
			return ERR_INVALID_REQUEST;
		if (adm_ctx->my_addr && adm_ctx->peer_addr)
			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
								nla_len(adm_ctx->my_addr),
								nla_data(adm_ctx->peer_addr),
								nla_len(adm_ctx->peer_addr));
		if (!adm_ctx->connection) {
			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
			return ERR_INVALID_REQUEST;

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx->device && adm_ctx->resource &&
	    adm_ctx->device->resource != adm_ctx->resource) {
		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
				adm_ctx->minor, adm_ctx->resource->name,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
		return ERR_INVALID_REQUEST;
	if (adm_ctx->device &&
	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
	    adm_ctx->volume != adm_ctx->device->vnr) {
		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
				adm_ctx->minor, adm_ctx->volume,
				adm_ctx->device->vnr,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
		return ERR_INVALID_REQUEST;

	/* still, provide adm_ctx->resource always, if possible. */
	if (!adm_ctx->resource) {
		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
		if (adm_ctx->resource)
			kref_get(&adm_ctx->resource->kref);

	nlmsg_free(adm_ctx->reply_skb);
	adm_ctx->reply_skb = NULL;
static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
	struct genl_info *info, int retcode)
	if (adm_ctx->device) {
		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
		adm_ctx->device = NULL;
	if (adm_ctx->connection) {
		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
		adm_ctx->connection = NULL;
	if (adm_ctx->resource) {
		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
		adm_ctx->resource = NULL;

	if (!adm_ctx->reply_skb)

	adm_ctx->reply_dh->ret_code = retcode;
	drbd_adm_send_reply(adm_ctx->reply_skb, info);
static void setup_khelper_env(struct drbd_connection *connection, char **envp)
	/* FIXME: A future version will not allow this case. */
	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)

	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
int drbd_khelper(struct drbd_device *device, char *cmd)
	char *envp[] = { "HOME=/",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			(char[20]) { }, /* address family */
			(char[60]) { }, /* address */
	char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
	struct drbd_connection *connection = first_peer_device(device)->connection;

	if (current == connection->worker.task)
		set_bit(CALLBACK_PENDING, &connection->flags);

	snprintf(mb, 14, "minor-%d", device_to_minor(device));
	setup_khelper_env(connection, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(device);

	drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, mb,
			  (ret >> 8) & 0xff, ret);
		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, mb,
			  (ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);

	if (current == connection->worker.task)
		clear_bit(CALLBACK_PENDING, &connection->flags);

	if (ret < 0) /* Ignore any ERRNOs we got. */
enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
	char *envp[] = { "HOME=/",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			(char[20]) { }, /* address family */
			(char[60]) { }, /* address */
	char *resource_name = connection->resource->name;
	char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };

	setup_khelper_env(connection, envp);
	conn_md_sync(connection);

	drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);

	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);

	if (ret < 0) /* Ignore any ERRNOs we got. */
static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_peer_device *peer_device;

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev_if_state(device, D_CONSISTENT)) {
			struct disk_conf *disk_conf =
				rcu_dereference(peer_device->device->ldev->disk_conf);
			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
static bool resource_is_suspended(struct drbd_resource *resource)
	return resource->susp || resource->susp_fen || resource->susp_nod;
bool conn_try_outdate_peer(struct drbd_connection *connection)
	struct drbd_resource * const resource = connection->resource;
	unsigned int connect_cnt;
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;

	spin_lock_irq(&resource->req_lock);
	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
		spin_unlock_irq(&resource->req_lock);

	connect_cnt = connection->connect_cnt;
	spin_unlock_irq(&resource->req_lock);

	fp = highest_fencing_policy(connection);
		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
		spin_lock_irq(&resource->req_lock);
		if (connection->cstate < C_WF_REPORT_PARAMS) {
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE | CS_HARD | CS_DC_SUSP);
			/* We are no longer suspended due to the fencing policy.
			 * We may still be suspended due to the on-no-data-accessible policy.
			 * If that was OND_IO_ERROR, fail pending requests. */
			if (!resource_is_suspended(resource))
				_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);

			/* Else: in case we raced with a connection handshake,
			 * let the handshake figure out if we maybe can RESEND,
			 * and do not resume/fail pending requests here.
			 * Worst case is we stay suspended for now, which may be
			 * resolved by either re-establishing the replication link, or
			 * the next link failure, or eventually the administrator. */
		spin_unlock_irq(&resource->req_lock);

	r = conn_khelper(connection, "fence-peer");

	switch ((r>>8) & 0xff) {
	case P_INCONSISTENT: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		val.pdsk = D_INCONSISTENT;
	case P_OUTDATED: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		val.pdsk = D_OUTDATED;
	case P_DOWN: /* peer was down */
		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			val.pdsk = D_OUTDATED;
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
	case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
			 * This is useful when an unconnected R_SECONDARY is asked to
			 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		drbd_warn(connection, "Peer is primary, outdating myself.\n");
		val.disk = D_OUTDATED;
		/* THINK: do we need to handle this
		 * like case 4, or more like case 5? */
		if (fp != FP_STONITH)
			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		val.pdsk = D_OUTDATED;
		/* The script is broken ... */
		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */

	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);
	/* Not using
	   conn_request_state(connection, mask, val, CS_VERBOSE);
	   here, because we might have been able to re-establish the connection in the
	   meantime. */
	spin_lock_irq(&resource->req_lock);
	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
		if (connection->connect_cnt != connect_cnt)
			/* In case the connection was established and dropped
			   while the fence-peer handler was running, ignore it */
			drbd_info(connection, "Ignoring fence-peer exit code\n");
			_conn_request_state(connection, mask, val, CS_VERBOSE);
	spin_unlock_irq(&resource->req_lock);

	return conn_highest_pdsk(connection) <= D_OUTDATED;
static int _try_outdate_peer_async(void *data)
	struct drbd_connection *connection = (struct drbd_connection *)data;

	conn_try_outdate_peer(connection);

	kref_put(&connection->kref, drbd_destroy_connection);
void conn_try_outdate_peer_async(struct drbd_connection *connection)
	struct task_struct *opa;

	kref_get(&connection->kref);
	/* We may just have force_sig()'ed this thread
	 * to get it out of some blocking network function.
	 * Clear signals; otherwise kthread_run(), which internally uses
	 * wait_on_completion_killable(), will mistake our pending signal
	 * for a new fatal signal and fail. */
	flush_signals(current);
	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
		kref_put(&connection->kref, drbd_destroy_connection);
drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY) {
		struct drbd_connection *connection;

		/* Detect dead peers as soon as possible. */
		for_each_connection(connection, device->resource)
			request_ping(connection);

	mutex_lock(device->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (device->state.disk < D_UP_TO_DATE &&
		     device->state.disk >= D_INCONSISTENT)) {
			val.disk = D_UP_TO_DATE;

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(connection)) {
				val.disk = D_UP_TO_DATE;

		if (rv == SS_NOTHING_TO_DO)
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(connection) && force) {
				drbd_warn(device, "Forced into split brain situation!\n");
				val.pdsk = D_OUTDATED;

		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			nc = rcu_dereference(connection->net_conf);
			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
			schedule_timeout_interruptible(timeo);
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(device, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);

		drbd_warn(device, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

	/* FIXME also wait for all pending P_BARRIER_ACK? */

	if (new_role == R_SECONDARY) {
		if (get_ldev(device)) {
			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
		mutex_lock(&device->resource->conf_update);
		nc = connection->net_conf;
			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
		mutex_unlock(&device->resource->conf_update);

		if (get_ldev(device)) {
			if (((device->state.conn < C_CONNECTED ||
			      device->state.pdsk <= D_FAILED)
			     && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(device);

			device->ldev->md.uuid[UI_CURRENT] |= (u64)1;

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		drbd_send_uuids(peer_device);
		drbd_send_current_state(peer_device);

	drbd_md_sync(device);
	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);

	mutex_unlock(device->state_mutex);
static const char *from_attrs_err_to_txt(int err)
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
	struct drbd_config_context adm_ctx;
	struct set_role_parms parms;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
	if (retcode != NO_ERROR)

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));

	mutex_lock(&adm_ctx.resource->adm_mutex);

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);

	mutex_unlock(&adm_ctx.resource->adm_mutex);

	drbd_adm_finish(&adm_ctx, info, retcode);
/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 *
 *  |----------- md_size_sect ------------------|
 *  [ 4k superblock ][ activity log ][  Bitmap  ]
 *  | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 * Activity log size used to be fixed 32kB,
 * but is about to become configurable.
 */
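/* Added worked example (editor's note, not in the original source), assuming
 * the default 32kB activity log, i.e. al_size_4k = 8 and al_size_sect = 64:
 * for internal meta data, al_offset = -64 and
 * bm_offset = -md_size_sect + MD_4kB_SECT, so the bitmap is left with
 * md_size_sect - 8 - 64 sectors (= al_offset - bm_offset, as stated above). */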
static void drbd_md_set_sector_offsets(struct drbd_device *device,
				       struct drbd_backing_dev *bdev)
	sector_t md_size_sect = 0;
	unsigned int al_size_sect = bdev->md.al_size_4k * 8;

	bdev->md.md_offset = drbd_md_ss(bdev);

	switch (bdev->md.meta_dev_idx) {
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		/* al size is still fixed */
		bdev->md.al_offset = -al_size_sect;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_4kB_SECT + al_size_sect;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
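	/* Added illustration (editor's note, not in the original source):
	 * e.g. ppsize(buf, 1048576) formats 1 GiB worth of KB as "1024 MB",
	 * since 1048576 >> 10 == 1024 with no rounding carry. */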
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };

	while (size >= 10000 && base < sizeof(units)-1) {
		size = (size >> 10) + !!(size & (1<<9));
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 * remote READ does inc_ap_bio, receiver would need to receive answer
 * packet from remote to dec_ap_bio again.
 * receiver receive_sizes(), comes here,
 * waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 * (not connected, or bad/no disk on peer):
 * see drbd_fail_request_early, ap_bio_cnt is zero.
 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 * peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device->flag, is triggered by drbd internals,
 * and should be short-lived. */
/* It needs to be a counter, since multiple threads might
   independently suspend and resume IO. */
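/* Added usage note (editor's note, not from the original file): callers are
 * expected to bracket layout-changing work with this pair, e.g.
 *	drbd_suspend_io(device);
 *	... resize / move meta data ...
 *	drbd_resume_io(device);
 * as drbd_determine_dev_size() below does. */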
void drbd_suspend_io(struct drbd_device *device)
	atomic_inc(&device->suspend_cnt);
	if (drbd_suspended(device))
	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));

void drbd_resume_io(struct drbd_device *device)
	if (atomic_dec_and_test(&device->suspend_cnt))
		wake_up(&device->misc_wait);
/**
 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
 * @device:	DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
	struct md_offsets_and_sizes {
		u64 last_agreed_sect;
		u32 al_stripe_size_4k;
	sector_t u_size, size;
	struct drbd_md *md = &device->ldev->md;
	int md_moved, la_size_changed;
	enum determine_dev_size rv = DS_UNCHANGED;

	/* We may change the on-disk offsets of our meta data below. Lock out
	 * anything that may cause meta data IO, to avoid acting on incomplete
	 * layout changes or scribbling over meta data that is in the process
	 * of being moved.
	 *
	 * Move is not exactly correct, btw, currently we have all our meta
	 * data in core memory, to "move" it we just write it all out, there
	 * are no reads. */
	drbd_suspend_io(device);
	buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
		drbd_resume_io(device);

	/* remember current offset and sizes */
	prev.last_agreed_sect = md->la_size_sect;
	prev.md_offset = md->md_offset;
	prev.al_offset = md->al_offset;
	prev.bm_offset = md->bm_offset;
	prev.md_size_sect = md->md_size_sect;
	prev.al_stripes = md->al_stripes;
	prev.al_stripe_size_4k = md->al_stripe_size_4k;

	/* rs is non NULL if we should change the AL layout only */
		md->al_stripes = rs->al_stripes;
		md->al_stripe_size_4k = rs->al_stripe_size / 4;
		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;

	drbd_md_set_sector_offsets(device, device->ldev);

	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;

	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

	if (size < prev.last_agreed_sect) {
		if (rs && u_size == 0) {
			/* Remove "rs &&" later. This check should always be active, but
			   right now the receiver expects the permissive behavior */
			drbd_warn(device, "Implicit shrink not allowed. "
				"Use --size=%llus for explicit shrink.\n",
				(unsigned long long)size);
			rv = DS_ERROR_SHRINK;
			rv = DS_ERROR_SPACE_MD;
		if (rv != DS_UNCHANGED)

	if (drbd_get_capacity(device->this_bdev) != size ||
	    drbd_bm_capacity(device) != size) {
		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(device);
				drbd_err(device, "OUT OF MEMORY! "
					"Could not allocate bitmap!\n");
				drbd_err(device, "BM resizing failed. "
					"Leaving size unchanged\n");
		/* racy, see comments above. */
		drbd_set_my_capacity(device, size);
		md->la_size_sect = size;
		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
			 (unsigned long long)size>>1);

	la_size_changed = (prev.last_agreed_sect != md->la_size_sect);

	md_moved = prev.md_offset    != md->md_offset
		|| prev.md_size_sect != md->md_size_sect;

	if (la_size_changed || md_moved || rs) {

		/* We do some synchronous IO below, which may take some time.
		 * Clear the timer, to avoid scary "timer expired!" messages,
		 * "Superblock" is written out at least twice below, anyways. */
		del_timer(&device->md_sync_timer);

		/* We won't change the "al-extents" setting, we just may need
		 * to move the on-disk location of the activity log ringbuffer.
		 * Lock for transaction is good enough, it may well be "dirty"
		 * or even "starving". */
		wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));

		/* mark current on-disk bitmap and activity log as unreliable */
		prev_flags = md->flags;
		md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
		drbd_md_write(device, buffer);

		drbd_al_initialize(device, buffer);

		drbd_info(device, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
			       "size changed", BM_LOCKED_MASK);

		/* on-disk bitmap and activity log is authoritative again
		 * (unless there was an IO error meanwhile...) */
		md->flags = prev_flags;
		drbd_md_write(device, buffer);

			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
				 md->al_stripes, md->al_stripe_size_4k * 4);

	if (size > prev.last_agreed_sect)
		rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
	if (size < prev.last_agreed_sect)

		/* restore previous offset and sizes */
		md->la_size_sect = prev.last_agreed_sect;
		md->md_offset = prev.md_offset;
		md->al_offset = prev.al_offset;
		md->bm_offset = prev.bm_offset;
		md->md_size_sect = prev.md_size_sect;
		md->al_stripes = prev.al_stripes;
		md->al_stripe_size_4k = prev.al_stripe_size_4k;
		md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_md_put_buffer(device);
	drbd_resume_io(device);
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
		  sector_t u_size, int assume_peer_has_space)
	sector_t p_size = device->p_size;   /* partner's disk size. */
	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
	sector_t m_size; /* my size */

	m_size = drbd_get_max_capacity(bdev);

	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
		drbd_warn(device, "Resize while not connected was forced by the user!\n");

	if (p_size && m_size) {
		size = min_t(sector_t, p_size, m_size);
		size = la_size_sect;
		if (m_size && m_size < size)
		if (p_size && p_size < size)

		drbd_err(device, "Both nodes diskless!\n");
			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
				(unsigned long)u_size>>1, (unsigned long)size>>1);
/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;

	if (device->act_log &&
	    device->act_log->nr_elements == dc->al_extents)

	t = device->act_log;
	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
		dc->al_extents, sizeof(struct lc_element), 0);

		drbd_err(device, "Cannot allocate act_log lru!\n");

	spin_lock_irq(&device->al_lock);
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
				drbd_err(device, "refcnt(%d)==%d\n",
					e->lc_number, e->refcnt);
			in_use += e->refcnt;
		device->act_log = n;
	spin_unlock_irq(&device->al_lock);
		drbd_err(device, "Activity log still in use!\n");

	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
	q->limits.discard_granularity = granularity;

static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
	/* when we introduced REQ_WRITE_SAME support, we also bumped
	 * our maximum supported batch bio size used for discards. */
	if (connection->agreed_features & DRBD_FF_WSAME)
		return DRBD_MAX_BBIO_SECTORS;
	/* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
	return AL_EXTENT_SIZE >> 9;
static void decide_on_discard_support(struct drbd_device *device,
			struct request_queue *q,
			struct request_queue *b,
			bool discard_zeroes_if_aligned)
	/* q = drbd device queue (device->rq_queue)
	 * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
	 *     or NULL if diskless
	 */
	struct drbd_connection *connection = first_peer_device(device)->connection;
	bool can_do = b ? blk_queue_discard(b) : true;

	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
		drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");

		/* We don't care for the granularity, really.
		 * Stacking limits below should fix it for the local
		 * device.  Whether or not it is a suitable granularity
		 * on the remote device is not our problem, really. If
		 * you care, you need to use devices with similar
		 * topology on all peers. */
		blk_queue_discard_granularity(q, 512);
		q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
		q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
		blk_queue_discard_granularity(q, 0);
		q->limits.max_discard_sectors = 0;
		q->limits.max_write_zeroes_sectors = 0;
static void fixup_discard_if_not_supported(struct request_queue *q)
	/* To avoid confusion, if this queue does not support discard, clear
	 * max_discard_sectors, which is what lsblk -D reports to the user.
	 * Older kernels got this wrong in "stack limits".
	 */
	if (!blk_queue_discard(q)) {
		blk_queue_max_discard_sectors(q, 0);
		blk_queue_discard_granularity(q, 0);
static void decide_on_write_same_support(struct drbd_device *device,
			struct request_queue *q,
			struct request_queue *b, struct o_qlim *o,
			bool disable_write_same)
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	bool can_do = b ? b->limits.max_write_same_sectors : true;

	if (can_do && disable_write_same) {
		drbd_info(peer_device, "WRITE_SAME disabled by config\n");

	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
		drbd_info(peer_device, "peer does not support WRITE_SAME\n");

		/* logical block size; queue_logical_block_size(NULL) is 512 */
		unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
		unsigned int me_lbs_b = queue_logical_block_size(b);
		unsigned int me_lbs = queue_logical_block_size(q);

		if (me_lbs_b != me_lbs) {
				"logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
			/* rather disable write same than trigger some BUG_ON later in the scsi layer. */
		if (me_lbs_b != peer_lbs) {
			drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
				drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
			me_lbs = max(me_lbs, me_lbs_b);
			/* We cannot change the logical block size of an in-use queue.
			 * We can only hope that access happens to be properly aligned.
			 * If not, the peer will likely produce an IO error, and detach. */
			if (peer_lbs > me_lbs) {
				if (device->state.role != R_PRIMARY) {
					blk_queue_logical_block_size(q, peer_lbs);
					drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
					drbd_warn(peer_device,
						"current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",

	if (can_do && !o->write_same_capable) {
		/* If we introduce an open-coded write-same loop on the receiving side,
		 * the peer would present itself as "capable". */
		drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");

	blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
				   unsigned int max_bio_size, struct o_qlim *o)
	struct request_queue * const q = device->rq_queue;
	unsigned int max_hw_sectors = max_bio_size >> 9;
	unsigned int max_segments = 0;
	struct request_queue *b = NULL;
	struct disk_conf *dc;
	bool discard_zeroes_if_aligned = true;
	bool disable_write_same = false;

		b = bdev->backing_bdev->bd_disk->queue;

		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
		dc = rcu_dereference(device->ldev->disk_conf);
		max_segments = dc->max_bio_bvecs;
		discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
		disable_write_same = dc->disable_write_same;

		blk_set_stacking_limits(&q->limits);

	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_SIZE-1);
	decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
	decide_on_write_same_support(device, q, b, o, disable_write_same);

		blk_queue_stack_limits(q, b);

		if (q->backing_dev_info->ra_pages !=
		    b->backing_dev_info->ra_pages) {
			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
				 q->backing_dev_info->ra_pages,
				 b->backing_dev_info->ra_pages);
			q->backing_dev_info->ra_pages =
						b->backing_dev_info->ra_pages;

	fixup_discard_if_not_supported(q);
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
	unsigned int now, new, local, peer;

	now = queue_max_hw_sectors(device->rq_queue) << 9;
	local = device->local_max_bio_size; /* possibly the last known value, from volatile memory */
	peer = device->peer_max_bio_size; /* possibly the last known value, from meta data */

		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
		device->local_max_bio_size = local;

	local = min(local, DRBD_MAX_BIO_SIZE);

	/* We may ignore peer limits if the peer is modern enough.
	   Because from 8.3.8 onwards the peer can use multiple
	   BIOs for a single peer_request */
	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		if (first_peer_device(device)->connection->agreed_pro_version < 94)
			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
			peer = DRBD_MAX_SIZE_H80_PACKET;
		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
			peer = DRBD_MAX_BIO_SIZE;

		/* We may later detach and re-attach on a disconnected Primary.
		 * Avoid having this setting jump back in that case.
		 * We want to store what we know the peer DRBD can handle,
		 * not what the peer IO backend can handle. */
		if (peer > device->peer_max_bio_size)
			device->peer_max_bio_size = peer;

	new = min(local, peer);

	if (device->state.role == R_PRIMARY && new < now)
		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

		drbd_info(device, "max BIO size = %u\n", new);

	drbd_setup_queue_param(device, bdev, new, o);
/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
	drbd_thread_start(&connection->worker);
	drbd_flush_workqueue(&connection->sender_work);

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
	spin_lock_irq(&connection->resource->req_lock);
	stop_threads = conn_all_vols_unconf(connection) &&
		connection->cstate == C_STANDALONE;
	spin_unlock_irq(&connection->resource->req_lock);

	/* ack_receiver thread and ack_sender workqueue are implicitly
	 * stopped by receiver in conn_disconnect() */
	drbd_thread_stop(&connection->receiver);
	drbd_thread_stop(&connection->worker);
/* Make sure IO is suspended before calling this function. */
static void drbd_suspend_al(struct drbd_device *device)
	if (!lc_try_lock(device->act_log)) {
		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");

	drbd_al_shrink(device);
	spin_lock_irq(&device->resource->req_lock);
	if (device->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
	spin_unlock_irq(&device->resource->req_lock);
	lc_unlock(device->act_log);

		drbd_info(device, "Suspended AL updates\n");
static bool should_set_defaults(struct genl_info *info)
	unsigned flags = ((struct drbd_genlmsghdr *)info->userhdr)->flags;
	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
	/* This is limited by 16 bit "slot" numbers,
	 * and by available on-disk context storage.
	 *
	 * Also (u16)~0 is special (denotes a "free" extent).
	 *
	 * One transaction occupies one 4kB on-disk block,
	 * we have n such blocks in the on disk ring buffer,
	 * the "current" transaction may fail (n-1),
	 * and there are 919 slot numbers of context information per transaction.
	 *
	 * 72 transaction blocks amounts to more than 2**16 context slots,
	 * so cap there first.
	 */
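	/* Added arithmetic sketch (editor's note, not in the original source):
	 * 72 transactions * 919 context slots = 66168 > 2**16 = 65536,
	 * which is why the 16 bit slot limit (max_al_nr) is applied first below. */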
	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
	const unsigned int sufficient_on_disk =
		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
		/AL_CONTEXT_PER_TRANSACTION;

	unsigned int al_size_4k = bdev->md.al_size_4k;

	if (al_size_4k > sufficient_on_disk)

	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
	return	a->disk_barrier != b->disk_barrier ||
		a->disk_flushes != b->disk_flushes ||
		a->disk_drain != b->disk_drain;
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
			       struct drbd_backing_dev *nbc)
	struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;

	if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
		disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
		disk_conf->al_extents = drbd_al_extents_max(nbc);

	if (!blk_queue_discard(q)) {
		if (disk_conf->rs_discard_granularity) {
			disk_conf->rs_discard_granularity = 0; /* disable feature */
			drbd_info(device, "rs_discard_granularity feature disabled\n");

	if (disk_conf->rs_discard_granularity) {
		int orig_value = disk_conf->rs_discard_granularity;

		if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
			disk_conf->rs_discard_granularity = q->limits.discard_granularity;

		remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
		disk_conf->rs_discard_granularity += remainder;

		if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
			disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;

		if (disk_conf->rs_discard_granularity != orig_value)
			drbd_info(device, "rs_discard_granularity changed to %d\n",
				  disk_conf->rs_discard_granularity);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct drbd_device *device;
	struct disk_conf *new_disk_conf, *old_disk_conf;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
	if (retcode != NO_ERROR)

	device = adm_ctx.device;
	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* we also need a disk
	 * to change the options on */
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;

	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;

	mutex_lock(&device->resource->conf_update);
	old_disk_conf = device->ldev->disk_conf;
	*new_disk_conf = *old_disk_conf;
	if (should_set_defaults(info))
		set_disk_conf_defaults(new_disk_conf);

	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));

	if (!expect(new_disk_conf->resync_rate >= 1))
		new_disk_conf->resync_rate = 1;

	sanitize_disk_conf(device, new_disk_conf, device->ldev);

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != device->rs_plan_s->size) {
		new_plan = fifo_alloc(fifo_size);
			drbd_err(device, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;

	drbd_suspend_io(device);
	wait_event(device->al_wait, lc_try_lock(device->act_log));
	drbd_al_shrink(device);
	err = drbd_check_al_size(device, new_disk_conf);
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_resume_io(device);

		retcode = ERR_NOMEM;

	lock_all_resources();
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode == NO_ERROR) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		drbd_resync_after_changed(device);
	unlock_all_resources();

	if (retcode != NO_ERROR)

		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);

	mutex_unlock(&device->resource->conf_update);

	if (new_disk_conf->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
		device->ldev->md.flags |= MDF_AL_DISABLED;

	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
		set_bit(MD_NO_FUA, &device->flags);

	if (write_ordering_changed(old_disk_conf, new_disk_conf))
		drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);

	if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
	||  old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
		drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	drbd_md_sync(device);

	if (device->state.conn >= C_CONNECTED) {
		struct drbd_peer_device *peer_device;

		for_each_peer_device(peer_device, device)
			drbd_send_sync_param(peer_device);

	kfree(old_disk_conf);
	mod_timer(&device->request_timer, jiffies + HZ);

	mutex_unlock(&device->resource->conf_update);
	kfree(new_disk_conf);

	mutex_unlock(&adm_ctx.resource->adm_mutex);
	drbd_adm_finish(&adm_ctx, info, retcode);
static struct block_device *open_backing_dev(struct drbd_device *device,
		const char *bdev_path, void *claim_ptr, bool do_bd_link)
	struct block_device *bdev;

	bdev = blkdev_get_by_path(bdev_path,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
		drbd_err(device, "open(\"%s\") failed with %ld\n",
				bdev_path, PTR_ERR(bdev));

	err = bd_link_disk_holder(bdev, device->vdisk);
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
		bdev = ERR_PTR(err);
static int open_backing_devices(struct drbd_device *device,
		struct disk_conf *new_disk_conf,
		struct drbd_backing_dev *nbc)
	struct block_device *bdev;

	bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
		return ERR_OPEN_DISK;
	nbc->backing_bdev = bdev;

	/*
	 * meta_dev_idx >= 0: external fixed size, possibly multiple
	 * drbd sharing one meta device.  TODO in that case, paranoia
	 * check that [md_bdev, meta_dev_idx] is not yet used by some
	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
	 * should check it for you already; but if you don't, or
	 * someone fooled it, we need to double check here)
	 */
	bdev = open_backing_dev(device, new_disk_conf->meta_dev,
		/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
		 * if potentially shared with other drbd minors */
			(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
		/* avoid double bd_claim_by_disk() for the same (source,target) tuple,
		 * as would happen with internal metadata. */
			(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
			 new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
		return ERR_OPEN_MD_DISK;
	nbc->md_bdev = bdev;
static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
		bd_unlink_disk_holder(bdev, device->vdisk);
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
	close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
	close_backing_dev(device, ldev->backing_bdev, true);

	kfree(ldev->disk_conf);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	struct drbd_peer_device *peer_device;
	struct drbd_connection *connection;
	int err;
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	sector_t max_possible_sectors;
	sector_t min_md_device_sectors;
	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
	struct disk_conf *new_disk_conf = NULL;
	struct lru_cache *resync_lru = NULL;
	struct fifo_buffer *new_plan = NULL;
	union drbd_state ns, os;
	enum drbd_state_rv rv;
	struct net_conf *nc;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	device = adm_ctx.device;
	mutex_lock(&adm_ctx.resource->adm_mutex);
	peer_device = first_peer_device(device);
	connection = peer_device->connection;
	conn_reconfig_start(connection);

	/* if you want to reconfigure, please tear down first */
	if (device->state.disk > D_DISKLESS) {
		retcode = ERR_DISK_CONFIGURED;
		goto fail;
	}
	/* It may just now have detached because of IO error.  Make sure
	 * drbd_ldev_destroy is done already, we may end up here very fast,
	 * e.g. if someone calls attach from the on-io-error handler,
	 * to realize a "hot spare" feature (not that I'd recommend that) */
	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));

	/* make sure there is no leftover from previous force-detach attempts */
	clear_bit(FORCE_DETACH, &device->flags);
	clear_bit(WAS_IO_ERROR, &device->flags);
	clear_bit(WAS_READ_ERROR, &device->flags);

	/* and no leftover from previously aborted resync or verify, either */
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);

	/* allocation not in the IO path, drbdsetup context */
	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
	if (!nbc) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	spin_lock_init(&nbc->md.uuid_lock);

	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	nbc->disk_conf = new_disk_conf;

	set_disk_conf_defaults(new_disk_conf);
	err = disk_conf_from_attrs(new_disk_conf, info);
	if (err) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail;
	}

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
	if (!new_plan) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (nc) {
		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
			rcu_read_unlock();
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}
	rcu_read_unlock();

	retcode = open_backing_devices(device, new_disk_conf, nbc);
	if (retcode != NO_ERROR)
		goto fail;

	if ((nbc->backing_bdev == nbc->md_bdev) !=
	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			1, 61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* Read our meta data super block early.
	 * This also sets other on-disk offsets. */
	retcode = drbd_md_read(device, nbc);
	if (retcode != NO_ERROR)
		goto fail;

	sanitize_disk_conf(device, new_disk_conf, nbc);

	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
			(unsigned long long) drbd_get_max_capacity(nbc),
			(unsigned long long) new_disk_conf->disk_size);
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < 0) {
		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
		/* at least one MB, otherwise it does not make sense */
		min_md_device_sectors = (2<<10);
	} else {
		max_possible_sectors = DRBD_MAX_SECTORS;
		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
	}

	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
		retcode = ERR_MD_DISK_TOO_SMALL;
		drbd_warn(device, "refusing attach: md-device too small, "
			"at least %llu sectors needed for this meta-disk type\n",
			(unsigned long long) min_md_device_sectors);
		goto fail;
	}

	/* Make sure the new disk is big enough
	 * (we may currently be R_PRIMARY with no local disk...) */
	if (drbd_get_max_capacity(nbc) <
	    drbd_get_capacity(device->this_bdev)) {
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);

	if (nbc->known_size > max_possible_sectors) {
		drbd_warn(device, "==> truncating very big lower level device "
			"to currently maximum possible %llu sectors <==\n",
			(unsigned long long) max_possible_sectors);
		if (new_disk_conf->meta_dev_idx >= 0)
			drbd_warn(device, "==>> using internal or flexible "
				"meta data may help <<==\n");
	}

	drbd_suspend_io(device);
	/* also wait for the last barrier ack. */
	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
	 * We need a way to either ignore barrier acks for barriers sent before a device
	 * was attached, or a way to wait for all pending barrier acks to come in.
	 * As barriers are counted per resource,
	 * we'd need to suspend io on all devices of a resource.
	 */
	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
	/* and for any other previously queued work */
	drbd_flush_workqueue(&connection->sender_work);

	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
	retcode = rv;  /* FIXME: Type mismatch. */
	drbd_resume_io(device);
	if (rv < SS_SUCCESS)
		goto fail;

	if (!get_ldev_if_state(device, D_ATTACHING))
		goto force_diskless;

	if (!device->bitmap) {
		if (drbd_bm_init(device)) {
			retcode = ERR_NOMEM;
			goto force_diskless_dec;
		}
	}

	if (device->state.conn < C_CONNECTED &&
	    device->state.role == R_PRIMARY && device->ed_uuid &&
	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
		    (unsigned long long)device->ed_uuid);
		retcode = ERR_DATA_NOT_CURRENT;
		goto force_diskless_dec;
	}

	/* Since we are diskless, fix the activity log first... */
	if (drbd_check_al_size(device, new_disk_conf)) {
		retcode = ERR_NOMEM;
		goto force_diskless_dec;
	}

	/* Prevent shrinking of consistent devices ! */
	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
		drbd_warn(device, "refusing to truncate a consistent device\n");
		retcode = ERR_DISK_TOO_SMALL;
		goto force_diskless_dec;
	}

	lock_all_resources();
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode != NO_ERROR) {
		unlock_all_resources();
		goto force_diskless_dec;
	}

	/* Reset the "barriers don't work" bits here, then force meta data to
	 * be written, to ensure we determine if barriers are supported. */
	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	/* Point of no return reached.
	 * Devices and memory are no longer released by error cleanup below.
	 * now device takes over responsibility, and the state engine should
	 * clean it up somewhere. */
	D_ASSERT(device, device->ldev == NULL);
	device->ldev = nbc;
	device->resync = resync_lru;
	device->rs_plan_s = new_plan;
	nbc = NULL;
	resync_lru = NULL;
	new_disk_conf = NULL;
	new_plan = NULL;

	drbd_resync_after_changed(device);
	drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
	unlock_all_resources();

	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
		set_bit(CRASHED_PRIMARY, &device->flags);
	else
		clear_bit(CRASHED_PRIMARY, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
		set_bit(CRASHED_PRIMARY, &device->flags);

	device->send_cnt = 0;
	device->recv_cnt = 0;
	device->read_cnt = 0;
	device->writ_cnt = 0;

	drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
	 * I just now recover from a hard crash,
	 * and have been R_PRIMARY before that crash.
	 *
	 * Now, if I had no connection before that crash
	 * (have been degraded R_PRIMARY), chances are that
	 * I won't find my peer now either.
	 *
	 * In that case, and _only_ in that case,
	 * we use the degr-wfc-timeout instead of the default,
	 * so we can automatically recover from a crash of a
	 * degraded but active "cluster" after a certain timeout.
	 */
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	if (device->state.role != R_PRIMARY &&
	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
		set_bit(USE_DEGR_WFC_T, &device->flags);

	dd = drbd_determine_dev_size(device, 0, NULL);
	if (dd <= DS_ERROR) {
		retcode = ERR_NOMEM_BITMAP;
		goto force_diskless_dec;
	} else if (dd == DS_GREW)
		set_bit(RESYNC_AFTER_NEG, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
		drbd_info(device, "Assuming that all blocks are out of sync "
			"(aka FullSync)\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
			"set_n_write from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	} else {
		if (drbd_bitmap_io(device, &drbd_bm_read,
			"read from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
		drbd_suspend_al(device); /* IO is still suspended here... */

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);
	ns = os;
	/* If MDF_CONSISTENT is not set go into inconsistent state,
	   otherwise investigate MDF_WasUpToDate...
	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
	   otherwise into D_CONSISTENT state.
	*/
	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
			ns.disk = D_CONSISTENT;
		else
			ns.disk = D_OUTDATED;
	} else {
		ns.disk = D_INCONSISTENT;
	}

	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
		ns.pdsk = D_OUTDATED;

	rcu_read_lock();
	if (ns.disk == D_CONSISTENT &&
	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
		ns.disk = D_UP_TO_DATE;

	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
	   this point, because drbd_request_state() modifies these
	   flags. */

	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	rcu_read_unlock();

	/* In case we are C_CONNECTED postpone any decision on the new disk
	   state after the negotiation phase. */
	if (device->state.conn == C_CONNECTED) {
		device->new_state_tmp.i = ns.i;
		ns.i = os.i;
		ns.disk = D_NEGOTIATING;

		/* We expect to receive up-to-date UUIDs soon.
		   To avoid a race in receive_state, free p_uuid while
		   holding req_lock. I.e. atomic with the state change */
		kfree(device->p_uuid);
		device->p_uuid = NULL;
	}

	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS)
		goto force_diskless_dec;

	mod_timer(&device->request_timer, jiffies + HZ);

	if (device->state.role == R_PRIMARY)
		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
	else
		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;

	drbd_md_mark_dirty(device);
	drbd_md_sync(device);

	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
	put_ldev(device);
	conn_reconfig_done(connection);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;

 force_diskless_dec:
	put_ldev(device);
 force_diskless:
	drbd_force_state(device, NS(disk, D_DISKLESS));
	drbd_md_sync(device);
 fail:
	conn_reconfig_done(connection);
	if (nbc) {
		close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
		close_backing_dev(device, nbc->backing_bdev, true);
		kfree(nbc);
	}
	kfree(new_disk_conf);
	lc_destroy(resync_lru);
	kfree(new_plan);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static int adm_detach(struct drbd_device *device, int force)
{
	if (force) {
		set_bit(FORCE_DETACH, &device->flags);
		drbd_force_state(device, NS(disk, D_FAILED));
		return SS_SUCCESS;
	}

	return drbd_request_detach_interruptible(device);
}

/* Detaching the disk is a process in multiple stages.  First we need to lock
 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
 * internal references as well.
 * Only then we have finally detached. */
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct detach_parms parms = { };
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
		err = detach_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_detach(adm_ctx.device, parms.force_detach);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static bool conn_resync_running(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	bool rv = false;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (device->state.conn == C_SYNC_SOURCE ||
		    device->state.conn == C_SYNC_TARGET ||
		    device->state.conn == C_PAUSED_SYNC_S ||
		    device->state.conn == C_PAUSED_SYNC_T) {
			rv = true;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}

static bool conn_ov_running(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	bool rv = false;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (device->state.conn == C_VERIFY_S ||
		    device->state.conn == C_VERIFY_T) {
			rv = true;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}
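/* These helpers are used by drbd_adm_net_opts() below: changing csums-alg
 * is refused while any volume of this connection is resyncing, and changing
 * verify-alg is refused while an online verify is running. */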
static enum drbd_ret_code
_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
{
	struct drbd_peer_device *peer_device;
	int i;

	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
			return ERR_NEED_APV_100;

		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
			return ERR_NEED_APV_100;

		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
			return ERR_NEED_APV_100;
	}

	if (!new_net_conf->two_primaries &&
	    conn_highest_role(connection) == R_PRIMARY &&
	    conn_highest_peer(connection) == R_PRIMARY)
		return ERR_NEED_ALLOW_TWO_PRI;

	if (new_net_conf->two_primaries &&
	    (new_net_conf->wire_protocol != DRBD_PROT_C))
		return ERR_NOT_PROTO_C;

	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev(device)) {
			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
			put_ldev(device);
			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
				return ERR_STONITH_AND_PROT_A;
		}
		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
			return ERR_DISCARD_IMPOSSIBLE;
	}

	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
		return ERR_CONG_NOT_PROTO_A;

	return NO_ERROR;
}

static enum drbd_ret_code
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
{
	enum drbd_ret_code rv;
	struct drbd_peer_device *peer_device;
	int i;

	rcu_read_lock();
	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
	rcu_read_unlock();

	/* connection->peer_devices protected by genl_lock() here */
	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct drbd_device *device = peer_device->device;
		if (!device->bitmap) {
			if (drbd_bm_init(device))
				return ERR_NOMEM;
		}
	}

	return rv;
}
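/* Constraints enforced above: changing wire-protocol, two-primaries or
 * integrity-alg on an established connection needs agreed protocol version
 * 100; allow-two-primaries requires protocol C; fencing resource-and-stonith
 * is incompatible with protocol A; and discard-my-data must not be set on
 * the node that is currently Primary. */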
struct crypto {
	struct crypto_ahash *verify_tfm;
	struct crypto_ahash *csums_tfm;
	struct crypto_shash *cram_hmac_tfm;
	struct crypto_ahash *integrity_tfm;
};

static int
alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
{
	if (!tfm_name[0])
		return NO_ERROR;

	*tfm = crypto_alloc_shash(tfm_name, 0, 0);
	if (IS_ERR(*tfm)) {
		*tfm = NULL;
		return err_alg;
	}

	return NO_ERROR;
}

static int
alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg)
{
	if (!tfm_name[0])
		return NO_ERROR;

	*tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(*tfm)) {
		*tfm = NULL;
		return err_alg;
	}

	return NO_ERROR;
}

static enum drbd_ret_code
alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
{
	char hmac_name[CRYPTO_MAX_ALG_NAME];
	enum drbd_ret_code rv;

	rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg,
			 ERR_CSUMS_ALG);
	if (rv != NO_ERROR)
		return rv;
	rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg,
			 ERR_VERIFY_ALG);
	if (rv != NO_ERROR)
		return rv;
	rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
			 ERR_INTEGRITY_ALG);
	if (rv != NO_ERROR)
		return rv;
	if (new_net_conf->cram_hmac_alg[0] != 0) {
		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
			 new_net_conf->cram_hmac_alg);

		rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
				 ERR_AUTH_ALG);
	}

	return rv;
}

static void free_crypto(struct crypto *crypto)
{
	crypto_free_shash(crypto->cram_hmac_tfm);
	crypto_free_ahash(crypto->integrity_tfm);
	crypto_free_ahash(crypto->csums_tfm);
	crypto_free_ahash(crypto->verify_tfm);
}
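/* alloc_crypto() allocates all transforms up front so drbd_adm_net_opts()
 * and drbd_adm_connect() can swap them into the connection without further
 * allocations; whatever was not handed over is released via free_crypto().
 * (The crypto_free_*() helpers are assumed to tolerate NULL members here.) */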
2379 int drbd_adm_net_opts(struct sk_buff
*skb
, struct genl_info
*info
)
2381 struct drbd_config_context adm_ctx
;
2382 enum drbd_ret_code retcode
;
2383 struct drbd_connection
*connection
;
2384 struct net_conf
*old_net_conf
, *new_net_conf
= NULL
;
2386 int ovr
; /* online verify running */
2387 int rsr
; /* re-sync running */
2388 struct crypto crypto
= { };
2390 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_CONNECTION
);
2391 if (!adm_ctx
.reply_skb
)
2393 if (retcode
!= NO_ERROR
)
2396 connection
= adm_ctx
.connection
;
2397 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2399 new_net_conf
= kzalloc(sizeof(struct net_conf
), GFP_KERNEL
);
2400 if (!new_net_conf
) {
2401 retcode
= ERR_NOMEM
;
2405 conn_reconfig_start(connection
);
2407 mutex_lock(&connection
->data
.mutex
);
2408 mutex_lock(&connection
->resource
->conf_update
);
2409 old_net_conf
= connection
->net_conf
;
2411 if (!old_net_conf
) {
2412 drbd_msg_put_info(adm_ctx
.reply_skb
, "net conf missing, try connect");
2413 retcode
= ERR_INVALID_REQUEST
;
2417 *new_net_conf
= *old_net_conf
;
2418 if (should_set_defaults(info
))
2419 set_net_conf_defaults(new_net_conf
);
2421 err
= net_conf_from_attrs_for_change(new_net_conf
, info
);
2422 if (err
&& err
!= -ENOMSG
) {
2423 retcode
= ERR_MANDATORY_TAG
;
2424 drbd_msg_put_info(adm_ctx
.reply_skb
, from_attrs_err_to_txt(err
));
2428 retcode
= check_net_options(connection
, new_net_conf
);
2429 if (retcode
!= NO_ERROR
)
2432 /* re-sync running */
2433 rsr
= conn_resync_running(connection
);
2434 if (rsr
&& strcmp(new_net_conf
->csums_alg
, old_net_conf
->csums_alg
)) {
2435 retcode
= ERR_CSUMS_RESYNC_RUNNING
;
2439 /* online verify running */
2440 ovr
= conn_ov_running(connection
);
2441 if (ovr
&& strcmp(new_net_conf
->verify_alg
, old_net_conf
->verify_alg
)) {
2442 retcode
= ERR_VERIFY_RUNNING
;
2446 retcode
= alloc_crypto(&crypto
, new_net_conf
);
2447 if (retcode
!= NO_ERROR
)
2450 rcu_assign_pointer(connection
->net_conf
, new_net_conf
);
2453 crypto_free_ahash(connection
->csums_tfm
);
2454 connection
->csums_tfm
= crypto
.csums_tfm
;
2455 crypto
.csums_tfm
= NULL
;
2458 crypto_free_ahash(connection
->verify_tfm
);
2459 connection
->verify_tfm
= crypto
.verify_tfm
;
2460 crypto
.verify_tfm
= NULL
;
2463 crypto_free_ahash(connection
->integrity_tfm
);
2464 connection
->integrity_tfm
= crypto
.integrity_tfm
;
2465 if (connection
->cstate
>= C_WF_REPORT_PARAMS
&& connection
->agreed_pro_version
>= 100)
2466 /* Do this without trying to take connection->data.mutex again. */
2467 __drbd_send_protocol(connection
, P_PROTOCOL_UPDATE
);
2469 crypto_free_shash(connection
->cram_hmac_tfm
);
2470 connection
->cram_hmac_tfm
= crypto
.cram_hmac_tfm
;
2472 mutex_unlock(&connection
->resource
->conf_update
);
2473 mutex_unlock(&connection
->data
.mutex
);
2475 kfree(old_net_conf
);
2477 if (connection
->cstate
>= C_WF_REPORT_PARAMS
) {
2478 struct drbd_peer_device
*peer_device
;
2481 idr_for_each_entry(&connection
->peer_devices
, peer_device
, vnr
)
2482 drbd_send_sync_param(peer_device
);
2488 mutex_unlock(&connection
->resource
->conf_update
);
2489 mutex_unlock(&connection
->data
.mutex
);
2490 free_crypto(&crypto
);
2491 kfree(new_net_conf
);
2493 conn_reconfig_done(connection
);
2495 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2497 drbd_adm_finish(&adm_ctx
, info
, retcode
);
static void connection_to_info(struct connection_info *info,
			       struct drbd_connection *connection)
{
	info->conn_connection_state = connection->cstate;
	info->conn_role = conn_highest_peer(connection);
}

static void peer_device_to_info(struct peer_device_info *info,
				struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;

	info->peer_repl_state =
		max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
	info->peer_disk_state = device->state.pdsk;
	info->peer_resync_susp_user = device->state.user_isp;
	info->peer_resync_susp_peer = device->state.peer_isp;
	info->peer_resync_susp_dependency = device->state.aftr_isp;
}
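/* connection_to_info() and peer_device_to_info() fill the structures that
 * drbd_adm_connect() broadcasts as NOTIFY_CREATE events; the dump callbacks
 * further down reuse peer_device_to_info() for their status replies. */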
2521 int drbd_adm_connect(struct sk_buff
*skb
, struct genl_info
*info
)
2523 struct connection_info connection_info
;
2524 enum drbd_notification_type flags
;
2525 unsigned int peer_devices
= 0;
2526 struct drbd_config_context adm_ctx
;
2527 struct drbd_peer_device
*peer_device
;
2528 struct net_conf
*old_net_conf
, *new_net_conf
= NULL
;
2529 struct crypto crypto
= { };
2530 struct drbd_resource
*resource
;
2531 struct drbd_connection
*connection
;
2532 enum drbd_ret_code retcode
;
2536 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_RESOURCE
);
2538 if (!adm_ctx
.reply_skb
)
2540 if (retcode
!= NO_ERROR
)
2542 if (!(adm_ctx
.my_addr
&& adm_ctx
.peer_addr
)) {
2543 drbd_msg_put_info(adm_ctx
.reply_skb
, "connection endpoint(s) missing");
2544 retcode
= ERR_INVALID_REQUEST
;
2548 /* No need for _rcu here. All reconfiguration is
2549 * strictly serialized on genl_lock(). We are protected against
2550 * concurrent reconfiguration/addition/deletion */
2551 for_each_resource(resource
, &drbd_resources
) {
2552 for_each_connection(connection
, resource
) {
2553 if (nla_len(adm_ctx
.my_addr
) == connection
->my_addr_len
&&
2554 !memcmp(nla_data(adm_ctx
.my_addr
), &connection
->my_addr
,
2555 connection
->my_addr_len
)) {
2556 retcode
= ERR_LOCAL_ADDR
;
2560 if (nla_len(adm_ctx
.peer_addr
) == connection
->peer_addr_len
&&
2561 !memcmp(nla_data(adm_ctx
.peer_addr
), &connection
->peer_addr
,
2562 connection
->peer_addr_len
)) {
2563 retcode
= ERR_PEER_ADDR
;
2569 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2570 connection
= first_connection(adm_ctx
.resource
);
2571 conn_reconfig_start(connection
);
2573 if (connection
->cstate
> C_STANDALONE
) {
2574 retcode
= ERR_NET_CONFIGURED
;
2578 /* allocation not in the IO path, drbdsetup / netlink process context */
2579 new_net_conf
= kzalloc(sizeof(*new_net_conf
), GFP_KERNEL
);
2580 if (!new_net_conf
) {
2581 retcode
= ERR_NOMEM
;
2585 set_net_conf_defaults(new_net_conf
);
2587 err
= net_conf_from_attrs(new_net_conf
, info
);
2588 if (err
&& err
!= -ENOMSG
) {
2589 retcode
= ERR_MANDATORY_TAG
;
2590 drbd_msg_put_info(adm_ctx
.reply_skb
, from_attrs_err_to_txt(err
));
2594 retcode
= check_net_options(connection
, new_net_conf
);
2595 if (retcode
!= NO_ERROR
)
2598 retcode
= alloc_crypto(&crypto
, new_net_conf
);
2599 if (retcode
!= NO_ERROR
)
2602 ((char *)new_net_conf
->shared_secret
)[SHARED_SECRET_MAX
-1] = 0;
2604 drbd_flush_workqueue(&connection
->sender_work
);
2606 mutex_lock(&adm_ctx
.resource
->conf_update
);
2607 old_net_conf
= connection
->net_conf
;
2609 retcode
= ERR_NET_CONFIGURED
;
2610 mutex_unlock(&adm_ctx
.resource
->conf_update
);
2613 rcu_assign_pointer(connection
->net_conf
, new_net_conf
);
2615 conn_free_crypto(connection
);
2616 connection
->cram_hmac_tfm
= crypto
.cram_hmac_tfm
;
2617 connection
->integrity_tfm
= crypto
.integrity_tfm
;
2618 connection
->csums_tfm
= crypto
.csums_tfm
;
2619 connection
->verify_tfm
= crypto
.verify_tfm
;
2621 connection
->my_addr_len
= nla_len(adm_ctx
.my_addr
);
2622 memcpy(&connection
->my_addr
, nla_data(adm_ctx
.my_addr
), connection
->my_addr_len
);
2623 connection
->peer_addr_len
= nla_len(adm_ctx
.peer_addr
);
2624 memcpy(&connection
->peer_addr
, nla_data(adm_ctx
.peer_addr
), connection
->peer_addr_len
);
2626 idr_for_each_entry(&connection
->peer_devices
, peer_device
, i
) {
2630 connection_to_info(&connection_info
, connection
);
2631 flags
= (peer_devices
--) ? NOTIFY_CONTINUES
: 0;
2632 mutex_lock(¬ification_mutex
);
2633 notify_connection_state(NULL
, 0, connection
, &connection_info
, NOTIFY_CREATE
| flags
);
2634 idr_for_each_entry(&connection
->peer_devices
, peer_device
, i
) {
2635 struct peer_device_info peer_device_info
;
2637 peer_device_to_info(&peer_device_info
, peer_device
);
2638 flags
= (peer_devices
--) ? NOTIFY_CONTINUES
: 0;
2639 notify_peer_device_state(NULL
, 0, peer_device
, &peer_device_info
, NOTIFY_CREATE
| flags
);
2641 mutex_unlock(¬ification_mutex
);
2642 mutex_unlock(&adm_ctx
.resource
->conf_update
);
2645 idr_for_each_entry(&connection
->peer_devices
, peer_device
, i
) {
2646 struct drbd_device
*device
= peer_device
->device
;
2647 device
->send_cnt
= 0;
2648 device
->recv_cnt
= 0;
2652 retcode
= conn_request_state(connection
, NS(conn
, C_UNCONNECTED
), CS_VERBOSE
);
2654 conn_reconfig_done(connection
);
2655 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2656 drbd_adm_finish(&adm_ctx
, info
, retcode
);
2660 free_crypto(&crypto
);
2661 kfree(new_net_conf
);
2663 conn_reconfig_done(connection
);
2664 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2666 drbd_adm_finish(&adm_ctx
, info
, retcode
);
2670 static enum drbd_state_rv
conn_try_disconnect(struct drbd_connection
*connection
, bool force
)
2672 enum drbd_state_rv rv
;
2674 rv
= conn_request_state(connection
, NS(conn
, C_DISCONNECTING
),
2675 force
? CS_HARD
: 0);
2678 case SS_NOTHING_TO_DO
:
2680 case SS_ALREADY_STANDALONE
:
2682 case SS_PRIMARY_NOP
:
2683 /* Our state checking code wants to see the peer outdated. */
2684 rv
= conn_request_state(connection
, NS2(conn
, C_DISCONNECTING
, pdsk
, D_OUTDATED
), 0);
2686 if (rv
== SS_OUTDATE_WO_CONN
) /* lost connection before graceful disconnect succeeded */
2687 rv
= conn_request_state(connection
, NS(conn
, C_DISCONNECTING
), CS_VERBOSE
);
2690 case SS_CW_FAILED_BY_PEER
:
2691 /* The peer probably wants to see us outdated. */
2692 rv
= conn_request_state(connection
, NS2(conn
, C_DISCONNECTING
,
2693 disk
, D_OUTDATED
), 0);
2694 if (rv
== SS_IS_DISKLESS
|| rv
== SS_LOWER_THAN_OUTDATED
) {
2695 rv
= conn_request_state(connection
, NS(conn
, C_DISCONNECTING
),
2700 /* no special handling necessary */
2703 if (rv
>= SS_SUCCESS
) {
2704 enum drbd_state_rv rv2
;
2705 /* No one else can reconfigure the network while I am here.
2706 * The state handling only uses drbd_thread_stop_nowait(),
2707 * we want to really wait here until the receiver is no more.
2709 drbd_thread_stop(&connection
->receiver
);
2711 /* Race breaker. This additional state change request may be
2712 * necessary, if this was a forced disconnect during a receiver
2713 * restart. We may have "killed" the receiver thread just
2714 * after drbd_receiver() returned. Typically, we should be
2715 * C_STANDALONE already, now, and this becomes a no-op.
2717 rv2
= conn_request_state(connection
, NS(conn
, C_STANDALONE
),
2718 CS_VERBOSE
| CS_HARD
);
2719 if (rv2
< SS_SUCCESS
)
2720 drbd_err(connection
,
2721 "unexpected rv2=%d in conn_try_disconnect()\n",
2723 /* Unlike in DRBD 9, the state engine has generated
2724 * NOTIFY_DESTROY events before clearing connection->net_conf. */
2729 int drbd_adm_disconnect(struct sk_buff
*skb
, struct genl_info
*info
)
2731 struct drbd_config_context adm_ctx
;
2732 struct disconnect_parms parms
;
2733 struct drbd_connection
*connection
;
2734 enum drbd_state_rv rv
;
2735 enum drbd_ret_code retcode
;
2738 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_CONNECTION
);
2739 if (!adm_ctx
.reply_skb
)
2741 if (retcode
!= NO_ERROR
)
2744 connection
= adm_ctx
.connection
;
2745 memset(&parms
, 0, sizeof(parms
));
2746 if (info
->attrs
[DRBD_NLA_DISCONNECT_PARMS
]) {
2747 err
= disconnect_parms_from_attrs(&parms
, info
);
2749 retcode
= ERR_MANDATORY_TAG
;
2750 drbd_msg_put_info(adm_ctx
.reply_skb
, from_attrs_err_to_txt(err
));
2755 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2756 rv
= conn_try_disconnect(connection
, parms
.force_disconnect
);
2757 if (rv
< SS_SUCCESS
)
2758 retcode
= rv
; /* FIXME: Type mismatch. */
2761 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2763 drbd_adm_finish(&adm_ctx
, info
, retcode
);
void resync_after_online_grow(struct drbd_device *device)
{
	int iass; /* I am sync source */

	drbd_info(device, "Resync of new storage after online grow\n");
	if (device->state.role != device->state.peer)
		iass = (device->state.role == R_PRIMARY);
	else
		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);

	if (iass)
		drbd_start_resync(device, C_SYNC_SOURCE);
	else
		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
}
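/* After an online grow exactly one side becomes sync source for the new
 * area: the Primary if the roles differ, otherwise the node holding the
 * RESOLVE_CONFLICTS flag; the other side waits in C_WF_SYNC_UUID for the
 * sync handshake. */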
2783 int drbd_adm_resize(struct sk_buff
*skb
, struct genl_info
*info
)
2785 struct drbd_config_context adm_ctx
;
2786 struct disk_conf
*old_disk_conf
, *new_disk_conf
= NULL
;
2787 struct resize_parms rs
;
2788 struct drbd_device
*device
;
2789 enum drbd_ret_code retcode
;
2790 enum determine_dev_size dd
;
2791 bool change_al_layout
= false;
2792 enum dds_flags ddsf
;
2796 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_MINOR
);
2797 if (!adm_ctx
.reply_skb
)
2799 if (retcode
!= NO_ERROR
)
2802 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2803 device
= adm_ctx
.device
;
2804 if (!get_ldev(device
)) {
2805 retcode
= ERR_NO_DISK
;
2809 memset(&rs
, 0, sizeof(struct resize_parms
));
2810 rs
.al_stripes
= device
->ldev
->md
.al_stripes
;
2811 rs
.al_stripe_size
= device
->ldev
->md
.al_stripe_size_4k
* 4;
2812 if (info
->attrs
[DRBD_NLA_RESIZE_PARMS
]) {
2813 err
= resize_parms_from_attrs(&rs
, info
);
2815 retcode
= ERR_MANDATORY_TAG
;
2816 drbd_msg_put_info(adm_ctx
.reply_skb
, from_attrs_err_to_txt(err
));
2821 if (device
->state
.conn
> C_CONNECTED
) {
2822 retcode
= ERR_RESIZE_RESYNC
;
2826 if (device
->state
.role
== R_SECONDARY
&&
2827 device
->state
.peer
== R_SECONDARY
) {
2828 retcode
= ERR_NO_PRIMARY
;
2832 if (rs
.no_resync
&& first_peer_device(device
)->connection
->agreed_pro_version
< 93) {
2833 retcode
= ERR_NEED_APV_93
;
2838 u_size
= rcu_dereference(device
->ldev
->disk_conf
)->disk_size
;
2840 if (u_size
!= (sector_t
)rs
.resize_size
) {
2841 new_disk_conf
= kmalloc(sizeof(struct disk_conf
), GFP_KERNEL
);
2842 if (!new_disk_conf
) {
2843 retcode
= ERR_NOMEM
;
2848 if (device
->ldev
->md
.al_stripes
!= rs
.al_stripes
||
2849 device
->ldev
->md
.al_stripe_size_4k
!= rs
.al_stripe_size
/ 4) {
2850 u32 al_size_k
= rs
.al_stripes
* rs
.al_stripe_size
;
2852 if (al_size_k
> (16 * 1024 * 1024)) {
2853 retcode
= ERR_MD_LAYOUT_TOO_BIG
;
2857 if (al_size_k
< MD_32kB_SECT
/2) {
2858 retcode
= ERR_MD_LAYOUT_TOO_SMALL
;
2862 if (device
->state
.conn
!= C_CONNECTED
&& !rs
.resize_force
) {
2863 retcode
= ERR_MD_LAYOUT_CONNECTED
;
2867 change_al_layout
= true;
2870 if (device
->ldev
->known_size
!= drbd_get_capacity(device
->ldev
->backing_bdev
))
2871 device
->ldev
->known_size
= drbd_get_capacity(device
->ldev
->backing_bdev
);
2873 if (new_disk_conf
) {
2874 mutex_lock(&device
->resource
->conf_update
);
2875 old_disk_conf
= device
->ldev
->disk_conf
;
2876 *new_disk_conf
= *old_disk_conf
;
2877 new_disk_conf
->disk_size
= (sector_t
)rs
.resize_size
;
2878 rcu_assign_pointer(device
->ldev
->disk_conf
, new_disk_conf
);
2879 mutex_unlock(&device
->resource
->conf_update
);
2881 kfree(old_disk_conf
);
2882 new_disk_conf
= NULL
;
2885 ddsf
= (rs
.resize_force
? DDSF_FORCED
: 0) | (rs
.no_resync
? DDSF_NO_RESYNC
: 0);
2886 dd
= drbd_determine_dev_size(device
, ddsf
, change_al_layout
? &rs
: NULL
);
2887 drbd_md_sync(device
);
2889 if (dd
== DS_ERROR
) {
2890 retcode
= ERR_NOMEM_BITMAP
;
2892 } else if (dd
== DS_ERROR_SPACE_MD
) {
2893 retcode
= ERR_MD_LAYOUT_NO_FIT
;
2895 } else if (dd
== DS_ERROR_SHRINK
) {
2896 retcode
= ERR_IMPLICIT_SHRINK
;
2900 if (device
->state
.conn
== C_CONNECTED
) {
2902 set_bit(RESIZE_PENDING
, &device
->flags
);
2904 drbd_send_uuids(first_peer_device(device
));
2905 drbd_send_sizes(first_peer_device(device
), 1, ddsf
);
2909 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2911 drbd_adm_finish(&adm_ctx
, info
, retcode
);
2916 kfree(new_disk_conf
);
2920 int drbd_adm_resource_opts(struct sk_buff
*skb
, struct genl_info
*info
)
2922 struct drbd_config_context adm_ctx
;
2923 enum drbd_ret_code retcode
;
2924 struct res_opts res_opts
;
2927 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_RESOURCE
);
2928 if (!adm_ctx
.reply_skb
)
2930 if (retcode
!= NO_ERROR
)
2933 res_opts
= adm_ctx
.resource
->res_opts
;
2934 if (should_set_defaults(info
))
2935 set_res_opts_defaults(&res_opts
);
2937 err
= res_opts_from_attrs(&res_opts
, info
);
2938 if (err
&& err
!= -ENOMSG
) {
2939 retcode
= ERR_MANDATORY_TAG
;
2940 drbd_msg_put_info(adm_ctx
.reply_skb
, from_attrs_err_to_txt(err
));
2944 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2945 err
= set_resource_options(adm_ctx
.resource
, &res_opts
);
2947 retcode
= ERR_INVALID_REQUEST
;
2949 retcode
= ERR_NOMEM
;
2951 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
2954 drbd_adm_finish(&adm_ctx
, info
, retcode
);
2958 int drbd_adm_invalidate(struct sk_buff
*skb
, struct genl_info
*info
)
2960 struct drbd_config_context adm_ctx
;
2961 struct drbd_device
*device
;
2962 int retcode
; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2964 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_MINOR
);
2965 if (!adm_ctx
.reply_skb
)
2967 if (retcode
!= NO_ERROR
)
2970 device
= adm_ctx
.device
;
2971 if (!get_ldev(device
)) {
2972 retcode
= ERR_NO_DISK
;
2976 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
2978 /* If there is still bitmap IO pending, probably because of a previous
2979 * resync just being finished, wait for it before requesting a new resync.
2980 * Also wait for it's after_state_ch(). */
2981 drbd_suspend_io(device
);
2982 wait_event(device
->misc_wait
, !test_bit(BITMAP_IO
, &device
->flags
));
2983 drbd_flush_workqueue(&first_peer_device(device
)->connection
->sender_work
);
2985 /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2986 * D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
2987 * try to start a resync handshake as sync target for full sync.
2989 if (device
->state
.conn
== C_STANDALONE
&& device
->state
.role
== R_SECONDARY
) {
2990 retcode
= drbd_request_state(device
, NS(disk
, D_INCONSISTENT
));
2991 if (retcode
>= SS_SUCCESS
) {
2992 if (drbd_bitmap_io(device
, &drbd_bmio_set_n_write
,
2993 "set_n_write from invalidate", BM_LOCKED_MASK
))
2994 retcode
= ERR_IO_MD_DISK
;
2997 retcode
= drbd_request_state(device
, NS(conn
, C_STARTING_SYNC_T
));
2998 drbd_resume_io(device
);
2999 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
3002 drbd_adm_finish(&adm_ctx
, info
, retcode
);
static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
		union drbd_state mask, union drbd_state val)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = drbd_request_state(adm_ctx.device, mask, val);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
{
	int rv;

	rv = drbd_bmio_set_n_write(device);
	drbd_suspend_al(device);
	return rv;
}
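/* drbd_bmio_set_susp_al() marks all blocks out of sync and then suspends
 * activity-log updates: during the following full sync every extent gets
 * written anyway, so per-extent AL transactions would only add meta-data
 * writes without adding crash safety. */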
3035 int drbd_adm_invalidate_peer(struct sk_buff
*skb
, struct genl_info
*info
)
3037 struct drbd_config_context adm_ctx
;
3038 int retcode
; /* drbd_ret_code, drbd_state_rv */
3039 struct drbd_device
*device
;
3041 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_MINOR
);
3042 if (!adm_ctx
.reply_skb
)
3044 if (retcode
!= NO_ERROR
)
3047 device
= adm_ctx
.device
;
3048 if (!get_ldev(device
)) {
3049 retcode
= ERR_NO_DISK
;
3053 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
3055 /* If there is still bitmap IO pending, probably because of a previous
3056 * resync just being finished, wait for it before requesting a new resync.
3057 * Also wait for it's after_state_ch(). */
3058 drbd_suspend_io(device
);
3059 wait_event(device
->misc_wait
, !test_bit(BITMAP_IO
, &device
->flags
));
3060 drbd_flush_workqueue(&first_peer_device(device
)->connection
->sender_work
);
3062 /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
3063 * in the bitmap. Otherwise, try to start a resync handshake
3064 * as sync source for full sync.
3066 if (device
->state
.conn
== C_STANDALONE
&& device
->state
.role
== R_PRIMARY
) {
3067 /* The peer will get a resync upon connect anyways. Just make that
3068 into a full resync. */
3069 retcode
= drbd_request_state(device
, NS(pdsk
, D_INCONSISTENT
));
3070 if (retcode
>= SS_SUCCESS
) {
3071 if (drbd_bitmap_io(device
, &drbd_bmio_set_susp_al
,
3072 "set_n_write from invalidate_peer",
3073 BM_LOCKED_SET_ALLOWED
))
3074 retcode
= ERR_IO_MD_DISK
;
3077 retcode
= drbd_request_state(device
, NS(conn
, C_STARTING_SYNC_S
));
3078 drbd_resume_io(device
);
3079 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
3082 drbd_adm_finish(&adm_ctx
, info
, retcode
);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
		retcode = ERR_PAUSE_IS_SET;
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	union drbd_dev_state s;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
		s = adm_ctx.device->state;
		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
		} else {
			retcode = ERR_PAUSE_IS_CLEAR;
		}
	}
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
{
	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
}
3139 int drbd_adm_resume_io(struct sk_buff
*skb
, struct genl_info
*info
)
3141 struct drbd_config_context adm_ctx
;
3142 struct drbd_device
*device
;
3143 int retcode
; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3145 retcode
= drbd_adm_prepare(&adm_ctx
, skb
, info
, DRBD_ADM_NEED_MINOR
);
3146 if (!adm_ctx
.reply_skb
)
3148 if (retcode
!= NO_ERROR
)
3151 mutex_lock(&adm_ctx
.resource
->adm_mutex
);
3152 device
= adm_ctx
.device
;
3153 if (test_bit(NEW_CUR_UUID
, &device
->flags
)) {
3154 if (get_ldev_if_state(device
, D_ATTACHING
)) {
3155 drbd_uuid_new_current(device
);
3158 /* This is effectively a multi-stage "forced down".
3159 * The NEW_CUR_UUID bit is supposedly only set, if we
3160 * lost the replication connection, and are configured
3161 * to freeze IO and wait for some fence-peer handler.
3162 * So we still don't have a replication connection.
3163 * And now we don't have a local disk either. After
3164 * resume, we will fail all pending and new IO, because
3165 * we don't have any data anymore. Which means we will
3166 * eventually be able to terminate all users of this
3167 * device, and then take it down. By bumping the
3168 * "effective" data uuid, we make sure that you really
3169 * need to tear down before you reconfigure, we will
3170 * the refuse to re-connect or re-attach (because no
3171 * matching real data uuid exists).
3174 get_random_bytes(&val
, sizeof(u64
));
3175 drbd_set_ed_uuid(device
, val
);
3176 drbd_warn(device
, "Resumed without access to data; please tear down before attempting to re-configure.\n");
3178 clear_bit(NEW_CUR_UUID
, &device
->flags
);
3180 drbd_suspend_io(device
);
3181 retcode
= drbd_request_state(device
, NS3(susp
, 0, susp_nod
, 0, susp_fen
, 0));
3182 if (retcode
== SS_SUCCESS
) {
3183 if (device
->state
.conn
< C_CONNECTED
)
3184 tl_clear(first_peer_device(device
)->connection
);
3185 if (device
->state
.disk
== D_DISKLESS
|| device
->state
.disk
== D_FAILED
)
3186 tl_restart(first_peer_device(device
)->connection
, FAIL_FROZEN_DISK_IO
);
3188 drbd_resume_io(device
);
3189 mutex_unlock(&adm_ctx
.resource
->adm_mutex
);
3191 drbd_adm_finish(&adm_ctx
, info
, retcode
);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
{
	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
}
3200 static int nla_put_drbd_cfg_context(struct sk_buff
*skb
,
3201 struct drbd_resource
*resource
,
3202 struct drbd_connection
*connection
,
3203 struct drbd_device
*device
)
3206 nla
= nla_nest_start(skb
, DRBD_NLA_CFG_CONTEXT
);
3208 goto nla_put_failure
;
3210 nla_put_u32(skb
, T_ctx_volume
, device
->vnr
))
3211 goto nla_put_failure
;
3212 if (nla_put_string(skb
, T_ctx_resource_name
, resource
->name
))
3213 goto nla_put_failure
;
3215 if (connection
->my_addr_len
&&
3216 nla_put(skb
, T_ctx_my_addr
, connection
->my_addr_len
, &connection
->my_addr
))
3217 goto nla_put_failure
;
3218 if (connection
->peer_addr_len
&&
3219 nla_put(skb
, T_ctx_peer_addr
, connection
->peer_addr_len
, &connection
->peer_addr
))
3220 goto nla_put_failure
;
3222 nla_nest_end(skb
, nla
);
3227 nla_nest_cancel(skb
, nla
);
3232 * The generic netlink dump callbacks are called outside the genl_lock(), so
3233 * they cannot use the simple attribute parsing code which uses global
3236 static struct nlattr
*find_cfg_context_attr(const struct nlmsghdr
*nlh
, int attr
)
3238 const unsigned hdrlen
= GENL_HDRLEN
+ GENL_MAGIC_FAMILY_HDRSZ
;
3239 const int maxtype
= ARRAY_SIZE(drbd_cfg_context_nl_policy
) - 1;
3242 nla
= nla_find(nlmsg_attrdata(nlh
, hdrlen
), nlmsg_attrlen(nlh
, hdrlen
),
3243 DRBD_NLA_CFG_CONTEXT
);
3246 return drbd_nla_find_nested(maxtype
, nla
, __nla_type(attr
));
3249 static void resource_to_info(struct resource_info
*, struct drbd_resource
*);
3251 int drbd_adm_dump_resources(struct sk_buff
*skb
, struct netlink_callback
*cb
)
3253 struct drbd_genlmsghdr
*dh
;
3254 struct drbd_resource
*resource
;
3255 struct resource_info resource_info
;
3256 struct resource_statistics resource_statistics
;
3261 for_each_resource_rcu(resource
, &drbd_resources
)
3262 if (resource
== (struct drbd_resource
*)cb
->args
[0])
3263 goto found_resource
;
3264 err
= 0; /* resource was probably deleted */
3267 resource
= list_entry(&drbd_resources
,
3268 struct drbd_resource
, resources
);
3271 list_for_each_entry_continue_rcu(resource
, &drbd_resources
, resources
) {
3278 dh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
3279 cb
->nlh
->nlmsg_seq
, &drbd_genl_family
,
3280 NLM_F_MULTI
, DRBD_ADM_GET_RESOURCES
);
3285 dh
->ret_code
= NO_ERROR
;
3286 err
= nla_put_drbd_cfg_context(skb
, resource
, NULL
, NULL
);
3289 err
= res_opts_to_skb(skb
, &resource
->res_opts
, !capable(CAP_SYS_ADMIN
));
3292 resource_to_info(&resource_info
, resource
);
3293 err
= resource_info_to_skb(skb
, &resource_info
, !capable(CAP_SYS_ADMIN
));
3296 resource_statistics
.res_stat_write_ordering
= resource
->write_ordering
;
3297 err
= resource_statistics_to_skb(skb
, &resource_statistics
, !capable(CAP_SYS_ADMIN
));
3300 cb
->args
[0] = (long)resource
;
3301 genlmsg_end(skb
, dh
);
3311 static void device_to_statistics(struct device_statistics
*s
,
3312 struct drbd_device
*device
)
3314 memset(s
, 0, sizeof(*s
));
3315 s
->dev_upper_blocked
= !may_inc_ap_bio(device
);
3316 if (get_ldev(device
)) {
3317 struct drbd_md
*md
= &device
->ldev
->md
;
3318 u64
*history_uuids
= (u64
*)s
->history_uuids
;
3319 struct request_queue
*q
;
3322 spin_lock_irq(&md
->uuid_lock
);
3323 s
->dev_current_uuid
= md
->uuid
[UI_CURRENT
];
3324 BUILD_BUG_ON(sizeof(s
->history_uuids
) < UI_HISTORY_END
- UI_HISTORY_START
+ 1);
3325 for (n
= 0; n
< UI_HISTORY_END
- UI_HISTORY_START
+ 1; n
++)
3326 history_uuids
[n
] = md
->uuid
[UI_HISTORY_START
+ n
];
3327 for (; n
< HISTORY_UUIDS
; n
++)
3328 history_uuids
[n
] = 0;
3329 s
->history_uuids_len
= HISTORY_UUIDS
;
3330 spin_unlock_irq(&md
->uuid_lock
);
3332 s
->dev_disk_flags
= md
->flags
;
3333 q
= bdev_get_queue(device
->ldev
->backing_bdev
);
3334 s
->dev_lower_blocked
=
3335 bdi_congested(q
->backing_dev_info
,
3336 (1 << WB_async_congested
) |
3337 (1 << WB_sync_congested
));
3340 s
->dev_size
= drbd_get_capacity(device
->this_bdev
);
3341 s
->dev_read
= device
->read_cnt
;
3342 s
->dev_write
= device
->writ_cnt
;
3343 s
->dev_al_writes
= device
->al_writ_cnt
;
3344 s
->dev_bm_writes
= device
->bm_writ_cnt
;
3345 s
->dev_upper_pending
= atomic_read(&device
->ap_bio_cnt
);
3346 s
->dev_lower_pending
= atomic_read(&device
->local_cnt
);
3347 s
->dev_al_suspended
= test_bit(AL_SUSPENDED
, &device
->flags
);
3348 s
->dev_exposed_data_uuid
= device
->ed_uuid
;
3351 static int put_resource_in_arg0(struct netlink_callback
*cb
, int holder_nr
)
3354 struct drbd_resource
*resource
=
3355 (struct drbd_resource
*)cb
->args
[0];
3356 kref_put(&resource
->kref
, drbd_destroy_resource
);
3362 int drbd_adm_dump_devices_done(struct netlink_callback
*cb
) {
3363 return put_resource_in_arg0(cb
, 7);
3366 static void device_to_info(struct device_info
*, struct drbd_device
*);
3368 int drbd_adm_dump_devices(struct sk_buff
*skb
, struct netlink_callback
*cb
)
3370 struct nlattr
*resource_filter
;
3371 struct drbd_resource
*resource
;
3372 struct drbd_device
*uninitialized_var(device
);
3373 int minor
, err
, retcode
;
3374 struct drbd_genlmsghdr
*dh
;
3375 struct device_info device_info
;
3376 struct device_statistics device_statistics
;
3377 struct idr
*idr_to_search
;
3379 resource
= (struct drbd_resource
*)cb
->args
[0];
3380 if (!cb
->args
[0] && !cb
->args
[1]) {
3381 resource_filter
= find_cfg_context_attr(cb
->nlh
, T_ctx_resource_name
);
3382 if (resource_filter
) {
3383 retcode
= ERR_RES_NOT_KNOWN
;
3384 resource
= drbd_find_resource(nla_data(resource_filter
));
3387 cb
->args
[0] = (long)resource
;
3392 minor
= cb
->args
[1];
3393 idr_to_search
= resource
? &resource
->devices
: &drbd_devices
;
3394 device
= idr_get_next(idr_to_search
, &minor
);
3399 idr_for_each_entry_continue(idr_to_search
, device
, minor
) {
3401 goto put_result
; /* only one iteration */
3404 goto out
; /* no more devices */
3407 dh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
3408 cb
->nlh
->nlmsg_seq
, &drbd_genl_family
,
3409 NLM_F_MULTI
, DRBD_ADM_GET_DEVICES
);
3413 dh
->ret_code
= retcode
;
3415 if (retcode
== NO_ERROR
) {
3416 dh
->minor
= device
->minor
;
3417 err
= nla_put_drbd_cfg_context(skb
, device
->resource
, NULL
, device
);
3420 if (get_ldev(device
)) {
3421 struct disk_conf
*disk_conf
=
3422 rcu_dereference(device
->ldev
->disk_conf
);
3424 err
= disk_conf_to_skb(skb
, disk_conf
, !capable(CAP_SYS_ADMIN
));
3429 device_to_info(&device_info
, device
);
3430 err
= device_info_to_skb(skb
, &device_info
, !capable(CAP_SYS_ADMIN
));
3434 device_to_statistics(&device_statistics
, device
);
3435 err
= device_statistics_to_skb(skb
, &device_statistics
, !capable(CAP_SYS_ADMIN
));
3438 cb
->args
[1] = minor
+ 1;
3440 genlmsg_end(skb
, dh
);
3450 int drbd_adm_dump_connections_done(struct netlink_callback
*cb
)
3452 return put_resource_in_arg0(cb
, 6);
3455 enum { SINGLE_RESOURCE
, ITERATE_RESOURCES
};
3457 int drbd_adm_dump_connections(struct sk_buff
*skb
, struct netlink_callback
*cb
)
3459 struct nlattr
*resource_filter
;
3460 struct drbd_resource
*resource
= NULL
, *next_resource
;
3461 struct drbd_connection
*uninitialized_var(connection
);
3462 int err
= 0, retcode
;
3463 struct drbd_genlmsghdr
*dh
;
3464 struct connection_info connection_info
;
3465 struct connection_statistics connection_statistics
;
3468 resource
= (struct drbd_resource
*)cb
->args
[0];
3470 resource_filter
= find_cfg_context_attr(cb
->nlh
, T_ctx_resource_name
);
3471 if (resource_filter
) {
3472 retcode
= ERR_RES_NOT_KNOWN
;
3473 resource
= drbd_find_resource(nla_data(resource_filter
));
3476 cb
->args
[0] = (long)resource
;
3477 cb
->args
[1] = SINGLE_RESOURCE
;
3481 if (list_empty(&drbd_resources
))
3483 resource
= list_first_entry(&drbd_resources
, struct drbd_resource
, resources
);
3484 kref_get(&resource
->kref
);
3485 cb
->args
[0] = (long)resource
;
3486 cb
->args
[1] = ITERATE_RESOURCES
;
3491 mutex_lock(&resource
->conf_update
);
3494 for_each_connection_rcu(connection
, resource
)
3495 if (connection
== (struct drbd_connection
*)cb
->args
[2])
3496 goto found_connection
;
3497 /* connection was probably deleted */
3498 goto no_more_connections
;
3500 connection
= list_entry(&resource
->connections
, struct drbd_connection
, connections
);
3503 list_for_each_entry_continue_rcu(connection
, &resource
->connections
, connections
) {
3504 if (!has_net_conf(connection
))
3507 goto put_result
; /* only one iteration */
3510 no_more_connections
:
3511 if (cb
->args
[1] == ITERATE_RESOURCES
) {
3512 for_each_resource_rcu(next_resource
, &drbd_resources
) {
3513 if (next_resource
== resource
)
3514 goto found_resource
;
3516 /* resource was probably deleted */
3521 list_for_each_entry_continue_rcu(next_resource
, &drbd_resources
, resources
) {
3522 mutex_unlock(&resource
->conf_update
);
3523 kref_put(&resource
->kref
, drbd_destroy_resource
);
3524 resource
= next_resource
;
3525 kref_get(&resource
->kref
);
3526 cb
->args
[0] = (long)resource
;
3530 goto out
; /* no more resources */
3533 dh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
3534 cb
->nlh
->nlmsg_seq
, &drbd_genl_family
,
3535 NLM_F_MULTI
, DRBD_ADM_GET_CONNECTIONS
);
3539 dh
->ret_code
= retcode
;
3541 if (retcode
== NO_ERROR
) {
3542 struct net_conf
*net_conf
;
3544 err
= nla_put_drbd_cfg_context(skb
, resource
, connection
, NULL
);
3547 net_conf
= rcu_dereference(connection
->net_conf
);
3549 err
= net_conf_to_skb(skb
, net_conf
, !capable(CAP_SYS_ADMIN
));
3553 connection_to_info(&connection_info
, connection
);
3554 err
= connection_info_to_skb(skb
, &connection_info
, !capable(CAP_SYS_ADMIN
));
3557 connection_statistics
.conn_congested
= test_bit(NET_CONGESTED
, &connection
->flags
);
3558 err
= connection_statistics_to_skb(skb
, &connection_statistics
, !capable(CAP_SYS_ADMIN
));
3561 cb
->args
[2] = (long)connection
;
3563 genlmsg_end(skb
, dh
);
3569 mutex_unlock(&resource
->conf_update
);
3575 enum mdf_peer_flag
{
3576 MDF_PEER_CONNECTED
= 1 << 0,
3577 MDF_PEER_OUTDATED
= 1 << 1,
3578 MDF_PEER_FENCING
= 1 << 2,
3579 MDF_PEER_FULL_SYNC
= 1 << 3,
3582 static void peer_device_to_statistics(struct peer_device_statistics
*s
,
3583 struct drbd_peer_device
*peer_device
)
3585 struct drbd_device
*device
= peer_device
->device
;
3587 memset(s
, 0, sizeof(*s
));
3588 s
->peer_dev_received
= device
->recv_cnt
;
3589 s
->peer_dev_sent
= device
->send_cnt
;
3590 s
->peer_dev_pending
= atomic_read(&device
->ap_pending_cnt
) +
3591 atomic_read(&device
->rs_pending_cnt
);
3592 s
->peer_dev_unacked
= atomic_read(&device
->unacked_cnt
);
3593 s
->peer_dev_out_of_sync
= drbd_bm_total_weight(device
) << (BM_BLOCK_SHIFT
- 9);
3594 s
->peer_dev_resync_failed
= device
->rs_failed
<< (BM_BLOCK_SHIFT
- 9);
3595 if (get_ldev(device
)) {
3596 struct drbd_md
*md
= &device
->ldev
->md
;
3598 spin_lock_irq(&md
->uuid_lock
);
3599 s
->peer_dev_bitmap_uuid
= md
->uuid
[UI_BITMAP
];
3600 spin_unlock_irq(&md
->uuid_lock
);
3602 (drbd_md_test_flag(device
->ldev
, MDF_CONNECTED_IND
) ?
3603 MDF_PEER_CONNECTED
: 0) +
3604 (drbd_md_test_flag(device
->ldev
, MDF_CONSISTENT
) &&
3605 !drbd_md_test_flag(device
->ldev
, MDF_WAS_UP_TO_DATE
) ?
3606 MDF_PEER_OUTDATED
: 0) +
3607 /* FIXME: MDF_PEER_FENCING? */
3608 (drbd_md_test_flag(device
->ldev
, MDF_FULL_SYNC
) ?
3609 MDF_PEER_FULL_SYNC
: 0);
3614 int drbd_adm_dump_peer_devices_done(struct netlink_callback
*cb
)
3616 return put_resource_in_arg0(cb
, 9);
3619 int drbd_adm_dump_peer_devices(struct sk_buff
*skb
, struct netlink_callback
*cb
)
3621 struct nlattr
*resource_filter
;
3622 struct drbd_resource
*resource
;
3623 struct drbd_device
*uninitialized_var(device
);
3624 struct drbd_peer_device
*peer_device
= NULL
;
3625 int minor
, err
, retcode
;
3626 struct drbd_genlmsghdr
*dh
;
3627 struct idr
*idr_to_search
;
3629 resource
= (struct drbd_resource
*)cb
->args
[0];
3630 if (!cb
->args
[0] && !cb
->args
[1]) {
3631 resource_filter
= find_cfg_context_attr(cb
->nlh
, T_ctx_resource_name
);
3632 if (resource_filter
) {
3633 retcode
= ERR_RES_NOT_KNOWN
;
3634 resource
= drbd_find_resource(nla_data(resource_filter
));
3638 cb
->args
[0] = (long)resource
;
3642 minor
= cb
->args
[1];
3643 idr_to_search
= resource
? &resource
->devices
: &drbd_devices
;
3644 device
= idr_find(idr_to_search
, minor
);
3649 device
= idr_get_next(idr_to_search
, &minor
);
3656 for_each_peer_device(peer_device
, device
)
3657 if (peer_device
== (struct drbd_peer_device
*)cb
->args
[2])
3658 goto found_peer_device
;
3659 /* peer device was probably deleted */
3662 /* Make peer_device point to the list head (not the first entry). */
3663 peer_device
= list_entry(&device
->peer_devices
, struct drbd_peer_device
, peer_devices
);
3666 list_for_each_entry_continue_rcu(peer_device
, &device
->peer_devices
, peer_devices
) {
3667 if (!has_net_conf(peer_device
->connection
))
3670 goto put_result
; /* only one iteration */
3675 dh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
3676 cb
->nlh
->nlmsg_seq
, &drbd_genl_family
,
3677 NLM_F_MULTI
, DRBD_ADM_GET_PEER_DEVICES
);
3681 dh
->ret_code
= retcode
;
3683 if (retcode
== NO_ERROR
) {
3684 struct peer_device_info peer_device_info
;
3685 struct peer_device_statistics peer_device_statistics
;
3688 err
= nla_put_drbd_cfg_context(skb
, device
->resource
, peer_device
->connection
, device
);
3691 peer_device_to_info(&peer_device_info
, peer_device
);
3692 err
= peer_device_info_to_skb(skb
, &peer_device_info
, !capable(CAP_SYS_ADMIN
));
3695 peer_device_to_statistics(&peer_device_statistics
, peer_device
);
3696 err
= peer_device_statistics_to_skb(skb
, &peer_device_statistics
, !capable(CAP_SYS_ADMIN
));
3699 cb
->args
[1] = minor
;
3700 cb
->args
[2] = (long)peer_device
;
3702 genlmsg_end(skb
, dh
);
/*
 * Return the connection of @resource if @resource has exactly one connection.
 */
static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
{
	struct list_head *connections = &resource->connections;

	if (list_empty(connections) || connections->next->next != connections)
		return NULL;
	return list_first_entry(&resource->connections, struct drbd_connection, connections);
}
static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
		const struct sib_info *sib)
{
	struct drbd_resource *resource = device->resource;
	struct state_info *si = NULL; /* for sizeof(si->member); */
	struct nlattr *nla;
	int got_ldev;
	int err = 0;
	int exclude_sensitive;

	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
	 * to.  So we better exclude_sensitive information.
	 *
	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
	 * in the context of the requesting user process. Exclude sensitive
	 * information, unless current has superuser.
	 *
	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
	 * relies on the current implementation of netlink_dump(), which
	 * executes the dump callback successively from netlink_recvmsg(),
	 * always in the context of the receiving process */
	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);

	got_ldev = get_ldev(device);

	/* We need to add connection name and volume number information still.
	 * Minor number is in drbd_genlmsghdr. */
	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
		goto nla_put_failure;

	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
		goto nla_put_failure;

	rcu_read_lock();
	if (got_ldev) {
		struct disk_conf *disk_conf;

		disk_conf = rcu_dereference(device->ldev->disk_conf);
		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
	}
	if (!err) {
		struct net_conf *nc;

		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
		err = net_conf_to_skb(skb, nc, exclude_sensitive);
	}
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
	if (!nla)
		goto nla_put_failure;
	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
	    nla_put_u32(skb, T_current_state, device->state.i) ||
	    nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
	    nla_put_u64_0pad(skb, T_capacity,
			     drbd_get_capacity(device->this_bdev)) ||
	    nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
	    nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
	    nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
	    nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
	    nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
	    nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
		goto nla_put_failure;

	if (got_ldev) {
		int err;

		spin_lock_irq(&device->ldev->md.uuid_lock);
		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
		spin_unlock_irq(&device->ldev->md.uuid_lock);

		if (err)
			goto nla_put_failure;

		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
		    nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
		    nla_put_u64_0pad(skb, T_bits_oos,
				     drbd_bm_total_weight(device)))
			goto nla_put_failure;
		if (C_SYNC_SOURCE <= device->state.conn &&
		    C_PAUSED_SYNC_T >= device->state.conn) {
			if (nla_put_u64_0pad(skb, T_bits_rs_total,
					     device->rs_total) ||
			    nla_put_u64_0pad(skb, T_bits_rs_failed,
					     device->rs_failed))
				goto nla_put_failure;
		}
	}

	if (sib) {
		switch (sib->sib_reason) {
		case SIB_SYNC_PROGRESS:
		case SIB_GET_STATUS_REPLY:
			break;
		case SIB_STATE_CHANGE:
			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
			    nla_put_u32(skb, T_new_state, sib->ns.i))
				goto nla_put_failure;
			break;
		case SIB_HELPER_POST:
			if (nla_put_u32(skb, T_helper_exit_code,
					sib->helper_exit_code))
				goto nla_put_failure;
			/* fall through */
		case SIB_HELPER_PRE:
			if (nla_put_string(skb, T_helper, sib->helper_name))
				goto nla_put_failure;
			break;
		}
	}
	nla_nest_end(skb, nla);

	if (0)
nla_put_failure:
		err = -EMSGSIZE;
	if (got_ldev)
		put_ldev(device);
	return err;
}

int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
	if (err) {
		nlmsg_free(adm_ctx.reply_skb);
		return err;
	}
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_device *device;
	struct drbd_genlmsghdr *dh;
	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
	struct drbd_resource *resource = NULL;
	struct drbd_resource *tmp;
	unsigned volume = cb->args[1];

	/* Open coded, deferred, iteration:
	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
	 *	connection = "first connection of resource or undefined";
	 *	idr_for_each_entry(&resource->devices, device, i) {
	 *	  ...
	 *	}
	 * }
	 * where resource is cb->args[0];
	 * and i is cb->args[1];
	 *
	 * cb->args[2] indicates if we shall loop over all resources,
	 * or just dump all volumes of a single resource.
	 *
	 * This may miss entries inserted after this dump started,
	 * or entries deleted before they are reached.
	 *
	 * We need to make sure the device won't disappear while
	 * we are looking at it, and revalidate our iterators
	 * on each iteration.
	 */

	/* synchronize with conn_create()/drbd_destroy_connection() */
	rcu_read_lock();
	/* revalidate iterator position */
	for_each_resource_rcu(tmp, &drbd_resources) {
		if (pos == NULL) {
			/* first iteration */
			pos = tmp;
			resource = pos;
			break;
		}
		if (tmp == pos) {
			resource = pos;
			break;
		}
	}
	if (resource) {
next_resource:
		device = idr_get_next(&resource->devices, &volume);
		if (!device) {
			/* No more volumes to dump on this resource.
			 * Advance resource iterator. */
			pos = list_entry_rcu(resource->resources.next,
					     struct drbd_resource, resources);
			/* Did we dump any volume of this resource yet? */
			if (volume != 0) {
				/* If we reached the end of the list,
				 * or only a single resource dump was requested,
				 * we are done. */
				if (&pos->resources == &drbd_resources || cb->args[2])
					goto out;
				volume = 0;
				resource = pos;
				goto next_resource;
			}
		}

		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, &drbd_genl_family,
				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
		if (!dh)
			goto out;

		if (!device) {
			/* This is a connection without a single volume.
			 * Surprisingly enough, it may have a network
			 * configuration. */
			struct drbd_connection *connection;

			dh->minor = -1U;
			dh->ret_code = NO_ERROR;
			connection = the_only_connection(resource);
			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
				goto cancel;
			if (connection) {
				struct net_conf *nc;

				nc = rcu_dereference(connection->net_conf);
				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
					goto cancel;
			}
			goto done;
		}

		D_ASSERT(device, device->vnr == volume);
		D_ASSERT(device, device->resource == resource);

		dh->minor = device_to_minor(device);
		dh->ret_code = NO_ERROR;

		if (nla_put_status_info(skb, device, NULL)) {
cancel:
			genlmsg_cancel(skb, dh);
			goto out;
		}
done:
		genlmsg_end(skb, dh);
	}

out:
	rcu_read_unlock();
	/* where to start the next iteration */
	cb->args[0] = (long)pos;
	cb->args[1] = (pos == resource) ? volume + 1 : 0;

	/* No more resources/volumes/minors found results in an empty skb.
	 * Which will terminate the dump. */
	return skb->len;
}

/*
 * Request status of all resources, or of all volumes within a single
 * resource.
 *
 * This is a dump, as the answer may not fit in a single reply skb otherwise.
 * Which means we cannot use the family->attrbuf or other such members, because
 * dump is NOT protected by the genl_lock().  During dump, we only have access
 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
 *
 * Once things are setup properly, we call into get_one_status().
 */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
{
	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
	struct nlattr *nla;
	const char *resource_name;
	struct drbd_resource *resource;
	int maxtype;

	/* Is this a followup call? */
	if (cb->args[0]) {
		/* ... of a single resource dump,
		 * and the resource iterator has been advanced already? */
		if (cb->args[2] && cb->args[2] != cb->args[0])
			return 0; /* DONE. */
		goto dump;
	}

	/* First call (from netlink_dump_start).  We need to figure out
	 * which resource(s) the user wants us to dump. */
	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
		       nlmsg_attrlen(cb->nlh, hdrlen),
		       DRBD_NLA_CFG_CONTEXT);

	/* No explicit context given.  Dump all. */
	if (!nla)
		goto dump;
	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
	if (IS_ERR(nla))
		return PTR_ERR(nla);
	/* context given, but no name present? */
	if (!nla)
		return -EINVAL;
	resource_name = nla_data(nla);
	if (!*resource_name)
		return -ENODEV;
	resource = drbd_find_resource(resource_name);
	if (!resource)
		return -ENODEV;

	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */

	/* prime iterators, and set "filter" mode mark:
	 * only dump this connection. */
	cb->args[0] = (long)resource;
	/* cb->args[1] = 0; passed in this way. */
	cb->args[2] = (long)resource;

dump:
	return get_one_status(skb, cb);
}

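/* Dump flow in a nutshell: on the first call into drbd_adm_get_status_all()
 * (cb->args[] still zero) an optional DRBD_NLA_CFG_CONTEXT in the request
 * selects between "all resources" and "this resource only" and primes
 * cb->args[0..2]; every further netlink_recvmsg() then re-enters
 * get_one_status(), which emits one DRBD_ADM_GET_STATUS message per volume
 * until an empty skb terminates the dump. */
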
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct timeout_parms tp;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	tp.timeout_type =
		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
		UT_DEFAULT;

	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
	if (err) {
		nlmsg_free(adm_ctx.reply_skb);
		return err;
	}
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	enum drbd_ret_code retcode;
	struct start_ov_parms parms;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;

	/* resume from last known position, if possible */
	parms.ov_start_sector = device->ov_start_sector;
	parms.ov_stop_sector = ULLONG_MAX;
	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
		int err = start_ov_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}
	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* w_make_ov_request expects position to be aligned */
	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
	device->ov_stop_sector = parms.ov_stop_sector;

	/* If there is still bitmap IO pending, e.g. previous resync or verify
	 * just being finished, wait for it before requesting a new resync. */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
	drbd_resume_io(device);

	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	enum drbd_ret_code retcode;
	int skip_initial_sync = 0;
	int err;
	struct new_c_uuid_parms args;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out_nolock;

	device = adm_ctx.device;
	memset(&args, 0, sizeof(args));
	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
		err = new_c_uuid_parms_from_attrs(&args, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out_nolock;
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */

	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	/* this is "skip initial sync", assume to be clean */
	if (device->state.conn == C_CONNECTED &&
	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
		drbd_info(device, "Preparing to skip initial sync\n");
		skip_initial_sync = 1;
	} else if (device->state.conn != C_STANDALONE) {
		retcode = ERR_CONNECTED;
		goto out_dec;
	}

	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */

	if (args.clear_bm) {
		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
		if (err) {
			drbd_err(device, "Writing bitmap failed with %d\n", err);
			retcode = ERR_IO_MD_DISK;
		}
		if (skip_initial_sync) {
			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
			_drbd_uuid_set(device, UI_BITMAP, 0);
			drbd_print_uuids(device, "cleared bitmap UUID");
			spin_lock_irq(&device->resource->req_lock);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			spin_unlock_irq(&device->resource->req_lock);
		}
	}

	drbd_md_sync(device);
out_dec:
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out_nolock:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static enum drbd_ret_code
drbd_check_resource_name(struct drbd_config_context *adm_ctx)
{
	const char *name = adm_ctx->resource_name;
	if (!name || !name[0]) {
		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
		return ERR_MANDATORY_TAG;
	}
	/* if we want to use these in sysfs/configfs/debugfs some day,
	 * we must not allow slashes */
	if (strchr(name, '/')) {
		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
		return ERR_INVALID_REQUEST;
	}
	return NO_ERROR;
}

static void resource_to_info(struct resource_info *info,
			     struct drbd_resource *resource)
{
	info->res_role = conn_highest_role(first_connection(resource));
	info->res_susp = resource->susp;
	info->res_susp_nod = resource->susp_nod;
	info->res_susp_fen = resource->susp_fen;
}

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_connection *connection;
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct res_opts res_opts;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	set_res_opts_defaults(&res_opts);
	err = res_opts_from_attrs(&res_opts, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto out;
	}

	retcode = drbd_check_resource_name(&adm_ctx);
	if (retcode != NO_ERROR)
		goto out;

	if (adm_ctx.resource) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
			retcode = ERR_INVALID_REQUEST;
			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
		}
		/* else: still NO_ERROR */
		goto out;
	}

	/* not yet safe for genl_family.parallel_ops */
	mutex_lock(&resources_mutex);
	connection = conn_create(adm_ctx.resource_name, &res_opts);
	mutex_unlock(&resources_mutex);

	if (connection) {
		struct resource_info resource_info;

		mutex_lock(&notification_mutex);
		resource_to_info(&resource_info, connection->resource);
		notify_resource_state(NULL, 0, connection->resource,
				      &resource_info, NOTIFY_CREATE);
		mutex_unlock(&notification_mutex);
	} else
		retcode = ERR_NOMEM;

out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static void device_to_info(struct device_info *info,
			   struct drbd_device *device)
{
	info->dev_disk_state = device->state.disk;
}

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_genlmsghdr *dh = info->userhdr;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	if (dh->minor > MINORMASK) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}
	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}

	/* drbd_adm_prepare made sure already
	 * that first_peer_device(device)->connection and device->vnr match the request. */
	if (adm_ctx.device) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
			retcode = ERR_MINOR_OR_VOLUME_EXISTS;
		/* else: still NO_ERROR */
		goto out;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = drbd_create_device(&adm_ctx, dh->minor);
	if (retcode == NO_ERROR) {
		struct drbd_device *device;
		struct drbd_peer_device *peer_device;
		struct device_info info;
		unsigned int peer_devices = 0;
		enum drbd_notification_type flags;

		device = minor_to_device(dh->minor);
		for_each_peer_device(peer_device, device) {
			if (!has_net_conf(peer_device->connection))
				continue;
			peer_devices++;
		}

		device_to_info(&info, device);
		mutex_lock(&notification_mutex);
		flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
		notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
		for_each_peer_device(peer_device, device) {
			struct peer_device_info peer_device_info;

			if (!has_net_conf(peer_device->connection))
				continue;
			peer_device_to_info(&peer_device_info, peer_device);
			flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
			notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
						 NOTIFY_CREATE | flags);
		}
		mutex_unlock(&notification_mutex);
	}
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
	struct drbd_peer_device *peer_device;

	if (device->state.disk == D_DISKLESS &&
	    /* no need to be device->state.conn == C_STANDALONE &&
	     * we may want to delete a minor from a live replication group.
	     */
	    device->state.role == R_SECONDARY) {
		struct drbd_connection *connection =
			first_connection(device->resource);

		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
				    CS_VERBOSE + CS_WAIT_COMPLETE);

		/* If the state engine hasn't stopped the sender thread yet, we
		 * need to flush the sender work queue before generating the
		 * DESTROY events here. */
		if (get_t_state(&connection->worker) == RUNNING)
			drbd_flush_workqueue(&connection->sender_work);

		mutex_lock(&notification_mutex);
		for_each_peer_device(peer_device, device) {
			if (!has_net_conf(peer_device->connection))
				continue;
			notify_peer_device_state(NULL, 0, peer_device, NULL,
						 NOTIFY_DESTROY | NOTIFY_CONTINUES);
		}
		notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
		mutex_unlock(&notification_mutex);

		drbd_delete_device(device);
		return NO_ERROR;
	} else
		return ERR_MINOR_CONFIGURED;
}

int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_del_minor(adm_ctx.device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

static int adm_del_resource(struct drbd_resource *resource)
{
	struct drbd_connection *connection;

	for_each_connection(connection, resource) {
		if (connection->cstate > C_STANDALONE)
			return ERR_NET_CONFIGURED;
	}
	if (!idr_is_empty(&resource->devices))
		return ERR_RES_IN_USE;

	/* The state engine has stopped the sender thread, so we don't
	 * need to flush the sender work queue before generating the
	 * DESTROY event here. */
	mutex_lock(&notification_mutex);
	notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
	mutex_unlock(&notification_mutex);

	mutex_lock(&resources_mutex);
	list_del_rcu(&resource->resources);
	mutex_unlock(&resources_mutex);
	/* Make sure all threads have actually stopped: state handling only
	 * does drbd_thread_stop_nowait(). */
	list_for_each_entry(connection, &resource->connections, connections)
		drbd_thread_stop(&connection->worker);
	synchronize_rcu();
	drbd_free_resource(resource);
	return NO_ERROR;
}

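/* adm_del_resource() only succeeds once every connection is down to
 * C_STANDALONE and no device (minor) is left; it then broadcasts
 * NOTIFY_DESTROY, unlinks the resource, waits for the worker threads to
 * stop, and finally frees the resource. */
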
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
	unsigned i;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	/* demote */
	for_each_connection(connection, resource) {
		struct drbd_peer_device *peer_device;

		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
			if (retcode < SS_SUCCESS) {
				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
				goto out;
			}
		}

		retcode = conn_try_disconnect(connection, 0);
		if (retcode < SS_SUCCESS) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
			goto out;
		}
	}

	/* detach */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_detach(device, 0);
		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
			goto out;
		}
	}

	/* delete volumes */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_del_minor(device);
		if (retcode != NO_ERROR) {
			/* "can not happen" */
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
			goto out;
		}
	}

	retcode = adm_del_resource(resource);
out:
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;
	resource = adm_ctx.resource;

	mutex_lock(&resource->adm_mutex);
	retcode = adm_del_resource(resource);
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
	struct sk_buff *msg;
	struct drbd_genlmsghdr *d_out;
	unsigned seq;
	int err = -ENOMEM;

	seq = atomic_inc_return(&drbd_genl_seq);
	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!msg)
		goto failed;

	err = -EMSGSIZE;
	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
	if (!d_out) /* cannot happen, but anyways. */
		goto nla_put_failure;
	d_out->minor = device_to_minor(device);
	d_out->ret_code = NO_ERROR;

	if (nla_put_status_info(msg, device, sib))
		goto nla_put_failure;
	genlmsg_end(msg, d_out);
	err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
	/* msg has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto failed;

	return;

nla_put_failure:
	nlmsg_free(msg);
failed:
	drbd_err(device, "Error %d while broadcasting event. "
			"Event seq:%u sib_reason:%u\n",
			err, seq, sib->sib_reason);
}

static int nla_put_notification_header(struct sk_buff *msg,
				       enum drbd_notification_type type)
{
	struct drbd_notification_header nh = {
		.nh_type = type,
	};

	return drbd_notification_header_to_skb(msg, &nh, true);
}

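/* The notify_*_state() helpers below all follow the same pattern: when no
 * skb is passed in, they allocate one and multicast it as an event;
 * otherwise they fill the caller's skb (as done by the initial-state dump).
 * Each message carries a cfg context, the notification header above, the
 * object's info payload (omitted for NOTIFY_DESTROY) and its statistics. */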
void notify_resource_state(struct sk_buff *skb,
			   unsigned int seq,
			   struct drbd_resource *resource,
			   struct resource_info *resource_info,
			   enum drbd_notification_type type)
{
	struct resource_statistics resource_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     resource_info_to_skb(skb, resource_info, true)))
		goto nla_put_failure;
	resource_statistics.res_stat_write_ordering = resource->write_ordering;
	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
	if (err)
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}

void notify_device_state(struct sk_buff *skb,
			 unsigned int seq,
			 struct drbd_device *device,
			 struct device_info *device_info,
			 enum drbd_notification_type type)
{
	struct device_statistics device_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = device->minor;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     device_info_to_skb(skb, device_info, true)))
		goto nla_put_failure;
	device_to_statistics(&device_statistics, device);
	device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}

void notify_connection_state(struct sk_buff *skb,
			     unsigned int seq,
			     struct drbd_connection *connection,
			     struct connection_info *connection_info,
			     enum drbd_notification_type type)
{
	struct connection_statistics connection_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     connection_info_to_skb(skb, connection_info, true)))
		goto nla_put_failure;
	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
	connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}

void notify_peer_device_state(struct sk_buff *skb,
			      unsigned int seq,
			      struct drbd_peer_device *peer_device,
			      struct peer_device_info *peer_device_info,
			      enum drbd_notification_type type)
{
	struct peer_device_statistics peer_device_statistics;
	struct drbd_resource *resource = peer_device->device->resource;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     peer_device_info_to_skb(skb, peer_device_info, true)))
		goto nla_put_failure;
	peer_device_to_statistics(&peer_device_statistics, peer_device);
	peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}

void notify_helper(enum drbd_notification_type type,
		   struct drbd_device *device, struct drbd_connection *connection,
		   const char *name, int status)
{
	struct drbd_resource *resource = device ? device->resource : connection->resource;
	struct drbd_helper_info helper_info;
	unsigned int seq = atomic_inc_return(&notify_genl_seq);
	struct sk_buff *skb = NULL;
	struct drbd_genlmsghdr *dh;
	int err;

	strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
	helper_info.helper_status = status;

	err = -ENOMEM;
	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!skb)
		goto fail;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
	if (!dh)
		goto fail;
	dh->minor = device ? device->minor : -1;
	dh->ret_code = NO_ERROR;
	mutex_lock(&notification_mutex);
	if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
	    nla_put_notification_header(skb, type) ||
	    drbd_helper_info_to_skb(skb, &helper_info, true))
		goto unlock_fail;
	genlmsg_end(skb, dh);
	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
	skb = NULL;
	/* skb has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto unlock_fail;
	mutex_unlock(&notification_mutex);
	return;

unlock_fail:
	mutex_unlock(&notification_mutex);
fail:
	nlmsg_free(skb);
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}

static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
{
	struct drbd_genlmsghdr *dh;
	int err;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_notification_header(skb, NOTIFY_EXISTS))
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	return;

nla_put_failure:
	nlmsg_free(skb);
	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
}

static void free_state_changes(struct list_head *list)
{
	while (!list_empty(list)) {
		struct drbd_state_change *state_change =
			list_first_entry(list, struct drbd_state_change, list);
		list_del(&state_change->list);
		forget_state_change(state_change);
	}
}

static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
{
	return 1 +
	       state_change->n_connections +
	       state_change->n_devices +
	       state_change->n_devices * state_change->n_connections;
}

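/* Example: a resource with one connection and two volumes accounts for
 * 1 (resource) + 1 (connection) + 2 (devices) + 2 * 1 (peer devices) = 6
 * notifications per snapshot; drbd_adm_get_initial_state() adds the trailing
 * "initial state done" message on top of that. */
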
static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
	unsigned int seq = cb->args[2];
	unsigned int n;
	enum drbd_notification_type flags = 0;

	/* There is no need for taking notification_mutex here:  it doesn't
	   matter if the initial state events mix with later state change
	   events; we can always tell the events apart by the NOTIFY_EXISTS
	   flag. */

	cb->args[5]--;
	if (cb->args[5] == 1) {
		notify_initial_state_done(skb, seq);
		goto out;
	}
	n = cb->args[4]++;
	if (cb->args[4] < cb->args[3])
		flags |= NOTIFY_CONTINUES;
	if (n < 1) {
		notify_resource_state_change(skb, seq, state_change->resource,
					     NOTIFY_EXISTS | flags);
		goto next;
	}
	n--;
	if (n < state_change->n_connections) {
		notify_connection_state_change(skb, seq, &state_change->connections[n],
					       NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_connections;
	if (n < state_change->n_devices) {
		notify_device_state_change(skb, seq, &state_change->devices[n],
					   NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_devices;
	if (n < state_change->n_devices * state_change->n_connections) {
		notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
						NOTIFY_EXISTS | flags);
		goto next;
	}

next:
	if (cb->args[4] == cb->args[3]) {
		struct drbd_state_change *next_state_change =
			list_entry(state_change->list.next,
				   struct drbd_state_change, list);
		cb->args[0] = (long)next_state_change;
		cb->args[3] = notifications_for_state_change(next_state_change);
		cb->args[4] = 0;
	}
out:
	return skb->len;
}

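/* cb->args[] layout for the initial-state dump:
 *   args[0]  current struct drbd_state_change being walked
 *   args[2]  netlink sequence number used for all emitted events
 *   args[3]  number of notifications in the current state_change
 *   args[4]  how many of those have already been emitted
 *   args[5]  remaining dump steps; once it drops to 1 the "initial state
 *            done" marker is sent, and the following callback frees the
 *            snapshots and terminates the dump. */
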
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_resource *resource;
	LIST_HEAD(head);

	if (cb->args[5] >= 1) {
		if (cb->args[5] > 1)
			return get_initial_state(skb, cb);
		if (cb->args[0]) {
			struct drbd_state_change *state_change =
				(struct drbd_state_change *)cb->args[0];

			/* connect list to head */
			list_add(&head, &state_change->list);
			free_state_changes(&head);
		}
		return 0;
	}

	cb->args[5] = 2;  /* number of iterations */
	mutex_lock(&resources_mutex);
	for_each_resource(resource, &drbd_resources) {
		struct drbd_state_change *state_change;

		state_change = remember_old_state(resource, GFP_KERNEL);
		if (!state_change) {
			if (!list_empty(&head))
				free_state_changes(&head);
			mutex_unlock(&resources_mutex);
			return -ENOMEM;
		}
		copy_old_to_new_state_change(state_change);
		list_add_tail(&state_change->list, &head);
		cb->args[5] += notifications_for_state_change(state_change);
	}
	mutex_unlock(&resources_mutex);

	if (!list_empty(&head)) {
		struct drbd_state_change *state_change =
			list_entry(head.next, struct drbd_state_change, list);
		cb->args[0] = (long)state_change;
		cb->args[3] = notifications_for_state_change(state_change);
		list_del(&head);	/* detach list from head */
	}

	cb->args[2] = cb->nlh->nlmsg_seq;
	return get_initial_state(skb, cb);
}