1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * Copyright (C) 2004, 2005 Oracle. All rights reserved.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public
17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/sysctl.h>
25 #include <linux/configfs.h>
29 #include "nodemanager.h"
30 #include "heartbeat.h"
/*
 * For now only one cluster can be active at a time.  Lifting that
 * restriction will require passing cluster references to every place
 * where nodes are looked up.
 */
static struct o2nm_cluster *o2nm_single_cluster;
/* Size in bytes of the buffer that holds the hb_ctl helper path. */
#define OCFS2_MAX_HB_CTL_PATH 256

/*
 * Path of the hb_ctl helper.  This buffer is what o2nm_get_hb_ctl_path()
 * returns and what the "hb_ctl_path" sysctl entry uses as its data.
 */
static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] =
	"/sbin/ocfs2_hb_ctl";
43 static ctl_table ocfs2_nm_table
[] = {
46 .procname
= "hb_ctl_path",
47 .data
= ocfs2_hb_ctl_path
,
48 .maxlen
= OCFS2_MAX_HB_CTL_PATH
,
50 .proc_handler
= &proc_dostring
,
51 .strategy
= &sysctl_string
,
56 static ctl_table ocfs2_mod_table
[] = {
58 .ctl_name
= KERN_OCFS2_NM
,
63 .child
= ocfs2_nm_table
68 static ctl_table ocfs2_kern_table
[] = {
70 .ctl_name
= KERN_OCFS2
,
75 .child
= ocfs2_mod_table
80 static ctl_table ocfs2_root_table
[] = {
87 .child
= ocfs2_kern_table
/*
 * Handle returned by register_sysctl_table() in init_o2nm().  It stays
 * NULL until then; exit_o2nm() unregisters it only when it is non-NULL.
 */
static struct ctl_table_header *ocfs2_table_header;
94 const char *o2nm_get_hb_ctl_path(void)
96 return ocfs2_hb_ctl_path
;
98 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path
);
100 struct o2nm_cluster
{
101 struct config_group cl_group
;
102 unsigned cl_has_local
:1;
104 rwlock_t cl_nodes_lock
;
105 struct o2nm_node
*cl_nodes
[O2NM_MAX_NODES
];
106 struct rb_root cl_node_ip_tree
;
107 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
108 unsigned long cl_nodes_bitmap
[BITS_TO_LONGS(O2NM_MAX_NODES
)];
111 struct o2nm_node
*o2nm_get_node_by_num(u8 node_num
)
113 struct o2nm_node
*node
= NULL
;
115 if (node_num
>= O2NM_MAX_NODES
|| o2nm_single_cluster
== NULL
)
118 read_lock(&o2nm_single_cluster
->cl_nodes_lock
);
119 node
= o2nm_single_cluster
->cl_nodes
[node_num
];
121 config_item_get(&node
->nd_item
);
122 read_unlock(&o2nm_single_cluster
->cl_nodes_lock
);
126 EXPORT_SYMBOL_GPL(o2nm_get_node_by_num
);
128 int o2nm_configured_node_map(unsigned long *map
, unsigned bytes
)
130 struct o2nm_cluster
*cluster
= o2nm_single_cluster
;
132 BUG_ON(bytes
< (sizeof(cluster
->cl_nodes_bitmap
)));
137 read_lock(&cluster
->cl_nodes_lock
);
138 memcpy(map
, cluster
->cl_nodes_bitmap
, sizeof(cluster
->cl_nodes_bitmap
));
139 read_unlock(&cluster
->cl_nodes_lock
);
143 EXPORT_SYMBOL_GPL(o2nm_configured_node_map
);
145 static struct o2nm_node
*o2nm_node_ip_tree_lookup(struct o2nm_cluster
*cluster
,
147 struct rb_node
***ret_p
,
148 struct rb_node
**ret_parent
)
150 struct rb_node
**p
= &cluster
->cl_node_ip_tree
.rb_node
;
151 struct rb_node
*parent
= NULL
;
152 struct o2nm_node
*node
, *ret
= NULL
;
158 node
= rb_entry(parent
, struct o2nm_node
, nd_ip_node
);
160 cmp
= memcmp(&ip_needle
, &node
->nd_ipv4_address
,
174 if (ret_parent
!= NULL
)
175 *ret_parent
= parent
;
180 struct o2nm_node
*o2nm_get_node_by_ip(__be32 addr
)
182 struct o2nm_node
*node
= NULL
;
183 struct o2nm_cluster
*cluster
= o2nm_single_cluster
;
188 read_lock(&cluster
->cl_nodes_lock
);
189 node
= o2nm_node_ip_tree_lookup(cluster
, addr
, NULL
, NULL
);
191 config_item_get(&node
->nd_item
);
192 read_unlock(&cluster
->cl_nodes_lock
);
197 EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip
);
199 void o2nm_node_put(struct o2nm_node
*node
)
201 config_item_put(&node
->nd_item
);
203 EXPORT_SYMBOL_GPL(o2nm_node_put
);
205 void o2nm_node_get(struct o2nm_node
*node
)
207 config_item_get(&node
->nd_item
);
209 EXPORT_SYMBOL_GPL(o2nm_node_get
);
211 u8
o2nm_this_node(void)
213 u8 node_num
= O2NM_MAX_NODES
;
215 if (o2nm_single_cluster
&& o2nm_single_cluster
->cl_has_local
)
216 node_num
= o2nm_single_cluster
->cl_local_node
;
220 EXPORT_SYMBOL_GPL(o2nm_this_node
);
222 /* node configfs bits */
224 static struct o2nm_cluster
*to_o2nm_cluster(struct config_item
*item
)
227 container_of(to_config_group(item
), struct o2nm_cluster
,
232 static struct o2nm_node
*to_o2nm_node(struct config_item
*item
)
234 return item
? container_of(item
, struct o2nm_node
, nd_item
) : NULL
;
237 static void o2nm_node_release(struct config_item
*item
)
239 struct o2nm_node
*node
= to_o2nm_node(item
);
243 static ssize_t
o2nm_node_num_read(struct o2nm_node
*node
, char *page
)
245 return sprintf(page
, "%d\n", node
->nd_num
);
248 static struct o2nm_cluster
*to_o2nm_cluster_from_node(struct o2nm_node
*node
)
250 /* through the first node_set .parent
251 * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */
252 return to_o2nm_cluster(node
->nd_item
.ci_parent
->ci_parent
);
256 O2NM_NODE_ATTR_NUM
= 0,
258 O2NM_NODE_ATTR_ADDRESS
,
259 O2NM_NODE_ATTR_LOCAL
,
262 static ssize_t
o2nm_node_num_write(struct o2nm_node
*node
, const char *page
,
265 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
267 char *p
= (char *)page
;
269 tmp
= simple_strtoul(p
, &p
, 0);
270 if (!p
|| (*p
&& (*p
!= '\n')))
273 if (tmp
>= O2NM_MAX_NODES
)
276 /* once we're in the cl_nodes tree networking can look us up by
277 * node number and try to use our address and port attributes
278 * to connect to this node.. make sure that they've been set
279 * before writing the node attribute? */
280 if (!test_bit(O2NM_NODE_ATTR_ADDRESS
, &node
->nd_set_attributes
) ||
281 !test_bit(O2NM_NODE_ATTR_PORT
, &node
->nd_set_attributes
))
282 return -EINVAL
; /* XXX */
284 write_lock(&cluster
->cl_nodes_lock
);
285 if (cluster
->cl_nodes
[tmp
])
288 cluster
->cl_nodes
[tmp
] = node
;
290 set_bit(tmp
, cluster
->cl_nodes_bitmap
);
292 write_unlock(&cluster
->cl_nodes_lock
);
298 static ssize_t
o2nm_node_ipv4_port_read(struct o2nm_node
*node
, char *page
)
300 return sprintf(page
, "%u\n", ntohs(node
->nd_ipv4_port
));
303 static ssize_t
o2nm_node_ipv4_port_write(struct o2nm_node
*node
,
304 const char *page
, size_t count
)
307 char *p
= (char *)page
;
309 tmp
= simple_strtoul(p
, &p
, 0);
310 if (!p
|| (*p
&& (*p
!= '\n')))
318 node
->nd_ipv4_port
= htons(tmp
);
323 static ssize_t
o2nm_node_ipv4_address_read(struct o2nm_node
*node
, char *page
)
325 return sprintf(page
, "%u.%u.%u.%u\n", NIPQUAD(node
->nd_ipv4_address
));
328 static ssize_t
o2nm_node_ipv4_address_write(struct o2nm_node
*node
,
332 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
334 struct rb_node
**p
, *parent
;
335 unsigned int octets
[4];
336 __be32 ipv4_addr
= 0;
338 ret
= sscanf(page
, "%3u.%3u.%3u.%3u", &octets
[3], &octets
[2],
339 &octets
[1], &octets
[0]);
343 for (i
= 0; i
< ARRAY_SIZE(octets
); i
++) {
346 be32_add_cpu(&ipv4_addr
, octets
[i
] << (i
* 8));
350 write_lock(&cluster
->cl_nodes_lock
);
351 if (o2nm_node_ip_tree_lookup(cluster
, ipv4_addr
, &p
, &parent
))
354 rb_link_node(&node
->nd_ip_node
, parent
, p
);
355 rb_insert_color(&node
->nd_ip_node
, &cluster
->cl_node_ip_tree
);
357 write_unlock(&cluster
->cl_nodes_lock
);
361 memcpy(&node
->nd_ipv4_address
, &ipv4_addr
, sizeof(ipv4_addr
));
366 static ssize_t
o2nm_node_local_read(struct o2nm_node
*node
, char *page
)
368 return sprintf(page
, "%d\n", node
->nd_local
);
371 static ssize_t
o2nm_node_local_write(struct o2nm_node
*node
, const char *page
,
374 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
376 char *p
= (char *)page
;
379 tmp
= simple_strtoul(p
, &p
, 0);
380 if (!p
|| (*p
&& (*p
!= '\n')))
383 tmp
= !!tmp
; /* boolean of whether this node wants to be local */
385 /* setting local turns on networking rx for now so we require having
386 * set everything else first */
387 if (!test_bit(O2NM_NODE_ATTR_ADDRESS
, &node
->nd_set_attributes
) ||
388 !test_bit(O2NM_NODE_ATTR_NUM
, &node
->nd_set_attributes
) ||
389 !test_bit(O2NM_NODE_ATTR_PORT
, &node
->nd_set_attributes
))
390 return -EINVAL
; /* XXX */
392 /* the only failure case is trying to set a new local node
393 * when a different one is already set */
394 if (tmp
&& tmp
== cluster
->cl_has_local
&&
395 cluster
->cl_local_node
!= node
->nd_num
)
398 /* bring up the rx thread if we're setting the new local node. */
399 if (tmp
&& !cluster
->cl_has_local
) {
400 ret
= o2net_start_listening(node
);
405 if (!tmp
&& cluster
->cl_has_local
&&
406 cluster
->cl_local_node
== node
->nd_num
) {
407 o2net_stop_listening(node
);
408 cluster
->cl_local_node
= O2NM_INVALID_NODE_NUM
;
411 node
->nd_local
= tmp
;
412 if (node
->nd_local
) {
413 cluster
->cl_has_local
= tmp
;
414 cluster
->cl_local_node
= node
->nd_num
;
420 struct o2nm_node_attribute
{
421 struct configfs_attribute attr
;
422 ssize_t (*show
)(struct o2nm_node
*, char *);
423 ssize_t (*store
)(struct o2nm_node
*, const char *, size_t);
426 static struct o2nm_node_attribute o2nm_node_attr_num
= {
427 .attr
= { .ca_owner
= THIS_MODULE
,
429 .ca_mode
= S_IRUGO
| S_IWUSR
},
430 .show
= o2nm_node_num_read
,
431 .store
= o2nm_node_num_write
,
434 static struct o2nm_node_attribute o2nm_node_attr_ipv4_port
= {
435 .attr
= { .ca_owner
= THIS_MODULE
,
436 .ca_name
= "ipv4_port",
437 .ca_mode
= S_IRUGO
| S_IWUSR
},
438 .show
= o2nm_node_ipv4_port_read
,
439 .store
= o2nm_node_ipv4_port_write
,
442 static struct o2nm_node_attribute o2nm_node_attr_ipv4_address
= {
443 .attr
= { .ca_owner
= THIS_MODULE
,
444 .ca_name
= "ipv4_address",
445 .ca_mode
= S_IRUGO
| S_IWUSR
},
446 .show
= o2nm_node_ipv4_address_read
,
447 .store
= o2nm_node_ipv4_address_write
,
450 static struct o2nm_node_attribute o2nm_node_attr_local
= {
451 .attr
= { .ca_owner
= THIS_MODULE
,
453 .ca_mode
= S_IRUGO
| S_IWUSR
},
454 .show
= o2nm_node_local_read
,
455 .store
= o2nm_node_local_write
,
458 static struct configfs_attribute
*o2nm_node_attrs
[] = {
459 [O2NM_NODE_ATTR_NUM
] = &o2nm_node_attr_num
.attr
,
460 [O2NM_NODE_ATTR_PORT
] = &o2nm_node_attr_ipv4_port
.attr
,
461 [O2NM_NODE_ATTR_ADDRESS
] = &o2nm_node_attr_ipv4_address
.attr
,
462 [O2NM_NODE_ATTR_LOCAL
] = &o2nm_node_attr_local
.attr
,
466 static int o2nm_attr_index(struct configfs_attribute
*attr
)
469 for (i
= 0; i
< ARRAY_SIZE(o2nm_node_attrs
); i
++) {
470 if (attr
== o2nm_node_attrs
[i
])
477 static ssize_t
o2nm_node_show(struct config_item
*item
,
478 struct configfs_attribute
*attr
,
481 struct o2nm_node
*node
= to_o2nm_node(item
);
482 struct o2nm_node_attribute
*o2nm_node_attr
=
483 container_of(attr
, struct o2nm_node_attribute
, attr
);
486 if (o2nm_node_attr
->show
)
487 ret
= o2nm_node_attr
->show(node
, page
);
491 static ssize_t
o2nm_node_store(struct config_item
*item
,
492 struct configfs_attribute
*attr
,
493 const char *page
, size_t count
)
495 struct o2nm_node
*node
= to_o2nm_node(item
);
496 struct o2nm_node_attribute
*o2nm_node_attr
=
497 container_of(attr
, struct o2nm_node_attribute
, attr
);
499 int attr_index
= o2nm_attr_index(attr
);
501 if (o2nm_node_attr
->store
== NULL
) {
506 if (test_bit(attr_index
, &node
->nd_set_attributes
))
509 ret
= o2nm_node_attr
->store(node
, page
, count
);
513 set_bit(attr_index
, &node
->nd_set_attributes
);
518 static struct configfs_item_operations o2nm_node_item_ops
= {
519 .release
= o2nm_node_release
,
520 .show_attribute
= o2nm_node_show
,
521 .store_attribute
= o2nm_node_store
,
524 static struct config_item_type o2nm_node_type
= {
525 .ct_item_ops
= &o2nm_node_item_ops
,
526 .ct_attrs
= o2nm_node_attrs
,
527 .ct_owner
= THIS_MODULE
,
532 struct o2nm_node_group
{
533 struct config_group ns_group
;
538 static struct o2nm_node_group
*to_o2nm_node_group(struct config_group
*group
)
541 container_of(group
, struct o2nm_node_group
, ns_group
)
546 static struct config_item
*o2nm_node_group_make_item(struct config_group
*group
,
549 struct o2nm_node
*node
= NULL
;
550 struct config_item
*ret
= NULL
;
552 if (strlen(name
) > O2NM_MAX_NAME_LEN
)
553 goto out
; /* ENAMETOOLONG */
555 node
= kcalloc(1, sizeof(struct o2nm_node
), GFP_KERNEL
);
557 goto out
; /* ENOMEM */
559 strcpy(node
->nd_name
, name
); /* use item.ci_namebuf instead? */
560 config_item_init_type_name(&node
->nd_item
, name
, &o2nm_node_type
);
561 spin_lock_init(&node
->nd_lock
);
563 ret
= &node
->nd_item
;
572 static void o2nm_node_group_drop_item(struct config_group
*group
,
573 struct config_item
*item
)
575 struct o2nm_node
*node
= to_o2nm_node(item
);
576 struct o2nm_cluster
*cluster
= to_o2nm_cluster(group
->cg_item
.ci_parent
);
578 o2net_disconnect_node(node
);
580 if (cluster
->cl_has_local
&&
581 (cluster
->cl_local_node
== node
->nd_num
)) {
582 cluster
->cl_has_local
= 0;
583 cluster
->cl_local_node
= O2NM_INVALID_NODE_NUM
;
584 o2net_stop_listening(node
);
587 /* XXX call into net to stop this node from trading messages */
589 write_lock(&cluster
->cl_nodes_lock
);
592 if (node
->nd_ipv4_address
)
593 rb_erase(&node
->nd_ip_node
, &cluster
->cl_node_ip_tree
);
595 /* nd_num might be 0 if the node number hasn't been set.. */
596 if (cluster
->cl_nodes
[node
->nd_num
] == node
) {
597 cluster
->cl_nodes
[node
->nd_num
] = NULL
;
598 clear_bit(node
->nd_num
, cluster
->cl_nodes_bitmap
);
600 write_unlock(&cluster
->cl_nodes_lock
);
602 config_item_put(item
);
605 static struct configfs_group_operations o2nm_node_group_group_ops
= {
606 .make_item
= o2nm_node_group_make_item
,
607 .drop_item
= o2nm_node_group_drop_item
,
610 static struct config_item_type o2nm_node_group_type
= {
611 .ct_group_ops
= &o2nm_node_group_group_ops
,
612 .ct_owner
= THIS_MODULE
,
617 static void o2nm_cluster_release(struct config_item
*item
)
619 struct o2nm_cluster
*cluster
= to_o2nm_cluster(item
);
621 kfree(cluster
->cl_group
.default_groups
);
625 static struct configfs_item_operations o2nm_cluster_item_ops
= {
626 .release
= o2nm_cluster_release
,
629 static struct config_item_type o2nm_cluster_type
= {
630 .ct_item_ops
= &o2nm_cluster_item_ops
,
631 .ct_owner
= THIS_MODULE
,
636 struct o2nm_cluster_group
{
637 struct configfs_subsystem cs_subsys
;
642 static struct o2nm_cluster_group
*to_o2nm_cluster_group(struct config_group
*group
)
645 container_of(to_configfs_subsystem(group
), struct o2nm_cluster_group
, cs_subsys
)
650 static struct config_group
*o2nm_cluster_group_make_group(struct config_group
*group
,
653 struct o2nm_cluster
*cluster
= NULL
;
654 struct o2nm_node_group
*ns
= NULL
;
655 struct config_group
*o2hb_group
= NULL
, *ret
= NULL
;
658 /* this runs under the parent dir's i_mutex; there can be only
659 * one caller in here at a time */
660 if (o2nm_single_cluster
)
661 goto out
; /* ENOSPC */
663 cluster
= kcalloc(1, sizeof(struct o2nm_cluster
), GFP_KERNEL
);
664 ns
= kcalloc(1, sizeof(struct o2nm_node_group
), GFP_KERNEL
);
665 defs
= kcalloc(3, sizeof(struct config_group
*), GFP_KERNEL
);
666 o2hb_group
= o2hb_alloc_hb_set();
667 if (cluster
== NULL
|| ns
== NULL
|| o2hb_group
== NULL
|| defs
== NULL
)
670 config_group_init_type_name(&cluster
->cl_group
, name
,
672 config_group_init_type_name(&ns
->ns_group
, "node",
673 &o2nm_node_group_type
);
675 cluster
->cl_group
.default_groups
= defs
;
676 cluster
->cl_group
.default_groups
[0] = &ns
->ns_group
;
677 cluster
->cl_group
.default_groups
[1] = o2hb_group
;
678 cluster
->cl_group
.default_groups
[2] = NULL
;
679 rwlock_init(&cluster
->cl_nodes_lock
);
680 cluster
->cl_node_ip_tree
= RB_ROOT
;
682 ret
= &cluster
->cl_group
;
683 o2nm_single_cluster
= cluster
;
689 o2hb_free_hb_set(o2hb_group
);
696 static void o2nm_cluster_group_drop_item(struct config_group
*group
, struct config_item
*item
)
698 struct o2nm_cluster
*cluster
= to_o2nm_cluster(item
);
700 struct config_item
*killme
;
702 BUG_ON(o2nm_single_cluster
!= cluster
);
703 o2nm_single_cluster
= NULL
;
705 for (i
= 0; cluster
->cl_group
.default_groups
[i
]; i
++) {
706 killme
= &cluster
->cl_group
.default_groups
[i
]->cg_item
;
707 cluster
->cl_group
.default_groups
[i
] = NULL
;
708 config_item_put(killme
);
711 config_item_put(item
);
714 static struct configfs_group_operations o2nm_cluster_group_group_ops
= {
715 .make_group
= o2nm_cluster_group_make_group
,
716 .drop_item
= o2nm_cluster_group_drop_item
,
719 static struct config_item_type o2nm_cluster_group_type
= {
720 .ct_group_ops
= &o2nm_cluster_group_group_ops
,
721 .ct_owner
= THIS_MODULE
,
724 static struct o2nm_cluster_group o2nm_cluster_group
= {
728 .ci_namebuf
= "cluster",
729 .ci_type
= &o2nm_cluster_group_type
,
735 static void __exit
exit_o2nm(void)
737 if (ocfs2_table_header
)
738 unregister_sysctl_table(ocfs2_table_header
);
740 /* XXX sync with hb callbacks and shut down hb? */
741 o2net_unregister_hb_callbacks();
742 configfs_unregister_subsystem(&o2nm_cluster_group
.cs_subsys
);
748 static int __init
init_o2nm(void)
752 cluster_print_version();
757 ocfs2_table_header
= register_sysctl_table(ocfs2_root_table
, 0);
758 if (!ocfs2_table_header
) {
759 printk(KERN_ERR
"nodemanager: unable to register sysctl\n");
760 ret
= -ENOMEM
; /* or something. */
764 ret
= o2net_register_hb_callbacks();
768 config_group_init(&o2nm_cluster_group
.cs_subsys
.su_group
);
769 init_MUTEX(&o2nm_cluster_group
.cs_subsys
.su_sem
);
770 ret
= configfs_register_subsystem(&o2nm_cluster_group
.cs_subsys
);
772 printk(KERN_ERR
"nodemanager: Registration returned %d\n", ret
);
776 ret
= o2cb_sys_init();
780 configfs_unregister_subsystem(&o2nm_cluster_group
.cs_subsys
);
782 o2net_unregister_hb_callbacks();
784 unregister_sysctl_table(ocfs2_table_header
);
791 MODULE_AUTHOR("Oracle");
792 MODULE_LICENSE("GPL");
794 module_init(init_o2nm
)
795 module_exit(exit_o2nm
)