1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * Copyright (C) 2004, 2005 Oracle. All rights reserved.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public
17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/sysctl.h>
25 #include <linux/configfs.h>
29 #include "nodemanager.h"
30 #include "heartbeat.h"
/*
 * The node manager currently assumes that at most one cluster is active at
 * any time.  Lifting that restriction would mean threading a cluster
 * reference through every place that looks up nodes.
 *
 * Static storage is zero-initialized, so this starts out NULL.
 */
static struct o2nm_cluster *o2nm_single_cluster;
/* Size in bytes of the buffer that holds the heartbeat-control helper path. */
#define OCFS2_MAX_HB_CTL_PATH 256

/*
 * Path handed out by o2nm_get_hb_ctl_path() and wired into the
 * "hb_ctl_path" sysctl entry as its data buffer.
 */
static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
43 static ctl_table ocfs2_nm_table
[] = {
46 .procname
= "hb_ctl_path",
47 .data
= ocfs2_hb_ctl_path
,
48 .maxlen
= OCFS2_MAX_HB_CTL_PATH
,
50 .proc_handler
= &proc_dostring
,
51 .strategy
= &sysctl_string
,
56 static ctl_table ocfs2_mod_table
[] = {
58 .ctl_name
= KERN_OCFS2_NM
,
63 .child
= ocfs2_nm_table
68 static ctl_table ocfs2_kern_table
[] = {
70 .ctl_name
= KERN_OCFS2
,
75 .child
= ocfs2_mod_table
80 static ctl_table ocfs2_root_table
[] = {
87 .child
= ocfs2_kern_table
/*
 * Result of register_sysctl_table() as stored by init_o2nm(); starts out
 * NULL because static storage is zero-initialized.
 */
static struct ctl_table_header *ocfs2_table_header;
94 const char *o2nm_get_hb_ctl_path(void)
96 return ocfs2_hb_ctl_path
;
98 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path
);
100 struct o2nm_cluster
{
101 struct config_group cl_group
;
102 unsigned cl_has_local
:1;
104 rwlock_t cl_nodes_lock
;
105 struct o2nm_node
*cl_nodes
[O2NM_MAX_NODES
];
106 struct rb_root cl_node_ip_tree
;
107 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
108 unsigned long cl_nodes_bitmap
[BITS_TO_LONGS(O2NM_MAX_NODES
)];
111 struct o2nm_node
*o2nm_get_node_by_num(u8 node_num
)
113 struct o2nm_node
*node
= NULL
;
115 if (node_num
>= O2NM_MAX_NODES
|| o2nm_single_cluster
== NULL
)
118 read_lock(&o2nm_single_cluster
->cl_nodes_lock
);
119 node
= o2nm_single_cluster
->cl_nodes
[node_num
];
121 config_item_get(&node
->nd_item
);
122 read_unlock(&o2nm_single_cluster
->cl_nodes_lock
);
126 EXPORT_SYMBOL_GPL(o2nm_get_node_by_num
);
128 int o2nm_configured_node_map(unsigned long *map
, unsigned bytes
)
130 struct o2nm_cluster
*cluster
= o2nm_single_cluster
;
132 BUG_ON(bytes
< (sizeof(cluster
->cl_nodes_bitmap
)));
137 read_lock(&cluster
->cl_nodes_lock
);
138 memcpy(map
, cluster
->cl_nodes_bitmap
, sizeof(cluster
->cl_nodes_bitmap
));
139 read_unlock(&cluster
->cl_nodes_lock
);
143 EXPORT_SYMBOL_GPL(o2nm_configured_node_map
);
145 static struct o2nm_node
*o2nm_node_ip_tree_lookup(struct o2nm_cluster
*cluster
,
147 struct rb_node
***ret_p
,
148 struct rb_node
**ret_parent
)
150 struct rb_node
**p
= &cluster
->cl_node_ip_tree
.rb_node
;
151 struct rb_node
*parent
= NULL
;
152 struct o2nm_node
*node
, *ret
= NULL
;
156 node
= rb_entry(parent
, struct o2nm_node
, nd_ip_node
);
158 if (memcmp(&ip_needle
, &node
->nd_ipv4_address
,
159 sizeof(ip_needle
)) < 0)
161 else if (memcmp(&ip_needle
, &node
->nd_ipv4_address
,
162 sizeof(ip_needle
)) > 0)
172 if (ret_parent
!= NULL
)
173 *ret_parent
= parent
;
178 struct o2nm_node
*o2nm_get_node_by_ip(__be32 addr
)
180 struct o2nm_node
*node
= NULL
;
181 struct o2nm_cluster
*cluster
= o2nm_single_cluster
;
186 read_lock(&cluster
->cl_nodes_lock
);
187 node
= o2nm_node_ip_tree_lookup(cluster
, addr
, NULL
, NULL
);
189 config_item_get(&node
->nd_item
);
190 read_unlock(&cluster
->cl_nodes_lock
);
195 EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip
);
197 void o2nm_node_put(struct o2nm_node
*node
)
199 config_item_put(&node
->nd_item
);
201 EXPORT_SYMBOL_GPL(o2nm_node_put
);
203 void o2nm_node_get(struct o2nm_node
*node
)
205 config_item_get(&node
->nd_item
);
207 EXPORT_SYMBOL_GPL(o2nm_node_get
);
209 u8
o2nm_this_node(void)
211 u8 node_num
= O2NM_MAX_NODES
;
213 if (o2nm_single_cluster
&& o2nm_single_cluster
->cl_has_local
)
214 node_num
= o2nm_single_cluster
->cl_local_node
;
218 EXPORT_SYMBOL_GPL(o2nm_this_node
);
220 /* node configfs bits */
222 static struct o2nm_cluster
*to_o2nm_cluster(struct config_item
*item
)
225 container_of(to_config_group(item
), struct o2nm_cluster
,
230 static struct o2nm_node
*to_o2nm_node(struct config_item
*item
)
232 return item
? container_of(item
, struct o2nm_node
, nd_item
) : NULL
;
235 static void o2nm_node_release(struct config_item
*item
)
237 struct o2nm_node
*node
= to_o2nm_node(item
);
241 static ssize_t
o2nm_node_num_read(struct o2nm_node
*node
, char *page
)
243 return sprintf(page
, "%d\n", node
->nd_num
);
246 static struct o2nm_cluster
*to_o2nm_cluster_from_node(struct o2nm_node
*node
)
248 /* through the first node_set .parent
249 * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */
250 return to_o2nm_cluster(node
->nd_item
.ci_parent
->ci_parent
);
254 O2NM_NODE_ATTR_NUM
= 0,
256 O2NM_NODE_ATTR_ADDRESS
,
257 O2NM_NODE_ATTR_LOCAL
,
260 static ssize_t
o2nm_node_num_write(struct o2nm_node
*node
, const char *page
,
263 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
265 char *p
= (char *)page
;
267 tmp
= simple_strtoul(p
, &p
, 0);
268 if (!p
|| (*p
&& (*p
!= '\n')))
271 if (tmp
>= O2NM_MAX_NODES
)
274 /* once we're in the cl_nodes tree networking can look us up by
275 * node number and try to use our address and port attributes
276 * to connect to this node.. make sure that they've been set
277 * before writing the node attribute? */
278 if (!test_bit(O2NM_NODE_ATTR_ADDRESS
, &node
->nd_set_attributes
) ||
279 !test_bit(O2NM_NODE_ATTR_PORT
, &node
->nd_set_attributes
))
280 return -EINVAL
; /* XXX */
282 write_lock(&cluster
->cl_nodes_lock
);
283 if (cluster
->cl_nodes
[tmp
])
286 cluster
->cl_nodes
[tmp
] = node
;
288 set_bit(tmp
, cluster
->cl_nodes_bitmap
);
290 write_unlock(&cluster
->cl_nodes_lock
);
296 static ssize_t
o2nm_node_ipv4_port_read(struct o2nm_node
*node
, char *page
)
298 return sprintf(page
, "%u\n", ntohs(node
->nd_ipv4_port
));
301 static ssize_t
o2nm_node_ipv4_port_write(struct o2nm_node
*node
,
302 const char *page
, size_t count
)
305 char *p
= (char *)page
;
307 tmp
= simple_strtoul(p
, &p
, 0);
308 if (!p
|| (*p
&& (*p
!= '\n')))
316 node
->nd_ipv4_port
= htons(tmp
);
321 static ssize_t
o2nm_node_ipv4_address_read(struct o2nm_node
*node
, char *page
)
323 return sprintf(page
, "%u.%u.%u.%u\n", NIPQUAD(node
->nd_ipv4_address
));
326 static ssize_t
o2nm_node_ipv4_address_write(struct o2nm_node
*node
,
330 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
332 struct rb_node
**p
, *parent
;
333 unsigned int octets
[4];
334 __be32 ipv4_addr
= 0;
336 ret
= sscanf(page
, "%3u.%3u.%3u.%3u", &octets
[3], &octets
[2],
337 &octets
[1], &octets
[0]);
341 for (i
= 0; i
< ARRAY_SIZE(octets
); i
++) {
344 be32_add_cpu(&ipv4_addr
, octets
[i
] << (i
* 8));
348 write_lock(&cluster
->cl_nodes_lock
);
349 if (o2nm_node_ip_tree_lookup(cluster
, ipv4_addr
, &p
, &parent
))
352 rb_link_node(&node
->nd_ip_node
, parent
, p
);
353 rb_insert_color(&node
->nd_ip_node
, &cluster
->cl_node_ip_tree
);
355 write_unlock(&cluster
->cl_nodes_lock
);
359 memcpy(&node
->nd_ipv4_address
, &ipv4_addr
, sizeof(ipv4_addr
));
364 static ssize_t
o2nm_node_local_read(struct o2nm_node
*node
, char *page
)
366 return sprintf(page
, "%d\n", node
->nd_local
);
369 static ssize_t
o2nm_node_local_write(struct o2nm_node
*node
, const char *page
,
372 struct o2nm_cluster
*cluster
= to_o2nm_cluster_from_node(node
);
374 char *p
= (char *)page
;
377 tmp
= simple_strtoul(p
, &p
, 0);
378 if (!p
|| (*p
&& (*p
!= '\n')))
381 tmp
= !!tmp
; /* boolean of whether this node wants to be local */
383 /* setting local turns on networking rx for now so we require having
384 * set everything else first */
385 if (!test_bit(O2NM_NODE_ATTR_ADDRESS
, &node
->nd_set_attributes
) ||
386 !test_bit(O2NM_NODE_ATTR_NUM
, &node
->nd_set_attributes
) ||
387 !test_bit(O2NM_NODE_ATTR_PORT
, &node
->nd_set_attributes
))
388 return -EINVAL
; /* XXX */
390 /* the only failure case is trying to set a new local node
391 * when a different one is already set */
392 if (tmp
&& tmp
== cluster
->cl_has_local
&&
393 cluster
->cl_local_node
!= node
->nd_num
)
396 /* bring up the rx thread if we're setting the new local node. */
397 if (tmp
&& !cluster
->cl_has_local
) {
398 ret
= o2net_start_listening(node
);
403 if (!tmp
&& cluster
->cl_has_local
&&
404 cluster
->cl_local_node
== node
->nd_num
) {
405 o2net_stop_listening(node
);
406 cluster
->cl_local_node
= O2NM_INVALID_NODE_NUM
;
409 node
->nd_local
= tmp
;
410 if (node
->nd_local
) {
411 cluster
->cl_has_local
= tmp
;
412 cluster
->cl_local_node
= node
->nd_num
;
418 struct o2nm_node_attribute
{
419 struct configfs_attribute attr
;
420 ssize_t (*show
)(struct o2nm_node
*, char *);
421 ssize_t (*store
)(struct o2nm_node
*, const char *, size_t);
424 static struct o2nm_node_attribute o2nm_node_attr_num
= {
425 .attr
= { .ca_owner
= THIS_MODULE
,
427 .ca_mode
= S_IRUGO
| S_IWUSR
},
428 .show
= o2nm_node_num_read
,
429 .store
= o2nm_node_num_write
,
432 static struct o2nm_node_attribute o2nm_node_attr_ipv4_port
= {
433 .attr
= { .ca_owner
= THIS_MODULE
,
434 .ca_name
= "ipv4_port",
435 .ca_mode
= S_IRUGO
| S_IWUSR
},
436 .show
= o2nm_node_ipv4_port_read
,
437 .store
= o2nm_node_ipv4_port_write
,
440 static struct o2nm_node_attribute o2nm_node_attr_ipv4_address
= {
441 .attr
= { .ca_owner
= THIS_MODULE
,
442 .ca_name
= "ipv4_address",
443 .ca_mode
= S_IRUGO
| S_IWUSR
},
444 .show
= o2nm_node_ipv4_address_read
,
445 .store
= o2nm_node_ipv4_address_write
,
448 static struct o2nm_node_attribute o2nm_node_attr_local
= {
449 .attr
= { .ca_owner
= THIS_MODULE
,
451 .ca_mode
= S_IRUGO
| S_IWUSR
},
452 .show
= o2nm_node_local_read
,
453 .store
= o2nm_node_local_write
,
456 static struct configfs_attribute
*o2nm_node_attrs
[] = {
457 [O2NM_NODE_ATTR_NUM
] = &o2nm_node_attr_num
.attr
,
458 [O2NM_NODE_ATTR_PORT
] = &o2nm_node_attr_ipv4_port
.attr
,
459 [O2NM_NODE_ATTR_ADDRESS
] = &o2nm_node_attr_ipv4_address
.attr
,
460 [O2NM_NODE_ATTR_LOCAL
] = &o2nm_node_attr_local
.attr
,
464 static int o2nm_attr_index(struct configfs_attribute
*attr
)
467 for (i
= 0; i
< ARRAY_SIZE(o2nm_node_attrs
); i
++) {
468 if (attr
== o2nm_node_attrs
[i
])
475 static ssize_t
o2nm_node_show(struct config_item
*item
,
476 struct configfs_attribute
*attr
,
479 struct o2nm_node
*node
= to_o2nm_node(item
);
480 struct o2nm_node_attribute
*o2nm_node_attr
=
481 container_of(attr
, struct o2nm_node_attribute
, attr
);
484 if (o2nm_node_attr
->show
)
485 ret
= o2nm_node_attr
->show(node
, page
);
489 static ssize_t
o2nm_node_store(struct config_item
*item
,
490 struct configfs_attribute
*attr
,
491 const char *page
, size_t count
)
493 struct o2nm_node
*node
= to_o2nm_node(item
);
494 struct o2nm_node_attribute
*o2nm_node_attr
=
495 container_of(attr
, struct o2nm_node_attribute
, attr
);
497 int attr_index
= o2nm_attr_index(attr
);
499 if (o2nm_node_attr
->store
== NULL
) {
504 if (test_bit(attr_index
, &node
->nd_set_attributes
))
507 ret
= o2nm_node_attr
->store(node
, page
, count
);
511 set_bit(attr_index
, &node
->nd_set_attributes
);
516 static struct configfs_item_operations o2nm_node_item_ops
= {
517 .release
= o2nm_node_release
,
518 .show_attribute
= o2nm_node_show
,
519 .store_attribute
= o2nm_node_store
,
522 static struct config_item_type o2nm_node_type
= {
523 .ct_item_ops
= &o2nm_node_item_ops
,
524 .ct_attrs
= o2nm_node_attrs
,
525 .ct_owner
= THIS_MODULE
,
530 struct o2nm_node_group
{
531 struct config_group ns_group
;
536 static struct o2nm_node_group
*to_o2nm_node_group(struct config_group
*group
)
539 container_of(group
, struct o2nm_node_group
, ns_group
)
544 static struct config_item
*o2nm_node_group_make_item(struct config_group
*group
,
547 struct o2nm_node
*node
= NULL
;
548 struct config_item
*ret
= NULL
;
550 if (strlen(name
) > O2NM_MAX_NAME_LEN
)
551 goto out
; /* ENAMETOOLONG */
553 node
= kcalloc(1, sizeof(struct o2nm_node
), GFP_KERNEL
);
555 goto out
; /* ENOMEM */
557 strcpy(node
->nd_name
, name
); /* use item.ci_namebuf instead? */
558 config_item_init_type_name(&node
->nd_item
, name
, &o2nm_node_type
);
559 spin_lock_init(&node
->nd_lock
);
561 ret
= &node
->nd_item
;
570 static void o2nm_node_group_drop_item(struct config_group
*group
,
571 struct config_item
*item
)
573 struct o2nm_node
*node
= to_o2nm_node(item
);
574 struct o2nm_cluster
*cluster
= to_o2nm_cluster(group
->cg_item
.ci_parent
);
576 o2net_disconnect_node(node
);
578 if (cluster
->cl_has_local
&&
579 (cluster
->cl_local_node
== node
->nd_num
)) {
580 cluster
->cl_has_local
= 0;
581 cluster
->cl_local_node
= O2NM_INVALID_NODE_NUM
;
582 o2net_stop_listening(node
);
585 /* XXX call into net to stop this node from trading messages */
587 write_lock(&cluster
->cl_nodes_lock
);
590 if (node
->nd_ipv4_address
)
591 rb_erase(&node
->nd_ip_node
, &cluster
->cl_node_ip_tree
);
593 /* nd_num might be 0 if the node number hasn't been set.. */
594 if (cluster
->cl_nodes
[node
->nd_num
] == node
) {
595 cluster
->cl_nodes
[node
->nd_num
] = NULL
;
596 clear_bit(node
->nd_num
, cluster
->cl_nodes_bitmap
);
598 write_unlock(&cluster
->cl_nodes_lock
);
600 config_item_put(item
);
603 static struct configfs_group_operations o2nm_node_group_group_ops
= {
604 .make_item
= o2nm_node_group_make_item
,
605 .drop_item
= o2nm_node_group_drop_item
,
608 static struct config_item_type o2nm_node_group_type
= {
609 .ct_group_ops
= &o2nm_node_group_group_ops
,
610 .ct_owner
= THIS_MODULE
,
615 static void o2nm_cluster_release(struct config_item
*item
)
617 struct o2nm_cluster
*cluster
= to_o2nm_cluster(item
);
619 kfree(cluster
->cl_group
.default_groups
);
623 static struct configfs_item_operations o2nm_cluster_item_ops
= {
624 .release
= o2nm_cluster_release
,
627 static struct config_item_type o2nm_cluster_type
= {
628 .ct_item_ops
= &o2nm_cluster_item_ops
,
629 .ct_owner
= THIS_MODULE
,
634 struct o2nm_cluster_group
{
635 struct configfs_subsystem cs_subsys
;
640 static struct o2nm_cluster_group
*to_o2nm_cluster_group(struct config_group
*group
)
643 container_of(to_configfs_subsystem(group
), struct o2nm_cluster_group
, cs_subsys
)
648 static struct config_group
*o2nm_cluster_group_make_group(struct config_group
*group
,
651 struct o2nm_cluster
*cluster
= NULL
;
652 struct o2nm_node_group
*ns
= NULL
;
653 struct config_group
*o2hb_group
= NULL
, *ret
= NULL
;
656 /* this runs under the parent dir's i_mutex; there can be only
657 * one caller in here at a time */
658 if (o2nm_single_cluster
)
659 goto out
; /* ENOSPC */
661 cluster
= kcalloc(1, sizeof(struct o2nm_cluster
), GFP_KERNEL
);
662 ns
= kcalloc(1, sizeof(struct o2nm_node_group
), GFP_KERNEL
);
663 defs
= kcalloc(3, sizeof(struct config_group
*), GFP_KERNEL
);
664 o2hb_group
= o2hb_alloc_hb_set();
665 if (cluster
== NULL
|| ns
== NULL
|| o2hb_group
== NULL
|| defs
== NULL
)
668 config_group_init_type_name(&cluster
->cl_group
, name
,
670 config_group_init_type_name(&ns
->ns_group
, "node",
671 &o2nm_node_group_type
);
673 cluster
->cl_group
.default_groups
= defs
;
674 cluster
->cl_group
.default_groups
[0] = &ns
->ns_group
;
675 cluster
->cl_group
.default_groups
[1] = o2hb_group
;
676 cluster
->cl_group
.default_groups
[2] = NULL
;
677 rwlock_init(&cluster
->cl_nodes_lock
);
678 cluster
->cl_node_ip_tree
= RB_ROOT
;
680 ret
= &cluster
->cl_group
;
681 o2nm_single_cluster
= cluster
;
687 o2hb_free_hb_set(o2hb_group
);
694 static void o2nm_cluster_group_drop_item(struct config_group
*group
, struct config_item
*item
)
696 struct o2nm_cluster
*cluster
= to_o2nm_cluster(item
);
698 struct config_item
*killme
;
700 BUG_ON(o2nm_single_cluster
!= cluster
);
701 o2nm_single_cluster
= NULL
;
703 for (i
= 0; cluster
->cl_group
.default_groups
[i
]; i
++) {
704 killme
= &cluster
->cl_group
.default_groups
[i
]->cg_item
;
705 cluster
->cl_group
.default_groups
[i
] = NULL
;
706 config_item_put(killme
);
709 config_item_put(item
);
712 static struct configfs_group_operations o2nm_cluster_group_group_ops
= {
713 .make_group
= o2nm_cluster_group_make_group
,
714 .drop_item
= o2nm_cluster_group_drop_item
,
717 static struct config_item_type o2nm_cluster_group_type
= {
718 .ct_group_ops
= &o2nm_cluster_group_group_ops
,
719 .ct_owner
= THIS_MODULE
,
722 static struct o2nm_cluster_group o2nm_cluster_group
= {
726 .ci_namebuf
= "cluster",
727 .ci_type
= &o2nm_cluster_group_type
,
733 static void __exit
exit_o2nm(void)
735 if (ocfs2_table_header
)
736 unregister_sysctl_table(ocfs2_table_header
);
738 /* XXX sync with hb callbacks and shut down hb? */
739 o2net_unregister_hb_callbacks();
740 configfs_unregister_subsystem(&o2nm_cluster_group
.cs_subsys
);
746 static int __init
init_o2nm(void)
750 cluster_print_version();
755 ocfs2_table_header
= register_sysctl_table(ocfs2_root_table
, 0);
756 if (!ocfs2_table_header
) {
757 printk(KERN_ERR
"nodemanager: unable to register sysctl\n");
758 ret
= -ENOMEM
; /* or something. */
762 ret
= o2net_register_hb_callbacks();
766 config_group_init(&o2nm_cluster_group
.cs_subsys
.su_group
);
767 init_MUTEX(&o2nm_cluster_group
.cs_subsys
.su_sem
);
768 ret
= configfs_register_subsystem(&o2nm_cluster_group
.cs_subsys
);
770 printk(KERN_ERR
"nodemanager: Registration returned %d\n", ret
);
774 ret
= o2cb_sys_init();
778 configfs_unregister_subsystem(&o2nm_cluster_group
.cs_subsys
);
780 o2net_unregister_hb_callbacks();
782 unregister_sysctl_table(ocfs2_table_header
);
789 MODULE_AUTHOR("Oracle");
790 MODULE_LICENSE("GPL");
792 module_init(init_o2nm
)
793 module_exit(exit_o2nm
)