Expose confdb write to the library.
[openais.git] / exec / amfnode.c
blobe210aad6d371a3ef8a5962da98766601fa1067bd
1 /** @file amfnode.c
2 *
3 * Copyright (c) 2006 Ericsson AB.
4 * Author: Hans Feldt, Anders Eriksson, Lars Holm
5 * - Constructors/destructors
6 * - Serializers/deserializers
8 * All rights reserved.
11 * This software licensed under BSD license, the text of which follows:
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
16 * - Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 * - Redistributions in binary form must reproduce the above copyright notice,
19 * this list of conditions and the following disclaimer in the documentation
20 * and/or other materials provided with the distribution.
21 * - Neither the name of the MontaVista Software, Inc. nor the names of its
22 * contributors may be used to endorse or promote products derived from this
23 * software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
35 * THE POSSIBILITY OF SUCH DAMAGE.
37 * AMF Node Class Implementation
39 * This file contains functions for handling AMF nodes. It can be
40 * viewed as the implementation of the AMF Node class (called NODE)
41 * as described in SAI-Overview-B.02.01. The SA Forum specification
42 * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
43 * and is referred to as 'the spec' below.
45 * The functions in this file are responsible for:
46 * - controlling the instantiation of the SUs hosted on current node and
47 * controlling the assigning of workload to them when a node joins the
48 * cluster (cluster start is controlled by the Cluster Class)
49 * - controlling node level recovery and repair functions
50 * - implementing error escalation level 2 and 3 (paragraph 3.12.2.2 and
51 * 3.12.2.3 in the spec)
52 * - handling run time attributes of the AMF NODE; cached
53 * attributes are stored as variables and sent to the IMM service (future)
54 * upon the changes described in the specification
56 * The node class contains the following state machines:
57 * - administrative state machine (ADSM)
58 * - operational state machine (OPSM)
59 * - availability control state machine (ACSM)
61 * The administrative state machine will be implemented in the future.
63 * The operational state machine is primarily used to report status of the
64 * node.
66 * The availability control state machine is used for control purposes.
67 * ACSM contains three states of which two are composite.
68 * Being a composite state means that the state contains substates.
69 * ACSM states are:
70 * - REPAIR_NEEDED
71 * - IDLE (ESCALATION_LEVEL_0, ESCALATION_LEVEL_2 and ESCALATION_LEVEL_3)
72 * - MANAGING_HOSTED_SERVICE_UNITS (
73 * . FAILING_FAST (REBOOTING_NODE and ACTIVATING_STANDBY_NODE)
74 * . FAILING_GRACEFULLY (SWITCHING_OVER, FAILING_OVER and REBOOTING_NODE)
75 * . LEAVING_SPONTANEOUSLY (SWITCHING_OVER, FAILING_OVER and
76 * WAITING_FOR_NODE_TO_JOIN)
77 * . JOINING (STARTING_APPLICATIONS and ASSIGNING_WORKLOAD)
79 * REPAIR_NEEDED indicates the node needs a manual repair and this state will be
80 * maintained until the administrative command REPAIRED is entered (implemented
81 * in the future)
83 * IDLE is a composite state where no actions are actually performed
84 * and which is used only to remember the escalation level. Substate LEVEL_0 indicates
85 * no escalation. LEVEL_2 indicates that so many component restarts have been
86 * executed recently that a new component restart request will escalate
87 * to service unit restart action. Node will request a service unit restart
88 * from SU.
89 * LEVEL_3 will be entered if either too many service unit restarts have
90 * been made or a component failover recovery action is requested. On level 3
91 * the recovery action performed is service unit failover (paragraph 3.12.1.3).
93 * FAILING_FAST state executes a node re-boot and waits for the node to join
94 * the cluster again. (not implemented)
96 * FAILING_GRACEFULLY state requests all SGs which have SUs hosted on current
97 * node to switch or failover according to the procedures described in
98 * paragraphs 3.12.1.3 before re-boot is executed. Then the confirmation is
99 * awaited from all concerned SGs and finally a node re-boot is executed as
100 * the repair action (see paragraph 2.12.1.4).
102 * LEAVING_SPONTANEOUSLY state handles the spontaneous leave of a node.
104 * JOINING state handles the start of a node in all cases except cluster start,
105 * which is handled by the CLUSTER class.
107 * 1. Node Availability Control State Machine
108 * ==========================================
110 * 1.1 State Transition Table
112 * State: Event: Action: New state:
113 * ============================================================================
114 * ESCALATION_LEVEL_X node_sync_ready A6 JOINING_STARTING_APPLS
115 * ESCALATION_LEVEL_X node_leave A9,A8 LEAVING_SP_FAILING_OVER
116 * ESCALATION_LEVEL_X failover A11 GRACEFULLY_FAILING_OVER
117 * ESCALATION_LEVEL_2 comp_restart_req [!C6]A13 ESCALATION_LEVEL_2
118 * ESCALATION_LEVEL_2 comp_restart_req [C6]A14 ESCALATION_LEVEL_3
119 * ESCALATION_LEVEL_3 comp_restart_req [!C7]A14 ESCALATION_LEVEL_3
120 * ESCALATION_LEVEL_3 comp_failover_req [!C7]A14 ESCALATION_LEVEL_3
121 * ESCALATION_LEVEL_3 comp_restart_req [C7]A15 ESCALATION_LEVEL_3
122 * ESCALATION_LEVEL_3 comp_failover_req [C7]A15 ESCALATION_LEVEL_3
123 * JOINING_STARTING_APPLS appl_started [C4] A7 JOINING_ASSIGNING_WL
124 * JOINING_ASSIGNING_WL appl_assigned [C5] ESCALATION_LEVEL_X
125 * LEAVING_SP_FAILING_OVER sg_failed_over [C1] LEAVING_SP_WAIT_FOR_JOIN
126 * LEAVING_SP_WAIT_FOR_JOIN node_sync_ready A6 JOINING_STARTING_APPLS
127 * GRACEFULLY_FAILING_OVER sg_failed_over [C1] A12 GRACEFULLY_REBOOTING
128 * GRACEFULLY_REBOOTING node_leave ESCALATION_LEVEL_X
130 * 1.2 State Description
131 * =====================
132 * ESCALATION_LEVEL_X - Node is synchronized and idle (X = 0,2 or 3).
133 * JOINING_STARTING_APPLS - JOINING_STARTING_APPLICATIONS
134 * Node has ordered all applications to start its SUs
135 * hosted on current node and is now waiting for them
136 * to acknowledge that they have started.
137 * GRACEFULLY_FAILING_OVER - FAILING_GRACEFULLY_FAILING_OVER
138 * Node has ordered all SGs in the cluster to
139 * failover all SUs that are hosted on a specific
140 * node and waits for the SGs to confirm the
141 * failover is completed.
142 * GRACEFULLY_REBOOTING - FAILING_GRACEFULLY_REBOOTING_NODE
143 * Node has ordered reboot and waits for the rebooted
144 * node to join the cluster again.
145 * JOINING_ASSIGNING_WL - JOINING_ASSIGNING_WORKLOAD
146 * Node has ordered all applications to assign workload
147 * to all its SUs which currently have no workload and
148 * is now waiting for the applications to acknowledge.
150 * LEAVING_SP_FAILING_OVER - LEAVING_SPONTANEOUSLY_FAILING_OVER
151 * Node has received an event telling that this node
152 * has left the cluster and has ordered all service
153 * groups to failover those of its SUs that were
154 * hosted on current node.
156 * LEAVING_SP_WAIT_FOR_JOIN - LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN
157 * Node is waiting for current node to join again.
159 * 1.3 Actions
160 * ===========
161 * A1 -
162 * A2 -
163 * A3 -
164 * A4 -
165 * A5 -
166 * A6 - [foreach application in cluster]start application
167 * A7 - [foreach application in cluster]assign workload to application
168 * A8 - [foreach application in cluster]
169 * [foreach SG in application ]failover node
170 * A9 - [foreach application in cluster]
171 * [foreach SG in application ]
172 * [foreach SU in SG where the SU is hosted on current node]
173 * [foreach comp in such an SU]indicate that the node has left the cluster
174 * A10-
175 * A11- [foreach SG in cluster]failover node
176 * A12- reboot node
177 * A13- restart SU
178 * A14- failover SU
179 * A15- failover node
181 * 1.4 Guards
182 * ==========
183 * C1 - All SG availability control state machines (ACSM) == IDLE
184 * C2 -
185 * C3 -
186 * C4 - No applications are in ACSM state == STARTING_SGS
187 * C5 - All applications have ACSM state == WORKLOAD_ASSIGNED
188 * C6 - Specified number of SU restarts have been done.
189 * C7 - Specified number of SU failover actions have been done.
192 #include <stdlib.h>
193 #include <assert.h>
194 #include <unistd.h>
195 #include "amf.h"
196 #include "util.h"
197 #include "logsys.h"
198 #include "main.h"
200 LOGSYS_DECLARE_SUBSYS ("AMF", LOG_INFO)
202 /******************************************************************************
203 * Internal (static) utility functions
204 *****************************************************************************/
206 static void node_acsm_enter_leaving_spontaneously(struct amf_node *node)
208 ENTER("'%s'", node->name.value);
209 node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
210 node->nodeid = 0;
213 static void node_acsm_enter_failing_over (struct amf_node *node)
215 struct amf_application *app;
216 struct amf_sg *sg;
217 struct amf_su *su;
218 struct amf_comp *component = NULL;
220 ENTER("'%s'", node->name.value);
221 node->acsm_state = NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER;
223 * Indicate to each component object in the model that current
224 * node has left the cluster
226 for (app = amf_cluster->application_head; app != NULL; app = app->next) {
227 for (sg = app->sg_head; sg != NULL; sg = sg->next) {
228 for (su = sg->su_head; su != NULL; su = su->next) {
229 if (name_match(&node->name, &su->saAmfSUHostedByNode)) {
230 for (component = su->comp_head; component != NULL;
231 component = component->next) {
232 amf_comp_node_left(component);
240 * Let all service groups with service units hosted on current node failover
241 * its workload
243 for (app = amf_cluster->application_head; app != NULL; app =
244 app->next) {
245 for (sg = app->sg_head; sg != NULL; sg =
246 sg->next) {
247 amf_sg_failover_node_req(sg, node);
252 static void failover_all_sg_on_node (amf_node_t *node)
254 amf_application_t *app;
255 amf_sg_t *sg;
256 amf_su_t *su;
257 for (app = amf_cluster->application_head; app != NULL; app = app->next) {
258 for (sg = app->sg_head; sg != NULL; sg = sg->next) {
259 for (su = sg->su_head; su != NULL; su = su->next) {
260 if (name_match(&su->saAmfSUHostedByNode, &node->name)) {
261 amf_sg_failover_node_req (sg, node);
262 break;
270 static void node_acsm_enter_failing_gracefully_failing_over (amf_node_t *node)
272 ENTER("");
273 node->acsm_state = NODE_ACSM_FAILING_GRACEFULLY_FAILING_OVER;
274 failover_all_sg_on_node (node);
277 static int has_all_sg_on_node_failed_over (amf_node_t *node)
279 amf_application_t *app;
280 amf_sg_t *sg;
281 amf_su_t *su;
282 int has_all_sg_on_node_failed_over = 1;
284 for (app = amf_cluster->application_head; app != NULL; app = app->next) {
285 for (sg = app->sg_head; sg != NULL; sg = sg->next) {
286 for (su = sg->su_head; su != NULL; su = su->next) {
287 if (name_match(&su->saAmfSUHostedByNode, &node->name)) {
289 if (sg->avail_state != SG_AC_Idle) {
290 TRACE1("%s %s",sg->name.value, su->name.value);
291 has_all_sg_on_node_failed_over = 0;
292 goto out;
294 break;
300 out:
301 return has_all_sg_on_node_failed_over;
304 static void repair_node (amf_node_t *node)
306 ENTER("");
307 char hostname[256];
308 gethostname (hostname, 256);
309 if (!strcmp (hostname, (const char*)node->saAmfNodeClmNode.value)) {
310 /* TODO if(saAmfAutoRepair == SA_TRUE) */
311 #ifdef DEBUG
312 exit (0);
313 #else
314 system ("reboot");
315 #endif
319 static void enter_failing_gracefully_rebooting_node (amf_node_t *node)
321 ENTER("");
322 node->acsm_state = NODE_ACSM_FAILING_GRACEFULLY_REBOOTING_NODE;
323 repair_node (node);
326 static void node_acsm_enter_idle (amf_node_t *node)
328 ENTER ("history_state=%d",node->history_state);
329 node->acsm_state = node->history_state;
332 static void node_acsm_enter_joining_assigning_workload (struct amf_node *node,
333 struct amf_application *app)
335 log_printf(LOG_NOTICE,
336 "Node=%s: all applications started, assigning workload.",
337 node->name.value);
339 ENTER("");
340 node->acsm_state = NODE_ACSM_JOINING_ASSIGNING_WORKLOAD;
341 for (app = app->cluster->application_head; app != NULL;
342 app = app->next) {
343 amf_application_assign_workload (app, node);
347 /******************************************************************************
348 * Event methods
349 *****************************************************************************/
352 * This event indicates that a node has unexpectedly left the cluster. Node
353 * leave event is obtained from amf_confchg_fn.
355 * @param node
357 void amf_node_leave (struct amf_node *node)
359 assert (node != NULL);
360 ENTER("'%s', CLM node '%s'", node->name.value,
361 node->saAmfNodeClmNode.value);
364 switch (node->acsm_state) {
365 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
366 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
367 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
368 node_acsm_enter_leaving_spontaneously(node);
369 node_acsm_enter_failing_over (node);
370 break;
371 case NODE_ACSM_REPAIR_NEEDED:
372 break;
373 case NODE_ACSM_FAILING_GRACEFULLY_REBOOTING_NODE:
374 node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
375 node_acsm_enter_idle (node);
376 break;
377 default:
378 log_printf (LOG_LEVEL_ERROR, "amf_node_leave called in state = %d"
379 " (should have been deferred)", node->acsm_state);
380 openais_exit_error (AIS_DONE_FATAL_ERR);
381 break;
387 * This function handles a detected error that by a pre-analysis executed
388 * elsewhere has been decided to be recovered by a node fail over.
389 * @param node
391 void amf_node_failover (struct amf_node *node)
393 assert (node != NULL);
394 ENTER("'%s', CLM node '%s'", node->name.value,
395 node->saAmfNodeClmNode.value);
397 switch (node->acsm_state) {
398 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
399 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
400 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
401 node_acsm_enter_failing_gracefully_failing_over (node);
402 break;
403 case NODE_ACSM_REPAIR_NEEDED:
404 break;
405 default:
406 log_printf (LOG_LEVEL_ERROR, "amf_node_leave()called in state = %d"
407 " (should have been deferred)", node->acsm_state);
408 openais_exit_error (AIS_DONE_FATAL_ERR);
409 break;
/**
 * Node switchover event. As far as visible here the function is an empty
 * stub: no action is taken.
 * @param node
 */
void amf_node_switchover (struct amf_node *node)
{
}
/**
 * Node failfast event. As far as visible here the function is an empty
 * stub: no action is taken.
 * @param node
 */
void amf_node_failfast (struct amf_node *node)
{
}
432 * This event is a request to restart a component which has been escalated,
433 * because the component has already been restarted the number of times
434 * specified by the configuration.
435 * This function evaluates which recovery measure shall now be
436 * taken and initiates the action which result from the evaluation.
437 * @param node
438 * @param comp
440 void amf_node_comp_restart_req (struct amf_node *node, struct amf_comp *comp)
442 amf_su_t *su = comp->su;
443 ENTER("");
444 switch (node->acsm_state) {
445 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
446 node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_2;
447 amf_node_comp_restart_req (node, comp);
448 break;
449 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
450 if (su->saAmfSURestartCount >= su->sg->saAmfSGSuRestartMax) {
451 SaNameT dn;
452 node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_3;
453 amf_comp_operational_state_set (comp, SA_AMF_OPERATIONAL_DISABLED);
454 amf_su_operational_state_set (su, SA_AMF_OPERATIONAL_DISABLED);
455 amf_comp_dn_make (comp, &dn);
457 log_printf (LOG_NOTICE, "Error detected for '%s', recovery "
458 "action:\n\t\tSU failover", dn.value);
460 amf_sg_failover_su_req (su->sg, su, node);
461 } else {
462 amf_su_restart (su);
464 break;
465 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
466 if (su->su_failover_cnt < node->saAmfNodeSuFailoverMax) {
467 SaNameT dn;
468 amf_comp_operational_state_set (comp, SA_AMF_OPERATIONAL_DISABLED);
469 amf_su_operational_state_set (su, SA_AMF_OPERATIONAL_DISABLED);
470 amf_comp_dn_make (comp, &dn);
472 log_printf (LOG_NOTICE, "Error detected for '%s', recovery "
473 "action:\n\t\tSU failover", dn.value);
475 amf_sg_failover_su_req (su->sg, su, node);
476 return;
477 } else {
478 node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
479 amf_node_failover (node);
481 break;
482 default:
483 dprintf("%d",node->acsm_state);
484 assert (0);
485 break;
490 * This event is a request to failover the specified component.
491 * This function evaluates which recovery measure shall actually be
492 * taken considering the escalation policy and initiates the action
493 * which result from the evaluation.
494 * @param node
495 * @param comp
497 void amf_node_comp_failover_req (amf_node_t *node, amf_comp_t *comp)
499 ENTER("");
500 switch (node->acsm_state) {
501 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
502 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
503 if (comp->su->saAmfSUFailover) {
504 /* SU failover */
505 amf_sg_failover_su_req (comp->su->sg,comp->su, node);
507 break;
508 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
509 if (comp->su->su_failover_cnt < node->saAmfNodeSuFailoverMax) {
510 if (comp->su->saAmfSUFailover) {
511 /* SU failover */
512 amf_sg_failover_su_req (comp->su->sg,comp->su, node);
515 } else {
516 node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
517 amf_node_failover (node);
519 break;
520 default:
521 dprintf("%d",node->acsm_state);
522 assert (0);
523 break;
528 * This event indicates that current node has joined and its cluster model has
529 * been synchronized with the other nodes cluster models.
531 * @param node
533 void amf_node_sync_ready (struct amf_node *node)
535 struct amf_application *app;
537 assert (node != NULL);
539 log_printf(LOG_NOTICE, "Node=%s: sync ready, starting hosted SUs.",
540 node->name.value);
541 node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
543 switch (node->acsm_state) {
544 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
545 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
546 case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
547 case NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN:
548 node->acsm_state = NODE_ACSM_JOINING_STARTING_APPLICATIONS;
549 for (app = amf_cluster->application_head; app != NULL; app = app->next) {
550 amf_application_start (app, node);
552 break;
553 case NODE_ACSM_REPAIR_NEEDED:
554 break;
555 default:
556 log_printf (LOG_LEVEL_ERROR, "amf_node_sync_ready() was called in "
557 "state = %d (should have been deferred)",
558 node->acsm_state);
559 openais_exit_error (AIS_DONE_FATAL_ERR);
560 break;
565 /******************************************************************************
566 * Event response methods
567 *****************************************************************************/
570 * This event indicates that an application has started. Started in this context
571 * means that none of its contained service units is in an -ING state with other
572 * words successfully instantiated, instantiation has failed or instantiation
573 * was not possible (due to the node on which the SU was to be hosted is not
574 * operational).
576 * @param node
577 * @param application which has been started
579 void amf_node_application_started (struct amf_node *node,
580 struct amf_application *app)
582 assert (node != NULL && app != NULL );
583 ENTER ("Node=%s: application '%s' started", node->name.value,
584 app->name.value);
586 switch (node->acsm_state) {
587 case NODE_ACSM_JOINING_STARTING_APPLICATIONS:
588 if (amf_cluster_applications_started_with_no_starting_sgs(
589 app->cluster)) {
591 node_acsm_enter_joining_assigning_workload(node, app);
593 break;
594 default:
595 log_printf (LOG_LEVEL_ERROR, "amf_node_application_started()"
596 "called in state = %d (unexpected !!)", node->acsm_state);
597 openais_exit_error (AIS_DONE_FATAL_ERR);
598 break;
604 * This event indicates that an application has been assigned workload.
606 * @param node
607 * @param app - Application which has been assigned workload
609 void amf_node_application_workload_assigned (struct amf_node *node,
610 struct amf_application *app)
612 assert (node != NULL && app != NULL );
613 ENTER ("Node=%s: application '%s' started", node->name.value,
614 app->name.value);
616 switch (node->acsm_state) {
617 case NODE_ACSM_JOINING_ASSIGNING_WORKLOAD:
619 if (amf_cluster_applications_assigned (amf_cluster)) {
620 log_printf(LOG_NOTICE, "Node=%s: all workload assigned",
621 node->name.value);
622 node_acsm_enter_idle (node);
624 break;
625 default:
626 log_printf (LOG_LEVEL_ERROR, "amf_node_application_workload_assigned()"
627 "called in state = %d (unexpected !!)", node->acsm_state);
628 openais_exit_error (AIS_DONE_FATAL_ERR);
629 break;
634 * This event indicates that an SG has failed over its workload after a node
635 * failure.
637 * @param node
638 * @param sg_in SG which is now ready with its failover
640 void amf_node_sg_failed_over (struct amf_node *node, struct amf_sg *sg_in)
642 assert (node != NULL);
643 ENTER ("Node=%s: SG '%s' started %d", node->name.value,
644 sg_in->name.value,node->acsm_state);
646 switch (node->acsm_state) {
647 case NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER:
648 if (has_all_sg_on_node_failed_over (node)) { /*C2*/
649 node->acsm_state =
650 NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN;
652 break;
653 case NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN:
654 /* Accept reports of failed over sg that has completed. */
655 break;
656 case NODE_ACSM_FAILING_GRACEFULLY_FAILING_OVER:
657 if (has_all_sg_on_node_failed_over (node)) { /*C2*/
658 enter_failing_gracefully_rebooting_node (node);
660 break;
661 default:
662 log_printf (LOG_LEVEL_ERROR, "amf_node_sg_failed_over()"
663 "called in state = %d (unexpected !!)", node->acsm_state);
664 openais_exit_error (AIS_DONE_FATAL_ERR);
665 break;
669 /******************************************************************************
670 * General methods
671 *****************************************************************************/
674 * Node constructor
675 * @param cluster
676 * @param name - RDN of node
678 struct amf_node *amf_node_new (struct amf_cluster *cluster, char *name) {
679 struct amf_node *node = amf_calloc (1, sizeof (struct amf_node));
681 setSaNameT (&node->name, name);
682 node->saAmfNodeAdminState = SA_AMF_ADMIN_UNLOCKED;
683 node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
684 node->saAmfNodeAutoRepair = SA_TRUE;
685 node->saAmfNodeSuFailOverProb = -1;
686 node->saAmfNodeSuFailoverMax = ~0;
687 node->cluster = cluster;
688 node->next = cluster->node_head;
689 cluster->node_head = node;
690 node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
691 node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
692 return node;
695 void *amf_node_serialize (struct amf_node *node, int *len)
697 char *buf = NULL;
698 int offset = 0, size = 0;
700 TRACE8 ("%s", node->name.value);
702 buf = amf_serialize_SaNameT (buf, &size, &offset, &node->name);
703 buf = amf_serialize_SaNameT (buf, &size, &offset, &node->saAmfNodeClmNode);
704 buf = amf_serialize_SaUint32T (buf, &size, &offset,
705 node->saAmfNodeSuFailOverProb);
706 buf = amf_serialize_SaUint32T (buf, &size, &offset,
707 node->saAmfNodeSuFailoverMax);
708 buf = amf_serialize_SaUint32T (buf, &size, &offset,
709 node->saAmfNodeAutoRepair);
710 buf = amf_serialize_SaUint32T (buf, &size, &offset,
711 node->saAmfNodeRebootOnInstantiationFailure);
712 buf = amf_serialize_SaUint32T (buf, &size, &offset,
713 node->saAmfNodeRebootOnTerminationFailure);
714 buf = amf_serialize_SaUint32T (buf, &size, &offset,
715 node->saAmfNodeAdminState);
716 buf = amf_serialize_SaUint32T (buf, &size, &offset,
717 node->saAmfNodeOperState);
718 buf = amf_serialize_SaUint32T (buf, &size, &offset,
719 node->nodeid);
720 buf = amf_serialize_SaUint32T (buf, &size, &offset,
721 node->acsm_state);
722 buf = amf_serialize_SaUint32T (buf, &size, &offset,
723 node->history_state);
725 *len = offset;
727 return buf;
730 struct amf_node *amf_node_deserialize (struct amf_cluster *cluster, char *buf) {
731 char *tmp = buf;
732 struct amf_node *node = amf_node_new (cluster, "");
734 tmp = amf_deserialize_SaNameT (tmp, &node->name);
735 tmp = amf_deserialize_SaNameT (tmp, &node->saAmfNodeClmNode);
736 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeSuFailOverProb);
737 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeSuFailoverMax);
738 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeAutoRepair);
739 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeRebootOnInstantiationFailure);
740 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeRebootOnTerminationFailure);
741 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeAdminState);
742 tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeOperState);
743 tmp = amf_deserialize_SaUint32T (tmp, &node->nodeid);
744 tmp = amf_deserialize_SaUint32T (tmp, &node->acsm_state);
745 tmp = amf_deserialize_SaUint32T (tmp, &node->history_state);
747 return node;
750 struct amf_node *amf_node_find (SaNameT *name) {
751 struct amf_node *node;
753 assert (name != NULL && amf_cluster != NULL);
755 for (node = amf_cluster->node_head; node != NULL; node = node->next) {
756 if (name_match (&node->name, name)) {
757 return node;
761 dprintf ("node %s not found in configuration!", name->value);
763 return NULL;
766 struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid) {
767 struct amf_node *node;
769 assert (amf_cluster != NULL);
771 for (node = amf_cluster->node_head; node != NULL; node = node->next) {
772 if (node->nodeid == nodeid) {
773 return node;
777 dprintf ("node %u not found in configuration!", nodeid);
779 return NULL;
782 struct amf_node *amf_node_find_by_hostname (const char *hostname) {
783 struct amf_node *node;
785 assert (hostname != NULL && amf_cluster != NULL);
787 for (node = amf_cluster->node_head; node != NULL; node = node->next) {
788 if (strcmp ((char*)node->saAmfNodeClmNode.value, hostname) == 0) {
789 return node;
793 dprintf ("node %s not found in configuration!", hostname);
795 return NULL;