4 * Copyright (c) 2006 Ericsson AB.
5 * Author: Hans Feldt, Anders Eriksson, Lars Holm
6 * - Refactoring of code into several AMF files
7 * - Constructors/destructors
8 * - Serializers/deserializers
10 * All rights reserved.
13 * This software licensed under BSD license, the text of which follows:
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are met:
18 * - Redistributions of source code must retain the above copyright notice,
19 * this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 * - Neither the name of the MontaVista Software, Inc. nor the names of its
24 * contributors may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
37 * THE POSSIBILITY OF SUCH DAMAGE.
39 * AMF Cluster Class Implementation
41 * This file contains functions for handling the AMF cluster. It can be
42 * viewed as the implementation of the AMF Cluster class
43 * as described in SAI-Overview-B.02.01. The SA Forum specification
44 * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
45 * and is referred to as 'the spec' below.
47 * The functions in this file are responsible for:
48 * - to start the cluster initially
49 * - to handle the administrative operation support for the cluster (FUTURE)
51 * The cluster class contains the following state machines:
52 * - administrative state machine (ADSM)
53 * - availability control state machine (ACSM)
55 * The administrative state machine will be implemented in the future.
57 * ACSM handles initial start of the cluster. In the future it will also handle
58 * administrative commands on the cluster as described in paragraph 7.4 of the
59 * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
60 * number of states to control the transition between the stable states.
62 * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
63 * future this state will also be assumed after the LOCK_INSTANTIATION
64 * administrative command.)
66 * State STARTED is assumed when the cluster has been initially started and
67 * will in the future be re-assumed after the administrative command RESTART
70 * 1. Cluster Availability Control State Machine
71 * =============================================
73 * 1.1 State Transition Table
75 * State: Event: Action: New state:
76 * ===========================================================================
77 * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
78 * STARTING_APPS sync_ready A2,A1 STARTING_APPS
79 * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
80 * STARTING_APPS local_timer_expired A8 STARTING_APPS
81 * STARTING_APPS time_out A7,A8 WAITING_OVERTIME_1
82 * WAITING_OVERTIME_1 sync_ready A4 WAITING_OVERTIME_1
83 * WAITING_OVERTIME_1 time_out [C2] A7 ASSIGNING_WORKLOAD
84 * WAITING_OVERTIME_1 time_out A7 WAITING_OVERTIME_2
85 * WAITING_OVERTIME_1 app_started [C2] A3 ASSIGNING_WORKLOAD
86 * WAITING_OVERTIME_2 sync_ready A4 WAITING_OVERTIME_2
87 * WAITING_OVERTIME_2 app_started [C2] A3 ASSIGNING_WORKLOAD
88 * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
89 * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
90 * STARTED sync_ready A5 STARTED
92 * 1.2 State Description
93 * =====================
94 * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
95 * STARTING_APPLICATIONS - All applications have been requested to start
96 * their contained SGs, which in turn have requested
97 * their contained SUs to instantiate all their
98 * components. The cluster startup timer is running.
99 * WAITING_OVERTIME_1 - The cluster startup timer has expired but not all
100 * applications have yet responded that they have been
101 * started. The time-out message is broadcast again to
102 * make sure there are no other broadcast messages pending.
103 * (This assures first of all that there is no pending
104 * 'component instantiate' message.)
105 * WAITING_OVERTIME_2 - The cluster startup timer has expired but not all
106 * applications have yet responded that they have been
107 * started. Cluster will wait indefinitely for the
108 * applications to respond. It is correct to do so even when
109 * the startup timer has expired, because the applications
110 * will report they are started as soon as there is no
111 * attempt to instantiate any of its components pending,
112 * because attempts to instantiate a component can not go on
113 * forever, see saAmfCompInstantiateTimeout,
114 * saAmfCompNumMaxInstantiateWithoutDelay and
115 * saAmfCompNumMaxInstantiateWithDelay.
116 * ASSIGNING_WORKLOAD - All applications have been requested to assign their
117 * specified workload to their service units according to
118 * the redundancy model specified by their SGs.
119 * STARTED - A best effort has been made to instantiate the components of all
120 * applications and assign the specified workload as close as possible
121 * to what is described in the configuration.
125 * A1 - [foreach application in cluster]/start application
126 * A2 - start cluster startup timer
127 * A3 - [foreach application in cluster]/assign workload to application
128 * A4 - defer sync_ready event
129 * A5 - forward sync_ready to appropriate node object
130 * A6 - recall deferred event
131 * A7 - stop node local instance of cluster startup timer
132 * A8 - multicast 'cluster startup timer time-out' event (time_out)
136 * C1 - Administrative state == UNLOCKED
137 * C2 - No SU has presence state == INSTANTIATING
138 * C3 - All SGs are fully instantiated
139 * C4 - No Application has Availability Control state == ASSIGNING_WORKLOAD
152 LOGSYS_DECLARE_SUBSYS ("AMF", LOG_INFO
);
154 typedef struct cluster_event
{
155 amf_cluster_event_type_t event_type
;
156 amf_cluster_t
*cluster
;
160 /******************************************************************************
161 * Internal (static) utility functions
162 *****************************************************************************/
164 static void cluster_defer_event (amf_cluster_event_type_t event_type
,
165 struct amf_cluster
*cluster
, struct amf_node
* node
)
167 cluster_event_t sync_ready_event
= {event_type
, cluster
, node
};
168 amf_fifo_put (event_type
, &cluster
->deferred_events
,
169 sizeof (cluster_event_t
),
173 static void cluster_recall_deferred_events (amf_cluster_t
*cluster
)
175 cluster_event_t cluster_event
;
177 if (amf_fifo_get (&cluster
->deferred_events
, &cluster_event
)) {
178 switch (cluster_event
.event_type
) {
179 case CLUSTER_SYNC_READY_EV
:
180 log_printf (LOG_NOTICE
,
181 "Recall CLUSTER_SYNC_READY_EV");
183 amf_node_sync_ready (cluster_event
.node
);
192 static void timer_function_cluster_recall_deferred_events (void *data
)
194 amf_cluster_t
*cluster
= (amf_cluster_t
*)data
;
197 cluster_recall_deferred_events (cluster
);
201 * Determine if all applications are started so that all
202 * SUs is in SA_AMF_PRESENCE_INSTANTIATED presense state
205 * @return 1; All applications are started
207 static int cluster_applications_started_instantiated (struct amf_cluster
*cluster
)
210 struct amf_application
*app
;
214 for (app
= cluster
->application_head
; app
!= NULL
; app
= app
->next
) {
215 for (sg
= app
->sg_head
; sg
!= NULL
; sg
= sg
->next
) {
216 for (su
= sg
->su_head
; su
!= NULL
; su
= su
->next
) {
217 if (su
->saAmfSUPresenceState
!= SA_AMF_PRESENCE_INSTANTIATED
) {
230 * Determine if any SGs are in the process of instantiating their SUs.
233 * @return 1; At least one SG is in the process of instantiating.
235 static int cluster_applications_are_starting_sgs(struct amf_cluster
*cluster
)
237 amf_application_t
*application
;
240 int is_starting_sgs
= 0;
242 for (application
= cluster
->application_head
; application
!= NULL
;
243 application
= application
->next
) {
244 for (sg
= application
->sg_head
; sg
!= NULL
; sg
= sg
->next
) {
245 for (su
= sg
->su_head
; su
!= NULL
; su
= su
->next
) {
247 if (su
->saAmfSUPresenceState
==
248 SA_AMF_PRESENCE_INSTANTIATING
) {
255 return is_starting_sgs
;
258 static void amf_cluster_assign_workload (struct amf_cluster
*cluster
)
260 struct amf_application
*app
;
263 for (app
= cluster
->application_head
; app
!= NULL
; app
= app
->next
) {
264 amf_application_assign_workload (app
, NULL
);
268 static void acsm_cluster_enter_assigning_workload (struct amf_cluster
*cluster
)
270 log_printf(LOG_NOTICE
,
271 "Cluster: all applications started, assigning workload.");
272 cluster
->acsm_state
= CLUSTER_AC_ASSIGNING_WORKLOAD
;
273 amf_cluster_assign_workload (cluster
);
276 static void timer_function_cluster_assign_workload_tmo (void *cluster
)
278 ((struct amf_cluster
*)cluster
)->timeout_handle
= 0;
282 amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO
, &this_amf_node
->name
,
286 static inline void stop_cluster_startup_timer (struct amf_cluster
*cluster
)
288 if (cluster
->timeout_handle
) {
289 dprintf ("Stop cluster startup timer");
290 poll_timer_delete (aisexec_poll_handle
,
291 cluster
->timeout_handle
);
292 cluster
->timeout_handle
= 0;
296 static void start_cluster_startup_timer (struct amf_cluster
*cluster
)
298 if (cluster
->timeout_handle
== 0) {
299 poll_timer_add (aisexec_poll_handle
,
300 cluster
->saAmfClusterStartupTimeout
,
302 timer_function_cluster_assign_workload_tmo
,
303 &cluster
->timeout_handle
);
307 static inline void cluster_enter_starting_applications (
308 struct amf_cluster
*cluster
)
311 start_cluster_startup_timer (cluster
);
312 amf_cluster
->acsm_state
= CLUSTER_AC_STARTING_APPLICATIONS
;
313 amf_cluster_start_applications (cluster
);
316 static void acsm_cluster_enter_started (amf_cluster_t
*cluster
)
319 amf_cluster
->acsm_state
= CLUSTER_AC_STARTED
;
320 amf_call_function_asynchronous (
321 timer_function_cluster_recall_deferred_events
, cluster
);
324 /******************************************************************************
326 *****************************************************************************/
328 void amf_cluster_start_tmo_event (int is_sync_masterm
,
329 struct amf_cluster
*cluster
, SaNameT
*sourceNodeName
)
331 ENTER ("acsm_state = %d", amf_cluster
->acsm_state
);
333 stop_cluster_startup_timer (cluster
);
335 switch (cluster
->acsm_state
) {
336 case CLUSTER_AC_WAITING_OVER_TIME_1
:
337 if (cluster_applications_are_starting_sgs (cluster
)) {
338 dprintf ("Cluster startup timeout,"
339 "start waiting over time");
340 amf_cluster
->acsm_state
=
341 CLUSTER_AC_WAITING_OVER_TIME_2
;
343 dprintf ("Cluster startup timeout,"
344 " assigning workload");
345 acsm_cluster_enter_assigning_workload (cluster
);
348 case CLUSTER_AC_STARTING_APPLICATIONS
:
349 cluster
->acsm_state
= CLUSTER_AC_WAITING_OVER_TIME_1
;
350 if (name_match (&this_amf_node
->name
, sourceNodeName
)) {
351 timer_function_cluster_assign_workload_tmo (cluster
);
355 case CLUSTER_AC_ASSIGNING_WORKLOAD
:
356 /* ignore cluster startup timer expiration */
357 case CLUSTER_AC_STARTED
:
358 /* ignore cluster startup timer expiration */
359 case CLUSTER_AC_WAITING_OVER_TIME_2
:
360 /* ignore cluster startup timer expiration */
363 log_printf(LOG_LEVEL_ERROR
, "Cluster timout expired"
365 " state = %d", cluster
->acsm_state
);
373 * Start all applications in the cluster and start
374 * the cluster startup timeout.
378 void amf_cluster_start_applications(struct amf_cluster
*cluster
)
380 struct amf_application
*app
;
381 for (app
= cluster
->application_head
; app
!= NULL
; app
= app
->next
) {
382 amf_application_start (app
, NULL
);
387 * A new node has joined the cluster and is now synchronized with the nodes that
388 * was part of the cluster before.
392 void amf_cluster_sync_ready (struct amf_cluster
*cluster
, struct amf_node
*node
)
395 switch (amf_cluster
->acsm_state
) {
396 case CLUSTER_AC_UNINSTANTIATED
:
397 if (amf_cluster
->saAmfClusterAdminState
==
398 SA_AMF_ADMIN_UNLOCKED
) {
399 cluster_enter_starting_applications (cluster
);
402 case CLUSTER_AC_STARTING_APPLICATIONS
:
403 cluster_enter_starting_applications(cluster
);
405 case CLUSTER_AC_ASSIGNING_WORKLOAD
:
407 * Defer assigning workload to those syncronized nodes to
408 * CLUSTER_AC_STARTED state.
410 cluster_defer_event (CLUSTER_SYNC_READY_EV
, cluster
,
413 case CLUSTER_AC_WAITING_OVER_TIME_2
:
415 * Defer assigning workload to those syncronized nodes to
416 * CLUSTER_AC_STARTED state.
418 cluster_defer_event (CLUSTER_SYNC_READY_EV
, cluster
,
421 case CLUSTER_AC_STARTED
:
422 TRACE1 ("Node sync ready sent from cluster in "
423 "CLUSTER_AC_STARTED state");
424 amf_node_sync_ready (node
);
428 log_printf(LOG_LEVEL_ERROR
, "Cluster sync ready event"
429 " received in wrong cluster"
430 " state = %d", cluster
->acsm_state
);
436 /******************************************************************************
437 * Event response methods
438 *****************************************************************************/
441 * An application indicates it has been started or the application indicates it
442 * was not even possible to try to start because the required nodes were not
447 void amf_cluster_application_started (
448 struct amf_cluster
*cluster
, struct amf_application
*application
)
450 ENTER ("application '%s' started %d", application
->name
.value
,
451 cluster
->acsm_state
);
452 switch (cluster
->acsm_state
) {
453 case CLUSTER_AC_STARTING_APPLICATIONS
:
454 if (cluster_applications_started_instantiated (cluster
)) {
455 stop_cluster_startup_timer (cluster
);
456 acsm_cluster_enter_assigning_workload (cluster
);
459 case CLUSTER_AC_WAITING_OVER_TIME_1
:
460 case CLUSTER_AC_WAITING_OVER_TIME_2
:
461 if (amf_cluster_applications_started_with_no_starting_sgs (cluster
)) {
462 acsm_cluster_enter_assigning_workload (cluster
);
466 log_printf (LOG_ERR
,"Error invalid cluster availability state %d",
467 cluster
->acsm_state
);
468 openais_exit_error(cluster
->acsm_state
);
475 * An application indicates it has assigned workload to all its contained SUs.
478 void amf_cluster_application_workload_assigned (
479 struct amf_cluster
*cluster
, struct amf_application
*app
)
482 switch (cluster
->acsm_state
) {
483 case CLUSTER_AC_ASSIGNING_WORKLOAD
:
484 log_printf (LOG_NOTICE
, "Cluster: application %s assigned.",
486 if (amf_cluster_applications_assigned (cluster
)) {
487 acsm_cluster_enter_started (cluster
);
496 /******************************************************************************
498 *****************************************************************************/
500 struct amf_cluster
*amf_cluster_new (void)
502 struct amf_cluster
*cluster
= amf_calloc (1,
503 sizeof (struct amf_cluster
));
505 cluster
->saAmfClusterStartupTimeout
= -1;
506 cluster
->saAmfClusterAdminState
= SA_AMF_ADMIN_UNLOCKED
;
507 cluster
->deferred_events
= 0;
508 cluster
->acsm_state
= CLUSTER_AC_UNINSTANTIATED
;
512 void *amf_cluster_serialize (struct amf_cluster
*cluster
, int *len
)
515 int offset
= 0, size
= 0;
517 TRACE8 ("%s", cluster
->name
.value
);
519 buf
= amf_serialize_SaNameT (buf
, &size
, &offset
, &cluster
->name
);
520 buf
= amf_serialize_SaUint32T (buf
, &size
, &offset
,
521 cluster
->saAmfClusterStartupTimeout
);
522 buf
= amf_serialize_SaNameT (buf
, &size
, &offset
,
523 &cluster
->saAmfClusterClmCluster
);
524 buf
= amf_serialize_SaUint32T (buf
, &size
, &offset
,
525 cluster
->saAmfClusterAdminState
);
526 buf
= amf_serialize_SaUint32T (buf
, &size
, &offset
, cluster
->acsm_state
);
533 struct amf_cluster
*amf_cluster_deserialize (char *buf
)
536 struct amf_cluster
*cluster
= amf_cluster_new ();
538 tmp
= amf_deserialize_SaNameT (tmp
, &cluster
->name
);
539 tmp
= amf_deserialize_SaUint32T (tmp
, &cluster
->saAmfClusterStartupTimeout
);
540 tmp
= amf_deserialize_SaNameT (tmp
, &cluster
->saAmfClusterClmCluster
);
541 tmp
= amf_deserialize_SaUint32T (tmp
, &cluster
->saAmfClusterAdminState
);
542 tmp
= amf_deserialize_SaUint32T (tmp
, &cluster
->acsm_state
);
/**
 * Determine if no SG is in the process of instantiating its SUs any more
 * (condition C2). This is the negation of
 * cluster_applications_are_starting_sgs().
 * @param cluster cluster to inspect
 *
 * @return 1; No SU is in presence state INSTANTIATING
 *         0; At least one SU is still instantiating
 */
int amf_cluster_applications_started_with_no_starting_sgs (
	struct amf_cluster *cluster)
{
	return !cluster_applications_are_starting_sgs (cluster);
}
560 * Determine if all Applications have been assigned workload.
563 * @return 1; All Applications have been assigned workload.
565 int amf_cluster_applications_assigned (struct amf_cluster
*cluster
)
567 struct amf_application
*app
= 0;
568 int is_all_application_assigned
= 1;
570 for (app
= cluster
->application_head
; app
!= NULL
; app
= app
->next
) {
571 if (app
->acsm_state
!= APP_AC_WORKLOAD_ASSIGNED
) {
572 is_all_application_assigned
= 0;
576 return is_all_application_assigned
;