//=============================================================================
/**
 *  @file    FT_ReplicationManagerFaultAnalyzer.cpp
 *
 *  This file is part of TAO's implementation of Fault Tolerant CORBA.
 *
 *  @author Steve Totten <totten_s@ociweb.com>
 */
//=============================================================================
12 #include "orbsvcs/Log_Macros.h"
13 #include "FT_ReplicationManagerFaultAnalyzer.h"
14 #include "orbsvcs/CosNotifyCommC.h"
15 #include "orbsvcs/FT_NotifierC.h"
16 #include "orbsvcs/FT_ReplicationManager/FT_ReplicationManager.h"
17 #include "orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h"
18 #include "orbsvcs/PortableGroup/PG_Property_Utils.h"
19 #include "orbsvcs/PortableGroup/PG_Operators.h"
20 #include "orbsvcs/FaultTolerance/FT_IOGR_Property.h"
21 #include "tao/debug.h"
24 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
/// Constructor.
/// Takes a pointer-to-const replication manager, removes the const
/// qualifier with const_cast, and stores the result in replication_manager_.
/// NOTE(review): the constructor body is not visible in this view -- confirm
/// that nothing else is initialized here.
27 TAO::FT_ReplicationManagerFaultAnalyzer::FT_ReplicationManagerFaultAnalyzer (
28 const TAO::FT_ReplicationManager
* replication_manager
)
29 : replication_manager_ (
30 const_cast<TAO::FT_ReplicationManager
*> (replication_manager
))
/// Destructor.
/// NOTE(review): the destructor body is not visible in this view.
35 TAO::FT_ReplicationManagerFaultAnalyzer::~FT_ReplicationManagerFaultAnalyzer ()
39 // Validate the event to make sure it is one we can handle.
40 // If it is not an event we can handle, this function logs the error
// NOTE(review): the end of the sentence above is not visible in this view.
//
// @param event  The structured fault event to validate.
// @return       Whatever FT_DefaultFaultAnalyzer::validate_event_type
//               returns for the same event; no extra checks are added here.
42 int TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type (
43 const CosNotification::StructuredEvent
& event
)
45 // Delegate to base class.
46 //@@ Visual C++ 6.0 won't compile this if I include the namespace name
// The fully qualified form is kept below, commented out, for reference.
48 // return TAO::FT_DefaultFaultAnalyzer::validate_event_type (event);
49 return FT_DefaultFaultAnalyzer::validate_event_type (event
);
52 /// Analyze a fault event.
///
/// Builds a TAO::FT_FaultEventDescriptor from the event's filterable data
/// and dispatches to single_replica_failure(), location_failure() or
/// type_failure() according to how many properties the event carries
/// (4, 2 or 3 respectively).
///
/// @param event  The structured fault event to analyze.
///
/// NOTE(review): the declaration of `result` and the final return statement
/// are not visible in this view; the helpers below are only called while
/// `result == 0`, so 0 is treated as "success so far".
/// NOTE(review): filterable[1] is indexed below without a length check in
/// this function -- presumably validate_event_type() is expected to have
/// been called first; confirm against the caller.
53 int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event (
54 const CosNotification::StructuredEvent
& event
)
58 const CosNotification::FilterableEventBody
& filterable
=
59 event
.filterable_data
;
60 CORBA::ULong item_count
= filterable
.length ();
// At debug level above 6, log the name of every property in the event.
61 if (TAO_debug_level
> 6)
63 for (CORBA::ULong n_prop
= 0; n_prop
< item_count
; ++n_prop
)
65 ORBSVCS_DEBUG ((LM_DEBUG
,
66 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: ")
67 ACE_TEXT("Property Name: <%C>\n"),
68 filterable
[n_prop
].name
.in()
73 // Populate a TAO::FT_FaultEventDescriptor structure from the
74 // properties in the event.
75 TAO::FT_FaultEventDescriptor fault_event_desc
;
77 // Extract the location.
// The location is read from the second property (index 1).
80 result
= this->get_location (
81 filterable
[1].value
, fault_event_desc
.location
.out());
84 // CORBA 3.0.2, section 23.4.5.1 states:
86 // The fault detector may or may not set the TypeId and
87 // ObjectGroupId fields with the following interpretations:
88 // - Neither is set if all objects at the given location have failed.
89 // - TypeId is set and ObjectGroupId is not set if all objects at
90 // the given location with the given type have failed.
91 // - Both are set if the member with the given ObjectGroupId at the
92 // given location has failed.
// Two properties: only the location was supplied.
94 if ((result
== 0) && (item_count
== 2))
96 // All objects at location failed.
97 fault_event_desc
.all_at_location_failed
= 1;
// Three properties: the TypeId is read from index 2.
100 if ((result
== 0) && (item_count
== 3))
102 // All objects of type at location failed.
103 fault_event_desc
.all_of_type_at_location_failed
= 1;
104 result
= this->get_type_id (
105 filterable
[2].value
, fault_event_desc
.type_id
.out());
// Four properties: TypeId at index 2 and ObjectGroupId at index 3.
108 if ((result
== 0) && (item_count
== 4))
110 // An object (replica) at a location failed.
111 fault_event_desc
.object_at_location_failed
= 1;
112 result
= this->get_type_id (
113 filterable
[2].value
, fault_event_desc
.type_id
.out());
116 result
= this->get_object_group_id (
117 filterable
[3].value
, fault_event_desc
.object_group_id
);
// Dispatch on the flag set above; each handler runs only if every
// extraction so far succeeded.
121 // A specific object at a location failed.
122 if ((result
== 0) && (fault_event_desc
.object_at_location_failed
== 1))
124 result
= this->single_replica_failure (fault_event_desc
);
127 // All objects at location failed.
128 if ((result
== 0) && (fault_event_desc
.all_at_location_failed
== 1))
130 result
= this->location_failure (fault_event_desc
);
133 // All objects of type at location failed.
134 if ((result
== 0) && (fault_event_desc
.all_of_type_at_location_failed
== 1))
136 result
= this->type_failure (fault_event_desc
);
139 // Debugging support.
// Dump the populated descriptor at debug level above 6.
140 if (TAO_debug_level
> 6)
142 fault_event_desc
.dump ();
148 // Extract a string type_id from CORBA::Any.
149 // Caller owns the string returned via <type_id>.
//
// @param val      Any expected to hold a string.
// @param type_id  Receives a copy of the string made with
//                 CORBA::string_dup.
// If the extraction from <val> fails, an error is logged through
// ORBSVCS_ERROR_RETURN.
// NOTE(review): the value passed to ORBSVCS_ERROR_RETURN and the success
// return are not visible in this view.
150 int TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id (
151 const CORBA::Any
& val
, PortableGroup::TypeId_out type_id
)
// Filled in by the extraction below.
153 const char* type_id_value
;
154 if ((val
>>= type_id_value
) == 0)
156 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
157 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id: ")
158 ACE_TEXT("Could not extract TypeId value from any.\n")),
162 // Make a deep copy of the TypeId string.
163 type_id
= CORBA::string_dup (type_id_value
);
167 // Extract the ObjectGroupId from CORBA::Any.
//
// @param val  Any expected to hold a PortableGroup::ObjectGroupId.
// @param id   Output parameter for the extracted id.
// If the extraction from <val> fails, an error is logged through
// ORBSVCS_ERROR_RETURN.
// NOTE(review): the assignment of the extracted value to <id> and the
// return statements are not visible in this view.
168 int TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id (
169 const CORBA::Any
& val
, PortableGroup::ObjectGroupId
& id
)
// Start from zero so the temporary has a defined value before extraction.
171 PortableGroup::ObjectGroupId temp_id
= (PortableGroup::ObjectGroupId
)0;
172 if ((val
>>= temp_id
) == 0)
174 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
175 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id: ")
176 ACE_TEXT("Could not extract ObjectGroupId value from any.\n")),
// Extract a PortableGroup::Location from a CORBA::Any.
//
// @param val       Any expected to hold a PortableGroup::Location.
// @param location  Receives a newly allocated copy of the Location.
// If the extraction fails, an error is logged through ORBSVCS_ERROR_RETURN.
// If the allocation of the copy fails, ACE_NEW_RETURN returns -1.
// NOTE(review): the value passed to ORBSVCS_ERROR_RETURN and the success
// return are not visible in this view.
183 int TAO::FT_ReplicationManagerFaultAnalyzer::get_location (
184 const CORBA::Any
& val
, PortableGroup::Location_out location
)
// The extraction yields a pointer to const, so a copy is made below.
186 const PortableGroup::Location
* temp_loc
;
187 if ((val
>>= temp_loc
) == 0)
189 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
190 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_location: ")
191 ACE_TEXT("Could not extract Location value from fault event.\n")),
194 // Make a deep copy of the Location.
195 ACE_NEW_RETURN (location
, PortableGroup::Location (*temp_loc
), -1);
200 //TODO: Use TAO::PG_Property_Set to get property values from properties
201 // instead of all these specific "get" functions.
204 // Get the MembershipStyle property.
//
// @param properties        Property list to search.
// @param membership_style  Receives the value extracted from the property.
// Looks the property up under the name FT::FT_MEMBERSHIP_STYLE and, when
// both lookup and extraction succeed, logs the value at debug level above 6.
// NOTE(review): the failure branch, the last argument of the debug message
// and the return statement are not visible in this view.
205 int TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style (
206 const PortableGroup::Properties
& properties
,
207 PortableGroup::MembershipStyleValue
& membership_style
)
// Build a one-component name holding the property identifier.
209 PortableGroup::Name
prop_name (1);
210 prop_name
.length (1);
211 prop_name
[0].id
= CORBA::string_dup (FT::FT_MEMBERSHIP_STYLE
);
214 PortableGroup::Value value
;
// The property must be present and its value must extract cleanly.
215 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
216 && ((value
>>= membership_style
) == 1))
218 if (TAO_debug_level
> 6)
220 ORBSVCS_DEBUG ((LM_DEBUG
,
221 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style: ")
222 ACE_TEXT("MembershipStyle is <%d>:\n"),
// Get the ReplicationStyle property.
//
// @param properties         Property list to search.
// @param replication_style  Receives the value extracted from the property.
// Looks the property up under the name FT::FT_REPLICATION_STYLE and, when
// both lookup and extraction succeed, logs the value at debug level above 6.
// NOTE(review): the failure branch, the last argument of the debug message
// and the return statement are not visible in this view.
235 int TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style (
236 const PortableGroup::Properties
& properties
,
237 FT::ReplicationStyleValue
& replication_style
)
// Build a one-component name holding the property identifier.
239 PortableGroup::Name
prop_name (1);
240 prop_name
.length (1);
241 prop_name
[0].id
= CORBA::string_dup (FT::FT_REPLICATION_STYLE
);
244 PortableGroup::Value value
;
// The property must be present and its value must extract cleanly.
245 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
246 && ((value
>>= replication_style
) == 1))
248 if (TAO_debug_level
> 6)
250 ORBSVCS_DEBUG ((LM_DEBUG
,
251 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style: ")
252 ACE_TEXT ("ReplicationStyle is <%d>:\n"),
// Get the MinimumNumberMembers property.
//
// @param properties              Property list to search.
// @param minimum_number_members  Receives the value extracted from the
//                                property.
// Looks the property up under the name FT::FT_MINIMUM_NUMBER_MEMBERS and,
// when both lookup and extraction succeed, logs the value at debug level
// above 6.
// NOTE(review): the failure branch and the return statement are not
// visible in this view.
265 int TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members (
266 const PortableGroup::Properties
& properties
,
267 PortableGroup::MinimumNumberMembersValue
& minimum_number_members
)
// Build a one-component name holding the property identifier.
269 PortableGroup::Name
prop_name (1);
270 prop_name
.length (1);
271 prop_name
[0].id
= CORBA::string_dup (FT::FT_MINIMUM_NUMBER_MEMBERS
);
274 PortableGroup::Value value
;
// The property must be present and its value must extract cleanly.
275 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
276 && ((value
>>= minimum_number_members
) == 1))
278 if (TAO_debug_level
> 6)
280 ORBSVCS_DEBUG ((LM_DEBUG
,
281 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members: ")
282 ACE_TEXT ("MinimumNumberMembers is <%d>:\n"),
283 minimum_number_members
// Get the InitialNumberMembers property.
//
// @param properties              Property list to search.
// @param initial_number_members  Receives the value extracted from the
//                                property.
// Looks the property up under the name FT::FT_INITIAL_NUMBER_MEMBERS and,
// when both lookup and extraction succeed, logs the value at debug level
// above 6.
// NOTE(review): the failure branch and the return statement are not
// visible in this view.
295 int TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members (
296 const PortableGroup::Properties
& properties
,
297 PortableGroup::InitialNumberMembersValue
& initial_number_members
)
// Build a one-component name holding the property identifier.
299 PortableGroup::Name
prop_name (1);
300 prop_name
.length (1);
301 prop_name
[0].id
= CORBA::string_dup (FT::FT_INITIAL_NUMBER_MEMBERS
);
304 PortableGroup::Value value
;
// The property must be present and its value must extract cleanly.
305 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
306 && ((value
>>= initial_number_members
) == 1))
308 if (TAO_debug_level
> 6)
310 ORBSVCS_DEBUG ((LM_DEBUG
,
311 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members: ")
312 ACE_TEXT ("InitialNumberMembers is <%d>:\n"),
313 initial_number_members
// Get the Factories property.
//
// @param properties  Property list to search.
// @param factories   Receives a newly allocated copy of the FactoryInfos
//                    held by the property.
// Two failures are logged separately: the property is present but its
// value cannot be extracted, and the property is not found at all.
// If the allocation of the copy fails, ACE_NEW_RETURN returns -1.
// NOTE(review): the assignments to the result code and the return
// statement are not visible in this view.
325 int TAO::FT_ReplicationManagerFaultAnalyzer::get_factories (
326 const PortableGroup::Properties
& properties
,
327 PortableGroup::FactoryInfos_out factories
)
// Build a one-component name holding the property identifier.
329 PortableGroup::Name
prop_name (1);
330 prop_name
.length (1);
331 prop_name
[0].id
= CORBA::string_dup (FT::FT_FACTORIES
);
// The extraction yields a pointer to const, so a copy is made below.
334 const PortableGroup::FactoryInfos
* temp_factories
= 0;
335 PortableGroup::Value value
;
336 if (TAO_PG::get_property_value (prop_name
, properties
, value
) == 1)
338 if ((value
>>= temp_factories
) == 0)
340 ORBSVCS_ERROR ((LM_ERROR
,
341 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
342 ACE_TEXT ("Could not extract Factories from properties.\n")
348 // Make a deep copy of the Factories.
349 ACE_NEW_RETURN (factories
, PortableGroup::FactoryInfos (*temp_factories
), -1);
// Reached when the Factories property is not in the list.
355 ORBSVCS_ERROR ((LM_ERROR
,
356 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
357 ACE_TEXT ("Could not find Factories property.\n")
// Determine whether the failed member was the primary of its object group.
//
// @param iogr               Object group reference to inspect.
// @param location           Location of the failed member.
// @param object_is_primary  Reset to 0 on entry and set to 1 further down.
// Errors are logged when no primary can be obtained, when the failed
// member's reference is nil, when no primary is set, and when the tagged
// component cannot be read. CORBA exceptions are caught and printed.
// NOTE(review): the arguments of several calls, the try keyword, the
// condition guarding `object_is_primary = 1` (presumably the result of
// _is_equivalent) and the return statements are not visible in this view.
364 int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member (
365 const PortableGroup::ObjectGroup_ptr iogr
,
366 const PortableGroup::Location
& location
,
367 int & object_is_primary
)
369 // To determine if this was a primary that faulted:
370 // Get the TagFTGroupTaggedComponent from the IOGR and search
371 // for the primary, using the TAO_FT_IOGR_Property helper class.
372 // Then, compare the TypeId and Location of the failed object with
373 // those of the primary. If they match, it was a primary fault.
// Default answer is "not the primary".
376 object_is_primary
= 0;
380 // Create an "empty" TAO_FT_IOGR_Property and use it to get the
382 TAO_FT_IOGR_Property temp_ft_prop
;
383 FT::TagFTGroupTaggedComponent ft_group_tagged_component
;
384 CORBA::Boolean got_tagged_component
=
385 temp_ft_prop
.get_tagged_component (
386 iogr
, ft_group_tagged_component
);
387 if (got_tagged_component
)
389 // Create a new TAO_FT_IOGR_Property with the tagged
391 TAO_FT_IOGR_Property
ft_prop (ft_group_tagged_component
);
393 // Check to see if a primary is set.
394 CORBA::Boolean primary_is_set
= ft_prop
.is_primary_set (
398 // Get the primary object.
399 CORBA::Object_var primary_obj
= ft_prop
.get_primary (
401 if (CORBA::is_nil (primary_obj
.in()))
403 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
404 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
405 ACE_TEXT ("Could not get primary IOR from IOGR.\n")),
409 // Get the object reference of the failed member.
410 CORBA::Object_var failed_obj
=
411 this->replication_manager_
->get_member_ref (
413 if (CORBA::is_nil (failed_obj
.in()))
415 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
416 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
417 ACE_TEXT ("Could not get IOR of failed member from IOGR.\n")),
421 // Are the two object refs (primary and failed) equivalent?
422 CORBA::Boolean equiv
= primary_obj
->_is_equivalent (
426 object_is_primary
= 1;
430 else // primary is not set
432 ORBSVCS_ERROR ((LM_ERROR
,
433 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
434 ACE_TEXT ("Primary is not set on IOGR.\n")
439 else // could not get tagged component
441 ORBSVCS_ERROR ((LM_ERROR
,
442 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
443 ACE_TEXT ("Could not get tagged component from IOGR.\n")
// CORBA exceptions raised above are printed with the function name.
448 catch (const CORBA::Exception
& ex
)
450 ex
._tao_print_exception (
451 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: "));
458 // Handle a single replica failure.
//
// @param fault_event_desc  Descriptor of the failure; read for
//                          object_group_id and location, and filled in with
//                          the group's property values as they are fetched.
//
// Visible steps, in order:
//  1. Look up the object group from fault_event_desc.object_group_id and
//     fetch its properties. A nil group reference is logged and
//     PortableGroup::ObjectGroupNotFound is thrown; CORBA exceptions are
//     caught and printed.
//  2. Copy MembershipStyle, ReplicationStyle, MinimumNumberMembers,
//     InitialNumberMembers and Factories into the descriptor.
//  3. For COLD_PASSIVE, WARM_PASSIVE and SEMI_ACTIVE groups, check whether
//     the failed replica was the primary.
//  4. For FT::MEMB_INF_CTRL groups, remove the failed member and, if it was
//     the primary, select a new primary.
//  5. A trailing "#if 0" section holds disabled code that would add members.
//
// NOTE(review): the `result` declaration, the try keyword, the conditions
// guarding the error branches, some call arguments, the "#endif" and the
// return statement are not visible in this view.
// NOTE(review): the four debug format strings of the form "... = <%d>" do
// not end in "\n", unlike the other ORBSVCS_DEBUG / ORBSVCS_ERROR messages
// in this function -- this looks like an oversight.
459 int TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure (
460 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
463 PortableGroup::ObjectGroup_var the_object_group
= PortableGroup::ObjectGroup::_nil();
464 PortableGroup::Properties_var properties
;
468 // Get the object group reference based on the ObjectGroupId.
470 this->replication_manager_
->get_object_group_ref_from_id (
471 fault_event_desc
.object_group_id
);
473 // This should not happen, but let us be safe.
474 if (CORBA::is_nil (the_object_group
.in()))
476 ORBSVCS_ERROR ((LM_ERROR
,
477 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
478 ACE_TEXT ("Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"),
479 fault_event_desc
.object_group_id
481 throw PortableGroup::ObjectGroupNotFound ();
484 // Get the properties associated with this ObjectGroup.
485 properties
= this->replication_manager_
->get_properties (
486 the_object_group
.in());
// CORBA exceptions raised above are printed with the function name.
488 catch (const CORBA::Exception
& ex
)
490 ex
._tao_print_exception (
492 "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: "));
498 // Get the MembershipStyle property.
499 PortableGroup::MembershipStyleValue membership_style
;
500 result
= this->get_membership_style (properties
.in(), membership_style
);
// A missing MembershipStyle goes through ORBSVCS_ERROR_RETURN.
503 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
504 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
505 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
506 ACE_TEXT ("Could not extract MembershipStyle from properties on ")
507 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
508 fault_event_desc
.object_group_id
),
513 fault_event_desc
.membership_style
= membership_style
;
514 if (TAO_debug_level
> 6)
516 ORBSVCS_DEBUG ((LM_DEBUG
,
517 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
518 ACE_TEXT ("MembershipStyleValue = <%d>"),
519 fault_event_desc
.membership_style
524 // Get the ReplicationStyle property.
525 FT::ReplicationStyleValue replication_style
;
526 result
= this->get_replication_style (properties
.in(), replication_style
);
// A missing ReplicationStyle goes through ORBSVCS_ERROR_RETURN.
529 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
530 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
531 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
532 ACE_TEXT ("Could not extract ReplicationStyle from properties on ")
533 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
534 fault_event_desc
.object_group_id
),
539 fault_event_desc
.replication_style
= replication_style
;
540 if (TAO_debug_level
> 6)
542 ORBSVCS_DEBUG ((LM_DEBUG
,
543 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
544 ACE_TEXT ("ReplicationStyleValue = <%d>"),
545 fault_event_desc
.replication_style
550 // Get the MinimumNumberMembers property.
551 PortableGroup::MinimumNumberMembersValue minimum_number_members
;
552 result
= this->get_minimum_number_members (
553 properties
.in(), minimum_number_members
);
556 // This is not a fatal error. It may be App Controlled.
// The error below is only logged at debug level above 3.
558 if (TAO_debug_level
> 3)
560 ORBSVCS_ERROR ((LM_ERROR
,
561 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
562 ACE_TEXT ("Could not extract MinimumNumberMembers from properties on ")
563 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
564 fault_event_desc
.object_group_id
));
569 fault_event_desc
.minimum_number_members
= minimum_number_members
;
570 if (TAO_debug_level
> 6)
572 ORBSVCS_DEBUG ((LM_DEBUG
,
573 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
574 ACE_TEXT ("MinimumNumberMembers = <%d>"),
575 fault_event_desc
.minimum_number_members
580 // Get the InitialNumberMembers property.
581 PortableGroup::InitialNumberMembersValue initial_number_members
;
582 result
= this->get_initial_number_members (
583 properties
.in(), initial_number_members
);
586 // This is not a fatal error. It may be App Controlled.
// The error below is only logged at debug level above 3.
588 if (TAO_debug_level
> 3)
590 ORBSVCS_ERROR ((LM_ERROR
,
591 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
592 ACE_TEXT ("Could not extract InitialNumberMembers from properties on ")
593 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
594 fault_event_desc
.object_group_id
));
599 fault_event_desc
.initial_number_members
= initial_number_members
;
600 if (TAO_debug_level
> 6)
602 ORBSVCS_DEBUG ((LM_DEBUG
,
603 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
604 ACE_TEXT ("InitialNumberMembers = <%d>"),
605 fault_event_desc
.initial_number_members
610 // Get the Factories property.
// The factories are written directly into the descriptor.
611 result
= this->get_factories (
613 fault_event_desc
.factories
.out());
616 // This is not a fatal error. It may be App Controlled.
// The error below is only logged at debug level above 3.
618 if (TAO_debug_level
> 3)
620 ORBSVCS_ERROR ((LM_ERROR
,
621 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
622 ACE_TEXT ("Could not extract Factories from properties on ")
623 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
624 fault_event_desc
.object_group_id
));
629 if (TAO_debug_level
> 6)
631 ORBSVCS_DEBUG ((LM_DEBUG
,
632 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
633 ACE_TEXT ("Got Factories from properties on ")
634 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
635 fault_event_desc
.object_group_id
642 // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or
643 // SEMI_ACTIVE, we can see if it was the primary replica that
646 (fault_event_desc
.replication_style
== FT::COLD_PASSIVE
||
647 fault_event_desc
.replication_style
== FT::WARM_PASSIVE
||
648 fault_event_desc
.replication_style
== FT::SEMI_ACTIVE
))
650 if (TAO_debug_level
> 6)
652 ORBSVCS_DEBUG ((LM_DEBUG
,
653 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
654 ACE_TEXT ("Checking to see if failed replica was the primary for ")
655 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
656 fault_event_desc
.object_group_id
// The answer is stored in fault_event_desc.object_is_primary.
659 result
= this->is_primary_member (
660 the_object_group
.in(),
661 fault_event_desc
.location
.in(),
662 fault_event_desc
.object_is_primary
);
665 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
666 // controlled) and the primary has faulted, establish a new primary.
667 // We get back a new object group.
669 (fault_event_desc
.membership_style
== FT::MEMB_INF_CTRL
))
671 PortableGroup::ObjectGroup_var new_object_group
;
672 result
= this->remove_failed_member (
673 the_object_group
.in(),
675 new_object_group
.out());
// Continue with the group reference returned by the removal.
678 the_object_group
= new_object_group
;
681 if (fault_event_desc
.object_is_primary
== 1)
683 if (TAO_debug_level
> 6)
685 ORBSVCS_DEBUG ((LM_DEBUG
,
686 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
687 ACE_TEXT ("Setting new primary for ")
688 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
689 fault_event_desc
.object_group_id
692 result
= this->set_new_primary (
693 the_object_group
.in(),
695 new_object_group
.out());
// Continue with the group reference that carries the new primary.
698 the_object_group
= new_object_group
;
703 #if 0 // According to the FT CORBA specification, this will be handled by the ObjectGroupManager::remove_member method
704 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
705 // controlled) and the number of remaining members is less than
706 // the MinimumNumberMembers property, add new members.
707 // We get back a new object group.
709 (fault_event_desc
.membership_style
== FT::MEMB_INF_CTRL
))
711 if (TAO_debug_level
> 6)
713 ORBSVCS_DEBUG ((LM_DEBUG
,
714 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
715 ACE_TEXT ("Potentially adding new members to ")
716 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
717 fault_event_desc
.object_group_id
720 result
= this->add_members (
721 the_object_group
.in(),
723 new_object_group
.out());
724 the_object_group
= new_object_group
;
// Remove the failed member from its object group.
//
// @param iogr              Object group the member belongs to.
// @param fault_event_desc  Supplies the location of the failed member.
// @param new_iogr          Set to nil on entry, then to the group reference
//                          returned by the replication manager's
//                          remove_member call.
// CORBA exceptions are caught and printed.
// NOTE(review): the try keyword, the first argument of remove_member and
// the return statements are not visible in this view.
730 int TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member (
731 PortableGroup::ObjectGroup_ptr iogr
,
732 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
733 PortableGroup::ObjectGroup_out new_iogr
)
// Start with a nil result so the out parameter always has a value.
736 new_iogr
= PortableGroup::ObjectGroup::_nil ();
740 // Remove the old primary member from the object group.
741 PortableGroup::ObjectGroup_var temp_iogr
=
742 this->replication_manager_
->remove_member (
744 fault_event_desc
.location
.in());
// Hand ownership of the returned reference to the caller.
745 new_iogr
= temp_iogr
._retn ();
747 catch (const CORBA::Exception
& ex
)
749 ex
._tao_print_exception (
750 "TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member: ");
757 // Choose a new primary member for the ObjectGroup.
758 // Sets <new_iogr> and returns 0 on success.
759 // Returns -1 on failure.
//
// @param iogr              Object group that needs a new primary.
// @param fault_event_desc  Used here only for the object group id in the
//                          error message.
// @param new_iogr          Set to nil on entry, then to the reference
//                          returned by set_primary_member.
// An error is logged when the group has no member locations left.
// CORBA exceptions are caught and printed.
// NOTE(review): the try keyword, the arguments of locations_of_members and
// set_primary_member, and the return statements are not visible in this
// view.
760 int TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary (
761 PortableGroup::ObjectGroup_ptr iogr
,
762 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
763 PortableGroup::ObjectGroup_out new_iogr
)
// Start with a nil result so the out parameter always has a value.
766 new_iogr
= PortableGroup::ObjectGroup::_nil ();
770 // Get the locations of the remaining members of the object group.
771 PortableGroup::Locations_var locations
=
772 this->replication_manager_
->locations_of_members (
775 // Choose the first location as our new primary location.
776 if (locations
->length() >= 1)
778 new_iogr
= this->replication_manager_
->set_primary_member (
// Reached when no member locations remain.
784 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
785 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ")
786 ACE_TEXT ("No locations remaining in ObjectGroup with id <%Q>.\n"),
787 fault_event_desc
.object_group_id
),
791 catch (const CORBA::Exception
& ex
)
793 ex
._tao_print_exception (
794 "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ");
// Everything from the "#if 0" below is disabled code kept for reference.
// NOTE(review): the matching "#endif" is not visible in this view.
801 #if 0 // this is handled by the remove_member method
802 // While the number of members in the object group is less than
803 // the MinimumNumberMembers property, add new members.
804 // Sets <new_iogr> and returns 0 on success.
805 // Returns -1 on failure.
//
// @param iogr              Object group to add members to.
// @param fault_event_desc  Supplies minimum_number_members and type_id.
// @param new_iogr          Set to nil on entry, then to the reference
//                          returned by each create_member call.
// NOTE(review): the try keyword, several call arguments and the return
// statements are not visible in this view.
806 int TAO::FT_ReplicationManagerFaultAnalyzer::add_members (
807 PortableGroup::ObjectGroup_ptr iogr
,
808 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
809 PortableGroup::ObjectGroup_out new_iogr
)
// Start with a nil result so the out parameter always has a value.
812 new_iogr
= PortableGroup::ObjectGroup::_nil ();
816 // Get current number of members in object group
817 // (same as number of locations).
818 PortableGroup::Locations_var locations
=
819 this->replication_manager_
->locations_of_members (
821 CORBA::ULong num_members
= locations
->length();
823 // If it is less than the MinimumNumberMembers property, add
825 if (num_members
< fault_event_desc
.minimum_number_members
)
827 //@@ To create a member, we need to know the ObjectGroup,
828 // Location, TypeId, and Criteria.
830 // Get the factory registry from the Replication Manager.
831 PortableGroup::Criteria fake_criteria
;
832 PortableGroup::FactoryRegistry_var factory_registry
=
833 this->replication_manager_
->get_factory_registry (
837 // @@ DLW SAYS: we need to find out the role played by this object
838 // group so we can use the correct set of factories.
839 // Get the list of factories for the type of the failed replica.
// The failed replica's type_id is passed as the role argument.
840 CORBA::String_var type_id
;
841 PortableGroup::FactoryInfos_var factories_by_type
=
842 factory_registry
->list_factories_by_role (
843 fault_event_desc
.type_id
.in(), type_id
);
846 // Build a set of locations of factories for this type that we
847 // can use to create new members (i.e., at locations where
848 // members do not currently exist).
850 FT_Location_Set valid_locations
;
852 // For each factory that can be used for this type...
853 for (CORBA::ULong f
=0; f
<factories_by_type
->length(); ++f
)
855 // ...insert its location into valid_locations set.
856 valid_locations
.insert (factories_by_type
[f
].the_location
);
859 // Now remove any locations where members already exist.
860 for (CORBA::ULong m
=0; m
<num_members
; ++m
)
862 if (valid_locations
.find (locations
[m
]))
863 valid_locations
.remove (locations
[m
]);
866 // The valid_locations set now contains all the factory
867 // locations we can use to add members to this object group.
868 // So, now we add new members until we reach
869 // the value of the MinimumNumberMembers property.
// The loop stops when locations run out or the minimum is reached.
870 PortableGroup::Location_var good_location
;
871 for (FT_Location_Set::iterator
iter (valid_locations
);
872 iter
.next (good_location
.out()) &&
873 fault_event_desc
.minimum_number_members
> num_members
;
874 iter
.advance(), ++num_members
)
876 // Create a new member of the object group at this location.
877 new_iogr
= this->replication_manager_
->create_member (
880 fault_event_desc
.type_id
.in(),
883 // Stop adding members when we reach the value of the
884 // MinimumNumberMembers property.
885 // if (num_members++ >= fault_event_desc.minimum_number_members)
891 catch (const CORBA::Exception
& ex
)
893 ex
._tao_print_exception (
894 "TAO::FT_ReplicationManagerFaultAnalyzer::add_members: ");
902 // Handle a location failure.
//
// @param fault_event_desc  Supplies the failed location. Its
//                          object_group_id and type_id are overwritten for
//                          each group handled in the loop.
// The loop stops at the first group whose handling gives a non-zero result.
// CORBA exceptions are caught and printed.
// NOTE(review): the `result` declaration, the try keyword, the arguments
// of get_factory_registry, the loop increment and the return statement
// are not visible in this view.
903 int TAO::FT_ReplicationManagerFaultAnalyzer::location_failure (
904 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
908 // To handle a location failure, we should:
909 // - Unregister all the factories at that location.
910 // (We do this first so that we don't try to create a new replica
911 // at that location for any of the affected object groups.)
912 // - Determine all the object groups that had members at that
914 // - Handle each one of them as a single replica failure.
918 // Get the factory registry from the Replication Manager.
919 PortableGroup::Criteria fake_criteria
;
920 PortableGroup::FactoryRegistry_var factory_registry
=
921 this->replication_manager_
->get_factory_registry (
924 // Unregister all factories at the failed location.
925 factory_registry
->unregister_factory_by_location (
926 fault_event_desc
.location
.in());
928 // Determine all the object groups that had members at that
930 PortableGroup::ObjectGroups_var object_groups_at_location
=
931 this->replication_manager_
->groups_at_location (
932 fault_event_desc
.location
.in());
934 // Handle each one of them as a single replica failure.
935 for (CORBA::ULong i
=0;
936 result
==0 && i
<object_groups_at_location
->length();
939 // Get the object group id.
940 fault_event_desc
.object_group_id
=
941 this->replication_manager_
->get_object_group_id (
942 object_groups_at_location
[i
]);
944 // Get type id of this object group.
945 fault_event_desc
.type_id
=
946 this->replication_manager_
->type_id (
947 object_groups_at_location
[i
]);
949 // Handle it as a single replica failure.
950 result
= this->single_replica_failure (fault_event_desc
);
953 catch (const CORBA::Exception
& ex
)
955 ex
._tao_print_exception (
956 "TAO::FT_ReplicationManagerFaultAnalyzer::location_failure: ");
963 // Handle a type failure.
//
// @param fault_event_desc  Supplies the failed location and type_id. Its
//                          object_group_id is overwritten for each group
//                          examined in the loop.
// Only groups whose type id compares equal (ACE_OS::strcmp) to the failed
// type id are handled. The loop stops at the first non-zero result.
// CORBA exceptions are caught and printed.
// NOTE(review): the `result` declaration, the try keyword, the arguments
// of get_factory_registry, the loop increment and the return statement
// are not visible in this view.
964 int TAO::FT_ReplicationManagerFaultAnalyzer::type_failure (
965 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
969 // To handle a type failure, we should:
970 // - Unregister the factory at the location of the failure
971 // that is associated with the failed type.
972 // (We do this first so that we don't try to create a new replica
973 // with that factory for any of the affected object groups.)
974 // - Determine all the object groups that had members at that
975 // location of that type.
976 // - Handle each one of them as a single replica failure.
980 // Get the factory registry from the Replication Manager.
981 PortableGroup::Criteria fake_criteria
;
982 PortableGroup::FactoryRegistry_var factory_registry
=
983 this->replication_manager_
->get_factory_registry (
986 // Unregister the factory at the failed location associated with
988 //@@ Using type_id as the role for now.
989 factory_registry
->unregister_factory (
990 fault_event_desc
.type_id
.in(),
991 fault_event_desc
.location
.in());
993 // Get all the object groups that had members at that
995 PortableGroup::ObjectGroups_var object_groups_at_location
=
996 this->replication_manager_
->groups_at_location (
997 fault_event_desc
.location
.in());
999 // For each one, if it was of the same type as the failed type,
1000 // handle it as a single replica failure.
1001 for (CORBA::ULong i
=0;
1002 result
==0 && i
<object_groups_at_location
->length();
1005 // Get the object group id.
1006 fault_event_desc
.object_group_id
=
1007 this->replication_manager_
->get_object_group_id (
1008 object_groups_at_location
[i
]);
1010 // Get type id of this object group.
// Held in a local so the descriptor keeps the failed type id.
1011 PortableGroup::TypeId_var type_id
=
1012 this->replication_manager_
->type_id (
1013 object_groups_at_location
[i
]);
1015 // If the type id is the same as the failed type id...
1016 if (ACE_OS::strcmp (type_id
.in(), fault_event_desc
.type_id
.in()) == 0)
1018 // Handle it as a single replica failure.
1019 result
= this->single_replica_failure (fault_event_desc
);
1023 catch (const CORBA::Exception
& ex
)
1025 ex
._tao_print_exception (
1026 "TAO::FT_ReplicationManagerFaultAnalyzer::type_failure: ");
1033 TAO_END_VERSIONED_NAMESPACE_DECL