2 //=============================================================================
4 * @file FT_ReplicationManagerFaultAnalyzer.cpp
6 * This file is part of TAO's implementation of Fault Tolerant CORBA.
8 * @author Steve Totten <totten_s@ociweb.com>
10 //=============================================================================
12 #include "orbsvcs/Log_Macros.h"
13 #include "FT_ReplicationManagerFaultAnalyzer.h"
14 #include "orbsvcs/CosNotifyCommC.h"
15 #include "orbsvcs/FT_NotifierC.h"
16 #include "orbsvcs/FT_ReplicationManager/FT_ReplicationManager.h"
17 #include "orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h"
18 #include "orbsvcs/PortableGroup/PG_Property_Utils.h"
19 #include "orbsvcs/PortableGroup/PG_Operators.h"
20 #include "orbsvcs/FaultTolerance/FT_IOGR_Property.h"
21 #include "tao/debug.h"
24 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
// Constructor: records the replication manager that the other member
// functions call through replication_manager_.
// The parameter is a pointer-to-const; the const_cast below strips the
// constness so that the stored member is a non-const pointer.
// NOTE(review): the constructor body is not visible in this listing.
27 TAO::FT_ReplicationManagerFaultAnalyzer::FT_ReplicationManagerFaultAnalyzer (
28 const TAO::FT_ReplicationManager
* replication_manager
)
29 : replication_manager_ (
30 const_cast<TAO::FT_ReplicationManager
*> (replication_manager
))
// Destructor.
// NOTE(review): the body is not visible in this listing.
35 TAO::FT_ReplicationManagerFaultAnalyzer::~FT_ReplicationManagerFaultAnalyzer ()
39 // Validate the event to make sure it is one we can handle.
40 // If it is not an event we can handle, this function logs the error
// @param event  The structured event to check.
// @return The value produced by FT_DefaultFaultAnalyzer::validate_event_type()
//         for the same event; the only visible statement is that delegation.
42 int TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type (
43 const CosNotification::StructuredEvent
& event
)
45 // Delegate to base class.
46 //@@ Visual C++ 6.0 won't compile this if I include the namespace name
48 // return TAO::FT_DefaultFaultAnalyzer::validate_event_type (event);
49 return FT_DefaultFaultAnalyzer::validate_event_type (event
);
52 /// Analyze a fault event.
// Reads event.filterable_data and fills in a TAO::FT_FaultEventDescriptor:
//   - item [1] is decoded as the Location,
//   - item [2] (for 3 or 4 items) is decoded as the TypeId,
//   - item [3] (for 4 items) is decoded as the ObjectGroupId.
// The item count (2, 3 or 4) selects which failure flag is set on the
// descriptor, and the matching handler (location_failure, type_failure or
// single_replica_failure) is then invoked.  Every classification and
// dispatch check below also requires `result == 0`.
// @param event  Structured fault event; only its filterable_data is read here.
// NOTE(review): item [1] is indexed without a visible length check in this
// function -- confirm that callers validate the event first.
// NOTE(review): the declaration of `result` and the final return are not
// visible in this listing; presumably `result` is the return value.
53 int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event (
54 const CosNotification::StructuredEvent
& event
)
58 const CosNotification::FilterableEventBody
& filterable
=
59 event
.filterable_data
;
60 CORBA::ULong item_count
= filterable
.length ();
// At debug level above 6, log the name of every property in the event.
61 if (TAO_debug_level
> 6)
63 for (CORBA::ULong n_prop
= 0; n_prop
< item_count
; ++n_prop
)
65 ORBSVCS_DEBUG ((LM_DEBUG
,
66 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: ")
67 ACE_TEXT("Property Name: <%C>\n"),
68 filterable
[n_prop
].name
.in()
73 // Populate a TAO::FT_FaultEventDescriptor structure from the
74 // properties in the event.
75 TAO::FT_FaultEventDescriptor fault_event_desc
;
77 // Extract the location.
80 result
= this->get_location (
81 filterable
[1].value
, fault_event_desc
.location
.out());
84 // CORBA 3.0.2, section 23.4.5.1 states:
86 // The fault detector may or may not set the TypeId and
87 // ObjectGroupId fields with the following interpretations:
88 // - Neither is set if all objects at the given location have failed.
89 // - TypeId is set and ObjectGroupId is not set if all objects at
90 // the given location with the given type have failed.
91 // - Both are set if the member with the given ObjectGroupId at the
92 // given location has failed.
94 if ((result
== 0) && (item_count
== 2))
96 // All objects at location failed.
97 fault_event_desc
.all_at_location_failed
= 1;
100 if ((result
== 0) && (item_count
== 3))
102 // All objects of type at location failed.
103 fault_event_desc
.all_of_type_at_location_failed
= 1;
104 result
= this->get_type_id (
105 filterable
[2].value
, fault_event_desc
.type_id
.out());
108 if ((result
== 0) && (item_count
== 4))
110 // An object (replica) at a location failed.
111 fault_event_desc
.object_at_location_failed
= 1;
112 result
= this->get_type_id (
113 filterable
[2].value
, fault_event_desc
.type_id
.out());
116 result
= this->get_object_group_id (
117 filterable
[3].value
, fault_event_desc
.object_group_id
);
121 // A specific object at a location failed.
122 if ((result
== 0) && (fault_event_desc
.object_at_location_failed
== 1))
124 result
= this->single_replica_failure (fault_event_desc
);
127 // All objects at location failed.
128 if ((result
== 0) && (fault_event_desc
.all_at_location_failed
== 1))
130 result
= this->location_failure (fault_event_desc
);
133 // All objects of type at location failed.
134 if ((result
== 0) && (fault_event_desc
.all_of_type_at_location_failed
== 1))
136 result
= this->type_failure (fault_event_desc
);
139 // Debugging support.
140 if (TAO_debug_level
> 6)
142 fault_event_desc
.dump ();
148 // Extract a string type_id from CORBA::Any.
149 // Caller owns the string returned via <type_id>.
// @param val      Any that is expected to hold the TypeId as a string.
// @param type_id  Out parameter; receives a CORBA::string_dup copy of the
//                 extracted string.
// If the extraction from <val> fails, an error is logged through
// ORBSVCS_ERROR_RETURN.
// NOTE(review): the return values are not visible in this listing.
150 int TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id (
151 const CORBA::Any
& val
, PortableGroup::TypeId_out type_id
)
153 const char* type_id_value
;
154 if ((val
>>= type_id_value
) == 0)
156 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
157 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id: ")
158 ACE_TEXT("Could not extract TypeId value from any.\n")),
162 // Make a deep copy of the TypeId string.
163 type_id
= CORBA::string_dup (type_id_value
);
167 // Extract the ObjectGroupId from CORBA::Any.
// @param val  Any that is expected to hold a PortableGroup::ObjectGroupId.
// @param id   Reference through which the extracted id is handed back.
// The value is first extracted into a zero-initialised temporary; if the
// extraction fails, an error is logged through ORBSVCS_ERROR_RETURN.
// NOTE(review): the assignment to <id> and the return values are not visible
// in this listing.
168 int TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id (
169 const CORBA::Any
& val
, PortableGroup::ObjectGroupId
& id
)
171 PortableGroup::ObjectGroupId temp_id
= (PortableGroup::ObjectGroupId
)0;
172 if ((val
>>= temp_id
) == 0)
174 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
175 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id: ")
176 ACE_TEXT("Could not extract ObjectGroupId value from any.\n")),
// Extract the Location from CORBA::Any.
// @param val       Any that is expected to hold a PortableGroup::Location.
// @param location  Out parameter; receives a newly allocated copy of the
//                  extracted Location.
// If the extraction from <val> fails, an error is logged through
// ORBSVCS_ERROR_RETURN.  The copy is made with ACE_NEW_RETURN, which is given
// -1 as the value to return when the allocation fails.
// NOTE(review): the remaining return values are not visible in this listing.
183 int TAO::FT_ReplicationManagerFaultAnalyzer::get_location (
184 const CORBA::Any
& val
, PortableGroup::Location_out location
)
186 const PortableGroup::Location
* temp_loc
;
187 if ((val
>>= temp_loc
) == 0)
189 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
190 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_location: ")
191 ACE_TEXT("Could not extract Location value from fault event.\n")),
194 // Make a deep copy of the Location.
195 ACE_NEW_RETURN (location
, PortableGroup::Location (*temp_loc
), -1);
200 //TODO: Use TAO::PG_Property_Set to get property values from properties
201 // instead of all these specific "get" functions.
204 // Get the MembershipStyle property.
// Builds a one-element property name whose id is FT::FT_MEMBERSHIP_STYLE,
// looks it up in <properties> with TAO_PG::get_property_value, and extracts
// the value into <membership_style>.  When both the lookup and the
// extraction succeed, the value is logged at debug level above 6.
// @param properties        Property list to search.
// @param membership_style  Receives the extracted value.
// NOTE(review): the failure path and the return values are not visible in
// this listing.
205 int TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style (
206 const PortableGroup::Properties
& properties
,
207 PortableGroup::MembershipStyleValue
& membership_style
)
209 PortableGroup::Name
prop_name (1);
210 prop_name
.length (1);
211 prop_name
[0].id
= CORBA::string_dup (FT::FT_MEMBERSHIP_STYLE
);
214 PortableGroup::Value value
;
215 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
216 && ((value
>>= membership_style
) == 1))
218 if (TAO_debug_level
> 6)
220 ORBSVCS_DEBUG ((LM_DEBUG
,
221 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style: ")
222 ACE_TEXT("MembershipStyle is <%d>:\n"),
// Get the ReplicationStyle property.
// Builds a one-element property name whose id is FT::FT_REPLICATION_STYLE,
// looks it up in <properties> with TAO_PG::get_property_value, and extracts
// the value into <replication_style>.  When both the lookup and the
// extraction succeed, the value is logged at debug level above 6.
// @param properties         Property list to search.
// @param replication_style  Receives the extracted value.
// NOTE(review): the failure path and the return values are not visible in
// this listing.
235 int TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style (
236 const PortableGroup::Properties
& properties
,
237 FT::ReplicationStyleValue
& replication_style
)
239 PortableGroup::Name
prop_name (1);
240 prop_name
.length (1);
241 prop_name
[0].id
= CORBA::string_dup (FT::FT_REPLICATION_STYLE
);
244 PortableGroup::Value value
;
245 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
246 && ((value
>>= replication_style
) == 1))
248 if (TAO_debug_level
> 6)
250 ORBSVCS_DEBUG ((LM_DEBUG
,
251 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style: ")
252 ACE_TEXT ("ReplicationStyle is <%d>:\n"),
// Get the MinimumNumberMembers property.
// Builds a one-element property name whose id is
// FT::FT_MINIMUM_NUMBER_MEMBERS, looks it up in <properties> with
// TAO_PG::get_property_value, and extracts the value into
// <minimum_number_members>.  When both the lookup and the extraction
// succeed, the value is logged at debug level above 6.
// @param properties              Property list to search.
// @param minimum_number_members  Receives the extracted value.
// NOTE(review): the failure path and the return values are not visible in
// this listing.
265 int TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members (
266 const PortableGroup::Properties
& properties
,
267 PortableGroup::MinimumNumberMembersValue
& minimum_number_members
)
269 PortableGroup::Name
prop_name (1);
270 prop_name
.length (1);
271 prop_name
[0].id
= CORBA::string_dup (FT::FT_MINIMUM_NUMBER_MEMBERS
);
274 PortableGroup::Value value
;
275 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
276 && ((value
>>= minimum_number_members
) == 1))
278 if (TAO_debug_level
> 6)
280 ORBSVCS_DEBUG ((LM_DEBUG
,
281 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members: ")
282 ACE_TEXT ("MinimumNumberMembers is <%d>:\n"),
283 minimum_number_members
// Get the InitialNumberMembers property.
// Builds a one-element property name whose id is
// FT::FT_INITIAL_NUMBER_MEMBERS, looks it up in <properties> with
// TAO_PG::get_property_value, and extracts the value into
// <initial_number_members>.  When both the lookup and the extraction
// succeed, the value is logged at debug level above 6.
// @param properties              Property list to search.
// @param initial_number_members  Receives the extracted value.
// NOTE(review): the failure path and the return values are not visible in
// this listing.
295 int TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members (
296 const PortableGroup::Properties
& properties
,
297 PortableGroup::InitialNumberMembersValue
& initial_number_members
)
299 PortableGroup::Name
prop_name (1);
300 prop_name
.length (1);
301 prop_name
[0].id
= CORBA::string_dup (FT::FT_INITIAL_NUMBER_MEMBERS
);
304 PortableGroup::Value value
;
305 if (TAO_PG::get_property_value (prop_name
, properties
, value
)
306 && ((value
>>= initial_number_members
) == 1))
308 if (TAO_debug_level
> 6)
310 ORBSVCS_DEBUG ((LM_DEBUG
,
311 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members: ")
312 ACE_TEXT ("InitialNumberMembers is <%d>:\n"),
313 initial_number_members
// Get the Factories property.
// Builds a one-element property name whose id is FT::FT_FACTORIES and looks
// it up in <properties> with TAO_PG::get_property_value.  When the lookup
// returns 1, the value is extracted as a PortableGroup::FactoryInfos and a
// copy is allocated into <factories> with ACE_NEW_RETURN (which is given -1
// as the value to return on allocation failure).  Separate errors are logged
// when the value cannot be extracted and when the property is not found.
// @param properties  Property list to search.
// @param factories   Out parameter; receives the newly allocated copy.
// NOTE(review): the remaining return values, and the exact nesting of the
// copy relative to the extraction check, are not visible in this listing.
325 int TAO::FT_ReplicationManagerFaultAnalyzer::get_factories (
326 const PortableGroup::Properties
& properties
,
327 PortableGroup::FactoryInfos_out factories
)
329 PortableGroup::Name
prop_name (1);
330 prop_name
.length (1);
331 prop_name
[0].id
= CORBA::string_dup (FT::FT_FACTORIES
);
334 const PortableGroup::FactoryInfos
* temp_factories
= 0;
335 PortableGroup::Value value
;
336 if (TAO_PG::get_property_value (prop_name
, properties
, value
) == 1)
338 if ((value
>>= temp_factories
) == 0)
340 ORBSVCS_ERROR ((LM_ERROR
,
341 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
342 ACE_TEXT ("Could not extract Factories from properties.\n")
348 // Make a deep copy of the Factories.
349 ACE_NEW_RETURN (factories
, PortableGroup::FactoryInfos (*temp_factories
), -1);
355 ORBSVCS_ERROR ((LM_ERROR
,
356 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
357 ACE_TEXT ("Could not find Factories property.\n")
// Determine whether the failed member of <iogr> was the group's primary.
// @param iogr               Object group reference whose FT group tagged
//                           component is examined.
// @param location           Location of the failed member.
// @param object_is_primary  Reset to 0 at the start; set to 1 after the
//                           equivalence test between the primary and the
//                           failed member's object reference.
// Errors are logged when the tagged component cannot be obtained, when no
// primary is set, and when either object reference is nil.  CORBA exceptions
// are caught and printed with _tao_print_exception.
// NOTE(review): the arguments of is_primary_set, get_primary, get_member_ref
// and _is_equivalent, the condition guarding `object_is_primary = 1`, and
// the return values are not visible in this listing.
364 int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member (
365 const PortableGroup::ObjectGroup_ptr iogr
,
366 const PortableGroup::Location
& location
,
367 int & object_is_primary
)
370 // To determine if this was a primary that faulted:
371 // Get the TagFTGroupTaggedComponent from the IOGR and search
372 // for the primary, using the TAO_FT_IOGR_Property helper class.
373 // Then, compare the TypeId and Location of the failed object with
374 // those of the primary. If they match, it was a primary fault.
377 object_is_primary
= 0;
381 // Create an "empty" TAO_FT_IOGR_Property and use it to get the
383 TAO_FT_IOGR_Property temp_ft_prop
;
384 FT::TagFTGroupTaggedComponent ft_group_tagged_component
;
385 CORBA::Boolean got_tagged_component
=
386 temp_ft_prop
.get_tagged_component (
387 iogr
, ft_group_tagged_component
);
388 if (got_tagged_component
)
390 // Create a new TAO_FT_IOGR_Property with the tagged
392 TAO_FT_IOGR_Property
ft_prop (ft_group_tagged_component
);
394 // Check to see if a primary is set.
395 CORBA::Boolean primary_is_set
= ft_prop
.is_primary_set (
399 // Get the primary object.
400 CORBA::Object_var primary_obj
= ft_prop
.get_primary (
402 if (CORBA::is_nil (primary_obj
.in()))
404 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
405 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
406 ACE_TEXT ("Could not get primary IOR from IOGR.\n")),
410 // Get the object reference of the failed member.
411 CORBA::Object_var failed_obj
=
412 this->replication_manager_
->get_member_ref (
414 if (CORBA::is_nil (failed_obj
.in()))
416 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
417 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
418 ACE_TEXT ("Could not get IOR of failed member from IOGR.\n")),
422 // Are the two object refs (primary and failed) equivalent?
423 CORBA::Boolean equiv
= primary_obj
->_is_equivalent (
427 object_is_primary
= 1;
431 else // primary is not set
433 ORBSVCS_ERROR ((LM_ERROR
,
434 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
435 ACE_TEXT ("Primary is not set on IOGR.\n")
440 else // could not get tagged component
442 ORBSVCS_ERROR ((LM_ERROR
,
443 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
444 ACE_TEXT ("Could not get tagged component from IOGR.\n")
449 catch (const CORBA::Exception
& ex
)
451 ex
._tao_print_exception (
452 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: "));
459 // Handle a single replica failure.
// @param fault_event_desc  In/out descriptor.  object_group_id and location
//        are read; membership_style, replication_style,
//        minimum_number_members, initial_number_members, factories and
//        object_is_primary are filled in by this function.
// Visible steps, in order:
//   1. Resolve the object group from the ObjectGroupId and fetch its
//      properties.  A nil group reference is logged and
//      PortableGroup::ObjectGroupNotFound is thrown; a handler for
//      CORBA::Exception prints the exception.
//   2. Read MembershipStyle and ReplicationStyle; a failure to extract
//      either is reported with ORBSVCS_ERROR_RETURN.
//   3. Read MinimumNumberMembers, InitialNumberMembers and Factories; a
//      failure here is only logged (at debug level above 3).
//   4. For COLD_PASSIVE, WARM_PASSIVE or SEMI_ACTIVE replication, call
//      is_primary_member() to find out whether the failed replica was the
//      primary.
//   5. For FT::MEMB_INF_CTRL membership, call remove_failed_member() and,
//      when the failed replica was the primary, set_new_primary().
// The section that starts at "#if 0" (adding members) is not compiled.
// NOTE(review): the declaration of `result`, the conditions guarding several
// of the steps above, and the return statements are not visible in this
// listing.
460 int TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure (
461 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
464 PortableGroup::ObjectGroup_var the_object_group
= PortableGroup::ObjectGroup::_nil();
465 PortableGroup::Properties_var properties
;
469 // Get the object group reference based on the ObjectGroupId.
471 this->replication_manager_
->get_object_group_ref_from_id (
472 fault_event_desc
.object_group_id
);
474 // This should not happen, but let us be safe.
475 if (CORBA::is_nil (the_object_group
.in()))
477 ORBSVCS_ERROR ((LM_ERROR
,
478 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
479 ACE_TEXT ("Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"),
480 fault_event_desc
.object_group_id
482 throw PortableGroup::ObjectGroupNotFound ();
485 // Get the properties associated with this ObjectGroup.
486 properties
= this->replication_manager_
->get_properties (
487 the_object_group
.in());
489 catch (const CORBA::Exception
& ex
)
491 ex
._tao_print_exception (
493 "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: "));
499 // Get the MembershipStyle property.
500 PortableGroup::MembershipStyleValue membership_style
;
501 result
= this->get_membership_style (properties
.in(), membership_style
);
504 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
505 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
506 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
507 ACE_TEXT ("Could not extract MembershipStyle from properties on ")
508 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
509 fault_event_desc
.object_group_id
),
514 fault_event_desc
.membership_style
= membership_style
;
515 if (TAO_debug_level
> 6)
517 ORBSVCS_DEBUG ((LM_DEBUG
,
518 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
519 ACE_TEXT ("MembershipStyleValue = <%d>"),
520 fault_event_desc
.membership_style
525 // Get the ReplicationStyle property.
526 FT::ReplicationStyleValue replication_style
;
527 result
= this->get_replication_style (properties
.in(), replication_style
);
530 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
531 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
532 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
533 ACE_TEXT ("Could not extract ReplicationStyle from properties on ")
534 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
535 fault_event_desc
.object_group_id
),
540 fault_event_desc
.replication_style
= replication_style
;
541 if (TAO_debug_level
> 6)
543 ORBSVCS_DEBUG ((LM_DEBUG
,
544 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
545 ACE_TEXT ("ReplicationStyleValue = <%d>"),
546 fault_event_desc
.replication_style
551 // Get the MinimumNumberMembers property.
552 PortableGroup::MinimumNumberMembersValue minimum_number_members
;
553 result
= this->get_minimum_number_members (
554 properties
.in(), minimum_number_members
);
557 // This is not a fatal error. It may be App Controlled.
559 if (TAO_debug_level
> 3)
561 ORBSVCS_ERROR ((LM_ERROR
,
562 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
563 ACE_TEXT ("Could not extract MinimumNumberMembers from properties on ")
564 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
565 fault_event_desc
.object_group_id
));
570 fault_event_desc
.minimum_number_members
= minimum_number_members
;
571 if (TAO_debug_level
> 6)
573 ORBSVCS_DEBUG ((LM_DEBUG
,
574 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
575 ACE_TEXT ("MinimumNumberMembers = <%d>"),
576 fault_event_desc
.minimum_number_members
581 // Get the InitialNumberMembers property.
582 PortableGroup::InitialNumberMembersValue initial_number_members
;
583 result
= this->get_initial_number_members (
584 properties
.in(), initial_number_members
);
587 // This is not a fatal error. It may be App Controlled.
589 if (TAO_debug_level
> 3)
591 ORBSVCS_ERROR ((LM_ERROR
,
592 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
593 ACE_TEXT ("Could not extract InitialNumberMembers from properties on ")
594 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
595 fault_event_desc
.object_group_id
));
600 fault_event_desc
.initial_number_members
= initial_number_members
;
601 if (TAO_debug_level
> 6)
603 ORBSVCS_DEBUG ((LM_DEBUG
,
604 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
605 ACE_TEXT ("InitialNumberMembers = <%d>"),
606 fault_event_desc
.initial_number_members
611 // Get the Factories property.
612 result
= this->get_factories (
614 fault_event_desc
.factories
.out());
617 // This is not a fatal error. It may be App Controlled.
619 if (TAO_debug_level
> 3)
621 ORBSVCS_ERROR ((LM_ERROR
,
622 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
623 ACE_TEXT ("Could not extract Factories from properties on ")
624 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
625 fault_event_desc
.object_group_id
));
630 if (TAO_debug_level
> 6)
632 ORBSVCS_DEBUG ((LM_DEBUG
,
633 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
634 ACE_TEXT ("Got Factories from properties on ")
635 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
636 fault_event_desc
.object_group_id
643 // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or
644 // SEMI_ACTIVE, we can see if it was the primary replica that
647 (fault_event_desc
.replication_style
== FT::COLD_PASSIVE
||
648 fault_event_desc
.replication_style
== FT::WARM_PASSIVE
||
649 fault_event_desc
.replication_style
== FT::SEMI_ACTIVE
))
651 if (TAO_debug_level
> 6)
653 ORBSVCS_DEBUG ((LM_DEBUG
,
654 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
655 ACE_TEXT ("Checking to see if failed replica was the primary for ")
656 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
657 fault_event_desc
.object_group_id
660 result
= this->is_primary_member (
661 the_object_group
.in(),
662 fault_event_desc
.location
.in(),
663 fault_event_desc
.object_is_primary
);
666 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
667 // controlled) and the primary has faulted, establish a new primary.
668 // We get back a new object group.
670 (fault_event_desc
.membership_style
== FT::MEMB_INF_CTRL
))
673 PortableGroup::ObjectGroup_var new_object_group
;
674 result
= this->remove_failed_member (
675 the_object_group
.in(),
677 new_object_group
.out());
680 the_object_group
= new_object_group
;
683 if (fault_event_desc
.object_is_primary
== 1)
685 if (TAO_debug_level
> 6)
687 ORBSVCS_DEBUG ((LM_DEBUG
,
688 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
689 ACE_TEXT ("Setting new primary for ")
690 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
691 fault_event_desc
.object_group_id
694 result
= this->set_new_primary (
695 the_object_group
.in(),
697 new_object_group
.out());
700 the_object_group
= new_object_group
;
705 #if 0 // According to the FT CORBA specification, this will be handled by the ObjectGroupManager::remove_member method
706 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
707 // controlled) and the number of remaining members is less than
708 // the MinimumNumberMembers property, add new members.
709 // We get back a new object group.
711 (fault_event_desc
.membership_style
== FT::MEMB_INF_CTRL
))
713 if (TAO_debug_level
> 6)
715 ORBSVCS_DEBUG ((LM_DEBUG
,
716 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
717 ACE_TEXT ("Potentially adding new members to ")
718 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
719 fault_event_desc
.object_group_id
722 result
= this->add_members (
723 the_object_group
.in(),
725 new_object_group
.out());
726 the_object_group
= new_object_group
;
// Remove the failed member from the object group through the replication
// manager.
// @param iogr              Object group the member belongs to.
// @param fault_event_desc  Supplies the location of the failed member.
// @param new_iogr          Out parameter; set to nil first, then to the
//                          group reference returned by remove_member().
// CORBA exceptions are caught and printed with _tao_print_exception.
// NOTE(review): the first argument of remove_member and the return values
// are not visible in this listing.
732 int TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member (
733 PortableGroup::ObjectGroup_ptr iogr
,
734 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
735 PortableGroup::ObjectGroup_out new_iogr
)
738 new_iogr
= PortableGroup::ObjectGroup::_nil ();
742 // Remove the old primary member from the object group.
743 PortableGroup::ObjectGroup_var temp_iogr
=
744 this->replication_manager_
->remove_member (
746 fault_event_desc
.location
.in());
747 new_iogr
= temp_iogr
._retn ();
749 catch (const CORBA::Exception
& ex
)
751 ex
._tao_print_exception (
752 "TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member: ");
759 // Choose a new primary member for the ObjectGroup.
760 // Sets <new_iogr> and returns 0 on success.
761 // Returns -1 on failure.
// @param iogr              Object group that needs a new primary.
// @param fault_event_desc  Its object_group_id is used in the error message
//                          logged when no locations remain.
// @param new_iogr          Out parameter; set to nil first, then to the
//                          result of set_primary_member() when at least one
//                          location remains.
// CORBA exceptions are caught and printed with _tao_print_exception.
// NOTE(review): the arguments of locations_of_members and
// set_primary_member are not visible in this listing.
762 int TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary (
763 PortableGroup::ObjectGroup_ptr iogr
,
764 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
765 PortableGroup::ObjectGroup_out new_iogr
)
768 new_iogr
= PortableGroup::ObjectGroup::_nil ();
772 // Get the locations of the remaining members of the object group.
773 PortableGroup::Locations_var locations
=
774 this->replication_manager_
->locations_of_members (
777 // Choose the first location as our new primary location.
778 if (locations
->length() >= 1)
780 new_iogr
= this->replication_manager_
->set_primary_member (
786 ORBSVCS_ERROR_RETURN ((LM_ERROR
,
787 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ")
788 ACE_TEXT ("No locations remaining in ObjectGroup with id <%Q>.\n"),
789 fault_event_desc
.object_group_id
),
793 catch (const CORBA::Exception
& ex
)
795 ex
._tao_print_exception (
796 "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ");
803 #if 0 // this is handled by the remove_member method
804 // While the number of members in the object group is less than
805 // the MinimumNumberMembers property, add new members.
806 // Sets <new_iogr> and returns 0 on success.
807 // Returns -1 on failure.
// NOTE: this function sits behind the "#if 0" above, so it is not compiled.
// @param iogr              Object group to add members to.
// @param fault_event_desc  Supplies minimum_number_members and type_id.
// @param new_iogr          Out parameter; set to nil first, then to the
//                          result of each create_member() call.
// Outline of the visible logic: count the current members (one per
// location); if that is below minimum_number_members, collect the locations
// of the factories listed for the type, drop the locations that already
// host a member, and create members at the remaining locations until the
// minimum is reached.  CORBA exceptions are caught and printed.
// NOTE(review): several call arguments and the closing #endif are not
// visible in this listing.
808 int TAO::FT_ReplicationManagerFaultAnalyzer::add_members (
809 PortableGroup::ObjectGroup_ptr iogr
,
810 TAO::FT_FaultEventDescriptor
& fault_event_desc
,
811 PortableGroup::ObjectGroup_out new_iogr
)
814 new_iogr
= PortableGroup::ObjectGroup::_nil ();
818 // Get current number of members in object group
819 // (same as number of locations).
820 PortableGroup::Locations_var locations
=
821 this->replication_manager_
->locations_of_members (
823 CORBA::ULong num_members
= locations
->length();
825 // If it is less than the MinimumNumberMembers property, add
827 if (num_members
< fault_event_desc
.minimum_number_members
)
829 //@@ To create a member, we need to know the ObjectGroup,
830 // Location, TypeId, and Criteria.
832 // Get the factory registry from the Replication Manager.
833 PortableGroup::Criteria fake_criteria
;
834 PortableGroup::FactoryRegistry_var factory_registry
=
835 this->replication_manager_
->get_factory_registry (
839 // @@ DLW SAYS: we need to find out the role played by this object
840 // group so we can use the correct set of factories.
841 // Get the list of factories for the type of the failed replica.
842 CORBA::String_var type_id
;
843 PortableGroup::FactoryInfos_var factories_by_type
=
844 factory_registry
->list_factories_by_role (
845 fault_event_desc
.type_id
.in(), type_id
);
848 // Build a set of locations of factories for this type that we
849 // can use to create new members (i.e., at locations where
850 // members do not currently exist).
852 FT_Location_Set valid_locations
;
854 // For each factory that can be used for this type...
855 for (CORBA::ULong f
=0; f
<factories_by_type
->length(); ++f
)
857 // ...insert its location into valid_locations set.
858 valid_locations
.insert (factories_by_type
[f
].the_location
);
861 // Now remove any locations where members already exist.
862 for (CORBA::ULong m
=0; m
<num_members
; ++m
)
864 if (valid_locations
.find (locations
[m
]))
865 valid_locations
.remove (locations
[m
]);
868 // The valid_locations set now contains all the factory
869 // locations we can use to add members to this object group.
870 // So, now we add new members until we reach
871 // the value of the MinimumNumberMembers property.
872 PortableGroup::Location_var good_location
;
873 for (FT_Location_Set::iterator
iter (valid_locations
);
874 iter
.next (good_location
.out()) &&
875 fault_event_desc
.minimum_number_members
> num_members
;
876 iter
.advance(), ++num_members
)
878 // Create a new member of the object group at this location.
879 new_iogr
= this->replication_manager_
->create_member (
882 fault_event_desc
.type_id
.in(),
885 // Stop adding members when we reach the value of the
886 // MinimumNumberMembers property.
887 // if (num_members++ >= fault_event_desc.minimum_number_members)
893 catch (const CORBA::Exception
& ex
)
895 ex
._tao_print_exception (
896 "TAO::FT_ReplicationManagerFaultAnalyzer::add_members: ");
904 // Handle a location failure.
// @param fault_event_desc  Its location identifies the failed location.
//        Note that object_group_id and type_id are overwritten for each
//        object group found at that location before the descriptor is
//        passed to single_replica_failure().
// CORBA exceptions are caught and printed with _tao_print_exception.
// NOTE(review): the declaration of `result`, the arguments of
// get_factory_registry and the return statement are not visible in this
// listing.
905 int TAO::FT_ReplicationManagerFaultAnalyzer::location_failure (
906 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
910 // To handle a location failure, we should:
911 // - Unregister all the factories at that location.
912 // (We do this first so that we don't try to create a new replica
913 // at that location for any of the affected object groups.)
914 // - Determine all the object groups that had members at that
916 // - Handle each one of them as a single replica failure.
920 // Get the factory registry from the Replication Manager.
921 PortableGroup::Criteria fake_criteria
;
922 PortableGroup::FactoryRegistry_var factory_registry
=
923 this->replication_manager_
->get_factory_registry (
926 // Unregister all factories at the failed location.
927 factory_registry
->unregister_factory_by_location (
928 fault_event_desc
.location
.in());
930 // Determine all the object groups that had members at that
932 PortableGroup::ObjectGroups_var object_groups_at_location
=
933 this->replication_manager_
->groups_at_location (
934 fault_event_desc
.location
.in());
936 // Handle each one of them as a single replica failure.
// The loop condition also tests `result == 0`, so iteration stops after the
// first group whose handling reports a non-zero result.
937 for (CORBA::ULong i
=0;
938 result
==0 && i
<object_groups_at_location
->length();
941 // Get the object group id.
942 fault_event_desc
.object_group_id
=
943 this->replication_manager_
->get_object_group_id (
944 object_groups_at_location
[i
]);
946 // Get type id of this object group.
947 fault_event_desc
.type_id
=
948 this->replication_manager_
->type_id (
949 object_groups_at_location
[i
]);
951 // Handle it as a single replica failure.
952 result
= this->single_replica_failure (fault_event_desc
);
955 catch (const CORBA::Exception
& ex
)
957 ex
._tao_print_exception (
958 "TAO::FT_ReplicationManagerFaultAnalyzer::location_failure: ");
965 // Handle a type failure.
// @param fault_event_desc  Its type_id and location identify the failed
//        type and where it failed.  Note that object_group_id is overwritten
//        for each object group found at that location; groups whose type id
//        compares equal (ACE_OS::strcmp) to the failed type id are passed
//        to single_replica_failure().
// CORBA exceptions are caught and printed with _tao_print_exception.
// NOTE(review): the declaration of `result`, the arguments of
// get_factory_registry and the return statement are not visible in this
// listing.
966 int TAO::FT_ReplicationManagerFaultAnalyzer::type_failure (
967 TAO::FT_FaultEventDescriptor
& fault_event_desc
)
971 // To handle a type failure, we should:
972 // - Unregister the factory at the location of the failure
973 // that is associated with the failed type.
974 // (We do this first so that we don't try to create a new replica
975 // with that factory for any of the affected object groups.)
976 // - Determine all the object groups that had members at that
977 // location of that type.
978 // - Handle each one of them as a single replica failure.
982 // Get the factory registry from the Replication Manager.
983 PortableGroup::Criteria fake_criteria
;
984 PortableGroup::FactoryRegistry_var factory_registry
=
985 this->replication_manager_
->get_factory_registry (
988 // Unregister the factory at the failed location associated with
990 //@@ Using type_id as the role for now.
991 factory_registry
->unregister_factory (
992 fault_event_desc
.type_id
.in(),
993 fault_event_desc
.location
.in());
995 // Get all the object groups that had members at that
997 PortableGroup::ObjectGroups_var object_groups_at_location
=
998 this->replication_manager_
->groups_at_location (
999 fault_event_desc
.location
.in());
1001 // For each one, if it was of the same type as the failed type,
1002 // handle it as a single replica failure.
// The loop condition also tests `result == 0`, so iteration stops after the
// first group whose handling reports a non-zero result.
1003 for (CORBA::ULong i
=0;
1004 result
==0 && i
<object_groups_at_location
->length();
1007 // Get the object group id.
1008 fault_event_desc
.object_group_id
=
1009 this->replication_manager_
->get_object_group_id (
1010 object_groups_at_location
[i
]);
1012 // Get type id of this object group.
1013 PortableGroup::TypeId_var type_id
=
1014 this->replication_manager_
->type_id (
1015 object_groups_at_location
[i
]);
1017 // If the type id is the same as the failed type id...
1018 if (ACE_OS::strcmp (type_id
.in(), fault_event_desc
.type_id
.in()) == 0)
1020 // Handle it as a single replica failure.
1021 result
= this->single_replica_failure (fault_event_desc
);
1025 catch (const CORBA::Exception
& ex
)
1027 ex
._tao_print_exception (
1028 "TAO::FT_ReplicationManagerFaultAnalyzer::type_failure: ");
1035 TAO_END_VERSIONED_NAMESPACE_DECL