Cleanup ACE_HAS_PTHREAD_SIGMASK_PROTOTYPE, all platforms support it so far as I can...
[ACE_TAO.git] / TAO / orbsvcs / FT_ReplicationManager / FT_ReplicationManagerFaultAnalyzer.cpp
blob4c8faf712941438311f56dc917bb78010cdf5808
1 /* -*- C++ -*- */
2 //=============================================================================
3 /**
4 * @file FT_ReplicationManagerFaultAnalyzer.cpp
6 * This file is part of TAO's implementation of Fault Tolerant CORBA.
8 * @author Steve Totten <totten_s@ociweb.com>
9 */
10 //=============================================================================
12 #include "orbsvcs/Log_Macros.h"
13 #include "FT_ReplicationManagerFaultAnalyzer.h"
14 #include "orbsvcs/CosNotifyCommC.h"
15 #include "orbsvcs/FT_NotifierC.h"
16 #include "orbsvcs/FT_ReplicationManager/FT_ReplicationManager.h"
17 #include "orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h"
18 #include "orbsvcs/PortableGroup/PG_Property_Utils.h"
19 #include "orbsvcs/PortableGroup/PG_Operators.h"
20 #include "orbsvcs/FaultTolerance/FT_IOGR_Property.h"
21 #include "tao/debug.h"
22 #include <iostream>
24 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
26 /// Constructor.
27 TAO::FT_ReplicationManagerFaultAnalyzer::FT_ReplicationManagerFaultAnalyzer (
28 const TAO::FT_ReplicationManager * replication_manager)
29 : replication_manager_ (
30 const_cast<TAO::FT_ReplicationManager *> (replication_manager))
34 /// Destructor.
35 TAO::FT_ReplicationManagerFaultAnalyzer::~FT_ReplicationManagerFaultAnalyzer ()
39 // Validate the event to make sure it is one we can handle.
40 // If it is not an event we can handle, this function logs the error
41 // and returns -1.
42 int TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type (
43 const CosNotification::StructuredEvent & event)
45 // Delegate to base class.
46 //@@ Visual C++ 6.0 won't compile this if I include the namespace name
47 // on the base class.
48 // return TAO::FT_DefaultFaultAnalyzer::validate_event_type (event);
49 return FT_DefaultFaultAnalyzer::validate_event_type (event);
52 /// Analyze a fault event.
53 int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event (
54 const CosNotification::StructuredEvent & event)
56 int result = 0;
58 const CosNotification::FilterableEventBody & filterable =
59 event.filterable_data;
60 CORBA::ULong item_count = filterable.length ();
61 if (TAO_debug_level > 6)
63 for (CORBA::ULong n_prop = 0; n_prop < item_count; ++n_prop)
65 ORBSVCS_DEBUG ((LM_DEBUG,
66 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: ")
67 ACE_TEXT("Property Name: <%C>\n"),
68 filterable[n_prop].name.in()
69 ));
73 // Populate a TAO::FT_FaultEventDescriptor structure from the
74 // properties in the event.
75 TAO::FT_FaultEventDescriptor fault_event_desc;
77 // Extract the location.
78 if (result == 0)
80 result = this->get_location (
81 filterable[1].value, fault_event_desc.location.out());
84 // CORBA 3.0.2, section 23.4.5.1 states:
86 // The fault detector may or may not set the TypeId and
87 // ObjectGroupId fields with the following interpretations:
88 // - Neither is set if all objects at the given location have failed.
89 // - TypeId is set and ObjectGroupId is not set if all objects at
90 // the given location with the given type have failed.
91 // - Both are set if the member with the given ObjectGroupId at the
92 // given location has failed.
94 if ((result == 0) && (item_count == 2))
96 // All objects at location failed.
97 fault_event_desc.all_at_location_failed = 1;
100 if ((result == 0) && (item_count == 3))
102 // All objects of type at location failed.
103 fault_event_desc.all_of_type_at_location_failed = 1;
104 result = this->get_type_id (
105 filterable[2].value, fault_event_desc.type_id.out());
108 if ((result == 0) && (item_count == 4))
110 // An object (replica) at a location failed.
111 fault_event_desc.object_at_location_failed = 1;
112 result = this->get_type_id (
113 filterable[2].value, fault_event_desc.type_id.out());
114 if (result == 0)
116 result = this->get_object_group_id (
117 filterable[3].value, fault_event_desc.object_group_id);
121 // A specific object at a location failed.
122 if ((result == 0) && (fault_event_desc.object_at_location_failed == 1))
124 result = this->single_replica_failure (fault_event_desc);
127 // All objects at location failed.
128 if ((result == 0) && (fault_event_desc.all_at_location_failed == 1))
130 result = this->location_failure (fault_event_desc);
133 // All objects of type at location failed.
134 if ((result == 0) && (fault_event_desc.all_of_type_at_location_failed == 1))
136 result = this->type_failure (fault_event_desc);
139 // Debugging support.
140 if (TAO_debug_level > 6)
142 fault_event_desc.dump ();
145 return result;
148 // Extract a string type_id from CORBA::Any.
149 // Caller owns the string returned via <type_id>.
150 int TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id (
151 const CORBA::Any& val, PortableGroup::TypeId_out type_id)
153 const char* type_id_value;
154 if ((val >>= type_id_value) == 0)
156 ORBSVCS_ERROR_RETURN ((LM_ERROR,
157 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id: ")
158 ACE_TEXT("Could not extract TypeId value from any.\n")),
159 -1);
162 // Make a deep copy of the TypeId string.
163 type_id = CORBA::string_dup (type_id_value);
164 return 0;
167 // Extract the ObjectGroupId from CORBA::Any.
168 int TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id (
169 const CORBA::Any& val, PortableGroup::ObjectGroupId& id)
171 PortableGroup::ObjectGroupId temp_id = (PortableGroup::ObjectGroupId)0;
172 if ((val >>= temp_id) == 0)
174 ORBSVCS_ERROR_RETURN ((LM_ERROR,
175 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id: ")
176 ACE_TEXT("Could not extract ObjectGroupId value from any.\n")),
177 -1);
179 id = temp_id;
180 return 0;
183 int TAO::FT_ReplicationManagerFaultAnalyzer::get_location (
184 const CORBA::Any& val, PortableGroup::Location_out location)
186 const PortableGroup::Location* temp_loc;
187 if ((val >>= temp_loc) == 0)
189 ORBSVCS_ERROR_RETURN ((LM_ERROR,
190 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_location: ")
191 ACE_TEXT("Could not extract Location value from fault event.\n")),
192 -1);
194 // Make a deep copy of the Location.
195 ACE_NEW_RETURN (location, PortableGroup::Location (*temp_loc), -1);
196 return 0;
200 //TODO: Use TAO::PG_Property_Set to get property values from properties
201 // instead of all these specific "get" functions.
204 // Get the MembershipStyle property.
205 int TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style (
206 const PortableGroup::Properties & properties,
207 PortableGroup::MembershipStyleValue & membership_style)
209 PortableGroup::Name prop_name (1);
210 prop_name.length (1);
211 prop_name[0].id = CORBA::string_dup (FT::FT_MEMBERSHIP_STYLE);
212 int result = 0;
214 PortableGroup::Value value;
215 if (TAO_PG::get_property_value (prop_name, properties, value)
216 && ((value >>= membership_style) == 1))
218 if (TAO_debug_level > 6)
220 ORBSVCS_DEBUG ((LM_DEBUG,
221 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style: ")
222 ACE_TEXT("MembershipStyle is <%d>:\n"),
223 membership_style
227 else
229 result = -1;
232 return result;
235 int TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style (
236 const PortableGroup::Properties & properties,
237 FT::ReplicationStyleValue & replication_style)
239 PortableGroup::Name prop_name (1);
240 prop_name.length (1);
241 prop_name[0].id = CORBA::string_dup (FT::FT_REPLICATION_STYLE);
242 int result = 0;
244 PortableGroup::Value value;
245 if (TAO_PG::get_property_value (prop_name, properties, value)
246 && ((value >>= replication_style) == 1))
248 if (TAO_debug_level > 6)
250 ORBSVCS_DEBUG ((LM_DEBUG,
251 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style: ")
252 ACE_TEXT ("ReplicationStyle is <%d>:\n"),
253 replication_style
257 else
259 result = -1;
262 return result;
265 int TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members (
266 const PortableGroup::Properties & properties,
267 PortableGroup::MinimumNumberMembersValue & minimum_number_members)
269 PortableGroup::Name prop_name (1);
270 prop_name.length (1);
271 prop_name[0].id = CORBA::string_dup (FT::FT_MINIMUM_NUMBER_MEMBERS);
272 int result = 0;
274 PortableGroup::Value value;
275 if (TAO_PG::get_property_value (prop_name, properties, value)
276 && ((value >>= minimum_number_members) == 1))
278 if (TAO_debug_level > 6)
280 ORBSVCS_DEBUG ((LM_DEBUG,
281 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members: ")
282 ACE_TEXT ("MinimumNumberMembers is <%d>:\n"),
283 minimum_number_members
287 else
289 result = -1;
292 return result;
295 int TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members (
296 const PortableGroup::Properties & properties,
297 PortableGroup::InitialNumberMembersValue & initial_number_members)
299 PortableGroup::Name prop_name (1);
300 prop_name.length (1);
301 prop_name[0].id = CORBA::string_dup (FT::FT_INITIAL_NUMBER_MEMBERS);
302 int result = 0;
304 PortableGroup::Value value;
305 if (TAO_PG::get_property_value (prop_name, properties, value)
306 && ((value >>= initial_number_members) == 1))
308 if (TAO_debug_level > 6)
310 ORBSVCS_DEBUG ((LM_DEBUG,
311 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members: ")
312 ACE_TEXT ("InitialNumberMembers is <%d>:\n"),
313 initial_number_members
317 else
319 result = -1;
322 return result;
325 int TAO::FT_ReplicationManagerFaultAnalyzer::get_factories (
326 const PortableGroup::Properties & properties,
327 PortableGroup::FactoryInfos_out factories)
329 PortableGroup::Name prop_name (1);
330 prop_name.length (1);
331 prop_name[0].id = CORBA::string_dup (FT::FT_FACTORIES);
332 int result = 0;
334 const PortableGroup::FactoryInfos* temp_factories = 0;
335 PortableGroup::Value value;
336 if (TAO_PG::get_property_value (prop_name, properties, value) == 1)
338 if ((value >>= temp_factories) == 0)
340 ORBSVCS_ERROR ((LM_ERROR,
341 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
342 ACE_TEXT ("Could not extract Factories from properties.\n")
344 result = -1;
346 else
348 // Make a deep copy of the Factories.
349 ACE_NEW_RETURN (factories, PortableGroup::FactoryInfos (*temp_factories), -1);
350 result = 0;
353 else
355 ORBSVCS_ERROR ((LM_ERROR,
356 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
357 ACE_TEXT ("Could not find Factories property.\n")
359 result = -1;
361 return result;
364 int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member (
365 const PortableGroup::ObjectGroup_ptr iogr,
366 const PortableGroup::Location & location,
367 int & object_is_primary)
369 // To determine if this was a primary that faulted:
370 // Get the TagFTGroupTaggedComponent from the IOGR and search
371 // for the primary, using the TAO_FT_IOGR_Property helper class.
372 // Then, compare the TypeId and Location of the failed object with
373 // those of the primary. If they match, it was a primary fault.
375 int result = 0;
376 object_is_primary = 0;
380 // Create an "empty" TAO_FT_IOGR_Property and use it to get the
381 // tagged component.
382 TAO_FT_IOGR_Property temp_ft_prop;
383 FT::TagFTGroupTaggedComponent ft_group_tagged_component;
384 CORBA::Boolean got_tagged_component =
385 temp_ft_prop.get_tagged_component (
386 iogr, ft_group_tagged_component);
387 if (got_tagged_component)
389 // Create a new TAO_FT_IOGR_Property with the tagged
390 // component.
391 TAO_FT_IOGR_Property ft_prop (ft_group_tagged_component);
393 // Check to see if a primary is set.
394 CORBA::Boolean primary_is_set = ft_prop.is_primary_set (
395 iogr);
396 if (primary_is_set)
398 // Get the primary object.
399 CORBA::Object_var primary_obj = ft_prop.get_primary (
400 iogr);
401 if (CORBA::is_nil (primary_obj.in()))
403 ORBSVCS_ERROR_RETURN ((LM_ERROR,
404 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
405 ACE_TEXT ("Could not get primary IOR from IOGR.\n")),
406 -1);
409 // Get the object reference of the failed member.
410 CORBA::Object_var failed_obj =
411 this->replication_manager_->get_member_ref (
412 iogr, location);
413 if (CORBA::is_nil (failed_obj.in()))
415 ORBSVCS_ERROR_RETURN ((LM_ERROR,
416 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
417 ACE_TEXT ("Could not get IOR of failed member from IOGR.\n")),
418 -1);
421 // Are the two object refs (primary and failed) equivalent?
422 CORBA::Boolean equiv = primary_obj->_is_equivalent (
423 failed_obj.in());
424 if (equiv)
426 object_is_primary = 1;
427 result = 0;
430 else // primary is not set
432 ORBSVCS_ERROR ((LM_ERROR,
433 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
434 ACE_TEXT ("Primary is not set on IOGR.\n")
436 result = -1;
439 else // could not get tagged component
441 ORBSVCS_ERROR ((LM_ERROR,
442 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
443 ACE_TEXT ("Could not get tagged component from IOGR.\n")
445 result = -1;
448 catch (const CORBA::Exception& ex)
450 ex._tao_print_exception (
451 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: "));
452 result = -1;
455 return result;
// Recover from the failure of one member of one object group.
//
// Steps, in order:
//  1. Resolve the object group from fault_event_desc.object_group_id and
//     fetch its properties (a CORBA exception here yields -1).
//  2. Copy MembershipStyle and ReplicationStyle into the descriptor;
//     failing to extract either one returns -1 immediately.
//  3. Copy MinimumNumberMembers, InitialNumberMembers and Factories into
//     the descriptor; failure to extract these is tolerated.
//  4. For COLD_PASSIVE, WARM_PASSIVE or SEMI_ACTIVE groups, determine
//     whether the failed member was the primary.
//  5. For MEMB_INF_CTRL groups, remove the failed member and, when it
//     was the primary, select a new primary.
//
// Returns 0 on success and -1 on failure.
458 // Handle a single replica failure.
459 int TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure (
460 TAO::FT_FaultEventDescriptor & fault_event_desc)
462 int result = 0;
463 PortableGroup::ObjectGroup_var the_object_group = PortableGroup::ObjectGroup::_nil();
464 PortableGroup::Properties_var properties;
468 // Get the object group reference based on the ObjectGroupId.
469 the_object_group =
470 this->replication_manager_->get_object_group_ref_from_id (
471 fault_event_desc.object_group_id);
473 // This should not happen, but let us be safe.
474 if (CORBA::is_nil (the_object_group.in()))
476 ORBSVCS_ERROR ((LM_ERROR,
477 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
478 ACE_TEXT ("Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"),
479 fault_event_desc.object_group_id
481 throw PortableGroup::ObjectGroupNotFound ();
484 // Get the properties associated with this ObjectGroup.
485 properties = this->replication_manager_->get_properties (
486 the_object_group.in());
488 catch (const CORBA::Exception& ex)
490 ex._tao_print_exception (
491 ACE_TEXT (
492 "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: "));
493 result = -1;
// Property extraction only runs when the group lookup above succeeded.
496 if (result == 0)
498 // Get the MembershipStyle property.
499 PortableGroup::MembershipStyleValue membership_style;
500 result = this->get_membership_style (properties.in(), membership_style);
501 if (result != 0)
503 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
504 ORBSVCS_ERROR_RETURN ((LM_ERROR,
505 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
506 ACE_TEXT ("Could not extract MembershipStyle from properties on ")
507 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
508 fault_event_desc.object_group_id),
509 -1);
511 else
513 fault_event_desc.membership_style = membership_style;
514 if (TAO_debug_level > 6)
516 ORBSVCS_DEBUG ((LM_DEBUG,
517 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
518 ACE_TEXT ("MembershipStyleValue = <%d>"),
519 fault_event_desc.membership_style
524 // Get the ReplicationStyle property.
525 FT::ReplicationStyleValue replication_style;
526 result = this->get_replication_style (properties.in(), replication_style);
527 if (result != 0)
529 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
530 ORBSVCS_ERROR_RETURN ((LM_ERROR,
531 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
532 ACE_TEXT ("Could not extract ReplicationStyle from properties on ")
533 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
534 fault_event_desc.object_group_id),
535 -1);
537 else
539 fault_event_desc.replication_style = replication_style;
540 if (TAO_debug_level > 6)
542 ORBSVCS_DEBUG ((LM_DEBUG,
543 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
544 ACE_TEXT ("ReplicationStyleValue = <%d>"),
545 fault_event_desc.replication_style
// The three properties below are optional: when one cannot be
// extracted, result is reset to 0 and the failure is only logged
// (at debug level > 3).
550 // Get the MinimumNumberMembers property.
551 PortableGroup::MinimumNumberMembersValue minimum_number_members;
552 result = this->get_minimum_number_members (
553 properties.in(), minimum_number_members);
554 if (result != 0)
556 // This is not a fatal error. It may be App Controlled.
557 result = 0;
558 if (TAO_debug_level > 3)
560 ORBSVCS_ERROR ((LM_ERROR,
561 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
562 ACE_TEXT ("Could not extract MinimumNumberMembers from properties on ")
563 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
564 fault_event_desc.object_group_id));
567 else
569 fault_event_desc.minimum_number_members = minimum_number_members;
570 if (TAO_debug_level > 6)
572 ORBSVCS_DEBUG ((LM_DEBUG,
573 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
574 ACE_TEXT ("MinimumNumberMembers = <%d>"),
575 fault_event_desc.minimum_number_members
580 // Get the InitialNumberMembers property.
581 PortableGroup::InitialNumberMembersValue initial_number_members;
582 result = this->get_initial_number_members (
583 properties.in(), initial_number_members);
584 if (result != 0)
586 // This is not a fatal error. It may be App Controlled.
587 result = 0;
588 if (TAO_debug_level > 3)
590 ORBSVCS_ERROR ((LM_ERROR,
591 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
592 ACE_TEXT ("Could not extract InitialNumberMembers from properties on ")
593 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
594 fault_event_desc.object_group_id));
597 else
599 fault_event_desc.initial_number_members = initial_number_members;
600 if (TAO_debug_level > 6)
602 ORBSVCS_DEBUG ((LM_DEBUG,
603 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
604 ACE_TEXT ("InitialNumberMembers = <%d>"),
605 fault_event_desc.initial_number_members
610 // Get the Factories property.
611 result = this->get_factories (
612 properties.in(),
613 fault_event_desc.factories.out());
614 if (result != 0)
616 // This is not a fatal error. It may be App Controlled.
617 result = 0;
618 if (TAO_debug_level > 3)
620 ORBSVCS_ERROR ((LM_ERROR,
621 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
622 ACE_TEXT ("Could not extract Factories from properties on ")
623 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
624 fault_event_desc.object_group_id));
627 else
629 if (TAO_debug_level > 6)
631 ORBSVCS_DEBUG ((LM_DEBUG,
632 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
633 ACE_TEXT ("Got Factories from properties on ")
634 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
635 fault_event_desc.object_group_id
642 // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or
643 // SEMI_ACTIVE, we can see if it was the primary replica that
644 // failed.
645 if ((result == 0) &&
646 (fault_event_desc.replication_style == FT::COLD_PASSIVE ||
647 fault_event_desc.replication_style == FT::WARM_PASSIVE ||
648 fault_event_desc.replication_style == FT::SEMI_ACTIVE))
650 if (TAO_debug_level > 6)
652 ORBSVCS_DEBUG ((LM_DEBUG,
653 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
654 ACE_TEXT ("Checking to see if failed replica was the primary for ")
655 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
656 fault_event_desc.object_group_id
659 result = this->is_primary_member (
660 the_object_group.in(),
661 fault_event_desc.location.in(),
662 fault_event_desc.object_is_primary);
665 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
666 // controlled) and the primary has faulted, establish a new primary.
667 // We get back a new object group.
668 if ((result == 0) &&
669 (fault_event_desc.membership_style == FT::MEMB_INF_CTRL))
671 PortableGroup::ObjectGroup_var new_object_group;
672 result = this->remove_failed_member (
673 the_object_group.in(),
674 fault_event_desc,
675 new_object_group.out());
676 if (result == 0)
678 the_object_group = new_object_group;
// NOTE(review): judging by the original line numbering, the check below
// sits outside the "result == 0" block above (brace lines are not
// visible in this view). If so, set_new_primary() is attempted even when
// remove_failed_member() failed, and its return value replaces the
// earlier failure code -- confirm whether that is intended.
681 if (fault_event_desc.object_is_primary == 1)
683 if (TAO_debug_level > 6)
685 ORBSVCS_DEBUG ((LM_DEBUG,
686 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
687 ACE_TEXT ("Setting new primary for ")
688 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
689 fault_event_desc.object_group_id
692 result = this->set_new_primary (
693 the_object_group.in(),
694 fault_event_desc,
695 new_object_group.out());
696 if (result == 0)
698 the_object_group = new_object_group;
// Everything from here to the matching #endif is compiled out.
703 #if 0 // According to the FT CORBA specification, this will be handled by the ObjectGroupManager::remove_member method
704 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
705 // controlled) and the number of remaining members is less than
706 // the MinimumNumberMembers property, add new members.
707 // We get back a new object group.
708 if ((result == 0) &&
709 (fault_event_desc.membership_style == FT::MEMB_INF_CTRL))
711 if (TAO_debug_level > 6)
713 ORBSVCS_DEBUG ((LM_DEBUG,
714 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
715 ACE_TEXT ("Potentially adding new members to ")
716 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
717 fault_event_desc.object_group_id
720 result = this->add_members (
721 the_object_group.in(),
722 fault_event_desc,
723 new_object_group.out());
724 the_object_group = new_object_group;
726 #endif
727 return result;
730 int TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member (
731 PortableGroup::ObjectGroup_ptr iogr,
732 TAO::FT_FaultEventDescriptor & fault_event_desc,
733 PortableGroup::ObjectGroup_out new_iogr)
735 int result = 0;
736 new_iogr = PortableGroup::ObjectGroup::_nil ();
740 // Remove the old primary member from the object group.
741 PortableGroup::ObjectGroup_var temp_iogr =
742 this->replication_manager_->remove_member (
743 iogr,
744 fault_event_desc.location.in());
745 new_iogr = temp_iogr._retn ();
747 catch (const CORBA::Exception& ex)
749 ex._tao_print_exception (
750 "TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member: ");
751 result = -1;
753 return result;
757 // Choose a new primary member for the ObjectGroup.
758 // Sets <new_iogr> and returns 0 on success.
759 // Returns -1 on failure.
760 int TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary (
761 PortableGroup::ObjectGroup_ptr iogr,
762 TAO::FT_FaultEventDescriptor & fault_event_desc,
763 PortableGroup::ObjectGroup_out new_iogr)
765 int result = 0;
766 new_iogr = PortableGroup::ObjectGroup::_nil ();
770 // Get the locations of the remaining members of the object group.
771 PortableGroup::Locations_var locations =
772 this->replication_manager_->locations_of_members (
773 iogr);
775 // Choose the first location as our new primary location.
776 if (locations->length() >= 1)
778 new_iogr = this->replication_manager_->set_primary_member (
779 iogr,
780 (*locations)[0]);
782 else
784 ORBSVCS_ERROR_RETURN ((LM_ERROR,
785 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ")
786 ACE_TEXT ("No locations remaining in ObjectGroup with id <%Q>.\n"),
787 fault_event_desc.object_group_id),
788 -1);
791 catch (const CORBA::Exception& ex)
793 ex._tao_print_exception (
794 "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ");
795 result = -1;
798 return result;
// add_members() is compiled out by the #if 0 / #endif pair that encloses
// it. It is kept for reference only: per the note on the #if line, the
// work is handled by the remove_member method instead.
//
// As written, it compares the number of member locations with
// fault_event_desc.minimum_number_members and, while the group is short,
// creates members at factory locations that do not yet host a member.
801 #if 0 // this is handled by the remove_member method
802 // While the number of members in the object group is less than
803 // the MinimumNumberMembers property, add new members.
804 // Sets <new_iogr> and returns 0 on success.
805 // Returns -1 on failure.
806 int TAO::FT_ReplicationManagerFaultAnalyzer::add_members (
807 PortableGroup::ObjectGroup_ptr iogr,
808 TAO::FT_FaultEventDescriptor & fault_event_desc,
809 PortableGroup::ObjectGroup_out new_iogr)
811 int result = 0;
812 new_iogr = PortableGroup::ObjectGroup::_nil ();
816 // Get current number of members in object group
817 // (same as number of locations).
818 PortableGroup::Locations_var locations =
819 this->replication_manager_->locations_of_members (
820 iogr);
821 CORBA::ULong num_members = locations->length();
823 // If it is less than the MinimumNumberMembers property, add
824 // new members.
825 if (num_members < fault_event_desc.minimum_number_members)
827 //@@ To create a member, we need to know the ObjectGroup,
828 // Location, TypeId, and Criteria.
830 // Get the factory registry from the Replication Manager.
831 PortableGroup::Criteria fake_criteria;
832 PortableGroup::FactoryRegistry_var factory_registry =
833 this->replication_manager_->get_factory_registry (
834 fake_criteria);
837 // @@ DLW SAYS: we need to find out the role played by this object
838 // group so we can use the correct set of factories.
839 // Get the list of factories for the type of the failed replica.
840 CORBA::String_var type_id;
841 PortableGroup::FactoryInfos_var factories_by_type =
842 factory_registry->list_factories_by_role (
843 fault_event_desc.type_id.in(), type_id);
846 // Build a set of locations of factories for this type that we
847 // can use to create new members (i.e., at locations where
848 // members do not currently exist).
850 FT_Location_Set valid_locations;
852 // For each factory that can be used for this type...
853 for (CORBA::ULong f=0; f<factories_by_type->length(); ++f)
855 // ...insert its location into valid_locations set.
856 valid_locations.insert (factories_by_type[f].the_location);
859 // Now remove any locations where members already exist.
860 for (CORBA::ULong m=0; m<num_members; ++m)
862 if (valid_locations.find (locations[m]))
863 valid_locations.remove (locations[m]);
866 // The valid_locations set now contains all the factory
867 // locations we can use to add members to this object group.
868 // So, now we add new members until we reach
869 // the value of the MinimumNumberMembers property.
870 PortableGroup::Location_var good_location;
871 for (FT_Location_Set::iterator iter (valid_locations);
872 iter.next (good_location.out()) &&
873 fault_event_desc.minimum_number_members > num_members;
874 iter.advance(), ++num_members)
876 // Create a new member of the object group at this location.
877 new_iogr = this->replication_manager_->create_member (
878 iogr,
879 good_location.in(),
880 fault_event_desc.type_id.in(),
881 fake_criteria);
883 // Stop adding members when we reach the value of the
884 // MinimumNumberMembers property.
885 // if (num_members++ >= fault_event_desc.minimum_number_members)
886 // break;
891 catch (const CORBA::Exception& ex)
893 ex._tao_print_exception (
894 "TAO::FT_ReplicationManagerFaultAnalyzer::add_members: ");
895 result = -1;
898 return result;
900 #endif // 0
902 // Handle a location failure.
903 int TAO::FT_ReplicationManagerFaultAnalyzer::location_failure (
904 TAO::FT_FaultEventDescriptor & fault_event_desc)
906 int result = 0;
908 // To handle a location failure, we should:
909 // - Unregister all the factories at that location.
910 // (We do this first so that we don't try to create a new replica
911 // at that location for any of the affected object groups.)
912 // - Determine all the object groups that had members at that
913 // location.
914 // - Handle each one of them as a single replica failure.
918 // Get the factory registry from the Replication Manager.
919 PortableGroup::Criteria fake_criteria;
920 PortableGroup::FactoryRegistry_var factory_registry =
921 this->replication_manager_->get_factory_registry (
922 fake_criteria);
924 // Unregister all factories at the failed location.
925 factory_registry->unregister_factory_by_location (
926 fault_event_desc.location.in());
928 // Determine all the object groups that had members at that
929 // location.
930 PortableGroup::ObjectGroups_var object_groups_at_location =
931 this->replication_manager_->groups_at_location (
932 fault_event_desc.location.in());
934 // Handle each one of them as a single replica failure.
935 for (CORBA::ULong i=0;
936 result==0 && i<object_groups_at_location->length();
937 ++i)
939 // Get the object group id.
940 fault_event_desc.object_group_id =
941 this->replication_manager_->get_object_group_id (
942 object_groups_at_location[i]);
944 // Get type id of this object group.
945 fault_event_desc.type_id =
946 this->replication_manager_->type_id (
947 object_groups_at_location[i]);
949 // Handle it as a single replica failure.
950 result = this->single_replica_failure (fault_event_desc);
953 catch (const CORBA::Exception& ex)
955 ex._tao_print_exception (
956 "TAO::FT_ReplicationManagerFaultAnalyzer::location_failure: ");
957 result = -1;
960 return result;
963 // Handle a type failure.
964 int TAO::FT_ReplicationManagerFaultAnalyzer::type_failure (
965 TAO::FT_FaultEventDescriptor & fault_event_desc)
967 int result = 0;
969 // To handle a type failure, we should:
970 // - Unregister the factory at the location of the failure
971 // that is associated with the failed type.
972 // (We do this first so that we don't try to create a new replica
973 // with that factory for any of the affected object groups.)
974 // - Determine all the object groups that had members at that
975 // location of that type.
976 // - Handle each one of them as a single replica failure.
980 // Get the factory registry from the Replication Manager.
981 PortableGroup::Criteria fake_criteria;
982 PortableGroup::FactoryRegistry_var factory_registry =
983 this->replication_manager_->get_factory_registry (
984 fake_criteria);
986 // Unregister the factory at the failed location associated with
987 // the role.
988 //@@ Using type_id as the role for now.
989 factory_registry->unregister_factory (
990 fault_event_desc.type_id.in(),
991 fault_event_desc.location.in());
993 // Get all the object groups that had members at that
994 // location.
995 PortableGroup::ObjectGroups_var object_groups_at_location =
996 this->replication_manager_->groups_at_location (
997 fault_event_desc.location.in());
999 // For each one, if it was of the same type as the failed type,
1000 // handle it as a single replica failure.
1001 for (CORBA::ULong i=0;
1002 result==0 && i<object_groups_at_location->length();
1003 ++i)
1005 // Get the object group id.
1006 fault_event_desc.object_group_id =
1007 this->replication_manager_->get_object_group_id (
1008 object_groups_at_location[i]);
1010 // Get type id of this object group.
1011 PortableGroup::TypeId_var type_id =
1012 this->replication_manager_->type_id (
1013 object_groups_at_location[i]);
1015 // If the type id is the same as the failed type id...
1016 if (ACE_OS::strcmp (type_id.in(), fault_event_desc.type_id.in()) == 0)
1018 // Handle it as a single replica failure.
1019 result = this->single_replica_failure (fault_event_desc);
1023 catch (const CORBA::Exception& ex)
1025 ex._tao_print_exception (
1026 "TAO::FT_ReplicationManagerFaultAnalyzer::type_failure: ");
1027 result = -1;
1030 return result;
1033 TAO_END_VERSIONED_NAMESPACE_DECL