Merge pull request #1844 from jrw972/monterey
[ACE_TAO.git] / TAO / orbsvcs / FT_ReplicationManager / FT_ReplicationManagerFaultAnalyzer.cpp
blob a5ae04091f5da6723a3e87f40170338cec52af8c
1 /* -*- C++ -*- */
2 //=============================================================================
3 /**
4 * @file FT_ReplicationManagerFaultAnalyzer.cpp
6 * This file is part of TAO's implementation of Fault Tolerant CORBA.
8 * @author Steve Totten <totten_s@ociweb.com>
9 */
10 //=============================================================================
12 #include "orbsvcs/Log_Macros.h"
13 #include "FT_ReplicationManagerFaultAnalyzer.h"
14 #include "orbsvcs/CosNotifyCommC.h"
15 #include "orbsvcs/FT_NotifierC.h"
16 #include "orbsvcs/FT_ReplicationManager/FT_ReplicationManager.h"
17 #include "orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h"
18 #include "orbsvcs/PortableGroup/PG_Property_Utils.h"
19 #include "orbsvcs/PortableGroup/PG_Operators.h"
20 #include "orbsvcs/FaultTolerance/FT_IOGR_Property.h"
21 #include "tao/debug.h"
22 #include <iostream>
24 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
26 /// Constructor.
27 TAO::FT_ReplicationManagerFaultAnalyzer::FT_ReplicationManagerFaultAnalyzer (
28 const TAO::FT_ReplicationManager * replication_manager)
29 : replication_manager_ (
30 const_cast<TAO::FT_ReplicationManager *> (replication_manager))
34 /// Destructor.
35 TAO::FT_ReplicationManagerFaultAnalyzer::~FT_ReplicationManagerFaultAnalyzer ()
39 // Validate the event to make sure it is one we can handle.
40 // If it is not an event we can handle, this function logs the error
41 // and returns -1.
42 int TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type (
43 const CosNotification::StructuredEvent & event)
45 // Delegate to base class.
46 //@@ Visual C++ 6.0 won't compile this if I include the namespace name
47 // on the base class.
48 // return TAO::FT_DefaultFaultAnalyzer::validate_event_type (event);
49 return FT_DefaultFaultAnalyzer::validate_event_type (event);
52 /// Analyze a fault event.
53 int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event (
54 const CosNotification::StructuredEvent & event)
56 int result = 0;
58 const CosNotification::FilterableEventBody & filterable =
59 event.filterable_data;
60 CORBA::ULong item_count = filterable.length ();
61 if (TAO_debug_level > 6)
63 for (CORBA::ULong n_prop = 0; n_prop < item_count; ++n_prop)
65 ORBSVCS_DEBUG ((LM_DEBUG,
66 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: ")
67 ACE_TEXT("Property Name: <%C>\n"),
68 filterable[n_prop].name.in()
69 ));
73 // Populate a TAO::FT_FaultEventDescriptor structure from the
74 // properties in the event.
75 TAO::FT_FaultEventDescriptor fault_event_desc;
77 // Extract the location.
78 if (result == 0)
80 result = this->get_location (
81 filterable[1].value, fault_event_desc.location.out());
84 // CORBA 3.0.2, section 23.4.5.1 states:
86 // The fault detector may or may not set the TypeId and
87 // ObjectGroupId fields with the following interpretations:
88 // - Neither is set if all objects at the given location have failed.
89 // - TypeId is set and ObjectGroupId is not set if all objects at
90 // the given location with the given type have failed.
91 // - Both are set if the member with the given ObjectGroupId at the
92 // given location has failed.
94 if ((result == 0) && (item_count == 2))
96 // All objects at location failed.
97 fault_event_desc.all_at_location_failed = 1;
100 if ((result == 0) && (item_count == 3))
102 // All objects of type at location failed.
103 fault_event_desc.all_of_type_at_location_failed = 1;
104 result = this->get_type_id (
105 filterable[2].value, fault_event_desc.type_id.out());
108 if ((result == 0) && (item_count == 4))
110 // An object (replica) at a location failed.
111 fault_event_desc.object_at_location_failed = 1;
112 result = this->get_type_id (
113 filterable[2].value, fault_event_desc.type_id.out());
114 if (result == 0)
116 result = this->get_object_group_id (
117 filterable[3].value, fault_event_desc.object_group_id);
121 // A specific object at a location failed.
122 if ((result == 0) && (fault_event_desc.object_at_location_failed == 1))
124 result = this->single_replica_failure (fault_event_desc);
127 // All objects at location failed.
128 if ((result == 0) && (fault_event_desc.all_at_location_failed == 1))
130 result = this->location_failure (fault_event_desc);
133 // All objects of type at location failed.
134 if ((result == 0) && (fault_event_desc.all_of_type_at_location_failed == 1))
136 result = this->type_failure (fault_event_desc);
139 // Debugging support.
140 if (TAO_debug_level > 6)
142 fault_event_desc.dump ();
145 return result;
148 // Extract a string type_id from CORBA::Any.
149 // Caller owns the string returned via <type_id>.
150 int TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id (
151 const CORBA::Any& val, PortableGroup::TypeId_out type_id)
153 const char* type_id_value;
154 if ((val >>= type_id_value) == 0)
156 ORBSVCS_ERROR_RETURN ((LM_ERROR,
157 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id: ")
158 ACE_TEXT("Could not extract TypeId value from any.\n")),
159 -1);
162 // Make a deep copy of the TypeId string.
163 type_id = CORBA::string_dup (type_id_value);
164 return 0;
167 // Extract the ObjectGroupId from CORBA::Any.
168 int TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id (
169 const CORBA::Any& val, PortableGroup::ObjectGroupId& id)
171 PortableGroup::ObjectGroupId temp_id = (PortableGroup::ObjectGroupId)0;
172 if ((val >>= temp_id) == 0)
174 ORBSVCS_ERROR_RETURN ((LM_ERROR,
175 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id: ")
176 ACE_TEXT("Could not extract ObjectGroupId value from any.\n")),
177 -1);
179 id = temp_id;
180 return 0;
183 int TAO::FT_ReplicationManagerFaultAnalyzer::get_location (
184 const CORBA::Any& val, PortableGroup::Location_out location)
186 const PortableGroup::Location* temp_loc;
187 if ((val >>= temp_loc) == 0)
189 ORBSVCS_ERROR_RETURN ((LM_ERROR,
190 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_location: ")
191 ACE_TEXT("Could not extract Location value from fault event.\n")),
192 -1);
194 // Make a deep copy of the Location.
195 ACE_NEW_RETURN (location, PortableGroup::Location (*temp_loc), -1);
196 return 0;
200 //TODO: Use TAO::PG_Property_Set to get property values from properties
201 // instead of all these specific "get" functions.
204 // Get the MembershipStyle property.
205 int TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style (
206 const PortableGroup::Properties & properties,
207 PortableGroup::MembershipStyleValue & membership_style)
209 PortableGroup::Name prop_name (1);
210 prop_name.length (1);
211 prop_name[0].id = CORBA::string_dup (FT::FT_MEMBERSHIP_STYLE);
212 int result = 0;
214 PortableGroup::Value value;
215 if (TAO_PG::get_property_value (prop_name, properties, value)
216 && ((value >>= membership_style) == 1))
218 if (TAO_debug_level > 6)
220 ORBSVCS_DEBUG ((LM_DEBUG,
221 ACE_TEXT("TAO::FT_ReplicationManagerFaultAnalyzer::get_membership_style: ")
222 ACE_TEXT("MembershipStyle is <%d>:\n"),
223 membership_style
227 else
229 result = -1;
232 return result;
235 int TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style (
236 const PortableGroup::Properties & properties,
237 FT::ReplicationStyleValue & replication_style)
239 PortableGroup::Name prop_name (1);
240 prop_name.length (1);
241 prop_name[0].id = CORBA::string_dup (FT::FT_REPLICATION_STYLE);
242 int result = 0;
244 PortableGroup::Value value;
245 if (TAO_PG::get_property_value (prop_name, properties, value)
246 && ((value >>= replication_style) == 1))
248 if (TAO_debug_level > 6)
250 ORBSVCS_DEBUG ((LM_DEBUG,
251 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_replication_style: ")
252 ACE_TEXT ("ReplicationStyle is <%d>:\n"),
253 replication_style
257 else
259 result = -1;
262 return result;
265 int TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members (
266 const PortableGroup::Properties & properties,
267 PortableGroup::MinimumNumberMembersValue & minimum_number_members)
269 PortableGroup::Name prop_name (1);
270 prop_name.length (1);
271 prop_name[0].id = CORBA::string_dup (FT::FT_MINIMUM_NUMBER_MEMBERS);
272 int result = 0;
274 PortableGroup::Value value;
275 if (TAO_PG::get_property_value (prop_name, properties, value)
276 && ((value >>= minimum_number_members) == 1))
278 if (TAO_debug_level > 6)
280 ORBSVCS_DEBUG ((LM_DEBUG,
281 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_minimum_number_members: ")
282 ACE_TEXT ("MinimumNumberMembers is <%d>:\n"),
283 minimum_number_members
287 else
289 result = -1;
292 return result;
295 int TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members (
296 const PortableGroup::Properties & properties,
297 PortableGroup::InitialNumberMembersValue & initial_number_members)
299 PortableGroup::Name prop_name (1);
300 prop_name.length (1);
301 prop_name[0].id = CORBA::string_dup (FT::FT_INITIAL_NUMBER_MEMBERS);
302 int result = 0;
304 PortableGroup::Value value;
305 if (TAO_PG::get_property_value (prop_name, properties, value)
306 && ((value >>= initial_number_members) == 1))
308 if (TAO_debug_level > 6)
310 ORBSVCS_DEBUG ((LM_DEBUG,
311 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_members: ")
312 ACE_TEXT ("InitialNumberMembers is <%d>:\n"),
313 initial_number_members
317 else
319 result = -1;
322 return result;
325 int TAO::FT_ReplicationManagerFaultAnalyzer::get_factories (
326 const PortableGroup::Properties & properties,
327 PortableGroup::FactoryInfos_out factories)
329 PortableGroup::Name prop_name (1);
330 prop_name.length (1);
331 prop_name[0].id = CORBA::string_dup (FT::FT_FACTORIES);
332 int result = 0;
334 const PortableGroup::FactoryInfos* temp_factories = 0;
335 PortableGroup::Value value;
336 if (TAO_PG::get_property_value (prop_name, properties, value) == 1)
338 if ((value >>= temp_factories) == 0)
340 ORBSVCS_ERROR ((LM_ERROR,
341 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
342 ACE_TEXT ("Could not extract Factories from properties.\n")
344 result = -1;
346 else
348 // Make a deep copy of the Factories.
349 ACE_NEW_RETURN (factories, PortableGroup::FactoryInfos (*temp_factories), -1);
350 result = 0;
353 else
355 ORBSVCS_ERROR ((LM_ERROR,
356 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: ")
357 ACE_TEXT ("Could not find Factories property.\n")
359 result = -1;
361 return result;
364 int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member (
365 const PortableGroup::ObjectGroup_ptr iogr,
366 const PortableGroup::Location & location,
367 int & object_is_primary)
370 // To determine if this was a primary that faulted:
371 // Get the TagFTGroupTaggedComponent from the IOGR and search
372 // for the primary, using the TAO_FT_IOGR_Property helper class.
373 // Then, compare the TypeId and Location of the failed object with
374 // those of the primary. If they match, it was a primary fault.
376 int result = 0;
377 object_is_primary = 0;
381 // Create an "empty" TAO_FT_IOGR_Property and use it to get the
382 // tagged component.
383 TAO_FT_IOGR_Property temp_ft_prop;
384 FT::TagFTGroupTaggedComponent ft_group_tagged_component;
385 CORBA::Boolean got_tagged_component =
386 temp_ft_prop.get_tagged_component (
387 iogr, ft_group_tagged_component);
388 if (got_tagged_component)
390 // Create a new TAO_FT_IOGR_Property with the tagged
391 // component.
392 TAO_FT_IOGR_Property ft_prop (ft_group_tagged_component);
394 // Check to see if a primary is set.
395 CORBA::Boolean primary_is_set = ft_prop.is_primary_set (
396 iogr);
397 if (primary_is_set)
399 // Get the primary object.
400 CORBA::Object_var primary_obj = ft_prop.get_primary (
401 iogr);
402 if (CORBA::is_nil (primary_obj.in()))
404 ORBSVCS_ERROR_RETURN ((LM_ERROR,
405 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
406 ACE_TEXT ("Could not get primary IOR from IOGR.\n")),
407 -1);
410 // Get the object reference of the failed member.
411 CORBA::Object_var failed_obj =
412 this->replication_manager_->get_member_ref (
413 iogr, location);
414 if (CORBA::is_nil (failed_obj.in()))
416 ORBSVCS_ERROR_RETURN ((LM_ERROR,
417 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
418 ACE_TEXT ("Could not get IOR of failed member from IOGR.\n")),
419 -1);
422 // Are the two object refs (primary and failed) equivalent?
423 CORBA::Boolean equiv = primary_obj->_is_equivalent (
424 failed_obj.in());
425 if (equiv)
427 object_is_primary = 1;
428 result = 0;
431 else // primary is not set
433 ORBSVCS_ERROR ((LM_ERROR,
434 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
435 ACE_TEXT ("Primary is not set on IOGR.\n")
437 result = -1;
440 else // could not get tagged component
442 ORBSVCS_ERROR ((LM_ERROR,
443 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: ")
444 ACE_TEXT ("Could not get tagged component from IOGR.\n")
446 result = -1;
449 catch (const CORBA::Exception& ex)
451 ex._tao_print_exception (
452 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member: "));
453 result = -1;
456 return result;
459 // Handle a single replica failure.
460 int TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure (
461 TAO::FT_FaultEventDescriptor & fault_event_desc)
463 int result = 0;
464 PortableGroup::ObjectGroup_var the_object_group = PortableGroup::ObjectGroup::_nil();
465 PortableGroup::Properties_var properties;
469 // Get the object group reference based on the ObjectGroupId.
470 the_object_group =
471 this->replication_manager_->get_object_group_ref_from_id (
472 fault_event_desc.object_group_id);
474 // This should not happen, but let us be safe.
475 if (CORBA::is_nil (the_object_group.in()))
477 ORBSVCS_ERROR ((LM_ERROR,
478 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
479 ACE_TEXT ("Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"),
480 fault_event_desc.object_group_id
482 throw PortableGroup::ObjectGroupNotFound ();
485 // Get the properties associated with this ObjectGroup.
486 properties = this->replication_manager_->get_properties (
487 the_object_group.in());
489 catch (const CORBA::Exception& ex)
491 ex._tao_print_exception (
492 ACE_TEXT (
493 "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: "));
494 result = -1;
497 if (result == 0)
499 // Get the MembershipStyle property.
500 PortableGroup::MembershipStyleValue membership_style;
501 result = this->get_membership_style (properties.in(), membership_style);
502 if (result != 0)
504 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
505 ORBSVCS_ERROR_RETURN ((LM_ERROR,
506 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
507 ACE_TEXT ("Could not extract MembershipStyle from properties on ")
508 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
509 fault_event_desc.object_group_id),
510 -1);
512 else
514 fault_event_desc.membership_style = membership_style;
515 if (TAO_debug_level > 6)
517 ORBSVCS_DEBUG ((LM_DEBUG,
518 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
519 ACE_TEXT ("MembershipStyleValue = <%d>"),
520 fault_event_desc.membership_style
525 // Get the ReplicationStyle property.
526 FT::ReplicationStyleValue replication_style;
527 result = this->get_replication_style (properties.in(), replication_style);
528 if (result != 0)
530 //@@ it seems a shame to fail here. We should at least remove the failed replica from the group.
531 ORBSVCS_ERROR_RETURN ((LM_ERROR,
532 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
533 ACE_TEXT ("Could not extract ReplicationStyle from properties on ")
534 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
535 fault_event_desc.object_group_id),
536 -1);
538 else
540 fault_event_desc.replication_style = replication_style;
541 if (TAO_debug_level > 6)
543 ORBSVCS_DEBUG ((LM_DEBUG,
544 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
545 ACE_TEXT ("ReplicationStyleValue = <%d>"),
546 fault_event_desc.replication_style
551 // Get the MinimumNumberMembers property.
552 PortableGroup::MinimumNumberMembersValue minimum_number_members;
553 result = this->get_minimum_number_members (
554 properties.in(), minimum_number_members);
555 if (result != 0)
557 // This is not a fatal error. It may be App Controlled.
558 result = 0;
559 if (TAO_debug_level > 3)
561 ORBSVCS_ERROR ((LM_ERROR,
562 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
563 ACE_TEXT ("Could not extract MinimumNumberMembers from properties on ")
564 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
565 fault_event_desc.object_group_id));
568 else
570 fault_event_desc.minimum_number_members = minimum_number_members;
571 if (TAO_debug_level > 6)
573 ORBSVCS_DEBUG ((LM_DEBUG,
574 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
575 ACE_TEXT ("MinimumNumberMembers = <%d>"),
576 fault_event_desc.minimum_number_members
581 // Get the InitialNumberMembers property.
582 PortableGroup::InitialNumberMembersValue initial_number_members;
583 result = this->get_initial_number_members (
584 properties.in(), initial_number_members);
585 if (result != 0)
587 // This is not a fatal error. It may be App Controlled.
588 result = 0;
589 if (TAO_debug_level > 3)
591 ORBSVCS_ERROR ((LM_ERROR,
592 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
593 ACE_TEXT ("Could not extract InitialNumberMembers from properties on ")
594 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
595 fault_event_desc.object_group_id));
598 else
600 fault_event_desc.initial_number_members = initial_number_members;
601 if (TAO_debug_level > 6)
603 ORBSVCS_DEBUG ((LM_DEBUG,
604 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
605 ACE_TEXT ("InitialNumberMembers = <%d>"),
606 fault_event_desc.initial_number_members
611 // Get the Factories property.
612 result = this->get_factories (
613 properties.in(),
614 fault_event_desc.factories.out());
615 if (result != 0)
617 // This is not a fatal error. It may be App Controlled.
618 result = 0;
619 if (TAO_debug_level > 3)
621 ORBSVCS_ERROR ((LM_ERROR,
622 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
623 ACE_TEXT ("Could not extract Factories from properties on ")
624 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
625 fault_event_desc.object_group_id));
628 else
630 if (TAO_debug_level > 6)
632 ORBSVCS_DEBUG ((LM_DEBUG,
633 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
634 ACE_TEXT ("Got Factories from properties on ")
635 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
636 fault_event_desc.object_group_id
643 // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or
644 // SEMI_ACTIVE, we can see if it was the primary replica that
645 // failed.
646 if ((result == 0) &&
647 (fault_event_desc.replication_style == FT::COLD_PASSIVE ||
648 fault_event_desc.replication_style == FT::WARM_PASSIVE ||
649 fault_event_desc.replication_style == FT::SEMI_ACTIVE))
651 if (TAO_debug_level > 6)
653 ORBSVCS_DEBUG ((LM_DEBUG,
654 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
655 ACE_TEXT ("Checking to see if failed replica was the primary for ")
656 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
657 fault_event_desc.object_group_id
660 result = this->is_primary_member (
661 the_object_group.in(),
662 fault_event_desc.location.in(),
663 fault_event_desc.object_is_primary);
666 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
667 // controlled) and the primary has faulted, establish a new primary.
668 // We get back a new object group.
669 if ((result == 0) &&
670 (fault_event_desc.membership_style == FT::MEMB_INF_CTRL))
673 PortableGroup::ObjectGroup_var new_object_group;
674 result = this->remove_failed_member (
675 the_object_group.in(),
676 fault_event_desc,
677 new_object_group.out());
678 if (result == 0)
680 the_object_group = new_object_group;
683 if (fault_event_desc.object_is_primary == 1)
685 if (TAO_debug_level > 6)
687 ORBSVCS_DEBUG ((LM_DEBUG,
688 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
689 ACE_TEXT ("Setting new primary for ")
690 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
691 fault_event_desc.object_group_id
694 result = this->set_new_primary (
695 the_object_group.in(),
696 fault_event_desc,
697 new_object_group.out());
698 if (result == 0)
700 the_object_group = new_object_group;
705 #if 0 // According to the FT CORBA specification, this will be handled by the ObjectGroupManager::remove_member method
706 // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure
707 // controlled) and the number of remaining members is less than
708 // the MinimumNumberMembers property, add new members.
709 // We get back a new object group.
710 if ((result == 0) &&
711 (fault_event_desc.membership_style == FT::MEMB_INF_CTRL))
713 if (TAO_debug_level > 6)
715 ORBSVCS_DEBUG ((LM_DEBUG,
716 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ")
717 ACE_TEXT ("Potentially adding new members to ")
718 ACE_TEXT ("ObjectGroup with id <%Q>.\n"),
719 fault_event_desc.object_group_id
722 result = this->add_members (
723 the_object_group.in(),
724 fault_event_desc,
725 new_object_group.out());
726 the_object_group = new_object_group;
728 #endif
729 return result;
732 int TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member (
733 PortableGroup::ObjectGroup_ptr iogr,
734 TAO::FT_FaultEventDescriptor & fault_event_desc,
735 PortableGroup::ObjectGroup_out new_iogr)
737 int result = 0;
738 new_iogr = PortableGroup::ObjectGroup::_nil ();
742 // Remove the old primary member from the object group.
743 PortableGroup::ObjectGroup_var temp_iogr =
744 this->replication_manager_->remove_member (
745 iogr,
746 fault_event_desc.location.in());
747 new_iogr = temp_iogr._retn ();
749 catch (const CORBA::Exception& ex)
751 ex._tao_print_exception (
752 "TAO::FT_ReplicationManagerFaultAnalyzer::remove_failed_member: ");
753 result = -1;
755 return result;
759 // Choose a new primary member for the ObjectGroup.
760 // Sets <new_iogr> and returns 0 on success.
761 // Returns -1 on failure.
762 int TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary (
763 PortableGroup::ObjectGroup_ptr iogr,
764 TAO::FT_FaultEventDescriptor & fault_event_desc,
765 PortableGroup::ObjectGroup_out new_iogr)
767 int result = 0;
768 new_iogr = PortableGroup::ObjectGroup::_nil ();
772 // Get the locations of the remaining members of the object group.
773 PortableGroup::Locations_var locations =
774 this->replication_manager_->locations_of_members (
775 iogr);
777 // Choose the first location as our new primary location.
778 if (locations->length() >= 1)
780 new_iogr = this->replication_manager_->set_primary_member (
781 iogr,
782 (*locations)[0]);
784 else
786 ORBSVCS_ERROR_RETURN ((LM_ERROR,
787 ACE_TEXT ("TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ")
788 ACE_TEXT ("No locations remaining in ObjectGroup with id <%Q>.\n"),
789 fault_event_desc.object_group_id),
790 -1);
793 catch (const CORBA::Exception& ex)
795 ex._tao_print_exception (
796 "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: ");
797 result = -1;
800 return result;
#if 0 // this is handled by the remove_member method
// NOTE: this function is compiled out; it is kept for reference only.
// While the number of members in the object group is less than
// the MinimumNumberMembers property, add new members.
// Sets <new_iogr> and returns 0 on success.
// Returns -1 on failure.
int TAO::FT_ReplicationManagerFaultAnalyzer::add_members (
  PortableGroup::ObjectGroup_ptr iogr,
  TAO::FT_FaultEventDescriptor & fault_event_desc,
  PortableGroup::ObjectGroup_out new_iogr)
{
  int result = 0;
  new_iogr = PortableGroup::ObjectGroup::_nil ();

  try
  {
    // Get current number of members in object group
    // (same as number of locations).
    PortableGroup::Locations_var locations =
      this->replication_manager_->locations_of_members (
        iogr);
    CORBA::ULong num_members = locations->length();

    // If it is less than the MinimumNumberMembers property, add
    // new members.
    if (num_members < fault_event_desc.minimum_number_members)
    {
      //@@ To create a member, we need to know the ObjectGroup,
      //   Location, TypeId, and Criteria.

      // Get the factory registry from the Replication Manager.
      PortableGroup::Criteria fake_criteria;
      PortableGroup::FactoryRegistry_var factory_registry =
        this->replication_manager_->get_factory_registry (
          fake_criteria);


      // @@ DLW SAYS: we need to find out the role played by this object
      // group so we can use the correct set of factories.
      // Get the list of factories for the type of the failed replica.
      CORBA::String_var type_id;
      PortableGroup::FactoryInfos_var factories_by_type =
          factory_registry->list_factories_by_role (
          fault_event_desc.type_id.in(), type_id);

      //
      // Build a set of locations of factories for this type that we
      // can use to create new members (i.e., at locations where
      // members do not currently exist).
      //
      FT_Location_Set valid_locations;

      // For each factory that can be used for this type...
      for (CORBA::ULong f=0; f<factories_by_type->length(); ++f)
      {
        // ...insert its location into valid_locations set.
        valid_locations.insert (factories_by_type[f].the_location);
      }

      // Now remove any locations where members already exist.
      for (CORBA::ULong m=0; m<num_members; ++m)
      {
        if (valid_locations.find (locations[m]))
          valid_locations.remove (locations[m]);
      }

      // The valid_locations set now contains all the factory
      // locations we can use to add members to this object group.
      // So, now we add new members until we reach
      // the value of the MinimumNumberMembers property.
      PortableGroup::Location_var good_location;
      for (FT_Location_Set::iterator iter (valid_locations);
           iter.next (good_location.out()) &&
           fault_event_desc.minimum_number_members > num_members;
           iter.advance(), ++num_members)
      {
        // Create a new member of the object group at this location.
        new_iogr = this->replication_manager_->create_member (
          iogr,
          good_location.in(),
          fault_event_desc.type_id.in(),
          fake_criteria);

        // Stop adding members when we reach the value of the
        // MinimumNumberMembers property.
        // if (num_members++ >= fault_event_desc.minimum_number_members)
            // break;
      }
    }
  }
  catch (const CORBA::Exception& ex)
  {
    ex._tao_print_exception (
      "TAO::FT_ReplicationManagerFaultAnalyzer::add_members: ");
    result = -1;
  }

  return result;
}
#endif // 0
904 // Handle a location failure.
905 int TAO::FT_ReplicationManagerFaultAnalyzer::location_failure (
906 TAO::FT_FaultEventDescriptor & fault_event_desc)
908 int result = 0;
910 // To handle a location failure, we should:
911 // - Unregister all the factories at that location.
912 // (We do this first so that we don't try to create a new replica
913 // at that location for any of the affected object groups.)
914 // - Determine all the object groups that had members at that
915 // location.
916 // - Handle each one of them as a single replica failure.
920 // Get the factory registry from the Replication Manager.
921 PortableGroup::Criteria fake_criteria;
922 PortableGroup::FactoryRegistry_var factory_registry =
923 this->replication_manager_->get_factory_registry (
924 fake_criteria);
926 // Unregister all factories at the failed location.
927 factory_registry->unregister_factory_by_location (
928 fault_event_desc.location.in());
930 // Determine all the object groups that had members at that
931 // location.
932 PortableGroup::ObjectGroups_var object_groups_at_location =
933 this->replication_manager_->groups_at_location (
934 fault_event_desc.location.in());
936 // Handle each one of them as a single replica failure.
937 for (CORBA::ULong i=0;
938 result==0 && i<object_groups_at_location->length();
939 ++i)
941 // Get the object group id.
942 fault_event_desc.object_group_id =
943 this->replication_manager_->get_object_group_id (
944 object_groups_at_location[i]);
946 // Get type id of this object group.
947 fault_event_desc.type_id =
948 this->replication_manager_->type_id (
949 object_groups_at_location[i]);
951 // Handle it as a single replica failure.
952 result = this->single_replica_failure (fault_event_desc);
955 catch (const CORBA::Exception& ex)
957 ex._tao_print_exception (
958 "TAO::FT_ReplicationManagerFaultAnalyzer::location_failure: ");
959 result = -1;
962 return result;
965 // Handle a type failure.
966 int TAO::FT_ReplicationManagerFaultAnalyzer::type_failure (
967 TAO::FT_FaultEventDescriptor & fault_event_desc)
969 int result = 0;
971 // To handle a type failure, we should:
972 // - Unregister the factory at the location of the failure
973 // that is associated with the failed type.
974 // (We do this first so that we don't try to create a new replica
975 // with that factory for any of the affected object groups.)
976 // - Determine all the object groups that had members at that
977 // location of that type.
978 // - Handle each one of them as a single replica failure.
982 // Get the factory registry from the Replication Manager.
983 PortableGroup::Criteria fake_criteria;
984 PortableGroup::FactoryRegistry_var factory_registry =
985 this->replication_manager_->get_factory_registry (
986 fake_criteria);
988 // Unregister the factory at the failed location associated with
989 // the role.
990 //@@ Using type_id as the role for now.
991 factory_registry->unregister_factory (
992 fault_event_desc.type_id.in(),
993 fault_event_desc.location.in());
995 // Get all the object groups that had members at that
996 // location.
997 PortableGroup::ObjectGroups_var object_groups_at_location =
998 this->replication_manager_->groups_at_location (
999 fault_event_desc.location.in());
1001 // For each one, if it was of the same type as the failed type,
1002 // handle it as a single replica failure.
1003 for (CORBA::ULong i=0;
1004 result==0 && i<object_groups_at_location->length();
1005 ++i)
1007 // Get the object group id.
1008 fault_event_desc.object_group_id =
1009 this->replication_manager_->get_object_group_id (
1010 object_groups_at_location[i]);
1012 // Get type id of this object group.
1013 PortableGroup::TypeId_var type_id =
1014 this->replication_manager_->type_id (
1015 object_groups_at_location[i]);
1017 // If the type id is the same as the failed type id...
1018 if (ACE_OS::strcmp (type_id.in(), fault_event_desc.type_id.in()) == 0)
1020 // Handle it as a single replica failure.
1021 result = this->single_replica_failure (fault_event_desc);
1025 catch (const CORBA::Exception& ex)
1027 ex._tao_print_exception (
1028 "TAO::FT_ReplicationManagerFaultAnalyzer::type_failure: ");
1029 result = -1;
1032 return result;
1035 TAO_END_VERSIONED_NAMESPACE_DECL