usr/src/cmd/fm/fmd/common/fmd_dr.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24
  25 /*
  26  * FMD Dynamic Reconfiguration (DR) Event Handling
  27  *
  28  * Fault manager scheme plug-ins must track characteristics of individual
  29  * pieces of hardware.  As these components can be added or removed by a DR
  30  * operation, we need to provide a means by which plug-ins can determine when
  31  * they need to re-examine the current configuration.  We provide a simple
  32  * mechanism whereby this task can be implemented using lazy evaluation: a
  33  * simple 64-bit generation counter is maintained and incremented on *any* DR.
  34  * Schemes can store the generation number in scheme-specific data structures,
  35  * and then revalidate their contents if the current generation number has
  36  * changed since the resource information was cached.  This method saves time,
  37  * avoids the complexity of direct participation in DR, avoids the need for
  38  * resource-specific processing of DR events, and is relatively easy to port
  39  * to other systems that support dynamic reconfiguration.
  40  *
  41  * The dr generation is only incremented in response to hardware changes.  Since
  42  * ASRUs can be in any scheme, including the device scheme, we must also be
  43  * aware of software configuration changes which may affect the resource cache.
  44  * In addition, we take a snapshot of the topology whenever a reconfiguration
  45  * event occurs and notify any modules of the change.
  46  */
  47
  48 #include <sys/types.h>
  49 #include <sys/sunddi.h>
  50 #include <sys/sysevent/dr.h>
  51 #include <sys/sysevent/eventdefs.h>
  52
  53 #include <stdio.h>
  54 #include <string.h>
  55 #include <unistd.h>
  56 #include <libsysevent.h>
  57
  58 #undef MUTEX_HELD
  59 #undef RW_READ_HELD
  60 #undef RW_WRITE_HELD
  61
  62 #include <fmd_asru.h>
  63 #include <fmd_error.h>
  64 #include <fmd_event.h>
  65 #include <fmd_fmri.h>
  66 #include <fmd_module.h>
  67 #include <fmd_subr.h>
  68 #include <fmd_topo.h>
  69 #include <fmd.h>
  70
  71 void
  72 fmd_dr_event(sysevent_t *sep)
  73 {
  74         uint64_t gen;
  75         fmd_event_t *e;
  76         const char *class = sysevent_get_class_name(sep);
  77         const char *subclass = sysevent_get_subclass_name(sep);
  78         hrtime_t evtime;
  79         fmd_topo_t *ftp, *prev;
  80
  81         if (strcmp(class, EC_DR) == 0) {
  82                 if (strcmp(subclass, ESC_DR_AP_STATE_CHANGE) != 0 &&
  83                     strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) != 0)
  84                         return;
  85         /* LINTED: E_NOP_IF_STMT */
  86         } else if (strcmp(class, EC_DEVFS) == 0) {
  87                 /*
  88                  * A devfs configuration event can change the topology,
  89                  * as disk nodes only exist when the device is configured.
  90                  */
  91         } else if (strcmp(class, EC_PLATFORM) == 0) {
  92                 /*
  93                  * Since we rely on the SP to enumerate fans,
  94                  * power-supplies and sensors/leds, it would be prudent
  95                  * to take a new snapshot if the SP resets.
  96                  */
  97                 if (strcmp(subclass, ESC_PLATFORM_SP_RESET) != 0)
  98                         return;
  99         } else if (strcmp(class, EC_DEV_ADD) == 0 ||
 100             strcmp(class, EC_DEV_REMOVE) == 0) {
 101                 if (strcmp(subclass, ESC_DISK) != 0)
 102                         return;
 103         } else
 104                 return;
 105
 106         /*
 107          * Take a topo snapshot and notify modules of the change.  Picking an
 108          * accurate time here is difficult.  On one hand, we have the timestamp
 109          * of the underlying sysevent, indicating when the reconfiguration event
 110          * occurred.  On the other hand, we are taking the topo snapshot
 111          * asynchronously, and hence the timestamp of the snapshot is the
 112          * current time.  Pretending this topo snapshot was valid at the time
 113          * the sysevent was posted seems wrong, so we instead opt for the
 114          * current time as an upper bound on the snapshot validity.
 115          *
 116          * Along these lines, we keep track of the last time we dispatched a
 117          * topo snapshot.  If the sysevent occurred before the last topo
 118          * snapshot, then don't bother dispatching another topo change event.
 119          * We've already indicated (to the best of our ability) the change in
 120          * topology.  This prevents endless topo snapshots in response to a
 121          * flurry of sysevents.
 122          */
 123         sysevent_get_time(sep, &evtime);
 124         prev = fmd_topo_hold();
 125         if (evtime <= prev->ft_time_begin &&
 126             fmd.d_clockops == &fmd_timeops_native) {
 127                 fmd_topo_rele(prev);
 128                 return;
 129         }
 130         fmd_topo_rele(prev);
 131
 132         (void) pthread_mutex_lock(&fmd.d_stats_lock);
 133         gen = fmd.d_stats->ds_dr_gen.fmds_value.ui64++;
 134         (void) pthread_mutex_unlock(&fmd.d_stats_lock);
 135
 136         TRACE((FMD_DBG_XPRT, "dr event %p, gen=%llu", (void *)sep, gen));
 137         fmd_topo_update();
 138
 139         ftp = fmd_topo_hold();
 140         e = fmd_event_create(FMD_EVT_TOPO, ftp->ft_time_end, NULL, ftp);
 141         fmd_modhash_dispatch(fmd.d_mod_hash, e);
 142 }