4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This file contains the environmental PICL plug-in module.
31 * This plugin sets up the PICLTREE for Chicago WS.
32 * It provides functionality to get/set temperatures and
35 * The environmental policy defaults to the auto mode
36 * as programmed by OBP at boot time.
41 #include <sys/sysmacros.h>
48 #include <sys/processor.h>
57 #include <libdevinfo.h>
61 #include <sys/utsname.h>
62 #include <sys/systeminfo.h>
64 #include <sys/pic16f747.h>
66 #include <sys/scsi/scsi.h>
67 #include <sys/scsi/generic/commands.h>
71 * PICL plugin entry points
73 static void piclenvd_register(void);
74 static void piclenvd_init(void);
75 static void piclenvd_fini(void);
80 extern void env_picl_setup(void);
81 extern void env_picl_destroy(void);
82 extern int env_picl_setup_tuneables(void);
84 static boolean_t
has_fan_failed(env_fan_t
*fanp
);
87 * PSU fan fault handling
89 static boolean_t
has_psufan_failed(void);
90 static int psufan_last_status
= FAN_OK
;
92 #pragma init(piclenvd_register)
95 * Plugin registration information
97 static picld_plugin_reg_t my_reg_info
= {
99 PICLD_PLUGIN_CRITICAL
,
/*
 * Capacity, in bytes, of the two fixed-size text buffers declared below.
 */
#define	REGISTER_INFORMATION_STRING_LENGTH	16

/*
 * Zero-initialized character buffers of the capacity above.
 * NOTE(review): the names suggest they hold fan RPM and fan status text;
 * their users are not visible in this part of the file -- confirm.
 */
static char fan_rpm_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
static char fan_status_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
109 static int scsi_log_sense(env_disk_t
*diskp
, uchar_t page_code
,
110 void *pagebuf
, uint16_t pagelen
, int page_control
);
111 static int scsi_mode_select(env_disk_t
*diskp
, uchar_t page_code
,
112 uchar_t
*pagebuf
, uint16_t pagelen
);
114 static int get_disk_temp(env_disk_t
*);
119 static es_sensor_blk_t sensor_ctl
[MAX_SENSORS
];
122 * Default limits for sensors, in case ES segment is not present, or has
123 * inconsistent information
125 static es_sensor_blk_t sensor_default_ctl
[MAX_SENSORS
] = {
127 CPU0_HIGH_POWER_OFF
, CPU0_HIGH_SHUTDOWN
, CPU0_HIGH_WARNING
,
128 CPU0_LOW_WARNING
, CPU0_LOW_SHUTDOWN
, CPU0_LOW_POWER_OFF
131 CPU1_HIGH_POWER_OFF
, CPU1_HIGH_SHUTDOWN
, CPU1_HIGH_WARNING
,
132 CPU1_LOW_WARNING
, CPU1_LOW_SHUTDOWN
, CPU1_LOW_POWER_OFF
135 ADT7462_HIGH_POWER_OFF
, ADT7462_HIGH_SHUTDOWN
, ADT7462_HIGH_WARNING
,
136 ADT7462_LOW_WARNING
, ADT7462_LOW_SHUTDOWN
, ADT7462_LOW_POWER_OFF
139 MB_HIGH_POWER_OFF
, MB_HIGH_SHUTDOWN
, MB_HIGH_WARNING
,
140 MB_LOW_WARNING
, MB_LOW_SHUTDOWN
, MB_LOW_POWER_OFF
143 LM95221_HIGH_POWER_OFF
, LM95221_HIGH_SHUTDOWN
, LM95221_HIGH_WARNING
,
144 LM95221_LOW_WARNING
, LM95221_LOW_SHUTDOWN
, LM95221_LOW_POWER_OFF
147 FIRE_HIGH_POWER_OFF
, FIRE_HIGH_SHUTDOWN
, FIRE_HIGH_WARNING
,
148 FIRE_LOW_WARNING
, FIRE_LOW_SHUTDOWN
, FIRE_LOW_POWER_OFF
151 LSI1064_HIGH_POWER_OFF
, LSI1064_HIGH_SHUTDOWN
, LSI1064_HIGH_WARNING
,
152 LSI1064_LOW_WARNING
, LSI1064_LOW_SHUTDOWN
, LSI1064_LOW_POWER_OFF
155 FRONT_PANEL_HIGH_POWER_OFF
, FRONT_PANEL_HIGH_SHUTDOWN
,
156 FRONT_PANEL_HIGH_WARNING
, FRONT_PANEL_LOW_WARNING
,
157 FRONT_PANEL_LOW_SHUTDOWN
, FRONT_PANEL_LOW_POWER_OFF
160 PSU_HIGH_POWER_OFF
, PSU_HIGH_SHUTDOWN
, PSU_HIGH_WARNING
,
161 PSU_LOW_WARNING
, PSU_LOW_SHUTDOWN
, PSU_LOW_POWER_OFF
166 * Env thread variables
168 static boolean_t system_shutdown_started
= B_FALSE
;
169 static boolean_t system_temp_thr_created
= B_FALSE
;
170 static pthread_t system_temp_thr_id
;
171 static pthread_attr_t thr_attr
;
172 static boolean_t disk_temp_thr_created
= B_FALSE
;
173 static pthread_t disk_temp_thr_id
;
174 static boolean_t fan_thr_created
= B_FALSE
;
175 static pthread_t fan_thr_id
;
178 * PM thread related variables
180 static pthread_t pmthr_tid
; /* pmthr thread ID */
181 static int pm_fd
= -1; /* PM device file descriptor */
182 static boolean_t pmthr_created
= B_FALSE
;
183 static int cur_lpstate
; /* cur low power state */
186 * Envd plug-in verbose flag set by SUNW_PICLENVD_DEBUG environment var
187 * Setting the verbose tuneable also enables debugging for better
193 * These are debug variables for keeping track of the total number
194 * of Fan and Temp sensor retries over the lifetime of the plugin.
196 static int total_fan_retries
= 0;
197 static int total_temp_retries
= 0;
202 static env_fan_t envd_system_fan0
= {
203 ENV_SYSTEM_FAN0
, ENV_SYSTEM_FAN0_DEVFS
, SYSTEM_FAN0_ID
,
204 SYSTEM_FAN_SPEED_MIN
, SYSTEM_FAN_SPEED_MAX
, -1, -1,
206 static env_fan_t envd_system_fan1
= {
207 ENV_SYSTEM_FAN1
, ENV_SYSTEM_FAN1_DEVFS
, SYSTEM_FAN1_ID
,
208 SYSTEM_FAN_SPEED_MIN
, SYSTEM_FAN_SPEED_MAX
, -1, -1,
210 static env_fan_t envd_system_fan2
= {
211 ENV_SYSTEM_FAN2
, ENV_SYSTEM_FAN2_DEVFS
, SYSTEM_FAN2_ID
,
212 SYSTEM_FAN_SPEED_MIN
, SYSTEM_FAN_SPEED_MAX
, -1, -1,
214 static env_fan_t envd_system_fan3
= {
215 ENV_SYSTEM_FAN3
, ENV_SYSTEM_FAN3_DEVFS
, SYSTEM_FAN3_ID
,
216 SYSTEM_FAN_SPEED_MIN
, SYSTEM_FAN_SPEED_MAX
, -1, -1,
218 static env_fan_t envd_system_fan4
= {
219 ENV_SYSTEM_FAN4
, ENV_SYSTEM_FAN4_DEVFS
, SYSTEM_FAN4_ID
,
220 SYSTEM_FAN_SPEED_MIN
, SYSTEM_FAN_SPEED_MAX
, -1, -1,
226 static env_disk_t envd_disk0
= {
227 ENV_DISK0
, ENV_DISK0_DEVFS
, DISK0_PHYSPATH
, DISK0_NODE_PATH
,
230 static env_disk_t envd_disk1
= {
231 ENV_DISK1
, ENV_DISK1_DEVFS
, DISK1_PHYSPATH
, DISK1_NODE_PATH
,
234 static env_disk_t envd_disk2
= {
235 ENV_DISK2
, ENV_DISK2_DEVFS
, DISK2_PHYSPATH
, DISK2_NODE_PATH
,
238 static env_disk_t envd_disk3
= {
239 ENV_DISK3
, ENV_DISK3_DEVFS
, DISK3_PHYSPATH
, DISK3_NODE_PATH
,
246 static env_sensor_t envd_sensor_cpu0
= {
247 SENSOR_CPU0
, SENSOR_CPU0_DEVFS
, CPU0_SENSOR_ID
, -1, NULL
,
249 static env_sensor_t envd_sensor_cpu1
= {
250 SENSOR_CPU1
, SENSOR_CPU1_DEVFS
, CPU1_SENSOR_ID
, -1, NULL
,
252 static env_sensor_t envd_sensor_adt7462
= {
253 SENSOR_ADT7462
, SENSOR_ADT7462_DEVFS
, ADT7462_SENSOR_ID
, -1, NULL
,
255 static env_sensor_t envd_sensor_mb
= {
256 SENSOR_MB
, SENSOR_MB_DEVFS
, MB_SENSOR_ID
, -1, NULL
,
258 static env_sensor_t envd_sensor_lm95221
= {
259 SENSOR_LM95221
, SENSOR_LM95221_DEVFS
, LM95221_SENSOR_ID
, -1, NULL
,
261 static env_sensor_t envd_sensor_fire
= {
262 SENSOR_FIRE
, SENSOR_FIRE_DEVFS
, FIRE_SENSOR_ID
, -1, NULL
,
264 static env_sensor_t envd_sensor_lsi1064
= {
265 SENSOR_LSI1064
, SENSOR_LSI1064_DEVFS
, LSI1064_SENSOR_ID
, -1, NULL
,
267 static env_sensor_t envd_sensor_front_panel
= {
268 SENSOR_FRONT_PANEL
, SENSOR_FRONT_PANEL_DEVFS
, FRONT_PANEL_SENSOR_ID
,
271 static env_sensor_t envd_sensor_psu
= {
272 SENSOR_PSU
, SENSOR_PSU_DEVFS
, PSU_SENSOR_ID
, -1, NULL
,
276 * The vendor-id and device-id are the properties associated with
277 * the SCSI controller. This is used to identify a particular controller
/*
 * Property names read from the SCSI controller node with
 * ptree_get_propval_by_name() in envd_setup_disks().
 */
#define	VENDOR_ID	"vendor-id"
#define	DEVICE_ID	"device-id"
284 * The implementation for SCSI disk drives to supply info. about
285 * temperature is not mandatory. Hence we first determine if the
286 * temperature page is supported. To do this we need to scan the list
287 * of pages supported.
/*
 * Log page codes passed to scsi_log_sense() and the size of the header
 * that is skipped before scanning the list of supported pages.
 */
#define	SUPPORTED_LPAGES	0
#define	TEMPERATURE_PAGE	0x0D
#define	LOGPAGEHDRSIZE		4
294 * NULL terminated array of fans
296 static env_fan_t
*envd_fans
[] = {
306 * NULL terminated array of disks
308 static env_disk_t
*envd_disks
[] = {
317 * NULL terminated array of temperature sensors
319 #define N_ENVD_SENSORS 9
320 static env_sensor_t
*envd_sensors
[] = {
323 &envd_sensor_adt7462
,
325 &envd_sensor_lm95221
,
327 &envd_sensor_lsi1064
,
328 &envd_sensor_front_panel
,
333 #define NOT_AVAILABLE "NA"
341 static int disk_high_warn_temperature
= DISK_HIGH_WARN_TEMPERATURE
;
342 static int disk_low_warn_temperature
= DISK_LOW_WARN_TEMPERATURE
;
343 static int disk_high_shutdown_temperature
=
344 DISK_HIGH_SHUTDOWN_TEMPERATURE
;
345 static int disk_low_shutdown_temperature
= DISK_LOW_SHUTDOWN_TEMPERATURE
;
347 static int disk_scan_interval
= DISK_SCAN_INTERVAL
;
348 static int sensor_scan_interval
= SENSOR_SCAN_INTERVAL
;
349 static int fan_scan_interval
= FAN_SCAN_INTERVAL
;
351 static int get_int_val(ptree_rarg_t
*parg
, void *buf
);
352 static int set_int_val(ptree_warg_t
*parg
, const void *buf
);
353 static int get_string_val(ptree_rarg_t
*parg
, void *buf
);
354 static int set_string_val(ptree_warg_t
*parg
, const void *buf
);
356 static int shutdown_override
= 0;
357 static int sensor_warning_interval
= SENSOR_WARNING_INTERVAL
;
358 static int sensor_warning_duration
= SENSOR_WARNING_DURATION
;
359 static int sensor_shutdown_interval
= SENSOR_SHUTDOWN_INTERVAL
;
360 static int disk_warning_interval
= DISK_WARNING_INTERVAL
;
361 static int disk_warning_duration
= DISK_WARNING_DURATION
;
362 static int disk_shutdown_interval
= DISK_SHUTDOWN_INTERVAL
;
364 static int system_temp_monitor
= 1; /* enabled */
365 static int fan_monitor
= 1; /* enabled */
366 static int pm_monitor
= 1; /* enabled */
368 /* Disable disk temperature monitoring until we have LSI fw support */
369 int disk_temp_monitor
= 0;
371 static char shutdown_cmd
[] = SHUTDOWN_CMD
;
372 const char *iofru_devname
= I2C_DEVFS
"/" IOFRU_DEV
;
374 env_tuneable_t tuneables
[] = {
375 {"system_temp-monitor", PICL_PTYPE_INT
, &system_temp_monitor
,
376 &get_int_val
, &set_int_val
, sizeof (int)},
378 {"fan-monitor", PICL_PTYPE_INT
, &fan_monitor
,
379 &get_int_val
, &set_int_val
, sizeof (int)},
381 {"pm-monitor", PICL_PTYPE_INT
, &pm_monitor
,
382 &get_int_val
, &set_int_val
, sizeof (int)},
384 {"shutdown-override", PICL_PTYPE_INT
, &shutdown_override
,
385 &get_int_val
, &set_int_val
, sizeof (int)},
387 {"sensor-warning-duration", PICL_PTYPE_INT
,
388 &sensor_warning_duration
,
389 &get_int_val
, &set_int_val
,
392 {"disk-scan-interval", PICL_PTYPE_INT
,
394 &get_int_val
, &set_int_val
,
397 {"fan-scan-interval", PICL_PTYPE_INT
,
399 &get_int_val
, &set_int_val
,
402 {"sensor-scan-interval", PICL_PTYPE_INT
,
403 &sensor_scan_interval
,
404 &get_int_val
, &set_int_val
,
407 {"sensor_warning-interval", PICL_PTYPE_INT
, &sensor_warning_interval
,
408 &get_int_val
, &set_int_val
,
411 {"sensor_shutdown-interval", PICL_PTYPE_INT
, &sensor_shutdown_interval
,
412 &get_int_val
, &set_int_val
,
415 {"disk_warning-interval", PICL_PTYPE_INT
, &disk_warning_interval
,
416 &get_int_val
, &set_int_val
,
419 {"disk_warning-duration", PICL_PTYPE_INT
, &disk_warning_duration
,
420 &get_int_val
, &set_int_val
,
423 {"disk_shutdown-interval", PICL_PTYPE_INT
, &disk_shutdown_interval
,
424 &get_int_val
, &set_int_val
,
427 {"shutdown-command", PICL_PTYPE_CHARSTRING
, shutdown_cmd
,
428 &get_string_val
, &set_string_val
,
429 sizeof (shutdown_cmd
)},
431 {"monitor-disk-temp", PICL_PTYPE_INT
, &disk_temp_monitor
,
432 &get_int_val
, &set_int_val
, sizeof (int)},
434 {"disk-high-warn-temperature", PICL_PTYPE_INT
,
435 &disk_high_warn_temperature
, &get_int_val
,
436 &set_int_val
, sizeof (int)},
438 {"disk-low-warn-temperature", PICL_PTYPE_INT
,
439 &disk_low_warn_temperature
, &get_int_val
,
440 &set_int_val
, sizeof (int)},
442 {"disk-high-shutdown-temperature", PICL_PTYPE_INT
,
443 &disk_high_shutdown_temperature
, &get_int_val
,
444 &set_int_val
, sizeof (int)},
446 {"disk-low-shutdown-temperature", PICL_PTYPE_INT
,
447 &disk_low_shutdown_temperature
, &get_int_val
,
448 &set_int_val
, sizeof (int)},
450 {"verbose", PICL_PTYPE_INT
, &env_debug
,
451 &get_int_val
, &set_int_val
, sizeof (int)}
455 * We use this to figure out how many tuneables there are.
456 * This is a variable because the publishing routine needs this info
459 int ntuneables
= (sizeof (tuneables
)/sizeof (tuneables
[0]));
462 * Lookup fan and return a pointer to env_fan_t data structure.
/*
 * fan_lookup: walk the NULL-terminated envd_fans[] array and compare the
 * name field of each entry against the name argument with strcmp(), i.e.
 * an exact, case-sensitive match.
 *
 * NOTE(review): the return type, the local declarations and the return
 * statements are not visible in this view. The preceding comment says a
 * pointer to the env_fan_t is returned; confirm what is returned when no
 * entry matches.
 */
465 fan_lookup(char *name
)
470 for (i
= 0; (fanp
= envd_fans
[i
]) != NULL
; i
++) {
471 if (strcmp(fanp
->name
, name
) == 0)
478 * Lookup sensor and return a pointer to env_sensor_t data structure.
/*
 * sensor_lookup: scan the first N_ENVD_SENSORS entries of envd_sensors[]
 * and compare the name field of each entry against the name argument with
 * strcmp() (exact match).
 *
 * NOTE(review): unlike the fan and disk lookups, this loop is bounded by
 * the N_ENVD_SENSORS constant rather than by the NULL terminator, and each
 * entry is dereferenced without a NULL check. N_ENVD_SENSORS must therefore
 * never exceed the number of non-NULL entries in envd_sensors[] -- confirm
 * the two are kept in sync.
 */
481 sensor_lookup(char *name
)
483 env_sensor_t
*sensorp
;
486 for (i
= 0; i
< N_ENVD_SENSORS
; ++i
) {
487 sensorp
= envd_sensors
[i
];
488 if (strcmp(sensorp
->name
, name
) == 0)
495 * Lookup disk and return a pointer to env_disk_t data structure.
/*
 * disk_lookup: walk the NULL-terminated envd_disks[] array and compare the
 * name field of each entry against the name argument.
 *
 * NOTE(review): the comparison is strncmp() limited to strlen(name), so it
 * is a prefix match: any argument that is a leading substring of a disk
 * name (including the empty string) matches the first such entry. The fan
 * and sensor lookups use strcmp(). Confirm the prefix match is intended.
 */
498 disk_lookup(char *name
)
503 for (i
= 0; (diskp
= envd_disks
[i
]) != NULL
; i
++) {
504 if (strncmp(diskp
->name
, name
, strlen(name
)) == 0)
511 * Get current temperature
512 * Returns -1 on error, 0 if successful
515 get_temperature(env_sensor_t
*sensorp
, tempr_t
*temp
)
517 int fd
= sensorp
->fd
;
522 else if (ioctl(fd
, PIC_GET_TEMPERATURE
, temp
) != 0) {
528 if (sensorp
->error
== MAX_SENSOR_RETRIES
) {
529 envd_log(LOG_WARNING
, ENV_SENSOR_ACCESS_FAIL
,
530 sensorp
->name
, errno
, strerror(errno
));
533 total_temp_retries
++;
536 } else if (sensorp
->error
!= 0) {
537 if (sensorp
->error
>= MAX_SENSOR_RETRIES
) {
538 envd_log(LOG_WARNING
, ENV_SENSOR_ACCESS_OK
,
544 if (total_temp_retries
&& env_debug
) {
545 envd_log(LOG_WARNING
,
546 "Total retries for sensors = %d",
555 * Get current disk temperature
556 * Returns -1 on error, 0 if successful
559 disk_temperature(env_disk_t
*diskp
, tempr_t
*temp
)
566 *temp
= diskp
->current_temp
;
572 * Get current fan speed
573 * This function returns a RPM value for fanspeed
575 * Returns -1 on error, 0 if successful
578 get_fan_speed(env_fan_t
*fanp
, fanspeed_t
*fanspeedp
)
587 if (has_fan_failed(fanp
)) {
592 /* try to read the fan information */
593 for (retries
= 0; retries
< MAX_FAN_RETRIES
; retries
++) {
594 if (ioctl(fanp
->fd
, PIC_GET_FAN_SPEED
, &tach
) == 0)
599 total_fan_retries
+= retries
;
600 if (retries
>= MAX_FAN_RETRIES
)
603 if (total_fan_retries
&& env_debug
) {
604 envd_log(LOG_WARNING
, "total retries for fan = %d",
608 real_tach
= tach
<< 8;
609 *fanspeedp
= TACH_TO_RPM(real_tach
);
615 * This function accepts a percentage of fan speed
616 * from 0-100 and programs the HW monitor fans to the corresponding
618 * Returns -1 on error, -2 on invalid args passed, 0 if successful
/*
 * set_fan_speed: reject a fanspeed outside 0..100, then issue the
 * PIC_SET_FAN_SPEED ioctl on fanp->fd, passing the address of the local
 * speed value. Per the preceding comment the result is -1 on error, -2 on
 * invalid arguments and 0 on success.
 *
 * NOTE(review): the conversion from the percentage to the value stored in
 * speed is not visible in this view. If fanspeed_t is an unsigned type the
 * less-than-zero half of the range check can never be true -- confirm.
 */
621 set_fan_speed(env_fan_t
*fanp
, fanspeed_t fanspeed
)
628 if (fanspeed
< 0 || fanspeed
> 100)
632 if (ioctl(fanp
->fd
, PIC_SET_FAN_SPEED
, &speed
) != 0)
639 * close all fan devices
/*
 * envd_close_fans: walk the NULL-terminated envd_fans[] array and close()
 * the descriptor of every fan whose fd is not -1. The close() result is
 * deliberately discarded.
 *
 * NOTE(review): whether fd is reset to -1 after the close is not visible
 * in this view -- confirm, otherwise a second call would close a stale fd.
 */
642 envd_close_fans(void)
647 for (i
= 0; (fanp
= envd_fans
[i
]) != NULL
; i
++) {
648 if (fanp
->fd
!= -1) {
649 (void) close(fanp
->fd
);
656 * Close sensor devices and free up resources
/*
 * envd_close_sensors: for each of the first N_ENVD_SENSORS entries of
 * envd_sensors[], close() the descriptor if fd is not -1. The close()
 * result is deliberately discarded.
 *
 * NOTE(review): entries are dereferenced without a NULL check, so
 * N_ENVD_SENSORS must match the populated length of envd_sensors[].
 * Whether fd is reset to -1 afterwards is not visible here -- confirm.
 */
659 envd_close_sensors(void)
661 env_sensor_t
*sensorp
;
664 for (i
= 0; i
< N_ENVD_SENSORS
; ++i
) {
665 sensorp
= envd_sensors
[i
];
666 if (sensorp
->fd
!= -1) {
667 (void) close(sensorp
->fd
);
674 * Open fan devices and initialize per fan data structure.
/*
 * envd_setup_fans: initialize every entry of the NULL-terminated
 * envd_fans[] array. For each fan:
 *   - last_status is reset to FAN_OK;
 *   - for the CPU0 and CPU1 fans, the matching CPU node is looked up in the
 *     PICL tree with ptree_get_node_by_path(); on the failure path the fan
 *     is marked not present;
 *   - the fan device (devfs_path) is opened O_RDWR; an open failure is
 *     logged with ENV_FAN_OPEN_FAIL and the fan is marked not present,
 *     otherwise it is marked present.
 *
 * NOTE(review): the return type/value and what happens to the opened fd
 * are not visible in this view.
 */
677 envd_setup_fans(void)
682 picl_nodehdl_t tnodeh
;
684 for (i
= 0; (fanp
= envd_fans
[i
]) != NULL
; i
++) {
685 fanp
->last_status
= FAN_OK
;
687 /* Make sure cpu0/1 present for validating cpu fans */
688 if (fanp
->id
== CPU0_FAN_ID
) {
689 if (ptree_get_node_by_path(CPU0_PATH
, &tnodeh
) !=
693 "get node by path failed for %s\n",
696 fanp
->present
= B_FALSE
;
700 if (fanp
->id
== CPU1_FAN_ID
) {
701 if (ptree_get_node_by_path(CPU1_PATH
, &tnodeh
) !=
/*
 * NOTE(review): the lookup above is for CPU1_PATH, but the failure message
 * below prints CPU0_PATH. This looks like a copy/paste slip that makes the
 * log point at the wrong CPU -- confirm and change to CPU1_PATH.
 */
705 "get node by path failed for %s\n", CPU0_PATH
);
707 fanp
->present
= B_FALSE
;
711 if ((fd
= open(fanp
->devfs_path
, O_RDWR
)) == -1) {
713 ENV_FAN_OPEN_FAIL
, fanp
->name
,
714 fanp
->devfs_path
, errno
, strerror(errno
));
715 fanp
->present
= B_FALSE
;
719 fanp
->present
= B_TRUE
;
/*
 * envd_setup_disks: locate the on-board SCSI controller in the PICL tree,
 * read its vendor-id and device-id properties, then initialize every entry
 * of the NULL-terminated envd_disks[] array.
 *
 * Per disk, as far as visible here:
 *   - the disk node (nodepath) is looked up; if absent the disk is marked
 *     not present;
 *   - devfs_path is opened O_RDONLY into diskp->fd; on failure the disk is
 *     marked not present;
 *   - state is reset (timestamps zeroed, tpage/smart support cleared) and
 *     the four thresholds are copied from the disk_* tuneables;
 *   - the list of supported log pages is read with scsi_log_sense() and
 *     scanned for TEMPERATURE_PAGE;
 *   - if the temperature page is not supported, the IEC mode page is
 *     written with scsi_mode_select() to turn on SMART, and both
 *     smart_supported and tpage_supported are then set to B_TRUE; a
 *     mode-select failure is only logged when env_debug is set (see the
 *     in-line comment for the rationale);
 *   - get_disk_temp() is called once to take an initial reading.
 *
 * NOTE(review): page_len is assembled from two device-supplied bytes, so
 * it can be as large as 0xFFFF, while log_page holds only 256 bytes. No
 * clamp of the scan bound to sizeof (log_page) is visible in this view; if
 * none exists the scan loop can read past the end of log_page -- confirm.
 *
 * NOTE(review): the two property-error messages print errno with the
 * conversion 0x%d, i.e. a hex prefix in front of a decimal number.
 */
730 envd_setup_disks(void)
732 int ret
, i
, page_index
, page_len
;
733 picl_nodehdl_t tnodeh
;
737 uchar_t log_page
[256];
739 if (ptree_get_node_by_path(SCSI_CONTROLLER_NODE_PATH
,
740 &tnodeh
) != PICL_SUCCESS
) {
742 envd_log(LOG_ERR
, "On-Board SCSI controller %s "
743 "not found in the system.\n",
744 SCSI_CONTROLLER_NODE_PATH
);
749 if ((ret
= ptree_get_propval_by_name(tnodeh
, VENDOR_ID
,
750 &vendor_id
, sizeof (vendor_id
))) != 0) {
752 envd_log(LOG_ERR
, "Error in getting vendor-id "
753 "for SCSI controller. ret = %d errno = 0x%d\n",
758 if ((ret
= ptree_get_propval_by_name(tnodeh
, DEVICE_ID
,
759 &device_id
, sizeof (device_id
))) != 0) {
761 envd_log(LOG_ERR
, "Error in getting device-id "
762 "for SCSI controller. ret = %d errno = 0x%d\n",
769 * We have found LSI1064 SCSi controller onboard.
771 for (i
= 0; (diskp
= envd_disks
[i
]) != NULL
; i
++) {
772 if (ptree_get_node_by_path(diskp
->nodepath
,
773 &tnodeh
) != PICL_SUCCESS
) {
774 diskp
->present
= B_FALSE
;
777 "DISK %d: %s not found in the system.\n",
778 diskp
->id
, diskp
->nodepath
);
782 if ((diskp
->fd
= open(diskp
->devfs_path
, O_RDONLY
)) == -1) {
783 diskp
->present
= B_FALSE
;
786 "Error in opening %s errno = 0x%x\n",
787 diskp
->devfs_path
, errno
);
791 diskp
->present
= B_TRUE
;
792 diskp
->tpage_supported
= B_FALSE
;
793 diskp
->smart_supported
= B_FALSE
;
794 diskp
->warning_tstamp
= 0;
795 diskp
->shutdown_tstamp
= 0;
796 diskp
->high_warning
= disk_high_warn_temperature
;
797 diskp
->low_warning
= disk_low_warn_temperature
;
798 diskp
->high_shutdown
= disk_high_shutdown_temperature
;
799 diskp
->low_shutdown
= disk_low_shutdown_temperature
;
801 * Find out if the Temperature page is supported by the disk.
803 if (scsi_log_sense(diskp
, SUPPORTED_LPAGES
, log_page
,
804 sizeof (log_page
), 1) == 0) {
806 page_len
= ((log_page
[2] << 8) & 0xFF00) | log_page
[3];
808 for (page_index
= LOGPAGEHDRSIZE
;
809 page_index
< page_len
+ LOGPAGEHDRSIZE
;
811 if (log_page
[page_index
] != TEMPERATURE_PAGE
)
814 diskp
->tpage_supported
= B_TRUE
;
817 "tpage supported for %s\n",
823 * If the temp log page failed, we can check if this is
824 * a SATA drive and attempt to read the temperature
825 * using the SMART interface.
827 if (diskp
->tpage_supported
!= B_TRUE
) {
828 uchar_t iec_page
[IEC_PAGE_SIZE
];
831 envd_log(LOG_ERR
, "Turning on SMART\n");
833 (void) memset(iec_page
, 0, sizeof (iec_page
));
834 iec_page
[0] = IEC_PAGE
; /* SMART PAGE */
835 iec_page
[1] = 0xa; /* length */
836 /* Notification, only when requested */
837 iec_page
[3] = REPORT_ON_REQUEST
;
839 ret
= scsi_mode_select(diskp
, IEC_PAGE
,
840 iec_page
, sizeof (iec_page
));
843 * Since we know this is a SMART capable
844 * drive, we will try to set the page and
845 * determine if the drive is not capable
846 * of reading the TEMP page when we
847 * try to read the temperature and disable
848 * it then. We do not fail when reading
849 * or writing this page because we will
850 * determine the SMART capabilities
851 * when reading the temperature.
853 if ((ret
!= 0) && (env_debug
)) {
855 "Failed to set mode page");
858 diskp
->smart_supported
= B_TRUE
;
859 diskp
->tpage_supported
= B_TRUE
;
862 if (get_disk_temp(diskp
) < 0) {
863 envd_log(LOG_ERR
, " error reading temperature of:%s\n",
865 } else if (env_debug
) {
866 envd_log(LOG_ERR
, "%s: temperature = %d\n",
867 diskp
->name
, diskp
->current_temp
);
878 seeprom_scn_t scn_hdr
;
879 seeprom_seg_t seg_hdr
;
881 es_sensor_t
*sensorp
;
883 int envseg_len
, esd_len
;
887 * Open the front io fru
889 if ((fd
= open(iofru_devname
, O_RDONLY
)) == -1) {
890 envd_log(LOG_ERR
, ENV_FRU_OPEN_FAIL
, iofru_devname
, errno
);
895 * Read section header from the fru SEEPROM
897 if (lseek(fd
, SSCN_OFFSET
, SEEK_SET
) == (off_t
)-1 ||
898 read(fd
, &scn_hdr
, sizeof (scn_hdr
)) != sizeof (scn_hdr
)) {
899 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
903 if ((scn_hdr
.sscn_tag
!= SSCN_TAG
) ||
904 (GET_UNALIGN16(&scn_hdr
.sscn_ver
) != SSCN_VER
)) {
905 envd_log(LOG_ERR
, ENV_FRU_BAD_SCNHDR
, scn_hdr
.sscn_tag
,
906 GET_UNALIGN16(&scn_hdr
.sscn_ver
));
912 * Locate environmental segment
914 for (i
= 0; i
< scn_hdr
.sscn_nsegs
; i
++) {
915 if (read(fd
, &seg_hdr
, sizeof (seg_hdr
)) != sizeof (seg_hdr
)) {
916 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
923 "Seg name: %x off:%x len:%x\n",
924 GET_UNALIGN16(&seg_hdr
.sseg_name
),
925 GET_UNALIGN16(&seg_hdr
.sseg_off
),
926 GET_UNALIGN16(&seg_hdr
.sseg_len
));
929 if (GET_UNALIGN16(&seg_hdr
.sseg_name
) == ENVSEG_NAME
)
932 if (i
== scn_hdr
.sscn_nsegs
) {
933 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
939 * Read environmental segment
941 envseg_len
= GET_UNALIGN16(&seg_hdr
.sseg_len
);
942 if ((envseg
= malloc(envseg_len
)) == NULL
) {
943 envd_log(LOG_ERR
, ENV_FRU_NOMEM_FOR_SEG
, envseg_len
);
948 if (lseek(fd
, (off_t
)GET_UNALIGN16(&seg_hdr
.sseg_off
),
949 SEEK_SET
) == (off_t
)-1 ||
950 read(fd
, envseg
, envseg_len
) != envseg_len
) {
951 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
958 * Check environmental segment data for consistency
960 esd_len
= sizeof (*envseg
) +
961 (envseg
->esd_nsensors
- 1) * sizeof (envseg
->esd_sensors
[0]);
962 if (envseg
->esd_ver
!= ENVSEG_VERSION
|| envseg_len
< esd_len
) {
963 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
970 * Process environmental segment data
972 if (envseg
->esd_nsensors
> MAX_SENSORS
) {
973 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
979 sensorp
= &(envseg
->esd_sensors
[0]);
980 envsegp
= (char *)envseg
;
981 for (i
= 0; i
< envseg
->esd_nsensors
; i
++) {
984 (void) memcpy(&ess_id
,
985 sensorp
->ess_id
, sizeof (sensorp
->ess_id
));
988 envd_log(LOG_INFO
, "\n Sensor Id %x offset %x",
989 ess_id
, sensorp
->ess_off
);
991 if (ess_id
>= MAX_SENSORS
) {
992 envd_log(LOG_ERR
, ENV_FRU_BAD_ENVSEG
, iofru_devname
);
997 (void) memcpy(&sensor_ctl
[ess_id
], &envsegp
[sensorp
->ess_off
],
998 sizeof (es_sensor_blk_t
));
1004 * Match sensor/ES id and point to correct data based on IDs
1006 for (i
= 0; i
< N_ENVD_SENSORS
; i
++) {
1007 id
= envd_sensors
[i
]->id
;
1008 envd_sensors
[i
]->es
= &sensor_ctl
[id
];
1012 * Cleanup and return
/*
 * envd_es_default_setup: point the es field of each of the first
 * N_ENVD_SENSORS entries of envd_sensors[] at the built-in default limits,
 * sensor_default_ctl[id], where id is the id field of that sensor.
 *
 * NOTE(review): id is used as an index into sensor_default_ctl[MAX_SENSORS]
 * without a range check; this relies on every sensor id constant being
 * below MAX_SENSORS -- confirm.
 */
1021 envd_es_default_setup(void)
1025 for (i
= 0; i
< N_ENVD_SENSORS
; i
++) {
1026 id
= envd_sensors
[i
]->id
;
1027 envd_sensors
[i
]->es
= &sensor_default_ctl
[id
];
1032 * Open temperature sensor devices and initialize per sensor data structure.
1035 envd_setup_sensors(void)
1037 env_sensor_t
*sensorp
;
1040 picl_nodehdl_t tnodeh
;
1042 for (i
= 0; i
< N_ENVD_SENSORS
; i
++) {
1044 envd_log(LOG_ERR
, "scanning sensor %d\n", i
);
1046 sensorp
= envd_sensors
[i
];
1048 /* Initialize sensor's initial state */
1049 sensorp
->shutdown_initiated
= B_FALSE
;
1050 sensorp
->warning_tstamp
= 0;
1051 sensorp
->shutdown_tstamp
= 0;
1054 /* Make sure cpu0/1 sensors are present */
1055 if (sensorp
->id
== CPU0_SENSOR_ID
) {
1056 if (ptree_get_node_by_path(CPU0_PATH
, &tnodeh
) !=
1060 "get node by path failed for %s\n",
1063 sensorp
->present
= B_FALSE
;
1067 if (sensorp
->id
== CPU1_SENSOR_ID
) {
1068 if (ptree_get_node_by_path(CPU1_PATH
, &tnodeh
) !=
1072 "get node by path failed for %s\n",
1075 sensorp
->present
= B_FALSE
;
1080 sensorp
->fd
= open(sensorp
->devfs_path
, O_RDWR
);
1081 if (sensorp
->fd
== -1) {
1083 envd_log(LOG_ERR
, ENV_SENSOR_OPEN_FAIL
,
1084 sensorp
->name
, sensorp
->devfs_path
,
1085 errno
, strerror(errno
));
1087 sensorp
->present
= B_FALSE
;
1092 * Determine if the front panel is attached, we want the
1093 * information if it exists, but should not shut down
1094 * the system if it is removed.
1096 if (sensorp
->id
== FRONT_PANEL_SENSOR_ID
) {
1100 for (tries
= 0; tries
< MAX_SENSOR_RETRIES
; tries
++) {
1101 if (ioctl(sensorp
->fd
, PIC_GET_TEMPERATURE
,
1107 if (tries
== MAX_SENSOR_RETRIES
)
1108 sensorp
->present
= B_FALSE
;
1111 sensorp
->present
= B_TRUE
;
1125 pm_state_change_t pmstate
;
1126 char physpath
[PATH_MAX
];
1128 uint8_t estar_state
;
1131 pmstate
.physpath
= physpath
;
1132 pmstate
.size
= sizeof (physpath
);
1136 pm_fd
= open(PM_DEVICE
, O_RDWR
);
1138 envd_log(LOG_ERR
, PM_THREAD_EXITING
, errno
, strerror(errno
));
1143 * Get PM state change events to check if the system
1144 * is in lowest power state and inform PIC which controls
1147 * To minimize polling, we use the blocking interface
1148 * to get the power state change event here.
1150 if (ioctl(pm_fd
, PM_GET_STATE_CHANGE_WAIT
, &pmstate
) != 0) {
1159 "pmstate event:0x%x flags:%x"
1160 "comp:%d oldval:%d newval:%d path:%s\n",
1161 pmstate
.event
, pmstate
.flags
,
1168 (pmstate
.flags
& PSC_ALL_LOWEST
) ? 1 : 0;
1169 } while (ioctl(pm_fd
, PM_GET_STATE_CHANGE
, &pmstate
) == 0);
1171 if (pre_lpstate
!= cur_lpstate
) {
1172 pre_lpstate
= cur_lpstate
;
1173 estar_state
= (cur_lpstate
& 0x1);
1176 "setting PIC ESTAR SATE to %x\n",
1179 env_monitor_fd
= open(ENV_MONITOR_DEVFS
, O_RDWR
);
1180 if (env_monitor_fd
!= -1) {
1181 if (ioctl(env_monitor_fd
, PIC_SET_ESTAR_MODE
,
1182 &estar_state
) < 0) {
1185 "unable to set ESTAR_MODE in PIC\n");
1187 (void) close(env_monitor_fd
);
1191 "Failed to open %s\n",
1202 * This is the env thread, which monitors the current temperature when the
1203 * warning threshold is exceeded. Its job is to make sure the temperature
1204 * does not cross the shutdown threshold (high or low). If it does, it starts a
1205 * forced shutdown to avoid reaching hardware poweroff via THERM interrupt.
/*
 * system_temp_thr: body of the temperature monitoring thread.
 *
 * Each pass of the loop, as far as visible here:
 *   - sleeps by taking a function-local mutex and doing a relative timed
 *     wait on a function-local condition variable; any wake-up other than
 *     ETIMEDOUT takes a separate path (its body is not visible). The
 *     timeout is reloaded from sensor_scan_interval after the wait, so a
 *     changed tuneable takes effect on the following pass;
 *   - for every present sensor, reads the temperature with
 *     get_temperature() and stores it in cur_temp; sensors whose read fails
 *     or that already initiated a shutdown are skipped;
 *   - warning range: warning_start records when the temperature entered the
 *     range; a LOG_CRIT warning is emitted once it has stayed there for
 *     sensor_warning_duration seconds, and at most once every
 *     sensor_warning_interval seconds. Leaving the range clears
 *     warning_start;
 *   - shutdown range (only when shutdown_override is zero):
 *     shutdown_tstamp records entry into the range; after
 *     sensor_shutdown_interval seconds the sensor is flagged
 *     shutdown_initiated, a message is logged at LOG_ALERT and, guarded by
 *     system_shutdown_started, shutdown_cmd is run through system().
 *     Leaving the range clears shutdown_tstamp.
 *
 * NOTE(review): msgbuf and syscmd are passed to envd_log() in the position
 * where other calls in this file pass a format string. If envd_log() is
 * printf-style, any percent sign coming from a sensor name or from the
 * shutdown-command tuneable would be interpreted as a conversion. Passing
 * a literal %s format with the buffer as argument would avoid that --
 * confirm the envd_log() signature.
 *
 * NOTE(review): the command line handed to system() is built from the
 * shutdown-command tuneable plus the message text; confirm who is allowed
 * to set that tuneable.
 *
 * NOTE(review): the mutex and condition variable are automatic variables
 * and nothing in the visible lines signals the condition, so the timed wait
 * appears to serve purely as a sleep -- confirm.
 */
1209 system_temp_thr(void *args
)
1211 char syscmd
[BUFSIZ
];
1212 char msgbuf
[BUFSIZ
];
1215 env_sensor_t
*sensorp
;
1216 pthread_mutex_t env_monitor_mutex
= PTHREAD_MUTEX_INITIALIZER
;
1217 pthread_cond_t env_monitor_cv
= PTHREAD_COND_INITIALIZER
;
1223 * Sleep for specified seconds before issuing IOCTL
1226 (void) pthread_mutex_lock(&env_monitor_mutex
);
1227 ret
= pthread_cond_reltimedwait_np(&env_monitor_cv
,
1228 &env_monitor_mutex
, &to
);
1229 to
.tv_sec
= sensor_scan_interval
;
1231 if (ret
!= ETIMEDOUT
) {
1232 (void) pthread_mutex_unlock(&env_monitor_mutex
);
1236 (void) pthread_mutex_unlock(&env_monitor_mutex
);
1237 for (i
= 0; i
< N_ENVD_SENSORS
; i
++) {
1238 sensorp
= envd_sensors
[i
];
1239 if (sensorp
->present
== B_FALSE
)
1241 if (get_temperature(sensorp
, &temp
) == -1)
1244 sensorp
->cur_temp
= temp
;
1248 sensorp
->name
, sensorp
->cur_temp
);
1252 * If this sensor already triggered system shutdown,
1253 * don't log any more shutdown/warning messages for it.
1255 if (sensorp
->shutdown_initiated
)
1259 * Check for the temperature in warning and shutdown
1260 * range and take appropriate action.
1262 if (SENSOR_TEMP_IN_WARNING_RANGE(sensorp
->cur_temp
,
1265 * Check if the temperature has been in
1266 * warning range during last
1267 * sensor_warning_duration interval. If so,
1268 * the temperature is truly in warning range
1269 * and we need to log a warning message, but
1270 * no more than once every
1271 * sensor_warning_interval seconds.
1273 time_t wtstamp
= sensorp
->warning_tstamp
;
1275 ct
= (time_t)(gethrtime() / NANOSEC
);
1276 if (sensorp
->warning_start
== 0)
1277 sensorp
->warning_start
= ct
;
1278 if (((ct
- sensorp
->warning_start
) >=
1279 sensor_warning_duration
) &&
1280 (wtstamp
== 0 || (ct
- wtstamp
) >=
1281 sensor_warning_interval
)) {
1282 envd_log(LOG_CRIT
, ENV_WARNING_MSG
,
1283 sensorp
->name
, sensorp
->cur_temp
,
1285 sensorp
->es
->esb_low_warning
,
1287 sensorp
->es
->esb_high_warning
);
1289 sensorp
->warning_tstamp
= ct
;
1291 } else if (sensorp
->warning_start
!= 0)
1292 sensorp
->warning_start
= 0;
1294 if (!shutdown_override
&&
1295 SENSOR_TEMP_IN_SHUTDOWN_RANGE(sensorp
->cur_temp
,
1297 ct
= (time_t)(gethrtime() / NANOSEC
);
1298 if (sensorp
->shutdown_tstamp
== 0)
1299 sensorp
->shutdown_tstamp
= ct
;
1302 * Shutdown the system if the temperature
1303 * remains in the shutdown range for over
1304 * sensor_shutdown_interval seconds.
1306 if ((ct
- sensorp
->shutdown_tstamp
) >=
1307 sensor_shutdown_interval
) {
1311 sensorp
->shutdown_initiated
= B_TRUE
;
1313 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1314 ENV_SHUTDOWN_MSG
, sensorp
->name
,
1317 sensorp
->es
->esb_low_shutdown
,
1319 sensorp
->es
->esb_high_shutdown
);
1321 envd_log(LOG_ALERT
, msgbuf
);
1324 * Shutdown the system (only once)
1326 if (system_shutdown_started
==
1328 (void) snprintf(syscmd
,
1330 "%s \"%s\"", shutdown_cmd
,
1333 envd_log(LOG_ALERT
, syscmd
);
1334 system_shutdown_started
=
1337 (void) system(syscmd
);
1340 } else if (sensorp
->shutdown_tstamp
!= 0)
1341 sensorp
->shutdown_tstamp
= 0;
1343 } /* end of forever loop */
/*
 * scsi_log_sense: issue a SCSI LOG SENSE (SCMD_LOG_SENSE_G1) to the disk
 * through the USCSICMD ioctl on diskp->fd and place the returned page in
 * pagebuf.
 *
 *   diskp         disk whose open fd is used for the ioctl
 *   page_code     log page to fetch; stored in CDB byte 2
 *   pagebuf       caller buffer that receives the page
 *   pagelen       size of pagebuf; written big-endian into CDB bytes 7-8
 *                 and used as the uscsi buffer length
 *   page_control  per the in-line comment, a non-zero value is passed for
 *                 non-SMART drives; CDB byte 2 is written either as
 *                 (0x01 << 6) | page_code or as plain page_code (the
 *                 condition that selects between the two assignments is
 *                 not visible in this view)
 *
 * The command is a read with request sense enabled, USCSI_SILENT set and a
 * timeout of DEFAULT_SCSI_TIMEOUT. It counts as successful when both the
 * ioctl result and uscsi_status are 0; callers in this file compare the
 * return value against 0 for success.
 *
 * NOTE(review): the exact return values are not visible in this view.
 */
1350 scsi_log_sense(env_disk_t
*diskp
, uchar_t page_code
, void *pagebuf
,
1351 uint16_t pagelen
, int page_control
)
1353 struct uscsi_cmd ucmd_buf
;
1354 uchar_t cdb_buf
[CDB_GROUP1
];
1355 struct scsi_extended_sense sense_buf
;
1358 bzero(&cdb_buf
, sizeof (cdb_buf
));
1359 bzero(&ucmd_buf
, sizeof (ucmd_buf
));
1360 bzero(&sense_buf
, sizeof (sense_buf
));
1362 cdb_buf
[0] = SCMD_LOG_SENSE_G1
;
1365 * For SATA we need to have the current threshold value set.
1366 * For SAS drives we can use the current cumulative value.
1367 * This is set for non-SMART drives, by passing a non-zero
1371 cdb_buf
[2] = (0x01 << 6) | page_code
;
1373 cdb_buf
[2] = page_code
;
1375 cdb_buf
[7] = (uchar_t
)((pagelen
& 0xFF00) >> 8);
1376 cdb_buf
[8] = (uchar_t
)(pagelen
& 0x00FF);
1378 ucmd_buf
.uscsi_cdb
= (char *)cdb_buf
;
1379 ucmd_buf
.uscsi_cdblen
= sizeof (cdb_buf
);
1380 ucmd_buf
.uscsi_bufaddr
= (caddr_t
)pagebuf
;
1381 ucmd_buf
.uscsi_buflen
= pagelen
;
1382 ucmd_buf
.uscsi_rqbuf
= (caddr_t
)&sense_buf
;
1383 ucmd_buf
.uscsi_rqlen
= sizeof (struct scsi_extended_sense
);
1384 ucmd_buf
.uscsi_flags
= USCSI_RQENABLE
| USCSI_READ
| USCSI_SILENT
;
1385 ucmd_buf
.uscsi_timeout
= DEFAULT_SCSI_TIMEOUT
;
/*
 * NOTE(review): ucmd_buf is passed to ioctl() by value here. Every other
 * ioctl in this file passes the address of its argument, and the
 * uscsi_status field is read back from ucmd_buf right after the call, which
 * only works if the driver was given a pointer. This presumably should be
 * &ucmd_buf -- confirm against the uscsi documentation and fix.
 */
1387 ret_val
= ioctl(diskp
->fd
, USCSICMD
, ucmd_buf
);
1388 if ((ret_val
== 0) && (ucmd_buf
.uscsi_status
== 0)) {
1391 "log sense command for page_code 0x%x succeeded\n", page_code
);
1395 envd_log(LOG_ERR
, "log sense command for %s failed. "
1396 "page_code 0x%x ret_val = 0x%x "
1397 "status = 0x%x errno = 0x%x\n", diskp
->name
, page_code
,
1398 ret_val
, ucmd_buf
.uscsi_status
, errno
);
/*
 * get_disk_temp: refresh diskp->current_temp and diskp->ref_temp. Callers
 * in this file treat a negative return as failure.
 *
 * SMART path (diskp->smart_supported == B_TRUE):
 *   - the SMART data is fetched with scsi_log_sense(GET_SMART_INFO) into a
 *     zeroed smart_structure, with page_control 0; on the error path both
 *     temperatures are set to DISK_INVALID_TEMP;
 *   - the first 512 bytes of the structure are summed as a checksum;
 *   - the attribute table is scanned, stopping at SMART_FIELDS entries, at
 *     an attribute id of 0, or at the first attribute whose id is HDA_TEMP;
 *   - if no temperature attribute is found, SMART and temperature-page
 *     support are switched off for this disk and both temperatures are
 *     invalidated;
 *   - otherwise, when raw_data[1] is not 0xFF, both current_temp and
 *     ref_temp are taken from raw_data[2]; else both are invalidated.
 *
 * Log page path (otherwise):
 *   - TEMPERATURE_PAGE is fetched with scsi_log_sense() into tpage;
 *   - current temperature: parameter code bytes 4-5 must be 0x0000 and the
 *     length byte 7 must be 0x02; the value is byte 9;
 *   - reference temperature: parameter code bytes 10-11 must be 0x0001 and
 *     the length byte 13 must be 0x02; the value is byte 15;
 *   - a value of 0xFF is stored as DISK_INVALID_TEMP.
 *
 * NOTE(review): the bad-checksum branch is entered only when checksum is
 * non-zero AND env_debug is set. If the invalidation of the temperatures
 * that follows the log message lives inside that same branch, corrupted
 * SMART data is accepted silently whenever debugging is off -- confirm the
 * brace structure (not visible here) and, if so, test checksum alone.
 *
 * NOTE(review): the checksum loop uses the literal 512 rather than
 * sizeof (smartpage); this assumes smart_structure is at least 512 bytes.
 * It also assumes checksum is an 8-bit type so that the sum can wrap to 0
 * -- confirm both.
 *
 * NOTE(review): the validity test looks at raw_data[1] while the value is
 * read from raw_data[2] -- confirm the intended byte offsets.
 */
1405 get_disk_temp(env_disk_t
*diskp
)
1410 if (diskp
->smart_supported
== B_TRUE
) {
1411 smart_structure smartpage
;
1412 smart_attribute
*temp_attrib
= NULL
;
1417 bzero(&smartpage
, sizeof (smartpage
));
1419 ret
= scsi_log_sense(diskp
, GET_SMART_INFO
,
1420 &smartpage
, sizeof (smartpage
), 0);
1423 diskp
->current_temp
= DISK_INVALID_TEMP
;
1424 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1429 * verify the checksum of the data. A 2's compliment
1430 * of the result addition of the is stored in the
1431 * last byte. The sum of all the checksum should be
1432 * 0. If the checksum is bad, return an error for
1435 index
= (uint8_t *)&smartpage
;
1437 for (i
= checksum
= 0; i
< 512; i
++)
1438 checksum
+= index
[i
];
1440 if ((checksum
!= 0) && env_debug
) {
1442 "SMART checksum error! 0x%x\n", checksum
);
1445 * We got bad data back from the drive, fail this
1446 * time around and picl will retry again. If this
1447 * continues to fail picl will give this drive a
1450 diskp
->current_temp
= DISK_INVALID_TEMP
;
1451 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1457 * Scan through the various SMART data and look for
1458 * the complete drive temp.
1461 for (i
= 0; (i
< SMART_FIELDS
) &&
1462 (smartpage
.attribute
[i
].id
!= 0) &&
1463 (temp_attrib
== NULL
); i
++) {
1465 if (smartpage
.attribute
[i
].id
== HDA_TEMP
) {
1466 temp_attrib
= &smartpage
.attribute
[i
];
1471 * If we dont find any temp SMART attributes, this drive
1472 * does not support this page, disable temp checking
1475 if (temp_attrib
== NULL
) {
1478 * If the checksum is valid, the temp. attributes are
1479 * not supported, disable this drive from temp.
1484 "Temp ATTRIBUTE not supported\n");
1485 diskp
->smart_supported
= B_FALSE
;
1486 diskp
->tpage_supported
= B_FALSE
;
1487 diskp
->current_temp
= DISK_INVALID_TEMP
;
1488 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1494 envd_log(LOG_ERR
, "flags = 0x%x%x,curr = 0x%x,"
1495 "data = 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1496 temp_attrib
->flags
[0], temp_attrib
->flags
[1],
1497 temp_attrib
->raw_data
[0], temp_attrib
->raw_data
[1],
1498 temp_attrib
->raw_data
[2], temp_attrib
->raw_data
[3],
1499 temp_attrib
->raw_data
[4], temp_attrib
->raw_data
[5],
1500 temp_attrib
->raw_data
[6], temp_attrib
->raw_data
[7]);
1502 if (temp_attrib
->raw_data
[1] != 0xFF) {
1503 diskp
->current_temp
= temp_attrib
->raw_data
[2];
1504 diskp
->ref_temp
= temp_attrib
->raw_data
[2];
1506 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1507 diskp
->current_temp
= DISK_INVALID_TEMP
;
1513 ret
= scsi_log_sense(diskp
, TEMPERATURE_PAGE
, tpage
,
1517 diskp
->current_temp
= DISK_INVALID_TEMP
;
1518 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1522 * For the current temperature verify that the parameter
1523 * length is 0x02 and the parameter code is 0x00
1524 * Temperature value of 255(0xFF) is considered INVALID.
1526 if ((tpage
[7] == 0x02) && (tpage
[4] == 0x00) &&
1527 (tpage
[5] == 0x00)) {
1528 if (tpage
[9] == 0xFF) {
1529 diskp
->current_temp
= DISK_INVALID_TEMP
;
1532 diskp
->current_temp
= tpage
[9];
1537 * For the reference temperature verify that the parameter
1538 * length is 0x02 and the parameter code is 0x01
1539 * Temperature value of 255(0xFF) is considered INVALID.
1541 if ((tpage
[13] == 0x02) && (tpage
[10] == 0x00) &&
1542 (tpage
[11] == 0x01)) {
1543 if (tpage
[15] == 0xFF) {
1544 diskp
->ref_temp
= DISK_INVALID_TEMP
;
1546 diskp
->ref_temp
= tpage
[15];
1555 disk_temp_thr(void *args
)
1557 char syscmd
[BUFSIZ
];
1558 char msgbuf
[BUFSIZ
];
1562 pthread_mutex_t env_monitor_mutex
= PTHREAD_MUTEX_INITIALIZER
;
1563 pthread_cond_t env_monitor_cv
= PTHREAD_COND_INITIALIZER
;
1564 pm_state_change_t pmstate
;
1569 if ((disk_pm_fd
= open(PM_DEVICE
, O_RDWR
)) == -1) {
1570 envd_log(LOG_ERR
, DISK_TEMP_THREAD_EXITING
,
1571 errno
, strerror(errno
));
1577 * Sleep for specified seconds before issuing IOCTL
1580 (void) pthread_mutex_lock(&env_monitor_mutex
);
1581 ret
= pthread_cond_reltimedwait_np(&env_monitor_cv
,
1582 &env_monitor_mutex
, &to
);
1584 to
.tv_sec
= disk_scan_interval
;
1587 if (ret
!= ETIMEDOUT
) {
1588 (void) pthread_mutex_unlock(
1589 &env_monitor_mutex
);
1592 (void) pthread_mutex_unlock(&env_monitor_mutex
);
1594 for (i
= 0; (diskp
= envd_disks
[i
]) != NULL
; i
++) {
1595 if (diskp
->present
== B_FALSE
)
1597 if (diskp
->tpage_supported
== B_FALSE
)
1600 * If the disk temperature is above the warning threshold
1601 * continue monitoring until the temperature drops below
1602 * warning threshold.
1603 * If the temperature is in the NORMAL range monitor only
1604 * when the disk is BUSY.
1605 * We do not want to read the disk temperature if the disk
1606 * is idling. The reason for this is that the disk will never get
1607 * into lowest power mode if we scan the disk temperature
1608 * periodically. To avoid this situation we first determine
1609 * the idle_time of the disk. If the disk has been IDLE since
1610 * we scanned the temperature last time we will not read the
1613 if (!DISK_TEMP_IN_WARNING_RANGE(diskp
->current_temp
, diskp
)) {
1614 pmstate
.physpath
= diskp
->physpath
;
1615 pmstate
.size
= strlen(diskp
->physpath
);
1616 pmstate
.component
= 0;
1618 ioctl(disk_pm_fd
, PM_GET_TIME_IDLE
,
1621 if (errno
!= EINTR
) {
1624 "ioctl PM_GET_TIME_IDLE failed for DISK0. errno=0x%x\n",
1630 if (idle_time
>= (disk_scan_interval
/2)) {
1632 envd_log(LOG_ERR
, "%s idle time = %d\n",
1633 diskp
->name
, idle_time
);
1638 ret
= get_disk_temp(diskp
);
1642 envd_log(LOG_ERR
, "%s temp = %d ref. temp = %d\n",
1643 diskp
->name
, diskp
->current_temp
, diskp
->ref_temp
);
1646 * If this disk already triggered system shutdown, don't
1647 * log any more shutdown/warning messages for it.
1649 if (diskp
->shutdown_initiated
)
1653 * Check for the temperature in warning and shutdown range
1654 * and take appropriate action.
1656 if (DISK_TEMP_IN_WARNING_RANGE(diskp
->current_temp
, diskp
)) {
1658 * Check if the temperature has been in warning
1659 * range during last disk_warning_duration interval.
1660 * If so, the temperature is truly in warning
1661 * range and we need to log a warning message,
1662 * but no more than once every disk_warning_interval
1665 time_t wtstamp
= diskp
->warning_tstamp
;
1667 ct
= (time_t)(gethrtime() / NANOSEC
);
1668 if (diskp
->warning_start
== 0)
1669 diskp
->warning_start
= ct
;
1670 if (((ct
- diskp
->warning_start
) >=
1671 disk_warning_duration
) && (wtstamp
== 0 ||
1672 (ct
- wtstamp
) >= disk_warning_interval
)) {
1673 envd_log(LOG_CRIT
, ENV_WARNING_MSG
,
1674 diskp
->name
, diskp
->current_temp
,
1676 diskp
->high_warning
);
1677 diskp
->warning_tstamp
= ct
;
1679 } else if (diskp
->warning_start
!= 0)
1680 diskp
->warning_start
= 0;
1682 if (!shutdown_override
&&
1683 DISK_TEMP_IN_SHUTDOWN_RANGE(diskp
->current_temp
, diskp
)) {
1684 ct
= (time_t)(gethrtime() / NANOSEC
);
1685 if (diskp
->shutdown_tstamp
== 0)
1686 diskp
->shutdown_tstamp
= ct
;
1689 * Shutdown the system if the temperature remains
1690 * in the shutdown range for over disk_shutdown_interval
1693 if ((ct
- diskp
->shutdown_tstamp
) >=
1694 disk_shutdown_interval
) {
1696 diskp
->shutdown_initiated
= B_TRUE
;
1697 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1698 ENV_SHUTDOWN_MSG
, diskp
->name
,
1699 diskp
->current_temp
, diskp
->low_shutdown
,
1700 diskp
->high_shutdown
);
1701 envd_log(LOG_ALERT
, msgbuf
);
1703 /* shutdown the system (only once) */
1704 if (system_shutdown_started
== B_FALSE
) {
1705 (void) snprintf(syscmd
, sizeof (syscmd
),
1706 "%s \"%s\"", shutdown_cmd
, msgbuf
);
1707 envd_log(LOG_ALERT
, syscmd
);
1708 system_shutdown_started
= B_TRUE
;
1709 (void) system(syscmd
);
1712 } else if (diskp
->shutdown_tstamp
!= 0)
1713 diskp
->shutdown_tstamp
= 0;
1715 } /* end of forever loop */
1721 char msgbuf
[BUFSIZ
];
1724 pthread_mutex_t env_monitor_mutex
= PTHREAD_MUTEX_INITIALIZER
;
1725 pthread_cond_t env_monitor_cv
= PTHREAD_COND_INITIALIZER
;
1731 * Sleep for specified seconds before issuing IOCTL
1734 (void) pthread_mutex_lock(&env_monitor_mutex
);
1735 ret
= pthread_cond_reltimedwait_np(&env_monitor_cv
,
1736 &env_monitor_mutex
, &to
);
1737 to
.tv_sec
= fan_scan_interval
;
1739 if (ret
!= ETIMEDOUT
) {
1740 (void) pthread_mutex_unlock(&env_monitor_mutex
);
1743 (void) pthread_mutex_unlock(&env_monitor_mutex
);
1745 for (i
= 0; (fanp
= envd_fans
[i
]) != NULL
; i
++) {
1746 if (fanp
->present
== B_FALSE
)
1749 if (has_fan_failed(fanp
) == B_TRUE
) {
1750 if (fanp
->last_status
== FAN_FAILED
)
1752 fanp
->last_status
= FAN_FAILED
;
1753 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1754 ENV_FAN_FAILURE_WARNING_MSG
, fanp
->name
,
1755 fan_rpm_string
, fan_status_string
);
1756 envd_log(LOG_ALERT
, msgbuf
);
1758 if (fanp
->last_status
== FAN_OK
)
1760 fanp
->last_status
= FAN_OK
;
1761 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1762 ENV_FAN_OK_MSG
, fanp
->name
);
1763 envd_log(LOG_ALERT
, msgbuf
);
1767 if (has_psufan_failed() == B_TRUE
) {
1768 if (psufan_last_status
== FAN_FAILED
)
1770 psufan_last_status
= FAN_FAILED
;
1771 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1772 ENV_FAN_FAILURE_WARNING_MSG
, SENSOR_PSU
,
1773 fan_rpm_string
, fan_status_string
);
1774 envd_log(LOG_ALERT
, msgbuf
);
1776 if (psufan_last_status
== FAN_OK
)
1778 psufan_last_status
= FAN_OK
;
1779 (void) snprintf(msgbuf
, sizeof (msgbuf
),
1780 ENV_FAN_OK_MSG
, SENSOR_PSU
);
1781 envd_log(LOG_ALERT
, msgbuf
);
1790 * Setup environmental monitor state and start threads to monitor
1791 * temperature, fan, disk and power management state.
1792 * Returns -1 on error, 0 if successful.
1798 if (getenv("SUNW_piclenvd_debug") != NULL
)
1801 if (pthread_attr_init(&thr_attr
) != 0 ||
1802 pthread_attr_setscope(&thr_attr
, PTHREAD_SCOPE_SYSTEM
) != 0) {
1807 * If ES segment is not present or has inconsistent information, we
1808 * use default values for sensor limits. For the sake of simplicity,
1809 * we still store these limits internally in the 'es' member in the
1812 if (envd_es_setup() < 0) {
1813 envd_log(LOG_WARNING
, ENV_DEFAULT_LIMITS
);
1814 envd_es_default_setup();
1817 if (envd_setup_sensors() < 0) {
1819 envd_log(LOG_ERR
, "Failed to setup sensors\n");
1820 system_temp_monitor
= 0;
1823 if (envd_setup_fans() < 0) {
1825 envd_log(LOG_ERR
, "Failed to setup fans\n");
1831 * Disable disk temperature monitoring until we have
1832 * LSI fw support to read SATA disk temperature
1834 if (disk_temp_monitor
) {
1835 if (envd_setup_disks() < 0) {
1837 envd_log(LOG_ERR
, "Failed to setup disks\n");
1838 disk_temp_monitor
= 0;
1843 * Create a thread to monitor system temperatures
1845 if ((system_temp_monitor
) && (system_temp_thr_created
== B_FALSE
)) {
1846 if (pthread_create(&system_temp_thr_id
, &thr_attr
,
1847 system_temp_thr
, NULL
) != 0) {
1848 envd_log(LOG_ERR
, ENVTHR_THREAD_CREATE_FAILED
);
1850 system_temp_thr_created
= B_TRUE
;
1853 "Created thread to monitor system temperatures\n");
1858 * Create a thread to monitor fans
1860 if ((fan_monitor
) && (fan_thr_created
== B_FALSE
)) {
1861 if (pthread_create(&fan_thr_id
, &thr_attr
, fan_thr
, NULL
) != 0)
1862 envd_log(LOG_ERR
, ENVTHR_THREAD_CREATE_FAILED
);
1864 fan_thr_created
= B_TRUE
;
1867 "Created thread to monitor system fans\n");
1873 * Create a thread to monitor PM state
1875 if ((pm_monitor
) && (pmthr_created
== B_FALSE
)) {
1876 if (pthread_create(&pmthr_tid
, &thr_attr
, pmthr
, NULL
) != 0)
1877 envd_log(LOG_CRIT
, PM_THREAD_CREATE_FAILED
);
1879 pmthr_created
= B_TRUE
;
1882 "Created thread to monitor system power state\n");
1887 * Create a thread to monitor disk temperature
1889 if ((disk_temp_monitor
) && (disk_temp_thr_created
== B_FALSE
)) {
1890 if (pthread_create(&disk_temp_thr_id
, &thr_attr
,
1891 disk_temp_thr
, NULL
) != 0) {
1892 envd_log(LOG_ERR
, ENVTHR_THREAD_CREATE_FAILED
);
1894 disk_temp_thr_created
= B_TRUE
;
1897 "Created thread for disk temperatures\n");
1905 piclenvd_register(void)
1907 picld_plugin_register(&my_reg_info
);
1914 (void) env_picl_setup_tuneables();
1917 * Do not allow disk temperature monitoring to be enabled
1918 * via tuneables. Disk temperature monitoring is disabled
1919 * until we have LSI fw support to read the temperature of
1922 disk_temp_monitor
= 0;
1925 * Setup the environmental data structures
1927 if (envd_setup() != 0) {
1928 envd_log(LOG_CRIT
, ENVD_PLUGIN_INIT_FAILED
);
1933 * Now setup/populate PICL tree
1943 * Invoke env_picl_destroy() to remove any PICL nodes/properties
1944 * (including volatile properties) we created. Once this call
1945 * returns, there can't be any more calls from the PICL framework
1946 * to get current temperature or fan speed.
1949 envd_close_sensors();
/*
 * Log a printf-style message.  The priority pri and the format fmt are
 * handed to vsyslog() together with the variable argument list ap.
 *
 * NOTE(review): the declaration and setup of ap, and the return-type
 * line, are not visible in this view -- confirm against the full file.
 */
1955 envd_log(int pri
, const char *fmt
, ...)
1960 vsyslog(pri
, fmt
, ap
);
1965 * Tunables support functions
/*
 * Scan the tuneables[] table (ntuneables entries) for the entry whose
 * property handle, proph, equals the handle passed in.
 *
 * NOTE(review): the statement executed on a match and the final return
 * are not visible in this view.  Callers compare the result against NULL,
 * so presumably NULL means that no entry matched -- confirm.
 */
1967 static env_tuneable_t
*
1968 tuneable_lookup(picl_prophdl_t proph
)
1971 env_tuneable_t
*tuneablep
= NULL
;
/* Walk the table in index order, testing each entry handle. */
1973 for (i
= 0; i
< ntuneables
; i
++) {
1974 tuneablep
= &tuneables
[i
];
1975 if (tuneablep
->proph
== proph
)
1983 get_string_val(ptree_rarg_t
*parg
, void *buf
)
1985 picl_prophdl_t proph
;
1986 env_tuneable_t
*tuneablep
;
1988 proph
= parg
->proph
;
1990 tuneablep
= tuneable_lookup(proph
);
1992 if (tuneablep
== NULL
)
1993 return (PICL_FAILURE
);
1995 (void) memcpy(buf
, tuneablep
->value
, tuneablep
->nbytes
);
1997 return (PICL_SUCCESS
);
2001 set_string_val(ptree_warg_t
*parg
, const void *buf
)
2003 picl_prophdl_t proph
;
2004 env_tuneable_t
*tuneablep
;
2006 if (parg
->cred
.dc_euid
!= 0)
2007 return (PICL_PERMDENIED
);
2009 proph
= parg
->proph
;
2011 tuneablep
= tuneable_lookup(proph
);
2013 if (tuneablep
== NULL
)
2014 return (PICL_FAILURE
);
2016 (void) memcpy(tuneables
->value
, buf
, tuneables
->nbytes
);
2019 return (PICL_SUCCESS
);
2023 get_int_val(ptree_rarg_t
*parg
, void *buf
)
2025 picl_prophdl_t proph
;
2026 env_tuneable_t
*tuneablep
;
2028 proph
= parg
->proph
;
2030 tuneablep
= tuneable_lookup(proph
);
2032 if (tuneablep
== NULL
)
2033 return (PICL_FAILURE
);
2035 (void) memcpy(buf
, tuneablep
->value
, tuneablep
->nbytes
);
2037 return (PICL_SUCCESS
);
2041 set_int_val(ptree_warg_t
*parg
, const void *buf
)
2043 picl_prophdl_t proph
;
2044 env_tuneable_t
*tuneablep
;
2046 if (parg
->cred
.dc_euid
!= 0)
2047 return (PICL_PERMDENIED
);
2049 proph
= parg
->proph
;
2051 tuneablep
= tuneable_lookup(proph
);
2053 if (tuneablep
== NULL
)
2054 return (PICL_FAILURE
);
2056 (void) memcpy(tuneablep
->value
, buf
, tuneablep
->nbytes
);
2058 return (PICL_SUCCESS
);
/*
 * Query the fan behind fanp->fd and refresh the shared text buffers
 * fan_status_string and fan_rpm_string.
 *
 * The status is requested with the PIC_GET_FAN_STATUS ioctl, at most
 * MAX_RETRIES_FOR_FAN_FAULT times.  A reading is tested for an ioctl
 * result of 0 with bit 0 of status clear.  When the retry counter reaches
 * MAX_RETRIES_FOR_FAN_FAULT both strings are set to NOT_AVAILABLE.
 * Further down the status string is formatted, and the speed is read with
 * PIC_GET_FAN_SPEED; if that ioctl fails the rpm string is set to
 * NOT_AVAILABLE.
 *
 * NOTE(review): the loop exit statement, the else arms, the snprintf
 * format arguments and every return statement are not visible in this
 * view.  The prototype near the top of the file declares the result as
 * boolean_t; which condition maps to B_TRUE must be confirmed against the
 * full file.
 *
 * NOTE(review): strncpy() with the full destination size leaves the
 * destination without a terminating NUL if NOT_AVAILABLE is as long as
 * the buffer -- confirm the buffer sizes.
 */
2062 has_fan_failed(env_fan_t
*fanp
)
2064 fanspeed_t fan_speed
;
2074 * Read RF_FAN_STATUS bit of the fan fault register, retry if
2075 * the PIC is busy, with a 1 second delay to allow it to update.
2077 for (ntries
= 0; ntries
< MAX_RETRIES_FOR_FAN_FAULT
; ntries
++) {
2078 ret
= ioctl(fanp
->fd
, PIC_GET_FAN_STATUS
, &status
);
2079 if ((ret
== 0) && ((status
& 0x1) == 0))
2087 "%d retries attempted in reading fan status.\n",
/* Retry budget used up: report both values as unavailable. */
2092 if (ntries
== MAX_RETRIES_FOR_FAN_FAULT
) {
2093 (void) strncpy(fan_status_string
, NOT_AVAILABLE
,
2094 sizeof (fan_status_string
));
2095 (void) strncpy(fan_rpm_string
, NOT_AVAILABLE
,
2096 sizeof (fan_rpm_string
));
2101 envd_log(LOG_ERR
, "fan status = 0x%x\n", status
);
2104 * ST_FFAULT bit isn't implemented yet and we're reading only
2105 * individual fan status
2108 (void) snprintf(fan_status_string
, sizeof (fan_status_string
),
2110 if (ioctl(fanp
->fd
, PIC_GET_FAN_SPEED
, &tach
) != 0) {
2111 (void) strncpy(fan_rpm_string
, NOT_AVAILABLE
,
2112 sizeof (fan_rpm_string
));
/*
 * The tach value is shifted left by 8 bits before TACH_TO_RPM() is
 * applied.  Presumably this is the else arm of the ioctl test above (the
 * line in between is not visible) -- confirm.
 */
2114 real_tach
= tach
<< 8;
2115 fan_speed
= TACH_TO_RPM(real_tach
);
2116 (void) snprintf(fan_rpm_string
, sizeof (fan_rpm_string
),
/*
 * Query the power supply fan through envd_sensor_psu.fd and refresh the
 * shared text buffers fan_status_string and fan_rpm_string.
 *
 * A descriptor value of -1 is tested first.  fan_rpm_string is always set
 * to NOT_AVAILABLE because no speed is read for the PSU.  The status is
 * then requested with the PIC_GET_FAN_STATUS ioctl, at most
 * MAX_RETRIES_FOR_FAN_FAULT times; a reading is tested for an ioctl
 * result of 0 with bit 0 of status clear.  When the retry counter reaches
 * MAX_RETRIES_FOR_FAN_FAULT the status string is set to NOT_AVAILABLE.
 * Further down the status string is formatted.
 *
 * NOTE(review): the action taken for a descriptor of -1, the loop exit
 * statement, the snprintf format arguments and every return statement are
 * not visible in this view.  The prototype near the top of the file
 * declares the result as boolean_t; which condition maps to B_TRUE must
 * be confirmed against the full file.
 *
 * NOTE(review): strncpy() with the full destination size leaves the
 * destination without a terminating NUL if NOT_AVAILABLE is as long as
 * the buffer -- confirm the buffer sizes.
 */
2126 has_psufan_failed(void)
2131 if (envd_sensor_psu
.fd
== -1)
2135 * For psu, only fan fault is visible, no fan speed
2137 (void) strncpy(fan_rpm_string
, NOT_AVAILABLE
, sizeof (fan_rpm_string
));
2140 * Read RF_FAN_STATUS bit of the fan fault register, retry if
2141 * the PIC is busy, with a 1 second delay to allow it to update.
2143 for (ntries
= 0; ntries
< MAX_RETRIES_FOR_FAN_FAULT
; ntries
++) {
2144 ret
= ioctl(envd_sensor_psu
.fd
, PIC_GET_FAN_STATUS
, &status
);
2145 if ((ret
== 0) && ((status
& 0x1) == 0))
2153 "%d retries attempted in reading fan status.\n",
/* Retry budget used up: report the status as unavailable. */
2158 if (ntries
== MAX_RETRIES_FOR_FAN_FAULT
) {
2159 (void) strncpy(fan_status_string
, NOT_AVAILABLE
,
2160 sizeof (fan_status_string
));
2165 envd_log(LOG_ERR
, "fan status = 0x%x\n", status
);
2168 (void) snprintf(fan_status_string
, sizeof (fan_status_string
),
2177 scsi_mode_select(env_disk_t
*diskp
, uchar_t page_code
, uchar_t
*pagebuf
,
2180 struct uscsi_cmd ucmd_buf
;
2181 uchar_t cdb_buf
[CDB_GROUP1
];
2182 struct scsi_extended_sense sense_buf
;
2185 bzero(&cdb_buf
, sizeof (cdb_buf
));
2186 bzero(&ucmd_buf
, sizeof (ucmd_buf
));
2187 bzero(&sense_buf
, sizeof (sense_buf
));
2189 cdb_buf
[0] = SCMD_MODE_SELECT_G1
;
2190 cdb_buf
[1] = 1<<PAGE_FMT
;
2192 cdb_buf
[7] = (uchar_t
)((pagelen
& 0xFF00) >> 8);
2193 cdb_buf
[8] = (uchar_t
)(pagelen
& 0x00FF);
2195 ucmd_buf
.uscsi_cdb
= (char *)cdb_buf
;
2196 ucmd_buf
.uscsi_cdblen
= sizeof (cdb_buf
);
2197 ucmd_buf
.uscsi_bufaddr
= (caddr_t
)pagebuf
;
2198 ucmd_buf
.uscsi_buflen
= pagelen
;
2199 ucmd_buf
.uscsi_rqbuf
= (caddr_t
)&sense_buf
;
2200 ucmd_buf
.uscsi_rqlen
= sizeof (struct scsi_extended_sense
);
2201 ucmd_buf
.uscsi_flags
= USCSI_RQENABLE
| USCSI_WRITE
| USCSI_SILENT
;
2202 ucmd_buf
.uscsi_timeout
= DEFAULT_SCSI_TIMEOUT
;
2204 ret_val
= ioctl(diskp
->fd
, USCSICMD
, ucmd_buf
);
2206 if (ret_val
== 0 && ucmd_buf
.uscsi_status
== 0) {
2210 envd_log(LOG_ERR
, "mode select command for %s failed. "
2211 "page_code 0x%x ret_val = 0x%x "
2212 "status = 0x%x errno = 0x%x\n", diskp
->name
, page_code
,
2213 ret_val
, ucmd_buf
.uscsi_status
, errno
);