2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # EXAMPLE DATA FROM: WDC SSC-D0128SC-2100
29 #/dev/sda ATA WDC_SSC-D0128SC- 1 Raw_Read_Error_Rate 0x000b 100 100 050 Pre-fail Always - 16777215
30 #/dev/sda ATA WDC_SSC-D0128SC- 3 Spin_Up_Time 0x0007 100 100 050 Pre-fail Always - 0
31 #/dev/sda ATA WDC_SSC-D0128SC- 5 Reallocated_Sector_Ct 0x0013 100 100 050 Pre-fail Always - 0
32 #/dev/sda ATA WDC_SSC-D0128SC- 7 Seek_Error_Rate 0x000b 100 100 050 Pre-fail Always - 0
33 #/dev/sda ATA WDC_SSC-D0128SC- 9 Power_On_Hours 0x0012 100 100 000 Old_age Always - 1408
34 #/dev/sda ATA WDC_SSC-D0128SC- 10 Spin_Retry_Count 0x0013 100 100 050 Pre-fail Always - 0
35 #/dev/sda ATA WDC_SSC-D0128SC- 12 Power_Cycle_Count 0x0012 100 100 000 Old_age Always - 523
36 #/dev/sda ATA WDC_SSC-D0128SC- 168 Unknown_Attribute 0x0012 100 100 000 Old_age Always - 1
37 #/dev/sda ATA WDC_SSC-D0128SC- 175 Program_Fail_Count_Chip 0x0003 100 100 010 Pre-fail Always - 0
38 #/dev/sda ATA WDC_SSC-D0128SC- 192 Power-Off_Retract_Count 0x0012 100 100 000 Old_age Always - 0
39 #/dev/sda ATA WDC_SSC-D0128SC- 194 Temperature_Celsius 0x0022 040 100 000 Old_age Always - 40 (Lifetime Min/Max 30/60)
40 #/dev/sda ATA WDC_SSC-D0128SC- 197 Current_Pending_Sector 0x0012 100 100 000 Old_age Always - 0
41 #/dev/sda ATA WDC_SSC-D0128SC- 240 Head_Flying_Hours 0x0013 100 100 050 Pre-fail Always - 0
42 #/dev/sda ATA WDC_SSC-D0128SC- 170 Unknown_Attribute 0x0003 100 100 010 Pre-fail Always - 1769478
43 #/dev/sda ATA WDC_SSC-D0128SC- 173 Unknown_Attribute 0x0012 100 100 000 Old_age Always - 4217788040605
45 # TODO: Need to completely rework smart check. Use IDs instead of changing
46 # descriptions! But be careful: There is no standard for either IDs or
47 # descriptions. Only use those that are commonly agreed upon.
# Default parameters for the smart.temp check; the key is the name given as
# 'default_levels_variable' in check_info["smart.temp"].
# NOTE(review): (35, 40) is presumably (warn, crit) in degrees Celsius - confirm
# against check_temperature().
49 factory_settings
["smart_temp_default_levels"] = {"levels": (35, 40)}
# Per-counter level pairs for the SMART statistics.
# NOTE(review): each tuple is presumably (warn, crit) - confirm. None of the
# check code in this part of the file reads this dictionary; verify whether it
# is still used elsewhere.
51 smart_stats_default_levels
= {
52 'realloc_events': (1, 1),
53 'realloc_sectors': (1, 1),
54 'spin_retries': (1, 1),
55 'pending_retries': (1, 1),
56 'pending_sectors': (1, 1),
57 'cmd_timeouts': (5, 10),
59 'uncorr_errs': (1, 1),
# Parse the agent output `info` into the raw SMART values, stored per disk in
# `disks` under the device name found in column 0 of each row.
64 def parse_smart_raw_values(info
):
# A device name in column 0 that differs from the current one starts a new disk.
70 if line
[0] != disk_name
:
73 disks
[disk_name
] = disk
# Attributes reported as "Unknown_Attribute" are not evaluated.
76 if field
!= "Unknown_Attribute":
# Column 12 is the raw value column in the example data at the top of this
# file. saveint() is not defined in this file.
# NOTE(review): presumably saveint() is a tolerant int conversion - confirm.
77 value
= saveint(line
[12])
# Parse the agent output `info` into per-disk entries that map an attribute
# name to a (value, threshold) tuple. Disks are keyed by the device name found
# in column 0 of each row.
82 def parse_smart_normalized_values(info
):
# A device name in column 0 that differs from the current one starts a new disk.
88 if line
[0] != disk_name
:
91 disks
[disk_name
] = disk
# Attributes reported as "Unknown_Attribute" are not evaluated.
94 if field
!= "Unknown_Attribute":
# Column 8 is the threshold column in the example data at the top of this file.
# NOTE(review): the conversion below only happens when line[8] already is an
# int. If the rows of `info` hold strings, this branch is never taken and the
# threshold from the agent is never used - confirm the intended behaviour.
96 if isinstance(line
[8], int):
97 threshold
= int(line
[8])
# Store the pair as a tuple; check_smart_stats unpacks it again.
100 disk
[field
] = value
, threshold
# SMART attribute names that inventory_smart_stats looks for on a disk and
# copies into the inventory entry for that disk.
104 smart_stats_fields
= [
105 'Reallocated_Sector_Ct',
107 'Reallocated_Event_Count',
108 'Current_Pending_Sector',
111 'Reported_Uncorrect',
112 'Uncorrectable_Error_Cnt',
113 'UDMA_CRC_Error_Count',
# Inventory function of smart.stats. For a disk that reports at least one
# attribute listed in smart_stats_fields, an entry (disk name, dict of those
# attributes' raw values) is appended to `inventory`. check_smart_stats later
# receives that dict as `params`.
118 def inventory_smart_stats(info
):
119 disks
= parse_smart_raw_values(info
)
121 for disk_name
, disk
in disks
.items():
122 for field
in disk
.keys():
123 if field
in smart_stats_fields
: # found at least one interesting field
# Keep only the attributes from smart_stats_fields that this disk reports.
124 cleaned
= dict([(f
, disk
[f
]) for f
in smart_stats_fields
if f
in disk
])
125 inventory
.append((disk_name
, cleaned
))
# Check function of smart.stats. Reports the SMART counters listed below for
# the disk `item` and compares them with the reference values in `params`.
# Returns (state, info text, perfdata), or (3, "Disk not found") when the disk
# is missing from the parsed agent output.
130 def check_smart_stats(item
, params
, info
):
131 # params is a snapshot of all counters at the point of time of inventory
133 disks
= parse_smart_raw_values(info
)
134 normalized
= parse_smart_normalized_values(info
)
136 if item
not in disks
:
137 return 3, "Disk not found"
# Each entry is (unit suffix, SMART attribute name, label used in the output).
144 for unit
, field
, descr
in [
145 (' hours', 'Power_On_Hours', 'Powered on'),
146 ('', 'Power_Cycle_Count', 'Power cycles'),
147 ('', 'Reallocated_Sector_Ct', 'Reallocated sectors'),
148 ('', 'Reallocated_Event_Count', 'Reallocated events'),
149 ('', 'Spin_Retry_Count', 'Spin retries'),
150 ('', 'Current_Pending_Sector', 'Pending sectors'),
151 ('', 'Command_Timeout', 'Command timeouts'),
152 ('', 'End-to-End_Error', 'End-to-End errors'),
153 ('', 'Reported_Uncorrect', 'Uncorrectable errors'),
154 ('', 'Uncorrectable_Error_Cnt', 'Uncorrectable errors'),
155 ('', 'UDMA_CRC_Error_Count', 'UDMA CRC errors'),
156 ('', 'CRC_Error_Count', 'UDMA CRC errors'),
# Add the counter to the summary text and to the perfdata.
160 infos
.append("%s: %d%s" % (descr
, value
, unit
))
161 perfdata
.append((field
, value
))
# Reference value of this counter from the inventory snapshot.
164 ref_value
= params
[field
]
166 # For reallocated event counts we experienced too many reported errors for disks
167 # which still seem to be OK. The raw value increased by a small amount but the
168 # aggregated value remained at its initial/ok state. So we use the aggregated
169 # value now. Only for this field.
170 if field
== "Reallocated_Event_Count":
171 infos
[-1] += " (was %d during discovery; normalized value looks OK)" % ref_value
# Normalized (value, threshold) pair as stored by parse_smart_normalized_values.
172 norm_value
, norm_threshold
= normalized
[item
][field
]
173 if norm_value
<= norm_threshold
:
# A raw value above the discovery snapshot is marked with (!!) in the output.
176 elif value
> ref_value
:
178 infos
[-1] += "(!!) (was %d during discovery)" % ref_value
180 return state
, ", ".join(infos
), perfdata
# Registration of the smart.stats subcheck: wires up the check and inventory
# functions defined above; the disk name replaces %s in the service description.
183 check_info
["smart.stats"] = {
184 'check_function': check_smart_stats
,
185 'inventory_function': inventory_smart_stats
,
186 'has_perfdata': True,
187 'service_description': 'SMART %s Stats',
# Inventory function of smart.temp: looks at every parsed disk and tests
# whether it reports one of the two temperature attributes that
# check_smart_temp can read.
191 def inventory_smart_temp(info
):
192 disks
= parse_smart_raw_values(info
)
# NOTE(review): dict.iteritems() exists only in Python 2, while
# inventory_smart_stats uses .items() - consider making both consistent.
193 for disk_name
, disk
in disks
.iteritems():
194 if "Temperature_Celsius" in disk
or \
195 "Temperature_Internal" in disk
:
# Check function of smart.temp: reads the raw temperature of disk `item` and
# hands it to check_temperature() together with `params` and the string
# "smart_<item>".
# NOTE(review): check_temperature() is not defined in this file; presumably it
# comes from 'temperature.include' listed in check_info["smart.temp"] - confirm.
199 def check_smart_temp(item
, params
, info
):
200 disks
= parse_smart_raw_values(info
)
# "Temperature_Celsius" takes precedence; "Temperature_Internal" is the fallback.
202 if "Temperature_Celsius" in disks
[item
]:
203 temperature
= disks
[item
]["Temperature_Celsius"]
204 elif "Temperature_Internal" in disks
[item
]:
205 temperature
= disks
[item
]["Temperature_Internal"]
209 return check_temperature(temperature
, params
, "smart_%s" % item
)
# Registration of the smart.temp subcheck: wires up the check and inventory
# functions defined above. 'default_levels_variable' names the
# factory_settings entry "smart_temp_default_levels" set near the top of this
# file.
212 check_info
["smart.temp"] = {
213 'check_function': check_smart_temp
,
214 'inventory_function': inventory_smart_temp
,
215 'service_description': 'Temperature SMART %s',
216 'group': 'temperature',
217 'has_perfdata': True,
218 'includes': ['temperature.include'],
219 'default_levels_variable': "smart_temp_default_levels"