4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
26 #ifndef _SYS_IB_ADAPTERS_HERMON_FM_H
27 #define _SYS_IB_ADAPTERS_HERMON_FM_H
32 #include <sys/ddifm.h>
33 #include <sys/fm/protocol.h>
34 #include <sys/fm/util.h>
35 #include <sys/fm/io/ddi.h>
42 * HCA FMA compile note.
44 * FMA_TEST is used for HCA function tests, and
45 * the macro can be turned on by changing the Makefile.
50 * in case of non-DEBUG (DEBUG is off)
55 * HCA FM common data structure
60 * This structure is used to catch HCA HW errors.
63 uint32_t ref_cnt
; /* the number of instances referring to this */
64 kmutex_t lock
; /* protection for last_err & polling thread */
65 struct i_hca_acc_handle
*hdl
; /* HCA FM acc handle structure */
66 struct kmem_cache
*fm_acc_cache
; /* HCA acc handle cache */
71 * HCA FM acc handle structure
72 * This structure is holding ddi_acc_handle_t and other members
73 * to deal with HCA PIO FM.
75 struct i_hca_acc_handle
{
76 struct i_hca_acc_handle
*next
; /* next structure */
77 ddi_acc_handle_t save_hdl
; /* acc handle */
78 kmutex_t lock
; /* mutex lock for thread count */
79 uint32_t thread_cnt
; /* number of threads issuing PIOs */
81 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", i_hca_acc_handle::save_hdl
))
/*
 * fm_acc_hdl() treats its argument as a pointer to struct i_hca_acc_handle
 * and yields the save_hdl member stored in it.
 */
#define	fm_acc_hdl(hdl)	(((struct i_hca_acc_handle *)(hdl))->save_hdl)

/* FM polling interval: 10ms, expressed in nanoseconds */
#define	FM_POLL_INTERVAL	(10000000)
86 * HCA FM function test structure
87 * This structure can be used to test the basic fm function test for HCA.
88 * The test code is included if the FMA_TEST macro is defined.
90 struct i_hca_fm_test
{
91 int num
; /* serial numner */
92 int type
; /* PIO or Hermon specific errors */
93 #define HCA_TEST_PIO 0x1
94 #define HCA_TEST_IBA 0x2
95 int trigger
; /* how to trigger a HW error */
96 #define HCA_TEST_TRANSIENT 0x0001
97 #define HCA_TEST_PERSISTENT 0x0002
98 #define HCA_TEST_ATTACH 0x0010
99 #define HCA_TEST_START 0x0100
100 #define HCA_TEST_END 0x0200
101 void (*pio_injection
)(struct i_hca_fm_test
*, ddi_fm_error_t
*);
102 int errcnt
; /* how many transient error occurs */
103 int line_num
; /* line number in the source code */
104 char *file_name
; /* source filename */
105 char *hash_key
; /* hash table for test items */
106 void *private; /* private data */
/*
 * Hermon FM data structure
 * Typedef names for the HCA FM, acc handle and FM test structures.
 */
typedef struct i_hca_fm			hermon_hca_fm_t;
typedef struct i_hca_acc_handle		hermon_acc_handle_t;
typedef struct i_hca_fm_test		hermon_test_t;
/*
 * The following defines are to supplement device error reporting.
 * At each place where the planned FMA error matrix specifies that
 * an ereport will be generated, for now there is a HERMON_FMANOTE()
 * call generating an appropriate message string.
 *
 * This has been revised since it has been realized that FMA is only
 * to be used for hardware errors.  HERMON_FMANOTE() is used to report
 * errors that are likely to be hardware, but possibly are not.
 *
 * HERMON_FMANOTE() issues a CE_WARN message built from the instance
 * number read from (state)->hs_instance and the supplied string.
 */
#define	HERMON_FMANOTE(state, string)					\
	cmn_err(CE_WARN, "hermon%d: Device Error: %s",			\
	    (state)->hs_instance, (string))
/*
 * Message strings for use with HERMON_FMANOTE(), grouped by the
 * source file named in each section comment.
 */

/* CQE Syndrome errors - see hermon_cq.c */
#define	HERMON_FMA_LOCLEN	"CQE local length error"
#define	HERMON_FMA_LOCQPOP	"CQE local qp operation error"
#define	HERMON_FMA_LOCPROT	"CQE local protection error"
#define	HERMON_FMA_WQFLUSH	"CQE wqe flushed in error"
#define	HERMON_FMA_MWBIND	"CQE memory window bind error"
#define	HERMON_FMA_RESP		"CQE bad response"
#define	HERMON_FMA_LOCACC	"CQE local access error"
#define	HERMON_FMA_REMREQ	"CQE remote invalid request error"
#define	HERMON_FMA_REMACC	"CQE remote access error"
#define	HERMON_FMA_REMOP	"CQE remote operation error"
#define	HERMON_FMA_XPORTCNT	"CQE transport retry counter exceeded"
#define	HERMON_FMA_RNRCNT	"CQE RNR retry counter exceeded"
#define	HERMON_FMA_REMABRT	"CQE remote aborted error"
#define	HERMON_FMA_UNKN		"CQE unknown/reserved error returned"

/* event errors - see hermon_event.c */
#define	HERMON_FMA_OVERRUN	"EQE cq overrun or protection error"
#define	HERMON_FMA_LOCCAT	"EQE local work queue catastrophic error"
#define	HERMON_FMA_QPCAT	"EQE local queue pair catastrophic error"
#define	HERMON_FMA_PATHMIG	"EQE path migration failed"
#define	HERMON_FMA_LOCINV	"EQE invalid request - local work queue"
#define	HERMON_FMA_LOCACEQ	"EQE local access violation"
#define	HERMON_FMA_SRQCAT	"EQE shared received queue catastrophic"
#define	HERMON_FMA_INTERNAL	"EQE hca internal error"

/* HCR device failure returns - see hermon_cmd.c */
#define	HERMON_FMA_HCRINT	"HCR internal error processing command"
#define	HERMON_FMA_NVMEM	"HCR NVRAM checksum/CRC failure"
#define	HERMON_FMA_TOTOG	"HCR Timeout waiting for command toggle"
#define	HERMON_FMA_GOBIT	"HCR Timeout waiting for command go bit"
#define	HERMON_FMA_RSRC		"HCR Command insufficient resources"
#define	HERMON_FMA_CMDINV	"HCR Command invalid status returned"

/* HCA initialization errors - see hermon.c */
#define	HERMON_FMA_FWVER	"HCA firmware not at minimum version"
#define	HERMON_FMA_PCIID	"HCA PCIe devid not supported"
#define	HERMON_FMA_MAINT	"HCA device set to memory controller mode"
#define	HERMON_FMA_BADNVMEM	"HCR bad NVMEM error"
#define	HCA_NO_FM		0x0000	/* HCA FM is not supported */
/* HCA FM state flags */
#define	HCA_PIO_FM		0x0001	/* PIO is fma-protected */
#define	HCA_DMA_FM		0x0002	/* DMA is fma-protected */
#define	HCA_EREPORT_FM		0x0004	/* FMA ereport is available */
#define	HCA_ERRCB_FM		0x0010	/* FMA error callback is supported */

#define	HCA_ATTCH_FM		0x0100	/* HCA FM attach mode */
#define	HCA_RUNTM_FM		0x0200	/* HCA FM runtime mode */

/* HCA ereport type */
#define	HCA_SYS_ERR		0x001	/* HW error reported by Solaris FMA */
#define	HCA_IBA_ERR		0x002	/* IB specific HW error */

/* HCA ereport detail */
#define	HCA_ERR_TRANSIENT	0x010	/* HCA temporary error */
#define	HCA_ERR_NON_FATAL	0x020	/* HCA persistent error */
#define	HCA_ERR_SRV_LOST	0x040	/* HCA attach failure */
#define	HCA_ERR_DEGRADED	0x080	/* HCA maintenance mode */
#define	HCA_ERR_FATAL		0x100	/* HCA critical situation */
#define	HCA_ERR_IOCTL		0x200	/* EIO */

/* Ignore HCA HW error check */
#define	HCA_SKIP_HW_CHK		(-1)

/* HCA FM pio retry operation state */
#define	HCA_PIO_OK		(0)	/* No HW errors */
#define	HCA_PIO_TRANSIENT	(1)	/* transient error */
#define	HCA_PIO_PERSISTENT	(2)	/* persistent error */
#define	HCA_PIO_RETRY_CNT	(3)

/* HCA firmware faults */
#define	HCA_FW_MISC		0x1	/* firmware misc faults */
#define	HCA_FW_CORRUPT		0x2	/* firmware corruption */
#define	HCA_FW_MISMATCH		0x3	/* firmware version mismatch */
/*
 * PIO test hooks.
 * With FMA_TEST defined, TEST_DECLARE() declares a hermon_test_t pointer,
 * REGISTER_PIO_TEST() assigns it from hermon_test_register() using the
 * call site's file and line, and PIO_START()/PIO_END() hand it on to
 * hermon_PIO_start()/hermon_PIO_end().  Without FMA_TEST the declaration
 * and registration expand to nothing and NULL is passed instead.
 *
 * PIO_END() takes the address of its cnt argument, so cnt must be an
 * lvalue.  TEST_DECLARE() carries its own trailing semicolon.
 */
#ifdef FMA_TEST
#define	TEST_DECLARE(tst)		hermon_test_t *tst;
#define	REGISTER_PIO_TEST(st, tst)					\
	tst = hermon_test_register(st, __FILE__, __LINE__, HCA_TEST_PIO)
#define	PIO_START(st, hdl, tst)		hermon_PIO_start(st, hdl, tst)
#define	PIO_END(st, hdl, cnt, tst)	hermon_PIO_end(st, hdl, &cnt, tst)
#else
#define	TEST_DECLARE(tst)
#define	REGISTER_PIO_TEST(st, tst)
#define	PIO_START(st, hdl, tst)		hermon_PIO_start(st, hdl, NULL)
#define	PIO_END(st, hdl, cnt, tst)	hermon_PIO_end(st, hdl, &cnt, NULL)
#endif	/* FMA_TEST */
230 * hermon_pio_init() is a macro initializing variables.
232 #define hermon_pio_init(cnt, status, tst) \
234 int status = HCA_PIO_OK; \
235 int cnt = HCA_PIO_RETRY_CNT
238 * hermon_pio_start() is one of a pair of macros checking HW errors
239 * at PIO requests, which should be called before the requests are issued.
241 #define hermon_pio_start(st, hdl, label, cnt, status, tst) \
242 if (st->hs_fm_state & HCA_PIO_FM) { \
243 if (st->hs_fm_async_fatal) { \
244 hermon_fm_ereport(st, HCA_SYS_ERR, \
245 HCA_ERR_NON_FATAL); \
248 REGISTER_PIO_TEST(st, tst); \
249 cnt = HCA_PIO_RETRY_CNT; \
250 if (PIO_START(st, hdl, tst) == \
251 HCA_PIO_PERSISTENT) { \
256 status = HCA_SKIP_HW_CHK; \
261 * hermon_pio_end() is the other of a pair of macros checking HW errors
262 * at PIO requests, which should be called after the requests end.
263 * If a HW error is detected and can be isolated well, these macros
264 * retry the operation to determine if the error is persistent or not.
266 #define hermon_pio_end(st, hdl, label, cnt, status, tst) \
267 if (status != HCA_SKIP_HW_CHK) { \
268 if (st->hs_fm_async_fatal) { \
269 hermon_fm_ereport(st, HCA_SYS_ERR, \
270 HCA_ERR_NON_FATAL); \
273 if ((status = PIO_END(st, hdl, cnt, tst)) == \
274 HCA_PIO_PERSISTENT) { \
276 } else if (status == HCA_PIO_TRANSIENT) { \
277 hermon_fm_ereport(st, HCA_SYS_ERR, \
278 HCA_ERR_TRANSIENT); \
281 } while (status == HCA_PIO_TRANSIENT)
283 extern void hermon_fm_init(hermon_state_t
*);
284 extern void hermon_fm_fini(hermon_state_t
*);
285 extern int hermon_fm_ereport_init(hermon_state_t
*);
286 extern void hermon_fm_ereport_fini(hermon_state_t
*);
287 extern int hermon_get_state(hermon_state_t
*);
288 extern boolean_t
hermon_init_failure(hermon_state_t
*);
289 extern boolean_t
hermon_cmd_retry_ok(hermon_cmd_post_t
*, int);
290 extern void hermon_fm_ereport(hermon_state_t
*, int, int);
291 extern int hermon_regs_map_setup(hermon_state_t
*, uint_t
, caddr_t
*, offset_t
,
292 offset_t
, ddi_device_acc_attr_t
*, ddi_acc_handle_t
*);
293 extern void hermon_regs_map_free(hermon_state_t
*, ddi_acc_handle_t
*);
294 extern int hermon_pci_config_setup(hermon_state_t
*, ddi_acc_handle_t
*);
295 extern void hermon_pci_config_teardown(hermon_state_t
*, ddi_acc_handle_t
*);
296 extern ushort_t
hermon_devacc_attr_version(hermon_state_t
*);
297 extern uchar_t
hermon_devacc_attr_access(hermon_state_t
*);
298 extern int hermon_PIO_start(hermon_state_t
*, ddi_acc_handle_t
,
300 extern int hermon_PIO_end(hermon_state_t
*, ddi_acc_handle_t
, int *,
302 extern ddi_acc_handle_t
hermon_rsrc_alloc_uarhdl(hermon_state_t
*);
303 extern ddi_acc_handle_t
hermon_get_uarhdl(hermon_state_t
*);
304 extern ddi_acc_handle_t
hermon_get_cmdhdl(hermon_state_t
*);
305 extern ddi_acc_handle_t
hermon_get_msix_tblhdl(hermon_state_t
*);
306 extern ddi_acc_handle_t
hermon_get_msix_pbahdl(hermon_state_t
*);
307 extern ddi_acc_handle_t
hermon_get_pcihdl(hermon_state_t
*);
308 extern void hermon_clr_state_nolock(hermon_state_t
*, int);
309 extern void hermon_inter_err_chk(void *);
#ifdef FMA_TEST
/* Declarations that are only available when FMA_TEST is defined */
extern hermon_test_t *hermon_test_register(hermon_state_t *, char *, int,
    int);
extern void hermon_test_deregister(void);
extern int hermon_test_num;
#endif	/* FMA_TEST */
321 #endif /* _SYS_IB_ADAPTERS_HERMON_FM_H */