4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Hermon Configuration Profile Routines
30 * Implements the routines necessary for initializing and (later) tearing
31 * down the list of Hermon configuration information.
34 #include <sys/sysmacros.h>
35 #include <sys/types.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
42 #include <sys/ib/adapters/hermon/hermon.h>
45 * Below are the elements that make up the Hermon configuration profile.
46 * For advanced users who wish to alter these values, this can be done via
47 * the /etc/system file. By default, values are assigned to the number of
48 * supported resources, either from the HCA's reported capacities or by
49 * a by-design limit in the driver.
52 /* Number of supported QPs, CQs and SRQs */
53 uint32_t hermon_log_num_qp
= HERMON_NUM_QP_SHIFT
;
54 uint32_t hermon_log_num_cq
= HERMON_NUM_CQ_SHIFT
;
55 uint32_t hermon_log_num_srq
= HERMON_NUM_SRQ_SHIFT
;
57 /* Number of supported SGL per WQE for SQ/RQ, and for SRQ */
58 /* XXX use the same for all queues if limitation in srq.h is resolved */
59 uint32_t hermon_wqe_max_sgl
= HERMON_NUM_SGL_PER_WQE
;
60 uint32_t hermon_srq_max_sgl
= HERMON_SRQ_MAX_SGL
;
62 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
63 uint32_t hermon_log_num_rdb_per_qp
= HERMON_LOG_NUM_RDB_PER_QP
;
66 * Number of multicast groups (MCGs), number of QP per MCG, and the number
67 * of entries (from the total number) in the multicast group "hash table"
69 uint32_t hermon_log_num_mcg
= HERMON_NUM_MCG_SHIFT
;
70 uint32_t hermon_num_qp_per_mcg
= HERMON_NUM_QP_PER_MCG
;
71 uint32_t hermon_log_num_mcg_hash
= HERMON_NUM_MCG_HASH_SHIFT
;
73 /* Number of UD AVs */
74 uint32_t hermon_log_num_ah
= HERMON_NUM_AH_SHIFT
;
76 /* Number of EQs and their default size */
77 uint32_t hermon_log_num_eq
= HERMON_NUM_EQ_SHIFT
;
78 uint32_t hermon_log_eq_sz
= HERMON_DEFAULT_EQ_SZ_SHIFT
;
81 * Number of supported MPTs, MTTs and also the maximum MPT size.
83 uint32_t hermon_log_num_mtt
= HERMON_NUM_MTT_SHIFT
;
84 uint32_t hermon_log_num_dmpt
= HERMON_NUM_DMPT_SHIFT
;
85 uint32_t hermon_log_max_mrw_sz
= HERMON_MAX_MEM_MPT_SHIFT
;
88 * Number of supported UAR (User Access Regions) for this HCA.
89 * We could in the future read in uar_sz from devlim, and thus
90 * derive the number of UAR. Since this is derived from PAGESIZE,
91 * however, this means that x86 systems would have twice as many
92 * UARs as SPARC systems. Therefore for consistency's sake, we will
93 * just use 1024 pages, which is the maximum on SPARC systems.
95 uint32_t hermon_log_num_uar
= HERMON_NUM_UAR_SHIFT
;
98 * Number of remaps allowed for FMR before a sync is required. This value
99 * determines how many times we can fmr_deregister() before the underlying fmr
100 * framework places the region to wait for an MTT_SYNC operation, cleaning up
103 uint32_t hermon_fmr_num_remaps
= HERMON_FMR_MAX_REMAPS
;
106 * Number of supported Hermon mailboxes ("In" and "Out") and their maximum
107 * sizes, respectively
109 uint32_t hermon_log_num_inmbox
= HERMON_NUM_MAILBOXES_SHIFT
;
110 uint32_t hermon_log_num_outmbox
= HERMON_NUM_MAILBOXES_SHIFT
;
111 uint32_t hermon_log_inmbox_size
= HERMON_MBOX_SIZE_SHIFT
;
112 uint32_t hermon_log_outmbox_size
= HERMON_MBOX_SIZE_SHIFT
;
113 uint32_t hermon_log_num_intr_inmbox
= HERMON_NUM_INTR_MAILBOXES_SHIFT
;
114 uint32_t hermon_log_num_intr_outmbox
= HERMON_NUM_INTR_MAILBOXES_SHIFT
;
116 /* Number of supported Protection Domains (PD) */
117 uint32_t hermon_log_num_pd
= HERMON_NUM_PD_SHIFT
;
120 * Number of total supported PKeys per PKey table (i.e.
121 * per port). Also the number of SGID per GID table.
123 uint32_t hermon_log_max_pkeytbl
= HERMON_NUM_PKEYTBL_SHIFT
;
124 uint32_t hermon_log_max_gidtbl
= HERMON_NUM_GIDTBL_SHIFT
;
126 /* Maximum supported MTU and portwidth */
127 uint32_t hermon_max_mtu
= HERMON_MAX_MTU
;
128 uint32_t hermon_max_port_width
= HERMON_MAX_PORT_WIDTH
;
130 /* Number of supported Virtual Lanes (VL) */
131 uint32_t hermon_max_vlcap
= HERMON_MAX_VLCAP
;
/*
 * Whether or not to use the built-in (i.e. in firmware) agents.
 * One switch for QP0 and one for QP1; both default to 0.
 */
uint32_t hermon_qp0_agents_in_fw = 0;
uint32_t hermon_qp1_agents_in_fw = 0;

/*
 * Whether DMA mappings should bypass the PCI IOMMU or not.
 * hermon_iommu_bypass is a global setting for all memory addresses.
 */
uint32_t hermon_iommu_bypass = 1;
147 * Whether *DATA* buffers should be bound w/ Relaxed Ordering (RO) turned on
148 * via the SW workaround (HCAs don't support RO in HW). Defaulted on,
149 * though care must be taken w/ some Userland clients that *MAY* have
150 * peeked in the data to understand when data xfer was done - MPI does
154 uint32_t hermon_kernel_data_ro
= HERMON_RO_ENABLED
; /* default */
155 uint32_t hermon_user_data_ro
= HERMON_RO_ENABLED
; /* default */
/*
 * Whether Hermon should use MSI (Message Signaled Interrupts), if available.
 * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
 */
uint32_t hermon_use_msi_if_avail = 1;
164 * This is a patchable variable that determines the time we will wait after
165 * initiating SW reset before we do our first read from Hermon config space.
166 * If this value is set too small (less than the default 100ms), it is
167 * possible for Hermon hardware to be unready to respond to the config cycle
168 * reads. This could cause master abort on the PCI bridge. Note: If
169 * "hermon_sw_reset_delay" is set to zero, then no software reset of the Hermon
170 * device will be attempted.
172 uint32_t hermon_sw_reset_delay
= HERMON_SW_RESET_DELAY
;
175 * These are patchable variables for hermon command polling. The poll_delay is
176 * the number of usec to wait in-between calls to poll the 'go' bit. The
177 * poll_max is the total number of usec to loop in waiting for the 'go' bit to
180 uint32_t hermon_cmd_poll_delay
= HERMON_CMD_POLL_DELAY
;
181 uint32_t hermon_cmd_poll_max
= HERMON_CMD_POLL_MAX
;
184 * This is a patchable variable that determines the frequency with which
185 * the AckReq bit will be set in outgoing RC packets. The AckReq bit will be
186 * set in at least every 2^hermon_qp_ackreq_freq packets (but at least once
187 * per message, i.e. in the last packet). Tuning this value can increase
188 * IB fabric utilization by cutting down on the number of unnecessary ACKs.
190 uint32_t hermon_qp_ackreq_freq
= HERMON_QP_ACKREQ_FREQ
;
192 static void hermon_cfg_wqe_sizes(hermon_state_t
*state
,
193 hermon_cfg_profile_t
*cp
);
196 * hermon_cfg_profile_init_phase1()
197 * Context: Only called from attach() path context
200 hermon_cfg_profile_init_phase1(hermon_state_t
*state
)
202 hermon_cfg_profile_t
*cp
;
205 * Allocate space for the configuration profile structure
207 cp
= (hermon_cfg_profile_t
*)kmem_zalloc(sizeof (hermon_cfg_profile_t
),
211 * Common to all profiles.
213 cp
->cp_qp0_agents_in_fw
= hermon_qp0_agents_in_fw
;
214 cp
->cp_qp1_agents_in_fw
= hermon_qp1_agents_in_fw
;
215 cp
->cp_sw_reset_delay
= hermon_sw_reset_delay
;
216 cp
->cp_cmd_poll_delay
= hermon_cmd_poll_delay
;
217 cp
->cp_cmd_poll_max
= hermon_cmd_poll_max
;
218 cp
->cp_ackreq_freq
= hermon_qp_ackreq_freq
;
219 cp
->cp_fmr_max_remaps
= hermon_fmr_num_remaps
;
222 * Although most of the configuration is enabled in "phase2" of the
223 * cfg_profile_init, we have to setup the OUT mailboxes soon, since
224 * they are used immediately after this "phase1" completes, to run the
225 * firmware and get the device limits, which we'll need for 'phase2'.
226 * That's done in rsrc_init_phase1, called shortly after we do this
227 * and the sw reset - see hermon.c
229 if (state
->hs_cfg_profile_setting
== HERMON_CFG_MEMFREE
) {
230 cp
->cp_log_num_outmbox
= hermon_log_num_outmbox
;
231 cp
->cp_log_outmbox_size
= hermon_log_outmbox_size
;
232 cp
->cp_log_num_inmbox
= hermon_log_num_inmbox
;
233 cp
->cp_log_inmbox_size
= hermon_log_inmbox_size
;
234 cp
->cp_log_num_intr_inmbox
= hermon_log_num_intr_inmbox
;
235 cp
->cp_log_num_intr_outmbox
= hermon_log_num_intr_outmbox
;
238 return (DDI_FAILURE
);
242 * Set IOMMU bypass or not. Ensure consistency of flags with
245 cp
->cp_iommu_bypass
= HERMON_BINDMEM_NORMAL
;
247 /* Attach the configuration profile to Hermon softstate */
248 state
->hs_cfg_profile
= cp
;
250 return (DDI_SUCCESS
);
/*
 * hermon_cfg_profile_init_phase2() - second stage of profile setup.
 *
 * Reads the device limits (state->hs_devlim) and the port information
 * (state->hs_queryport) and fills in the remaining fields of the profile
 * already attached to state->hs_cfg_profile.  Most of the log-count fields
 * are the lesser of the matching patchable global and the device limit.
 * Also sets state->hs_kernel_uar_index, allocates zeroed pkey and guid
 * arrays into state->hs_pkey[] and state->hs_guid[], and calls
 * hermon_cfg_wqe_sizes().
 *
 * Returns DDI_FAILURE when state->hs_cfg_profile_setting is not
 * HERMON_CFG_MEMFREE (checked before any field is written), otherwise
 * DDI_SUCCESS.
 *
 * NOTE(review): in the text under review the return type, the braces and
 * the declarations of the locals i, num and size are not visible.  The
 * remarks added below flag places where a statement appears to be absent.
 */
254 * hermon_cfg_profile_init_phase2()
255 * Context: Only called from attach() path context
258 hermon_cfg_profile_init_phase2(hermon_state_t
*state
)
260 hermon_cfg_profile_t
*cp
;
261 hermon_hw_querydevlim_t
*devlim
;
262 hermon_hw_query_port_t
*port
;
266 /* Read in the device limits */
267 devlim
= &state
->hs_devlim
;
268 /* and the port information */
269 port
= &state
->hs_queryport
;
271 /* Read the configuration profile */
272 cp
= state
->hs_cfg_profile
;
275 * We configure all Hermon HCAs with the same profile, which
276 * is based upon the default value assignments above. If we want to
277 * add additional profiles in the future, they can be added here.
278 * Note the reference to "Memfree" is a holdover from Arbel/Sinai
/* Any setting other than HERMON_CFG_MEMFREE is rejected up front */
280 if (state
->hs_cfg_profile_setting
!= HERMON_CFG_MEMFREE
) {
281 return (DDI_FAILURE
);
285 * Note for most configuration parameters, we use the lesser of our
286 * desired configuration value or the device-defined maximum value.
288 cp
->cp_log_num_mtt
= min(hermon_log_num_mtt
, devlim
->log_max_mtt
);
289 cp
->cp_log_num_dmpt
= min(hermon_log_num_dmpt
, devlim
->log_max_dmpt
);
/* Not bounded by a device limit: a fixed constant plus 2 */
290 cp
->cp_log_num_cmpt
= HERMON_LOG_CMPT_PER_TYPE
+ 2; /* times 4, */
292 cp
->cp_log_max_mrw_sz
= min(hermon_log_max_mrw_sz
,
293 devlim
->log_max_mrw_sz
);
294 cp
->cp_log_num_pd
= min(hermon_log_num_pd
, devlim
->log_max_pd
);
295 cp
->cp_log_num_qp
= min(hermon_log_num_qp
, devlim
->log_max_qp
);
296 cp
->cp_log_num_cq
= min(hermon_log_num_cq
, devlim
->log_max_cq
);
297 cp
->cp_log_num_srq
= min(hermon_log_num_srq
, devlim
->log_max_srq
);
298 cp
->cp_log_num_eq
= min(hermon_log_num_eq
, devlim
->log_max_eq
);
299 cp
->cp_log_eq_sz
= min(hermon_log_eq_sz
, devlim
->log_max_eq_sz
);
/*
 * The rdb log count is the QP log count plus the bounded per-QP rdb log
 * count; the per-QP rdma in and out limits are 1 shifted left by that same
 * bounded per-QP value.
 */
300 cp
->cp_log_num_rdb
= cp
->cp_log_num_qp
+
301 min(hermon_log_num_rdb_per_qp
, devlim
->log_max_ra_req_qp
);
302 cp
->cp_hca_max_rdma_in_qp
= cp
->cp_hca_max_rdma_out_qp
=
303 1 << min(hermon_log_num_rdb_per_qp
, devlim
->log_max_ra_req_qp
);
/*
 * QPs per MCG: at least HERMON_NUM_QP_PER_MCG_MIN, at most
 * (1 << log_max_qp_mcg) - 8, then adjusted by the highbit() expression.
 * NOTE(review): assuming highbit() yields the 1-based position of the top
 * set bit, the last step rounds (value + 8) up to a power of two and then
 * subtracts 8 again - confirm.
 */
304 cp
->cp_num_qp_per_mcg
= max(hermon_num_qp_per_mcg
,
305 HERMON_NUM_QP_PER_MCG_MIN
);
306 cp
->cp_num_qp_per_mcg
= min(cp
->cp_num_qp_per_mcg
,
307 (1 << devlim
->log_max_qp_mcg
) - 8);
308 cp
->cp_num_qp_per_mcg
= (1 << highbit(cp
->cp_num_qp_per_mcg
+ 7)) - 8;
309 cp
->cp_log_num_mcg
= min(hermon_log_num_mcg
, devlim
->log_max_mcg
);
/* The MCG hash size is taken from the global as is, with no device bound */
310 cp
->cp_log_num_mcg_hash
= hermon_log_num_mcg_hash
;
312 /* until srq_resize is debugged, disable it */
313 cp
->cp_srq_resize_enabled
= 0;
315 /* cp->cp_log_num_uar = hermon_log_num_uar; */
317 * now, we HAVE to calculate the number of UAR pages, so that we can
318 * get the blueflame stuff correct as well
321 size
= devlim
->log_max_uar_sz
;
322 /* 1MB (2^^20) times size (2^^size) / sparc_pg (2^^13) */
323 num
= (20 + size
) - 13; /* XXX - consider using PAGESHIFT */
/*
 * NOTE(review): the remark on the next statement implies that the decrement
 * applies only when blueflame is in use, but no condition is visible in
 * this text - confirm against the complete source.
 */
325 num
-= 1; /* if blueflame, only half the size for UARs */
326 cp
->cp_log_num_uar
= min(hermon_log_num_uar
, num
);
329 /* while we're at it, calculate the index of the kernel uar page */
330 /* either the reserved uar's or 128, whichever is smaller */
/*
 * NOTE(review): the remark above says whichever is smaller, yet the
 * expression below yields num_rsvd_uar when it exceeds 128 and 128
 * otherwise, i.e. the larger of the two - one of them needs correcting.
 */
331 state
->hs_kernel_uar_index
= (devlim
->num_rsvd_uar
> 128) ?
332 devlim
->num_rsvd_uar
: 128;
/*
 * The pkey and gid table sizes, MTU, port width and VL cap come from the
 * port query results, and the queue size limits come from the device
 * limits; none of these consult a patchable global here.
 */
334 cp
->cp_log_max_pkeytbl
= port
->log_max_pkey
;
336 cp
->cp_log_max_qp_sz
= devlim
->log_max_qp_sz
;
337 cp
->cp_log_max_cq_sz
= devlim
->log_max_cq_sz
;
338 cp
->cp_log_max_srq_sz
= devlim
->log_max_srq_sz
;
339 cp
->cp_log_max_gidtbl
= port
->log_max_gid
;
340 cp
->cp_max_mtu
= port
->ib_mtu
; /* XXX now from query_port */
341 cp
->cp_max_port_width
= port
->ib_port_wid
; /* now from query_port */
342 cp
->cp_max_vlcap
= port
->max_vl
;
343 cp
->cp_log_num_ah
= hermon_log_num_ah
;
345 /* Paranoia, ensure no arrays indexed by port_num are out of bounds */
346 cp
->cp_num_ports
= devlim
->num_ports
;
347 if (cp
->cp_num_ports
> HERMON_MAX_PORTS
) {
348 cmn_err(CE_CONT
, "device has more ports (%d) than are "
349 "supported; Using %d ports\n",
350 cp
->cp_num_ports
, HERMON_MAX_PORTS
);
351 cp
->cp_num_ports
= HERMON_MAX_PORTS
;
354 /* allocate variable sized arrays */
/*
 * One pkey array and one guid array per slot, for every slot up to
 * HERMON_MAX_PORTS rather than only cp_num_ports; each is sized from the
 * table sizes recorded above.  This routine does not free them.
 */
355 for (i
= 0; i
< HERMON_MAX_PORTS
; i
++) {
356 state
->hs_pkey
[i
] = kmem_zalloc((1 << cp
->cp_log_max_pkeytbl
) *
357 sizeof (ib_pkey_t
), KM_SLEEP
);
358 state
->hs_guid
[i
] = kmem_zalloc((1 << cp
->cp_log_max_gidtbl
) *
359 sizeof (ib_guid_t
), KM_SLEEP
);
362 /* Determine WQE sizes from requested max SGLs */
363 hermon_cfg_wqe_sizes(state
, cp
);
365 /* Set whether to use MSIs or not */
366 cp
->cp_use_msi_if_avail
= hermon_use_msi_if_avail
;
/*
 * NOTE(review): as shown, the six log counts below are each reduced by 6
 * on every build.  The surrounding remark is cut short and any
 * preprocessor guard is not visible - confirm whether this section is
 * meant to be conditional.
 */
370 * Need to reduce the hermon kernel virtual memory footprint
373 cp
->cp_log_num_mtt
-= 6;
374 cp
->cp_log_num_dmpt
-= 6;
375 cp
->cp_log_num_pd
-= 6;
376 cp
->cp_log_num_qp
-= 6;
377 cp
->cp_log_num_cq
-= 6;
378 cp
->cp_log_num_srq
-= 6;
/* Recomputed because cp_log_num_qp has just been changed */
379 cp
->cp_log_num_rdb
= cp
->cp_log_num_qp
+
380 min(hermon_log_num_rdb_per_qp
, devlim
->log_max_ra_req_qp
);
381 cp
->cp_hca_max_rdma_in_qp
= cp
->cp_hca_max_rdma_out_qp
=
382 1 << min(hermon_log_num_rdb_per_qp
, devlim
->log_max_ra_req_qp
);
385 return (DDI_SUCCESS
);
390 * hermon_cfg_profile_fini()
391 * Context: Only called from attach() and/or detach() path contexts
394 hermon_cfg_profile_fini(hermon_state_t
*state
)
397 * Free up the space for configuration profile
399 kmem_free(state
->hs_cfg_profile
, sizeof (hermon_cfg_profile_t
));
404 * hermon_cfg_wqe_sizes()
405 * Context: Only called from attach() path context
408 hermon_cfg_wqe_sizes(hermon_state_t
*state
, hermon_cfg_profile_t
*cp
)
410 uint_t max_size
, log2
;
411 uint_t max_sgl
, real_max_sgl
;
414 * Get the requested maximum number SGL per WQE from the Hermon
417 max_sgl
= hermon_wqe_max_sgl
;
420 * Use requested maximum number of SGL to calculate the max descriptor
421 * size (while guaranteeing that the descriptor size is a power-of-2
422 * cachelines). We have to use the calculation for QP1 MLX transport
423 * because the possibility that we might need to inline a GRH, along
424 * with all the other headers and alignment restrictions, sets the
425 * maximum for the number of SGLs that we can advertise support for.
427 max_size
= (HERMON_QP_WQE_MLX_QP1_HDRS
+ (max_sgl
<< 4));
428 log2
= highbit(max_size
);
429 if (ISP2(max_size
)) {
432 max_size
= (1 << log2
);
434 max_size
= min(max_size
, state
->hs_devlim
.max_desc_sz_sq
);
437 * Then use the calculated max descriptor size to determine the "real"
438 * maximum SGL (the number beyond which we would roll over to the next
441 real_max_sgl
= (max_size
- HERMON_QP_WQE_MLX_QP1_HDRS
) >> 4;
443 /* Then save away this configuration information */
444 cp
->cp_wqe_max_sgl
= max_sgl
;
445 cp
->cp_wqe_real_max_sgl
= real_max_sgl
;
447 /* SRQ SGL gets set to it's own patchable variable value */
448 cp
->cp_srq_max_sgl
= hermon_srq_max_sgl
;