4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection
29 * Domain, and port-related operations
31 * Implements all the routines necessary for allocating, freeing, querying
32 * and modifying Address Handles and Protection Domains. Also implements
33 * all the routines necessary for adding and removing Queue Pairs to/from
34 * Multicast Groups. Lastly, it implements the routines necessary for
35 * port-related query and modify operations.
38 #include <sys/types.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/bitmap.h>
44 #include <sys/sysmacros.h>
46 #include <sys/ib/adapters/hermon/hermon.h>
48 extern int hermon_rdma_debug
;
49 int hermon_fmr_verbose
= 0;
51 static int hermon_mcg_qplist_add(hermon_state_t
*state
, hermon_mcghdl_t mcg
,
52 hermon_hw_mcg_qp_list_t
*mcg_qplist
, hermon_qphdl_t qp
, uint_t
*qp_found
);
53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg
,
54 hermon_hw_mcg_qp_list_t
*mcg_qplist
, hermon_qphdl_t qp
);
55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp
);
56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp
);
57 static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t
*state
,
58 uint64_t start_indx
, ib_gid_t mgid
, uint_t
*prev_indx
);
59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg
,
60 hermon_hw_mcg_t
*mcg_hdr
, ib_gid_t mgid
, hermon_rsrc_t
*mcg_rsrc
);
61 static int hermon_mcg_hash_list_remove(hermon_state_t
*state
, uint_t curr_indx
,
62 uint_t prev_indx
, hermon_hw_mcg_t
*mcg_entry
);
63 static int hermon_mcg_entry_invalidate(hermon_state_t
*state
,
64 hermon_hw_mcg_t
*mcg_entry
, uint_t indx
);
65 static int hermon_mgid_is_valid(ib_gid_t gid
);
66 static int hermon_mlid_is_valid(ib_lid_t lid
);
67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool
);
70 #define HERMON_MAX_DBR_PAGES_PER_USER 64
71 #define HERMON_DBR_KEY(index, page) \
72 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
74 static hermon_udbr_page_t
*
75 hermon_dbr_new_user_page(hermon_state_t
*state
, uint_t index
,
78 hermon_udbr_page_t
*pagep
;
79 ddi_dma_attr_t dma_attr
;
82 hermon_umap_db_entry_t
*umapdb
;
83 ulong_t pagesize
= PAGESIZE
;
85 pagep
= kmem_alloc(sizeof (*pagep
), KM_SLEEP
);
86 pagep
->upg_index
= page
;
87 pagep
->upg_nfree
= pagesize
/ sizeof (hermon_dbr_t
);
89 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
90 pagep
->upg_free
= kmem_zalloc(pagesize
/ sizeof (hermon_dbr_t
) / 8,
92 pagep
->upg_kvaddr
= ddi_umem_alloc(pagesize
, DDI_UMEM_SLEEP
,
93 &pagep
->upg_umemcookie
); /* not HERMON_PAGESIZE here */
95 pagep
->upg_buf
= ddi_umem_iosetup(pagep
->upg_umemcookie
, 0,
96 pagesize
, B_WRITE
, 0, 0, NULL
, DDI_UMEM_SLEEP
);
98 hermon_dma_attr_init(state
, &dma_attr
);
99 status
= ddi_dma_alloc_handle(state
->hs_dip
, &dma_attr
,
100 DDI_DMA_SLEEP
, NULL
, &pagep
->upg_dmahdl
);
101 if (status
!= DDI_SUCCESS
) {
102 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: "
103 "ddi_dma_buf_bind_handle failed: %d", status
);
106 status
= ddi_dma_buf_bind_handle(pagep
->upg_dmahdl
,
107 pagep
->upg_buf
, DDI_DMA_RDWR
| DDI_DMA_CONSISTENT
,
108 DDI_DMA_SLEEP
, NULL
, &pagep
->upg_dmacookie
, &cookiecnt
);
109 if (status
!= DDI_SUCCESS
) {
110 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
111 "ddi_dma_buf_bind_handle failed: %d", status
);
112 ddi_dma_free_handle(&pagep
->upg_dmahdl
);
115 ASSERT(cookiecnt
== 1);
117 /* create db entry for mmap */
118 umapdb
= hermon_umap_db_alloc(state
->hs_instance
,
119 HERMON_DBR_KEY(index
, page
), MLNX_UMAP_DBRMEM_RSRC
,
120 (uint64_t)(uintptr_t)pagep
);
121 hermon_umap_db_add(umapdb
);
128 hermon_user_dbr_alloc(hermon_state_t
*state
, uint_t index
,
129 ddi_acc_handle_t
*acchdl
, hermon_dbr_t
**vdbr
, uint64_t *pdbr
,
132 hermon_user_dbr_t
*udbr
;
133 hermon_udbr_page_t
*pagep
;
136 int i1
, i2
, i3
, last
;
139 mutex_enter(&state
->hs_dbr_lock
);
140 for (udbr
= state
->hs_user_dbr
; udbr
!= NULL
; udbr
= udbr
->udbr_link
)
141 if (udbr
->udbr_index
== index
)
144 udbr
= kmem_alloc(sizeof (*udbr
), KM_SLEEP
);
145 udbr
->udbr_link
= state
->hs_user_dbr
;
146 state
->hs_user_dbr
= udbr
;
147 udbr
->udbr_index
= index
;
148 udbr
->udbr_pagep
= NULL
;
150 pagep
= udbr
->udbr_pagep
;
151 next_page
= (pagep
== NULL
) ? 0 : (pagep
->upg_index
+ 1);
152 while (pagep
!= NULL
)
153 if (pagep
->upg_nfree
> 0)
156 pagep
= pagep
->upg_link
;
158 pagep
= hermon_dbr_new_user_page(state
, index
, next_page
);
160 mutex_exit(&state
->hs_dbr_lock
);
161 return (DDI_FAILURE
);
163 pagep
->upg_link
= udbr
->udbr_pagep
;
164 udbr
->udbr_pagep
= pagep
;
167 /* Since nfree > 0, we're assured the loops below will succeed */
169 /* First, find a 64-bit (not ~0) that has a free dbr */
170 last
= PAGESIZE
/ sizeof (uint64_t) / 64;
172 for (i1
= 0; i1
< last
; i1
++)
173 if ((pagep
->upg_free
[i1
] & mask
) != mask
)
175 u64
= pagep
->upg_free
[i1
];
177 /* Second, find a byte (not 0xff) that has a free dbr */
178 last
= sizeof (uint64_t) / sizeof (uint8_t);
179 for (i2
= 0, mask
= 0xff; i2
< last
; i2
++, mask
<<= 8)
180 if ((u64
& mask
) != mask
)
183 /* Third, find a bit that is free (0) */
184 for (i3
= 0; i3
< sizeof (uint64_t) / sizeof (uint8_t); i3
++)
185 if ((u64
& (1ul << (i3
+ 8 * i2
))) == 0)
188 /* Mark it as allocated */
189 pagep
->upg_free
[i1
] |= (1ul << (i3
+ 8 * i2
));
191 dbr_index
= ((i1
* sizeof (uint64_t)) + i2
) * sizeof (uint64_t) + i3
;
193 ((uint64_t *)(void *)pagep
->upg_kvaddr
)[dbr_index
] = 0; /* clear dbr */
194 *mapoffset
= ((HERMON_DBR_KEY(index
, pagep
->upg_index
) <<
195 MLNX_UMAP_RSRC_TYPE_SHIFT
) | MLNX_UMAP_DBRMEM_RSRC
) << PAGESHIFT
;
196 *vdbr
= (hermon_dbr_t
*)((uint64_t *)(void *)pagep
->upg_kvaddr
+
198 *pdbr
= pagep
->upg_dmacookie
.dmac_laddress
+ dbr_index
*
201 mutex_exit(&state
->hs_dbr_lock
);
202 return (DDI_SUCCESS
);
206 hermon_user_dbr_free(hermon_state_t
*state
, uint_t index
, hermon_dbr_t
*record
)
208 hermon_user_dbr_t
*udbr
;
209 hermon_udbr_page_t
*pagep
;
212 uint_t max_free
= PAGESIZE
/ sizeof (hermon_dbr_t
);
215 dbr_index
= (uintptr_t)record
& PAGEOFFSET
; /* offset (not yet index) */
216 kvaddr
= (caddr_t
)record
- dbr_index
;
217 dbr_index
/= sizeof (hermon_dbr_t
); /* now it's the index */
219 mutex_enter(&state
->hs_dbr_lock
);
220 for (udbr
= state
->hs_user_dbr
; udbr
!= NULL
; udbr
= udbr
->udbr_link
)
221 if (udbr
->udbr_index
== index
)
224 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
225 "found for index %x", index
);
226 mutex_exit(&state
->hs_dbr_lock
);
229 for (pagep
= udbr
->udbr_pagep
; pagep
!= NULL
; pagep
= pagep
->upg_link
)
230 if (pagep
->upg_kvaddr
== kvaddr
)
233 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
234 " found for index %x, kvaddr %p, DBR index %x",
235 index
, kvaddr
, dbr_index
);
236 mutex_exit(&state
->hs_dbr_lock
);
239 if (pagep
->upg_nfree
>= max_free
) {
240 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
241 "UCE index %x, DBR index %x", index
, dbr_index
);
242 mutex_exit(&state
->hs_dbr_lock
);
245 ASSERT(dbr_index
< max_free
);
248 ASSERT((pagep
->upg_free
[i1
] & (1ul << i2
)) == (1ul << i2
));
249 pagep
->upg_free
[i1
] &= ~(1ul << i2
);
251 mutex_exit(&state
->hs_dbr_lock
);
255 * hermon_dbr_page_alloc()
256 * first page allocation - called from attach or open
257 * in this case, we want exactly one page per call, and aligned on a
258 * page - and may need to be mapped to the user for access
261 hermon_dbr_page_alloc(hermon_state_t
*state
, hermon_dbr_info_t
**dinfo
)
264 ddi_dma_handle_t dma_hdl
;
265 ddi_acc_handle_t acc_hdl
;
266 ddi_dma_attr_t dma_attr
;
267 ddi_dma_cookie_t cookie
;
270 hermon_dbr_info_t
*info
;
273 ulong_t pagesize
= PAGESIZE
;
275 info
= kmem_zalloc(sizeof (hermon_dbr_info_t
), KM_SLEEP
);
278 * Initialize many of the default DMA attributes. Then set additional
279 * alignment restrictions if necessary for the dbr memory, meaning
280 * page aligned. Also use the configured value for IOMMU bypass
282 hermon_dma_attr_init(state
, &dma_attr
);
283 dma_attr
.dma_attr_align
= pagesize
;
284 dma_attr
.dma_attr_sgllen
= 1; /* make sure only one cookie */
286 status
= ddi_dma_alloc_handle(state
->hs_dip
, &dma_attr
,
287 DDI_DMA_SLEEP
, NULL
, &dma_hdl
);
288 if (status
!= DDI_SUCCESS
) {
289 kmem_free((void *)info
, sizeof (hermon_dbr_info_t
));
290 cmn_err(CE_NOTE
, "dbr DMA handle alloc failed\n");
291 return (DDI_FAILURE
);
294 status
= ddi_dma_mem_alloc(dma_hdl
, pagesize
,
295 &state
->hs_reg_accattr
, DDI_DMA_CONSISTENT
, DDI_DMA_SLEEP
,
296 NULL
, &dmaaddr
, (size_t *)&dmalen
, &acc_hdl
);
297 if (status
!= DDI_SUCCESS
) {
298 ddi_dma_free_handle(&dma_hdl
);
299 cmn_err(CE_CONT
, "dbr DMA mem alloc failed(status %d)", status
);
300 kmem_free((void *)info
, sizeof (hermon_dbr_info_t
));
301 return (DDI_FAILURE
);
304 /* this memory won't be IB registered, so do the bind here */
305 status
= ddi_dma_addr_bind_handle(dma_hdl
, NULL
,
306 dmaaddr
, (size_t)dmalen
, DDI_DMA_RDWR
|
307 DDI_DMA_CONSISTENT
, DDI_DMA_SLEEP
, NULL
, &cookie
, &cookie_cnt
);
308 if (status
!= DDI_SUCCESS
) {
309 ddi_dma_mem_free(&acc_hdl
);
310 ddi_dma_free_handle(&dma_hdl
);
311 kmem_free((void *)info
, sizeof (hermon_dbr_info_t
));
312 cmn_err(CE_CONT
, "dbr DMA bind handle failed (status %d)",
314 return (DDI_FAILURE
);
316 *dinfo
= info
; /* Pass back the pointer */
318 /* init the info structure with returned info */
319 info
->dbr_dmahdl
= dma_hdl
;
320 info
->dbr_acchdl
= acc_hdl
;
321 info
->dbr_page
= (hermon_dbr_t
*)(void *)dmaaddr
;
322 info
->dbr_link
= NULL
;
323 /* extract the phys addr from the cookie */
324 info
->dbr_paddr
= cookie
.dmac_laddress
;
325 info
->dbr_firstfree
= 0;
326 info
->dbr_nfree
= HERMON_NUM_DBR_PER_PAGE
;
327 /* link all DBrs onto the free list */
328 for (i
= 0; i
< HERMON_NUM_DBR_PER_PAGE
; i
++) {
329 info
->dbr_page
[i
] = i
+ 1;
332 return (DDI_SUCCESS
);
338 * DBr record allocation - called from alloc cq/qp/srq
339 * will check for available dbrs in current
340 * page - if needed it will allocate another and link them
344 hermon_dbr_alloc(hermon_state_t
*state
, uint_t index
, ddi_acc_handle_t
*acchdl
,
345 hermon_dbr_t
**vdbr
, uint64_t *pdbr
, uint64_t *mapoffset
)
347 hermon_dbr_t
*record
= NULL
;
348 hermon_dbr_info_t
*info
= NULL
;
352 if (index
!= state
->hs_kernel_uar_index
)
353 return (hermon_user_dbr_alloc(state
, index
, acchdl
, vdbr
, pdbr
,
356 mutex_enter(&state
->hs_dbr_lock
);
357 for (info
= state
->hs_kern_dbr
; info
!= NULL
; info
= info
->dbr_link
)
358 if (info
->dbr_nfree
!= 0)
359 break; /* found a page w/ one available */
361 if (info
== NULL
) { /* did NOT find a page with one available */
362 status
= hermon_dbr_page_alloc(state
, &info
);
363 if (status
!= DDI_SUCCESS
) {
364 /* do error handling */
365 mutex_exit(&state
->hs_dbr_lock
);
366 return (DDI_FAILURE
);
368 /* got a new page, so link it in. */
369 info
->dbr_link
= state
->hs_kern_dbr
;
370 state
->hs_kern_dbr
= info
;
372 idx
= info
->dbr_firstfree
;
373 record
= info
->dbr_page
+ idx
;
374 info
->dbr_firstfree
= *record
;
378 *acchdl
= info
->dbr_acchdl
;
380 *pdbr
= info
->dbr_paddr
+ idx
* sizeof (hermon_dbr_t
);
381 mutex_exit(&state
->hs_dbr_lock
);
382 return (DDI_SUCCESS
);
387 * DBr record deallocation - called from free cq/qp
388 * will update the counter in the header, and invalidate
389 * the dbr, but will NEVER free pages of dbrs - small
390 * price to pay, but userland access never will anyway
393 hermon_dbr_free(hermon_state_t
*state
, uint_t indx
, hermon_dbr_t
*record
)
396 hermon_dbr_info_t
*info
;
398 if (indx
!= state
->hs_kernel_uar_index
) {
399 hermon_user_dbr_free(state
, indx
, record
);
402 page
= (hermon_dbr_t
*)(uintptr_t)((uintptr_t)record
& PAGEMASK
);
403 mutex_enter(&state
->hs_dbr_lock
);
404 for (info
= state
->hs_kern_dbr
; info
!= NULL
; info
= info
->dbr_link
)
405 if (info
->dbr_page
== page
)
407 ASSERT(info
!= NULL
);
408 *record
= info
->dbr_firstfree
;
409 info
->dbr_firstfree
= record
- info
->dbr_page
;
411 mutex_exit(&state
->hs_dbr_lock
);
415 * hermon_dbr_kern_free()
416 * Context: Can be called only from detach context.
418 * Free all kernel dbr pages. This includes the freeing of all the dma
419 * resources acquired during the allocation of the pages.
421 * Also, free all the user dbr pages.
424 hermon_dbr_kern_free(hermon_state_t
*state
)
426 hermon_dbr_info_t
*info
, *link
;
427 hermon_user_dbr_t
*udbr
, *next
;
428 hermon_udbr_page_t
*pagep
, *nextp
;
429 hermon_umap_db_entry_t
*umapdb
;
430 int instance
, status
;
432 extern hermon_umap_db_t hermon_userland_rsrc_db
;
434 mutex_enter(&state
->hs_dbr_lock
);
435 for (info
= state
->hs_kern_dbr
; info
!= NULL
; info
= link
) {
436 (void) ddi_dma_unbind_handle(info
->dbr_dmahdl
);
437 ddi_dma_mem_free(&info
->dbr_acchdl
); /* free page */
438 ddi_dma_free_handle(&info
->dbr_dmahdl
);
439 link
= info
->dbr_link
;
440 kmem_free(info
, sizeof (hermon_dbr_info_t
));
443 udbr
= state
->hs_user_dbr
;
444 instance
= state
->hs_instance
;
445 mutex_enter(&hermon_userland_rsrc_db
.hdl_umapdb_lock
);
446 while (udbr
!= NULL
) {
447 pagep
= udbr
->udbr_pagep
;
448 while (pagep
!= NULL
) {
449 /* probably need to remove "db" */
450 (void) ddi_dma_unbind_handle(pagep
->upg_dmahdl
);
451 ddi_dma_free_handle(&pagep
->upg_dmahdl
);
452 freerbuf(pagep
->upg_buf
);
453 ddi_umem_free(pagep
->upg_umemcookie
);
454 status
= hermon_umap_db_find_nolock(instance
,
455 HERMON_DBR_KEY(udbr
->udbr_index
,
456 pagep
->upg_index
), MLNX_UMAP_DBRMEM_RSRC
,
457 &value
, HERMON_UMAP_DB_REMOVE
, &umapdb
);
458 if (status
== DDI_SUCCESS
)
459 hermon_umap_db_free(umapdb
);
460 kmem_free(pagep
->upg_free
,
461 PAGESIZE
/ sizeof (hermon_dbr_t
) / 8);
462 nextp
= pagep
->upg_link
;
463 kmem_free(pagep
, sizeof (*pagep
));
466 next
= udbr
->udbr_link
;
467 kmem_free(udbr
, sizeof (*udbr
));
470 mutex_exit(&hermon_userland_rsrc_db
.hdl_umapdb_lock
);
471 mutex_exit(&state
->hs_dbr_lock
);
476 * Context: Can be called only from user or kernel context.
479 hermon_ah_alloc(hermon_state_t
*state
, hermon_pdhdl_t pd
,
480 ibt_adds_vect_t
*attr_p
, hermon_ahhdl_t
*ahhdl
, uint_t sleepflag
)
483 hermon_hw_udav_t
*udav
;
488 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
489 * indicate that we wish to allocate an "invalid" (i.e. empty)
493 /* Validate that specified port number is legal */
494 if (!hermon_portnum_is_valid(state
, attr_p
->av_port_num
)) {
495 return (IBT_HCA_PORT_INVALID
);
499 * Allocate the software structure for tracking the address handle
500 * (i.e. the Hermon Address Handle struct).
502 status
= hermon_rsrc_alloc(state
, HERMON_AHHDL
, 1, sleepflag
, &rsrc
);
503 if (status
!= DDI_SUCCESS
) {
504 return (IBT_INSUFF_RESOURCE
);
506 ah
= (hermon_ahhdl_t
)rsrc
->hr_addr
;
507 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah
))
509 /* Increment the reference count on the protection domain (PD) */
510 hermon_pd_refcnt_inc(pd
);
512 udav
= (hermon_hw_udav_t
*)kmem_zalloc(sizeof (hermon_hw_udav_t
),
514 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav
))
517 * Fill in the UDAV data. We first zero out the UDAV, then populate
518 * it by then calling hermon_set_addr_path() to fill in the common
519 * portions that can be pulled from the "ibt_adds_vect_t" passed in
521 status
= hermon_set_addr_path(state
, attr_p
,
522 (hermon_hw_addr_path_t
*)udav
, HERMON_ADDRPATH_UDAV
);
523 if (status
!= DDI_SUCCESS
) {
524 hermon_pd_refcnt_dec(pd
);
525 hermon_rsrc_free(state
, &rsrc
);
528 udav
->pd
= pd
->pd_pdnum
;
529 udav
->sl
= attr_p
->av_srvl
;
532 * Fill in the rest of the Hermon Address Handle struct.
534 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
535 * here because we may need to return it later to the IBTF (as a
536 * result of a subsequent query operation). Unlike the other UDAV
537 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
538 * The reason for this is described in hermon_set_addr_path().
543 ah
->ah_save_guid
= attr_p
->av_dgid
.gid_guid
;
546 return (DDI_SUCCESS
);
552 * Context: Can be called only from user or kernel context.
556 hermon_ah_free(hermon_state_t
*state
, hermon_ahhdl_t
*ahhdl
, uint_t sleepflag
)
563 * Pull all the necessary information from the Hermon Address Handle
564 * struct. This is necessary here because the resource for the
565 * AH is going to be freed up as part of this operation.
568 mutex_enter(&ah
->ah_lock
);
571 mutex_exit(&ah
->ah_lock
);
572 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah
))
574 /* Free the UDAV memory */
575 kmem_free(ah
->ah_udav
, sizeof (hermon_hw_udav_t
));
577 /* Decrement the reference count on the protection domain (PD) */
578 hermon_pd_refcnt_dec(pd
);
580 /* Free the Hermon Address Handle structure */
581 hermon_rsrc_free(state
, &rsrc
);
583 /* Set the ahhdl pointer to NULL and return success */
586 return (DDI_SUCCESS
);
592 * Context: Can be called from interrupt or base context.
596 hermon_ah_query(hermon_state_t
*state
, hermon_ahhdl_t ah
, hermon_pdhdl_t
*pd
,
597 ibt_adds_vect_t
*attr_p
)
599 mutex_enter(&ah
->ah_lock
);
600 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p
))
603 * Pull the PD and UDAV from the Hermon Address Handle structure
608 * Fill in "ibt_adds_vect_t". We call hermon_get_addr_path() to fill
609 * the common portions that can be pulled from the UDAV we pass in.
611 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
612 * "ah_save_guid" field we have previously saved away. The reason
613 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
615 hermon_get_addr_path(state
, (hermon_hw_addr_path_t
*)ah
->ah_udav
,
616 attr_p
, HERMON_ADDRPATH_UDAV
);
618 attr_p
->av_dgid
.gid_guid
= ah
->ah_save_guid
;
620 mutex_exit(&ah
->ah_lock
);
621 return (DDI_SUCCESS
);
627 * Context: Can be called from interrupt or base context.
631 hermon_ah_modify(hermon_state_t
*state
, hermon_ahhdl_t ah
,
632 ibt_adds_vect_t
*attr_p
)
634 hermon_hw_udav_t old_udav
;
638 /* Validate that specified port number is legal */
639 if (!hermon_portnum_is_valid(state
, attr_p
->av_port_num
)) {
640 return (IBT_HCA_PORT_INVALID
);
643 mutex_enter(&ah
->ah_lock
);
645 /* Save a copy of the current UDAV data in old_udav. */
646 bcopy(ah
->ah_udav
, &old_udav
, sizeof (hermon_hw_udav_t
));
649 * Fill in the new UDAV with the caller's data, passed in via the
650 * "ibt_adds_vect_t" structure.
652 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
653 * field here (just as we did during hermon_ah_alloc()) because we
654 * may need to return it later to the IBTF (as a result of a
655 * subsequent query operation). As explained in hermon_ah_alloc(),
656 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
657 * is not always preserved. The reason for this is described in
658 * hermon_set_addr_path().
660 status
= hermon_set_addr_path(state
, attr_p
,
661 (hermon_hw_addr_path_t
*)ah
->ah_udav
, HERMON_ADDRPATH_UDAV
);
662 if (status
!= DDI_SUCCESS
) {
663 mutex_exit(&ah
->ah_lock
);
666 ah
->ah_save_guid
= attr_p
->av_dgid
.gid_guid
;
667 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah
->ah_udav
)))
668 ah
->ah_udav
->sl
= attr_p
->av_srvl
;
671 * Copy changes into the new UDAV.
672 * Note: We copy in 64-bit chunks. For the first two of these
673 * chunks it is necessary to read the current contents of the
674 * UDAV, mask off the modifiable portions (maintaining any
675 * of the "reserved" portions), and then mask on the new data.
677 size
= sizeof (hermon_hw_udav_t
) >> 3;
678 for (i
= 0; i
< size
; i
++) {
679 data_old
= ((uint64_t *)&old_udav
)[i
];
682 * Apply mask to change only the relevant values.
685 data_old
= data_old
& HERMON_UDAV_MODIFY_MASK0
;
687 data_old
= data_old
& HERMON_UDAV_MODIFY_MASK1
;
692 /* Store the updated values to the UDAV */
693 ((uint64_t *)ah
->ah_udav
)[i
] |= data_old
;
697 * Put the valid PD number back into the UDAV entry, as it
698 * might have been clobbered above.
700 ah
->ah_udav
->pd
= old_udav
.pd
;
703 mutex_exit(&ah
->ah_lock
);
704 return (DDI_SUCCESS
);
708 * hermon_mcg_attach()
709 * Context: Can be called only from user or kernel context.
712 hermon_mcg_attach(hermon_state_t
*state
, hermon_qphdl_t qp
, ib_gid_t gid
,
716 hermon_hw_mcg_t
*mcg_entry
;
717 hermon_hw_mcg_qp_list_t
*mcg_entry_qplist
;
718 hermon_mcghdl_t mcg
, newmcg
;
725 * It is only allowed to attach MCG to UD queue pairs. Verify
726 * that the intended QP is of the appropriate transport type
728 if (qp
->qp_serv_type
!= HERMON_QP_UD
) {
729 return (IBT_QP_SRV_TYPE_INVALID
);
733 * Check for invalid Multicast DLID. Specifically, all Multicast
734 * LIDs should be within a well defined range. If the specified LID
735 * is outside of that range, then return an error.
737 if (hermon_mlid_is_valid(lid
) == 0) {
738 return (IBT_MC_MLID_INVALID
);
741 * Check for invalid Multicast GID. All Multicast GIDs should have
742 * a well-defined pattern of bits and flags that are allowable. If
743 * the specified GID does not meet the criteria, then return an error.
745 if (hermon_mgid_is_valid(gid
) == 0) {
746 return (IBT_MC_MGID_INVALID
);
750 * Compute the MGID hash value. Since the MCG table is arranged as
751 * a number of separate hash chains, this operation converts the
752 * specified MGID into the starting index of an entry in the hash
753 * table (i.e. the index for the start of the appropriate hash chain).
754 * Subsequent operations below will walk the chain searching for the
755 * right place to add this new QP.
757 status
= hermon_mgid_hash_cmd_post(state
, gid
.gid_prefix
, gid
.gid_guid
,
758 &mgid_hash
, HERMON_SLEEPFLAG_FOR_CONTEXT());
759 if (status
!= HERMON_CMD_SUCCESS
) {
760 cmn_err(CE_CONT
, "Hermon: MGID_HASH command failed: %08x\n",
762 if (status
== HERMON_CMD_INVALID_STATUS
) {
763 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
765 return (ibc_get_ci_failure(0));
769 * Grab the multicast group mutex. Then grab the pre-allocated
770 * temporary buffer used for holding and/or modifying MCG entries.
771 * Zero out the temporary MCG entry before we begin.
773 mutex_enter(&state
->hs_mcglock
);
774 mcg_entry
= state
->hs_mcgtmp
;
775 mcg_entry_qplist
= HERMON_MCG_GET_QPLIST_PTR(mcg_entry
);
776 bzero(mcg_entry
, HERMON_MCGMEM_SZ(state
));
779 * Walk through the array of MCG entries starting at "mgid_hash".
780 * Try to find the appropriate place for this new QP to be added.
781 * This could happen when the first entry of the chain has MGID == 0
782 * (which means that the hash chain is empty), or because we find
783 * an entry with the same MGID (in which case we'll add the QP to
784 * that MCG), or because we come to the end of the chain (in which
785 * case this is the first QP being added to the multicast group that
786 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine
787 * walks the list and returns an index into the MCG table. The entry
788 * at this index is then checked to determine which case we have
789 * fallen into (see below). Note: We are using the "shadow" MCG
790 * list (of hermon_mcg_t structs) for this lookup because the real
791 * MCG entries are in hardware (and the lookup process would be much
792 * more time consuming).
794 end_indx
= hermon_mcg_walk_mgid_hash(state
, mgid_hash
, gid
, NULL
);
795 mcg
= &state
->hs_mcghdl
[end_indx
];
798 * If MGID == 0, then the hash chain is empty. Just fill in the
799 * current entry. Note: No need to allocate an MCG table entry
800 * as all the hash chain "heads" are already preallocated.
802 if ((mcg
->mcg_mgid_h
== 0) && (mcg
->mcg_mgid_l
== 0)) {
804 /* Fill in the current entry in the "shadow" MCG list */
805 hermon_mcg_setup_new_hdr(mcg
, mcg_entry
, gid
, NULL
);
808 * Try to add the new QP number to the list. This (and the
809 * above) routine fills in a temporary MCG. The "mcg_entry"
810 * and "mcg_entry_qplist" pointers simply point to different
811 * offsets within the same temporary copy of the MCG (for
812 * convenience). Note: If this fails, we need to invalidate
813 * the entries we've already put into the "shadow" list entry
816 status
= hermon_mcg_qplist_add(state
, mcg
, mcg_entry_qplist
, qp
,
818 if (status
!= DDI_SUCCESS
) {
819 bzero(mcg
, sizeof (struct hermon_sw_mcg_list_s
));
820 mutex_exit(&state
->hs_mcglock
);
824 mcg_entry
->member_cnt
= (mcg
->mcg_num_qps
+ 1);
825 /* set the member count */
828 * Once the temporary MCG has been filled in, write the entry
829 * into the appropriate location in the Hermon MCG entry table.
830 * If it's successful, then drop the lock and return success.
831 * Note: In general, this operation shouldn't fail. If it
832 * does, then it is an indication that something (probably in
833 * HW, but maybe in SW) has gone seriously wrong. We still
834 * want to zero out the entries that we've filled in above
835 * (in the hermon_mcg_setup_new_hdr() routine).
837 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, end_indx
,
838 HERMON_CMD_NOSLEEP_SPIN
);
839 if (status
!= HERMON_CMD_SUCCESS
) {
840 bzero(mcg
, sizeof (struct hermon_sw_mcg_list_s
));
841 mutex_exit(&state
->hs_mcglock
);
842 HERMON_WARNING(state
, "failed to write MCG entry");
843 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: "
845 if (status
== HERMON_CMD_INVALID_STATUS
) {
846 hermon_fm_ereport(state
, HCA_SYS_ERR
,
849 return (ibc_get_ci_failure(0));
853 * Now that we know all the Hermon firmware accesses have been
854 * successful, we update the "shadow" MCG entry by incrementing
855 * the "number of attached QPs" count.
857 * We increment only if the QP is not already part of the
858 * MCG by checking the 'qp_found' flag returned from the
865 * Increment the refcnt for this QP. Because the QP
866 * was added to this MCG, the refcnt must be
869 hermon_qp_mcg_refcnt_inc(qp
);
873 * We drop the lock and return success.
875 mutex_exit(&state
->hs_mcglock
);
876 return (DDI_SUCCESS
);
880 * If the specified MGID matches the MGID in the current entry, then
881 * we need to try to add the QP to the current MCG entry. In this
882 * case, it means that we need to read the existing MCG entry (into
883 * the temporary MCG), add the new QP number to the temporary entry
884 * (using the same method we used above), and write the entry back
885 * to the hardware (same as above).
887 if ((mcg
->mcg_mgid_h
== gid
.gid_prefix
) &&
888 (mcg
->mcg_mgid_l
== gid
.gid_guid
)) {
891 * Read the current MCG entry into the temporary MCG. Note:
892 * In general, this operation shouldn't fail. If it does,
893 * then it is an indication that something (probably in HW,
894 * but maybe in SW) has gone seriously wrong.
896 status
= hermon_read_mgm_cmd_post(state
, mcg_entry
, end_indx
,
897 HERMON_CMD_NOSLEEP_SPIN
);
898 if (status
!= HERMON_CMD_SUCCESS
) {
899 mutex_exit(&state
->hs_mcglock
);
900 HERMON_WARNING(state
, "failed to read MCG entry");
901 cmn_err(CE_CONT
, "Hermon: READ_MGM command failed: "
903 if (status
== HERMON_CMD_INVALID_STATUS
) {
904 hermon_fm_ereport(state
, HCA_SYS_ERR
,
907 return (ibc_get_ci_failure(0));
911 * Try to add the new QP number to the list. This routine
912 * fills in the necessary pieces of the temporary MCG. The
913 * "mcg_entry_qplist" pointer is used to point to the portion
914 * of the temporary MCG that holds the QP numbers.
916 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
917 * already found the QP in the list. In this case, the QP is
918 * not added on to the list again. Check the flag 'qp_found'
919 * if this value is needed to be known.
922 status
= hermon_mcg_qplist_add(state
, mcg
, mcg_entry_qplist
, qp
,
924 if (status
!= DDI_SUCCESS
) {
925 mutex_exit(&state
->hs_mcglock
);
929 mcg_entry
->member_cnt
= (mcg
->mcg_num_qps
+ 1);
930 /* set the member count */
933 * Once the temporary MCG has been updated, write the entry
934 * into the appropriate location in the Hermon MCG entry table.
935 * If it's successful, then drop the lock and return success.
936 * Note: In general, this operation shouldn't fail. If it
937 * does, then it is an indication that something (probably in
938 * HW, but maybe in SW) has gone seriously wrong.
940 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, end_indx
,
941 HERMON_CMD_NOSLEEP_SPIN
);
942 if (status
!= HERMON_CMD_SUCCESS
) {
943 mutex_exit(&state
->hs_mcglock
);
944 HERMON_WARNING(state
, "failed to write MCG entry");
945 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: "
947 if (status
== HERMON_CMD_INVALID_STATUS
) {
948 hermon_fm_ereport(state
, HCA_SYS_ERR
,
951 return (ibc_get_ci_failure(0));
955 * Now that we know all the Hermon firmware accesses have been
956 * successful, we update the current "shadow" MCG entry by
957 * incrementing the "number of attached QPs" count.
959 * We increment only if the QP is not already part of the
960 * MCG by checking the 'qp_found' flag returned
961 * hermon_mcg_walk_mgid_hashfrom the qplist_add above.
967 * Increment the refcnt for this QP. Because the QP
968 * was added to this MCG, the refcnt must be
971 hermon_qp_mcg_refcnt_inc(qp
);
975 * We drop the lock and return success.
977 mutex_exit(&state
->hs_mcglock
);
978 return (DDI_SUCCESS
);
982 * If we've reached here, then we're at the end of the hash chain.
983 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
984 * and update the previous entry to link the new one to the end of the
989 * Allocate an MCG table entry. This will be filled in with all
990 * the necessary parameters to define the multicast group. Then it
991 * will be written to the hardware in the next-to-last step below.
993 status
= hermon_rsrc_alloc(state
, HERMON_MCG
, 1, HERMON_NOSLEEP
, &rsrc
);
994 if (status
!= DDI_SUCCESS
) {
995 mutex_exit(&state
->hs_mcglock
);
996 return (IBT_INSUFF_RESOURCE
);
1000 * Fill in the new entry in the "shadow" MCG list. Note: Just as
1001 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1002 * of the temporary MCG entry (the rest of which will be filled in by
1003 * hermon_mcg_qplist_add() below)
1005 newmcg
= &state
->hs_mcghdl
[rsrc
->hr_indx
];
1006 hermon_mcg_setup_new_hdr(newmcg
, mcg_entry
, gid
, rsrc
);
1009 * Try to add the new QP number to the list. This routine fills in
1010 * the final necessary pieces of the temporary MCG. The
1011 * "mcg_entry_qplist" pointer is used to point to the portion of the
1012 * temporary MCG that holds the QP numbers. If we fail here, we
1013 * must undo the previous resource allocation.
1015 * Note: hermon_mcg_qplist_add() can we return SUCCESS if it already
1016 * found the QP in the list. In this case, the QP is not added on to
1017 * the list again. Check the flag 'qp_found' if this value is needed
1020 status
= hermon_mcg_qplist_add(state
, newmcg
, mcg_entry_qplist
, qp
,
1022 if (status
!= DDI_SUCCESS
) {
1023 bzero(newmcg
, sizeof (struct hermon_sw_mcg_list_s
));
1024 hermon_rsrc_free(state
, &rsrc
);
1025 mutex_exit(&state
->hs_mcglock
);
1028 mcg_entry
->member_cnt
= (newmcg
->mcg_num_qps
+ 1);
1029 /* set the member count */
1032 * Once the temporary MCG has been updated, write the entry into the
1033 * appropriate location in the Hermon MCG entry table. If this is
1034 * successful, then we need to chain the previous entry to this one.
1035 * Note: In general, this operation shouldn't fail. If it does, then
1036 * it is an indication that something (probably in HW, but maybe in
1037 * SW) has gone seriously wrong.
1039 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, rsrc
->hr_indx
,
1040 HERMON_CMD_NOSLEEP_SPIN
);
1041 if (status
!= HERMON_CMD_SUCCESS
) {
1042 bzero(newmcg
, sizeof (struct hermon_sw_mcg_list_s
));
1043 hermon_rsrc_free(state
, &rsrc
);
1044 mutex_exit(&state
->hs_mcglock
);
1045 HERMON_WARNING(state
, "failed to write MCG entry");
1046 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: %08x\n",
1048 if (status
== HERMON_CMD_INVALID_STATUS
) {
1049 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1051 return (ibc_get_ci_failure(0));
1055 * Now read the current MCG entry (the one previously at the end of
1056 * hash chain) into the temporary MCG. We are going to update its
1057 * "next_gid_indx" now and write the entry back to the MCG table.
1058 * Note: In general, this operation shouldn't fail. If it does, then
1059 * it is an indication that something (probably in HW, but maybe in SW)
1060 * has gone seriously wrong. We will free up the MCG entry resource,
1061 * but we will not undo the previously written MCG entry in the HW.
1062 * This is OK, though, because the MCG entry is not currently attached
1063 * to any hash chain.
1065 status
= hermon_read_mgm_cmd_post(state
, mcg_entry
, end_indx
,
1066 HERMON_CMD_NOSLEEP_SPIN
);
1067 if (status
!= HERMON_CMD_SUCCESS
) {
1068 bzero(newmcg
, sizeof (struct hermon_sw_mcg_list_s
));
1069 hermon_rsrc_free(state
, &rsrc
);
1070 mutex_exit(&state
->hs_mcglock
);
1071 HERMON_WARNING(state
, "failed to read MCG entry");
1072 cmn_err(CE_CONT
, "Hermon: READ_MGM command failed: %08x\n",
1074 if (status
== HERMON_CMD_INVALID_STATUS
) {
1075 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1077 return (ibc_get_ci_failure(0));
1081 * Finally, we update the "next_gid_indx" field in the temporary MCG
1082 * and attempt to write the entry back into the Hermon MCG table. If
1083 * this succeeds, then we update the "shadow" list to reflect the
1084 * change, drop the lock, and return success. Note: In general, this
1085 * operation shouldn't fail. If it does, then it is an indication
1086 * that something (probably in HW, but maybe in SW) has gone seriously
1087 * wrong. Just as we do above, we will free up the MCG entry resource,
1088 * but we will not try to undo the previously written MCG entry. This
1089 * is OK, though, because (since we failed here to update the end of
1090 * the chain) that other entry is not currently attached to any chain.
1092 mcg_entry
->next_gid_indx
= rsrc
->hr_indx
;
1093 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, end_indx
,
1094 HERMON_CMD_NOSLEEP_SPIN
);
1095 if (status
!= HERMON_CMD_SUCCESS
) {
1096 bzero(newmcg
, sizeof (struct hermon_sw_mcg_list_s
));
1097 hermon_rsrc_free(state
, &rsrc
);
1098 mutex_exit(&state
->hs_mcglock
);
1099 HERMON_WARNING(state
, "failed to write MCG entry");
1100 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: %08x\n",
1102 if (status
== HERMON_CMD_INVALID_STATUS
) {
1103 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1105 return (ibc_get_ci_failure(0));
1107 mcg
= &state
->hs_mcghdl
[end_indx
];
1108 mcg
->mcg_next_indx
= rsrc
->hr_indx
;
1111 * Now that we know all the Hermon firmware accesses have been
1112 * successful, we update the new "shadow" MCG entry by incrementing
1113 * the "number of attached QPs" count. Then we drop the lock and
1116 newmcg
->mcg_num_qps
++;
1119 * Increment the refcnt for this QP. Because the QP
1120 * was added to this MCG, the refcnt must be
1123 hermon_qp_mcg_refcnt_inc(qp
);
1125 mutex_exit(&state
->hs_mcglock
);
1126 return (DDI_SUCCESS
);
1131 * hermon_mcg_detach()
1132 * Context: Can be called only from user or kernel context.
1135 hermon_mcg_detach(hermon_state_t
*state
, hermon_qphdl_t qp
, ib_gid_t gid
,
1138 hermon_hw_mcg_t
*mcg_entry
;
1139 hermon_hw_mcg_qp_list_t
*mcg_entry_qplist
;
1140 hermon_mcghdl_t mcg
;
1142 uint32_t end_indx
, prev_indx
;
1146 * Check for invalid Multicast DLID. Specifically, all Multicast
1147 * LIDs should be within a well defined range. If the specified LID
1148 * is outside of that range, then return an error.
1150 if (hermon_mlid_is_valid(lid
) == 0) {
1151 return (IBT_MC_MLID_INVALID
);
1155 * Compute the MGID hash value. As described above, the MCG table is
1156 * arranged as a number of separate hash chains. This operation
1157 * converts the specified MGID into the starting index of an entry in
1158 * the hash table (i.e. the index for the start of the appropriate
1159 * hash chain). Subsequent operations below will walk the chain
1160 * searching for a matching entry from which to attempt to remove
1163 status
= hermon_mgid_hash_cmd_post(state
, gid
.gid_prefix
, gid
.gid_guid
,
1164 &mgid_hash
, HERMON_SLEEPFLAG_FOR_CONTEXT());
1165 if (status
!= HERMON_CMD_SUCCESS
) {
1166 cmn_err(CE_CONT
, "Hermon: MGID_HASH command failed: %08x\n",
1168 if (status
== HERMON_CMD_INVALID_STATUS
) {
1169 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1171 return (ibc_get_ci_failure(0));
1175 * Grab the multicast group mutex. Then grab the pre-allocated
1176 * temporary buffer used for holding and/or modifying MCG entries.
1178 mutex_enter(&state
->hs_mcglock
);
1179 mcg_entry
= state
->hs_mcgtmp
;
1180 mcg_entry_qplist
= HERMON_MCG_GET_QPLIST_PTR(mcg_entry
);
1183 * Walk through the array of MCG entries starting at "mgid_hash".
1184 * Try to find an MCG entry with a matching MGID. The
1185 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1186 * index into the MCG table. The entry at this index is checked to
1187 * determine whether it is a match or not. If it is a match, then
1188 * we continue on to attempt to remove the QP from the MCG. If it
1189 * is not a match (or not a valid MCG entry), then we return an error.
1191 end_indx
= hermon_mcg_walk_mgid_hash(state
, mgid_hash
, gid
, &prev_indx
);
1192 mcg
= &state
->hs_mcghdl
[end_indx
];
1195 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1196 * does not match the MGID in the current entry, then return
1197 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1200 if (((mcg
->mcg_mgid_h
== 0) && (mcg
->mcg_mgid_l
== 0)) ||
1201 ((mcg
->mcg_mgid_h
!= gid
.gid_prefix
) ||
1202 (mcg
->mcg_mgid_l
!= gid
.gid_guid
))) {
1203 mutex_exit(&state
->hs_mcglock
);
1204 return (IBT_MC_MGID_INVALID
);
1208 * Read the current MCG entry into the temporary MCG. Note: In
1209 * general, this operation shouldn't fail. If it does, then it is
1210 * an indication that something (probably in HW, but maybe in SW)
1211 * has gone seriously wrong.
1213 status
= hermon_read_mgm_cmd_post(state
, mcg_entry
, end_indx
,
1214 HERMON_CMD_NOSLEEP_SPIN
);
1215 if (status
!= HERMON_CMD_SUCCESS
) {
1216 mutex_exit(&state
->hs_mcglock
);
1217 HERMON_WARNING(state
, "failed to read MCG entry");
1218 cmn_err(CE_CONT
, "Hermon: READ_MGM command failed: %08x\n",
1220 if (status
== HERMON_CMD_INVALID_STATUS
) {
1221 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1223 return (ibc_get_ci_failure(0));
1227 * Search the QP number list for a match. If a match is found, then
1228 * remove the entry from the QP list. Otherwise, if no match is found,
1231 status
= hermon_mcg_qplist_remove(mcg
, mcg_entry_qplist
, qp
);
1232 if (status
!= DDI_SUCCESS
) {
1233 mutex_exit(&state
->hs_mcglock
);
1238 * Decrement the MCG count for this QP. When the 'qp_mcg'
1239 * field becomes 0, then this QP is no longer a member of any
1242 hermon_qp_mcg_refcnt_dec(qp
);
1245 * If the current MCG's QP number list is about to be made empty
1246 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1247 * chain. Otherwise, just write the updated MCG entry back to the
1248 * hardware. In either case, once we successfully update the hardware
1249 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1250 * count (or zero out the entire "shadow" list entry) before returning
1251 * success. Note: Zeroing out the "shadow" list entry is done
1252 * inside of hermon_mcg_hash_list_remove().
1254 if (mcg
->mcg_num_qps
== 1) {
1256 /* Remove an MCG entry from the hash chain */
1257 status
= hermon_mcg_hash_list_remove(state
, end_indx
, prev_indx
,
1259 if (status
!= DDI_SUCCESS
) {
1260 mutex_exit(&state
->hs_mcglock
);
1266 * Write the updated MCG entry back to the Hermon MCG table.
1267 * If this succeeds, then we update the "shadow" list to
1268 * reflect the change (i.e. decrement the "mcg_num_qps"),
1269 * drop the lock, and return success. Note: In general,
1270 * this operation shouldn't fail. If it does, then it is an
1271 * indication that something (probably in HW, but maybe in SW)
1272 * has gone seriously wrong.
1274 mcg_entry
->member_cnt
= (mcg
->mcg_num_qps
- 1);
1275 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, end_indx
,
1276 HERMON_CMD_NOSLEEP_SPIN
);
1277 if (status
!= HERMON_CMD_SUCCESS
) {
1278 mutex_exit(&state
->hs_mcglock
);
1279 HERMON_WARNING(state
, "failed to write MCG entry");
1280 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: "
1282 if (status
== HERMON_CMD_INVALID_STATUS
) {
1283 hermon_fm_ereport(state
, HCA_SYS_ERR
,
1286 return (ibc_get_ci_failure(0));
1291 mutex_exit(&state
->hs_mcglock
);
1292 return (DDI_SUCCESS
);
1296 * hermon_qp_mcg_refcnt_inc()
1297 * Context: Can be called from interrupt or base context.
1300 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp
)
1302 /* Increment the QP's MCG reference count */
1303 mutex_enter(&qp
->qp_lock
);
1304 qp
->qp_mcg_refcnt
++;
1305 mutex_exit(&qp
->qp_lock
);
1310 * hermon_qp_mcg_refcnt_dec()
1311 * Context: Can be called from interrupt or base context.
1314 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp
)
1316 /* Decrement the QP's MCG reference count */
1317 mutex_enter(&qp
->qp_lock
);
1318 qp
->qp_mcg_refcnt
--;
1319 mutex_exit(&qp
->qp_lock
);
1324 * hermon_mcg_qplist_add()
1325 * Context: Can be called from interrupt or base context.
1328 hermon_mcg_qplist_add(hermon_state_t
*state
, hermon_mcghdl_t mcg
,
1329 hermon_hw_mcg_qp_list_t
*mcg_qplist
, hermon_qphdl_t qp
,
1334 ASSERT(MUTEX_HELD(&state
->hs_mcglock
));
1336 qplist_indx
= mcg
->mcg_num_qps
;
1339 * Determine if we have exceeded the maximum number of QP per
1340 * multicast group. If we have, then return an error
1342 if (qplist_indx
>= state
->hs_cfg_profile
->cp_num_qp_per_mcg
) {
1343 return (IBT_HCA_MCG_QP_EXCEEDED
);
1347 * Determine if the QP is already attached to this MCG table. If it
1348 * is, then we break out and treat this operation as a NO-OP
1350 for (qplist_indx
= 0; qplist_indx
< mcg
->mcg_num_qps
;
1352 if (mcg_qplist
[qplist_indx
].qpn
== qp
->qp_qpnum
) {
1358 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1359 * return SUCCESS in this case, but the qplist will not have been
1360 * updated because the QP was already on the list.
1362 if (qplist_indx
< mcg
->mcg_num_qps
) {
1366 * Otherwise, append the new QP number to the end of the
1367 * current QP list. Note: We will increment the "mcg_num_qps"
1368 * field on the "shadow" MCG list entry later (after we know
1369 * that all necessary Hermon firmware accesses have been
1372 * Set 'qp_found' to 0 so we know the QP was added on to the
1375 mcg_qplist
[qplist_indx
].qpn
=
1376 (qp
->qp_qpnum
| HERMON_MCG_QPN_BLOCK_LB
);
1380 return (DDI_SUCCESS
);
1386 * hermon_mcg_qplist_remove()
1387 * Context: Can be called from interrupt or base context.
1390 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg
,
1391 hermon_hw_mcg_qp_list_t
*mcg_qplist
, hermon_qphdl_t qp
)
1393 uint_t i
, qplist_indx
;
1396 * Search the MCG QP list for a matching QPN. When
1397 * it's found, we swap the last entry with the current
1398 * one, set the last entry to zero, decrement the last
1399 * entry, and return. If it's not found, then it's
1402 qplist_indx
= mcg
->mcg_num_qps
;
1403 for (i
= 0; i
< qplist_indx
; i
++) {
1404 if (mcg_qplist
[i
].qpn
== qp
->qp_qpnum
) {
1405 mcg_qplist
[i
] = mcg_qplist
[qplist_indx
- 1];
1406 mcg_qplist
[qplist_indx
- 1].qpn
= 0;
1408 return (DDI_SUCCESS
);
1412 return (IBT_QP_HDL_INVALID
);
1417 * hermon_mcg_walk_mgid_hash()
1418 * Context: Can be called from interrupt or base context.
1421 hermon_mcg_walk_mgid_hash(hermon_state_t
*state
, uint64_t start_indx
,
1422 ib_gid_t mgid
, uint_t
*p_indx
)
1424 hermon_mcghdl_t curr_mcghdl
;
1425 uint_t curr_indx
, prev_indx
;
1427 ASSERT(MUTEX_HELD(&state
->hs_mcglock
));
1429 /* Start at the head of the hash chain */
1430 curr_indx
= (uint_t
)start_indx
;
1431 prev_indx
= curr_indx
;
1432 curr_mcghdl
= &state
->hs_mcghdl
[curr_indx
];
1434 /* If the first entry in the chain has MGID == 0, then stop */
1435 if ((curr_mcghdl
->mcg_mgid_h
== 0) &&
1436 (curr_mcghdl
->mcg_mgid_l
== 0)) {
1437 goto end_mgid_hash_walk
;
1440 /* If the first entry in the chain matches the MGID, then stop */
1441 if ((curr_mcghdl
->mcg_mgid_h
== mgid
.gid_prefix
) &&
1442 (curr_mcghdl
->mcg_mgid_l
== mgid
.gid_guid
)) {
1443 goto end_mgid_hash_walk
;
1446 /* Otherwise, walk the hash chain looking for a match */
1447 while (curr_mcghdl
->mcg_next_indx
!= 0) {
1448 prev_indx
= curr_indx
;
1449 curr_indx
= curr_mcghdl
->mcg_next_indx
;
1450 curr_mcghdl
= &state
->hs_mcghdl
[curr_indx
];
1452 if ((curr_mcghdl
->mcg_mgid_h
== mgid
.gid_prefix
) &&
1453 (curr_mcghdl
->mcg_mgid_l
== mgid
.gid_guid
)) {
1460 * If necessary, return the index of the previous entry too. This
1461 * is primarily used for detaching a QP from a multicast group. It
1462 * may be necessary, in that case, to delete an MCG entry from the
1463 * hash chain and having the index of the previous entry is helpful.
1465 if (p_indx
!= NULL
) {
1466 *p_indx
= prev_indx
;
1473 * hermon_mcg_setup_new_hdr()
1474 * Context: Can be called from interrupt or base context.
1477 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg
, hermon_hw_mcg_t
*mcg_hdr
,
1478 ib_gid_t mgid
, hermon_rsrc_t
*mcg_rsrc
)
1481 * Fill in the fields of the "shadow" entry used by software
1482 * to track MCG hardware entry
1484 mcg
->mcg_mgid_h
= mgid
.gid_prefix
;
1485 mcg
->mcg_mgid_l
= mgid
.gid_guid
;
1486 mcg
->mcg_rsrcp
= mcg_rsrc
;
1487 mcg
->mcg_next_indx
= 0;
1488 mcg
->mcg_num_qps
= 0;
1491 * Fill the header fields of the MCG entry (in the temporary copy)
1493 mcg_hdr
->mgid_h
= mgid
.gid_prefix
;
1494 mcg_hdr
->mgid_l
= mgid
.gid_guid
;
1495 mcg_hdr
->next_gid_indx
= 0;
1500 * hermon_mcg_hash_list_remove()
1501 * Context: Can be called only from user or kernel context.
1504 hermon_mcg_hash_list_remove(hermon_state_t
*state
, uint_t curr_indx
,
1505 uint_t prev_indx
, hermon_hw_mcg_t
*mcg_entry
)
1507 hermon_mcghdl_t curr_mcg
, prev_mcg
, next_mcg
;
1511 /* Get the pointer to "shadow" list for current entry */
1512 curr_mcg
= &state
->hs_mcghdl
[curr_indx
];
1515 * If this is the first entry on a hash chain, then attempt to replace
1516 * the entry with the next entry on the chain. If there are no
1517 * subsequent entries on the chain, then this is the only entry and
1518 * should be invalidated.
1520 if (curr_indx
== prev_indx
) {
1523 * If this is the only entry on the chain, then invalidate it.
1524 * Note: Invalidating an MCG entry means writing all zeros
1525 * to the entry. This is only necessary for those MCG
1526 * entries that are the "head" entries of the individual hash
1527 * chains. Regardless of whether this operation returns
1528 * success or failure, return that result to the caller.
1530 next_indx
= curr_mcg
->mcg_next_indx
;
1531 if (next_indx
== 0) {
1532 status
= hermon_mcg_entry_invalidate(state
, mcg_entry
,
1534 bzero(curr_mcg
, sizeof (struct hermon_sw_mcg_list_s
));
1539 * Otherwise, this is just the first entry on the chain, so
1542 next_mcg
= &state
->hs_mcghdl
[next_indx
];
1545 * Read the next MCG entry into the temporary MCG. Note:
1546 * In general, this operation shouldn't fail. If it does,
1547 * then it is an indication that something (probably in HW,
1548 * but maybe in SW) has gone seriously wrong.
1550 status
= hermon_read_mgm_cmd_post(state
, mcg_entry
, next_indx
,
1551 HERMON_CMD_NOSLEEP_SPIN
);
1552 if (status
!= HERMON_CMD_SUCCESS
) {
1553 HERMON_WARNING(state
, "failed to read MCG entry");
1554 cmn_err(CE_CONT
, "Hermon: READ_MGM command failed: "
1556 if (status
== HERMON_CMD_INVALID_STATUS
) {
1557 hermon_fm_ereport(state
, HCA_SYS_ERR
,
1560 return (ibc_get_ci_failure(0));
1564 * Copy/Write the temporary MCG back to the hardware MCG list
1565 * using the current index. This essentially removes the
1566 * current MCG entry from the list by writing over it with
1567 * the next one. If this is successful, then we can do the
1568 * same operation for the "shadow" list. And we can also
1569 * free up the Hermon MCG entry resource that was associated
1570 * with the (old) next entry. Note: In general, this
1571 * operation shouldn't fail. If it does, then it is an
1572 * indication that something (probably in HW, but maybe in SW)
1573 * has gone seriously wrong.
1575 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, curr_indx
,
1576 HERMON_CMD_NOSLEEP_SPIN
);
1577 if (status
!= HERMON_CMD_SUCCESS
) {
1578 HERMON_WARNING(state
, "failed to write MCG entry");
1579 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: "
1581 if (status
== HERMON_CMD_INVALID_STATUS
) {
1582 hermon_fm_ereport(state
, HCA_SYS_ERR
,
1585 return (ibc_get_ci_failure(0));
1589 * Copy all the software tracking information from the next
1590 * entry on the "shadow" MCG list into the current entry on
1591 * the list. Then invalidate (zero out) the other "shadow"
1594 bcopy(next_mcg
, curr_mcg
, sizeof (struct hermon_sw_mcg_list_s
));
1595 bzero(next_mcg
, sizeof (struct hermon_sw_mcg_list_s
));
1598 * Free up the Hermon MCG entry resource used by the "next"
1599 * MCG entry. That resource is no longer needed by any
1600 * MCG entry which is first on a hash chain (like the "next"
1601 * entry has just become).
1603 hermon_rsrc_free(state
, &curr_mcg
->mcg_rsrcp
);
1605 return (DDI_SUCCESS
);
1609 * Else if this is the last entry on the hash chain (or a middle
1610 * entry, then we update the previous entry's "next_gid_index" field
1611 * to make it point instead to the next entry on the chain. By
1612 * skipping over the removed entry in this way, we can then free up
1613 * any resources associated with the current entry. Note: We don't
1614 * need to invalidate the "skipped over" hardware entry because it
1615 * will no be longer connected to any hash chains, and if/when it is
1616 * finally re-used, it will be written with entirely new values.
1620 * Read the next MCG entry into the temporary MCG. Note: In general,
1621 * this operation shouldn't fail. If it does, then it is an
1622 * indication that something (probably in HW, but maybe in SW) has
1623 * gone seriously wrong.
1625 status
= hermon_read_mgm_cmd_post(state
, mcg_entry
, prev_indx
,
1626 HERMON_CMD_NOSLEEP_SPIN
);
1627 if (status
!= HERMON_CMD_SUCCESS
) {
1628 HERMON_WARNING(state
, "failed to read MCG entry");
1629 cmn_err(CE_CONT
, "Hermon: READ_MGM command failed: %08x\n",
1631 if (status
== HERMON_CMD_INVALID_STATUS
) {
1632 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1634 return (ibc_get_ci_failure(0));
1638 * Finally, we update the "next_gid_indx" field in the temporary MCG
1639 * and attempt to write the entry back into the Hermon MCG table. If
1640 * this succeeds, then we update the "shadow" list to reflect the
1641 * change, free up the Hermon MCG entry resource that was associated
1642 * with the current entry, and return success. Note: In general,
1643 * this operation shouldn't fail. If it does, then it is an indication
1644 * that something (probably in HW, but maybe in SW) has gone seriously
1647 mcg_entry
->next_gid_indx
= curr_mcg
->mcg_next_indx
;
1648 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, prev_indx
,
1649 HERMON_CMD_NOSLEEP_SPIN
);
1650 if (status
!= HERMON_CMD_SUCCESS
) {
1651 HERMON_WARNING(state
, "failed to write MCG entry");
1652 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: %08x\n",
1654 if (status
== HERMON_CMD_INVALID_STATUS
) {
1655 hermon_fm_ereport(state
, HCA_SYS_ERR
,
1658 return (ibc_get_ci_failure(0));
1662 * Get the pointer to the "shadow" MCG list entry for the previous
1663 * MCG. Update its "mcg_next_indx" to point to the next entry
1664 * the one after the current entry. Note: This next index may be
1665 * zero, indicating the end of the list.
1667 prev_mcg
= &state
->hs_mcghdl
[prev_indx
];
1668 prev_mcg
->mcg_next_indx
= curr_mcg
->mcg_next_indx
;
1671 * Free up the Hermon MCG entry resource used by the current entry.
1672 * This resource is no longer needed because the chain now skips over
1673 * the current entry. Then invalidate (zero out) the current "shadow"
1676 hermon_rsrc_free(state
, &curr_mcg
->mcg_rsrcp
);
1677 bzero(curr_mcg
, sizeof (struct hermon_sw_mcg_list_s
));
1679 return (DDI_SUCCESS
);
1684 * hermon_mcg_entry_invalidate()
1685 * Context: Can be called only from user or kernel context.
1688 hermon_mcg_entry_invalidate(hermon_state_t
*state
, hermon_hw_mcg_t
*mcg_entry
,
1694 * Invalidate the hardware MCG entry by zeroing out this temporary
1695 * MCG and writing it the the hardware. Note: In general, this
1696 * operation shouldn't fail. If it does, then it is an indication
1697 * that something (probably in HW, but maybe in SW) has gone seriously
1700 bzero(mcg_entry
, HERMON_MCGMEM_SZ(state
));
1701 status
= hermon_write_mgm_cmd_post(state
, mcg_entry
, indx
,
1702 HERMON_CMD_NOSLEEP_SPIN
);
1703 if (status
!= HERMON_CMD_SUCCESS
) {
1704 HERMON_WARNING(state
, "failed to write MCG entry");
1705 cmn_err(CE_CONT
, "Hermon: WRITE_MGM command failed: %08x\n",
1707 if (status
== HERMON_CMD_INVALID_STATUS
) {
1708 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1710 return (ibc_get_ci_failure(0));
1713 return (DDI_SUCCESS
);
1718 * hermon_mgid_is_valid()
1719 * Context: Can be called from interrupt or base context.
1722 hermon_mgid_is_valid(ib_gid_t gid
)
1724 uint_t topbits
, flags
, scope
;
1727 * According to IBA 1.1 specification (section 4.1.1) a valid
1728 * "multicast GID" must have its top eight bits set to all ones
1730 topbits
= (gid
.gid_prefix
>> HERMON_MCG_TOPBITS_SHIFT
) &
1731 HERMON_MCG_TOPBITS_MASK
;
1732 if (topbits
!= HERMON_MCG_TOPBITS
) {
1737 * The next 4 bits are the "flag" bits. These are valid only
1738 * if they are "0" (which correspond to permanently assigned/
1739 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1740 * multicast GIDs). All other values are reserved.
1742 flags
= (gid
.gid_prefix
>> HERMON_MCG_FLAGS_SHIFT
) &
1743 HERMON_MCG_FLAGS_MASK
;
1744 if (!((flags
== HERMON_MCG_FLAGS_PERM
) ||
1745 (flags
== HERMON_MCG_FLAGS_NONPERM
))) {
1750 * The next 4 bits are the "scope" bits. These are valid only
1751 * if they are "2" (Link-local), "5" (Site-local), "8"
1752 * (Organization-local) or "E" (Global). All other values
1753 * are reserved (or currently unassigned).
1755 scope
= (gid
.gid_prefix
>> HERMON_MCG_SCOPE_SHIFT
) &
1756 HERMON_MCG_SCOPE_MASK
;
1757 if (!((scope
== HERMON_MCG_SCOPE_LINKLOC
) ||
1758 (scope
== HERMON_MCG_SCOPE_SITELOC
) ||
1759 (scope
== HERMON_MCG_SCOPE_ORGLOC
) ||
1760 (scope
== HERMON_MCG_SCOPE_GLOBAL
))) {
1765 * If it passes all of the above checks, then we will consider it
1766 * a valid multicast GID.
1773 * hermon_mlid_is_valid()
1774 * Context: Can be called from interrupt or base context.
1777 hermon_mlid_is_valid(ib_lid_t lid
)
1780 * According to IBA 1.1 specification (section 4.1.1) a valid
1781 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1783 if ((lid
< IB_LID_MC_FIRST
) || (lid
> IB_LID_MC_LAST
)) {
1793 * Context: Can be called only from user or kernel context.
1796 hermon_pd_alloc(hermon_state_t
*state
, hermon_pdhdl_t
*pdhdl
, uint_t sleepflag
)
1798 hermon_rsrc_t
*rsrc
;
1803 * Allocate the software structure for tracking the protection domain
1804 * (i.e. the Hermon Protection Domain handle). By default each PD
1805 * structure will have a unique PD number assigned to it. All that
1806 * is necessary is for software to initialize the PD reference count
1807 * (to zero) and return success.
1809 status
= hermon_rsrc_alloc(state
, HERMON_PDHDL
, 1, sleepflag
, &rsrc
);
1810 if (status
!= DDI_SUCCESS
) {
1811 return (IBT_INSUFF_RESOURCE
);
1813 pd
= (hermon_pdhdl_t
)rsrc
->hr_addr
;
1814 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd
))
1819 return (DDI_SUCCESS
);
1825 * Context: Can be called only from user or kernel context.
1828 hermon_pd_free(hermon_state_t
*state
, hermon_pdhdl_t
*pdhdl
)
1830 hermon_rsrc_t
*rsrc
;
1834 * Pull all the necessary information from the Hermon Protection Domain
1835 * handle. This is necessary here because the resource for the
1836 * PD is going to be freed up as part of this operation.
1839 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd
))
1840 rsrc
= pd
->pd_rsrcp
;
1843 * Check the PD reference count. If the reference count is non-zero,
1844 * then it means that this protection domain is still referenced by
1845 * some memory region, queue pair, address handle, or other IB object
1846 * If it is non-zero, then return an error. Otherwise, free the
1847 * Hermon resource and return success.
1849 if (pd
->pd_refcnt
!= 0) {
1850 return (IBT_PD_IN_USE
);
1853 /* Free the Hermon Protection Domain handle */
1854 hermon_rsrc_free(state
, &rsrc
);
1856 /* Set the pdhdl pointer to NULL and return success */
1857 *pdhdl
= (hermon_pdhdl_t
)NULL
;
1859 return (DDI_SUCCESS
);
1864 * hermon_pd_refcnt_inc()
1865 * Context: Can be called from interrupt or base context.
1868 hermon_pd_refcnt_inc(hermon_pdhdl_t pd
)
1870 /* Increment the protection domain's reference count */
1871 atomic_inc_32(&pd
->pd_refcnt
);
1876 * hermon_pd_refcnt_dec()
1877 * Context: Can be called from interrupt or base context.
1880 hermon_pd_refcnt_dec(hermon_pdhdl_t pd
)
1882 /* Decrement the protection domain's reference count */
1883 atomic_dec_32(&pd
->pd_refcnt
);
1888 * hermon_port_query()
1889 * Context: Can be called only from user or kernel context.
1892 hermon_port_query(hermon_state_t
*state
, uint_t port
, ibt_hca_portinfo_t
*pi
)
1894 sm_portinfo_t portinfo
;
1895 sm_guidinfo_t guidinfo
;
1896 sm_pkey_table_t pkeytable
;
1898 uint_t sgid_max
, pkey_max
, tbl_size
;
1899 int i
, j
, indx
, status
;
1903 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi
))
1904 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state
))
1906 /* Validate that specified port number is legal */
1907 if (!hermon_portnum_is_valid(state
, port
)) {
1908 return (IBT_HCA_PORT_INVALID
);
1910 pkeyp
= state
->hs_pkey
[port
- 1];
1911 guidp
= state
->hs_guid
[port
- 1];
1914 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1915 * to the firmware (for the specified port number). This returns
1916 * a full PortInfo MAD (in "portinfo") which we subsequently
1917 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1920 status
= hermon_getportinfo_cmd_post(state
, port
,
1921 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo
);
1922 if (status
!= HERMON_CMD_SUCCESS
) {
1923 cmn_err(CE_CONT
, "Hermon: GetPortInfo (port %02d) command "
1924 "failed: %08x\n", port
, status
);
1925 if (status
== HERMON_CMD_INVALID_STATUS
) {
1926 hermon_fm_ereport(state
, HCA_SYS_ERR
, HCA_ERR_SRV_LOST
);
1928 return (ibc_get_ci_failure(0));
1932 * Parse the PortInfo MAD and fill in the IBTF structure
1934 pi
->p_base_lid
= portinfo
.LID
;
1935 pi
->p_qkey_violations
= portinfo
.Q_KeyViolations
;
1936 pi
->p_pkey_violations
= portinfo
.P_KeyViolations
;
1937 pi
->p_sm_sl
= portinfo
.MasterSMSL
;
1938 pi
->p_sm_lid
= portinfo
.MasterSMLID
;
1939 pi
->p_linkstate
= portinfo
.PortState
;
1940 pi
->p_port_num
= portinfo
.LocalPortNum
;
1941 pi
->p_phys_state
= portinfo
.PortPhysicalState
;
1942 pi
->p_width_supported
= portinfo
.LinkWidthSupported
;
1943 pi
->p_width_enabled
= portinfo
.LinkWidthEnabled
;
1944 pi
->p_width_active
= portinfo
.LinkWidthActive
;
1945 pi
->p_speed_supported
= portinfo
.LinkSpeedSupported
;
1946 pi
->p_speed_enabled
= portinfo
.LinkSpeedEnabled
;
1947 pi
->p_speed_active
= portinfo
.LinkSpeedActive
;
1948 pi
->p_mtu
= portinfo
.MTUCap
;
1949 pi
->p_lmc
= portinfo
.LMC
;
1950 pi
->p_max_vl
= portinfo
.VLCap
;
1951 pi
->p_subnet_timeout
= portinfo
.SubnetTimeOut
;
1952 pi
->p_msg_sz
= ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ
);
1953 tbl_size
= state
->hs_cfg_profile
->cp_log_max_gidtbl
;
1954 pi
->p_sgid_tbl_sz
= (1 << tbl_size
);
1955 tbl_size
= state
->hs_cfg_profile
->cp_log_max_pkeytbl
;
1956 pi
->p_pkey_tbl_sz
= (1 << tbl_size
);
1957 state
->hs_sn_prefix
[port
- 1] = portinfo
.GidPrefix
;
1960 * Convert InfiniBand-defined port capability flags to the format
1961 * specified by the IBTF
1963 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_SM
)
1964 pi
->p_capabilities
|= IBT_PORT_CAP_SM
;
1965 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_SM_DISABLED
)
1966 pi
->p_capabilities
|= IBT_PORT_CAP_SM_DISABLED
;
1967 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_SNMP_SUPPD
)
1968 pi
->p_capabilities
|= IBT_PORT_CAP_SNMP_TUNNEL
;
1969 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_DM_SUPPD
)
1970 pi
->p_capabilities
|= IBT_PORT_CAP_DM
;
1971 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_VM_SUPPD
)
1972 pi
->p_capabilities
|= IBT_PORT_CAP_VENDOR
;
1973 if (portinfo
.CapabilityMask
& SM_CAP_MASK_IS_CLNT_REREG_SUPPD
)
1974 pi
->p_capabilities
|= IBT_PORT_CAP_CLNT_REREG
;
1977 * Fill in the SGID table. Since the only access to the Hermon
1978 * GID tables is through the firmware's MAD_IFC interface, we
1979 * post as many GetGUIDInfo MADs as necessary to read in the entire
1980 * contents of the SGID table (for the specified port). Note: The
1981 * GetGUIDInfo command only gets eight GUIDs per operation. These
1982 * GUIDs are then appended to the GID prefix for the port (from the
1983 * GetPortInfo above) to form the entire SGID table.
1985 for (i
= 0; i
< pi
->p_sgid_tbl_sz
; i
+= 8) {
1986 status
= hermon_getguidinfo_cmd_post(state
, port
, i
>> 3,
1987 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo
);
1988 if (status
!= HERMON_CMD_SUCCESS
) {
1989 cmn_err(CE_CONT
, "Hermon: GetGUIDInfo (port %02d) "
1990 "command failed: %08x\n", port
, status
);
1991 if (status
== HERMON_CMD_INVALID_STATUS
) {
1992 hermon_fm_ereport(state
, HCA_SYS_ERR
,
1995 return (ibc_get_ci_failure(0));
1998 /* Figure out how many of the entries are valid */
1999 sgid_max
= min((pi
->p_sgid_tbl_sz
- i
), 8);
2000 for (j
= 0; j
< sgid_max
; j
++) {
2002 sgid
= &pi
->p_sgid_tbl
[indx
];
2003 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid
))
2004 sgid
->gid_prefix
= portinfo
.GidPrefix
;
2005 guidp
[indx
] = sgid
->gid_guid
=
2006 guidinfo
.GUIDBlocks
[j
];
2011 * Fill in the PKey table. Just as for the GID tables above, the
2012 * only access to the Hermon PKey tables is through the firmware's
2013 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary
2014 * to read in the entire contents of the PKey table (for the specified
2015 * port). Note: The GetPKeyTable command only gets 32 PKeys per
2018 for (i
= 0; i
< pi
->p_pkey_tbl_sz
; i
+= 32) {
2019 status
= hermon_getpkeytable_cmd_post(state
, port
, i
,
2020 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable
);
2021 if (status
!= HERMON_CMD_SUCCESS
) {
2022 cmn_err(CE_CONT
, "Hermon: GetPKeyTable (port %02d) "
2023 "command failed: %08x\n", port
, status
);
2024 if (status
== HERMON_CMD_INVALID_STATUS
) {
2025 hermon_fm_ereport(state
, HCA_SYS_ERR
,
2028 return (ibc_get_ci_failure(0));
2031 /* Figure out how many of the entries are valid */
2032 pkey_max
= min((pi
->p_pkey_tbl_sz
- i
), 32);
2033 for (j
= 0; j
< pkey_max
; j
++) {
2035 pkeyp
[indx
] = pi
->p_pkey_tbl
[indx
] =
2036 pkeytable
.P_KeyTableBlocks
[j
];
2040 return (DDI_SUCCESS
);
/*
 * hermon_port_modify()
 *    Modify an IB port's capability mask (and optionally reset its QKey
 *    violation counter) via the firmware GetPortInfo/SET_PORT commands.
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_port_modify(hermon_state_t *state, uint8_t port,
    ibt_port_modify_flags_t flags, uint8_t init_type)
{
	sm_portinfo_t		portinfo;
	uint32_t		capmask;
	int			status;
	hermon_hw_set_port_t	set_port;

	/*
	 * Return an error if either of the unsupported flags are set
	 */
	if ((flags & IBT_PORT_SHUTDOWN) ||
	    (flags & IBT_PORT_SET_INIT_TYPE)) {
		return (IBT_NOT_SUPPORTED);
	}
	bzero(&set_port, sizeof (set_port));

	/*
	 * Determine whether we are trying to reset the QKey counter
	 */
	if (flags & IBT_PORT_RESET_QKEY)
		set_port.rqk = 1;

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
	 * firmware (for the specified port number).  This returns a full
	 * PortInfo MAD (in "portinfo") from which we pull the current
	 * capability mask.  We then modify the capability mask as directed
	 * by the "pmod_flags" field, and write the updated capability mask
	 * using the Hermon SET_PORT command (below).
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		if (status == HERMON_CMD_INVALID_STATUS) {
			/* Firmware unresponsive: report a lost-service FMA event */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  Specifically, we modify the capability
	 * mask based on the specified values.
	 */
	capmask = portinfo.CapabilityMask;

	/* Each capability has an explicit "reset" and "set" request flag */
	if (flags & IBT_PORT_RESET_SM)
		capmask &= ~SM_CAP_MASK_IS_SM;
	else if (flags & IBT_PORT_SET_SM)
		capmask |= SM_CAP_MASK_IS_SM;

	if (flags & IBT_PORT_RESET_SNMP)
		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
	else if (flags & IBT_PORT_SET_SNMP)
		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;

	if (flags & IBT_PORT_RESET_DEVMGT)
		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
	else if (flags & IBT_PORT_SET_DEVMGT)
		capmask |= SM_CAP_MASK_IS_DM_SUPPD;

	if (flags & IBT_PORT_RESET_VENDOR)
		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
	else if (flags & IBT_PORT_SET_VENDOR)
		capmask |= SM_CAP_MASK_IS_VM_SUPPD;

	set_port.cap_mask = capmask;

	/*
	 * Use the Hermon SET_PORT command to update the capability mask and
	 * (possibly) reset the QKey violation counter for the specified port.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = hermon_set_port_cmd_post(state, &set_port, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to modify port capabilities");
		cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
		    "%08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}
/*
 * hermon_set_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: This routine is used for two purposes.  It is used to fill in the
 * Hermon UDAV fields, and it is used to fill in the address path information
 * for QPs.  Because the two Hermon structures are similar, common fields can
 * be filled in here.  Because they are different, however, we pass
 * an additional flag to indicate which type is being filled and do each one
 * as appropriate.
 */
int hermon_srate_override = -1;	/* allows ease of testing */

int
hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
    hermon_hw_addr_path_t *path, uint_t type)
{
	uint_t		gidtbl_sz;
	hermon_hw_udav_t	*udav;

	/*
	 * The UDAV structure overlays the generic address path; "udav" is
	 * only dereferenced for UDAV-specific fields (portnum, sl).
	 */
	udav = (hermon_hw_udav_t *)(void *)path;
	path->mlid	= av->av_src_path;
	path->rlid	= av->av_dlid;

	/* Translate the IBTF static rate into the HW rate encoding */
	switch (av->av_srate) {
	case IBT_SRATE_2:	/* 1xSDR-2.5Gb/s injection rate */
		path->max_stat_rate = 7; break;
	case IBT_SRATE_10:	/* 4xSDR-10.0Gb/s injection rate */
		path->max_stat_rate = 8; break;
	case IBT_SRATE_30:	/* 12xSDR-30Gb/s injection rate */
		path->max_stat_rate = 9; break;
	case IBT_SRATE_5:	/* 1xDDR-5Gb/s injection rate */
		path->max_stat_rate = 10; break;
	case IBT_SRATE_20:	/* 4xDDR-20Gb/s injection rate */
		path->max_stat_rate = 11; break;
	case IBT_SRATE_40:	/* 4xQDR-40Gb/s injection rate */
		path->max_stat_rate = 12; break;
	case IBT_SRATE_60:	/* 12xDDR-60Gb/s injection rate */
		path->max_stat_rate = 13; break;
	case IBT_SRATE_80:	/* 8xQDR-80Gb/s injection rate */
		path->max_stat_rate = 14; break;
	case IBT_SRATE_120:	/* 12xQDR-120Gb/s injection rate */
		path->max_stat_rate = 15; break;
	case IBT_SRATE_NOT_SPECIFIED:	/* Max */
		path->max_stat_rate = 0; break;
	default:
		return (IBT_STATIC_RATE_INVALID);
	}
	if (hermon_srate_override != -1)	/* for evaluating HCA firmware */
		path->max_stat_rate = hermon_srate_override;

	/* If "grh" flag is set, then check for valid SGID index too */
	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
		/*
		 * NOTE(review): valid indices would normally be
		 * 0..gidtbl_sz-1, so ">=" looks more correct than ">" here;
		 * confirm against firmware table semantics before changing.
		 */
		return (IBT_SGID_INVALID);
	}

	/*
	 * Fill in all "global" values regardless of the value in the GRH
	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
	 * hardware will ignore the other "global" values as necessary.  Note:
	 * SW does this here to enable later query operations to return
	 * exactly the same params that were passed when the addr path was
	 * last written.
	 */
	path->grh = av->av_send_grh;
	if (type == HERMON_ADDRPATH_QP) {
		path->mgid_index = av->av_sgid_ix;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table), but having sections
		 * for each port.  So some extra calculations are necessary.
		 */
		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
		    av->av_sgid_ix;
		udav->portnum = av->av_port_num;
	}

	/*
	 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
	 * only need to do it for UDAV's.  So we enforce that here.
	 *
	 * NOTE: The entire 64 bits worth of GUID info is actually being
	 * preserved (for UDAVs) by the callers of this function
	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
	 * "don't care".
	 */
	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
		path->flow_label = av->av_flow;
		path->tclass	 = av->av_tclass;
		path->hop_limit	 = av->av_hop;
		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
		    sizeof (uint64_t));
		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
		    sizeof (uint64_t));
	} else {
		path->flow_label = 0;
		path->tclass	 = 0;
		path->hop_limit	 = 0;
		path->rgid_h	 = 0;
		path->rgid_l	 = 0x2;	/* per the PRM note above */
	}
	/* extract the default service level */
	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;

	return (DDI_SUCCESS);
}
/*
 * hermon_get_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: Just like hermon_set_addr_path() above, this routine is used for two
 * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
 * read in the address path information for QPs.  Because the two Hermon
 * structures are similar, common fields can be read in here.  But because
 * they are slightly different, we pass an additional flag to indicate which
 * type is being read.
 */
void
hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type)
{
	uint_t		gidtbl_sz;

	av->av_src_path	= path->mlid;
	av->av_dlid	= path->rlid;

	/* Set "av_ipd" value from max_stat_rate */
	switch (path->max_stat_rate) {
	case 7:		/* 1xSDR-2.5Gb/s injection rate */
		av->av_srate = IBT_SRATE_2; break;
	case 8:		/* 4xSDR-10.0Gb/s injection rate */
		av->av_srate = IBT_SRATE_10; break;
	case 9:		/* 12xSDR-30Gb/s injection rate */
		av->av_srate = IBT_SRATE_30; break;
	case 10:	/* 1xDDR-5Gb/s injection rate */
		av->av_srate = IBT_SRATE_5; break;
	case 11:	/* 4xDDR-20Gb/s injection rate */
		av->av_srate = IBT_SRATE_20; break;
	case 12:	/* 4xQDR-40Gb/s injection rate */
		av->av_srate = IBT_SRATE_40; break;
	case 13:	/* 12xDDR-60Gb/s injection rate */
		av->av_srate = IBT_SRATE_60; break;
	case 14:	/* 8xQDR-80Gb/s injection rate */
		av->av_srate = IBT_SRATE_80; break;
	case 15:	/* 12xQDR-120Gb/s injection rate */
		av->av_srate = IBT_SRATE_120; break;
	case 0:		/* HW encoding for "maximum"/unspecified */
		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
	default:	/* 1x injection rate */
		av->av_srate = IBT_SRATE_1X;
	}

	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
	 * knows to ignore the other "global" values as necessary.  Note: SW
	 * does it this way to enable these query operations to return exactly
	 * the same params that were passed when the addr path was last written.
	 */
	av->av_send_grh = path->grh;
	if (type == HERMON_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).
		 *
		 * NOTE(review): av_port_num is read here (to undo the
		 * per-port offset) before it is assigned from the UDAV
		 * below — this relies on the caller having pre-set
		 * av_port_num; confirm against the callers.
		 */
		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);

		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
	}
	av->av_flow	= path->flow_label;
	av->av_tclass	= path->tclass;
	av->av_hop	= path->hop_limit;
	/* this is for alignment issue w/ the addr path struct in Hermon */
	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
}
2347 * hermon_portnum_is_valid()
2348 * Context: Can be called from interrupt or base context.
2351 hermon_portnum_is_valid(hermon_state_t
*state
, uint_t portnum
)
2355 max_port
= state
->hs_cfg_profile
->cp_num_ports
;
2356 if ((portnum
<= max_port
) && (portnum
!= 0)) {
2365 * hermon_pkeyindex_is_valid()
2366 * Context: Can be called from interrupt or base context.
2369 hermon_pkeyindex_is_valid(hermon_state_t
*state
, uint_t pkeyindx
)
2371 uint_t max_pkeyindx
;
2373 max_pkeyindx
= 1 << state
->hs_cfg_profile
->cp_log_max_pkeytbl
;
2374 if (pkeyindx
< max_pkeyindx
) {
/*
 * hermon_queue_alloc()
 *    Allocate DMA-able memory for a HW queue (EQ/CQ/QP work queue etc.),
 *    honoring both the DMA-bind alignment and a possibly stricter "alloc"
 *    alignment.  On success fills in qa_info (handles, pointers, sizes).
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
    uint_t sleepflag)
{
	ddi_dma_attr_t		dma_attr;
	int			(*callback)(caddr_t);
	uint64_t		realsize, alloc_mask;
	int			flag, status;

	/* Set the callback flag appropriately */
	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
	    DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then set additional
	 * alignment restrictions as necessary for the queue memory.  Also
	 * respect the configured value for IOMMU bypass
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
	    &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the values
	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
	 * to solve here is that allocating a DMA handle with IOMMU bypass
	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
	 * that are less restrictive than the page size.  Since we may need
	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
	 * (e.g. in Hermon QP work queue memory allocation), we use the
	 * following method to calculate how much additional memory to request,
	 * and we enforce our own alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		/* over-allocate so we can round the pointer up below */
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
	 * host memory allocation, use ddi_umem_alloc().  In either case,
	 * return a pointer to the memory range allocated (including any
	 * necessary alignment adjustments), the "real" memory pointer,
	 * the "real" size, and a ddi_acc_handle_t to use when reading
	 * from/writing to the memory.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
		    (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);

	} else { /* HERMON_QUEUE_LOCATION_USERLAND */

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
	}

	/*
	 * The next to last step is to ensure that the final address
	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
	 * restriction applied to it (if necessary).
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}
	/*
	 * The last step is to figure out the offset of the start relative
	 * to the first page of the region - will be used in the eqc/cqc
	 * passed to the HW
	 */
	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
	    qa_info->qa_buf_aligned & HERMON_PAGEOFFSET);

	return (DDI_SUCCESS);
}
/*
 * hermon_queue_free()
 *    Release memory previously obtained via hermon_queue_alloc(), choosing
 *    the release path that matches the original allocation location.
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_queue_free(hermon_qalloc_info_t *qa_info)
{
	/*
	 * Depending on how (i.e. from where) we allocated the memory for
	 * this queue, we choose the appropriate method for releasing the
	 * resources.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {

		ddi_dma_mem_free(&qa_info->qa_acchdl);

	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {

		ddi_umem_free(qa_info->qa_umemcookie);

	}

	/* Always free the dma handle */
	ddi_dma_free_handle(&qa_info->qa_dmahdl);
}
/*
 * hermon_create_fmr_pool()
 *    Create a pool of FMRs.
 *    Context: Can be called from kernel context only.
 *
 * Allocates the pool descriptor, initializes its three locks and list
 * heads, then pre-allocates "fmr_pool_size" FMR memory regions, threading
 * each onto the pool's free list.  On any failure all partially-created
 * resources are torn down.
 */
int
hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
{
	hermon_fmrhdl_t	fmrpool;
	hermon_fmr_list_t *fmr, *fmr_next;
	hermon_mrhdl_t   mr;
	int		status;
	int		sleep;
	int		i;

	/*
	 * Check the sleep flag.  Both of the following conditions must be
	 * satisfied: IBT_MR_SLEEP may only be used from a context that is
	 * allowed to sleep.
	 */
	sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
	    HERMON_NOSLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
	if (fmrpool == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto fail;
	}

	mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));
	mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));
	mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	fmrpool->fmr_state	    = state;
	fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
	fmrpool->fmr_flush_arg	    = fmr_attr->fmr_func_arg;
	fmrpool->fmr_pool_size	    = 0;
	fmrpool->fmr_max_pages	    = fmr_attr->fmr_max_pages_per_fmr;
	fmrpool->fmr_page_sz	    = fmr_attr->fmr_page_sz;
	/* cleanup triggers when a quarter of the pool is dirty */
	fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4;
	fmrpool->fmr_dirty_len	    = 0;
	fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32;
	fmrpool->fmr_remap_len	    = 0;
	fmrpool->fmr_flags	    = fmr_attr->fmr_flags;
	fmrpool->fmr_stat_register  = 0;
	fmrpool->fmr_max_remaps	    = state->hs_cfg_profile->cp_fmr_max_remaps;
	fmrpool->fmr_remap_gen	    = 1;

	/* tails point at the list-head link so appends are uniform */
	fmrpool->fmr_free_list	    = NULL;
	fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
	fmrpool->fmr_dirty_list	    = NULL;
	fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
	fmrpool->fmr_remap_list	    = NULL;
	fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
	fmrpool->fmr_pool_size	    = fmrpool->fmr_free_len =
	    fmr_attr->fmr_pool_size;

	for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
		status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
		if (status != DDI_SUCCESS) {
			/*
			 * NOTE(review): the original also adjusts the
			 * pool_size accounting for the unallocated tail
			 * before unwinding — confirm against full source.
			 */
			goto fail2;
		}

		fmr = (hermon_fmr_list_t *)kmem_zalloc(
		    sizeof (hermon_fmr_list_t), sleep);

		/* cross-link the MR and its pool list entry */
		fmr->fmr	 = mr;
		fmr->fmr_remaps	 = 0;
		fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
		fmr->fmr_pool	 = fmrpool;
		mr->mr_fmr	 = fmr;

		/* entries are pushed at the head; first entry is the tail */
		if (!i)		/* address of last entry's link */
			fmrpool->fmr_free_list_tail = &fmr->fmr_next;
		fmr->fmr_next = fmrpool->fmr_free_list;
		fmrpool->fmr_free_list = fmr;
	}

	/* Set to return pool */
	*fmrpoolp = fmrpool;

	IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS");
	return (IBT_SUCCESS);

fail2:
	/* tear down every FMR that made it onto the free list */
	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;
		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (hermon_fmr_list_t));
	}
	kmem_free(fmrpool, sizeof (*fmrpool));
fail:
	*fmrpoolp = NULL;
	IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED");
	if (status == DDI_FAILURE) {
		return (ibc_get_ci_failure(0));
	} else {
		return (status);
	}
}
/*
 * hermon_destroy_fmr_pool()
 *    Destroy an FMR pool and free all associated resources.
 *    Context: Can be called from kernel context only.
 *
 * First sweeps dirty/remap entries back onto the free list via
 * hermon_fmr_cleanup(), then deallocates every FMR and list entry,
 * destroys the pool's locks, and frees the pool itself.
 */
int
hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	hermon_fmr_list_t	*fmr, *fmr_next;

	mutex_enter(&fmrpool->fmr_lock);
	/* move everything (dirty and remap lists) to the free list */
	hermon_fmr_cleanup(fmrpool);

	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;

		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (hermon_fmr_list_t));

		--fmrpool->fmr_pool_size;
	}
	/* every pool member must have been on the free list */
	ASSERT(fmrpool->fmr_pool_size == 0);
	mutex_exit(&fmrpool->fmr_lock);

	mutex_destroy(&fmrpool->fmr_lock);
	mutex_destroy(&fmrpool->dirty_lock);
	mutex_destroy(&fmrpool->remap_lock);

	kmem_free(fmrpool, sizeof (*fmrpool));
	IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS");
	return (DDI_SUCCESS);
}
/*
 * hermon_flush_fmr_pool()
 *    Ensure that all unmapped FMRs are fully invalidated.
 *    Context: Can be called from kernel context only.
 */
/* ARGSUSED */
int
hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	/*
	 * Force the unmapping of all entries on the dirty list, regardless of
	 * whether the watermark has been hit yet.
	 */
	/* grab the pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	hermon_fmr_cleanup(fmrpool);
	mutex_exit(&fmrpool->fmr_lock);
	return (DDI_SUCCESS);
}
/*
 * hermon_register_physical_fmr()
 *    Map memory into FMR
 *    Context: Can be called from interrupt or base context.
 *
 * Pops an entry off the pool's free list (replenishing it from the remap
 * list, or forcing a cleanup, if it is empty), then programs the FMR with
 * the caller's physical buffer description.
 */
int
hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
    ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
    ibt_pmr_desc_t *mem_desc_p)
{
	hermon_fmr_list_t	*fmr;
	int			status;

	/* Check length against the pool's per-FMR page limit */
	if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
	    fmrpool->fmr_max_pages)) {
		return (IBT_MR_LEN_INVALID);
	}

	mutex_enter(&fmrpool->fmr_lock);
	if (fmrpool->fmr_free_list == NULL) {
		/* free list empty: try pulling entries off the remap list */
		if (hermon_fmr_verbose & 2)
			IBTF_DPRINTF_L2("fmr", "register needs remap");
		mutex_enter(&fmrpool->remap_lock);
		if (fmrpool->fmr_remap_list) {
			/* add to free list */
			*(fmrpool->fmr_free_list_tail) =
			    fmrpool->fmr_remap_list;
			fmrpool->fmr_remap_list = NULL;
			fmrpool->fmr_free_list_tail =
			    fmrpool->fmr_remap_list_tail;

			/* reset the remap list */
			fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
			fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
			fmrpool->fmr_remap_len = 0;
		}
		mutex_exit(&fmrpool->remap_lock);
	}
	if (fmrpool->fmr_free_list == NULL) {
		/* still empty: force a dirty-list cleanup (SYNC_TPT) */
		if (hermon_fmr_verbose & 2)
			IBTF_DPRINTF_L2("fmr", "register needs cleanup");
		hermon_fmr_cleanup(fmrpool);
	}

	/* grab next free entry */
	fmr = fmrpool->fmr_free_list;
	if (fmr == NULL) {
		IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource");
		cmn_err(CE_CONT, "no free fmr resource\n");
		mutex_exit(&fmrpool->fmr_lock);
		return (IBT_INSUFF_RESOURCE);
	}

	/* unlink it; if the list drained, reset the tail pointer */
	if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL)
		fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
	fmr->fmr_next = NULL;
	fmrpool->fmr_stat_register++;
	mutex_exit(&fmrpool->fmr_lock);

	status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
	    mem_desc_p);
	if (status != DDI_SUCCESS) {
		/*
		 * NOTE(review): the popped entry is not returned to the
		 * free list on this path in the visible code — confirm
		 * against the full source.
		 */
		return (status);
	}
	if (hermon_rdma_debug & 0x4)
		IBTF_DPRINTF_L2("fmr", "  reg: mr %p  key %x",
		    fmr->fmr, fmr->fmr->mr_rkey);
	/* new SYNC_TPT generation invalidates the old remap count */
	if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) {
		fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
		fmr->fmr_remaps = 0;
	}

	fmr->fmr_remaps++;

	*mr = (hermon_mrhdl_t)fmr->fmr;

	return (DDI_SUCCESS);
}
/*
 * hermon_deregister_fmr()
 *    Unmap an FMR, queueing it for reuse (remap list) or invalidation
 *    (dirty list) depending on how many times it has been remapped.
 *    Context: Can be called from kernel context only.
 *
 * Lock order note: remap_lock and dirty_lock are each dropped before
 * fmr_lock is taken; the three locks are never held simultaneously here.
 */
int
hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_fmrhdl_t		fmrpool;
	hermon_fmr_list_t	*fmr, **fmrlast;
	int			len;

	fmr = mr->mr_fmr;
	fmrpool = fmr->fmr_pool;

	/* mark as owned by software */
	*(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0;

	if (fmr->fmr_remaps <
	    state->hs_cfg_profile->cp_fmr_max_remaps) {
		/* add to remap list */
		if (hermon_rdma_debug & 0x4)
			IBTF_DPRINTF_L2("fmr", "dereg: mr %p  key %x",
			    fmr->fmr, fmr->fmr->mr_rkey);
		mutex_enter(&fmrpool->remap_lock);
		fmr->fmr_next = NULL;
		*(fmrpool->fmr_remap_list_tail) = fmr;
		fmrpool->fmr_remap_list_tail = &fmr->fmr_next;
		fmrpool->fmr_remap_len++;

		/* conditionally add remap list back to free list */
		fmrlast = NULL;
		if (fmrpool->fmr_remap_len >=
		    fmrpool->fmr_remap_watermark) {
			/* detach the whole remap list while holding its lock */
			fmr = fmrpool->fmr_remap_list;
			fmrlast = fmrpool->fmr_remap_list_tail;
			len = fmrpool->fmr_remap_len;
			fmrpool->fmr_remap_len = 0;
			fmrpool->fmr_remap_list = NULL;
			fmrpool->fmr_remap_list_tail =
			    &fmrpool->fmr_remap_list;
		}
		mutex_exit(&fmrpool->remap_lock);
		if (fmrlast) {
			/* splice the detached chain onto the free list */
			mutex_enter(&fmrpool->fmr_lock);
			*(fmrpool->fmr_free_list_tail) = fmr;
			fmrpool->fmr_free_list_tail = fmrlast;
			fmrpool->fmr_free_len += len;
			mutex_exit(&fmrpool->fmr_lock);
		}
	} else {
		/* add to dirty list */
		if (hermon_rdma_debug & 0x4)
			IBTF_DPRINTF_L2("fmr", "dirty: mr %p  key %x",
			    fmr->fmr, fmr->fmr->mr_rkey);

		mutex_enter(&fmrpool->dirty_lock);
		fmr->fmr_next = NULL;
		*(fmrpool->fmr_dirty_list_tail) = fmr;
		fmrpool->fmr_dirty_list_tail = &fmr->fmr_next;
		fmrpool->fmr_dirty_len++;

		if (fmrpool->fmr_dirty_len >=
		    fmrpool->fmr_dirty_watermark) {
			/* watermark hit: invalidate via SYNC_TPT cleanup */
			mutex_exit(&fmrpool->dirty_lock);
			mutex_enter(&fmrpool->fmr_lock);
			hermon_fmr_cleanup(fmrpool);
			mutex_exit(&fmrpool->fmr_lock);
		} else
			mutex_exit(&fmrpool->dirty_lock);
	}
	return (DDI_SUCCESS);
}
2854 * hermon_fmr_cleanup()
2855 * Context: Called from any context.
2858 hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool
)
2862 ASSERT(MUTEX_HELD(&fmrpool
->fmr_lock
));
2864 if (fmrpool
->fmr_stat_register
== 0)
2867 fmrpool
->fmr_stat_register
= 0;
2870 if (hermon_fmr_verbose
)
2871 IBTF_DPRINTF_L2("fmr", "TPT_SYNC");
2872 status
= hermon_sync_tpt_cmd_post(fmrpool
->fmr_state
,
2873 HERMON_CMD_NOSLEEP_SPIN
);
2874 if (status
!= HERMON_CMD_SUCCESS
) {
2875 cmn_err(CE_WARN
, "fmr SYNC_TPT failed(%x)\n", status
);
2877 fmrpool
->fmr_remap_gen
++;
2879 /* add everything back to the free list */
2880 mutex_enter(&fmrpool
->dirty_lock
);
2881 if (fmrpool
->fmr_dirty_list
) {
2882 /* add to free list */
2883 *(fmrpool
->fmr_free_list_tail
) = fmrpool
->fmr_dirty_list
;
2884 fmrpool
->fmr_dirty_list
= NULL
;
2885 fmrpool
->fmr_free_list_tail
= fmrpool
->fmr_dirty_list_tail
;
2888 fmrpool
->fmr_dirty_list_tail
= &fmrpool
->fmr_dirty_list
;
2889 fmrpool
->fmr_free_len
+= fmrpool
->fmr_dirty_len
;
2890 fmrpool
->fmr_dirty_len
= 0;
2892 mutex_exit(&fmrpool
->dirty_lock
);
2894 mutex_enter(&fmrpool
->remap_lock
);
2895 if (fmrpool
->fmr_remap_list
) {
2896 /* add to free list */
2897 *(fmrpool
->fmr_free_list_tail
) = fmrpool
->fmr_remap_list
;
2898 fmrpool
->fmr_remap_list
= NULL
;
2899 fmrpool
->fmr_free_list_tail
= fmrpool
->fmr_remap_list_tail
;
2902 fmrpool
->fmr_remap_list_tail
= &fmrpool
->fmr_remap_list
;
2903 fmrpool
->fmr_free_len
+= fmrpool
->fmr_remap_len
;
2904 fmrpool
->fmr_remap_len
= 0;
2906 mutex_exit(&fmrpool
->remap_lock
);
2908 if (fmrpool
->fmr_flush_function
!= NULL
) {
2909 (void) fmrpool
->fmr_flush_function(
2910 (ibc_fmr_pool_hdl_t
)fmrpool
,
2911 fmrpool
->fmr_flush_arg
);