preprocessor cleanup: __sparc
[unleashed/tickless.git] / usr / src / uts / common / io / ib / adapters / hermon / hermon_misc.c
blob082fd365a24197a58f8e5331af6f7f64e12d4db3
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
27 * hermon_misc.c
28 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection
29 * Domain, and port-related operations
31 * Implements all the routines necessary for allocating, freeing, querying
32 * and modifying Address Handles and Protection Domains. Also implements
33 * all the routines necessary for adding and removing Queue Pairs to/from
34 * Multicast Groups. Lastly, it implements the routines necessary for
35 * port-related query and modify operations.
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/bitmap.h>
44 #include <sys/sysmacros.h>
46 #include <sys/ib/adapters/hermon/hermon.h>
48 extern int hermon_rdma_debug;
49 int hermon_fmr_verbose = 0;
51 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
52 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
54 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
57 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
60 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
61 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
62 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
63 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
64 hermon_hw_mcg_t *mcg_entry, uint_t indx);
65 static int hermon_mgid_is_valid(ib_gid_t gid);
66 static int hermon_mlid_is_valid(ib_lid_t lid);
67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool);
/*
 * Key used for user doorbell-record pages in the userland mapping database
 * and folded into the mmap offset handed back to the caller.
 *
 * Each index owns a window of HERMON_MAX_DBR_PAGES_PER_USER consecutive
 * keys, so the key for (index, page) is only distinct from the keys of the
 * next index while page < HERMON_MAX_DBR_PAGES_PER_USER.
 *
 * Both macro arguments are fully parenthesized so that an expression may
 * be passed for either one; the index is evaluated first and then widened
 * to 64 bits.
 */
#define	HERMON_MAX_DBR_PAGES_PER_USER	64
#define	HERMON_DBR_KEY(index, page) \
	(((uint64_t)(index)) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
74 static hermon_udbr_page_t *
75 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
76 uint_t page)
78 hermon_udbr_page_t *pagep;
79 ddi_dma_attr_t dma_attr;
80 uint_t cookiecnt;
81 int status;
82 hermon_umap_db_entry_t *umapdb;
83 ulong_t pagesize = PAGESIZE;
85 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
86 pagep->upg_index = page;
87 pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t);
89 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
90 pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8,
91 KM_SLEEP);
92 pagep->upg_kvaddr = ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP,
93 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
95 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
96 pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
98 hermon_dma_attr_init(state, &dma_attr);
99 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
100 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
101 if (status != DDI_SUCCESS) {
102 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: "
103 "ddi_dma_buf_bind_handle failed: %d", status);
104 return (NULL);
106 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
107 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
108 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
109 if (status != DDI_SUCCESS) {
110 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
111 "ddi_dma_buf_bind_handle failed: %d", status);
112 ddi_dma_free_handle(&pagep->upg_dmahdl);
113 return (NULL);
115 ASSERT(cookiecnt == 1);
117 /* create db entry for mmap */
118 umapdb = hermon_umap_db_alloc(state->hs_instance,
119 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
120 (uint64_t)(uintptr_t)pagep);
121 hermon_umap_db_add(umapdb);
122 return (pagep);
126 /*ARGSUSED*/
127 static int
128 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
129 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
130 uint64_t *mapoffset)
132 hermon_user_dbr_t *udbr;
133 hermon_udbr_page_t *pagep;
134 uint_t next_page;
135 int dbr_index;
136 int i1, i2, i3, last;
137 uint64_t u64, mask;
139 mutex_enter(&state->hs_dbr_lock);
140 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
141 if (udbr->udbr_index == index)
142 break;
143 if (udbr == NULL) {
144 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
145 udbr->udbr_link = state->hs_user_dbr;
146 state->hs_user_dbr = udbr;
147 udbr->udbr_index = index;
148 udbr->udbr_pagep = NULL;
150 pagep = udbr->udbr_pagep;
151 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
152 while (pagep != NULL)
153 if (pagep->upg_nfree > 0)
154 break;
155 else
156 pagep = pagep->upg_link;
157 if (pagep == NULL) {
158 pagep = hermon_dbr_new_user_page(state, index, next_page);
159 if (pagep == NULL) {
160 mutex_exit(&state->hs_dbr_lock);
161 return (DDI_FAILURE);
163 pagep->upg_link = udbr->udbr_pagep;
164 udbr->udbr_pagep = pagep;
167 /* Since nfree > 0, we're assured the loops below will succeed */
169 /* First, find a 64-bit (not ~0) that has a free dbr */
170 last = PAGESIZE / sizeof (uint64_t) / 64;
171 mask = ~0ull;
172 for (i1 = 0; i1 < last; i1++)
173 if ((pagep->upg_free[i1] & mask) != mask)
174 break;
175 u64 = pagep->upg_free[i1];
177 /* Second, find a byte (not 0xff) that has a free dbr */
178 last = sizeof (uint64_t) / sizeof (uint8_t);
179 for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
180 if ((u64 & mask) != mask)
181 break;
183 /* Third, find a bit that is free (0) */
184 for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
185 if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
186 break;
188 /* Mark it as allocated */
189 pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));
191 dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
192 pagep->upg_nfree--;
193 ((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0; /* clear dbr */
194 *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
195 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
196 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
197 dbr_index);
198 *pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
199 sizeof (uint64_t);
201 mutex_exit(&state->hs_dbr_lock);
202 return (DDI_SUCCESS);
205 static void
206 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
208 hermon_user_dbr_t *udbr;
209 hermon_udbr_page_t *pagep;
210 caddr_t kvaddr;
211 uint_t dbr_index;
212 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t);
213 int i1, i2;
215 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
216 kvaddr = (caddr_t)record - dbr_index;
217 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
219 mutex_enter(&state->hs_dbr_lock);
220 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
221 if (udbr->udbr_index == index)
222 break;
223 if (udbr == NULL) {
224 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
225 "found for index %x", index);
226 mutex_exit(&state->hs_dbr_lock);
227 return;
229 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
230 if (pagep->upg_kvaddr == kvaddr)
231 break;
232 if (pagep == NULL) {
233 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
234 " found for index %x, kvaddr %p, DBR index %x",
235 index, kvaddr, dbr_index);
236 mutex_exit(&state->hs_dbr_lock);
237 return;
239 if (pagep->upg_nfree >= max_free) {
240 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
241 "UCE index %x, DBR index %x", index, dbr_index);
242 mutex_exit(&state->hs_dbr_lock);
243 return;
245 ASSERT(dbr_index < max_free);
246 i1 = dbr_index / 64;
247 i2 = dbr_index % 64;
248 ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
249 pagep->upg_free[i1] &= ~(1ul << i2);
250 pagep->upg_nfree++;
251 mutex_exit(&state->hs_dbr_lock);
255 * hermon_dbr_page_alloc()
256 * first page allocation - called from attach or open
257 * in this case, we want exactly one page per call, and aligned on a
258 * page - and may need to be mapped to the user for access
261 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
263 int status;
264 ddi_dma_handle_t dma_hdl;
265 ddi_acc_handle_t acc_hdl;
266 ddi_dma_attr_t dma_attr;
267 ddi_dma_cookie_t cookie;
268 uint_t cookie_cnt;
269 int i;
270 hermon_dbr_info_t *info;
271 caddr_t dmaaddr;
272 uint64_t dmalen;
273 ulong_t pagesize = PAGESIZE;
275 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
278 * Initialize many of the default DMA attributes. Then set additional
279 * alignment restrictions if necessary for the dbr memory, meaning
280 * page aligned. Also use the configured value for IOMMU bypass
282 hermon_dma_attr_init(state, &dma_attr);
283 dma_attr.dma_attr_align = pagesize;
284 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */
286 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
287 DDI_DMA_SLEEP, NULL, &dma_hdl);
288 if (status != DDI_SUCCESS) {
289 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
290 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
291 return (DDI_FAILURE);
294 status = ddi_dma_mem_alloc(dma_hdl, pagesize,
295 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
296 NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
297 if (status != DDI_SUCCESS) {
298 ddi_dma_free_handle(&dma_hdl);
299 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
300 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
301 return (DDI_FAILURE);
304 /* this memory won't be IB registered, so do the bind here */
305 status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
306 dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
307 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
308 if (status != DDI_SUCCESS) {
309 ddi_dma_mem_free(&acc_hdl);
310 ddi_dma_free_handle(&dma_hdl);
311 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
312 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
313 status);
314 return (DDI_FAILURE);
316 *dinfo = info; /* Pass back the pointer */
318 /* init the info structure with returned info */
319 info->dbr_dmahdl = dma_hdl;
320 info->dbr_acchdl = acc_hdl;
321 info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr;
322 info->dbr_link = NULL;
323 /* extract the phys addr from the cookie */
324 info->dbr_paddr = cookie.dmac_laddress;
325 info->dbr_firstfree = 0;
326 info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
327 /* link all DBrs onto the free list */
328 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
329 info->dbr_page[i] = i + 1;
332 return (DDI_SUCCESS);
337 * hermon_dbr_alloc()
338 * DBr record allocation - called from alloc cq/qp/srq
339 * will check for available dbrs in current
340 * page - if needed it will allocate another and link them
344 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
345 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
347 hermon_dbr_t *record = NULL;
348 hermon_dbr_info_t *info = NULL;
349 uint32_t idx;
350 int status;
352 if (index != state->hs_kernel_uar_index)
353 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
354 mapoffset));
356 mutex_enter(&state->hs_dbr_lock);
357 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
358 if (info->dbr_nfree != 0)
359 break; /* found a page w/ one available */
361 if (info == NULL) { /* did NOT find a page with one available */
362 status = hermon_dbr_page_alloc(state, &info);
363 if (status != DDI_SUCCESS) {
364 /* do error handling */
365 mutex_exit(&state->hs_dbr_lock);
366 return (DDI_FAILURE);
368 /* got a new page, so link it in. */
369 info->dbr_link = state->hs_kern_dbr;
370 state->hs_kern_dbr = info;
372 idx = info->dbr_firstfree;
373 record = info->dbr_page + idx;
374 info->dbr_firstfree = *record;
375 info->dbr_nfree--;
376 *record = 0;
378 *acchdl = info->dbr_acchdl;
379 *vdbr = record;
380 *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
381 mutex_exit(&state->hs_dbr_lock);
382 return (DDI_SUCCESS);
386 * hermon_dbr_free()
387 * DBr record deallocation - called from free cq/qp
388 * will update the counter in the header, and invalidate
389 * the dbr, but will NEVER free pages of dbrs - small
390 * price to pay, but userland access never will anyway
392 void
393 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
395 hermon_dbr_t *page;
396 hermon_dbr_info_t *info;
398 if (indx != state->hs_kernel_uar_index) {
399 hermon_user_dbr_free(state, indx, record);
400 return;
402 page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
403 mutex_enter(&state->hs_dbr_lock);
404 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
405 if (info->dbr_page == page)
406 break;
407 ASSERT(info != NULL);
408 *record = info->dbr_firstfree;
409 info->dbr_firstfree = record - info->dbr_page;
410 info->dbr_nfree++;
411 mutex_exit(&state->hs_dbr_lock);
415 * hermon_dbr_kern_free()
416 * Context: Can be called only from detach context.
418 * Free all kernel dbr pages. This includes the freeing of all the dma
419 * resources acquired during the allocation of the pages.
421 * Also, free all the user dbr pages.
423 void
424 hermon_dbr_kern_free(hermon_state_t *state)
426 hermon_dbr_info_t *info, *link;
427 hermon_user_dbr_t *udbr, *next;
428 hermon_udbr_page_t *pagep, *nextp;
429 hermon_umap_db_entry_t *umapdb;
430 int instance, status;
431 uint64_t value;
432 extern hermon_umap_db_t hermon_userland_rsrc_db;
434 mutex_enter(&state->hs_dbr_lock);
435 for (info = state->hs_kern_dbr; info != NULL; info = link) {
436 (void) ddi_dma_unbind_handle(info->dbr_dmahdl);
437 ddi_dma_mem_free(&info->dbr_acchdl); /* free page */
438 ddi_dma_free_handle(&info->dbr_dmahdl);
439 link = info->dbr_link;
440 kmem_free(info, sizeof (hermon_dbr_info_t));
443 udbr = state->hs_user_dbr;
444 instance = state->hs_instance;
445 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
446 while (udbr != NULL) {
447 pagep = udbr->udbr_pagep;
448 while (pagep != NULL) {
449 /* probably need to remove "db" */
450 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
451 ddi_dma_free_handle(&pagep->upg_dmahdl);
452 freerbuf(pagep->upg_buf);
453 ddi_umem_free(pagep->upg_umemcookie);
454 status = hermon_umap_db_find_nolock(instance,
455 HERMON_DBR_KEY(udbr->udbr_index,
456 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
457 &value, HERMON_UMAP_DB_REMOVE, &umapdb);
458 if (status == DDI_SUCCESS)
459 hermon_umap_db_free(umapdb);
460 kmem_free(pagep->upg_free,
461 PAGESIZE / sizeof (hermon_dbr_t) / 8);
462 nextp = pagep->upg_link;
463 kmem_free(pagep, sizeof (*pagep));
464 pagep = nextp;
466 next = udbr->udbr_link;
467 kmem_free(udbr, sizeof (*udbr));
468 udbr = next;
470 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
471 mutex_exit(&state->hs_dbr_lock);
475 * hermon_ah_alloc()
476 * Context: Can be called only from user or kernel context.
479 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
480 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
482 hermon_rsrc_t *rsrc;
483 hermon_hw_udav_t *udav;
484 hermon_ahhdl_t ah;
485 int status;
488 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
489 * indicate that we wish to allocate an "invalid" (i.e. empty)
490 * address handle XXX
493 /* Validate that specified port number is legal */
494 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
495 return (IBT_HCA_PORT_INVALID);
499 * Allocate the software structure for tracking the address handle
500 * (i.e. the Hermon Address Handle struct).
502 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
503 if (status != DDI_SUCCESS) {
504 return (IBT_INSUFF_RESOURCE);
506 ah = (hermon_ahhdl_t)rsrc->hr_addr;
507 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
509 /* Increment the reference count on the protection domain (PD) */
510 hermon_pd_refcnt_inc(pd);
512 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
513 KM_SLEEP);
514 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
517 * Fill in the UDAV data. We first zero out the UDAV, then populate
518 * it by then calling hermon_set_addr_path() to fill in the common
519 * portions that can be pulled from the "ibt_adds_vect_t" passed in
521 status = hermon_set_addr_path(state, attr_p,
522 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
523 if (status != DDI_SUCCESS) {
524 hermon_pd_refcnt_dec(pd);
525 hermon_rsrc_free(state, &rsrc);
526 return (status);
528 udav->pd = pd->pd_pdnum;
529 udav->sl = attr_p->av_srvl;
532 * Fill in the rest of the Hermon Address Handle struct.
534 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
535 * here because we may need to return it later to the IBTF (as a
536 * result of a subsequent query operation). Unlike the other UDAV
537 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
538 * The reason for this is described in hermon_set_addr_path().
540 ah->ah_rsrcp = rsrc;
541 ah->ah_pdhdl = pd;
542 ah->ah_udav = udav;
543 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
544 *ahhdl = ah;
546 return (DDI_SUCCESS);
551 * hermon_ah_free()
552 * Context: Can be called only from user or kernel context.
554 /* ARGSUSED */
556 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
558 hermon_rsrc_t *rsrc;
559 hermon_pdhdl_t pd;
560 hermon_ahhdl_t ah;
563 * Pull all the necessary information from the Hermon Address Handle
564 * struct. This is necessary here because the resource for the
565 * AH is going to be freed up as part of this operation.
567 ah = *ahhdl;
568 mutex_enter(&ah->ah_lock);
569 rsrc = ah->ah_rsrcp;
570 pd = ah->ah_pdhdl;
571 mutex_exit(&ah->ah_lock);
572 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
574 /* Free the UDAV memory */
575 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
577 /* Decrement the reference count on the protection domain (PD) */
578 hermon_pd_refcnt_dec(pd);
580 /* Free the Hermon Address Handle structure */
581 hermon_rsrc_free(state, &rsrc);
583 /* Set the ahhdl pointer to NULL and return success */
584 *ahhdl = NULL;
586 return (DDI_SUCCESS);
591 * hermon_ah_query()
592 * Context: Can be called from interrupt or base context.
594 /* ARGSUSED */
596 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
597 ibt_adds_vect_t *attr_p)
599 mutex_enter(&ah->ah_lock);
600 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
603 * Pull the PD and UDAV from the Hermon Address Handle structure
605 *pd = ah->ah_pdhdl;
608 * Fill in "ibt_adds_vect_t". We call hermon_get_addr_path() to fill
609 * the common portions that can be pulled from the UDAV we pass in.
611 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
612 * "ah_save_guid" field we have previously saved away. The reason
613 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
615 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
616 attr_p, HERMON_ADDRPATH_UDAV);
618 attr_p->av_dgid.gid_guid = ah->ah_save_guid;
620 mutex_exit(&ah->ah_lock);
621 return (DDI_SUCCESS);
626 * hermon_ah_modify()
627 * Context: Can be called from interrupt or base context.
629 /* ARGSUSED */
631 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
632 ibt_adds_vect_t *attr_p)
634 hermon_hw_udav_t old_udav;
635 uint64_t data_old;
636 int status, size, i;
638 /* Validate that specified port number is legal */
639 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
640 return (IBT_HCA_PORT_INVALID);
643 mutex_enter(&ah->ah_lock);
645 /* Save a copy of the current UDAV data in old_udav. */
646 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
649 * Fill in the new UDAV with the caller's data, passed in via the
650 * "ibt_adds_vect_t" structure.
652 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
653 * field here (just as we did during hermon_ah_alloc()) because we
654 * may need to return it later to the IBTF (as a result of a
655 * subsequent query operation). As explained in hermon_ah_alloc(),
656 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
657 * is not always preserved. The reason for this is described in
658 * hermon_set_addr_path().
660 status = hermon_set_addr_path(state, attr_p,
661 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
662 if (status != DDI_SUCCESS) {
663 mutex_exit(&ah->ah_lock);
664 return (status);
666 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
667 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
668 ah->ah_udav->sl = attr_p->av_srvl;
671 * Copy changes into the new UDAV.
672 * Note: We copy in 64-bit chunks. For the first two of these
673 * chunks it is necessary to read the current contents of the
674 * UDAV, mask off the modifiable portions (maintaining any
675 * of the "reserved" portions), and then mask on the new data.
677 size = sizeof (hermon_hw_udav_t) >> 3;
678 for (i = 0; i < size; i++) {
679 data_old = ((uint64_t *)&old_udav)[i];
682 * Apply mask to change only the relevant values.
684 if (i == 0) {
685 data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
686 } else if (i == 1) {
687 data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
688 } else {
689 data_old = 0;
692 /* Store the updated values to the UDAV */
693 ((uint64_t *)ah->ah_udav)[i] |= data_old;
697 * Put the valid PD number back into the UDAV entry, as it
698 * might have been clobbered above.
700 ah->ah_udav->pd = old_udav.pd;
703 mutex_exit(&ah->ah_lock);
704 return (DDI_SUCCESS);
708 * hermon_mcg_attach()
709 * Context: Can be called only from user or kernel context.
712 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
713 ib_lid_t lid)
715 hermon_rsrc_t *rsrc;
716 hermon_hw_mcg_t *mcg_entry;
717 hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
718 hermon_mcghdl_t mcg, newmcg;
719 uint64_t mgid_hash;
720 uint32_t end_indx;
721 int status;
722 uint_t qp_found;
725 * It is only allowed to attach MCG to UD queue pairs. Verify
726 * that the intended QP is of the appropriate transport type
728 if (qp->qp_serv_type != HERMON_QP_UD) {
729 return (IBT_QP_SRV_TYPE_INVALID);
733 * Check for invalid Multicast DLID. Specifically, all Multicast
734 * LIDs should be within a well defined range. If the specified LID
735 * is outside of that range, then return an error.
737 if (hermon_mlid_is_valid(lid) == 0) {
738 return (IBT_MC_MLID_INVALID);
741 * Check for invalid Multicast GID. All Multicast GIDs should have
742 * a well-defined pattern of bits and flags that are allowable. If
743 * the specified GID does not meet the criteria, then return an error.
745 if (hermon_mgid_is_valid(gid) == 0) {
746 return (IBT_MC_MGID_INVALID);
750 * Compute the MGID hash value. Since the MCG table is arranged as
751 * a number of separate hash chains, this operation converts the
752 * specified MGID into the starting index of an entry in the hash
753 * table (i.e. the index for the start of the appropriate hash chain).
754 * Subsequent operations below will walk the chain searching for the
755 * right place to add this new QP.
757 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
758 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
759 if (status != HERMON_CMD_SUCCESS) {
760 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
761 status);
762 if (status == HERMON_CMD_INVALID_STATUS) {
763 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
765 return (ibc_get_ci_failure(0));
769 * Grab the multicast group mutex. Then grab the pre-allocated
770 * temporary buffer used for holding and/or modifying MCG entries.
771 * Zero out the temporary MCG entry before we begin.
773 mutex_enter(&state->hs_mcglock);
774 mcg_entry = state->hs_mcgtmp;
775 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
776 bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
779 * Walk through the array of MCG entries starting at "mgid_hash".
780 * Try to find the appropriate place for this new QP to be added.
781 * This could happen when the first entry of the chain has MGID == 0
782 * (which means that the hash chain is empty), or because we find
783 * an entry with the same MGID (in which case we'll add the QP to
784 * that MCG), or because we come to the end of the chain (in which
785 * case this is the first QP being added to the multicast group that
786 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine
787 * walks the list and returns an index into the MCG table. The entry
788 * at this index is then checked to determine which case we have
789 * fallen into (see below). Note: We are using the "shadow" MCG
790 * list (of hermon_mcg_t structs) for this lookup because the real
791 * MCG entries are in hardware (and the lookup process would be much
792 * more time consuming).
794 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
795 mcg = &state->hs_mcghdl[end_indx];
798 * If MGID == 0, then the hash chain is empty. Just fill in the
799 * current entry. Note: No need to allocate an MCG table entry
800 * as all the hash chain "heads" are already preallocated.
802 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
804 /* Fill in the current entry in the "shadow" MCG list */
805 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
808 * Try to add the new QP number to the list. This (and the
809 * above) routine fills in a temporary MCG. The "mcg_entry"
810 * and "mcg_entry_qplist" pointers simply point to different
811 * offsets within the same temporary copy of the MCG (for
812 * convenience). Note: If this fails, we need to invalidate
813 * the entries we've already put into the "shadow" list entry
814 * above.
816 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
817 &qp_found);
818 if (status != DDI_SUCCESS) {
819 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
820 mutex_exit(&state->hs_mcglock);
821 return (status);
823 if (!qp_found)
824 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
825 /* set the member count */
828 * Once the temporary MCG has been filled in, write the entry
829 * into the appropriate location in the Hermon MCG entry table.
830 * If it's successful, then drop the lock and return success.
831 * Note: In general, this operation shouldn't fail. If it
832 * does, then it is an indication that something (probably in
833 * HW, but maybe in SW) has gone seriously wrong. We still
834 * want to zero out the entries that we've filled in above
835 * (in the hermon_mcg_setup_new_hdr() routine).
837 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
838 HERMON_CMD_NOSLEEP_SPIN);
839 if (status != HERMON_CMD_SUCCESS) {
840 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
841 mutex_exit(&state->hs_mcglock);
842 HERMON_WARNING(state, "failed to write MCG entry");
843 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
844 "%08x\n", status);
845 if (status == HERMON_CMD_INVALID_STATUS) {
846 hermon_fm_ereport(state, HCA_SYS_ERR,
847 HCA_ERR_SRV_LOST);
849 return (ibc_get_ci_failure(0));
853 * Now that we know all the Hermon firmware accesses have been
854 * successful, we update the "shadow" MCG entry by incrementing
855 * the "number of attached QPs" count.
857 * We increment only if the QP is not already part of the
858 * MCG by checking the 'qp_found' flag returned from the
859 * qplist_add above.
861 if (!qp_found) {
862 mcg->mcg_num_qps++;
865 * Increment the refcnt for this QP. Because the QP
866 * was added to this MCG, the refcnt must be
867 * incremented.
869 hermon_qp_mcg_refcnt_inc(qp);
873 * We drop the lock and return success.
875 mutex_exit(&state->hs_mcglock);
876 return (DDI_SUCCESS);
880 * If the specified MGID matches the MGID in the current entry, then
881 * we need to try to add the QP to the current MCG entry. In this
882 * case, it means that we need to read the existing MCG entry (into
883 * the temporary MCG), add the new QP number to the temporary entry
884 * (using the same method we used above), and write the entry back
885 * to the hardware (same as above).
887 if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
888 (mcg->mcg_mgid_l == gid.gid_guid)) {
891 * Read the current MCG entry into the temporary MCG. Note:
892 * In general, this operation shouldn't fail. If it does,
893 * then it is an indication that something (probably in HW,
894 * but maybe in SW) has gone seriously wrong.
896 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
897 HERMON_CMD_NOSLEEP_SPIN);
898 if (status != HERMON_CMD_SUCCESS) {
899 mutex_exit(&state->hs_mcglock);
900 HERMON_WARNING(state, "failed to read MCG entry");
901 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
902 "%08x\n", status);
903 if (status == HERMON_CMD_INVALID_STATUS) {
904 hermon_fm_ereport(state, HCA_SYS_ERR,
905 HCA_ERR_SRV_LOST);
907 return (ibc_get_ci_failure(0));
911 * Try to add the new QP number to the list. This routine
912 * fills in the necessary pieces of the temporary MCG. The
913 * "mcg_entry_qplist" pointer is used to point to the portion
914 * of the temporary MCG that holds the QP numbers.
916 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
917 * already found the QP in the list. In this case, the QP is
918 * not added on to the list again. Check the flag 'qp_found'
919 * if this value is needed to be known.
922 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
923 &qp_found);
924 if (status != DDI_SUCCESS) {
925 mutex_exit(&state->hs_mcglock);
926 return (status);
928 if (!qp_found)
929 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
930 /* set the member count */
933 * Once the temporary MCG has been updated, write the entry
934 * into the appropriate location in the Hermon MCG entry table.
935 * If it's successful, then drop the lock and return success.
936 * Note: In general, this operation shouldn't fail. If it
937 * does, then it is an indication that something (probably in
938 * HW, but maybe in SW) has gone seriously wrong.
940 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
941 HERMON_CMD_NOSLEEP_SPIN);
942 if (status != HERMON_CMD_SUCCESS) {
943 mutex_exit(&state->hs_mcglock);
944 HERMON_WARNING(state, "failed to write MCG entry");
945 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
946 "%08x\n", status);
947 if (status == HERMON_CMD_INVALID_STATUS) {
948 hermon_fm_ereport(state, HCA_SYS_ERR,
949 HCA_ERR_SRV_LOST);
951 return (ibc_get_ci_failure(0));
955 * Now that we know all the Hermon firmware accesses have been
956 * successful, we update the current "shadow" MCG entry by
957 * incrementing the "number of attached QPs" count.
959 * We increment only if the QP is not already part of the
960 * MCG by checking the 'qp_found' flag returned from the
961 * hermon_mcg_qplist_add() call above.
963 if (!qp_found) {
964 mcg->mcg_num_qps++;
967 * Increment the refcnt for this QP. Because the QP
968 * was added to this MCG, the refcnt must be
969 * incremented.
971 hermon_qp_mcg_refcnt_inc(qp);
975 * We drop the lock and return success.
977 mutex_exit(&state->hs_mcglock);
978 return (DDI_SUCCESS);
982 * If we've reached here, then we're at the end of the hash chain.
983 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
984 * and update the previous entry to link the new one to the end of the
985 * chain.
989 * Allocate an MCG table entry. This will be filled in with all
990 * the necessary parameters to define the multicast group. Then it
991 * will be written to the hardware in the next-to-last step below.
993 status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
994 if (status != DDI_SUCCESS) {
995 mutex_exit(&state->hs_mcglock);
996 return (IBT_INSUFF_RESOURCE);
1000 * Fill in the new entry in the "shadow" MCG list. Note: Just as
1001 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1002 * of the temporary MCG entry (the rest of which will be filled in by
1003 * hermon_mcg_qplist_add() below)
1005 newmcg = &state->hs_mcghdl[rsrc->hr_indx];
1006 hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
1009 * Try to add the new QP number to the list. This routine fills in
1010 * the final necessary pieces of the temporary MCG. The
1011 * "mcg_entry_qplist" pointer is used to point to the portion of the
1012 * temporary MCG that holds the QP numbers. If we fail here, we
1013 * must undo the previous resource allocation.
1015 * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
1016 * found the QP in the list. In this case, the QP is not added on to
1017 * the list again. Check the flag 'qp_found' if this value is needed
1018 * to be known.
1020 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
1021 &qp_found);
1022 if (status != DDI_SUCCESS) {
1023 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1024 hermon_rsrc_free(state, &rsrc);
1025 mutex_exit(&state->hs_mcglock);
1026 return (status);
1028 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1029 /* set the member count */
1032 * Once the temporary MCG has been updated, write the entry into the
1033 * appropriate location in the Hermon MCG entry table. If this is
1034 * successful, then we need to chain the previous entry to this one.
1035 * Note: In general, this operation shouldn't fail. If it does, then
1036 * it is an indication that something (probably in HW, but maybe in
1037 * SW) has gone seriously wrong.
1039 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1040 HERMON_CMD_NOSLEEP_SPIN);
1041 if (status != HERMON_CMD_SUCCESS) {
1042 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1043 hermon_rsrc_free(state, &rsrc);
1044 mutex_exit(&state->hs_mcglock);
1045 HERMON_WARNING(state, "failed to write MCG entry");
1046 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1047 status);
1048 if (status == HERMON_CMD_INVALID_STATUS) {
1049 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1051 return (ibc_get_ci_failure(0));
1055 * Now read the current MCG entry (the one previously at the end of
1056 * hash chain) into the temporary MCG. We are going to update its
1057 * "next_gid_indx" now and write the entry back to the MCG table.
1058 * Note: In general, this operation shouldn't fail. If it does, then
1059 * it is an indication that something (probably in HW, but maybe in SW)
1060 * has gone seriously wrong. We will free up the MCG entry resource,
1061 * but we will not undo the previously written MCG entry in the HW.
1062 * This is OK, though, because the MCG entry is not currently attached
1063 * to any hash chain.
1065 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1066 HERMON_CMD_NOSLEEP_SPIN);
1067 if (status != HERMON_CMD_SUCCESS) {
1068 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1069 hermon_rsrc_free(state, &rsrc);
1070 mutex_exit(&state->hs_mcglock);
1071 HERMON_WARNING(state, "failed to read MCG entry");
1072 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1073 status);
1074 if (status == HERMON_CMD_INVALID_STATUS) {
1075 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1077 return (ibc_get_ci_failure(0));
1081 * Finally, we update the "next_gid_indx" field in the temporary MCG
1082 * and attempt to write the entry back into the Hermon MCG table. If
1083 * this succeeds, then we update the "shadow" list to reflect the
1084 * change, drop the lock, and return success. Note: In general, this
1085 * operation shouldn't fail. If it does, then it is an indication
1086 * that something (probably in HW, but maybe in SW) has gone seriously
1087 * wrong. Just as we do above, we will free up the MCG entry resource,
1088 * but we will not try to undo the previously written MCG entry. This
1089 * is OK, though, because (since we failed here to update the end of
1090 * the chain) that other entry is not currently attached to any chain.
1092 mcg_entry->next_gid_indx = rsrc->hr_indx;
1093 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1094 HERMON_CMD_NOSLEEP_SPIN);
1095 if (status != HERMON_CMD_SUCCESS) {
1096 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1097 hermon_rsrc_free(state, &rsrc);
1098 mutex_exit(&state->hs_mcglock);
1099 HERMON_WARNING(state, "failed to write MCG entry");
1100 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1101 status);
1102 if (status == HERMON_CMD_INVALID_STATUS) {
1103 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1105 return (ibc_get_ci_failure(0));
1107 mcg = &state->hs_mcghdl[end_indx];
1108 mcg->mcg_next_indx = rsrc->hr_indx;
1111 * Now that we know all the Hermon firmware accesses have been
1112 * successful, we update the new "shadow" MCG entry by incrementing
1113 * the "number of attached QPs" count. Then we drop the lock and
1114 * return success.
1116 newmcg->mcg_num_qps++;
1119 * Increment the refcnt for this QP. Because the QP
1120 * was added to this MCG, the refcnt must be
1121 * incremented.
1123 hermon_qp_mcg_refcnt_inc(qp);
1125 mutex_exit(&state->hs_mcglock);
1126 return (DDI_SUCCESS);
1131 * hermon_mcg_detach()
1132 * Context: Can be called only from user or kernel context.
1135 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
1136 ib_lid_t lid)
1138 hermon_hw_mcg_t *mcg_entry;
1139 hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
1140 hermon_mcghdl_t mcg;
1141 uint64_t mgid_hash;
1142 uint32_t end_indx, prev_indx;
1143 int status;
1146 * Check for invalid Multicast DLID. Specifically, all Multicast
1147 * LIDs should be within a well defined range. If the specified LID
1148 * is outside of that range, then return an error.
1150 if (hermon_mlid_is_valid(lid) == 0) {
1151 return (IBT_MC_MLID_INVALID);
1155 * Compute the MGID hash value. As described above, the MCG table is
1156 * arranged as a number of separate hash chains. This operation
1157 * converts the specified MGID into the starting index of an entry in
1158 * the hash table (i.e. the index for the start of the appropriate
1159 * hash chain). Subsequent operations below will walk the chain
1160 * searching for a matching entry from which to attempt to remove
1161 * the specified QP.
1163 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1164 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
1165 if (status != HERMON_CMD_SUCCESS) {
1166 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
1167 status);
1168 if (status == HERMON_CMD_INVALID_STATUS) {
1169 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1171 return (ibc_get_ci_failure(0));
1175 * Grab the multicast group mutex. Then grab the pre-allocated
1176 * temporary buffer used for holding and/or modifying MCG entries.
1178 mutex_enter(&state->hs_mcglock);
1179 mcg_entry = state->hs_mcgtmp;
1180 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
1183 * Walk through the array of MCG entries starting at "mgid_hash".
1184 * Try to find an MCG entry with a matching MGID. The
1185 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1186 * index into the MCG table. The entry at this index is checked to
1187 * determine whether it is a match or not. If it is a match, then
1188 * we continue on to attempt to remove the QP from the MCG. If it
1189 * is not a match (or not a valid MCG entry), then we return an error.
1191 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1192 mcg = &state->hs_mcghdl[end_indx];
1195 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1196 * does not match the MGID in the current entry, then return
1197 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1198 * valid).
1200 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1201 ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1202 (mcg->mcg_mgid_l != gid.gid_guid))) {
1203 mutex_exit(&state->hs_mcglock);
1204 return (IBT_MC_MGID_INVALID);
1208 * Read the current MCG entry into the temporary MCG. Note: In
1209 * general, this operation shouldn't fail. If it does, then it is
1210 * an indication that something (probably in HW, but maybe in SW)
1211 * has gone seriously wrong.
1213 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1214 HERMON_CMD_NOSLEEP_SPIN);
1215 if (status != HERMON_CMD_SUCCESS) {
1216 mutex_exit(&state->hs_mcglock);
1217 HERMON_WARNING(state, "failed to read MCG entry");
1218 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1219 status);
1220 if (status == HERMON_CMD_INVALID_STATUS) {
1221 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1223 return (ibc_get_ci_failure(0));
1227 * Search the QP number list for a match. If a match is found, then
1228 * remove the entry from the QP list. Otherwise, if no match is found,
1229 * return an error.
1231 status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1232 if (status != DDI_SUCCESS) {
1233 mutex_exit(&state->hs_mcglock);
1234 return (status);
1238 * Decrement the MCG count for this QP. When the 'qp_mcg'
1239 * field becomes 0, then this QP is no longer a member of any
1240 * MCG.
1242 hermon_qp_mcg_refcnt_dec(qp);
1245 * If the current MCG's QP number list is about to be made empty
1246 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1247 * chain. Otherwise, just write the updated MCG entry back to the
1248 * hardware. In either case, once we successfully update the hardware
1249 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1250 * count (or zero out the entire "shadow" list entry) before returning
1251 * success. Note: Zeroing out the "shadow" list entry is done
1252 * inside of hermon_mcg_hash_list_remove().
1254 if (mcg->mcg_num_qps == 1) {
1256 /* Remove an MCG entry from the hash chain */
1257 status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
1258 mcg_entry);
1259 if (status != DDI_SUCCESS) {
1260 mutex_exit(&state->hs_mcglock);
1261 return (status);
1264 } else {
1266 * Write the updated MCG entry back to the Hermon MCG table.
1267 * If this succeeds, then we update the "shadow" list to
1268 * reflect the change (i.e. decrement the "mcg_num_qps"),
1269 * drop the lock, and return success. Note: In general,
1270 * this operation shouldn't fail. If it does, then it is an
1271 * indication that something (probably in HW, but maybe in SW)
1272 * has gone seriously wrong.
1274 mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
1275 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1276 HERMON_CMD_NOSLEEP_SPIN);
1277 if (status != HERMON_CMD_SUCCESS) {
1278 mutex_exit(&state->hs_mcglock);
1279 HERMON_WARNING(state, "failed to write MCG entry");
1280 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1281 "%08x\n", status);
1282 if (status == HERMON_CMD_INVALID_STATUS) {
1283 hermon_fm_ereport(state, HCA_SYS_ERR,
1284 HCA_ERR_SRV_LOST);
1286 return (ibc_get_ci_failure(0));
1288 mcg->mcg_num_qps--;
1291 mutex_exit(&state->hs_mcglock);
1292 return (DDI_SUCCESS);
1296 * hermon_qp_mcg_refcnt_inc()
1297 * Context: Can be called from interrupt or base context.
1299 static void
1300 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1302 /* Increment the QP's MCG reference count */
1303 mutex_enter(&qp->qp_lock);
1304 qp->qp_mcg_refcnt++;
1305 mutex_exit(&qp->qp_lock);
1310 * hermon_qp_mcg_refcnt_dec()
1311 * Context: Can be called from interrupt or base context.
1313 static void
1314 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1316 /* Decrement the QP's MCG reference count */
1317 mutex_enter(&qp->qp_lock);
1318 qp->qp_mcg_refcnt--;
1319 mutex_exit(&qp->qp_lock);
1324 * hermon_mcg_qplist_add()
1325 * Context: Can be called from interrupt or base context.
1327 static int
1328 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1329 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1330 uint_t *qp_found)
1332 uint_t qplist_indx;
1334 ASSERT(MUTEX_HELD(&state->hs_mcglock));
1336 qplist_indx = mcg->mcg_num_qps;
1339 * Determine if we have exceeded the maximum number of QP per
1340 * multicast group. If we have, then return an error
1342 if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1343 return (IBT_HCA_MCG_QP_EXCEEDED);
1347 * Determine if the QP is already attached to this MCG table. If it
1348 * is, then we break out and treat this operation as a NO-OP
1350 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1351 qplist_indx++) {
1352 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1353 break;
1358 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1359 * return SUCCESS in this case, but the qplist will not have been
1360 * updated because the QP was already on the list.
1362 if (qplist_indx < mcg->mcg_num_qps) {
1363 *qp_found = 1;
1364 } else {
1366 * Otherwise, append the new QP number to the end of the
1367 * current QP list. Note: We will increment the "mcg_num_qps"
1368 * field on the "shadow" MCG list entry later (after we know
1369 * that all necessary Hermon firmware accesses have been
1370 * successful).
1372 * Set 'qp_found' to 0 so we know the QP was added on to the
1373 * list for sure.
1375 mcg_qplist[qplist_indx].qpn =
1376 (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1377 *qp_found = 0;
1380 return (DDI_SUCCESS);
1386 * hermon_mcg_qplist_remove()
1387 * Context: Can be called from interrupt or base context.
1389 static int
1390 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1391 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1393 uint_t i, qplist_indx;
1396 * Search the MCG QP list for a matching QPN. When
1397 * it's found, we swap the last entry with the current
1398 * one, set the last entry to zero, decrement the last
1399 * entry, and return. If it's not found, then it's
1400 * and error.
1402 qplist_indx = mcg->mcg_num_qps;
1403 for (i = 0; i < qplist_indx; i++) {
1404 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1405 mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1406 mcg_qplist[qplist_indx - 1].qpn = 0;
1408 return (DDI_SUCCESS);
1412 return (IBT_QP_HDL_INVALID);
1417 * hermon_mcg_walk_mgid_hash()
1418 * Context: Can be called from interrupt or base context.
1420 static uint_t
1421 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1422 ib_gid_t mgid, uint_t *p_indx)
1424 hermon_mcghdl_t curr_mcghdl;
1425 uint_t curr_indx, prev_indx;
1427 ASSERT(MUTEX_HELD(&state->hs_mcglock));
1429 /* Start at the head of the hash chain */
1430 curr_indx = (uint_t)start_indx;
1431 prev_indx = curr_indx;
1432 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1434 /* If the first entry in the chain has MGID == 0, then stop */
1435 if ((curr_mcghdl->mcg_mgid_h == 0) &&
1436 (curr_mcghdl->mcg_mgid_l == 0)) {
1437 goto end_mgid_hash_walk;
1440 /* If the first entry in the chain matches the MGID, then stop */
1441 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1442 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1443 goto end_mgid_hash_walk;
1446 /* Otherwise, walk the hash chain looking for a match */
1447 while (curr_mcghdl->mcg_next_indx != 0) {
1448 prev_indx = curr_indx;
1449 curr_indx = curr_mcghdl->mcg_next_indx;
1450 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1452 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1453 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1454 break;
1458 end_mgid_hash_walk:
1460 * If necessary, return the index of the previous entry too. This
1461 * is primarily used for detaching a QP from a multicast group. It
1462 * may be necessary, in that case, to delete an MCG entry from the
1463 * hash chain and having the index of the previous entry is helpful.
1465 if (p_indx != NULL) {
1466 *p_indx = prev_indx;
1468 return (curr_indx);
1473 * hermon_mcg_setup_new_hdr()
1474 * Context: Can be called from interrupt or base context.
1476 static void
1477 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1478 ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1481 * Fill in the fields of the "shadow" entry used by software
1482 * to track MCG hardware entry
1484 mcg->mcg_mgid_h = mgid.gid_prefix;
1485 mcg->mcg_mgid_l = mgid.gid_guid;
1486 mcg->mcg_rsrcp = mcg_rsrc;
1487 mcg->mcg_next_indx = 0;
1488 mcg->mcg_num_qps = 0;
1491 * Fill the header fields of the MCG entry (in the temporary copy)
1493 mcg_hdr->mgid_h = mgid.gid_prefix;
1494 mcg_hdr->mgid_l = mgid.gid_guid;
1495 mcg_hdr->next_gid_indx = 0;
1500 * hermon_mcg_hash_list_remove()
1501 * Context: Can be called only from user or kernel context.
1503 static int
1504 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
1505 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
1507 hermon_mcghdl_t curr_mcg, prev_mcg, next_mcg;
1508 uint_t next_indx;
1509 int status;
1511 /* Get the pointer to "shadow" list for current entry */
1512 curr_mcg = &state->hs_mcghdl[curr_indx];
1515 * If this is the first entry on a hash chain, then attempt to replace
1516 * the entry with the next entry on the chain. If there are no
1517 * subsequent entries on the chain, then this is the only entry and
1518 * should be invalidated.
1520 if (curr_indx == prev_indx) {
1523 * If this is the only entry on the chain, then invalidate it.
1524 * Note: Invalidating an MCG entry means writing all zeros
1525 * to the entry. This is only necessary for those MCG
1526 * entries that are the "head" entries of the individual hash
1527 * chains. Regardless of whether this operation returns
1528 * success or failure, return that result to the caller.
1530 next_indx = curr_mcg->mcg_next_indx;
1531 if (next_indx == 0) {
1532 status = hermon_mcg_entry_invalidate(state, mcg_entry,
1533 curr_indx);
1534 bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1535 return (status);
1539 * Otherwise, this is just the first entry on the chain, so
1540 * grab the next one
1542 next_mcg = &state->hs_mcghdl[next_indx];
1545 * Read the next MCG entry into the temporary MCG. Note:
1546 * In general, this operation shouldn't fail. If it does,
1547 * then it is an indication that something (probably in HW,
1548 * but maybe in SW) has gone seriously wrong.
1550 status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
1551 HERMON_CMD_NOSLEEP_SPIN);
1552 if (status != HERMON_CMD_SUCCESS) {
1553 HERMON_WARNING(state, "failed to read MCG entry");
1554 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
1555 "%08x\n", status);
1556 if (status == HERMON_CMD_INVALID_STATUS) {
1557 hermon_fm_ereport(state, HCA_SYS_ERR,
1558 HCA_ERR_SRV_LOST);
1560 return (ibc_get_ci_failure(0));
1564 * Copy/Write the temporary MCG back to the hardware MCG list
1565 * using the current index. This essentially removes the
1566 * current MCG entry from the list by writing over it with
1567 * the next one. If this is successful, then we can do the
1568 * same operation for the "shadow" list. And we can also
1569 * free up the Hermon MCG entry resource that was associated
1570 * with the (old) next entry. Note: In general, this
1571 * operation shouldn't fail. If it does, then it is an
1572 * indication that something (probably in HW, but maybe in SW)
1573 * has gone seriously wrong.
1575 status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1576 HERMON_CMD_NOSLEEP_SPIN);
1577 if (status != HERMON_CMD_SUCCESS) {
1578 HERMON_WARNING(state, "failed to write MCG entry");
1579 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1580 "%08x\n", status);
1581 if (status == HERMON_CMD_INVALID_STATUS) {
1582 hermon_fm_ereport(state, HCA_SYS_ERR,
1583 HCA_ERR_SRV_LOST);
1585 return (ibc_get_ci_failure(0));
1589 * Copy all the software tracking information from the next
1590 * entry on the "shadow" MCG list into the current entry on
1591 * the list. Then invalidate (zero out) the other "shadow"
1592 * list entry.
1594 bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1595 bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
1598 * Free up the Hermon MCG entry resource used by the "next"
1599 * MCG entry. That resource is no longer needed by any
1600 * MCG entry which is first on a hash chain (like the "next"
1601 * entry has just become).
1603 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1605 return (DDI_SUCCESS);
1609 * Else if this is the last entry on the hash chain (or a middle
1610 * entry, then we update the previous entry's "next_gid_index" field
1611 * to make it point instead to the next entry on the chain. By
1612 * skipping over the removed entry in this way, we can then free up
1613 * any resources associated with the current entry. Note: We don't
1614 * need to invalidate the "skipped over" hardware entry because it
1615 * will no be longer connected to any hash chains, and if/when it is
1616 * finally re-used, it will be written with entirely new values.
1620 * Read the next MCG entry into the temporary MCG. Note: In general,
1621 * this operation shouldn't fail. If it does, then it is an
1622 * indication that something (probably in HW, but maybe in SW) has
1623 * gone seriously wrong.
1625 status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1626 HERMON_CMD_NOSLEEP_SPIN);
1627 if (status != HERMON_CMD_SUCCESS) {
1628 HERMON_WARNING(state, "failed to read MCG entry");
1629 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1630 status);
1631 if (status == HERMON_CMD_INVALID_STATUS) {
1632 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1634 return (ibc_get_ci_failure(0));
1638 * Finally, we update the "next_gid_indx" field in the temporary MCG
1639 * and attempt to write the entry back into the Hermon MCG table. If
1640 * this succeeds, then we update the "shadow" list to reflect the
1641 * change, free up the Hermon MCG entry resource that was associated
1642 * with the current entry, and return success. Note: In general,
1643 * this operation shouldn't fail. If it does, then it is an indication
1644 * that something (probably in HW, but maybe in SW) has gone seriously
1645 * wrong.
1647 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1648 status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1649 HERMON_CMD_NOSLEEP_SPIN);
1650 if (status != HERMON_CMD_SUCCESS) {
1651 HERMON_WARNING(state, "failed to write MCG entry");
1652 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1653 status);
1654 if (status == HERMON_CMD_INVALID_STATUS) {
1655 hermon_fm_ereport(state, HCA_SYS_ERR,
1656 HCA_ERR_SRV_LOST);
1658 return (ibc_get_ci_failure(0));
1662 * Get the pointer to the "shadow" MCG list entry for the previous
1663 * MCG. Update its "mcg_next_indx" to point to the next entry
1664 * the one after the current entry. Note: This next index may be
1665 * zero, indicating the end of the list.
1667 prev_mcg = &state->hs_mcghdl[prev_indx];
1668 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1671 * Free up the Hermon MCG entry resource used by the current entry.
1672 * This resource is no longer needed because the chain now skips over
1673 * the current entry. Then invalidate (zero out) the current "shadow"
1674 * list entry.
1676 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1677 bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1679 return (DDI_SUCCESS);
1684 * hermon_mcg_entry_invalidate()
1685 * Context: Can be called only from user or kernel context.
1687 static int
1688 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1689 uint_t indx)
1691 int status;
1694 * Invalidate the hardware MCG entry by zeroing out this temporary
1695 * MCG and writing it the the hardware. Note: In general, this
1696 * operation shouldn't fail. If it does, then it is an indication
1697 * that something (probably in HW, but maybe in SW) has gone seriously
1698 * wrong.
1700 bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1701 status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1702 HERMON_CMD_NOSLEEP_SPIN);
1703 if (status != HERMON_CMD_SUCCESS) {
1704 HERMON_WARNING(state, "failed to write MCG entry");
1705 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1706 status);
1707 if (status == HERMON_CMD_INVALID_STATUS) {
1708 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1710 return (ibc_get_ci_failure(0));
1713 return (DDI_SUCCESS);
1718 * hermon_mgid_is_valid()
1719 * Context: Can be called from interrupt or base context.
1721 static int
1722 hermon_mgid_is_valid(ib_gid_t gid)
1724 uint_t topbits, flags, scope;
1727 * According to IBA 1.1 specification (section 4.1.1) a valid
1728 * "multicast GID" must have its top eight bits set to all ones
1730 topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1731 HERMON_MCG_TOPBITS_MASK;
1732 if (topbits != HERMON_MCG_TOPBITS) {
1733 return (0);
1737 * The next 4 bits are the "flag" bits. These are valid only
1738 * if they are "0" (which correspond to permanently assigned/
1739 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1740 * multicast GIDs). All other values are reserved.
1742 flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1743 HERMON_MCG_FLAGS_MASK;
1744 if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1745 (flags == HERMON_MCG_FLAGS_NONPERM))) {
1746 return (0);
1750 * The next 4 bits are the "scope" bits. These are valid only
1751 * if they are "2" (Link-local), "5" (Site-local), "8"
1752 * (Organization-local) or "E" (Global). All other values
1753 * are reserved (or currently unassigned).
1755 scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1756 HERMON_MCG_SCOPE_MASK;
1757 if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1758 (scope == HERMON_MCG_SCOPE_SITELOC) ||
1759 (scope == HERMON_MCG_SCOPE_ORGLOC) ||
1760 (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1761 return (0);
1765 * If it passes all of the above checks, then we will consider it
1766 * a valid multicast GID.
1768 return (1);
1773 * hermon_mlid_is_valid()
1774 * Context: Can be called from interrupt or base context.
1776 static int
1777 hermon_mlid_is_valid(ib_lid_t lid)
1780 * According to IBA 1.1 specification (section 4.1.1) a valid
1781 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1783 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1784 return (0);
1787 return (1);
1792 * hermon_pd_alloc()
1793 * Context: Can be called only from user or kernel context.
1796 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1798 hermon_rsrc_t *rsrc;
1799 hermon_pdhdl_t pd;
1800 int status;
1803 * Allocate the software structure for tracking the protection domain
1804 * (i.e. the Hermon Protection Domain handle). By default each PD
1805 * structure will have a unique PD number assigned to it. All that
1806 * is necessary is for software to initialize the PD reference count
1807 * (to zero) and return success.
1809 status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1810 if (status != DDI_SUCCESS) {
1811 return (IBT_INSUFF_RESOURCE);
1813 pd = (hermon_pdhdl_t)rsrc->hr_addr;
1814 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1816 pd->pd_refcnt = 0;
1817 *pdhdl = pd;
1819 return (DDI_SUCCESS);
1824 * hermon_pd_free()
1825 * Context: Can be called only from user or kernel context.
1828 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1830 hermon_rsrc_t *rsrc;
1831 hermon_pdhdl_t pd;
1834 * Pull all the necessary information from the Hermon Protection Domain
1835 * handle. This is necessary here because the resource for the
1836 * PD is going to be freed up as part of this operation.
1838 pd = *pdhdl;
1839 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1840 rsrc = pd->pd_rsrcp;
1843 * Check the PD reference count. If the reference count is non-zero,
1844 * then it means that this protection domain is still referenced by
1845 * some memory region, queue pair, address handle, or other IB object
1846 * If it is non-zero, then return an error. Otherwise, free the
1847 * Hermon resource and return success.
1849 if (pd->pd_refcnt != 0) {
1850 return (IBT_PD_IN_USE);
1853 /* Free the Hermon Protection Domain handle */
1854 hermon_rsrc_free(state, &rsrc);
1856 /* Set the pdhdl pointer to NULL and return success */
1857 *pdhdl = (hermon_pdhdl_t)NULL;
1859 return (DDI_SUCCESS);
1864 * hermon_pd_refcnt_inc()
1865 * Context: Can be called from interrupt or base context.
1867 void
1868 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1870 /* Increment the protection domain's reference count */
1871 atomic_inc_32(&pd->pd_refcnt);
1876 * hermon_pd_refcnt_dec()
1877 * Context: Can be called from interrupt or base context.
1879 void
1880 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1882 /* Decrement the protection domain's reference count */
1883 atomic_dec_32(&pd->pd_refcnt);
1888 * hermon_port_query()
1889 * Context: Can be called only from user or kernel context.
1892 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1894 sm_portinfo_t portinfo;
1895 sm_guidinfo_t guidinfo;
1896 sm_pkey_table_t pkeytable;
1897 ib_gid_t *sgid;
1898 uint_t sgid_max, pkey_max, tbl_size;
1899 int i, j, indx, status;
1900 ib_pkey_t *pkeyp;
1901 ib_guid_t *guidp;
1903 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1904 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1906 /* Validate that specified port number is legal */
1907 if (!hermon_portnum_is_valid(state, port)) {
1908 return (IBT_HCA_PORT_INVALID);
1910 pkeyp = state->hs_pkey[port - 1];
1911 guidp = state->hs_guid[port - 1];
1914 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1915 * to the firmware (for the specified port number). This returns
1916 * a full PortInfo MAD (in "portinfo") which we subsequently
1917 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1918 * to the IBTF.
1920 status = hermon_getportinfo_cmd_post(state, port,
1921 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1922 if (status != HERMON_CMD_SUCCESS) {
1923 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
1924 "failed: %08x\n", port, status);
1925 if (status == HERMON_CMD_INVALID_STATUS) {
1926 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1928 return (ibc_get_ci_failure(0));
1932 * Parse the PortInfo MAD and fill in the IBTF structure
1934 pi->p_base_lid = portinfo.LID;
1935 pi->p_qkey_violations = portinfo.Q_KeyViolations;
1936 pi->p_pkey_violations = portinfo.P_KeyViolations;
1937 pi->p_sm_sl = portinfo.MasterSMSL;
1938 pi->p_sm_lid = portinfo.MasterSMLID;
1939 pi->p_linkstate = portinfo.PortState;
1940 pi->p_port_num = portinfo.LocalPortNum;
1941 pi->p_phys_state = portinfo.PortPhysicalState;
1942 pi->p_width_supported = portinfo.LinkWidthSupported;
1943 pi->p_width_enabled = portinfo.LinkWidthEnabled;
1944 pi->p_width_active = portinfo.LinkWidthActive;
1945 pi->p_speed_supported = portinfo.LinkSpeedSupported;
1946 pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
1947 pi->p_speed_active = portinfo.LinkSpeedActive;
1948 pi->p_mtu = portinfo.MTUCap;
1949 pi->p_lmc = portinfo.LMC;
1950 pi->p_max_vl = portinfo.VLCap;
1951 pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1952 pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
1953 tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
1954 pi->p_sgid_tbl_sz = (1 << tbl_size);
1955 tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
1956 pi->p_pkey_tbl_sz = (1 << tbl_size);
1957 state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;
1960 * Convert InfiniBand-defined port capability flags to the format
1961 * specified by the IBTF
1963 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1964 pi->p_capabilities |= IBT_PORT_CAP_SM;
1965 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1966 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1967 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1968 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1969 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1970 pi->p_capabilities |= IBT_PORT_CAP_DM;
1971 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1972 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1973 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
1974 pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;
1977 * Fill in the SGID table. Since the only access to the Hermon
1978 * GID tables is through the firmware's MAD_IFC interface, we
1979 * post as many GetGUIDInfo MADs as necessary to read in the entire
1980 * contents of the SGID table (for the specified port). Note: The
1981 * GetGUIDInfo command only gets eight GUIDs per operation. These
1982 * GUIDs are then appended to the GID prefix for the port (from the
1983 * GetPortInfo above) to form the entire SGID table.
1985 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1986 status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
1987 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1988 if (status != HERMON_CMD_SUCCESS) {
1989 cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
1990 "command failed: %08x\n", port, status);
1991 if (status == HERMON_CMD_INVALID_STATUS) {
1992 hermon_fm_ereport(state, HCA_SYS_ERR,
1993 HCA_ERR_SRV_LOST);
1995 return (ibc_get_ci_failure(0));
1998 /* Figure out how many of the entries are valid */
1999 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
2000 for (j = 0; j < sgid_max; j++) {
2001 indx = (i + j);
2002 sgid = &pi->p_sgid_tbl[indx];
2003 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
2004 sgid->gid_prefix = portinfo.GidPrefix;
2005 guidp[indx] = sgid->gid_guid =
2006 guidinfo.GUIDBlocks[j];
2011 * Fill in the PKey table. Just as for the GID tables above, the
2012 * only access to the Hermon PKey tables is through the firmware's
2013 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary
2014 * to read in the entire contents of the PKey table (for the specified
2015 * port). Note: The GetPKeyTable command only gets 32 PKeys per
2016 * operation.
2018 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
2019 status = hermon_getpkeytable_cmd_post(state, port, i,
2020 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
2021 if (status != HERMON_CMD_SUCCESS) {
2022 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
2023 "command failed: %08x\n", port, status);
2024 if (status == HERMON_CMD_INVALID_STATUS) {
2025 hermon_fm_ereport(state, HCA_SYS_ERR,
2026 HCA_ERR_SRV_LOST);
2028 return (ibc_get_ci_failure(0));
2031 /* Figure out how many of the entries are valid */
2032 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
2033 for (j = 0; j < pkey_max; j++) {
2034 indx = (i + j);
2035 pkeyp[indx] = pi->p_pkey_tbl[indx] =
2036 pkeytable.P_KeyTableBlocks[j];
2040 return (DDI_SUCCESS);
2045 * hermon_port_modify()
2046 * Context: Can be called only from user or kernel context.
2048 /* ARGSUSED */
2050 hermon_port_modify(hermon_state_t *state, uint8_t port,
2051 ibt_port_modify_flags_t flags, uint8_t init_type)
2053 sm_portinfo_t portinfo;
2054 uint32_t capmask;
2055 int status;
2056 hermon_hw_set_port_t set_port;
2059 * Return an error if either of the unsupported flags are set
2061 if ((flags & IBT_PORT_SHUTDOWN) ||
2062 (flags & IBT_PORT_SET_INIT_TYPE)) {
2063 return (IBT_NOT_SUPPORTED);
2066 bzero(&set_port, sizeof (set_port));
2069 * Determine whether we are trying to reset the QKey counter
2071 if (flags & IBT_PORT_RESET_QKEY)
2072 set_port.rqk = 1;
2074 /* Validate that specified port number is legal */
2075 if (!hermon_portnum_is_valid(state, port)) {
2076 return (IBT_HCA_PORT_INVALID);
2080 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
2081 * firmware (for the specified port number). This returns a full
2082 * PortInfo MAD (in "portinfo") from which we pull the current
2083 * capability mask. We then modify the capability mask as directed
2084 * by the "pmod_flags" field, and write the updated capability mask
2085 * using the Hermon SET_IB command (below).
2087 status = hermon_getportinfo_cmd_post(state, port,
2088 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2089 if (status != HERMON_CMD_SUCCESS) {
2090 if (status == HERMON_CMD_INVALID_STATUS) {
2091 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2093 return (ibc_get_ci_failure(0));
2097 * Convert InfiniBand-defined port capability flags to the format
2098 * specified by the IBTF. Specifically, we modify the capability
2099 * mask based on the specified values.
2101 capmask = portinfo.CapabilityMask;
2103 if (flags & IBT_PORT_RESET_SM)
2104 capmask &= ~SM_CAP_MASK_IS_SM;
2105 else if (flags & IBT_PORT_SET_SM)
2106 capmask |= SM_CAP_MASK_IS_SM;
2108 if (flags & IBT_PORT_RESET_SNMP)
2109 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2110 else if (flags & IBT_PORT_SET_SNMP)
2111 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2113 if (flags & IBT_PORT_RESET_DEVMGT)
2114 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2115 else if (flags & IBT_PORT_SET_DEVMGT)
2116 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2118 if (flags & IBT_PORT_RESET_VENDOR)
2119 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2120 else if (flags & IBT_PORT_SET_VENDOR)
2121 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2123 set_port.cap_mask = capmask;
2126 * Use the Hermon SET_PORT command to update the capability mask and
2127 * (possibly) reset the QKey violation counter for the specified port.
2128 * Note: In general, this operation shouldn't fail. If it does, then
2129 * it is an indication that something (probably in HW, but maybe in
2130 * SW) has gone seriously wrong.
2132 status = hermon_set_port_cmd_post(state, &set_port, port,
2133 HERMON_SLEEPFLAG_FOR_CONTEXT());
2134 if (status != HERMON_CMD_SUCCESS) {
2135 HERMON_WARNING(state, "failed to modify port capabilities");
2136 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
2137 "%08x\n", port, status);
2138 if (status == HERMON_CMD_INVALID_STATUS) {
2139 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2141 return (ibc_get_ci_failure(0));
2144 return (DDI_SUCCESS);
2149 * hermon_set_addr_path()
2150 * Context: Can be called from interrupt or base context.
2152 * Note: This routine is used for two purposes. It is used to fill in the
2153 * Hermon UDAV fields, and it is used to fill in the address path information
2154 * for QPs. Because the two Hermon structures are similar, common fields can
2155 * be filled in here. Because they are different, however, we pass
2156 * an additional flag to indicate which type is being filled and do each one
2157 * uniquely
2160 int hermon_srate_override = -1; /* allows ease of testing */
2163 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2164 hermon_hw_addr_path_t *path, uint_t type)
2166 uint_t gidtbl_sz;
2167 hermon_hw_udav_t *udav;
2169 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2170 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2172 udav = (hermon_hw_udav_t *)(void *)path;
2173 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
2174 path->mlid = av->av_src_path;
2175 path->rlid = av->av_dlid;
2177 switch (av->av_srate) {
2178 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */
2179 path->max_stat_rate = 7; break;
2180 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */
2181 path->max_stat_rate = 8; break;
2182 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */
2183 path->max_stat_rate = 9; break;
2184 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */
2185 path->max_stat_rate = 10; break;
2186 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */
2187 path->max_stat_rate = 11; break;
2188 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */
2189 path->max_stat_rate = 12; break;
2190 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */
2191 path->max_stat_rate = 13; break;
2192 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */
2193 path->max_stat_rate = 14; break;
2194 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */
2195 path->max_stat_rate = 15; break;
2196 case IBT_SRATE_NOT_SPECIFIED: /* Max */
2197 path->max_stat_rate = 0; break;
2198 default:
2199 return (IBT_STATIC_RATE_INVALID);
2201 if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2202 path->max_stat_rate = hermon_srate_override;
2204 /* If "grh" flag is set, then check for valid SGID index too */
2205 gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2206 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2207 return (IBT_SGID_INVALID);
2211 * Fill in all "global" values regardless of the value in the GRH
2212 * flag. Because "grh" is not set unless "av_send_grh" is set, the
2213 * hardware will ignore the other "global" values as necessary. Note:
2214 * SW does this here to enable later query operations to return
2215 * exactly the same params that were passed when the addr path was
2216 * last written.
2218 path->grh = av->av_send_grh;
2219 if (type == HERMON_ADDRPATH_QP) {
2220 path->mgid_index = av->av_sgid_ix;
2221 } else {
2223 * For Hermon UDAV, the "mgid_index" field is the index into
2224 * a combined table (not a per-port table), but having sections
2225 * for each port. So some extra calculations are necessary.
2228 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2229 av->av_sgid_ix;
2231 udav->portnum = av->av_port_num;
2235 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2236 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we
2237 * only need to do it for UDAV's. So we enforce that here.
2239 * NOTE: The entire 64 bits worth of GUID info is actually being
2240 * preserved (for UDAVs) by the callers of this function
2241 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2242 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2243 * "don't care".
2245 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2246 path->flow_label = av->av_flow;
2247 path->tclass = av->av_tclass;
2248 path->hop_limit = av->av_hop;
2249 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2250 sizeof (uint64_t));
2251 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2252 sizeof (uint64_t));
2253 } else {
2254 path->rgid_l = 0x2;
2255 path->flow_label = 0;
2256 path->tclass = 0;
2257 path->hop_limit = 0;
2258 path->rgid_h = 0;
2260 /* extract the default service level */
2261 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2263 return (DDI_SUCCESS);
2268 * hermon_get_addr_path()
2269 * Context: Can be called from interrupt or base context.
2271 * Note: Just like hermon_set_addr_path() above, this routine is used for two
2272 * purposes. It is used to read in the Hermon UDAV fields, and it is used to
2273 * read in the address path information for QPs. Because the two Hermon
2274 * structures are similar, common fields can be read in here. But because
2275 * they are slightly different, we pass an additional flag to indicate which
2276 * type is being read.
2278 void
2279 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
2280 ibt_adds_vect_t *av, uint_t type)
2282 uint_t gidtbl_sz;
2284 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2285 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2287 av->av_src_path = path->mlid;
2288 av->av_dlid = path->rlid;
2290 /* Set "av_ipd" value from max_stat_rate */
2291 switch (path->max_stat_rate) {
2292 case 7: /* 1xSDR-2.5Gb/s injection rate */
2293 av->av_srate = IBT_SRATE_2; break;
2294 case 8: /* 4xSDR-10.0Gb/s injection rate */
2295 av->av_srate = IBT_SRATE_10; break;
2296 case 9: /* 12xSDR-30Gb/s injection rate */
2297 av->av_srate = IBT_SRATE_30; break;
2298 case 10: /* 1xDDR-5Gb/s injection rate */
2299 av->av_srate = IBT_SRATE_5; break;
2300 case 11: /* 4xDDR-20Gb/s injection rate */
2301 av->av_srate = IBT_SRATE_20; break;
2302 case 12: /* xQDR-40Gb/s injection rate */
2303 av->av_srate = IBT_SRATE_40; break;
2304 case 13: /* 12xDDR-60Gb/s injection rate */
2305 av->av_srate = IBT_SRATE_60; break;
2306 case 14: /* 8xQDR-80Gb/s injection rate */
2307 av->av_srate = IBT_SRATE_80; break;
2308 case 15: /* 12xQDR-120Gb/s injection rate */
2309 av->av_srate = IBT_SRATE_120; break;
2310 case 0: /* max */
2311 av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
2312 default: /* 1x injection rate */
2313 av->av_srate = IBT_SRATE_1X;
2317 * Extract all "global" values regardless of the value in the GRH
2318 * flag. Because "av_send_grh" is set only if "grh" is set, software
2319 * knows to ignore the other "global" values as necessary. Note: SW
2320 * does it this way to enable these query operations to return exactly
2321 * the same params that were passed when the addr path was last written.
2323 av->av_send_grh = path->grh;
2324 if (type == HERMON_ADDRPATH_QP) {
2325 av->av_sgid_ix = path->mgid_index;
2326 } else {
2328 * For Hermon UDAV, the "mgid_index" field is the index into
2329 * a combined table (not a per-port table).
2331 gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2332 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2333 gidtbl_sz);
2335 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
2337 av->av_flow = path->flow_label;
2338 av->av_tclass = path->tclass;
2339 av->av_hop = path->hop_limit;
2340 /* this is for alignment issue w/ the addr path struct in Hermon */
2341 bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
2342 bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
2347 * hermon_portnum_is_valid()
2348 * Context: Can be called from interrupt or base context.
2351 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2353 uint_t max_port;
2355 max_port = state->hs_cfg_profile->cp_num_ports;
2356 if ((portnum <= max_port) && (portnum != 0)) {
2357 return (1);
2358 } else {
2359 return (0);
2365 * hermon_pkeyindex_is_valid()
2366 * Context: Can be called from interrupt or base context.
2369 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2371 uint_t max_pkeyindx;
2373 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2374 if (pkeyindx < max_pkeyindx) {
2375 return (1);
2376 } else {
2377 return (0);
2383 * hermon_queue_alloc()
2384 * Context: Can be called from interrupt or base context.
2387 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
2388 uint_t sleepflag)
2390 ddi_dma_attr_t dma_attr;
2391 int (*callback)(caddr_t);
2392 uint64_t realsize, alloc_mask;
2393 int flag, status;
2395 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2397 /* Set the callback flag appropriately */
2398 callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
2399 DDI_DMA_DONTWAIT;
2402 * Initialize many of the default DMA attributes. Then set additional
2403 * alignment restrictions as necessary for the queue memory. Also
2404 * respect the configured value for IOMMU bypass
2406 hermon_dma_attr_init(state, &dma_attr);
2407 dma_attr.dma_attr_align = qa_info->qa_bind_align;
2409 /* Allocate a DMA handle */
2410 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
2411 &qa_info->qa_dmahdl);
2412 if (status != DDI_SUCCESS) {
2413 return (DDI_FAILURE);
2417 * Determine the amount of memory to allocate, depending on the values
2418 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying
2419 * to solve here is that allocating a DMA handle with IOMMU bypass
2420 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2421 * that are less restrictive than the page size. Since we may need
2422 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
2423 * (e.g. in Hermon QP work queue memory allocation), we use the
2424 * following method to calculate how much additional memory to request,
2425 * and we enforce our own alignment on the allocated result.
2427 alloc_mask = qa_info->qa_alloc_align - 1;
2428 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2429 realsize = qa_info->qa_size;
2430 } else {
2431 realsize = qa_info->qa_size + alloc_mask;
2435 * If we are to allocate the queue from system memory, then use
2436 * ddi_dma_mem_alloc() to find the space. Otherwise, this is a
2437 * host memory allocation, use ddi_umem_alloc(). In either case,
2438 * return a pointer to the memory range allocated (including any
2439 * necessary alignment adjustments), the "real" memory pointer,
2440 * the "real" size, and a ddi_acc_handle_t to use when reading
2441 * from/writing to the memory.
2443 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2444 /* Allocate system memory for the queue */
2445 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2446 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
2447 (caddr_t *)&qa_info->qa_buf_real,
2448 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2449 if (status != DDI_SUCCESS) {
2450 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2451 return (DDI_FAILURE);
2455 * Save temporary copy of the real pointer. (This may be
2456 * modified in the last step below).
2458 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2460 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);
2462 } else { /* HERMON_QUEUE_LOCATION_USERLAND */
2464 /* Allocate userland mappable memory for the queue */
2465 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
2466 DDI_UMEM_NOSLEEP;
2467 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2468 &qa_info->qa_umemcookie);
2469 if (qa_info->qa_buf_real == NULL) {
2470 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2471 return (DDI_FAILURE);
2475 * Save temporary copy of the real pointer. (This may be
2476 * modified in the last step below).
2478 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2483 * The next to last step is to ensure that the final address
2484 * ("qa_buf_aligned") has the appropriate "alloc" alignment
2485 * restriction applied to it (if necessary).
2487 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2488 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2489 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2492 * The last step is to figure out the offset of the start relative
2493 * to the first page of the region - will be used in the eqc/cqc
2494 * passed to the HW
2496 qa_info->qa_pgoffs = (uint_t)((uintptr_t)
2497 qa_info->qa_buf_aligned & HERMON_PAGEOFFSET);
2499 return (DDI_SUCCESS);
2504 * hermon_queue_free()
2505 * Context: Can be called from interrupt or base context.
2507 void
2508 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2510 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2513 * Depending on how (i.e. from where) we allocated the memory for
2514 * this queue, we choose the appropriate method for releasing the
2515 * resources.
2517 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2519 ddi_dma_mem_free(&qa_info->qa_acchdl);
2521 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2523 ddi_umem_free(qa_info->qa_umemcookie);
2527 /* Always free the dma handle */
2528 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2532 * hermon_create_fmr_pool()
2533 * Create a pool of FMRs.
2534 * Context: Can be called from kernel context only.
2537 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2538 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2540 hermon_fmrhdl_t fmrpool;
2541 hermon_fmr_list_t *fmr, *fmr_next;
2542 hermon_mrhdl_t mr;
2543 int status;
2544 int sleep;
2545 int i;
2547 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2548 HERMON_NOSLEEP;
2549 if ((sleep == HERMON_SLEEP) &&
2550 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2551 return (IBT_INVALID_PARAM);
2554 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2555 if (fmrpool == NULL) {
2556 status = IBT_INSUFF_RESOURCE;
2557 goto fail;
2559 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))
2561 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2562 DDI_INTR_PRI(state->hs_intrmsi_pri));
2563 mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER,
2564 DDI_INTR_PRI(state->hs_intrmsi_pri));
2565 mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER,
2566 DDI_INTR_PRI(state->hs_intrmsi_pri));
2568 fmrpool->fmr_state = state;
2569 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
2570 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg;
2571 fmrpool->fmr_pool_size = 0;
2572 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr;
2573 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz;
2574 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4;
2575 fmrpool->fmr_dirty_len = 0;
2576 fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32;
2577 fmrpool->fmr_remap_len = 0;
2578 fmrpool->fmr_flags = fmr_attr->fmr_flags;
2579 fmrpool->fmr_stat_register = 0;
2580 fmrpool->fmr_max_remaps = state->hs_cfg_profile->cp_fmr_max_remaps;
2581 fmrpool->fmr_remap_gen = 1;
2583 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2584 fmrpool->fmr_dirty_list = NULL;
2585 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2586 fmrpool->fmr_remap_list = NULL;
2587 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2588 fmrpool->fmr_pool_size = fmrpool->fmr_free_len =
2589 fmr_attr->fmr_pool_size;
2591 for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
2592 status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
2593 if (status != DDI_SUCCESS) {
2594 goto fail2;
2597 fmr = (hermon_fmr_list_t *)kmem_zalloc(
2598 sizeof (hermon_fmr_list_t), sleep);
2599 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2601 fmr->fmr = mr;
2602 fmr->fmr_remaps = 0;
2603 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2604 fmr->fmr_pool = fmrpool;
2605 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2606 mr->mr_fmr = fmr;
2608 if (!i) /* address of last entry's link */
2609 fmrpool->fmr_free_list_tail = &fmr->fmr_next;
2610 fmr->fmr_next = fmrpool->fmr_free_list;
2611 fmrpool->fmr_free_list = fmr;
2614 /* Set to return pool */
2615 *fmrpoolp = fmrpool;
2617 IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS");
2618 return (IBT_SUCCESS);
2619 fail2:
2620 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2621 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2622 fmr_next = fmr->fmr_next;
2623 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2624 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2626 kmem_free(fmrpool, sizeof (*fmrpool));
2627 fail:
2628 *fmrpoolp = NULL;
2629 IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED");
2630 if (status == DDI_FAILURE) {
2631 return (ibc_get_ci_failure(0));
2632 } else {
2633 return (status);
2638 * hermon_destroy_fmr_pool()
2639 * Destroy an FMR pool and free all associated resources.
2640 * Context: Can be called from kernel context only.
2643 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2645 hermon_fmr_list_t *fmr, *fmr_next;
2647 mutex_enter(&fmrpool->fmr_lock);
2648 hermon_fmr_cleanup(fmrpool);
2650 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2651 fmr_next = fmr->fmr_next;
2653 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2654 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2656 --fmrpool->fmr_pool_size;
2658 ASSERT(fmrpool->fmr_pool_size == 0);
2659 mutex_exit(&fmrpool->fmr_lock);
2661 mutex_destroy(&fmrpool->fmr_lock);
2662 mutex_destroy(&fmrpool->dirty_lock);
2663 mutex_destroy(&fmrpool->remap_lock);
2665 kmem_free(fmrpool, sizeof (*fmrpool));
2666 IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS");
2667 return (DDI_SUCCESS);
2671 * hermon_flush_fmr_pool()
2672 * Ensure that all unmapped FMRs are fully invalidated.
2673 * Context: Can be called from kernel context only.
2675 /* ARGSUSED */
2677 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2680 * Force the unmapping of all entries on the dirty list, regardless of
2681 * whether the watermark has been hit yet.
2683 /* grab the pool lock */
2684 mutex_enter(&fmrpool->fmr_lock);
2685 hermon_fmr_cleanup(fmrpool);
2686 mutex_exit(&fmrpool->fmr_lock);
2687 return (DDI_SUCCESS);
2691 * hermon_register_physical_fmr()
2692 * Map memory into FMR
2693 * Context: Can be called from interrupt or base context.
2696 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2697 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2698 ibt_pmr_desc_t *mem_desc_p)
2700 hermon_fmr_list_t *fmr;
2701 int status;
2703 /* Check length */
2704 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2705 fmrpool->fmr_max_pages)) {
2706 return (IBT_MR_LEN_INVALID);
2709 mutex_enter(&fmrpool->fmr_lock);
2710 if (fmrpool->fmr_free_list == NULL) {
2711 if (hermon_fmr_verbose & 2)
2712 IBTF_DPRINTF_L2("fmr", "register needs remap");
2713 mutex_enter(&fmrpool->remap_lock);
2714 if (fmrpool->fmr_remap_list) {
2715 /* add to free list */
2716 *(fmrpool->fmr_free_list_tail) =
2717 fmrpool->fmr_remap_list;
2718 fmrpool->fmr_remap_list = NULL;
2719 fmrpool->fmr_free_list_tail =
2720 fmrpool->fmr_remap_list_tail;
2722 /* reset list */
2723 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2724 fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2725 fmrpool->fmr_remap_len = 0;
2727 mutex_exit(&fmrpool->remap_lock);
2729 if (fmrpool->fmr_free_list == NULL) {
2730 if (hermon_fmr_verbose & 2)
2731 IBTF_DPRINTF_L2("fmr", "register needs cleanup");
2732 hermon_fmr_cleanup(fmrpool);
2735 /* grab next free entry */
2736 fmr = fmrpool->fmr_free_list;
2737 if (fmr == NULL) {
2738 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource");
2739 cmn_err(CE_CONT, "no free fmr resource\n");
2740 mutex_exit(&fmrpool->fmr_lock);
2741 return (IBT_INSUFF_RESOURCE);
2744 if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL)
2745 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2746 fmr->fmr_next = NULL;
2747 fmrpool->fmr_stat_register++;
2748 mutex_exit(&fmrpool->fmr_lock);
2750 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2751 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
2752 mem_desc_p);
2753 if (status != DDI_SUCCESS) {
2754 return (status);
2756 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2757 if (hermon_rdma_debug & 0x4)
2758 IBTF_DPRINTF_L2("fmr", " reg: mr %p key %x",
2759 fmr->fmr, fmr->fmr->mr_rkey);
2760 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2761 if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) {
2762 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2763 fmr->fmr_remaps = 0;
2766 fmr->fmr_remaps++;
2768 *mr = (hermon_mrhdl_t)fmr->fmr;
2770 return (DDI_SUCCESS);
2774 * hermon_deregister_fmr()
2775 * Unmap FMR
2776 * Context: Can be called from kernel context only.
2779 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
2781 hermon_fmrhdl_t fmrpool;
2782 hermon_fmr_list_t *fmr, **fmrlast;
2783 int len;
2785 fmr = mr->mr_fmr;
2786 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2787 fmrpool = fmr->fmr_pool;
2789 /* mark as owned by software */
2790 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2791 *(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0;
2793 if (fmr->fmr_remaps <
2794 state->hs_cfg_profile->cp_fmr_max_remaps) {
2795 /* add to remap list */
2796 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2797 if (hermon_rdma_debug & 0x4)
2798 IBTF_DPRINTF_L2("fmr", "dereg: mr %p key %x",
2799 fmr->fmr, fmr->fmr->mr_rkey);
2800 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2801 mutex_enter(&fmrpool->remap_lock);
2802 fmr->fmr_next = NULL;
2803 *(fmrpool->fmr_remap_list_tail) = fmr;
2804 fmrpool->fmr_remap_list_tail = &fmr->fmr_next;
2805 fmrpool->fmr_remap_len++;
2807 /* conditionally add remap list back to free list */
2808 fmrlast = NULL;
2809 if (fmrpool->fmr_remap_len >=
2810 fmrpool->fmr_remap_watermark) {
2811 fmr = fmrpool->fmr_remap_list;
2812 fmrlast = fmrpool->fmr_remap_list_tail;
2813 len = fmrpool->fmr_remap_len;
2814 fmrpool->fmr_remap_len = 0;
2815 fmrpool->fmr_remap_list = NULL;
2816 fmrpool->fmr_remap_list_tail =
2817 &fmrpool->fmr_remap_list;
2819 mutex_exit(&fmrpool->remap_lock);
2820 if (fmrlast) {
2821 mutex_enter(&fmrpool->fmr_lock);
2822 *(fmrpool->fmr_free_list_tail) = fmr;
2823 fmrpool->fmr_free_list_tail = fmrlast;
2824 fmrpool->fmr_free_len += len;
2825 mutex_exit(&fmrpool->fmr_lock);
2827 } else {
2828 /* add to dirty list */
2829 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2830 if (hermon_rdma_debug & 0x4)
2831 IBTF_DPRINTF_L2("fmr", "dirty: mr %p key %x",
2832 fmr->fmr, fmr->fmr->mr_rkey);
2833 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2835 mutex_enter(&fmrpool->dirty_lock);
2836 fmr->fmr_next = NULL;
2837 *(fmrpool->fmr_dirty_list_tail) = fmr;
2838 fmrpool->fmr_dirty_list_tail = &fmr->fmr_next;
2839 fmrpool->fmr_dirty_len++;
2841 if (fmrpool->fmr_dirty_len >=
2842 fmrpool->fmr_dirty_watermark) {
2843 mutex_exit(&fmrpool->dirty_lock);
2844 mutex_enter(&fmrpool->fmr_lock);
2845 hermon_fmr_cleanup(fmrpool);
2846 mutex_exit(&fmrpool->fmr_lock);
2847 } else
2848 mutex_exit(&fmrpool->dirty_lock);
2850 return (DDI_SUCCESS);
2854 * hermon_fmr_cleanup()
2855 * Context: Called from any context.
2857 static void
2858 hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool)
2860 int status;
2862 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));
2864 if (fmrpool->fmr_stat_register == 0)
2865 return;
2867 fmrpool->fmr_stat_register = 0;
2868 membar_producer();
2870 if (hermon_fmr_verbose)
2871 IBTF_DPRINTF_L2("fmr", "TPT_SYNC");
2872 status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state,
2873 HERMON_CMD_NOSLEEP_SPIN);
2874 if (status != HERMON_CMD_SUCCESS) {
2875 cmn_err(CE_WARN, "fmr SYNC_TPT failed(%x)\n", status);
2877 fmrpool->fmr_remap_gen++;
2879 /* add everything back to the free list */
2880 mutex_enter(&fmrpool->dirty_lock);
2881 if (fmrpool->fmr_dirty_list) {
2882 /* add to free list */
2883 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list;
2884 fmrpool->fmr_dirty_list = NULL;
2885 fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail;
2887 /* reset list */
2888 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2889 fmrpool->fmr_free_len += fmrpool->fmr_dirty_len;
2890 fmrpool->fmr_dirty_len = 0;
2892 mutex_exit(&fmrpool->dirty_lock);
2894 mutex_enter(&fmrpool->remap_lock);
2895 if (fmrpool->fmr_remap_list) {
2896 /* add to free list */
2897 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list;
2898 fmrpool->fmr_remap_list = NULL;
2899 fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail;
2901 /* reset list */
2902 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2903 fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2904 fmrpool->fmr_remap_len = 0;
2906 mutex_exit(&fmrpool->remap_lock);
2908 if (fmrpool->fmr_flush_function != NULL) {
2909 (void) fmrpool->fmr_flush_function(
2910 (ibc_fmr_pool_hdl_t)fmrpool,
2911 fmrpool->fmr_flush_arg);