/*
 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
 *
 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Written by: Karen Xie (kxie@chelsio.com)
 *
 */
37 #define DRV_NAME "libcxgb"
38 #define pr_fmt(fmt) DRV_NAME ": " fmt
40 #include <linux/kernel.h>
41 #include <linux/module.h>
42 #include <linux/errno.h>
43 #include <linux/types.h>
44 #include <linux/debugfs.h>
45 #include <linux/export.h>
46 #include <linux/list.h>
47 #include <linux/skbuff.h>
48 #include <linux/pci.h>
49 #include <linux/scatterlist.h>
51 #include "libcxgb_ppm.h"
/* Direct Data Placement -
 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
 * pre-posted final destination host-memory buffers based on the
 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
 * in Data-Out PDUs. The host memory address is programmed into
 * h/w in the format of pagepod entries. The location of the
 * pagepod entry is encoded into ddp tag which is used as the base
 * for ITT/TTT.
 */
63 /* Direct-Data Placement page size adjustment
65 int cxgbi_ppm_find_page_index(struct cxgbi_ppm
*ppm
, unsigned long pgsz
)
67 struct cxgbi_tag_format
*tformat
= &ppm
->tformat
;
70 for (i
= 0; i
< DDP_PGIDX_MAX
; i
++) {
71 if (pgsz
== 1UL << (DDP_PGSZ_BASE_SHIFT
+
72 tformat
->pgsz_order
[i
])) {
73 pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
74 __func__
, ppm
->ndev
->name
, pgsz
, i
);
78 pr_info("ippm: ddp page size %lu not supported.\n", pgsz
);
82 /* DDP setup & teardown
84 static int ppm_find_unused_entries(unsigned long *bmap
,
85 unsigned int max_ppods
,
88 unsigned int align_mask
)
92 i
= bitmap_find_next_zero_area(bmap
, max_ppods
, start
, nr
, align_mask
);
94 if (unlikely(i
>= max_ppods
) && (start
> nr
))
95 i
= bitmap_find_next_zero_area(bmap
, max_ppods
, 0, start
- 1,
97 if (unlikely(i
>= max_ppods
))
100 bitmap_set(bmap
, i
, nr
);
104 static void ppm_mark_entries(struct cxgbi_ppm
*ppm
, int i
, int count
,
105 unsigned long caller_data
)
107 struct cxgbi_ppod_data
*pdata
= ppm
->ppod_data
+ i
;
109 pdata
->caller_data
= caller_data
;
110 pdata
->npods
= count
;
112 if (pdata
->color
== ((1 << PPOD_IDX_SHIFT
) - 1))
118 static int ppm_get_cpu_entries(struct cxgbi_ppm
*ppm
, unsigned int count
,
119 unsigned long caller_data
)
121 struct cxgbi_ppm_pool
*pool
;
129 pool
= per_cpu_ptr(ppm
->pool
, cpu
);
130 spin_lock_bh(&pool
->lock
);
133 i
= ppm_find_unused_entries(pool
->bmap
, ppm
->pool_index_max
,
134 pool
->next
, count
, 0);
137 spin_unlock_bh(&pool
->lock
);
141 pool
->next
= i
+ count
;
142 if (pool
->next
>= ppm
->pool_index_max
)
145 spin_unlock_bh(&pool
->lock
);
147 pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
148 __func__
, cpu
, i
, count
, i
+ cpu
* ppm
->pool_index_max
,
151 i
+= cpu
* ppm
->pool_index_max
;
152 ppm_mark_entries(ppm
, i
, count
, caller_data
);
157 static int ppm_get_entries(struct cxgbi_ppm
*ppm
, unsigned int count
,
158 unsigned long caller_data
)
162 spin_lock_bh(&ppm
->map_lock
);
163 i
= ppm_find_unused_entries(ppm
->ppod_bmap
, ppm
->bmap_index_max
,
164 ppm
->next
, count
, 0);
167 spin_unlock_bh(&ppm
->map_lock
);
168 pr_debug("ippm: NO suitable entries %u available.\n",
173 ppm
->next
= i
+ count
;
174 if (ppm
->max_index_in_edram
&& (ppm
->next
>= ppm
->max_index_in_edram
))
176 else if (ppm
->next
>= ppm
->bmap_index_max
)
179 spin_unlock_bh(&ppm
->map_lock
);
181 pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
182 __func__
, i
, count
, i
+ ppm
->pool_rsvd
, ppm
->next
,
186 ppm_mark_entries(ppm
, i
, count
, caller_data
);
191 static void ppm_unmark_entries(struct cxgbi_ppm
*ppm
, int i
, int count
)
193 pr_debug("%s: idx %d + %d.\n", __func__
, i
, count
);
195 if (i
< ppm
->pool_rsvd
) {
197 struct cxgbi_ppm_pool
*pool
;
199 cpu
= i
/ ppm
->pool_index_max
;
200 i
%= ppm
->pool_index_max
;
202 pool
= per_cpu_ptr(ppm
->pool
, cpu
);
203 spin_lock_bh(&pool
->lock
);
204 bitmap_clear(pool
->bmap
, i
, count
);
208 spin_unlock_bh(&pool
->lock
);
210 pr_debug("%s: cpu %u, idx %d, next %u.\n",
211 __func__
, cpu
, i
, pool
->next
);
213 spin_lock_bh(&ppm
->map_lock
);
216 bitmap_clear(ppm
->ppod_bmap
, i
, count
);
220 spin_unlock_bh(&ppm
->map_lock
);
222 pr_debug("%s: idx %d, next %u.\n", __func__
, i
, ppm
->next
);
226 void cxgbi_ppm_ppod_release(struct cxgbi_ppm
*ppm
, u32 idx
)
228 struct cxgbi_ppod_data
*pdata
;
230 if (idx
>= ppm
->ppmax
) {
231 pr_warn("ippm: idx too big %u > %u.\n", idx
, ppm
->ppmax
);
235 pdata
= ppm
->ppod_data
+ idx
;
237 pr_warn("ippm: idx %u, npods 0.\n", idx
);
241 pr_debug("release idx %u, npods %u.\n", idx
, pdata
->npods
);
242 ppm_unmark_entries(ppm
, idx
, pdata
->npods
);
244 EXPORT_SYMBOL(cxgbi_ppm_ppod_release
);
246 int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm
*ppm
, unsigned short nr_pages
,
247 u32 per_tag_pg_idx
, u32
*ppod_idx
,
248 u32
*ddp_tag
, unsigned long caller_data
)
250 struct cxgbi_ppod_data
*pdata
;
256 npods
= (nr_pages
+ PPOD_PAGES_MAX
- 1) >> PPOD_PAGES_SHIFT
;
258 pr_warn("%s: pages %u -> npods %u, full.\n",
259 __func__
, nr_pages
, npods
);
263 /* grab from cpu pool first */
264 idx
= ppm_get_cpu_entries(ppm
, npods
, caller_data
);
265 /* try the general pool */
267 idx
= ppm_get_entries(ppm
, npods
, caller_data
);
269 pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
270 nr_pages
, npods
, ppm
->next
, caller_data
);
274 pdata
= ppm
->ppod_data
+ idx
;
275 hwidx
= ppm
->base_idx
+ idx
;
277 tag
= cxgbi_ppm_make_ddp_tag(hwidx
, pdata
->color
);
280 tag
|= (per_tag_pg_idx
<< 30) & 0xC0000000;
285 pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
286 nr_pages
, tag
, idx
, npods
, caller_data
);
290 EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve
);
292 void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm
*ppm
, u32 tag
,
293 unsigned int tid
, unsigned int offset
,
295 struct cxgbi_pagepod_hdr
*hdr
)
297 /* The ddp tag in pagepod should be with bit 31:30 set to 0.
298 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
302 hdr
->vld_tid
= htonl(PPOD_VALID_FLAG
| PPOD_TID(tid
));
305 hdr
->pgsz_tag_clr
= htonl(tag
& ppm
->tformat
.idx_clr_mask
);
306 hdr
->max_offset
= htonl(length
);
307 hdr
->page_offset
= htonl(offset
);
309 pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
310 tag
, tid
, length
, offset
);
312 EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr
);
314 static void ppm_free(struct cxgbi_ppm
*ppm
)
319 static void ppm_destroy(struct kref
*kref
)
321 struct cxgbi_ppm
*ppm
= container_of(kref
,
324 pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
325 ppm
->ndev
->name
, ppm
);
329 free_percpu(ppm
->pool
);
333 int cxgbi_ppm_release(struct cxgbi_ppm
*ppm
)
338 rv
= kref_put(&ppm
->refcnt
, ppm_destroy
);
343 EXPORT_SYMBOL(cxgbi_ppm_release
);
345 static struct cxgbi_ppm_pool
*ppm_alloc_cpu_pool(unsigned int *total
,
346 unsigned int *pcpu_ppmax
)
348 struct cxgbi_ppm_pool
*pools
;
349 unsigned int ppmax
= (*total
) / num_possible_cpus();
350 unsigned int max
= (PCPU_MIN_UNIT_SIZE
- sizeof(*pools
)) << 3;
352 unsigned int alloc_sz
;
353 unsigned int count
= 0;
356 /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
360 /* pool size must be multiple of unsigned long */
361 bmap
= ppmax
/ BITS_PER_TYPE(unsigned long);
365 ppmax
= (bmap
* sizeof(unsigned long)) << 3;
367 alloc_sz
= sizeof(*pools
) + sizeof(unsigned long) * bmap
;
368 pools
= __alloc_percpu(alloc_sz
, __alignof__(struct cxgbi_ppm_pool
));
373 for_each_possible_cpu(cpu
) {
374 struct cxgbi_ppm_pool
*ppool
= per_cpu_ptr(pools
, cpu
);
376 memset(ppool
, 0, alloc_sz
);
377 spin_lock_init(&ppool
->lock
);
387 int cxgbi_ppm_init(void **ppm_pp
, struct net_device
*ndev
,
388 struct pci_dev
*pdev
, void *lldev
,
389 struct cxgbi_tag_format
*tformat
, unsigned int iscsi_size
,
390 unsigned int llimit
, unsigned int start
,
391 unsigned int reserve_factor
, unsigned int iscsi_edram_start
,
392 unsigned int iscsi_edram_size
)
394 struct cxgbi_ppm
*ppm
= (struct cxgbi_ppm
*)(*ppm_pp
);
395 struct cxgbi_ppm_pool
*pool
= NULL
;
396 unsigned int pool_index_max
= 0;
397 unsigned int ppmax_pool
= 0;
398 unsigned int ppod_bmap_size
;
399 unsigned int alloc_sz
;
402 if (!iscsi_edram_start
)
403 iscsi_edram_size
= 0;
405 if (iscsi_edram_size
&&
406 ((iscsi_edram_start
+ iscsi_edram_size
) != start
)) {
407 pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
408 "size 0x%x DDR start 0x%x\n",
409 iscsi_edram_start
, iscsi_edram_size
, start
);
413 if (iscsi_edram_size
) {
415 start
= iscsi_edram_start
;
418 ppmax
= (iscsi_edram_size
+ iscsi_size
) >> PPOD_SIZE_SHIFT
;
421 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
422 ndev
->name
, ppm_pp
, ppm
, ppm
->ppmax
, ppmax
);
423 kref_get(&ppm
->refcnt
);
427 if (reserve_factor
) {
428 ppmax_pool
= ppmax
/ reserve_factor
;
429 pool
= ppm_alloc_cpu_pool(&ppmax_pool
, &pool_index_max
);
435 pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
436 ndev
->name
, ppmax
, ppmax_pool
, pool_index_max
);
439 ppod_bmap_size
= BITS_TO_LONGS(ppmax
- ppmax_pool
);
440 alloc_sz
= sizeof(struct cxgbi_ppm
) +
441 ppmax
* (sizeof(struct cxgbi_ppod_data
)) +
442 ppod_bmap_size
* sizeof(unsigned long);
444 ppm
= vzalloc(alloc_sz
);
446 goto release_ppm_pool
;
448 ppm
->ppod_bmap
= (unsigned long *)(&ppm
->ppod_data
[ppmax
]);
450 if ((ppod_bmap_size
>> 3) > (ppmax
- ppmax_pool
)) {
451 unsigned int start
= ppmax
- ppmax_pool
;
452 unsigned int end
= ppod_bmap_size
>> 3;
454 bitmap_set(ppm
->ppod_bmap
, ppmax
, end
- start
);
455 pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
456 __func__
, ppmax
, ppmax_pool
, ppod_bmap_size
, start
,
459 if (iscsi_edram_size
) {
460 unsigned int first_ddr_idx
=
461 iscsi_edram_size
>> PPOD_SIZE_SHIFT
;
463 ppm
->max_index_in_edram
= first_ddr_idx
- 1;
464 bitmap_set(ppm
->ppod_bmap
, first_ddr_idx
, 1);
465 pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx
);
468 spin_lock_init(&ppm
->map_lock
);
469 kref_init(&ppm
->refcnt
);
471 memcpy(&ppm
->tformat
, tformat
, sizeof(struct cxgbi_tag_format
));
473 ppm
->ppm_pp
= ppm_pp
;
479 ppm
->llimit
= llimit
;
480 ppm
->base_idx
= start
> llimit
?
481 (start
- llimit
+ 1) >> PPOD_SIZE_SHIFT
: 0;
482 ppm
->bmap_index_max
= ppmax
- ppmax_pool
;
485 ppm
->pool_rsvd
= ppmax_pool
;
486 ppm
->pool_index_max
= pool_index_max
;
488 /* check one more time */
491 ppm
= (struct cxgbi_ppm
*)(*ppm_pp
);
493 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
494 ndev
->name
, ppm_pp
, *ppm_pp
, ppm
->ppmax
, ppmax
);
496 kref_get(&ppm
->refcnt
);
501 ppm
->tformat
.pgsz_idx_dflt
= cxgbi_ppm_find_page_index(ppm
, PAGE_SIZE
);
503 pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
504 ndev
->name
, ppm_pp
, ppm
, ppm
->base_idx
, ppm
->ppmax
, PAGE_SIZE
,
505 ppm
->tformat
.pgsz_idx_dflt
, ppm
->pool_rsvd
,
506 ppm
->pool_index_max
);
514 EXPORT_SYMBOL(cxgbi_ppm_init
);
516 unsigned int cxgbi_tagmask_set(unsigned int ppmax
)
518 unsigned int bits
= fls(ppmax
);
520 if (bits
> PPOD_IDX_MAX_SIZE
)
521 bits
= PPOD_IDX_MAX_SIZE
;
523 pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
524 ppmax
, ppmax
, bits
, 1 << (bits
+ PPOD_IDX_SHIFT
));
526 return 1 << (bits
+ PPOD_IDX_SHIFT
);
528 EXPORT_SYMBOL(cxgbi_tagmask_set
);
530 MODULE_AUTHOR("Chelsio Communications");
531 MODULE_DESCRIPTION("Chelsio common library");
532 MODULE_LICENSE("Dual BSD/GPL");