2 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
4 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * Written by: Karen Xie (kxie@chelsio.com)
/* Driver identification strings.  DRV_VERSION is what MODULE_VERSION() at the
 * end of this file publishes; pr_fmt() makes the pr_*() messages emitted from
 * this file start with the DRV_NAME prefix ("libcxgb: ").
 */
37 #define DRV_NAME "libcxgb"
38 #define DRV_VERSION "1.0.0-ko"
39 #define pr_fmt(fmt) DRV_NAME ": " fmt
41 #include <linux/kernel.h>
42 #include <linux/module.h>
43 #include <linux/errno.h>
44 #include <linux/types.h>
45 #include <linux/debugfs.h>
46 #include <linux/export.h>
47 #include <linux/list.h>
48 #include <linux/skbuff.h>
49 #include <linux/pci.h>
50 #include <linux/scatterlist.h>
52 #include "libcxgb_ppm.h"
54 /* Direct Data Placement -
55 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
56 * pre-posted final destination host-memory buffers based on the
57 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
58 * in Data-Out PDUs. The host memory address is programmed into
59 * the hardware in the format of pagepod entries. The location of the
60 * pagepod entry is encoded into the ddp tag, which is used as the base
60 * for the ITT/TTT. */
64 /* Direct-Data Placement page size adjustment */
/* cxgbi_ppm_find_page_index - look up the DDP page-size index for a page size.
 * @ppm:  pagepod manager whose tag format (ppm->tformat) is consulted
 * @pgsz: page size in bytes
 *
 * Walks tformat->pgsz_order[0 .. DDP_PGIDX_MAX - 1] looking for an entry i
 * with pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + pgsz_order[i]).  A match is
 * logged at debug level; if the loop finds nothing, an info-level
 * "not supported" message is logged.
 *
 * NOTE(review): the return statements are not visible in this listing --
 * presumably the matching index is returned on success and DDP_PGIDX_MAX
 * otherwise; confirm against the full source.
 */
66 int cxgbi_ppm_find_page_index(struct cxgbi_ppm
*ppm
, unsigned long pgsz
)
68 struct cxgbi_tag_format
*tformat
= &ppm
->tformat
;
71 for (i
= 0; i
< DDP_PGIDX_MAX
; i
++) {
/* each table entry is an order relative to DDP_PGSZ_BASE_SHIFT */
72 if (pgsz
== 1UL << (DDP_PGSZ_BASE_SHIFT
+
73 tformat
->pgsz_order
[i
])) {
74 pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
75 __func__
, ppm
->ndev
->name
, pgsz
, i
);
79 pr_info("ippm: ddp page size %lu not supported.\n", pgsz
);
83 /* DDP setup & teardown */
/* ppm_find_unused_entries - find and claim a run of free bits in a ppod bitmap.
 * @bmap:       bitmap to search; a set bit means the entry is in use
 * @max_ppods:  number of bits in @bmap
 * @align_mask: alignment mask handed to bitmap_find_next_zero_area()
 *
 * The body also uses "start" (where the search begins) and "nr" (number of
 * entries wanted); their parameter declarations are not visible in this
 * listing.
 *
 * The first search runs from "start".  If it comes back with i >= max_ppods
 * and start > nr, a second search is made from bit 0.  When a run is found,
 * nr bits starting at i are set with bitmap_set().
 *
 * NOTE(review): the failure and success return statements are not visible
 * here; confirm what callers get back when no run is found.
 */
85 static int ppm_find_unused_entries(unsigned long *bmap
,
86 unsigned int max_ppods
,
89 unsigned int align_mask
)
93 i
= bitmap_find_next_zero_area(bmap
, max_ppods
, start
, nr
, align_mask
);
/* NOTE(review): the wrap-around search below passes "start - 1" in the
 * argument slot where the first call passes "nr", i.e. it asks for a zero
 * run of start - 1 bits rather than nr bits -- confirm this is intended.
 */
95 if (unlikely(i
>= max_ppods
) && (start
> nr
))
96 i
= bitmap_find_next_zero_area(bmap
, max_ppods
, 0, start
- 1,
98 if (unlikely(i
>= max_ppods
))
/* claim the run that was found */
101 bitmap_set(bmap
, i
, nr
);
/* ppm_mark_entries - record ownership information for a reserved ppod run.
 * @ppm:         pagepod manager
 * @i:           index of the first ppod of the run in ppm->ppod_data[]
 * @count:       number of ppods in the run, stored as pdata->npods
 * @caller_data: opaque caller cookie, stored as pdata->caller_data
 *
 * Only the first entry of the run (ppm->ppod_data + i) is written.  The
 * entry's colour is then compared with its largest value,
 * (1 << PPOD_IDX_SHIFT) - 1.
 *
 * NOTE(review): both branches of that comparison are missing from this
 * listing -- presumably the colour wraps to 0 at the maximum and is
 * incremented otherwise; confirm.
 */
105 static void ppm_mark_entries(struct cxgbi_ppm
*ppm
, int i
, int count
,
106 unsigned long caller_data
)
108 struct cxgbi_ppod_data
*pdata
= ppm
->ppod_data
+ i
;
110 pdata
->caller_data
= caller_data
;
111 pdata
->npods
= count
;
113 if (pdata
->color
== ((1 << PPOD_IDX_SHIFT
) - 1))
/* ppm_get_cpu_entries - reserve a ppod run from a per-cpu pool.
 * @ppm:         pagepod manager
 * @count:       number of consecutive ppods wanted
 * @caller_data: opaque cookie recorded with the reservation
 *
 * Under the pool's lock (spin_lock_bh), searches pool->bmap for @count free
 * entries starting at pool->next, with ppm->pool_index_max as the bitmap
 * size and no alignment constraint.  After a successful search pool->next is
 * moved to i + count and compared with ppm->pool_index_max.  The pool-local
 * index is then turned into a global one by adding cpu * ppm->pool_index_max
 * and passed to ppm_mark_entries().
 *
 * NOTE(review): how "cpu" is obtained, the failure path (the first
 * spin_unlock_bh() below presumably belongs to it), the body of the
 * pool->next range check and the return statements are not visible in this
 * listing.
 */
119 static int ppm_get_cpu_entries(struct cxgbi_ppm
*ppm
, unsigned int count
,
120 unsigned long caller_data
)
122 struct cxgbi_ppm_pool
*pool
;
130 pool
= per_cpu_ptr(ppm
->pool
, cpu
);
131 spin_lock_bh(&pool
->lock
);
134 i
= ppm_find_unused_entries(pool
->bmap
, ppm
->pool_index_max
,
135 pool
->next
, count
, 0);
138 spin_unlock_bh(&pool
->lock
);
/* the next search in this pool resumes right after this run */
142 pool
->next
= i
+ count
;
143 if (pool
->next
>= ppm
->pool_index_max
)
146 spin_unlock_bh(&pool
->lock
);
148 pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
149 __func__
, cpu
, i
, count
, i
+ cpu
* ppm
->pool_index_max
,
/* convert the pool-local index into an index into ppm->ppod_data[] */
152 i
+= cpu
* ppm
->pool_index_max
;
153 ppm_mark_entries(ppm
, i
, count
, caller_data
);
/* ppm_get_entries - reserve a ppod run from the general (shared) bitmap.
 * @ppm:         pagepod manager
 * @count:       number of consecutive ppods wanted
 * @caller_data: opaque cookie recorded with the reservation
 *
 * Under ppm->map_lock (spin_lock_bh), searches ppm->ppod_bmap for @count
 * free entries starting at ppm->next, with ppm->bmap_index_max as the bitmap
 * size and no alignment constraint.  After a successful search ppm->next is
 * moved to i + count and range-checked, first against
 * ppm->max_index_in_edram (when that is non-zero) and otherwise against
 * ppm->bmap_index_max.  The run is finally handed to ppm_mark_entries().
 *
 * NOTE(review): the failure path, the bodies of the two range checks and
 * the return statements are not visible in this listing.  The debug message
 * prints i + ppm->pool_rsvd; whether i itself is offset by pool_rsvd before
 * ppm_mark_entries() cannot be seen here -- confirm.
 */
158 static int ppm_get_entries(struct cxgbi_ppm
*ppm
, unsigned int count
,
159 unsigned long caller_data
)
163 spin_lock_bh(&ppm
->map_lock
);
164 i
= ppm_find_unused_entries(ppm
->ppod_bmap
, ppm
->bmap_index_max
,
165 ppm
->next
, count
, 0);
168 spin_unlock_bh(&ppm
->map_lock
);
169 pr_debug("ippm: NO suitable entries %u available.\n",
/* the next search resumes right after this run */
174 ppm
->next
= i
+ count
;
175 if (ppm
->max_index_in_edram
&& (ppm
->next
>= ppm
->max_index_in_edram
))
177 else if (ppm
->next
>= ppm
->bmap_index_max
)
180 spin_unlock_bh(&ppm
->map_lock
);
182 pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
183 __func__
, i
, count
, i
+ ppm
->pool_rsvd
, ppm
->next
,
187 ppm_mark_entries(ppm
, i
, count
, caller_data
);
/* ppm_unmark_entries - give a reserved ppod run back to its bitmap.
 * @ppm:   pagepod manager
 * @i:     global index of the first ppod of the run
 * @count: number of ppods in the run
 *
 * Indices below ppm->pool_rsvd belong to the per-cpu pools: the owning cpu
 * is i / ppm->pool_index_max, the pool-local index is i % ppm->pool_index_max,
 * and the bits are cleared in that pool's bitmap under pool->lock.  The code
 * that follows clears the bits in ppm->ppod_bmap under ppm->map_lock.
 *
 * NOTE(review): the "else" that separates the two paths, any index
 * adjustment on the general-pool path and any update of pool->next /
 * ppm->next are not visible in this listing.
 */
192 static void ppm_unmark_entries(struct cxgbi_ppm
*ppm
, int i
, int count
)
194 pr_debug("%s: idx %d + %d.\n", __func__
, i
, count
);
196 if (i
< ppm
->pool_rsvd
) {
198 struct cxgbi_ppm_pool
*pool
;
/* split the global index into (cpu, pool-local index) */
200 cpu
= i
/ ppm
->pool_index_max
;
201 i
%= ppm
->pool_index_max
;
203 pool
= per_cpu_ptr(ppm
->pool
, cpu
);
204 spin_lock_bh(&pool
->lock
);
205 bitmap_clear(pool
->bmap
, i
, count
);
209 spin_unlock_bh(&pool
->lock
);
211 pr_debug("%s: cpu %u, idx %d, next %u.\n",
212 __func__
, cpu
, i
, pool
->next
);
214 spin_lock_bh(&ppm
->map_lock
);
217 bitmap_clear(ppm
->ppod_bmap
, i
, count
);
221 spin_unlock_bh(&ppm
->map_lock
);
223 pr_debug("%s: idx %d, next %u.\n", __func__
, i
, ppm
->next
);
/* cxgbi_ppm_ppod_release - release the ppod run that starts at @idx.
 * @ppm: pagepod manager
 * @idx: index of the first ppod of the run
 *
 * An @idx at or beyond ppm->ppmax is rejected with a warning.  Otherwise the
 * run length is taken from ppm->ppod_data[idx].npods and the run is handed
 * to ppm_unmark_entries().  A second warning ("npods 0") exists for entries
 * that carry no run length.
 *
 * NOTE(review): the condition guarding the "npods 0" warning, the early
 * returns and any clearing of pdata->npods are not visible in this listing.
 * The first warning prints '>' although the check is '>='.
 */
227 void cxgbi_ppm_ppod_release(struct cxgbi_ppm
*ppm
, u32 idx
)
229 struct cxgbi_ppod_data
*pdata
;
231 if (idx
>= ppm
->ppmax
) {
232 pr_warn("ippm: idx too big %u > %u.\n", idx
, ppm
->ppmax
);
236 pdata
= ppm
->ppod_data
+ idx
;
238 pr_warn("ippm: idx %u, npods 0.\n", idx
);
242 pr_debug("release idx %u, npods %u.\n", idx
, pdata
->npods
);
243 ppm_unmark_entries(ppm
, idx
, pdata
->npods
);
245 EXPORT_SYMBOL(cxgbi_ppm_ppod_release
);
/* cxgbi_ppm_ppods_reserve - reserve pagepods for a buffer and build its ddp tag.
 * @ppm:            pagepod manager
 * @nr_pages:       number of pages the buffer spans
 * @per_tag_pg_idx: page-size index, placed in bits 31:30 of the tag
 * @ppod_idx:       output pointer for the ppod index (its store is not
 *                  visible in this listing)
 * @ddp_tag:        output pointer for the tag (its store is not visible in
 *                  this listing)
 * @caller_data:    opaque cookie recorded with the reservation
 *
 * The number of ppods needed is (nr_pages + PPOD_PAGES_MAX - 1) >>
 * PPOD_PAGES_SHIFT.  The per-cpu pool is tried first, then the general pool.
 * The tag is built by cxgbi_ppm_make_ddp_tag() from the hardware index
 * (ppm->base_idx + idx) and the colour of the first ppod entry.
 *
 * NOTE(review): the rounding above is a ceiling division only if
 * PPOD_PAGES_MAX == 1 << PPOD_PAGES_SHIFT -- confirm in the header.  The
 * conditions guarding the warning, the fallback and the "nospc" message, as
 * well as the return statements, are not visible in this listing.
 */
247 int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm
*ppm
, unsigned short nr_pages
,
248 u32 per_tag_pg_idx
, u32
*ppod_idx
,
249 u32
*ddp_tag
, unsigned long caller_data
)
251 struct cxgbi_ppod_data
*pdata
;
257 npods
= (nr_pages
+ PPOD_PAGES_MAX
- 1) >> PPOD_PAGES_SHIFT
;
259 pr_warn("%s: pages %u -> npods %u, full.\n",
260 __func__
, nr_pages
, npods
);
264 /* grab from cpu pool first */
265 idx
= ppm_get_cpu_entries(ppm
, npods
, caller_data
);
266 /* try the general pool */
268 idx
= ppm_get_entries(ppm
, npods
, caller_data
);
270 pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
271 nr_pages
, npods
, ppm
->next
, caller_data
);
275 pdata
= ppm
->ppod_data
+ idx
;
/* hardware index = software index offset by the region's base index */
276 hwidx
= ppm
->base_idx
+ idx
;
278 tag
= cxgbi_ppm_make_ddp_tag(hwidx
, pdata
->color
);
/* the two top bits of the tag carry the page-size index */
281 tag
|= (per_tag_pg_idx
<< 30) & 0xC0000000;
286 pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
287 nr_pages
, tag
, idx
, npods
, caller_data
);
291 EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve
);
/* cxgbi_ppm_make_ppod_hdr - fill in a pagepod header.
 * @ppm:    pagepod manager; supplies tformat.idx_clr_mask
 * @tag:    ddp tag; only the bits selected by idx_clr_mask are stored
 * @tid:    connection tid, stored together with PPOD_VALID_FLAG
 * @offset: stored as hdr->page_offset
 * @hdr:    header to fill
 *
 * The body also stores "length" as hdr->max_offset; that parameter's
 * declaration is not visible in this listing.  Every field is converted with
 * htonl() before it is stored, and the resulting values are logged at debug
 * level.
 */
293 void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm
*ppm
, u32 tag
,
294 unsigned int tid
, unsigned int offset
,
296 struct cxgbi_pagepod_hdr
*hdr
)
298 /* The ddp tag in pagepod should be with bit 31:30 set to 0.
299 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
303 hdr
->vld_tid
= htonl(PPOD_VALID_FLAG
| PPOD_TID(tid
));
306 hdr
->pgsz_tag_clr
= htonl(tag
& ppm
->tformat
.idx_clr_mask
);
307 hdr
->max_offset
= htonl(length
);
308 hdr
->page_offset
= htonl(offset
);
310 pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
311 tag
, tid
, length
, offset
);
313 EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr
);
/* ppm_free - free a pagepod manager.
 * @ppm: pagepod manager to free
 *
 * NOTE(review): the body is not visible in this listing.  Presumably it is
 * the counterpart of the vzalloc() in cxgbi_ppm_init(); confirm.
 */
315 static void ppm_free(struct cxgbi_ppm
*ppm
)
/* ppm_destroy - kref release callback for a pagepod manager.
 * @kref: the refcount embedded in the struct cxgbi_ppm being destroyed
 *
 * Passed to kref_put() by cxgbi_ppm_release().  Recovers the enclosing
 * cxgbi_ppm with container_of(), logs the teardown and frees the per-cpu
 * pools with free_percpu().
 *
 * NOTE(review): the remaining container_of() arguments and any further
 * teardown steps are not visible in this listing.
 */
320 static void ppm_destroy(struct kref
*kref
)
322 struct cxgbi_ppm
*ppm
= container_of(kref
,
325 pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
326 ppm
->ndev
->name
, ppm
);
330 free_percpu(ppm
->pool
);
/* cxgbi_ppm_release - drop one reference on a pagepod manager.
 * @ppm: pagepod manager
 *
 * Calls kref_put() with ppm_destroy() as the release function and keeps the
 * result in rv.
 *
 * NOTE(review): any NULL check on @ppm and the return statements are not
 * visible in this listing.
 */
334 int cxgbi_ppm_release(struct cxgbi_ppm
*ppm
)
339 rv
= kref_put(&ppm
->refcnt
, ppm_destroy
);
344 EXPORT_SYMBOL(cxgbi_ppm_release
);
/* ppm_alloc_cpu_pool - allocate and initialise the per-cpu ppod pools.
 * @total:      in: number of ppods set aside for all per-cpu pools
 * @pcpu_ppmax: per-cpu pool size (its store is not visible in this listing)
 *
 * Each possible cpu starts with *total / num_possible_cpus() ppods.  "max"
 * is the number of bitmap bits that fit in PCPU_MIN_UNIT_SIZE bytes after
 * the pool header.  The per-cpu count is rounded down to a whole number of
 * unsigned longs, and one header plus that bitmap is allocated per cpu with
 * __alloc_percpu().  Every cpu's pool is zeroed and its lock initialised.
 *
 * NOTE(review): the clamp against "max", the handling of a zero-sized bitmap
 * and of allocation failure, the write-back through @total / @pcpu_ppmax and
 * the return statements are not visible in this listing.
 */
346 static struct cxgbi_ppm_pool
*ppm_alloc_cpu_pool(unsigned int *total
,
347 unsigned int *pcpu_ppmax
)
349 struct cxgbi_ppm_pool
*pools
;
350 unsigned int ppmax
= (*total
) / num_possible_cpus();
/* bytes left after the pool header, converted to bits (<< 3) */
351 unsigned int max
= (PCPU_MIN_UNIT_SIZE
- sizeof(*pools
)) << 3;
353 unsigned int alloc_sz
;
354 unsigned int count
= 0;
357 /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
361 /* pool size must be multiple of unsigned long */
362 bmap
= ppmax
/ BITS_PER_TYPE(unsigned long);
/* back to a bit count: whole longs only */
366 ppmax
= (bmap
* sizeof(unsigned long)) << 3;
368 alloc_sz
= sizeof(*pools
) + sizeof(unsigned long) * bmap
;
369 pools
= __alloc_percpu(alloc_sz
, __alignof__(struct cxgbi_ppm_pool
));
374 for_each_possible_cpu(cpu
) {
375 struct cxgbi_ppm_pool
*ppool
= per_cpu_ptr(pools
, cpu
);
377 memset(ppool
, 0, alloc_sz
);
378 spin_lock_init(&ppool
->lock
);
/* cxgbi_ppm_init - create the pagepod manager for a device, or take another
 * reference on one that already exists.
 * @ppm_pp:            location holding the device's ppm pointer
 * @ndev:              net device, used for log messages here
 * @pdev:              PCI device (its use is not visible in this listing)
 * @lldev:             lower-level driver handle (its use is not visible in
 *                     this listing)
 * @tformat:           tag format, copied into the new ppm
 * @iscsi_size:        size of the iSCSI ppod region
 * @llimit:            lower limit of the ppod region
 * @start:             start of the ppod region
 * @reserve_factor:    when non-zero, ppmax / reserve_factor ppods are set
 *                     aside for the per-cpu pools
 * @iscsi_edram_start: start of the EDRAM ppod region; 0 disables EDRAM use
 * @iscsi_edram_size:  size of the EDRAM ppod region
 *
 * NOTE(review): many lines of this function (conditions, error paths,
 * several field assignments and all return statements) are missing from
 * this listing; the comments below describe only what is visible.
 */
388 int cxgbi_ppm_init(void **ppm_pp
, struct net_device
*ndev
,
389 struct pci_dev
*pdev
, void *lldev
,
390 struct cxgbi_tag_format
*tformat
, unsigned int iscsi_size
,
391 unsigned int llimit
, unsigned int start
,
392 unsigned int reserve_factor
, unsigned int iscsi_edram_start
,
393 unsigned int iscsi_edram_size
)
395 struct cxgbi_ppm
*ppm
= (struct cxgbi_ppm
*)(*ppm_pp
);
396 struct cxgbi_ppm_pool
*pool
= NULL
;
397 unsigned int pool_index_max
= 0;
398 unsigned int ppmax_pool
= 0;
399 unsigned int ppod_bmap_size
;
400 unsigned int alloc_sz
;
/* no EDRAM start address means no EDRAM region at all */
403 if (!iscsi_edram_start
)
404 iscsi_edram_size
= 0;
/* an EDRAM region must end exactly where the region at "start" begins */
406 if (iscsi_edram_size
&&
407 ((iscsi_edram_start
+ iscsi_edram_size
) != start
)) {
408 pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
409 "size 0x%x DDR start 0x%x\n",
410 iscsi_edram_start
, iscsi_edram_size
, start
);
/* with EDRAM in use the combined region starts at the EDRAM start */
414 if (iscsi_edram_size
) {
416 start
= iscsi_edram_start
;
/* total ppod count across both regions */
419 ppmax
= (iscsi_edram_size
+ iscsi_size
) >> PPOD_SIZE_SHIFT
;
422 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
423 ndev
->name
, ppm_pp
, ppm
, ppm
->ppmax
, ppmax
);
424 kref_get(&ppm
->refcnt
);
/* carve the per-cpu pools out of the total when asked to */
428 if (reserve_factor
) {
429 ppmax_pool
= ppmax
/ reserve_factor
;
430 pool
= ppm_alloc_cpu_pool(&ppmax_pool
, &pool_index_max
);
436 pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
437 ndev
->name
, ppmax
, ppmax_pool
, pool_index_max
);
/* One allocation holds the ppm struct, ppmax ppod_data entries and the
 * general-pool bitmap.  ppod_bmap_size is a count of unsigned longs.
 */
440 ppod_bmap_size
= BITS_TO_LONGS(ppmax
- ppmax_pool
);
441 alloc_sz
= sizeof(struct cxgbi_ppm
) +
442 ppmax
* (sizeof(struct cxgbi_ppod_data
)) +
443 ppod_bmap_size
* sizeof(unsigned long);
445 ppm
= vzalloc(alloc_sz
);
447 goto release_ppm_pool
;
/* the bitmap lives immediately after the last ppod_data entry */
449 ppm
->ppod_bmap
= (unsigned long *)(&ppm
->ppod_data
[ppmax
]);
/* NOTE(review): ppod_bmap_size counts longs, yet it is shifted right by 3
 * here while the log text below says "* 8"; bitmap_set() also starts at bit
 * ppmax although the bitmap was sized for ppmax - ppmax_pool bits.  The
 * local "start" shadows the function parameter of the same name.  Verify
 * the intent of this branch.
 */
451 if ((ppod_bmap_size
>> 3) > (ppmax
- ppmax_pool
)) {
452 unsigned int start
= ppmax
- ppmax_pool
;
453 unsigned int end
= ppod_bmap_size
>> 3;
455 bitmap_set(ppm
->ppod_bmap
, ppmax
, end
- start
);
456 pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
457 __func__
, ppmax
, ppmax_pool
, ppod_bmap_size
, start
,
/* mark the entry at the first index past the EDRAM region as in use and
 * remember the last EDRAM index
 */
460 if (iscsi_edram_size
) {
461 unsigned int first_ddr_idx
=
462 iscsi_edram_size
>> PPOD_SIZE_SHIFT
;
464 ppm
->max_index_in_edram
= first_ddr_idx
- 1;
465 bitmap_set(ppm
->ppod_bmap
, first_ddr_idx
, 1);
466 pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx
);
469 spin_lock_init(&ppm
->map_lock
);
470 kref_init(&ppm
->refcnt
);
472 memcpy(&ppm
->tformat
, tformat
, sizeof(struct cxgbi_tag_format
));
474 ppm
->ppm_pp
= ppm_pp
;
480 ppm
->llimit
= llimit
;
/* index of the first ppod relative to llimit; 0 when start <= llimit */
481 ppm
->base_idx
= start
> llimit
?
482 (start
- llimit
+ 1) >> PPOD_SIZE_SHIFT
: 0;
/* the general pool covers whatever the per-cpu pools did not take */
483 ppm
->bmap_index_max
= ppmax
- ppmax_pool
;
486 ppm
->pool_rsvd
= ppmax_pool
;
487 ppm
->pool_index_max
= pool_index_max
;
489 /* check one more time */
492 ppm
= (struct cxgbi_ppm
*)(*ppm_pp
);
494 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
495 ndev
->name
, ppm_pp
, *ppm_pp
, ppm
->ppmax
, ppmax
);
497 kref_get(&ppm
->refcnt
);
/* default page-size index is the one matching the host PAGE_SIZE */
502 ppm
->tformat
.pgsz_idx_dflt
= cxgbi_ppm_find_page_index(ppm
, PAGE_SIZE
);
504 pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
505 ndev
->name
, ppm_pp
, ppm
, ppm
->base_idx
, ppm
->ppmax
, PAGE_SIZE
,
506 ppm
->tformat
.pgsz_idx_dflt
, ppm
->pool_rsvd
,
507 ppm
->pool_index_max
);
515 EXPORT_SYMBOL(cxgbi_ppm_init
);
517 unsigned int cxgbi_tagmask_set(unsigned int ppmax
)
519 unsigned int bits
= fls(ppmax
);
521 if (bits
> PPOD_IDX_MAX_SIZE
)
522 bits
= PPOD_IDX_MAX_SIZE
;
524 pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
525 ppmax
, ppmax
, bits
, 1 << (bits
+ PPOD_IDX_SHIFT
));
527 return 1 << (bits
+ PPOD_IDX_SHIFT
);
529 EXPORT_SYMBOL(cxgbi_tagmask_set
);
/* Module metadata; the version string is the DRV_VERSION macro defined at
 * the top of this file.
 */
531 MODULE_AUTHOR("Chelsio Communications");
532 MODULE_DESCRIPTION("Chelsio common library");
533 MODULE_VERSION(DRV_VERSION
);
534 MODULE_LICENSE("Dual BSD/GPL");