/*
 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
 *
 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Written by: Karen Xie (kxie@chelsio.com)
 */
#define DRV_NAME	"libcxgb"
#define DRV_VERSION	"1.0.0-ko"
#define pr_fmt(fmt)	DRV_NAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>

#include "libcxgb_ppm.h"
/* Direct Data Placement -
 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
 * pre-posted final destination host-memory buffers based on the
 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
 * in Data-Out PDUs. The host memory address is programmed into
 * h/w in the format of pagepod entries. The location of the
 * pagepod entry is encoded into the ddp tag, which is used as the
 * base for ITT/TTT.
 */
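
/* A minimal usage sketch of this API, assuming cxgbi_ppm_init() has
 * already been called by the lower-level driver. The function name
 * ddp_setup() and its surrounding context are hypothetical and error
 * handling is abbreviated; the real users are the cxgbi/cxgbit ULP
 * drivers.
 *
 *	static int ddp_setup(struct cxgbi_ppm *ppm, unsigned int tid,
 *			     unsigned short nr_pages, unsigned int xferlen)
 *	{
 *		struct cxgbi_pagepod_hdr hdr;
 *		u32 ppod_idx, ddp_tag;
 *		int npods;
 *
 *		npods = cxgbi_ppm_ppods_reserve(ppm, nr_pages, 0, &ppod_idx,
 *						&ddp_tag, 0);
 *		if (npods < 0)
 *			return npods;	// fall back to non-DDP I/O
 *
 *		cxgbi_ppm_make_ppod_hdr(ppm, ddp_tag, tid, 0, xferlen, &hdr);
 *		// ... write npods pagepods to h/w and use ddp_tag as the
 *		// ITT/TTT base; once the transfer completes:
 *		cxgbi_ppm_ppod_release(ppm, ppod_idx);
 *		return 0;
 *	}
 */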
/* Direct-Data Placement page size adjustment
 */
int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
{
	struct cxgbi_tag_format *tformat = &ppm->tformat;
	int i;

	for (i = 0; i < DDP_PGIDX_MAX; i++) {
		if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
					tformat->pgsz_order[i])) {
			pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
				 __func__, ppm->ndev->name, pgsz, i);
			return i;
		}
	}
	pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
	return DDP_PGIDX_MAX;
}
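
/* For example, with the 4KB base page (DDP_PGSZ_BASE_SHIFT == 12) and a
 * pgsz_order[] of {0, 1, 2, 4} -- an assumed, h/w-dependent table that
 * the lower-level driver programs -- the supported DDP page sizes are
 * 4KB, 8KB, 16KB and 64KB, so a 64KB page maps to index 3:
 *	1UL << (12 + 4) == 65536
 */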
/* DDP setup & teardown
 */
static int ppm_find_unused_entries(unsigned long *bmap,
				   unsigned int max_ppods,
				   unsigned int start,
				   unsigned int nr,
				   unsigned int align_mask)
{
	unsigned long i;

	i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);

	if (unlikely(i >= max_ppods) && (start > nr))
		i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
					       align_mask);
	if (unlikely(i >= max_ppods))
		return -ENOSPC;

	bitmap_set(bmap, i, nr);
	return (int)i;
}
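
/* Note the two-pass search above: the first pass scans forward from the
 * caller's last allocation point ("start"); only if that fails does a
 * second pass retry from bit 0 before giving up with -ENOSPC.
 */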
static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
			     unsigned long caller_data)
{
	struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;

	pdata->caller_data = caller_data;
	pdata->npods = count;

	if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
		pdata->color = 0;
	else
		pdata->color++;
}
static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
			       unsigned long caller_data)
{
	struct cxgbi_ppm_pool *pool;
	unsigned int cpu;
	int i;

	/* no per-cpu pool was set up (reserve_factor 0 or pool allocation
	 * failure); let the caller fall back to the general pool
	 */
	if (!ppm->pool)
		return -EINVAL;

	cpu = get_cpu();
	pool = per_cpu_ptr(ppm->pool, cpu);
	spin_lock_bh(&pool->lock);
	put_cpu();

	i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
				    pool->next, count, 0);
	if (i < 0) {
		pool->next = 0;
		spin_unlock_bh(&pool->lock);
		return -ENOSPC;
	}

	pool->next = i + count;
	if (pool->next >= ppm->pool_index_max)
		pool->next = 0;

	spin_unlock_bh(&pool->lock);

	pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
		 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
		 pool->next);

	i += cpu * ppm->pool_index_max;
	ppm_mark_entries(ppm, i, count, caller_data);

	return i;
}
static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
			   unsigned long caller_data)
{
	int i;

	spin_lock_bh(&ppm->map_lock);
	i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
				    ppm->next, count, 0);
	if (i < 0) {
		ppm->next = 0;
		spin_unlock_bh(&ppm->map_lock);
		pr_debug("ippm: NO suitable entries %u available.\n",
			 count);
		return -ENOSPC;
	}

	ppm->next = i + count;
	if (ppm->next >= ppm->bmap_index_max)
		ppm->next = 0;

	spin_unlock_bh(&ppm->map_lock);

	pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
		 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
		 caller_data);

	i += ppm->pool_rsvd;
	ppm_mark_entries(ppm, i, count, caller_data);

	return i;
}
static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
{
	pr_debug("%s: idx %d + %d.\n", __func__, i, count);

	if (i < ppm->pool_rsvd) {
		unsigned int cpu;
		struct cxgbi_ppm_pool *pool;

		cpu = i / ppm->pool_index_max;
		i %= ppm->pool_index_max;

		pool = per_cpu_ptr(ppm->pool, cpu);
		spin_lock_bh(&pool->lock);
		bitmap_clear(pool->bmap, i, count);

		if (i < pool->next)
			pool->next = i;
		spin_unlock_bh(&pool->lock);

		pr_debug("%s: cpu %u, idx %d, next %u.\n",
			 __func__, cpu, i, pool->next);
	} else {
		spin_lock_bh(&ppm->map_lock);

		i -= ppm->pool_rsvd;
		bitmap_clear(ppm->ppod_bmap, i, count);

		if (i < ppm->next)
			ppm->next = i;
		spin_unlock_bh(&ppm->map_lock);

		pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
	}
}
void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
{
	struct cxgbi_ppod_data *pdata;

	if (idx >= ppm->ppmax) {
		pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
		return;
	}

	pdata = ppm->ppod_data + idx;
	if (!pdata->npods) {
		pr_warn("ippm: idx %u, npods 0.\n", idx);
		return;
	}

	pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
	ppm_unmark_entries(ppm, idx, pdata->npods);
}
EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
			    u32 per_tag_pg_idx, u32 *ppod_idx,
			    u32 *ddp_tag, unsigned long caller_data)
{
	struct cxgbi_ppod_data *pdata;
	unsigned int npods;
	int idx = -1;
	unsigned int hwidx;
	u32 tag;

	npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
	if (!npods) {
		pr_warn("%s: pages %u -> npods %u, full.\n",
			__func__, nr_pages, npods);
		return -EINVAL;
	}

	/* grab from cpu pool first */
	idx = ppm_get_cpu_entries(ppm, npods, caller_data);
	/* try the general pool */
	if (idx < 0)
		idx = ppm_get_entries(ppm, npods, caller_data);
	if (idx < 0) {
		pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
			 nr_pages, npods, ppm->next, caller_data);
		return idx;
	}

	pdata = ppm->ppod_data + idx;
	hwidx = ppm->base_idx + idx;

	tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);

	if (per_tag_pg_idx)
		tag |= (per_tag_pg_idx << 30) & 0xC0000000;

	*ppod_idx = idx;
	*ddp_tag = tag;

	pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
		 nr_pages, tag, idx, npods, caller_data);

	return npods;
}
EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
			     unsigned int tid, unsigned int offset,
			     unsigned int length,
			     struct cxgbi_pagepod_hdr *hdr)
{
	/* The ddp tag in pagepod should be with bit 31:30 set to 0.
	 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
	 */
	tag &= 0x3FFFFFFF;

	hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));

	hdr->rsvd = 0;
	hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
	hdr->max_offset = htonl(length);
	hdr->page_offset = htonl(offset);

	pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
		 tag, tid, length, offset);
}
EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
static void ppm_free(struct cxgbi_ppm *ppm)
{
	vfree(ppm);
}

static void ppm_destroy(struct kref *kref)
{
	struct cxgbi_ppm *ppm = container_of(kref,
					     struct cxgbi_ppm,
					     refcnt);
	pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
		ppm->ndev->name, ppm);

	*ppm->ppm_pp = NULL;

	free_percpu(ppm->pool);
	ppm_free(ppm);
}

int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
{
	if (ppm) {
		int rv;

		rv = kref_put(&ppm->refcnt, ppm_destroy);
		return rv;
	}
	return 1;
}
EXPORT_SYMBOL(cxgbi_ppm_release);
static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
						 unsigned int *pcpu_ppmax)
{
	struct cxgbi_ppm_pool *pools;
	unsigned int ppmax = (*total) / num_possible_cpus();
	unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
	unsigned int bmap;
	unsigned int alloc_sz;
	unsigned int count = 0;
	unsigned int cpu;

	/* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
	if (ppmax > max)
		ppmax = max;

	/* pool size must be multiple of unsigned long, rounded down so the
	 * reported total never exceeds what was asked for
	 */
	bmap = ppmax / BITS_PER_TYPE(unsigned long);
	if (!bmap)
		return NULL;

	ppmax = (bmap * sizeof(unsigned long)) << 3;

	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
	pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));

	if (!pools)
		return NULL;

	for_each_possible_cpu(cpu) {
		struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);

		memset(ppool, 0, alloc_sz);
		spin_lock_init(&ppool->lock);
		count += ppmax;
	}

	*total = count;
	*pcpu_ppmax = ppmax;

	return pools;
}
int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
		   struct pci_dev *pdev, void *lldev,
		   struct cxgbi_tag_format *tformat,
		   unsigned int ppmax,
		   unsigned int llimit,
		   unsigned int start,
		   unsigned int reserve_factor)
{
	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
	struct cxgbi_ppm_pool *pool = NULL;
	unsigned int ppmax_pool = 0;
	unsigned int pool_index_max = 0;
	unsigned int alloc_sz;
	unsigned int ppod_bmap_size;

	if (ppm) {
		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
			ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
		kref_get(&ppm->refcnt);
		return 1;
	}

	if (reserve_factor) {
		ppmax_pool = ppmax / reserve_factor;
		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
		if (!pool) {
			/* fall back to the general pool only */
			ppmax_pool = 0;
			reserve_factor = 0;
		}

		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
			 ndev->name, ppmax, ppmax_pool, pool_index_max);
	}

	ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
	alloc_sz = sizeof(struct cxgbi_ppm) +
			ppmax * (sizeof(struct cxgbi_ppod_data)) +
			ppod_bmap_size * sizeof(unsigned long);

	ppm = vmalloc(alloc_sz);
	if (!ppm)
		goto release_ppm_pool;

	memset(ppm, 0, alloc_sz);

	ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);

	if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
		unsigned int start = ppmax - ppmax_pool;
		unsigned int end = ppod_bmap_size >> 3;

		bitmap_set(ppm->ppod_bmap, ppmax, end - start);
		pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
			end);
	}

	spin_lock_init(&ppm->map_lock);
	kref_init(&ppm->refcnt);

	memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));

	ppm->ppm_pp = ppm_pp;
	ppm->ndev = ndev;
	ppm->pdev = pdev;
	ppm->lldev = lldev;
	ppm->ppmax = ppmax;
	ppm->next = 0;
	ppm->llimit = llimit;
	ppm->base_idx = start > llimit ?
			(start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
	ppm->bmap_index_max = ppmax - ppmax_pool;

	ppm->pool = pool;
	ppm->pool_rsvd = ppmax_pool;
	ppm->pool_index_max = pool_index_max;

	/* check one more time */
	if (*ppm_pp) {
		ppm_free(ppm);
		ppm = (struct cxgbi_ppm *)(*ppm_pp);

		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
			ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);

		kref_get(&ppm->refcnt);
		return 1;
	}
	*ppm_pp = ppm;

	ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);

	pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
		ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
		ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
		ppm->pool_index_max);

	return 0;

release_ppm_pool:
	free_percpu(pool);
	return -ENOMEM;
}
EXPORT_SYMBOL(cxgbi_ppm_init);
unsigned int cxgbi_tagmask_set(unsigned int ppmax)
{
	unsigned int bits = fls(ppmax);

	if (bits > PPOD_IDX_MAX_SIZE)
		bits = PPOD_IDX_MAX_SIZE;

	pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
		ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));

	return 1 << (bits + PPOD_IDX_SHIFT);
}
EXPORT_SYMBOL(cxgbi_tagmask_set);
MODULE_AUTHOR("Chelsio Communications");
MODULE_DESCRIPTION("Chelsio common library");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("Dual BSD/GPL");