// SPDX-License-Identifier: GPL-2.0+
/*
 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
 *
 * Copyright 2018 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/tce.h>

#include "pci.h"
unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	struct device_node *dn = hose->dn;
	unsigned long mask = 0;
	int i, rc, count;
	u32 val;

	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
	if (count <= 0) {
		mask = SZ_4K | SZ_64K;
		/* Add 16M for POWER8 by default */
		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
				!cpu_has_feature(CPU_FTR_ARCH_300))
			mask |= SZ_16M | SZ_256M;
		return mask;
	}

	for (i = 0; i < count; i++) {
		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
				i, &val);
		if (rc == 0)
			mask |= 1ULL << val;
	}

	return mask;
}
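/*
 * Note on the property parsed above: "ibm,supported-tce-sizes" holds IOMMU
 * page shifts, one u32 per supported size, accumulated via "1ULL << val".
 * Illustrative example (hypothetical device tree values, not from this
 * file): a property of <12 16 24> would yield mask == SZ_4K | SZ_64K |
 * SZ_16M.
 */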
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
		void *tce_mem, u64 tce_size,
		u64 dma_offset, unsigned int page_shift)
{
	tbl->it_blocksize = 16;
	tbl->it_base = (unsigned long)tce_mem;
	tbl->it_page_shift = page_shift;
	tbl->it_offset = dma_offset >> tbl->it_page_shift;
	tbl->it_index = 0;
	tbl->it_size = tce_size >> 3;	/* tce_size is in bytes, 8 bytes per TCE */
	tbl->it_busno = 0;
	tbl->it_type = TCE_PCI;
}
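/*
 * A sketch of the geometry the helper above produces (illustrative numbers,
 * not from the original source): for a 2GB window of 64K IOMMU pages at DMA
 * offset 0, tce_size is (2GB >> 16) * 8 = 256KB of TCEs, so it_size is
 * 32768 entries and it_offset is 0.
 */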
static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
{
	struct page *tce_mem = NULL;
	__be64 *addr;

	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
			shift - PAGE_SHIFT);
	if (!tce_mem) {
		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
				shift);
		return NULL;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, 1UL << shift);

	return addr;
}
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels);
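/*
 * Walk the (possibly multi-level) TCE table down to the entry for @idx,
 * optionally allocating missing intermediate levels on the way down.
 *
 * Worked example of the index arithmetic (illustrative values, not from
 * the original source): with it_level_size = 512 (shift = 9) and one
 * indirect level, idx = 0x12345 gives n = (0x12345 & 0x3fe00) >> 9 = 0x91
 * at the top level, and the remainder 0x145 indexes the leaf level.
 */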
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
	int  level = tbl->it_indirect_levels;
	const long shift = ilog2(tbl->it_level_size);
	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);

	while (level) {
		int n = (idx & mask) >> (level * shift);
		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));

		if (!tce) {
			__be64 *tmp2;

			if (!alloc)
				return NULL;

			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
					ilog2(tbl->it_level_size) + 3);
			if (!tmp2)
				return NULL;

			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
					cpu_to_be64(tce)));
			if (oldtce) {
				/* Lost the race: free ours, use the winner's */
				pnv_pci_ioda2_table_do_free_pages(tmp2,
					ilog2(tbl->it_level_size) + 3, 1);
				tce = oldtce;
			}
		}

		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
		idx &= ~mask;
		mask >>= shift;
		--level;
	}

	return tmp + idx;
}
int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
		unsigned long uaddr, enum dma_data_direction direction,
		unsigned long attrs)
{
	u64 proto_tce = iommu_direction_to_tce_perm(direction);
	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
	long i;

	if (proto_tce & TCE_PCI_WRITE)
		proto_tce |= TCE_PCI_READ;

	for (i = 0; i < npages; i++) {
		unsigned long newtce = proto_tce |
			((rpn + i) << tbl->it_page_shift);
		unsigned long idx = index - tbl->it_offset + i;

		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
	}

	return 0;
}
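/*
 * For illustration of the loop above (values assumed, not from the
 * original source): mapping npages = 2 at index == it_offset with a 4K
 * it_page_shift and uaddr physically at 0x1000000 writes two TCEs,
 * 0x1000000 | perms and 0x1001000 | perms, into consecutive table slots.
 */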
#ifdef CONFIG_IOMMU_API
int pnv_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction,
		bool alloc)
{
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *hpa | proto_tce, oldtce;
	unsigned long idx = index - tbl->it_offset;
	__be64 *ptce = NULL;

	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

	if (*direction == DMA_NONE) {
		ptce = pnv_tce(tbl, false, idx, false);
		if (!ptce) {
			/* Clearing an entry that was never allocated is a no-op */
			*hpa = 0;
			return 0;
		}
	}

	if (!ptce) {
		ptce = pnv_tce(tbl, false, idx, alloc);
		if (!ptce)
			return -ENOMEM;
	}

	if (newtce & TCE_PCI_WRITE)
		newtce |= TCE_PCI_READ;

	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	*direction = iommu_tce_direction(oldtce);

	return 0;
}
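/*
 * Usage sketch for the exchange helper above (assumed caller behaviour,
 * not from the original source): to unmap, a caller such as the VFIO TCE
 * driver passes *direction == DMA_NONE; the old real address and
 * permissions come back through *hpa and *direction, letting the caller
 * release its reference on the previously mapped page.
 */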
__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
{
	if (WARN_ON_ONCE(!tbl->it_userspace))
		return NULL;

	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
}
#endif
void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
	long i;

	for (i = 0; i < npages; i++) {
		unsigned long idx = index - tbl->it_offset + i;
		__be64 *ptce = pnv_tce(tbl, false, idx, false);

		if (ptce)
			*ptce = cpu_to_be64(0);
		else
			/* Skip the rest of the level */
			i |= tbl->it_level_size - 1;
	}
}
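/*
 * The "i |= it_level_size - 1" skip above works because a NULL ptce means
 * the whole leaf level is unallocated. Example (illustrative numbers, not
 * from the original source): with it_level_size = 512, hitting a missing
 * leaf at i = 100 sets i to 511, so the loop increment moves straight to
 * index 512, the first entry of the next leaf.
 */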
unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);

	if (!ptce)
		return 0;

	return be64_to_cpu(*ptce);
}
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels)
{
	const unsigned long addr_ul = (unsigned long) addr &
			~(TCE_PCI_READ | TCE_PCI_WRITE);

	if (levels) {
		long i;
		u64 *tmp = (u64 *) addr_ul;

		for (i = 0; i < size; ++i) {
			unsigned long hpa = be64_to_cpu(tmp[i]);

			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
				continue;

			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
					levels - 1);
		}
	}

	free_pages(addr_ul, get_order(size << 3));
}
void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
{
	const unsigned long size = tbl->it_indirect_levels ?
			tbl->it_level_size : tbl->it_size;

	if (!tbl->it_size)
		return;

	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
			tbl->it_indirect_levels);
	if (tbl->it_userspace) {
		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
				tbl->it_indirect_levels);
	}
}
static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
		unsigned int levels, unsigned long limit,
		unsigned long *current_offset, unsigned long *total_allocated)
{
	__be64 *addr, *tmp;
	unsigned long allocated = 1UL << shift;
	unsigned int entries = 1UL << (shift - 3);
	long i;

	addr = pnv_alloc_tce_level(nid, shift);
	*total_allocated += allocated;

	--levels;
	if (!levels) {
		*current_offset += allocated;
		return addr;
	}

	for (i = 0; i < entries; ++i) {
		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
				levels, limit, current_offset, total_allocated);
		if (!tmp)
			break;

		addr[i] = cpu_to_be64(__pa(tmp) |
				TCE_PCI_READ | TCE_PCI_WRITE);

		if (*current_offset >= limit)
			break;
	}

	return addr;
}
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
		__u32 page_shift, __u64 window_size, __u32 levels,
		bool alloc_userspace_copy, struct iommu_table *tbl)
{
	void *addr, *uas = NULL;
	unsigned long offset = 0, level_shift, total_allocated = 0;
	unsigned long total_allocated_uas = 0;
	const unsigned int window_shift = ilog2(window_size);
	unsigned int entries_shift = window_shift - page_shift;
	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
			PAGE_SHIFT);
	const unsigned long tce_table_size = 1UL << table_shift;

	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
		return -EINVAL;

	if (!is_power_of_2(window_size))
		return -EINVAL;

	/* Adjust direct table size from window_size and levels */
	entries_shift = (entries_shift + levels - 1) / levels;
	level_shift = entries_shift + 3;
	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

	if ((level_shift - 3) * levels + page_shift >= 55)
		return -EINVAL;
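	/*
	 * Worked sizing example (illustrative values, not from the original
	 * source): window_size = 4GB and page_shift = 16 give entries_shift
	 * = 16; with levels = 2, entries_shift becomes ceil(16 / 2) = 8 and
	 * level_shift is clamped up to PAGE_SHIFT = 16, i.e. one 64K page
	 * per level, and (16 - 3) * 2 + 16 = 42 stays under the 55-bit cap.
	 */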
	/* Allocate TCE table */
	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
			1, tce_table_size, &offset, &total_allocated);

	/* addr==NULL means that the first level allocation failed */
	if (!addr)
		return -ENOMEM;

	/*
	 * First level was allocated but some lower level failed as
	 * we did not allocate as much as we wanted,
	 * release partially allocated table.
	 */
	if (levels == 1 && offset < tce_table_size)
		goto free_tces_exit;

	/* Allocate userspace view of the TCE table */
	if (alloc_userspace_copy) {
		offset = 0;
		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
				1, tce_table_size, &offset,
				&total_allocated_uas);
		if (!uas)
			goto free_tces_exit;
		if (levels == 1 && (offset < tce_table_size ||
				total_allocated_uas != total_allocated))
			goto free_uas_exit;
	}

	/* Setup linux iommu table */
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
			page_shift);
	tbl->it_level_size = 1ULL << (level_shift - 3);
	tbl->it_indirect_levels = levels - 1;
	tbl->it_userspace = uas;
	tbl->it_nid = nid;

	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
			window_size, tce_table_size, bus_offset, tbl->it_base,
			tbl->it_userspace, 1, levels);

	return 0;

free_uas_exit:
	pnv_pci_ioda2_table_do_free_pages(uas,
			1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
	pnv_pci_ioda2_table_do_free_pages(addr,
			1ULL << (level_shift - 3), levels - 1);

	return -ENOMEM;
}
void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
		struct iommu_table_group *table_group)
{
	long i;
	bool found;
	struct iommu_table_group_link *tgl;

	if (!tbl || !table_group)
		return;

	/* Remove link to a group from table's list of attached groups */
	found = false;
	rcu_read_lock();
	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
		if (tgl->table_group == table_group) {
			list_del_rcu(&tgl->next);
			kfree_rcu(tgl, rcu);
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	if (WARN_ON(!found))
		return;

	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
	found = false;
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (table_group->tables[i] == tbl) {
			iommu_tce_table_put(tbl);
			table_group->tables[i] = NULL;
			found = true;
			break;
		}
	}
	WARN_ON(!found);
}
long pnv_pci_link_table_and_group(int node, int num,
		struct iommu_table *tbl,
		struct iommu_table_group *table_group)
{
	struct iommu_table_group_link *tgl = NULL;

	if (WARN_ON(!tbl || !table_group))
		return -EINVAL;

	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
			node);
	if (!tgl)
		return -ENOMEM;

	tgl->table_group = table_group;
	list_add_rcu(&tgl->next, &tbl->it_group_list);

	table_group->tables[num] = iommu_tce_table_get(tbl);

	return 0;
}