// SPDX-License-Identifier: GPL-2.0
/*
 * cache.c - Intel VT-d cache invalidation
 *
 * Copyright (C) 2024 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "trace.h"
/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
                             struct intel_iommu *iommu, struct device *dev,
                             ioasid_t pasid, enum cache_tag_type type)
{
        if (tag->type != type)
                return false;

        if (tag->domain_id != domain_id || tag->pasid != pasid)
                return false;

        if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
                return tag->iommu == iommu;

        if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
                return tag->dev == dev;

        return false;
}
/* Assign a cache tag with specified type to domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
                            struct device *dev, ioasid_t pasid,
                            enum cache_tag_type type)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        struct cache_tag *tag, *temp;
        unsigned long flags;

        tag = kzalloc(sizeof(*tag), GFP_KERNEL);
        if (!tag)
                return -ENOMEM;

        tag->type = type;
        tag->iommu = iommu;
        tag->domain_id = did;
        tag->pasid = pasid;
        tag->users = 1;

        if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
                tag->dev = dev;
        else
                tag->dev = iommu->iommu.dev;

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(temp, &domain->cache_tags, node) {
                if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
                        temp->users++;
                        spin_unlock_irqrestore(&domain->cache_lock, flags);
                        kfree(tag);
                        trace_cache_tag_assign(temp);
                        return 0;
                }
        }
        list_add_tail(&tag->node, &domain->cache_tags);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
        trace_cache_tag_assign(tag);

        return 0;
}
/* Unassign a cache tag with specified type from domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
                               struct device *dev, ioasid_t pasid,
                               enum cache_tag_type type)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        struct cache_tag *tag;
        unsigned long flags;

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
                if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
                        trace_cache_tag_unassign(tag);
                        if (--tag->users == 0) {
                                list_del(&tag->node);
                                kfree(tag);
                        }
                        break;
                }
        }
        spin_unlock_irqrestore(&domain->cache_lock, flags);
}
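/*
 * Assign both an IOTLB tag and, when ATS is enabled on the device, a
 * device-TLB tag. The IOTLB tag is rolled back if the device-TLB tag
 * cannot be allocated.
 */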
static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
                                     struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        int ret;

        ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
        if (ret || !info->ats_enabled)
                return ret;

        ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
        if (ret)
                cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

        return ret;
}
static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
                                        struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);

        cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

        if (info->ats_enabled)
                cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}
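/*
 * A nested domain also gets cache tags on its parent (second-stage) domain
 * so that S2 changes can be propagated; those tags use the NESTING_* types.
 */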
static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
                                            struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        int ret;

        ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
        if (ret || !info->ats_enabled)
                return ret;

        ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
        if (ret)
                cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

        return ret;
}
static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
                                               struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);

        cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

        if (info->ats_enabled)
                cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}
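/* Return the domain ID to use for @domain on the IOMMU that serves @dev. */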
static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;

        /*
         * The driver assigns different domain IDs for all domains except
         * the SVA type.
         */
        if (domain->domain.type == IOMMU_DOMAIN_SVA)
                return FLPT_DEFAULT_DID;

        return domain_id_iommu(domain, iommu);
}
/*
 * Assign cache tags to a domain when it's associated with a device's
 * PASID using a specific domain ID.
 *
 * On success (return value of 0), cache tags are created and added to the
 * domain's cache tag list. On failure (negative return value), an error
 * code is returned indicating the reason for the failure.
 */
int cache_tag_assign_domain(struct dmar_domain *domain,
                            struct device *dev, ioasid_t pasid)
{
        u16 did = domain_get_id_for_dev(domain, dev);
        int ret;

        /* domain->qi_batch will be freed in iommu_free_domain() path. */
        if (!domain->qi_batch) {
                domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL);
                if (!domain->qi_batch)
                        return -ENOMEM;
        }

        ret = __cache_tag_assign_domain(domain, did, dev, pasid);
        if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
                return ret;

        ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
        if (ret)
                __cache_tag_unassign_domain(domain, did, dev, pasid);

        return ret;
}
/*
 * Remove the cache tags associated with a device's PASID when the domain is
 * detached from the device.
 *
 * The cache tags must be previously assigned to the domain by calling the
 * assign interface.
 */
void cache_tag_unassign_domain(struct dmar_domain *domain,
                               struct device *dev, ioasid_t pasid)
{
        u16 did = domain_get_id_for_dev(domain, dev);

        __cache_tag_unassign_domain(domain, did, dev, pasid);
        if (domain->domain.type == IOMMU_DOMAIN_NESTED)
                __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}
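/*
 * Compute the base address, page count and address mask to use for a
 * page-selective invalidation (PSI) that fully covers [@start, @end].
 * The returned address is aligned to VTD_PAGE_SIZE << *_mask, and
 * *_pages / *_mask describe the power-of-two window being flushed.
 *
 * For example (values chosen for illustration): a two-page range starting
 * at pfn 0x1003 ends at pfn 0x1004. The pfn is not aligned to the two-page
 * mask, so shared_bits = ~(0x1003 ^ 0x1004) & ~0x1 has its lowest set bit
 * at position 3; the final mask becomes 3 and pfns 0x1000-0x1007 are
 * invalidated, which covers the target range.
 */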
static unsigned long calculate_psi_aligned_address(unsigned long start,
                                                   unsigned long end,
                                                   unsigned long *_pages,
                                                   unsigned long *_mask)
{
        unsigned long pages = aligned_nrpages(start, end - start + 1);
        unsigned long aligned_pages = __roundup_pow_of_two(pages);
        unsigned long bitmask = aligned_pages - 1;
        unsigned long mask = ilog2(aligned_pages);
        unsigned long pfn = IOVA_PFN(start);

        /*
         * PSI masks the low order bits of the base address. If the
         * address isn't aligned to the mask, then compute a mask value
         * needed to ensure the target range is flushed.
         */
        if (unlikely(bitmask & pfn)) {
                unsigned long end_pfn = pfn + pages - 1, shared_bits;

                /*
                 * Since end_pfn <= pfn + bitmask, the only way bits
                 * higher than bitmask can differ in pfn and end_pfn is
                 * by carrying. This means after masking out bitmask,
                 * high bits starting with the first set bit in
                 * shared_bits are all equal in both pfn and end_pfn.
                 */
                shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
                mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
                aligned_pages = 1UL << mask;
        }

        *_pages = aligned_pages;
        *_mask = mask;

        return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
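/* Submit all queued invalidation descriptors in @batch and reset the batch. */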
static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
{
        if (!iommu || !batch->index)
                return;

        qi_submit_sync(iommu, batch->descs, batch->index, 0);

        /* Reset the index value and clean the whole batch buffer. */
        memset(batch, 0, sizeof(*batch));
}
static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
{
        if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
                qi_batch_flush_descs(iommu, batch);
}
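/*
 * The qi_batch_add_*() helpers below build one invalidation descriptor of
 * the named type into the domain's batch buffer and flush the batch to
 * hardware once it is full.
 */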
static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                               unsigned int size_order, u64 type,
                               struct qi_batch *batch)
{
        qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
        qi_batch_increment_index(iommu, batch);
}
static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
                                   u16 qdep, u64 addr, unsigned int mask,
                                   struct qi_batch *batch)
{
        /*
         * According to VT-d spec, software is recommended to not submit any Device-TLB
         * invalidation requests while address remapping hardware is disabled.
         */
        if (!(iommu->gcmd & DMA_GCMD_TE))
                return;

        qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
        qi_batch_increment_index(iommu, batch);
}
static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
                                u64 addr, unsigned long npages, bool ih,
                                struct qi_batch *batch)
{
        /*
         * npages == -1 means a PASID-selective invalidation, otherwise,
         * a positive value for Page-selective-within-PASID invalidation.
         * 0 is not a valid input.
         */
        if (WARN_ON(!npages))
                return;

        qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
        qi_batch_increment_index(iommu, batch);
}
static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
                                         u32 pasid, u16 qdep, u64 addr,
                                         unsigned int size_order, struct qi_batch *batch)
{
        /*
         * According to VT-d spec, software is recommended to not submit any
         * Device-TLB invalidation requests while address remapping hardware
         * is disabled.
         */
        if (!(iommu->gcmd & DMA_GCMD_TE))
                return;

        qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
                                &batch->descs[batch->index]);
        qi_batch_increment_index(iommu, batch);
}
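/*
 * Queue an IOTLB invalidation for @tag. First-stage domains use PASID-based
 * IOTLB invalidation; second-stage domains use a page-selective IOTLB
 * invalidation, falling back to a domain-selective flush when PSI is not
 * supported or the range is too large.
 */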
static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
                                  unsigned long addr, unsigned long pages,
                                  unsigned long mask, int ih)
{
        struct intel_iommu *iommu = tag->iommu;
        u64 type = DMA_TLB_PSI_FLUSH;

        if (domain->use_first_level) {
                qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
                                    pages, ih, domain->qi_batch);
                return;
        }

        /*
         * Fallback to domain selective flush if no PSI support or the size
         * is too big.
         */
        if (!cap_pgsel_inv(iommu->cap) ||
            mask > cap_max_amask_val(iommu->cap) || pages == -1) {
                addr = 0;
                mask = 0;
                ih = 0;
                type = DMA_TLB_DSI_FLUSH;
        }

        if (ecap_qis(iommu->ecap))
                qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
                                   domain->qi_batch);
        else
                __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}
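/*
 * Queue a page-selective device-TLB invalidation for @tag, using the
 * PASID-based descriptor when the tag carries a PASID. The descriptor is
 * queued a second time when the device needs the extra invalidation quirk
 * (info->dtlb_extra_inval).
 */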
static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
                                       unsigned long addr, unsigned long mask)
{
        struct intel_iommu *iommu = tag->iommu;
        struct device_domain_info *info;
        u16 sid;

        info = dev_iommu_priv_get(tag->dev);
        sid = PCI_DEVID(info->bus, info->devfn);

        if (tag->pasid == IOMMU_NO_PASID) {
                qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
                                       addr, mask, domain->qi_batch);
                if (info->dtlb_extra_inval)
                        qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
                                               addr, mask, domain->qi_batch);
                return;
        }

        qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
                                     info->ats_qdep, addr, mask, domain->qi_batch);
        if (info->dtlb_extra_inval)
                qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
                                             info->ats_qdep, addr, mask,
                                             domain->qi_batch);
}
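/* Queue a device-TLB invalidation that covers the device's entire ATC. */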
static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
{
        struct intel_iommu *iommu = tag->iommu;
        struct device_domain_info *info;
        u16 sid;

        info = dev_iommu_priv_get(tag->dev);
        sid = PCI_DEVID(info->bus, info->devfn);

        qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
                               MAX_AGAW_PFN_WIDTH, domain->qi_batch);
        if (info->dtlb_extra_inval)
                qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
                                       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}
/*
 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
 * when the memory mappings in the target domain have been modified.
 */
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
                           unsigned long end, int ih)
{
        struct intel_iommu *iommu = NULL;
        unsigned long pages, mask, addr;
        struct cache_tag *tag;
        unsigned long flags;

        addr = calculate_psi_aligned_address(start, end, &pages, &mask);

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
                if (iommu && iommu != tag->iommu)
                        qi_batch_flush_descs(iommu, domain->qi_batch);
                iommu = tag->iommu;

                switch (tag->type) {
                case CACHE_TAG_IOTLB:
                case CACHE_TAG_NESTING_IOTLB:
                        cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
                        break;
                case CACHE_TAG_NESTING_DEVTLB:
                        /*
                         * Address translation cache in device side caches the
                         * result of nested translation. There is no easy way
                         * to identify the exact set of nested translations
                         * affected by a change in S2. So just flush the entire
                         * device cache.
                         */
                        addr = 0;
                        mask = MAX_AGAW_PFN_WIDTH;
                        fallthrough;
                case CACHE_TAG_DEVTLB:
                        cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
                        break;
                }

                trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
        }
        qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
}
/*
 * Invalidates all ranges of IOVA when the memory mappings in the target
 * domain have been modified.
 */
void cache_tag_flush_all(struct dmar_domain *domain)
{
        struct intel_iommu *iommu = NULL;
        struct cache_tag *tag;
        unsigned long flags;

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
                if (iommu && iommu != tag->iommu)
                        qi_batch_flush_descs(iommu, domain->qi_batch);
                iommu = tag->iommu;

                switch (tag->type) {
                case CACHE_TAG_IOTLB:
                case CACHE_TAG_NESTING_IOTLB:
                        cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
                        break;
                case CACHE_TAG_DEVTLB:
                case CACHE_TAG_NESTING_DEVTLB:
                        cache_tag_flush_devtlb_all(domain, tag);
                        break;
                }

                trace_cache_tag_flush_all(tag);
        }
        qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
}
/*
 * Invalidate a range of IOVA when new mappings are created in the target
 * domain.
 *
 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
 *   Set, any software updates to remapping structures other than first-
 *   stage mapping requires explicit invalidation of the caches.
 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
 *   write buffer flushing, software must explicitly perform write-buffer
 *   flushing, if cache invalidation is not required.
 */
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
                              unsigned long end)
{
        struct intel_iommu *iommu = NULL;
        unsigned long pages, mask, addr;
        struct cache_tag *tag;
        unsigned long flags;

        addr = calculate_psi_aligned_address(start, end, &pages, &mask);

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
                if (iommu && iommu != tag->iommu)
                        qi_batch_flush_descs(iommu, domain->qi_batch);
                iommu = tag->iommu;

                if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
                        iommu_flush_write_buffer(iommu);
                        continue;
                }

                if (tag->type == CACHE_TAG_IOTLB ||
                    tag->type == CACHE_TAG_NESTING_IOTLB)
                        cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);

                trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
        }
        qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
}