/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 **************************************************************************/
#include <drm/drmP.h>
#include "psb_drv.h"
#include "psb_reg.h"

/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock, and
 * no other functions that may be using the locks for other purposes may be
 * called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the only
 * user.
 */
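/*
 * Illustrative sketch of the rule above (hypothetical helper, not part of
 * this driver, kept under #if 0): every kmap_atomic()/kunmap_atomic() pair
 * on MMU pages in this file is bracketed by the driver spinlock, and nothing
 * that might sleep or re-take the lock is called inside the bracketed region.
 */
#if 0
static uint32_t example_read_pde(struct psb_mmu_pd *pd, unsigned long addr)
{
        uint32_t *v;
        uint32_t pde;

        spin_lock(&pd->driver->lock);      /* own the per-CPU mapping slot */
        v = kmap_atomic(pd->p);            /* map the page-directory page */
        pde = v[psb_mmu_pd_index(addr)];   /* read one page-directory entry */
        kunmap_atomic(v);
        spin_unlock(&pd->driver->lock);

        return pde;
}
#endif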
/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault in
 * needed pages. For that, we need to make an atomic insert_pages function, that
 * may fail.
 * If it fails, the caller needs to insert the page using a workqueue function,
 * but on average it should be fast.
 */
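/*
 * Illustrative sketch of the intended calling pattern (hypothetical caller,
 * not part of this driver, kept under #if 0): map a run of pages into the
 * default page directory and tear the mapping down again. Only
 * psb_mmu_get_default_pd(), psb_mmu_insert_pages() and psb_mmu_remove_pages()
 * below are real entry points; passing 0 for both stride arguments treats
 * the pages as a single untiled run.
 */
#if 0
static int example_map_buffer(struct psb_mmu_driver *driver,
                              struct page **pages, unsigned long gpu_start,
                              uint32_t num_pages)
{
        struct psb_mmu_pd *pd = psb_mmu_get_default_pd(driver);
        int ret;

        /* Map num_pages pages at gpu_start as cached, untiled memory. */
        ret = psb_mmu_insert_pages(pd, pages, gpu_start, num_pages,
                                   0, 0, PSB_MMU_CACHED_MEMORY);
        if (ret)
                return ret;

        /* ... let the GPU use the mapping ... */

        /* Remove the mapping again. */
        psb_mmu_remove_pages(pd, gpu_start, num_pages, 0, 0);
        return 0;
}
#endif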
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
        return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}

static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
        return offset >> PSB_PDE_SHIFT;
}
#if defined(CONFIG_X86)
static inline void psb_clflush(void *addr)
{
        __asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}
static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
        if (!driver->has_clflush)
                return;

        mb();
        psb_clflush(addr);
        mb();
}
#else

static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
}
#endif
static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, int force)
{
        struct drm_device *dev = driver->dev;
        struct drm_psb_private *dev_priv = dev->dev_private;

        if (atomic_read(&driver->needs_tlbflush) || force) {
                uint32_t val = PSB_RSGX32(PSB_CR_BIF_CTRL);
                PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);

                /* Make sure data cache is turned off before enabling it */
                wmb();
                PSB_WSGX32(val & ~_PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
                (void)PSB_RSGX32(PSB_CR_BIF_CTRL);
                if (driver->msvdx_mmu_invaldc)
                        atomic_set(driver->msvdx_mmu_invaldc, 1);
        }
        atomic_set(&driver->needs_tlbflush, 0);
}
static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
        down_write(&driver->sem);
        psb_mmu_flush_pd_locked(driver, force);
        up_write(&driver->sem);
}
void psb_mmu_flush(struct psb_mmu_driver *driver)
{
        struct drm_device *dev = driver->dev;
        struct drm_psb_private *dev_priv = dev->dev_private;
        uint32_t val;

        down_write(&driver->sem);
        val = PSB_RSGX32(PSB_CR_BIF_CTRL);
        if (atomic_read(&driver->needs_tlbflush))
                PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);

        PSB_WSGX32(val | _PSB_CB_CTRL_FLUSH, PSB_CR_BIF_CTRL);

        /* Make sure data cache is turned off and MMU is flushed before
           restoring bank interface control register */
        wmb();
        PSB_WSGX32(val & ~(_PSB_CB_CTRL_FLUSH | _PSB_CB_CTRL_INVALDC),
                   PSB_CR_BIF_CTRL);
        (void)PSB_RSGX32(PSB_CR_BIF_CTRL);

        atomic_set(&driver->needs_tlbflush, 0);
        if (driver->msvdx_mmu_invaldc)
                atomic_set(driver->msvdx_mmu_invaldc, 1);
        up_write(&driver->sem);
}
void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
        struct drm_device *dev = pd->driver->dev;
        struct drm_psb_private *dev_priv = dev->dev_private;
        uint32_t offset = (hw_context == 0) ? PSB_CR_BIF_DIR_LIST_BASE0 :
                          PSB_CR_BIF_DIR_LIST_BASE1 + hw_context * 4;

        down_write(&pd->driver->sem);
        PSB_WSGX32(page_to_pfn(pd->p) << PAGE_SHIFT, offset);
        wmb();
        psb_mmu_flush_pd_locked(pd->driver, 1);
        pd->hw_context = hw_context;
        up_write(&pd->driver->sem);
}
static inline unsigned long psb_pd_addr_end(unsigned long addr,
                                            unsigned long end)
{
        addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
        return (addr < end) ? addr : end;
}
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
        uint32_t mask = PSB_PTE_VALID;

        if (type & PSB_MMU_CACHED_MEMORY)
                mask |= PSB_PTE_CACHED;
        if (type & PSB_MMU_RO_MEMORY)
                mask |= PSB_PTE_RO;
        if (type & PSB_MMU_WO_MEMORY)
                mask |= PSB_PTE_WO;

        return (pfn << PAGE_SHIFT) | mask;
}
struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
                                    int trap_pagefaults, int invalid_type)
{
        struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
        uint32_t *v;
        int i;

        if (!pd)
                return NULL;

        pd->p = alloc_page(GFP_DMA32);
        if (!pd->p)
                goto out_err1;
        pd->dummy_pt = alloc_page(GFP_DMA32);
        if (!pd->dummy_pt)
                goto out_err2;
        pd->dummy_page = alloc_page(GFP_DMA32);
        if (!pd->dummy_page)
                goto out_err3;

        if (!trap_pagefaults) {
                pd->invalid_pde = psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
                                                   invalid_type);
                pd->invalid_pte = psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
                                                   invalid_type);
        } else {
                pd->invalid_pde = 0;
                pd->invalid_pte = 0;
        }

        v = kmap(pd->dummy_pt);
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                v[i] = pd->invalid_pte;

        kunmap(pd->dummy_pt);

        v = kmap(pd->p);
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                v[i] = pd->invalid_pde;

        kunmap(pd->p);

        clear_page(kmap(pd->dummy_page));
        kunmap(pd->dummy_page);

        pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
        if (!pd->tables)
                goto out_err4;

        pd->hw_context = -1;
        pd->pd_mask = PSB_PTE_VALID;
        pd->driver = driver;

        return pd;

out_err4:
        __free_page(pd->dummy_page);
out_err3:
        __free_page(pd->dummy_pt);
out_err2:
        __free_page(pd->p);
out_err1:
        kfree(pd);
        return NULL;
}
static void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
        __free_page(pt->p);
        kfree(pt);
}
void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
        struct psb_mmu_driver *driver = pd->driver;
        struct drm_device *dev = driver->dev;
        struct drm_psb_private *dev_priv = dev->dev_private;
        struct psb_mmu_pt *pt;
        int i;

        down_write(&driver->sem);
        if (pd->hw_context != -1) {
                PSB_WSGX32(0, PSB_CR_BIF_DIR_LIST_BASE0 + pd->hw_context * 4);
                psb_mmu_flush_pd_locked(driver, 1);
        }

        /* Should take the spinlock here, but we don't need to do that
           since we have the semaphore in write mode. */

        for (i = 0; i < 1024; ++i) {
                pt = pd->tables[i];
                if (pt)
                        psb_mmu_free_pt(pt);
        }

        vfree(pd->tables);
        __free_page(pd->dummy_page);
        __free_page(pd->dummy_pt);
        __free_page(pd->p);
        kfree(pd);
        up_write(&driver->sem);
}
static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
        struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
        void *v;
        uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
        uint32_t clflush_count = PAGE_SIZE / clflush_add;
        spinlock_t *lock = &pd->driver->lock;
        uint8_t *clf;
        uint32_t *ptes;
        int i;

        if (!pt)
                return NULL;

        pt->p = alloc_page(GFP_DMA32);
        if (!pt->p) {
                kfree(pt);
                return NULL;
        }

        spin_lock(lock);

        v = kmap_atomic(pt->p);
        clf = (uint8_t *) v;
        ptes = (uint32_t *) v;
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                *ptes++ = pd->invalid_pte;

#if defined(CONFIG_X86)
        if (pd->driver->has_clflush && pd->hw_context != -1) {
                mb();
                for (i = 0; i < clflush_count; ++i) {
                        psb_clflush(clf);
                        clf += clflush_add;
                }
                mb();
        }
#endif
        kunmap_atomic(v);
        spin_unlock(lock);

        pt->count = 0;
        pt->pd = pd;
        pt->index = 0;

        return pt;
}
struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
                                             unsigned long addr)
{
        uint32_t index = psb_mmu_pd_index(addr);
        struct psb_mmu_pt *pt;
        uint32_t *v;
        spinlock_t *lock = &pd->driver->lock;

        spin_lock(lock);
        pt = pd->tables[index];
        while (!pt) {
                spin_unlock(lock);
                pt = psb_mmu_alloc_pt(pd);
                if (!pt)
                        return NULL;
                spin_lock(lock);

                if (pd->tables[index]) {
                        spin_unlock(lock);
                        psb_mmu_free_pt(pt);
                        spin_lock(lock);
                        pt = pd->tables[index];
                        continue;
                }

                v = kmap_atomic(pd->p);
                pd->tables[index] = pt;
                v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
                pt->index = index;
                kunmap_atomic((void *) v);

                if (pd->hw_context != -1) {
                        psb_mmu_clflush(pd->driver, (void *)&v[index]);
                        atomic_set(&pd->driver->needs_tlbflush, 1);
                }
        }
        pt->v = kmap_atomic(pt->p);
        return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
                                              unsigned long addr)
{
        uint32_t index = psb_mmu_pd_index(addr);
        struct psb_mmu_pt *pt;
        spinlock_t *lock = &pd->driver->lock;

        spin_lock(lock);
        pt = pd->tables[index];
        if (!pt) {
                spin_unlock(lock);
                return NULL;
        }
        pt->v = kmap_atomic(pt->p);
        return pt;
}
static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
        struct psb_mmu_pd *pd = pt->pd;
        uint32_t *v;

        kunmap_atomic(pt->v);
        if (pt->count == 0) {
                v = kmap_atomic(pd->p);
                v[pt->index] = pd->invalid_pde;
                pd->tables[pt->index] = NULL;

                if (pd->hw_context != -1) {
                        psb_mmu_clflush(pd->driver, (void *)&v[pt->index]);
                        atomic_set(&pd->driver->needs_tlbflush, 1);
                }
                kunmap_atomic(v);
                spin_unlock(&pd->driver->lock);
                psb_mmu_free_pt(pt);
                return;
        }
        spin_unlock(&pd->driver->lock);
}
static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, unsigned long addr,
                                   uint32_t pte)
{
        pt->v[psb_mmu_pt_index(addr)] = pte;
}

static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
                                          unsigned long addr)
{
        pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}
struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
        struct psb_mmu_pd *pd;

        down_read(&driver->sem);
        pd = driver->default_pd;
        up_read(&driver->sem);

        return pd;
}
/* Returns the physical address of the PD shared by sgx/msvdx */
uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver)
{
        struct psb_mmu_pd *pd;

        pd = psb_mmu_get_default_pd(driver);
        return page_to_pfn(pd->p) << PAGE_SHIFT;
}
void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
        struct drm_device *dev = driver->dev;
        struct drm_psb_private *dev_priv = dev->dev_private;

        PSB_WSGX32(driver->bif_ctrl, PSB_CR_BIF_CTRL);
        psb_mmu_free_pagedir(driver->default_pd);
        kfree(driver);
}
struct psb_mmu_driver *psb_mmu_driver_init(struct drm_device *dev,
                                           int trap_pagefaults,
                                           int invalid_type,
                                           atomic_t *msvdx_mmu_invaldc)
{
        struct psb_mmu_driver *driver;
        struct drm_psb_private *dev_priv = dev->dev_private;

        driver = kmalloc(sizeof(*driver), GFP_KERNEL);
        if (!driver)
                return NULL;

        driver->dev = dev;
        driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
                                              invalid_type);
        if (!driver->default_pd)
                goto out_err1;

        spin_lock_init(&driver->lock);
        init_rwsem(&driver->sem);
        down_write(&driver->sem);
        atomic_set(&driver->needs_tlbflush, 1);
        driver->msvdx_mmu_invaldc = msvdx_mmu_invaldc;

        driver->bif_ctrl = PSB_RSGX32(PSB_CR_BIF_CTRL);
        PSB_WSGX32(driver->bif_ctrl | _PSB_CB_CTRL_CLEAR_FAULT,
                   PSB_CR_BIF_CTRL);
        PSB_WSGX32(driver->bif_ctrl & ~_PSB_CB_CTRL_CLEAR_FAULT,
                   PSB_CR_BIF_CTRL);

        driver->has_clflush = 0;

#if defined(CONFIG_X86)
        if (boot_cpu_has(X86_FEATURE_CLFLUSH)) {
                uint32_t tfms, misc, cap0, cap4, clflush_size;

                /*
                 * clflush size is determined at kernel setup for x86_64 but not
                 * for i386. We have to do it here.
                 */

                cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
                clflush_size = ((misc >> 8) & 0xff) * 8;
                driver->has_clflush = 1;
                driver->clflush_add =
                    PAGE_SIZE * clflush_size / sizeof(uint32_t);
                driver->clflush_mask = driver->clflush_add - 1;
                driver->clflush_mask = ~driver->clflush_mask;
        }
#endif

        up_write(&driver->sem);
        return driver;

out_err1:
        kfree(driver);
        return NULL;
}
#if defined(CONFIG_X86)
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
                               uint32_t num_pages, uint32_t desired_tile_stride,
                               uint32_t hw_tile_stride)
{
        struct psb_mmu_pt *pt;
        uint32_t rows = 1;
        uint32_t i;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long clflush_add = pd->driver->clflush_add;
        unsigned long clflush_mask = pd->driver->clflush_mask;

        if (!pd->driver->has_clflush)
                return;

        if (hw_tile_stride)
                rows = num_pages / desired_tile_stride;
        else
                desired_tile_stride = num_pages;

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;

        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_map_lock(pd, addr);
                        if (!pt)
                                continue;
                        do {
                                psb_clflush(&pt->v[psb_mmu_pt_index(addr)]);
                        } while (addr += clflush_add,
                                 (addr & clflush_mask) < next);

                        psb_mmu_pt_unmap_unlock(pt);
                } while (addr = next, next != end);

                address += row_add;
        }
}
#else

static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
                               uint32_t num_pages, uint32_t desired_tile_stride,
                               uint32_t hw_tile_stride)
{
        drm_ttm_cache_flush();
}
#endif
void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
                                 unsigned long address, uint32_t num_pages)
{
        struct psb_mmu_pt *pt;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long f_address = address;

        down_read(&pd->driver->sem);

        addr = address;
        end = addr + (num_pages << PAGE_SHIFT);

        do {
                next = psb_pd_addr_end(addr, end);
                pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                if (!pt)
                        goto out;
                do {
                        psb_mmu_invalidate_pte(pt, addr);
                        --pt->count;
                } while (addr += PAGE_SIZE, addr < next);
                psb_mmu_pt_unmap_unlock(pt);

        } while (addr = next, next != end);

out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver);
}
void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
                          uint32_t num_pages, uint32_t desired_tile_stride,
                          uint32_t hw_tile_stride)
{
        struct psb_mmu_pt *pt;
        uint32_t rows = 1;
        uint32_t i;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long f_address = address;

        if (hw_tile_stride)
                rows = num_pages / desired_tile_stride;
        else
                desired_tile_stride = num_pages;

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;

        down_read(&pd->driver->sem);

        /* Make sure we only need to flush this processor's cache */

        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_map_lock(pd, addr);
                        if (!pt)
                                continue;
                        do {
                                psb_mmu_invalidate_pte(pt, addr);
                                --pt->count;
                        } while (addr += PAGE_SIZE, addr < next);
                        psb_mmu_pt_unmap_unlock(pt);

                } while (addr = next, next != end);

                address += row_add;
        }
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages,
                                   desired_tile_stride, hw_tile_stride);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver);
}
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
                                unsigned long address, uint32_t num_pages,
                                int type)
{
        struct psb_mmu_pt *pt;
        uint32_t pte;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long f_address = address;
        int ret = -ENOMEM;

        down_read(&pd->driver->sem);

        addr = address;
        end = addr + (num_pages << PAGE_SHIFT);

        do {
                next = psb_pd_addr_end(addr, end);
                pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                if (!pt) {
                        ret = -ENOMEM;
                        goto out;
                }
                do {
                        pte = psb_mmu_mask_pte(start_pfn++, type);
                        psb_mmu_set_pte(pt, addr, pte);
                        pt->count++;
                } while (addr += PAGE_SIZE, addr < next);
                psb_mmu_pt_unmap_unlock(pt);

        } while (addr = next, next != end);
        ret = 0;

out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver);

        return ret;
}
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
                         unsigned long address, uint32_t num_pages,
                         uint32_t desired_tile_stride, uint32_t hw_tile_stride,
                         int type)
{
        struct psb_mmu_pt *pt;
        uint32_t rows = 1;
        uint32_t i;
        uint32_t pte;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long f_address = address;
        int ret = -ENOMEM;

        if (hw_tile_stride) {
                if (num_pages % desired_tile_stride != 0)
                        return -EINVAL;
                rows = num_pages / desired_tile_stride;
        } else {
                desired_tile_stride = num_pages;
        }

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;

        down_read(&pd->driver->sem);

        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                        if (!pt) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        do {
                                pte = psb_mmu_mask_pte(page_to_pfn(*pages++),
                                                       type);
                                psb_mmu_set_pte(pt, addr, pte);
                                pt->count++;
                        } while (addr += PAGE_SIZE, addr < next);
                        psb_mmu_pt_unmap_unlock(pt);

                } while (addr = next, next != end);

                address += row_add;
        }

        ret = 0;
out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages,
                                   desired_tile_stride, hw_tile_stride);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver);

        return ret;
}
int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
                           unsigned long *pfn)
{
        int ret;
        struct psb_mmu_pt *pt;
        uint32_t tmp;
        spinlock_t *lock = &pd->driver->lock;

        down_read(&pd->driver->sem);
        pt = psb_mmu_pt_map_lock(pd, virtual);
        if (!pt) {
                uint32_t *v;

                spin_lock(lock);
                v = kmap_atomic(pd->p);
                tmp = v[psb_mmu_pd_index(virtual)];
                kunmap_atomic(v);
                spin_unlock(lock);

                if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
                    !(pd->invalid_pte & PSB_PTE_VALID)) {
                        ret = -EINVAL;
                        goto out;
                }
                ret = 0;
                *pfn = pd->invalid_pte >> PAGE_SHIFT;
                goto out;
        }
        tmp = pt->v[psb_mmu_pt_index(virtual)];
        if (!(tmp & PSB_PTE_VALID)) {
                ret = -EINVAL;
        } else {
                ret = 0;
                *pfn = tmp >> PAGE_SHIFT;
        }
        psb_mmu_pt_unmap_unlock(pt);
out:
        up_read(&pd->driver->sem);
        return ret;
}