2 * PARISC TLB and cache flushing support
3 * Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
4 * Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
5 * Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * NOTE: fdc,fic, and pdc instructions that use base register modification
24 * should only use index and base registers that are not shadowed,
25 * so that the fast path emulation in the non access miss handler can be used.
43 #include <asm/assembly.h>
45 #include <asm/pgtable.h>
46 #include <asm/cache.h>
51 .export flush_tlb_all_local,code
59 * The pitlbe and pdtlbe instructions should only be used to
60 * flush the entire tlb. Also, there needs to be no intervening
61 * tlb operations, e.g. tlb misses, so the operation needs
62 * to happen in real mode with all interruptions disabled.
66 * Once again, we do the rfi dance ... some day we need to examine
67 * all of our uses of this type of code and see what can be consolidated.
/*
 * Body of flush_tlb_all_local (the entry label itself is not visible in
 * this excerpt).  Visible phases:
 *   1. clear the PSW I-bit, keeping its previous state in %r19, and load
 *      the interruption instruction address queues for a switch to
 *      REAL_MODE_PSW;
 *   2. purge the instruction TLB with pitlbe, driven by the ITLB_* fields
 *      of cache_info;
 *   3. purge the data TLB with pdtlbe, driven by the DTLB_* fields;
 *   4. load the queues again for a switch back to KERNEL_PSW, with the
 *      I-bit restored only if it was set on entry.
 *
 * General registers written in the visible lines: %r1, %r19-%r22, %r28,
 * %r29, %r31 and %arg0-%arg3.
 *
 * NOTE(review): the embedded line numbering has gaps, so instructions are
 * missing from this view.  In particular no rfi, no write of the new PSW
 * value, no mtsp into %sr1, no ldil for PA(1f) and no fitdone / fdtdone
 * labels are visible here -- confirm all of these against the full file.
 */
71 rsm PSW_SM_I, %r19 /* relied upon translation! PA 2.0 Arch. F-5 */
80 rsm PSW_SM_Q, %r0 /* Turn off Q bit to load iia queue */
81 ldil L%REAL_MODE_PSW, %r1
82 ldo R%REAL_MODE_PSW(%r1), %r1
/* %r1 = REAL_MODE_PSW (left part from ldil plus right part from ldo). */
84 mtctl %r0, %cr17 /* Clear IIASQ tail */
85 mtctl %r0, %cr17 /* Clear IIASQ head */
/* Both space queue entries are zero; the offset queue is loaded next. */
87 ldo R%PA(1f)(%r1), %r1
88 mtctl %r1, %cr18 /* IIAOQ head */
90 mtctl %r1, %cr18 /* IIAOQ tail */
/* Label 1 is the address placed in the IIAOQ above, expressed via PA(). */
94 1: ldil L%PA(cache_info), %r1
95 ldo R%PA(cache_info)(%r1), %r1
/* %r1 = PA(cache_info); all loop parameters below are read through it. */
97 /* Flush Instruction Tlb */
/*
 * Parameter registers for the ITLB purge:
 *   %r20 = space id base    %r21 = space id stride   %r22 = space id count
 *   %arg0 = offset base     %arg1 = offset stride    %arg2 = offset count
 *   %arg3 = LOOP (inner repeat count, see the dispatch below)
 */
99 LDREG ITLB_SID_BASE(%r1), %r20
100 LDREG ITLB_SID_STRIDE(%r1), %r21
101 LDREG ITLB_SID_COUNT(%r1), %r22
102 LDREG ITLB_OFF_BASE(%r1), %arg0
103 LDREG ITLB_OFF_STRIDE(%r1), %arg1
104 LDREG ITLB_OFF_COUNT(%r1), %arg2
105 LDREG ITLB_LOOP(%r1), %arg3
/*
 * Dispatch on LOOP after pre-decrementing it: a zero result selects the
 * LOOP = 1 path, a negative result skips the ITLB purge, anything else
 * falls into the LOOP >= 2 path with %r31 as the inner counter.
 */
107 ADDIB= -1, %arg3, fitoneloop /* Preadjust and test */
108 movb,<,n %arg3, %r31, fitdone /* If loop < 0, skip */
109 copy %arg0, %r28 /* Init base addr */
/* Outer loop (one pass per space id), LOOP >= 2 variant. */
111 fitmanyloop: /* Loop if LOOP >= 2 */
113 add %r21, %r20, %r20 /* increment space */
114 copy %arg2, %r29 /* Init middle loop count */
/*
 * Middle loop walks the offsets (%r29 counts them); the inner counter
 * %r31 repeats the purge at the current offset.  pitlbe,m also advances
 * %r28 by the offset stride in %arg1.
 */
116 fitmanymiddle: /* Loop if LOOP >= 2 */
117 ADDIB> -1, %r31, fitmanymiddle /* Adjusted inner loop decr */
119 pitlbe,m %arg1(%sr1, %r28) /* Last pitlbe and addr adjust */
120 ADDIB> -1, %r29, fitmanymiddle /* Middle loop decr */
121 copy %arg3, %r31 /* Re-init inner loop count */
/* Reset the offset and go round the outer loop unless the count is used up. */
123 movb,tr %arg0, %r28, fitmanyloop /* Re-init base addr */
124 ADDIB<=,n -1, %r22, fitdone /* Outer loop count decr */
/* LOOP = 1 variant: one purge per offset, no inner counter needed. */
126 fitoneloop: /* Loop if LOOP = 1 */
128 copy %arg0, %r28 /* init base addr */
129 copy %arg2, %r29 /* init middle loop count */
131 fitonemiddle: /* Loop if LOOP = 1 */
132 ADDIB> -1, %r29, fitonemiddle /* Middle loop count decr */
133 pitlbe,m %arg1(%sr1, %r28) /* pitlbe for one loop */
135 ADDIB> -1, %r22, fitoneloop /* Outer loop count decr */
136 add %r21, %r20, %r20 /* increment space */
/*
 * Data TLB purge.  Same register roles and loop structure as the ITLB
 * purge above, using the DTLB_* fields and pdtlbe instead of pitlbe.
 */
142 LDREG DTLB_SID_BASE(%r1), %r20
143 LDREG DTLB_SID_STRIDE(%r1), %r21
144 LDREG DTLB_SID_COUNT(%r1), %r22
145 LDREG DTLB_OFF_BASE(%r1), %arg0
146 LDREG DTLB_OFF_STRIDE(%r1), %arg1
147 LDREG DTLB_OFF_COUNT(%r1), %arg2
148 LDREG DTLB_LOOP(%r1), %arg3
/* Dispatch on LOOP exactly as for the ITLB case. */
150 ADDIB= -1, %arg3, fdtoneloop /* Preadjust and test */
151 movb,<,n %arg3, %r31, fdtdone /* If loop < 0, skip */
152 copy %arg0, %r28 /* Init base addr */
/* Outer loop (one pass per space id), LOOP >= 2 variant. */
154 fdtmanyloop: /* Loop if LOOP >= 2 */
156 add %r21, %r20, %r20 /* increment space */
157 copy %arg2, %r29 /* Init middle loop count */
/* Middle loop over offsets with %r31 as the per-offset repeat counter. */
159 fdtmanymiddle: /* Loop if LOOP >= 2 */
160 ADDIB> -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */
162 pdtlbe,m %arg1(%sr1, %r28) /* Last pdtlbe and addr adjust */
163 ADDIB> -1, %r29, fdtmanymiddle /* Middle loop decr */
164 copy %arg3, %r31 /* Re-init inner loop count */
166 movb,tr %arg0, %r28, fdtmanyloop /* Re-init base addr */
167 ADDIB<=,n -1, %r22,fdtdone /* Outer loop count decr */
/* LOOP = 1 variant: one purge per offset. */
169 fdtoneloop: /* Loop if LOOP = 1 */
171 copy %arg0, %r28 /* init base addr */
172 copy %arg2, %r29 /* init middle loop count */
174 fdtonemiddle: /* Loop if LOOP = 1 */
175 ADDIB> -1, %r29, fdtonemiddle /* Middle loop count decr */
176 pdtlbe,m %arg1(%sr1, %r28) /* pdtlbe for one loop */
178 ADDIB> -1, %r22, fdtoneloop /* Outer loop count decr */
179 add %r21, %r20, %r20 /* increment space */
183 /* Switch back to virtual mode */
185 rsm PSW_SM_Q, %r0 /* clear Q bit to load iia queue */
186 ldil L%KERNEL_PSW, %r1
187 ldo R%KERNEL_PSW(%r1), %r1
/* %r19 still holds the I-bit state captured by the first rsm above. */
188 or %r1, %r19, %r1 /* Set I bit if set on entry */
190 mtctl %r0, %cr17 /* Clear IIASQ tail */
191 mtctl %r0, %cr17 /* Clear IIASQ head */
/*
 * NOTE(review): in the visible lines %r1 still holds the PSW value when
 * it is written to the IIAOQ; the instructions that consume the PSW and
 * load the return address are presumably among the missing lines.
 */
194 mtctl %r1, %cr18 /* IIAOQ head */
196 mtctl %r1, %cr18 /* IIAOQ tail */
/*
 * flush_instruction_cache_local: flush the instruction cache with fice,
 * using the ICACHE_BASE / STRIDE / COUNT / LOOP fields of cache_info.
 *
 * Register roles in the visible lines:
 *   %r1   = address of cache_info
 *   %arg0 = current flush address (advanced by fice,m)
 *   %arg1 = stride added per fice,m
 *   %arg2 = outer loop count
 *   %arg3 = LOOP, %r31 = inner repeat counter for LOOP >= 2
 *   %r22  = receives the result of the rsm that clears the PSW I-bit
 *
 * NOTE(review): the fisync label, the set-up of %sr1 and any restore of
 * the I-bit from %r22 are not visible in this excerpt.
 */
206 .export flush_instruction_cache_local,code
207 .import cache_info,data
209 flush_instruction_cache_local:
215 ldil L%cache_info, %r1
216 ldo R%cache_info(%r1), %r1
218 /* Flush Instruction Cache */
220 LDREG ICACHE_BASE(%r1), %arg0
221 LDREG ICACHE_STRIDE(%r1), %arg1
222 LDREG ICACHE_COUNT(%r1), %arg2
223 LDREG ICACHE_LOOP(%r1), %arg3
224 rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/
/*
 * Dispatch on LOOP after pre-decrementing it: zero selects the LOOP = 1
 * path, negative goes straight to fisync, otherwise %r31 becomes the
 * inner counter of the LOOP >= 2 path.
 */
225 ADDIB= -1, %arg3, fioneloop /* Preadjust and test */
226 movb,<,n %arg3, %r31, fisync /* If loop < 0, do sync */
/* LOOP >= 2: %r31 repeats the flush, %arg2 counts the address steps. */
228 fimanyloop: /* Loop if LOOP >= 2 */
229 ADDIB> -1, %r31, fimanyloop /* Adjusted inner loop decr */
231 fice,m %arg1(%sr1, %arg0) /* Last fice and addr adjust */
232 movb,tr %arg3, %r31, fimanyloop /* Re-init inner loop count */
233 ADDIB<=,n -1, %arg2, fisync /* Outer loop decr */
/* LOOP = 1: one fice,m per address step. */
235 fioneloop: /* Loop if LOOP = 1 */
236 ADDIB> -1, %arg2, fioneloop /* Outer loop count decr */
237 fice,m %arg1(%sr1, %arg0) /* Fice for one loop */
/*
 * flush_data_cache_local: flush the data cache with fdce, using the
 * DCACHE_BASE / STRIDE / COUNT / LOOP fields of cache_info.
 *
 * Register roles in the visible lines:
 *   %r1   = address of cache_info
 *   %arg0 = current flush address (advanced by fdce,m)
 *   %arg1 = stride added per fdce,m
 *   %arg2 = outer loop count
 *   %arg3 = LOOP, %r31 = inner repeat counter for LOOP >= 2
 *
 * NOTE(review): the fdsync label and the set-up of %sr1 are not visible
 * in this excerpt.
 */
248 .export flush_data_cache_local, code
249 .import cache_info, data
251 flush_data_cache_local:
257 ldil L%cache_info, %r1
258 ldo R%cache_info(%r1), %r1
260 /* Flush Data Cache */
262 LDREG DCACHE_BASE(%r1), %arg0
263 LDREG DCACHE_STRIDE(%r1), %arg1
264 LDREG DCACHE_COUNT(%r1), %arg2
265 LDREG DCACHE_LOOP(%r1), %arg3
/*
 * Dispatch on LOOP after pre-decrementing it: zero selects the LOOP = 1
 * path, negative goes straight to fdsync, otherwise %r31 becomes the
 * inner counter of the LOOP >= 2 path.
 */
267 ADDIB= -1, %arg3, fdoneloop /* Preadjust and test */
268 movb,<,n %arg3, %r31, fdsync /* If loop < 0, do sync */
/* LOOP >= 2: %r31 repeats the flush, %arg2 counts the address steps. */
270 fdmanyloop: /* Loop if LOOP >= 2 */
271 ADDIB> -1, %r31, fdmanyloop /* Adjusted inner loop decr */
273 fdce,m %arg1(%sr1, %arg0) /* Last fdce and addr adjust */
274 movb,tr %arg3, %r31, fdmanyloop /* Re-init inner loop count */
275 ADDIB<=,n -1, %arg2, fdsync /* Outer loop decr */
/* LOOP = 1: one fdce,m per address step. */
277 fdoneloop: /* Loop if LOOP = 1 */
278 ADDIB> -1, %arg2, fdoneloop /* Outer loop count decr */
279 fdce,m %arg1(%sr1, %arg0) /* Fdce for one loop */
/*
 * copy_user_page_asm: page copy with a hand-unrolled loop.
 *
 * Only fragments of two loop variants are visible in this excerpt:
 *   - a variant that runs 32 iterations of 128 bytes, uses ldd and issues
 *     prefetching loads ahead of the copy;
 *   - a variant that runs 64 iterations of 64 bytes, described by its
 *     comment as tuned for PCXL/PCXL2 ldw/stw bundling.
 * %r25 is the address being read; %r1 is the iteration counter.
 *
 * NOTE(review): the entry label, the loop label 1, the store side of the
 * copy and whatever selects between the two variants are not visible
 * here -- confirm against the full file.
 */
291 .export copy_user_page_asm,code
300 /* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
301 * Unroll the loop by hand and arrange insn appropriately.
302 * GCC probably can do this just as well.
306 ldi 32, %r1 /* PAGE_SIZE/128 == 32 */
/* The loads below target %r0, so the data is discarded (prefetch only). */
307 ldw 64(%r25), %r0 /* prefetch 1 cacheline ahead */
308 ldw 128(%r25), %r0 /* prefetch 2 */
311 ldw 192(%r25), %r0 /* prefetch 3 */
312 ldw 256(%r25), %r0 /* prefetch 4 */
/* Loop back while the decremented chunk counter in %r1 is still positive. */
354 ADDIB> -1, %r1, 1b /* bundle 10 */
355 ldd 0(%r25), %r19 /* start next loads */
360 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
361 * bundles (very restricted rules for bundling).
362 * Note that until (if) we start saving
363 * the full 64 bit register values on interrupt, we can't
364 * use ldd/std on a 32 bit kernel.
/* Iteration count for the 64-bytes-per-iteration variant. */
366 ldi 64, %r1 /* PAGE_SIZE/64 == 64 */
412 * NOTE: Code in clear_user_page has a hard coded dependency on the
413 * maximum alias boundary being 4 MB. We've been assured by the
414 * parisc chip designers that there will not ever be a parisc
415 * chip with a larger alias boundary (Never say never :-) ).
417 * Subtle: the dtlb miss handlers support the temp alias region by
418 * "knowing" that if a dtlb miss happens within the temp alias
419 * region it must have occurred while in clear_user_page. Since
420 * this routine makes use of processor local translations, we
421 * don't want to insert them into the kernel page table. Instead,
422 * we load up some general registers (they need to be registers
423 * which aren't shadowed) with the physical page numbers (preshifted
424 * for tlb insertion) needed to insert the translations. When we
425 * miss on the translation, the dtlb miss handler inserts the
426 * translation into the tlb using these values:
428 * %r26 physical page (shifted for tlb insert) of "to" translation
429 * %r23 physical page (shifted for tlb insert) of "from" translation
435 * We can't do this since copy_user_page is used to bring in
436 * file data that might have instructions. Since the data would
437 * then need to be flushed out so the i-fetch can see it, it
438 * makes more sense to just copy through the kernel translation
441 * I'm still keeping this around because it may be possible to
442 * use it if more information is passed into copy_user_page().
443 * Have to do some measurements to see if it is worthwhile to
444 * lobby for such a change.
/*
 * Alternate copy_user_page_asm that copies through the temporary alias
 * region instead of the kernel mapping.  The comment preceding this code
 * says it is kept around but cannot currently be used.
 *
 * Register roles in the visible lines:
 *   %r26 = "to" physical address, converted to tlb insert format
 *   %r25 = "from" kernel virtual address on entry
 *   %r23 = %r25 minus __PAGE_OFFSET, then converted to tlb insert format
 *   %r24 = user virtual address; its low 22 bits select the alias slot
 *   %r28 = aliased virtual address for "to"
 *   %r29 = aliased virtual address for "from" (one extra bit set)
 *
 * NOTE(review): a doubleword (extrd/depd) and a word (extrw/depw) version
 * of the same sequence both appear; presumably a preprocessor conditional
 * that is not visible here selects one.  The entry label, the
 * initialisation of %r29 and the purge instructions are not visible here.
 */
447 .export copy_user_page_asm,code
454 ldil L%(__PAGE_OFFSET), %r1
456 sub %r25, %r1, %r23 /* move physical addr into non shadowed reg */
458 ldil L%(TMPALIAS_MAP_START), %r28
/* Doubleword form of the address set-up. */
460 extrd,u %r26,56,32, %r26 /* convert phys addr to tlb insert format */
461 extrd,u %r23,56,32, %r23 /* convert phys addr to tlb insert format */
462 depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */
463 depdi 0, 63,12, %r28 /* Clear any offset bits */
465 depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */
/* Word form of the same address set-up. */
467 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
468 extrw,u %r23, 24,25, %r23 /* convert phys addr to tlb insert format */
469 depw %r24, 31,22, %r28 /* Form aliased virtual address 'to' */
470 depwi 0, 31,12, %r28 /* Clear any offset bits */
472 depwi 1, 9,1, %r29 /* Form aliased virtual address 'from' */
475 /* Purge any old translations */
483 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
484 * bundles (very restricted rules for bundling). It probably
485 * does OK on PCXU and better, but we could do better with
486 * ldd/std instructions. Note that until (if) we start saving
487 * the full 64 bit register values on interrupt, we can't
488 * use ldd/std on a 32 bit kernel.
/*
 * __clear_user_page_asm: clear a user page through the temporary alias
 * region.
 *
 * Register roles in the visible lines:
 *   %r26 = physical address of the page, converted to tlb insert format
 *   %r25 = user virtual address; its low 22 bits select the alias slot
 *   %r28 = aliased virtual address built on TMPALIAS_MAP_START with the
 *          low 12 offset bits cleared
 *   %r1  = loop counter (32 x 128 bytes or 64 x 64 bytes)
 *
 * NOTE(review): a doubleword (extrd/depd) and a word (extrw/depw) version
 * of the address set-up both appear; presumably a preprocessor conditional
 * that is not visible here selects one.  The closing #endif, the purge
 * instruction and the store loops are not visible in this excerpt.
 */
536 .export __clear_user_page_asm,code
538 __clear_user_page_asm:
545 ldil L%(TMPALIAS_MAP_START), %r28
547 #if (TMPALIAS_MAP_START >= 0x80000000)
548 depdi 0, 31,32, %r28 /* clear any sign extension */
/* Doubleword form of the address set-up. */
550 extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */
551 depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
552 depdi 0, 63,12, %r28 /* Clear any offset bits */
/* Word form of the same address set-up. */
554 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
555 depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
556 depwi 0, 31,12, %r28 /* Clear any offset bits */
559 /* Purge any old translation */
/* Iteration count for the 128-bytes-per-iteration clearing loop. */
564 ldi 32, %r1 /* PAGE_SIZE/128 == 32 */
566 /* PREFETCH (Write) has not (yet) been proven to help here */
567 /* #define PREFETCHW_OP ldd 256(%0), %r0 */
/* Iteration count for the 64-bytes-per-iteration clearing loop. */
590 ldi 64, %r1 /* PAGE_SIZE/64 == 64 */
/*
 * flush_kernel_dcache_page: flush one page of the data cache.
 *
 * Visible set-up only:
 *   %r23 = dcache_stride (word loaded from memory)
 *   %r25 = 1 << PAGE_SHIFT, built by depositing a single 1 bit into a
 *          zeroed register
 *
 * NOTE(review): the doubleword (depdi) and word (depwi) forms are
 * presumably alternatives under a conditional that is not visible; the
 * flush loop itself is not visible in this excerpt either.
 */
619 .export flush_kernel_dcache_page
621 flush_kernel_dcache_page:
626 ldil L%dcache_stride, %r1
627 ldw R%dcache_stride(%r1), %r23
630 depdi,z 1, 63-PAGE_SHIFT,1, %r25
632 depwi,z 1, 31-PAGE_SHIFT,1, %r25
/*
 * flush_user_dcache_page: flush one page of user data cache.
 *
 * Register roles in the visible lines:
 *   %r26 = current address, accessed through %sr3 and advanced by each
 *          fdc,m
 *   %r23 = dcache_stride, the amount added to %r26 per fdc,m
 *   %r25 = 1 << PAGE_SHIFT
 *
 * The loop body at label 1 is unrolled: sixteen fdc,m instructions are
 * visible.
 *
 * NOTE(review): the doubleword (depdi) and word (depwi) forms are
 * presumably alternatives under a conditional that is not visible; the
 * computation of the end address and the loop branch are not visible in
 * this excerpt.
 */
663 .export flush_user_dcache_page
665 flush_user_dcache_page:
670 ldil L%dcache_stride, %r1
671 ldw R%dcache_stride(%r1), %r23
674 depdi,z 1,63-PAGE_SHIFT,1, %r25
676 depwi,z 1,31-PAGE_SHIFT,1, %r25
/* Unrolled flush: each fdc,m flushes at (%sr3,%r26), then adds %r23. */
682 1: fdc,m %r23(%sr3, %r26)
683 fdc,m %r23(%sr3, %r26)
684 fdc,m %r23(%sr3, %r26)
685 fdc,m %r23(%sr3, %r26)
686 fdc,m %r23(%sr3, %r26)
687 fdc,m %r23(%sr3, %r26)
688 fdc,m %r23(%sr3, %r26)
689 fdc,m %r23(%sr3, %r26)
690 fdc,m %r23(%sr3, %r26)
691 fdc,m %r23(%sr3, %r26)
692 fdc,m %r23(%sr3, %r26)
693 fdc,m %r23(%sr3, %r26)
694 fdc,m %r23(%sr3, %r26)
695 fdc,m %r23(%sr3, %r26)
696 fdc,m %r23(%sr3, %r26)
698 fdc,m %r23(%sr3, %r26)
/*
 * flush_user_icache_page: flush one page of user instruction cache.
 *
 * Register roles in the visible lines:
 *   %r26 = current address, accessed through %sr3 and advanced by each
 *          fic,m
 *   %r23 = icache_stride, the amount added to %r26 per fic,m
 *   %r25 = 1 << PAGE_SHIFT
 *
 * The step must be the instruction cache stride because the loop issues
 * fic (instruction cache flush).  Stepping by dcache_stride would skip
 * instruction cache lines whenever the icache stride is the smaller of
 * the two.  The other icache routines in this file also use
 * icache_stride.
 *
 * The loop body at label 1 is unrolled: sixteen fic,m instructions are
 * visible.
 *
 * NOTE(review): the doubleword (depdi) and word (depwi) forms are
 * presumably alternatives under a conditional that is not visible; the
 * computation of the end address and the loop branch are not visible in
 * this excerpt.
 */
707 .export flush_user_icache_page
709 flush_user_icache_page:
714 ldil L%icache_stride, %r1
715 ldw R%icache_stride(%r1), %r23
718 depdi,z 1, 63-PAGE_SHIFT,1, %r25
720 depwi,z 1, 31-PAGE_SHIFT,1, %r25
/* Unrolled flush: each fic,m flushes at (%sr3,%r26), then adds %r23. */
726 1: fic,m %r23(%sr3, %r26)
727 fic,m %r23(%sr3, %r26)
728 fic,m %r23(%sr3, %r26)
729 fic,m %r23(%sr3, %r26)
730 fic,m %r23(%sr3, %r26)
731 fic,m %r23(%sr3, %r26)
732 fic,m %r23(%sr3, %r26)
733 fic,m %r23(%sr3, %r26)
734 fic,m %r23(%sr3, %r26)
735 fic,m %r23(%sr3, %r26)
736 fic,m %r23(%sr3, %r26)
737 fic,m %r23(%sr3, %r26)
738 fic,m %r23(%sr3, %r26)
739 fic,m %r23(%sr3, %r26)
740 fic,m %r23(%sr3, %r26)
742 fic,m %r23(%sr3, %r26)
/*
 * purge_kernel_dcache_page: purge one page of the data cache.
 *
 * Visible lines only:
 *   %r23 = dcache_stride (word loaded from memory)
 *   %r25 = 1 << PAGE_SHIFT initially; by the time of the final compare it
 *          is used as the loop limit for %r26
 *   %r26 = current address, compared against %r25 to continue the loop
 *
 * NOTE(review): the doubleword (depdi) and word (depwi) forms are
 * presumably alternatives under a conditional that is not visible; the
 * purge instructions, label 1 and the end-address arithmetic are not
 * visible in this excerpt.
 */
752 .export purge_kernel_dcache_page
754 purge_kernel_dcache_page:
759 ldil L%dcache_stride, %r1
760 ldw R%dcache_stride(%r1), %r23
763 depdi,z 1, 63-PAGE_SHIFT,1, %r25
765 depwi,z 1, 31-PAGE_SHIFT,1, %r25
/* Branch back to label 1 while %r26 is below %r25 (unsigned compare). */
785 CMPB<< %r26, %r25, 1b
/*
 * flush_alias_page: flush a page through the temporary alias region.
 * The original comment below marks it as currently unused.
 *
 * Register roles in the visible lines:
 *   %r26 = physical address, converted to tlb insert format
 *   %r25 = virtual address; its low 22 bits select the alias slot
 *   %r28 = aliased virtual address built on TMPALIAS_MAP_START, used as
 *          the running address in the final compare
 *   %r23 = dcache_stride
 *   %r29 = 1 << PAGE_SHIFT initially; used as the loop limit for %r28
 *
 * NOTE(review): doubleword and word forms of the same set-up both appear,
 * presumably under a conditional that is not visible.  The entry label,
 * the purge, the flush instructions and label 1 are not visible in this
 * excerpt.
 */
796 /* Currently not used, but it still is a possible alternate
800 .export flush_alias_page
809 ldil L%(TMPALIAS_MAP_START), %r28
811 extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */
812 depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
813 depdi 0, 63,12, %r28 /* Clear any offset bits */
/* Word form of the same address set-up. */
815 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
816 depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
817 depwi 0, 31,12, %r28 /* Clear any offset bits */
820 /* Purge any old translation */
824 ldil L%dcache_stride, %r1
825 ldw R%dcache_stride(%r1), %r23
828 depdi,z 1, 63-PAGE_SHIFT,1, %r29
830 depwi,z 1, 31-PAGE_SHIFT,1, %r29
/* Branch back to label 1 while %r28 is below %r29 (unsigned compare). */
850 CMPB<< %r28, %r29, 1b
/*
 * flush_user_dcache_range_asm: flush a range of user data cache.
 *
 * Register roles in the visible lines:
 *   %r26 = running address, accessed through %sr3; first masked with the
 *          complement of %r21, then advanced by %r23 per fdc,m
 *   %r25 = end of the range (loop runs while %r26 is below it, unsigned)
 *   %r23 = dcache_stride
 *
 * NOTE(review): %r21 is not assigned in the visible lines; presumably it
 * holds stride - 1 so that ANDCM aligns the start address -- confirm
 * against the full file.
 */
861 .export flush_user_dcache_range_asm
863 flush_user_dcache_range_asm:
868 ldil L%dcache_stride, %r1
869 ldw R%dcache_stride(%r1), %r23
871 ANDCM %r26, %r21, %r26
/* Compare-and-branch loop with the flush placed after the branch. */
873 1: CMPB<<,n %r26, %r25, 1b
874 fdc,m %r23(%sr3, %r26)
/*
 * flush_kernel_dcache_range_asm: flush a range of kernel data cache.
 *
 * Register roles in the visible lines:
 *   %r26 = running address, first masked with the complement of %r21
 *   %r25 = end of the range (loop runs while %r26 is below it, unsigned)
 *   %r23 = dcache_stride
 *
 * NOTE(review): %r21 is not assigned in the visible lines (presumably
 * stride - 1), and the flush instruction that follows the branch is not
 * visible in this excerpt.
 */
883 .export flush_kernel_dcache_range_asm
885 flush_kernel_dcache_range_asm:
890 ldil L%dcache_stride, %r1
891 ldw R%dcache_stride(%r1), %r23
893 ANDCM %r26, %r21, %r26
895 1: CMPB<<,n %r26, %r25,1b
/*
 * flush_user_icache_range_asm: flush a range of user instruction cache.
 *
 * Register roles in the visible lines:
 *   %r26 = running address, accessed through %sr3; first masked with the
 *          complement of %r21, then advanced by %r23 per fic,m
 *   %r25 = end of the range (loop runs while %r26 is below it, unsigned)
 *   %r23 = icache_stride
 *
 * NOTE(review): %r21 is not assigned in the visible lines; presumably it
 * holds stride - 1 so that ANDCM aligns the start address -- confirm
 * against the full file.
 */
906 .export flush_user_icache_range_asm
908 flush_user_icache_range_asm:
913 ldil L%icache_stride, %r1
914 ldw R%icache_stride(%r1), %r23
916 ANDCM %r26, %r21, %r26
/* Compare-and-branch loop with the flush placed after the branch. */
918 1: CMPB<<,n %r26, %r25,1b
919 fic,m %r23(%sr3, %r26)
/*
 * flush_kernel_icache_page: flush one page of kernel instruction cache.
 *
 * Visible lines only:
 *   %r23 = icache_stride (word loaded from memory)
 *   %r25 = 1 << PAGE_SHIFT initially; by the time of the final compare it
 *          is used as the loop limit for %r26
 *   %r26 = current address, compared against %r25 to continue the loop
 *
 * NOTE(review): the doubleword (depdi) and word (depwi) forms are
 * presumably alternatives under a conditional that is not visible; the
 * flush instructions, label 1 and the end-address arithmetic are not
 * visible in this excerpt.
 */
928 .export flush_kernel_icache_page
930 flush_kernel_icache_page:
935 ldil L%icache_stride, %r1
936 ldw R%icache_stride(%r1), %r23
939 depdi,z 1, 63-PAGE_SHIFT,1, %r25
941 depwi,z 1, 31-PAGE_SHIFT,1, %r25
/* Branch back to label 1 while %r26 is below %r25 (unsigned compare). */
962 CMPB<< %r26, %r25, 1b
/*
 * flush_kernel_icache_range_asm: flush a range of kernel instruction
 * cache.
 *
 * Register roles in the visible lines:
 *   %r26 = running address, first masked with the complement of %r21
 *   %r25 = end of the range (loop runs while %r26 is below it, unsigned)
 *   %r23 = icache_stride
 *
 * NOTE(review): %r21 is not assigned in the visible lines (presumably
 * stride - 1), and the flush instruction that follows the branch is not
 * visible in this excerpt.
 */
972 .export flush_kernel_icache_range_asm
974 flush_kernel_icache_range_asm:
979 ldil L%icache_stride, %r1
980 ldw R%icache_stride(%r1), %r23
982 ANDCM %r26, %r21, %r26
984 1: CMPB<<,n %r26, %r25, 1b
996 .export disable_sr_hashing_asm,code
998 disable_sr_hashing_asm:
1003 /* Switch to real mode */
1005 ssm 0, %r0 /* relied upon translation! */
1014 rsm (PSW_SM_Q|PSW_SM_I), %r0 /* disable Q&I to load the iia queue */
1015 ldil L%REAL_MODE_PSW, %r1
1016 ldo R%REAL_MODE_PSW(%r1), %r1
1018 mtctl %r0, %cr17 /* Clear IIASQ tail */
1019 mtctl %r0, %cr17 /* Clear IIASQ head */
1021 ldo R%PA(1f)(%r1), %r1
1022 mtctl %r1, %cr18 /* IIAOQ head */
1024 mtctl %r1, %cr18 /* IIAOQ tail */
1028 1: cmpib,=,n SRHASH_PCXST, %r26,srdis_pcxs
1029 cmpib,=,n SRHASH_PCXL, %r26,srdis_pcxl
1030 cmpib,=,n SRHASH_PA20, %r26,srdis_pa20
1035 /* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1037 .word 0x141c1a00 /* mfdiag %dr0, %r28 */
1038 .word 0x141c1a00 /* must issue twice */
1039 depwi 0,18,1, %r28 /* Clear DHE (dcache hash enable) */
1040 depwi 0,20,1, %r28 /* Clear IHE (icache hash enable) */
1041 .word 0x141c1600 /* mtdiag %r28, %dr0 */
1042 .word 0x141c1600 /* must issue twice */
1047 /* Disable Space Register Hashing for PCXL */
1049 .word 0x141c0600 /* mfdiag %dr0, %r28 */
1050 depwi 0,28,2, %r28 /* Clear DHASH_EN & IHASH_EN */
1051 .word 0x141c0240 /* mtdiag %r28, %dr0 */
1056 /* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+ */
1058 .word 0x144008bc /* mfdiag %dr2, %r28 */
1059 depdi 0, 54,1, %r28 /* clear DIAG_SPHASH_ENAB (bit 54) */
1060 .word 0x145c1840 /* mtdiag %r28, %dr2 */
1064 /* Switch back to virtual mode */
1066 rsm PSW_SM_Q, %r0 /* clear Q bit to load iia queue */
1067 ldil L%KERNEL_PSW, %r1
1068 ldo R%KERNEL_PSW(%r1), %r1
1070 mtctl %r0, %cr17 /* Clear IIASQ tail */
1071 mtctl %r0, %cr17 /* Clear IIASQ head */
1073 ldo R%(2f)(%r1), %r1
1074 mtctl %r1, %cr18 /* IIAOQ head */
1076 mtctl %r1, %cr18 /* IIAOQ tail */