/****************************************************************************

 For Alpha Linux, BusToMem() and MemToBus() can be simply memcpy(), BUT:
 we need to prevent unaligned operations when accessing DENSE space on the BUS,
 as the video memory is mmap'd that way. The below code does this.

 NOTE: we could simply use the "memcpy()" from LIBC here, but that, currently,
 is not as fast.

 Thanks to Linus Torvalds for contributing this code.

****************************************************************************/
#ifdef HAVE_XORG_CONFIG_H
#include <xorg-config.h>
#endif

#include "xf86_OSlib.h"

#ifdef __alpha__

#include "compiler.h"		/* for ldl_u()/stl_u(), used below */
/*
 * The Jensen lacks dense memory, thus we have to address the bus via
 * the sparse addressing scheme. These routines are only used in s3im.c
 * Non time critical code uses SlowBCopy_{from/to} bus.
 *
 * Martin Ostermann (ost@comnets.rwth-aachen.de) - Apr.-Sep. 1996
 */
#ifdef TEST_JENSEN_CODE /* define to test the Sparse addressing on a non-Jensen */
#define LWORD_CODING (0x18)
#define SPARSE (5)
#else
#define LWORD_CODING (0x60)
#define SPARSE (7)
#endif
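/*
 * Illustrative helper (an addition for clarity, not part of the original
 * routines below): under the sparse scheme a bus byte-offset is spread out
 * by the SPARSE shift, and the low bits of the resulting sparse address
 * select the transfer width.  The routines below compute longword lanes
 * with exactly this arithmetic; the helper itself is hypothetical:
 */
static __inline__ volatile unsigned int *
jensen_sparse_lword(char *base, long off)
{
    /* add EISA longword coding, then round down to a longword lane */
    return (volatile unsigned int *)
	((long)(base + (off << SPARSE) + LWORD_CODING) & ~(3 << SPARSE));
}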
void
xf86JensenMemToBus(char *Base, long dst, long src, int count)
{
    if( ((long)src^((long)dst)) & 3) {
	/* src & dst are NOT aligned to each other */
	unsigned long addr;
	unsigned long low_word, high_word, last_read;
	long rm, loop;
	unsigned long tmp, org, org2, mask, src_org, count_org;

	src_org = src;
	count_org = count;
	rm = (long)dst & 3;		/* assumption: round-off of dst */
	src = src_org - rm;		/* assumption: source byte matching the lane base */

	/* add EISA longword coding and round off*/
	addr = (long)(Base+(dst<<SPARSE) + LWORD_CODING) & ~(3<<SPARSE);
	count = count_org + rm;
	org = *(volatile unsigned int *)addr;
66 :"=r" (low_word
):"m" (*(unsigned long *)(src_org
)));
69 last_read
= src_org
+count_org
- 1;
71 :"=r" (high_word
):"m" (*(unsigned long *)(src
+4)));
	__asm__("extll %1,%2,%0"
		:"=r" (low_word)
		:"r" (low_word), "r" ((unsigned long)(src)));
	__asm__("extlh %1,%2,%0"
		:"=r" (tmp)
		:"r" (high_word), "r" ((unsigned long)(src)));
	tmp |= low_word;
	__asm__("mskqh %1,%2,%0"
		:"=r" (tmp)
		:"r" (tmp), "r" (rm));
	__asm__("mskql %1,%2,%0"
		:"=r" (org2)
		:"r" (org), "r" (rm));
	tmp |= org2;

	loop = (count-4) >> 2; /* loop eqv. count>=4 ; count -= 4 */
	if( loop > 0 ) {	/* assumption: threshold elided in the source */
	    while( loop-- ) {
		/* tmp to be stored completly -- need to read next word*/
		*(volatile unsigned int *) (addr) = tmp;
		low_word = high_word;
		src += 4;
		addr += 4<<SPARSE;
		__asm__("ldq_u %0,%1"
			:"=r" (high_word):"m" (*(unsigned long*)(src+4)));
		__asm__("extll %1,%2,%0"
			:"=r" (low_word)
			:"r" (low_word), "r" ((unsigned long)src));
		__asm__("extlh %1,%2,%0"
			:"=r" (tmp)
			:"r" (high_word), "r" ((unsigned long)src));
		tmp |= low_word;
	    }
	    /* Store tmp completly, and possibly read one more word.*/
	    *(volatile unsigned int *) (addr) = tmp;
	    __asm__("ldq_u %0,%1"
		    :"=r" (tmp):"m" (*((unsigned long *)(last_read)) ));
	    src += 4;
	    addr += 4<<SPARSE;
	    __asm__("extll %1,%2,%0"
		    :"=r" (low_word)
		    :"r" (high_word), "r" ((unsigned long)src));
	    __asm__("extlh %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (tmp), "r" ((unsigned long)src));
	    tmp |= low_word;
	    org = *(volatile unsigned int *)addr;
	    __asm__("mskql %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (tmp), "r" (count&3));
	    __asm__("mskqh %1,%2,%0"
		    :"=r" (org)
		    :"r" (org), "r" (count&3));
	    tmp |= org;
	    *(volatile unsigned int *) (addr) = tmp;
	} else { /* count > 4 */
	    __asm__("ldq_u %0,%1"
		    :"=r" (high_word):"m" (*(unsigned long *)(src+4)));
	    __asm__("extll %1,%2,%0"
		    :"=r" (low_word)
		    :"r" (low_word), "r" ((unsigned long)(src)));
	    __asm__("extlh %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (high_word), "r" ((unsigned long)(src)));
	    tmp |= low_word;
	    mask = -1;		/* assumption: build the destination patch mask */
	    __asm__("mskqh %1,%2,%0"
		    :"=r" (mask)
		    :"r" (mask), "r" (rm));
	    __asm__("mskql %1,%2,%0"
		    :"=r" (mask)
		    :"r" (mask), "r" (count));
	    tmp = (tmp & mask) | (org & ~mask);
	    *(volatile unsigned int *) (addr) = tmp;
	    addr += 4<<SPARSE;
	    org = *(volatile unsigned int *)addr;
	    __asm__("mskqh %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (tmp), "r" (rm));
	    __asm__("mskql %1,%2,%0"
		    :"=r" (org2)
		    :"r" (org), "r" (rm));
	    tmp |= org2;
	    *(volatile unsigned int *) (addr) = tmp;
	}
    } else { /* src & dst are aligned to each other */
	unsigned long addr;
	unsigned int *src_r;
	unsigned int tmp,org,rm;

	/* add EISA longword coding and round off*/
	addr = (long)(Base+(dst<<SPARSE) + LWORD_CODING) & ~(3<<SPARSE);
	src_r = (unsigned int*)((long)src & ~3L);
	rm = (long)src & 3;

	if( rm ) {	/* assumption: patch the leading partial longword */
	    tmp = *src_r++;
	    org = *(volatile unsigned int *)addr;
	    __asm__("mskqh %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (tmp), "r" (rm));
	    __asm__("mskql %1,%2,%0"
		    :"=r" (org)
		    :"r" (org), "r" (rm));
	    tmp |= org;
	    count -= 4-rm;
	    *(volatile unsigned int *) addr = tmp;
	    addr += 4<<SPARSE;
	}

	/* copy whole longwords */
	while( count > 3 ) {
	    *(volatile unsigned int *) addr = *src_r++;
	    addr += 4<<SPARSE;
	    count -= 4;
	}

	if( count > 0 ) {	/* assumption: patch the trailing partial longword */
	    tmp = *src_r;
	    org = *(volatile unsigned int *)addr;
	    __asm__("mskql %1,%2,%0"
		    :"=r" (tmp)
		    :"r" (tmp), "r" (count));
	    __asm__("mskqh %1,%2,%0"
		    :"=r" (org)
		    :"r" (org), "r" (count));
	    tmp |= org;
	    *(volatile unsigned int *) (addr) = tmp;
	}
    }
}
void
xf86JensenBusToMem(char *Base, char *dst, unsigned long src, int count)
{
 /* Optimization of BusToMem() is left as an exercise to the reader ;-)
  * Consider that ldq_u/extlh/extll won't work because of the bus being
  * only able to transfer longwords.
  */
    unsigned long addr;
    unsigned long result;

    addr = (unsigned long)(Base+(src<<SPARSE)) ;
    /* copy leading, unaligned bytes one at a time */
    while( addr & (3<<SPARSE) ){
	if(count <= 0) return;
	result = *(volatile int *) addr;
	result >>= ((addr>>SPARSE) & 3) * 8;
	*dst++ = (char) result;
	count--;
	addr += 1<<SPARSE;
    }
    /* copy whole longwords through the longword-coded lane */
    count -= 4;
    while( count >= 0 ){
	int i;

	result = *(volatile int *) (addr+LWORD_CODING);
	for(i=0;i<4;i++) {
	    *dst++ = (char) result;
	    result >>= 8;
	}
	count -= 4;
	addr += 4<<SPARSE;
    }
    count += 4;
    /* copy trailing bytes one at a time */
    while( count > 0 ){
	result = *(volatile int *) addr;
	result >>= ((addr>>SPARSE) & 3) * 8;
	*dst++ = (char) result;
	count--;
	addr += 1<<SPARSE;
    }
}
static unsigned long __memcpy(unsigned long dest, unsigned long src, int n);
void
xf86BusToMem(unsigned char *dst, unsigned char *src, int len)
{
    __memcpy((unsigned long)dst, (unsigned long)src, len);
}
void
xf86MemToBus(unsigned char *dst, unsigned char *src, int len)
{
    if (len == sizeof(int)) {
	if (!(((long)src | (long)dst) & 3))
	    *((unsigned int*)dst) = *((unsigned int*)(src));
	else {
	    int i;

	    if ((long)src & 3)
		i = ldl_u((unsigned int*)src);
	    else
		i = *(unsigned int*)src;
	    if ((long)dst & 3)
		stl_u(i,(unsigned int*)dst);
	    else
		*(unsigned int*)dst = i;
	}
    }
    else
	__memcpy((unsigned long)dst, (unsigned long)src, len);
}
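/*
 * For reference: ldl_u()/stl_u() are the unaligned-longword helpers from
 * compiler.h.  Conceptually, an unaligned longword load is the same
 * ldq_u/extll/extlh merge used throughout this file; the sketch below is
 * only a hypothetical illustration of that idea, not the compiler.h
 * implementation:
 */
static __inline__ unsigned int
__ldl_u_sketch(unsigned int *p)
{
    unsigned long low, high;

    /* fetch the two quadwords that may contain the longword at p */
    __asm__("ldq_u %0,%1" :"=r" (low) :"m" (*(unsigned long *)p));
    __asm__("ldq_u %0,%1" :"=r" (high) :"m" (*(unsigned long *)((long)p+3)));
    /* extract the low and high parts and merge them */
    __asm__("extll %1,%2,%0" :"=r" (low) :"r" (low), "r" ((unsigned long)p));
    __asm__("extlh %1,%2,%0" :"=r" (high) :"r" (high), "r" ((unsigned long)p));
    return (unsigned int)(low | high);
}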
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds, used with his permission.
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */
/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
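/*
 * Hypothetical sketch of the ldq_u*2/mask/stq_u variant suggested above
 * (illustration only, an untested assumption): fetch the eight source
 * bytes that belong at d from the two quadwords covering them, then patch
 * them into the destination quadword with a single stq_u.  A real
 * replacement would also advance d/s/n by the number of bytes written.
 */
static __inline__ void
__align_head_sketch(unsigned long d, unsigned long s)
{
	unsigned long low, high, src_q, orig, ins;

	/* gather 8 source bytes starting at s, regardless of alignment */
	__asm__("ldq_u %0,%1" :"=r" (low) :"m" (*(unsigned long *)s));
	__asm__("ldq_u %0,%1" :"=r" (high) :"m" (*(unsigned long *)(s+8)));
	__asm__("extql %1,%2,%0" :"=r" (low) :"r" (low), "r" (s));
	__asm__("extqh %1,%2,%0" :"=r" (high) :"r" (high), "r" (s));
	src_q = low | high;

	/* merge them into the quadword containing d, keeping bytes below d */
	__asm__("ldq_u %0,%1" :"=r" (orig) :"m" (*(unsigned long *)d));
	__asm__("insql %1,%2,%0" :"=r" (ins) :"r" (src_q), "r" (d));
	__asm__("mskql %1,%2,%0" :"=r" (orig) :"r" (orig), "r" (d));
	orig |= ins;
	__asm__("stq_u %1,%0" :"=m" (*(unsigned long *)d) :"r" (orig));
}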
/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED(d,s,n) DO_REST(d,s,n)
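/*
 * Likewise, a hypothetical sketch of the ldq/mask/stq tail suggested
 * above (illustration only, assuming 0 < n < 8 and both pointers already
 * 8-byte aligned): merge the last n bytes with one read-modify-write of
 * the destination quadword instead of DO_REST's byte loop.
 */
static __inline__ void
__tail_quad_sketch(unsigned long d, unsigned long s, long n)
{
	unsigned long data, orig;

	data = *(unsigned long *)s;	/* aligned source quadword */
	orig = *(unsigned long *)d;	/* destination quadword to patch */
	/* keep the low n bytes of the source ... */
	__asm__("mskql %1,%2,%0" :"=r" (data) :"r" (data), "r" (n));
	/* ... and everything above the low n bytes of the destination */
	__asm__("mskqh %1,%2,%0" :"=r" (orig) :"r" (orig), "r" (n));
	*(unsigned long *)d = data | orig;
}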
/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static __inline__ void
__memcpy_unaligned(unsigned long d, unsigned long s, long n)
{
	ALIGN_DEST_TO8(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;

		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;

			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST(d,s,n);
}
/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but it would seem that using a floating
 * point register for the move seems to slow things down (very small difference,
 * though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static __inline__ void
__memcpy_aligned(unsigned long d, unsigned long s, long n)
{
	ALIGN_DEST_TO8(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;

		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED(d,s,n);
}
static unsigned long __memcpy(unsigned long dest, unsigned long src, int n)
{
	if (!((dest ^ src) & 7)) {
		__memcpy_aligned(dest, src, n);
		return dest;
	}
	__memcpy_unaligned(dest, src, n);
	return dest;
}
#else /* __alpha__ */
void
xf86BusToMem(unsigned char *dst, unsigned char *src, int len)
{
    memcpy(dst, src, len);
}
void
xf86MemToBus(unsigned char *dst, unsigned char *src, int len)
{
    memcpy(dst, src, len);
}
#endif /* __alpha__ */