#ifndef _I386_STRING_H_
#define _I386_STRING_H_

#ifdef __KERNEL__
/*
 * On a 486 or Pentium, we are better off not using the
 * byte string operations. But on a 386 or a PPro the
 * byte string ops are faster than doing it by hand
 * (MUCH faster on a Pentium).
 */

/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strsep,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used through-out, making for "slightly" unclear code :-)
 *
 * NO Copyright (C) 1991, 1992 Linus Torvalds,
 * consider these trivial functions to be PD.
 */

/* AK: in fact I bet it would be better to move this stuff all out of line.
 */
#define __HAVE_ARCH_STRCPY
static inline char * strcpy(char * dest,const char *src)
{
        int d0, d1, d2;
        __asm__ __volatile__(
                "1:\tlodsb\n\t"
                "stosb\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b"
                : "=&S" (d0), "=&D" (d1), "=&a" (d2)
                :"0" (src),"1" (dest) : "memory");
        return dest;
}
#define __HAVE_ARCH_STRNCPY
static inline char * strncpy(char * dest,const char *src,size_t count)
{
        int d0, d1, d2, d3;
        __asm__ __volatile__(
                "1:\tdecl %2\n\t"
                "js 2f\n\t"
                "lodsb\n\t"
                "stosb\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n\t"
                "rep\n\t"
                "stosb\n"
                "2:"
                : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
                :"0" (src),"1" (dest),"2" (count) : "memory");
        return dest;
}
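/*
 * strncpy() above follows ISO C semantics: if src is shorter than count,
 * the trailing "rep stosb" pads the rest of dest with the 0 left in %al,
 * and dest is not NUL-terminated when no 0 occurs in the first count
 * bytes of src.
 */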
#define __HAVE_ARCH_STRCAT
static inline char * strcat(char * dest,const char * src)
{
        int d0, d1, d2, d3;
        __asm__ __volatile__(
                "repne\n\t"
                "scasb\n\t"
                "decl %1\n"
                "1:\tlodsb\n\t"
                "stosb\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b"
                : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
                : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory");
        return dest;
}
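/*
 * Both strcat() above and strncat() below first find the terminating NUL
 * of dest with "repne scasb" (%eax = 0, %ecx = 0xffffffff, so the scan is
 * effectively unbounded) and then back %edi up one byte so the appended
 * bytes overwrite that NUL.
 */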
#define __HAVE_ARCH_STRNCAT
static inline char * strncat(char * dest,const char * src,size_t count)
{
        int d0, d1, d2, d3;
        __asm__ __volatile__(
                "repne\n\t"
                "scasb\n\t"
                "decl %1\n\t"
                "movl %8,%3\n"
                "1:\tdecl %3\n\t"
                "js 2f\n\t"
                "lodsb\n\t"
                "stosb\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n"
                "2:\txorl %2,%2\n\t"
                "stosb"
                : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
                : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
                : "memory");
        return dest;
}
#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char * cs,const char * ct)
{
        int d0, d1;
        register int __res;
        __asm__ __volatile__(
                "1:\tlodsb\n\t"
                "scasb\n\t"
                "jne 2f\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n\t"
                "xorl %%eax,%%eax\n\t"
                "jmp 3f\n"
                "2:\tsbbl %%eax,%%eax\n\t"
                "orb $1,%%al\n"
                "3:"
                :"=a" (__res), "=&S" (d0), "=&D" (d1)
                :"1" (cs),"2" (ct)
                :"memory");
        return __res;
}
#define __HAVE_ARCH_STRNCMP
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
        register int __res;
        int d0, d1, d2;
        __asm__ __volatile__(
                "1:\tdecl %3\n\t"
                "js 2f\n\t"
                "lodsb\n\t"
                "scasb\n\t"
                "jne 3f\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n"
                "2:\txorl %%eax,%%eax\n\t"
                "jmp 4f\n"
                "3:\tsbbl %%eax,%%eax\n\t"
                "orb $1,%%al\n"
                "4:"
                :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
                :"1" (cs),"2" (ct),"3" (count)
                :"memory");
        return __res;
}
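/*
 * strchr() below: when the scan stops at the NUL terminator without
 * finding c, %esi is overwritten with 1 so that the final "decl" turns
 * the return value into NULL; on a match it returns %esi - 1, which
 * points at the matching byte.
 */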
#define __HAVE_ARCH_STRCHR
static inline char * strchr(const char * s, int c)
{
        int d0;
        register char * __res;
        __asm__ __volatile__(
                "movb %%al,%%ah\n"
                "1:\tlodsb\n\t"
                "cmpb %%ah,%%al\n\t"
                "je 2f\n\t"
                "testb %%al,%%al\n\t"
                "jne 1b\n\t"
                "movl $1,%1\n"
                "2:\tmovl %1,%0\n\t"
                "decl %0"
                :"=a" (__res), "=&S" (d0)
                :"1" (s),"0" (c)
                :"memory");
        return __res;
}
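/*
 * strrchr() below keeps scanning to the end of the string; each time the
 * current byte equals c, "leal -1(%%esi),%0" records its address, so the
 * result is the last match (or the initial 0, i.e. NULL, if none).
 */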
#define __HAVE_ARCH_STRRCHR
static inline char * strrchr(const char * s, int c)
{
        int d0, d1;
        register char * __res;
        __asm__ __volatile__(
                "movb %%al,%%ah\n"
                "1:\tlodsb\n\t"
                "cmpb %%ah,%%al\n\t"
                "jne 2f\n\t"
                "leal -1(%%esi),%0\n"
                "2:\ttestb %%al,%%al\n\t"
                "jne 1b"
                :"=g" (__res), "=&S" (d0), "=&a" (d1)
                :"0" (0),"1" (s),"2" (c)
                :"memory");
        return __res;
}
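/*
 * strlen() below: "repne scasb" with %ecx preloaded to 0xffffffff counts
 * down once per byte scanned, including the NUL.  For "abc" %ecx ends up
 * at 0xfffffffb; "notl" turns that into 4 and "decl" into the length 3.
 */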
#define __HAVE_ARCH_STRLEN
static inline size_t strlen(const char * s)
{
        int d0;
        register int __res;
        __asm__ __volatile__(
                "repne\n\t"
                "scasb\n\t"
                "notl %0\n\t"
                "decl %0"
                :"=c" (__res), "=&D" (d0)
                :"1" (s),"a" (0), "0" (0xffffffffu)
                :"memory");
        return __res;
}
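/*
 * __memcpy(): copy n/4 dwords with "rep ; movsl", then reload %ecx with n,
 * mask it down to n & 3 and finish the tail with "rep ; movsb".  The
 * optional "jz 1f" skips the second (microcoded) rep when the length is a
 * multiple of 4.
 */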
static __always_inline void * __memcpy(void * to, const void * from, size_t n)
{
        int d0, d1, d2;
        __asm__ __volatile__(
                "rep ; movsl\n\t"
                "movl %4,%%ecx\n\t"
                "andl $3,%%ecx\n\t"
#if 1   /* want to pay 2 byte penalty for a chance to skip microcoded rep? */
                "jz 1f\n\t"
#endif
                "rep ; movsb\n\t"
                "1:"
                : "=&c" (d0), "=&D" (d1), "=&S" (d2)
                : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from)
                : "memory");
        return (to);
}
/*
 * This looks ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */
static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
        long esi, edi;
        if (!n) return to;
#if 1   /* want to do small copies with non-string ops? */
        switch (n) {
                case 1: *(char*)to = *(char*)from; return to;
                case 2: *(short*)to = *(short*)from; return to;
                case 4: *(int*)to = *(int*)from; return to;
#if 1   /* including those doable with two moves? */
                case 3: *(short*)to = *(short*)from;
                        *((char*)to+2) = *((char*)from+2); return to;
                case 5: *(int*)to = *(int*)from;
                        *((char*)to+4) = *((char*)from+4); return to;
                case 6: *(int*)to = *(int*)from;
                        *((short*)to+2) = *((short*)from+2); return to;
                case 8: *(int*)to = *(int*)from;
                        *((int*)to+1) = *((int*)from+1); return to;
#endif
        }
#endif
        esi = (long) from;
        edi = (long) to;
        if (n >= 5*4) {
                /* large block: use rep prefix */
                int ecx;
                __asm__ __volatile__(
                        "rep ; movsl"
                        : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
                        : "0" (n/4), "1" (edi),"2" (esi)
                        : "memory"
                );
        } else {
                /* small block: don't clobber ecx + smaller code */
                if (n >= 4*4) __asm__ __volatile__("movsl"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                if (n >= 3*4) __asm__ __volatile__("movsl"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                if (n >= 2*4) __asm__ __volatile__("movsl"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                if (n >= 1*4) __asm__ __volatile__("movsl"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
        }
        switch (n % 4) {
                /* tail */
                case 0: return to;
                case 1: __asm__ __volatile__("movsb"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                        return to;
                case 2: __asm__ __volatile__("movsw"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                        return to;
                default: __asm__ __volatile__("movsw\n\tmovsb"
                        :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
                        return to;
        }
}
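/*
 * For example, a constant-size memcpy(to, from, 6) reduces to the two
 * plain stores in "case 6:" above, with no string instructions at all;
 * only blocks of 5*4 bytes or more take the "rep ; movsl" path.
 */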
#define __HAVE_ARCH_MEMCPY

#ifdef CONFIG_X86_USE_3DNOW

#include <asm/mmx.h>

/*
 * This CPU favours 3DNow strongly (eg AMD Athlon)
 */

static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
{
        if (len < 512)
                return __constant_memcpy(to, from, len);
        return _mmx_memcpy(to, from, len);
}

static __inline__ void *__memcpy3d(void *to, const void *from, size_t len)
{
        if (len < 512)
                return __memcpy(to, from, len);
        return _mmx_memcpy(to, from, len);
}

#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy3d((t),(f),(n)) : \
 __memcpy3d((t),(f),(n)))

#else

/*
 * No 3D Now!
 */

#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

#endif
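/*
 * In either configuration memcpy() is a macro: a compile-time-constant
 * length is routed to the __constant_* variant so the copy can be fully
 * expanded inline, anything else to the generic rep-string (or, with
 * CONFIG_X86_USE_3DNOW and len >= 512, the MMX) copy.
 */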
#define __HAVE_ARCH_MEMMOVE
void *memmove(void * dest,const void * src, size_t n);

#define memcmp __builtin_memcmp
#define __HAVE_ARCH_MEMCHR
static inline void * memchr(const void * cs,int c,size_t count)
{
        int d0;
        register void * __res;
        if (!count)
                return NULL;
        __asm__ __volatile__(
                "repne\n\t"
                "scasb\n\t"
                "je 1f\n\t"
                "movl $1,%0\n"
                "1:\tdecl %0"
                :"=D" (__res), "=&c" (d0)
                :"a" (c),"0" (cs),"1" (count)
                :"memory");
        return __res;
}
static inline void * __memset_generic(void * s, char c,size_t count)
{
        int d0, d1;
        __asm__ __volatile__(
                "rep\n\t"
                "stosb"
                : "=&c" (d0), "=&D" (d1)
                :"a" (c),"1" (s),"0" (count)
                :"memory");
        return s;
}

/* we might want to write optimized versions of these later */
#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
/*
 * memset(x,0,y) is a reasonably common thing to do, so we want to fill
 * things 32 bits at a time even when we don't know the size of the
 * area at compile-time..
 */
static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
{
        int d0, d1;
        __asm__ __volatile__(
                "rep ; stosl\n\t"
                "testb $2,%b3\n\t"
                "je 1f\n\t"
                "stosw\n"
                "1:\ttestb $1,%b3\n\t"
                "je 2f\n\t"
                "stosb\n"
                "2:"
                :"=&c" (d0), "=&D" (d1)
                :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
                :"memory");
        return (s);
}
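/*
 * Example: count = 7 stores one dword via "rep ; stosl" (count/4 = 1),
 * then "testb $2" adds a word and "testb $1" a final byte: 4+2+1 bytes.
 * c is expected to be the fill byte already replicated into all four
 * byte lanes; the memset() macro below takes care of that.
 */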
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
static inline size_t strnlen(const char * s, size_t count)
{
        int d0;
        register int __res;
        __asm__ __volatile__(
                "movl %2,%0\n\t"
                "jmp 2f\n"
                "1:\tcmpb $0,(%0)\n\t"
                "je 3f\n\t"
                "incl %0\n"
                "2:\tdecl %1\n\t"
                "cmpl $-1,%1\n\t"
                "jne 1b\n"
                "3:\tsubl %2,%0"
                :"=a" (__res), "=&d" (d0)
                :"c" (s),"1" (count)
                :"memory");
        return __res;
}
/* end of additional stuff */
#define __HAVE_ARCH_STRSTR

extern char *strstr(const char *cs, const char *ct);

/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as we by now know that both pattern and count are constant..
 */
static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
{
        switch (count) {
                case 0:
                        return s;
                case 1:
                        *(unsigned char *)s = pattern;
                        return s;
                case 2:
                        *(unsigned short *)s = pattern;
                        return s;
                case 3:
                        *(unsigned short *)s = pattern;
                        *(2+(unsigned char *)s) = pattern;
                        return s;
                case 4:
                        *(unsigned long *)s = pattern;
                        return s;
        }
#define COMMON(x) \
__asm__ __volatile__( \
        "rep ; stosl" \
        x \
        : "=&c" (d0), "=&D" (d1) \
        : "a" (pattern),"0" (count/4),"1" ((long) s) \
        : "memory")
{
        int d0, d1;
        switch (count % 4) {
                case 0: COMMON(""); return s;
                case 1: COMMON("\n\tstosb"); return s;
                case 2: COMMON("\n\tstosw"); return s;
                default: COMMON("\n\tstosw\n\tstosb"); return s;
        }
}

#undef COMMON
}
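/*
 * Example: a constant count of 11 emits "rep ; stosl" for two dwords plus
 * the "stosw\n\tstosb" tail from the default case above (8 + 2 + 1 bytes).
 */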
#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

#define __memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

#define __HAVE_ARCH_MEMSET
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
 __memset((s),(c),(count)))
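/*
 * The 0x01010101UL multiply replicates the fill byte into every byte of a
 * 32-bit word, e.g. memset(p, 0xab, n) hands the pattern 0xabababab to the
 * dword-at-a-time routines above.
 */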
/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
#define __HAVE_ARCH_MEMSCAN
static inline void * memscan(void * addr, int c, size_t size)
{
        if (!size)
                return addr;
        __asm__("repnz; scasb\n\t"
                "jnz 1f\n\t"
                "dec %%edi\n"
                "1:"
                : "=D" (addr), "=c" (size)
                : "0" (addr), "1" (size), "a" (c)
                : "memory");
        return addr;
}
#endif /* __KERNEL__ */

#endif