! SH5 code Copyright 2002 SuperH Ltd.

#include "asm.h"

ENTRY(strcmp)

#if __SHMEDIA__
        ld.ub   r2,0,r4
        pt/l    quickret0,tr0
        ld.ub   r3,0,r5
        ptabs   r18,tr2
        beqi/u  r4,0,tr0
        ld.ub   r2,1,r6
        bne/u   r4,r5,tr0
        pt/l    quickret1,tr1
        ld.ub   r3,1,r7
        beqi/u  r6,0,tr1
        ld.ub   r2,2,r4
        bne/u   r6,r7,tr1
        ld.ub   r3,2,r5
        beqi/u  r4,0,tr0
        ld.ub   r2,3,r6
        bne/u   r4,r5,tr0
        ld.ub   r3,3,r7
        beqi/u  r6,0,tr1
        ld.ub   r2,4,r4
        bne/u   r6,r7,tr1
        ld.ub   r3,4,r5
        beqi/u  r4,0,tr0
        ld.ub   r2,5,r6
        bne/u   r4,r5,tr0
        ld.ub   r3,5,r7
        beqi/u  r6,0,tr1
        ld.ub   r2,6,r4
        bne/u   r6,r7,tr1
        ld.ub   r3,6,r5
        beqi/u  r4,0,tr0
        ld.ub   r2,7,r6
        bne/u   r4,r5,tr0
        ld.ub   r3,7,r7
        beqi/u  r6,0,tr1
        sub     r3,r2,r3
        bne/u   r6,r7,tr1
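
/* Any NUL or difference among the first eight bytes returns through
   quickret0/quickret1 above.  The code below switches to quadword
   compares: r2 is rounded down to an 8-byte boundary, r3 is adjusted
   accordingly, and a replicated 0x01 constant (movi/mperm.w) is used
   with msubs.ub to flag NUL bytes in each quadword that is loaded.
   Mutually aligned strings are handled by al_loop, the general case
   by loop.  */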
        andi    r2,-8,r2
        add     r3,r2,r3
        ldlo.q  r3,8,r23
        pt      r23_zero,tr0
        shlli   r3,3,r22
        sub     r63,r22,r20
        movi    0x101,r6
        mperm.w r6,r63,r6
        SHLO    r6,r22,r7
        msubs.ub r7,r23,r8
        pt      loop,tr1
        bnei/u  r8,0,tr0 // r23_zero
        pt      found_zero,tr0
        addi    r3,15,r3
        andi    r3,-8,r3
        sub     r3,r2,r3
        bne/l   r7,r6,tr1 // loop
        /* The strings are aligned to each other.  */
        /* It is possible to have a loop with six cycles / iteration
           by re-ordering the exit conditions, but then it needs extra
           time and/or code to sort out the r4 != r5 case.  */
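/* As an illustration only (not part of the original source), the
   aligned loop below corresponds roughly to the following C sketch.
   The identifiers and the zero-byte test are assumptions made for the
   sketch; the assembly uses mcmpeq.b against the zero register instead:

       #include <stdint.h>

       // Nonzero iff some byte of x is zero; stands in for the
       // mcmpeq.b r63,rN,rM test used below.
       static int any_zero_byte(uint64_t x)
       {
           return ((x - 0x0101010101010101ULL) & ~x
                   & 0x8080808080808080ULL) != 0;
       }

       // a1/a2 point at the (mutually aligned) remainder of the two
       // strings; the real code branches to al_found_zero or cmp_quad
       // where this sketch breaks out of the loop.
       static void al_loop_sketch(const uint64_t *a1, const uint64_t *a2)
       {
           for (;;) {
               uint64_t a = *a1++;      // ld.q  r2,8,r4
               uint64_t b = *a2++;      // ldx.q r2,r3,r5
               if (any_zero_byte(a))
                   break;               // al_found_zero
               if (a != b)
                   break;               // cmp_quad
           }
       }
*/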
        pt      al_loop,tr1
        pt      al_found_zero,tr0
al_loop:
        ld.q    r2,8,r4
        ldx.q   r2,r3,r5
        addi    r2,8,r2
        mcmpeq.b r63,r4,r8
        pt      cmp_quad,tr3
        bnei/u  r8,0,tr0  // al_found_zero
        beq/l   r4,r5,tr1 // al_loop
        blink   tr3,r63   // cmp_quad
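
/* quickret0/quickret1: a NUL byte or a difference was found among the
   first eight bytes; the return value is the difference of the two
   zero-extended bytes just compared.  */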
        .balign 8
quickret0:
        sub     r4,r5,r2
        blink   tr2,r63
quickret1:
        sub     r6,r7,r2
        blink   tr2,r63
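
/* Unaligned case: r4 holds the next eight bytes of the first string,
   while the corresponding bytes of the second string are stitched
   together in r5 from two aligned loads (SHHI/SHLO of r19 combined
   with the leftover part kept in r23).  msubs.ub flags NUL bytes in r4
   with 0x01 and mcmpeq.b flags NUL bytes in r19 with 0xff, so the two
   flag words can only be equal when neither contains a NUL; a single
   bne therefore catches a NUL in either operand.  */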
loop:
        ld.q    r2,8,r4
        ldx.q   r2,r3,r19
        addi    r2,8,r2
        msubs.ub r6,r4,r8
        mcmpeq.b r63,r19,r9
        SHHI    r19,r20,r21
        or      r21,r23,r5
        SHLO    r19,r22,r23
        bne/u   r8,r9,tr0 // found_zero
        beq/l   r4,r5,tr1 // loop
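/* cmp_quad: derive the strcmp result from the first differing quadword.
   On little-endian targets the quadwords are byte-reversed first, so in
   either byte order the byte that comes first in the string is the most
   significant one; the two unsigned compares then give the equivalent
   of (a > b) - (b > a) in C, i.e. -1, 0 or 1.  */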
cmp_quad:
#ifdef __LITTLE_ENDIAN__
        byterev r4,r4
        byterev r5,r5
#endif
        cmpgtu  r4,r5,r6
        cmpgtu  r5,r4,r7
        sub     r6,r7,r2
        blink   tr2,r63
found_zero:
        pt      zero_now,tr0
        pt      cmp_quad,tr1
        SHHI    r9,r20,r7
        bne/u   r8,r7,tr0 // zero_now
        bne/u   r4,r5,tr1 // cmp_quad
        SHLO    r9,r22,r8
r23_zero:
        ld.q    r2,8,r4
        add     r23,r63,r5
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
   a 0x01 or 0xff mask for every zero found in one of the operands.
   If both operands have the first zero in the same place, this mask
   allows us to truncate the comparison to the valid bytes in the
   strings.  If the first zero is in different places, it doesn't
   matter if some invalid bytes are included, since the comparison
   of the zero with the non-zero will determine the outcome.  */
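/* A hedged C sketch of the little-endian truncation below; identifiers
   are illustrative only, not taken from this file:

       #include <stdint.h>

       // Mask quadwords a and b down to the bytes at or before the
       // first flagged NUL; flags has a nonzero byte for every byte
       // of the operands that is zero.
       static void truncate_at_first_nul(uint64_t *a, uint64_t *b,
                                         uint64_t flags)
       {
           flags <<= 8;                           // also keep the NUL byte
           uint64_t keep = (flags - 1) & ~flags;  // ones up to that byte
           *a &= keep;
           *b &= keep;
       }
*/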
#ifdef __LITTLE_ENDIAN__
        shlli   r8,8,r8
        addi    r8,-1,r9
        andc    r9,r8,r8
        and     r8,r4,r4
        and     r8,r5,r5
#else
        shlri   r8,1,r8
        nsb     r8,r8
        addi    r8,8,r8
        andi    r8,56,r8
        sub     r63,r8,r8
        shlrd   r4,r8,r4
        shlrd   r5,r8,r5
#endif
#ifdef __LITTLE_ENDIAN__
        byterev r4,r4
        byterev r5,r5
#endif
        cmpgtu  r4,r5,r6
        cmpgtu  r5,r4,r7
        sub     r6,r7,r2
        blink   tr2,r63

#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */

#ifdef __SH5__
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
! Entry: r4: string1
!        r5: string2
! Exit:  r0: result
!        r1-r2,r4-r5: clobbered
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */
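
/* For reference only: a hedged C sketch of what this SHcompact path
   computes (an assumption for illustration, not part of the original
   source):

       int strcmp_sketch(const char *s1, const char *s2)
       {
           // The assembly first runs a longword loop when both pointers
           // are 4-byte aligned, i.e. ((s1 | s2) & 3) == 0; cmp/str
           // against a zero register is its "this word has a NUL" test.
           unsigned char c1, c2;
           do {
               c1 = (unsigned char)*s1++;
               c2 = (unsigned char)*s2++;
           } while (c1 != 0 && c1 == c2);
           return c1 - c2;
       }
*/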
        mov     STR1,r0
        or      STR2,r0
        tst     #3,r0
        bf      L_setup_char_loop
        mov     #0,r0
#ifdef DELAYED_BRANCHES
        mov.l   @STR1+,r1
        .align  2
Longword_loop:
        mov.l   @STR2+,TMP
        cmp/str r0,r1
        bt      Longword_loop_end
        cmp/eq  r1,TMP
        bt.s    Longword_loop
        mov.l   @STR1+,r1
        add     #-4, STR1
Longword_loop_end:
        add     #-4, STR1
        add     #-4, STR2
L_setup_char_loop:
        mov.b   @STR1+,r0
        .align  2
L_char_loop:
        mov.b   @STR2+,r1
        tst     r0,r0
        bt      L_return
        cmp/eq  r0,r1
        bt.s    L_char_loop
        mov.b   @STR1+,r0
        add     #-2,STR1
        mov.b   @STR1,r0
#else /* ! DELAYED_BRANCHES */
        .align  2
Longword_loop:
        mov.l   @r4+,r1
        mov.l   @r5+,r2
        cmp/str r0,r1
        bt      Longword_loop_end
        cmp/eq  r1,r2
        bt      Longword_loop
Longword_loop_end:
        add     #-4, r4
        add     #-4, r5
        .align  2
L_setup_char_loop:
L_char_loop:
        mov.b   @r4+,r0
        mov.b   @r5+,r1
        tst     r0,r0
        bt      L_return
        cmp/eq  r0,r1
        bt      L_char_loop
#endif
L_return:
        extu.b  r0,RESULT
        extu.b  r1,r1
        rts
        sub     r1,RESULT
#endif /* ! __SHMEDIA__ */