1 /* Copyright 2003 SuperH Ltd. */
8 #ifdef __LITTLE_ENDIAN__
9 #define ZPAD_MASK(src, dst) addi src, -1, dst
11 #define ZPAD_MASK(src, dst) \
12 byterev src, dst; addi dst, -1, dst; byterev dst, dst
16 /* We assume that the destination is not in the first 16 bytes of memory.
17 A typical linker script will put the text section first, and as
18 this code is longer than 16 bytes, you have to go out of your way
28 /* If the size is greater than 8, we know we can read beyond the first
29 (possibly partial) quadword, and write out a full first and last
30 (possibly unaligned and/or overlapping) quadword. */
31 bge/u r2, r5, tr2 // L_small
34 bnei/u r7, 0, tr0 // L_found0
42 /* Before each iteration, check that we can store in full the next quad we
43 are about to fetch. */
45 bgtu/u r22, r36, tr1 // L_end_early
51 bnei/u r1, 0, tr0 // L_found0
54 bgeu/l r36, r22, tr1 // L_scan0
56 // At end; we might re-read a few bytes when we fetch the last quad.
57 // branch mispredict, so load is ready now.
60 bnei/u r1, 0, tr0 // L_found0
70 and r1, r7, r1 // mask out non-zero bytes after first zero byte
76 /* Check if we can store the current quad in full. */
79 bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
80 /* If not, that means we can just proceed to process the last quad.
81 Two pipeline stalls are unavoidable, as we don't have enough ILP. */
88 and r1, r7, r1 // mask out non-zero bytes after first zero byte
94 // r0: string to store, not yet zero-padding normalized.
95 // r1: result of mcmpeq.b r0, r63, r1.
96 // r22: store address plus 8. I.e. address where zero padding beyond the
98 // r20: store end address.
99 // r5: store end address minus 8.
100 pt L_write0_multiquad, tr0
102 and r0, r1, r0 // mask out non-zero bytes after first zero byte
105 andi r22, -8, r1 // Check if zeros to write fit in one quad word.
106 bgtu/l r5, r1, tr0 // L_write0_multiquad
109 shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
110 SHLO r0, r1, r0 // handled correctly.
116 pt L_write0_loop, tr0
121 bgeu/l r5, r1, tr0 // L_write0_loop
127 bgeu/l r5, r1, tr0 // L_write0_loop
131 // r0: string to store, not yet zero-padding normalized.
132 // r1: result of mcmpeq.b r0, r63, r1.
133 // r7: nonzero indicates relevant zero found in r0.
134 // r2: store address.
137 // r20: store end address.
138 // r5: store end address minus 8.
140 pt L_small_storelong, tr1
143 bnei/u r7, 0, tr0 // L_nohi
145 bge/l r23, r7, tr0 // L_nohi
153 bge/u r4, r19, tr1 // L_small_storelong
156 #ifndef __LITTLE_ENDIAN__
159 beqi/u r4, 0, tr0 // L_small_end
161 beqi/u r4, 1, tr0 // L_small_end
164 beqi/u r4, 2, tr0 // L_small_end
173 #ifdef __LITTLE_ENDIAN__
184 #else /* SHcompact */
186 /* This code is optimized for size. Instruction selection is SH5 specific.
187 SH4 should use a different version. */
208 #endif /* SHcompact */