.
[glibc/history.git] / sysdeps / sparc / sparc64 / stpcpy.S
blobf4366e9fc5f8a7951622b24135af5b60fa7612ff
1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2    For SPARC v9.
3    Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6                   Jakub Jelinek <jj@ultra.linux.cz>.
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, write to the Free
20    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21    02111-1307 USA.  */
23 #include <sysdep.h>
24 #include <asm/asi.h>
/* Declare %g2, %g3 and %g6 to the assembler as scratch registers; the
   code below uses all three as temporaries.  The directives are skipped
   when XCC is already defined - presumably by a file that includes this
   one and has issued them itself (TODO confirm).  */
25 #ifndef XCC
26         .register       %g2, #scratch
27         .register       %g3, #scratch
28         .register       %g6, #scratch
29 #endif
31         /* Normally, this uses the test
32            ((xword - 0x0101010101010101) & 0x8080808080808080)
33            to find out if any byte in xword could be zero. This is fast, but
34            it also gives a false alarm for any byte in the range 0x81-0xff.
35            That does not matter for correctness, because whenever the test
36            says there could be a zero byte, we check byte by byte; but if
37            bytes with the high bit set are common in the strings, performance
38            will be poor. You can #define EIGHTBIT_NOT_RARE and the algorithm
39            will use a test that is one tick slower but more precise,
40            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
41            which does not give any false alarms (but if some bits are set,
42            one cannot tell from them which bytes are zero and which are not).
43            It has yet to be measured which default is right for glibc and
44            an average user these days.
45          */
47         .text
48         .align          32
        /* __stpcpy: copy the NUL-terminated string SRC to DEST and return
           the address of the terminating NUL written to DEST.

           In:   %o0 = DEST, %o1 = SRC.
           Out:  %o0 = address of the NUL byte stored in DEST.
           Uses: %g1-%g6 and %o2-%o5 as temporaries, plus the integer
                 condition codes.  The routine returns with retl and
                 never references the stack.

           Reading aid: an instruction indented by one extra space sits
           in the delay slot of the branch just above it.  On a
           conditional branch with the ",a" (annul) suffix the slot only
           takes effect when the branch is taken; without the suffix the
           slot always executes.

           Label map: 1/2/3 = loop for 8-byte aligned SRC and DEST;
           12/13 = byte loop that brings DEST to 8-byte alignment;
           14/15 = loop for misaligned SRC with aligned DEST;
           4-11 and 16-30 = exits that store the final (partial)
           doubleword and produce the return value.

           Loads that read ahead of the data already checked go through
           ASI_PNF (from <asm/asi.h>; presumably the SPARC V9 primary
           no-fault ASI, so that reading beyond the terminator cannot
           trap - confirm).

           The IEU0/IEU1/Load/Store/CTI/Group notes at the end of each
           line look like instruction-scheduling annotations (execution
           unit and start of an issue group); which CPU they were tuned
           for is not stated here.  */
49 ENTRY(__stpcpy)
        /* Build the two test masks while checking alignment:
           %g1 = 0x0101010101010101, %g2 = %g1 << 7 = 0x8080808080808080.
           DEST not 8-byte aligned -> 12f.  The first delay slot always
           runs; it leaves %g3 = SRC & 7 and the flags tested by the
           second branch: SRC not 8-byte aligned -> 14f.  */
50         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
51         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
52         andcc           %o0, 7, %g0                     /* IEU1                         */
53         sllx            %g1, 32, %g2                    /* IEU0         Group           */
55         bne,pn          %icc, 12f                       /* CTI                          */
56          andcc          %o1, 7, %g3                     /* IEU1                         */
57         or              %g1, %g2, %g1                   /* IEU0         Group           */
58         bne,pn          %icc, 14f                       /* CTI                          */
60          sllx           %g1, 7, %g2                     /* IEU0         Group           */
        /* Aligned loop.  %g3 = doubleword being examined, %o3 = the
           following doubleword, fetched one iteration ahead at label 3.
           %o2 = %g3 - %g1 (additionally masked with ~%g3 when
           EIGHTBIT_NOT_RARE is defined).  If (%o2 & %g2) == 0 no byte of
           %g3 can be zero: the annulled slot stores %g3 at DEST and the
           loop continues at 2.  %o0 and %o1 have already been advanced
           by 8, hence the [%o0 - 8] addressing.  */
61 1:      ldx             [%o1], %o3                      /* Load                         */
62         add             %o1, 8, %o1                     /* IEU1                         */
63 2:      mov             %o3, %g3                        /* IEU0         Group           */
65         sub             %o3, %g1, %o2                   /* IEU1                         */
66 3:      ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
67 #ifdef EIGHTBIT_NOT_RARE
68         andn            %o2, %g3, %o2                   /* IEU0         Group           */
69 #endif
70         add             %o0, 8, %o0                     /* IEU0         Group           */
71         andcc           %o2, %g2, %g0                   /* IEU1                         */
73         add             %o1, 8, %o1                     /* IEU0         Group           */
74         be,a,pt         %xcc, 2b                        /* CTI                          */
75          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* The test reported a possible zero byte in %g3.  Check its
           bytes one at a time, starting with bits 63:56 (the byte that
           belongs at the lowest address, [%o0 - 8]) and ending with
           bits 15:8.  The srlx in each delay slot prepares the value
           for the next test or for the exit code; the last slot instead
           precomputes %o2 for re-entering the loop at 3.  */
76         srlx            %g3, 56, %g5                    /* IEU0         Group           */
78         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
79         be,pn           %icc, 11f                       /* CTI                          */
80          srlx           %g3, 48, %g4                    /* IEU0                         */
81         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
83         be,pn           %icc, 10f                       /* CTI                          */
84          srlx           %g3, 40, %g5                    /* IEU0                         */
85         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
86         be,pn           %icc, 9f                        /* CTI                          */
88          srlx           %g3, 32, %g4                    /* IEU0                         */
89         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
90         be,pn           %icc, 8f                        /* CTI                          */
91          srlx           %g3, 24, %g5                    /* IEU0                         */
93         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
94         be,pn           %icc, 7f                        /* CTI                          */
95          srlx           %g3, 16, %g4                    /* IEU0                         */
96         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
98         be,pn           %icc, 6f                        /* CTI                          */
99          srlx           %g3, 8, %g5                     /* IEU0                         */
100         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
101         be,pn           %icc, 5f                        /* CTI                          */
103          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* The first seven bytes are non-zero, so the whole doubleword is
           stored.  If its last byte is non-zero as well, the mask test
           was a false alarm: go back to 3 with the preloaded doubleword
           (moved into %g3 by the delay slot).  Otherwise fall into 4.  */
104         stx             %g3, [%o0 - 8]                  /* Store        Group           */
105         andcc           %g3, 0xff, %g0                  /* IEU1                         */
106         bne,pt          %icc, 3b                        /* CTI                          */
108          mov            %o3, %g3                        /* IEU0         Group           */
        /* 4: the NUL is the byte just below %o0.  The byte loop at 13
           also exits through here.  */
109 4:      retl                                            /* CTI+IEU1     Group           */
110          sub            %o0, 1, %o0                     /* IEU0                         */
        /* Exits of the aligned loop.  The bytes up to and including the
           NUL are written with stb/sth/stw pieces; %g6 holds the NUL's
           address and is moved to %o0 in the retl delay slot.
           6: NUL at %o0 - 3.   5: NUL at %o0 - 2.  */
112         .align          16
113 6:      ba,pt           %xcc, 23f                       /* CTI          Group           */
114          sub            %o0, 3, %g6                     /* IEU0                         */
115 5:      sub             %o0, 2, %g6                     /* IEU0         Group           */
116         stb             %g5, [%o0 - 2]                  /* Store                        */
118         srlx            %g3, 16, %g4                    /* IEU0         Group           */
119 23:     sth             %g4, [%o0 - 4]                  /* Store                        */
120         srlx            %g3, 32, %g4                    /* IEU0         Group           */
121         stw             %g4, [%o0 - 8]                  /* Store                        */
123         retl                                            /* CTI+IEU1     Group           */
124          mov            %g6, %o0                        /* IEU0                         */
        /* 8: NUL at %o0 - 5 (%g4 already holds %g3 >> 32).
           7: NUL at %o0 - 4.  */
125 8:      ba,pt           %xcc, 24f                       /* CTI          Group           */
126          sub            %o0, 5, %g6                     /* IEU0                         */
128 7:      sub             %o0, 4, %g6                     /* IEU0         Group           */
129         stb             %g5, [%o0 - 4]                  /* Store                        */
130         srlx            %g3, 32, %g4                    /* IEU0         Group           */
131 24:     stw             %g4, [%o0 - 8]                  /* Store                        */
133         retl                                            /* CTI+IEU1     Group           */
134          mov            %g6, %o0                        /* IEU0                         */
        /* 10: NUL at %o0 - 7 (%g4 already holds %g3 >> 48).
           9: NUL at %o0 - 6.  */
135 10:     ba,pt           %xcc, 25f                       /* CTI          Group           */
136          sub            %o0, 7, %g6                     /* IEU0                         */
138 9:      sub             %o0, 6, %g6                     /* IEU0         Group           */
139         stb             %g5, [%o0 - 6]                  /* Store                        */
140         srlx            %g3, 48, %g4                    /* IEU0                         */
141 25:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
143         retl                                            /* CTI+IEU1     Group           */
144          mov            %g6, %o0                        /* IEU0                         */
        /* 11: the first byte of the doubleword is the NUL; store only
           that byte and return its address.  */
145 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
146         retl                                            /* CTI+IEU1     Group           */
148          sub            %o0, 8, %o0                     /* IEU0                         */
        /* DEST is not 8-byte aligned.  The branch that came here skipped
           the rest of the mask setup, so %g1 and %g2 are completed first.
           Then single bytes are copied: %o3 is the byte just stored and
           the next byte is preloaded in the delay slot.  The loop ends
           when the stored byte was the NUL (-> 4b) or when DEST has
           become 8-byte aligned.  */
150         .align          16
151 12:     or              %g1, %g2, %g1                   /* IEU0         Group           */
152         ldub            [%o1], %o3                      /* Load                         */
153         sllx            %g1, 7, %g2                     /* IEU0         Group           */
154         stb             %o3, [%o0]                      /* Store        Group           */
156 13:     add             %o0, 1, %o0                     /* IEU0                         */
157         add             %o1, 1, %o1                     /* IEU1                         */
158         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
159         be,pn           %icc, 4b                        /* CTI                          */
161          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
162         andcc           %o0, 7, %g0                     /* IEU1         Group           */
163         bne,a,pt        %icc, 13b                       /* CTI                          */
164          stb            %o3, [%o0]                      /* Store                        */
        /* DEST is aligned now.  Recompute %g3 = SRC & 7: if SRC is
           aligned too, enter the aligned loop at 1 (the annulled slot
           issues the same ldx that label 1 starts with); otherwise fall
           into 14.  */
166         andcc           %o1, 7, %g3                     /* IEU1         Group           */
167         be,a,pt         %icc, 1b                        /* CTI                          */
168          ldx            [%o1], %o3                      /* Load                         */
        /* SRC is misaligned, DEST is 8-byte aligned.  SRC is rounded down
           to a doubleword boundary (%o1 -= %g3) and every output
           doubleword is assembled from two neighbouring aligned loads,
           using the shift counts %g5 = 8 * %g3 and %g4 = 64 - %g5.  */
169 14:     orcc            %g0, 64, %g4                    /* IEU1         Group           */
171         sllx            %g3, 3, %g5                     /* IEU0                         */
172         sub             %o1, %g3, %o1                   /* IEU0         Group           */
173         sub             %g4, %g5, %g4                   /* IEU1                         */
174                                                         /* %g1 = 0101010101010101       *
175                                                          * %g2 = 8080808080808080       *
176                                                          * %g3 = source alignment       *
177                                                          * %g5 = number of bits to shift left  *
178                                                          * %g4 = number of bits to shift right */
179         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
181         addcc           %o1, 8, %o1                     /* IEU1                         */
        /* 15: %o5 holds the aligned source doubleword loaded last.
           %o3 = (%o5 << %g5) | (next doubleword >> %g4) is the next eight
           bytes of the string.  It gets the same zero-byte test as in
           the aligned loop; when no byte can be zero the annulled slot
           stores %o3 and the loop repeats.  */
182 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
183         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
184         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
186         add             %o0, 8, %o0                     /* IEU1                         */
187         or              %o3, %o4, %o3                   /* IEU0         Group           */
188         add             %o1, 8, %o1                     /* IEU1                         */
189         sub             %o3, %g1, %o4                   /* IEU0         Group           */
191 #ifdef EIGHTBIT_NOT_RARE
192         andn            %o4, %o3, %o4                   /* IEU0         Group           */
193 #endif
194         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
195         be,a,pt         %xcc, 15b                       /* CTI                          */
196          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* Possible zero byte in %o3: test bits 63:56 first and work down
           to bits 15:8, branching to the matching exit (22 ... 16).
           The delay slot of the last of these branches tests bits 7:0.
           If that byte is non-zero the mask test was a false alarm and
           the loop continues at 15.  The stx in the final (non-annulled)
           slot stores the full doubleword in either case; on
           fall-through the NUL is its last byte, at %o0 - 1.  */
197         srlx            %o3, 56, %o4                    /* IEU0         Group           */
199         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
200         be,pn           %icc, 22f                       /* CTI                          */
201          srlx           %o3, 48, %o4                    /* IEU0                         */
202         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
204         be,pn           %icc, 21f                       /* CTI                          */
205          srlx           %o3, 40, %o4                    /* IEU0                         */
206         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
207         be,pn           %icc, 20f                       /* CTI                          */
209          srlx           %o3, 32, %o4                    /* IEU0                         */
210         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
211         be,pn           %icc, 19f                       /* CTI                          */
212          srlx           %o3, 24, %o4                    /* IEU0                         */
214         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
215         be,pn           %icc, 18f                       /* CTI                          */
216          srlx           %o3, 16, %o4                    /* IEU0                         */
217         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
219         be,pn           %icc, 17f                       /* CTI                          */
220          srlx           %o3, 8, %o4                     /* IEU0                         */
221         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
222         be,pn           %icc, 16f                       /* CTI                          */
224          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
225         bne,pn          %icc, 15b                       /* CTI                          */
226          stx            %o3, [%o0 - 8]                  /* Store                        */
227         retl                                            /* CTI+IEU1     Group           */
229          sub            %o0, 1, %o0                     /* IEU0                         */
        /* Exits of the misaligned-source loop with the NUL in the second
           half of the doubleword.  %g6 = address of the NUL (moved to
           %o0 in the retl slot).  Starting at the NUL, single bytes are
           stored downwards through 26 and 27, then the first four bytes
           go out with one stw at 28.
           17: NUL at %o0 - 3.   18: NUL at %o0 - 4.
           19: NUL at %o0 - 5.   16: NUL at %o0 - 2.  */
231         .align          16
232 17:     ba,pt           %xcc, 26f                       /* CTI          Group           */
233          subcc          %o0, 3, %g6                     /* IEU1                         */
234 18:     ba,pt           %xcc, 27f                       /* CTI          Group           */
235          subcc          %o0, 4, %g6                     /* IEU1                         */
237 19:     ba,pt           %xcc, 28f                       /* CTI          Group           */
238          subcc          %o0, 5, %g6                     /* IEU1                         */
239 16:     subcc           %o0, 2, %g6                     /* IEU1         Group           */
240         srlx            %o3, 8, %o4                     /* IEU0                         */
242         stb             %o4, [%o0 - 2]                  /* Store                        */
243 26:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
244         stb             %o4, [%o0 - 3]                  /* Store                        */
245 27:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
247         stb             %o4, [%o0 - 4]                  /* Store                        */
248 28:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
249         stw             %o4, [%o0 - 8]                  /* Store                        */
250         retl                                            /* CTI+IEU1     Group           */
252          mov            %g6, %o0                        /* IEU0                         */
        /* Exits with the NUL among the first three bytes; only single
           byte stores are used, again from the NUL downwards.
           21: NUL at %o0 - 7.   22: NUL at %o0 - 8.   20: NUL at %o0 - 6.  */
254         .align          16
255 21:     ba,pt           %xcc, 29f                       /* CTI          Group           */
256          subcc          %o0, 7, %g6                     /* IEU1                         */
257 22:     ba,pt           %xcc, 30f                       /* CTI          Group           */
258          subcc          %o0, 8, %g6                     /* IEU1                         */
260 20:     subcc           %o0, 6, %g6                     /* IEU1         Group           */
261         srlx            %o3, 40, %o4                    /* IEU0                         */
262         stb             %o4, [%o0 - 6]                  /* Store                        */
263 29:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
265         stb             %o4, [%o0 - 7]                  /* Store                        */
266 30:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
267         stb             %o4, [%o0 - 8]                  /* Store                        */
268         retl                                            /* CTI+IEU1     Group           */
270          mov            %g6, %o0                        /* IEU0                         */
271 END(__stpcpy)
/* Symbol plumbing for the routine above.  All three macros are defined
   outside this file.  weak_alias presumably makes the public name
   stpcpy a weak alias of __stpcpy, and the two *_hidden_* macros
   presumably provide the library-internal definitions of both names -
   confirm against the headers that define them.  */
273 weak_alias (__stpcpy, stpcpy)
274 libc_hidden_def (__stpcpy)
275 libc_hidden_builtin_def (stpcpy)