Copyright (c) 2005 Risto Laakso
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;; VECT-EA base &optional idx
;; Macro that expands into a MAKE-EA form of size :dword whose :base is BASE
;; and whose :disp is taken from DISP.
;; NOTE(review): the text of this definition looks damaged -- stray numbers
;; are fused onto the front of several lines, that numbering skips values,
;; and the parentheses do not balance as shown.  DISP is referenced below but
;; no binding for it is visible here; presumably a LET wrapped around the
;; first IF binds it -- confirm against a pristine copy of the file.
29 (defmacro vect-ea
(base &optional idx
)
;; Displacement form: (VECTOR-DATA-OFFSET * N-WORD-BYTES) - OTHER-POINTER-LOWTAG,
;; with IDX added when IDX is a literal number in the macro call.
31 (if (and idx
(numberp idx
))
32 `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES
) OTHER-POINTER-LOWTAG
) ,idx
)
33 `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES
) OTHER-POINTER-LOWTAG
))))
;; When IDX is a symbol it is passed through as the :index of the effective
;; address; otherwise only :base and :disp are supplied.
35 (if (and idx
(symbolp idx
))
36 `(make-ea :dword
:base
,base
:index
,idx
:disp
,disp
)
37 `(make-ea :dword
:base
,base
:disp
,disp
))))
;; VOP %sse-vect-add/single-float
;; DEST, SRC1 and SRC2 are DESCRIPTOR-REG arguments typed
;; SIMPLE-ARRAY-SINGLE-FLOAT; X0 and X1 are XMM-REG temporaries.
;; Visible instructions: MOVDQU loads through (vect-ea src1) into X0 and
;; through (vect-ea src2) into X1, then MOVDQU stores X0 through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values here; no :GENERATOR
;; head and no arithmetic instruction between the loads and the store is
;; visible.  Presumably X1 is added into X0 before the store -- confirm
;; against a pristine copy of the file.
39 (DEFINE-VOP (%sse-vect-add
/single-float
)
41 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
42 (SRC1 :SCS
(DESCRIPTOR-REG))
43 (SRC2 :SCS
(DESCRIPTOR-REG)))
44 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT
45 SIMPLE-ARRAY-SINGLE-FLOAT
46 SIMPLE-ARRAY-SINGLE-FLOAT
)
48 (:TEMPORARY
(:SC XMM-REG
) X0
)
49 (:TEMPORARY
(:SC XMM-REG
) X1
)
;; Load both source operands into the XMM temporaries.
52 (inst movdqu x0
(vect-ea src1
))
53 (inst movdqu x1
(vect-ea src2
))
;; Write X0 back to the destination vector.
55 (inst movdqu
(vect-ea dest
) x0
)))
;; VOP %sse-vect-add2/single-float
;; Unlike the descriptor-register VOPs in this file, SRC1 and SRC2 are taken
;; in XMM-REG and the result DEST is returned in XMM-REG.
;; NOTE(review): only the :ARGS, :RESULTS and :TEMPORARY clauses are visible;
;; the fused line numbering skips the remainder and no generator body or
;; closing parenthesis appears here -- confirm against a pristine copy.
57 (DEFINE-VOP (%sse-vect-add2
/single-float
)
59 (:ARGS
(SRC1 :SCS
(XMM-REG))
60 (SRC2 :SCS
(XMM-REG)))
63 (:RESULTS
(DEST :SCS
(XMM-REG)))
;; X0 is declared with a lifetime from :argument to :result.
65 (:TEMPORARY
(:SC XMM-REG
:from
:argument
:to
:result
) X0
)
;; X1 is declared with a lifetime starting at :argument.
66 (:TEMPORARY
(:SC XMM-REG
:from
:argument
) X1
)
;; VOP %sse-vect-sub/single-float
;; DEST, SRC1 and SRC2 are DESCRIPTOR-REG arguments typed
;; SIMPLE-ARRAY-SINGLE-FLOAT; X0 and X1 are XMM-REG temporaries.
;; Visible instructions: MOVDQU loads through (vect-ea src1) into X0 and
;; through (vect-ea src2) into X1, then MOVDQU stores X0 through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values here; no :GENERATOR
;; head and no arithmetic instruction between the loads and the store is
;; visible.  Presumably X1 is subtracted from X0 before the store -- confirm
;; against a pristine copy of the file.
75 (DEFINE-VOP (%sse-vect-sub
/single-float
)
77 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
78 (SRC1 :SCS
(DESCRIPTOR-REG))
79 (SRC2 :SCS
(DESCRIPTOR-REG)))
80 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT
81 SIMPLE-ARRAY-SINGLE-FLOAT
82 SIMPLE-ARRAY-SINGLE-FLOAT
)
84 (:TEMPORARY
(:SC XMM-REG
) X0
)
85 (:TEMPORARY
(:SC XMM-REG
) X1
)
;; Load both source operands into the XMM temporaries.
88 (inst movdqu x0
(vect-ea src1
))
89 (inst movdqu x1
(vect-ea src2
))
;; Write X0 back to the destination vector.
91 (inst movdqu
(vect-ea dest
) x0
)))
;; VOP %sse-vect-len/single-float
;; DEST and SRC1 are DESCRIPTOR-REG arguments typed SIMPLE-ARRAY-SINGLE-FLOAT;
;; X0 and X1 are XMM-REG temporaries.
;; Visible sequence: load SRC1 into X1, multiply X1 by itself (MULPS), copy to
;; X0, then repeatedly shift X1 right by 4 bytes and ADDSS it into X0; finally
;; SQRTSS of X0 goes to X1 and MOVSS stores X1 through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values; no :GENERATOR head is
;; visible, the ADDSS between the second and third shift is not visible, and
;; the form is not closed as shown -- confirm against a pristine copy.
93 (DEFINE-VOP (%sse-vect-len
/single-float
)
95 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
96 (SRC1 :SCS
(DESCRIPTOR-REG)))
97 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT
)
99 (:TEMPORARY
(:SC XMM-REG
) X0
)
100 (:TEMPORARY
(:SC XMM-REG
) X1
)
;; Load the source vector and square each lane.
104 (inst movdqu x1
(vect-ea src1
))
105 (inst mulps x1 x1
) ;; ^2
;; X0 accumulates the sum in its low lane; X1 is shifted down one
;; single-float (4 bytes) at a time to bring each lane into position.
107 (inst movdqa x0 x1
) ;; +
109 (inst psrldq-ib x1
4) ;; >> 4
110 (inst addss x0 x1
) ;; +
112 (inst psrldq-ib x1
4) ;; ..
115 (inst psrldq-ib x1
4)
116 (inst addss x0 x1
) ;; here we have added up all single-floats
;; Scalar square root of the accumulated sum, written to the first element
;; addressed by (vect-ea dest).
118 (inst sqrtss x1 x0
) ;; sqrt
120 (inst movss
(vect-ea dest
) x1
) ;; store scalar single-float
;; VOP %sse-vect-scalar-mul/single-float
;; DEST, SRC1 and SCALAR are DESCRIPTOR-REG arguments, all typed
;; SIMPLE-ARRAY-SINGLE-FLOAT (the scalar is read with MOVSS through
;; (vect-ea scalar)); X0, X1 and X2 are XMM-REG temporaries.
;; Visible sequence: load SRC1 into X0, load the scalar into X1, shift X1 left
;; by 4 bytes three times, then store X0 through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values between every shift
;; and after the "mul vector" comment.  The instructions that would replicate
;; the scalar across lanes, the multiply itself, any use of X2, and the
;; :GENERATOR head are not visible, and the form is not closed as shown --
;; confirm against a pristine copy of the file.
123 (DEFINE-VOP (%sse-vect-scalar-mul
/single-float
)
125 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
126 (SRC1 :SCS
(DESCRIPTOR-REG))
127 (SCALAR :SCS
(DESCRIPTOR-REG)))
128 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT
)
130 (:TEMPORARY
(:SC XMM-REG
) X0
)
131 (:TEMPORARY
(:SC XMM-REG
) X1
)
132 (:TEMPORARY
(:SC XMM-REG
) X2
)
;; Load the vector operand and the scalar operand.
136 (inst movdqu x0
(vect-ea src1
))
137 (inst movss x1
(vect-ea scalar
))
139 ;; load scalar to all slots
141 (inst pslldq-ib x1
4)
143 (inst pslldq-ib x1
4)
145 (inst pslldq-ib x1
4)
148 ;; mul vector with scalar-vector
;; Write X0 back to the destination vector.
152 (inst movdqu
(vect-ea dest
) x0
)
;; VOP %sse-vect-normalize/single-float
;; DEST and SRC1 are DESCRIPTOR-REG arguments typed SIMPLE-ARRAY-SINGLE-FLOAT;
;; X0, X1 and X2 are XMM-REG temporaries.
;; Visible sequence: load SRC1 into X1, square each lane, then three rounds of
;; SHUFPS (same selector each time) followed by ADDPS into X0; RSQRTPS of X0
;; goes to X1, X2 is multiplied by X1, and X2 is stored through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values.  The instructions that
;; would initialise X0 (the "copy x1 to x0" step) and X2 (presumably a copy of
;; the unsquared source) are not visible, nor is the :GENERATOR head, and the
;; form is not closed as shown -- confirm against a pristine copy.
155 (DEFINE-VOP (%sse-vect-normalize
/single-float
)
157 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
158 (SRC1 :SCS
(DESCRIPTOR-REG)))
159 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT
)
161 (:TEMPORARY
(:SC XMM-REG
) X0
)
162 (:TEMPORARY
(:SC XMM-REG
) X1
)
163 (:TEMPORARY
(:SC XMM-REG
) X2
)
167 (inst movdqu x1
(vect-ea src1
))
170 ;; calculate x0 <- 1 / sqrt( x^2 + y^2 + z^2 + w^2 )
171 (inst mulps x1 x1
) ;; ^2
173 ;; copy x1 to x0, then rotate/add
;; Each round permutes the lanes of X1 with the same SHUFPS selector and adds
;; the result into X0 with a packed add.
176 (inst shufps x1 x1
#b10010011
) ;; rotate
177 (inst addps x0 x1
) ;; +
179 (inst shufps x1 x1
#b10010011
) ;; rotate
180 (inst addps x0 x1
) ;; +
182 (inst shufps x1 x1
#b10010011
) ;; rotate
183 (inst addps x0 x1
) ;; +
;; RSQRTPS is the packed reciprocal-square-root instruction.
185 (inst rsqrtps x1 x0
) ;; 1 / sqrt
187 (inst mulps x2 x1
) ;; vect = vect * (1 / sqrt(len))
189 (inst movdqu
(vect-ea dest
) x2
) ;; store normalized vector
;; VOP %sse-vect-dot/single-float
;; DEST, SRC1 and SRC2 are DESCRIPTOR-REG arguments; X0 and X1 are XMM-REG
;; temporaries.
;; Visible sequence: load SRC1 into X0 and SRC2 into X1, multiply lane-wise
;; into X1 (MULPS), copy to X0, then repeatedly shift X1 right by 4 bytes and
;; ADDSS it into X0; MOVSS stores X0 through (vect-ea dest).
;; NOTE(review): the fused line numbering skips values.  The three
;; SIMPLE-ARRAY-SINGLE-FLOAT lines below appear without a visible :ARG-TYPES
;; head, no :GENERATOR head is visible, the ADDSS between the second and third
;; shift is not visible, and the form is not closed as shown -- confirm
;; against a pristine copy of the file.
192 (DEFINE-VOP (%sse-vect-dot
/single-float
)
194 (:ARGS
(DEST :SCS
(DESCRIPTOR-REG))
195 (SRC1 :SCS
(DESCRIPTOR-REG))
196 (SRC2 :SCS
(DESCRIPTOR-REG)))
199 SIMPLE-ARRAY-SINGLE-FLOAT
200 SIMPLE-ARRAY-SINGLE-FLOAT
201 SIMPLE-ARRAY-SINGLE-FLOAT
)
203 (:TEMPORARY
(:SC XMM-REG
) X0
)
204 (:TEMPORARY
(:SC XMM-REG
) X1
)
;; Load both operands and form the lane-wise products in X1.
207 (inst movdqu x0
(vect-ea src1
))
208 (inst movdqu x1
(vect-ea src2
))
210 (inst mulps x1 x0
) ;; a_n * b_n
;; X0 accumulates the sum in its low lane; X1 is shifted down one
;; single-float (4 bytes) at a time to bring each product into position.
212 (inst movdqa x0 x1
) ;;
214 (inst psrldq-ib x1
4) ;; >> 4
215 (inst addss x0 x1
) ;; +
217 (inst psrldq-ib x1
4) ;; ..
220 (inst psrldq-ib x1
4)
221 (inst addss x0 x1
) ;; here we have added up all single-floats
223 (inst movss
(vect-ea dest
) x0
) ;; store scalar single-float