//-------------------------------------------------------------------------------------
// DirectXMathMatrix.inl -- SIMD C++ Math library
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// Copyright (c) Microsoft Corporation. All rights reserved.
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------

/****************************************************************************
****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Return true if any entry in the matrix is NaN
inline bool XM_CALLCONV XMMatrixIsNaN
#if defined(_XM_NO_INTRINSICS_)
    const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
    // Fetch value into integer unit
    uint32_t uTest = pWork[0];
    // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
    if (uTest<0x007FFFFFU) {
    ++pWork;            // Next entry
    return (i!=0);      // i == 0 if nothing matched
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test each row against itself; only NaN compares not-equal to itself
    vX = vmvnq_u32(vceqq_f32(vX, vX));
    vY = vmvnq_u32(vceqq_f32(vY, vY));
    vZ = vmvnq_u32(vceqq_f32(vZ, vZ));
    vW = vmvnq_u32(vceqq_f32(vW, vW));
    vX = vorrq_u32(vX,vZ);
    vY = vorrq_u32(vY,vW);
    vX = vorrq_u32(vX,vY);
    // If any tested true, return true
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test each row against itself; only NaN compares not-equal to itself
    vX = _mm_cmpneq_ps(vX,vX);
    vY = _mm_cmpneq_ps(vY,vY);
    vZ = _mm_cmpneq_ps(vZ,vZ);
    vW = _mm_cmpneq_ps(vW,vW);
    vX = _mm_or_ps(vX,vZ);
    vY = _mm_or_ps(vY,vW);
    vX = _mm_or_ps(vX,vY);
    // If any tested true, return true
    return (_mm_movemask_ps(vX)!=0);
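
// Illustrative usage (not part of the library; the names below are hypothetical):
// validating a computed transform before it is consumed.
//
//     XMMATRIX world = XMMatrixRotationY(angle);
//     assert(!XMMatrixIsNaN(world) && !XMMatrixIsInfinite(world));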
//------------------------------------------------------------------------------
// Return true if any entry in the matrix is +/-INF
inline bool XM_CALLCONV XMMatrixIsInfinite
#if defined(_XM_NO_INTRINSICS_)
    const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
    // Fetch value into integer unit
    uint32_t uTest = pWork[0];
    uTest &= 0x7FFFFFFFU;
    if (uTest==0x7F800000U) {
    ++pWork;            // Next entry
    return (i!=0);      // i == 0 if nothing matched
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Mask off the sign bits
    XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask);
    XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask);
    XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask);
    XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask);
    // Compare to infinity
    vTemp1 = vceqq_f32(vTemp1,g_XMInfinity);
    vTemp2 = vceqq_f32(vTemp2,g_XMInfinity);
    vTemp3 = vceqq_f32(vTemp3,g_XMInfinity);
    vTemp4 = vceqq_f32(vTemp4,g_XMInfinity);
    // Or the answers together
    vTemp1 = vorrq_u32(vTemp1,vTemp2);
    vTemp3 = vorrq_u32(vTemp3,vTemp4);
    vTemp1 = vorrq_u32(vTemp1,vTemp3);
    // If any entry is infinity, the corresponding lanes are all-ones (true)
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bits
    XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
    XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
    XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
    XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
    // Compare to infinity
    vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
    vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
    vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
    vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
    // Or the answers together
    vTemp1 = _mm_or_ps(vTemp1,vTemp2);
    vTemp3 = _mm_or_ps(vTemp3,vTemp4);
    vTemp1 = _mm_or_ps(vTemp1,vTemp3);
    // If any entry is infinity, the corresponding lanes are all-ones (true)
    return (_mm_movemask_ps(vTemp1)!=0);
//------------------------------------------------------------------------------
// Return true if the XMMatrix is equal to identity
inline bool XM_CALLCONV XMMatrixIsIdentity
#if defined(_XM_NO_INTRINSICS_)
    // Use the integer pipeline to reduce branching to a minimum
    const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]);
    // Convert 1.0f to zero and or them together
    uint32_t uOne = pWork[0]^0x3F800000U;
    // Or all the 0.0f entries together
    uint32_t uZero = pWork[1];
    uOne |= pWork[5]^0x3F800000U;
    uOne |= pWork[10]^0x3F800000U;
    uOne |= pWork[15]^0x3F800000U;
    // If all the zero entries are zero, then uZero==0
    uZero &= 0x7FFFFFFF;    // Allow -0.0f
    // If all the 1.0f entries are 1.0f, then uOne==0
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0);
    XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1);
    XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2);
    XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3);
    vTemp1 = vandq_u32(vTemp1,vTemp2);
    vTemp3 = vandq_u32(vTemp3,vTemp4);
    vTemp1 = vandq_u32(vTemp1,vTemp3);
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
    return ( r == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
    XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
    XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
    XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    vTemp3 = _mm_and_ps(vTemp3,vTemp4);
    vTemp1 = _mm_and_ps(vTemp1,vTemp3);
    return (_mm_movemask_ps(vTemp1)==0x0f);
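
// Illustrative usage (not part of the library; the names below are hypothetical):
// skip a full transform when the matrix is known to be identity.
//
//     if (!XMMatrixIsIdentity(world))
//     {
//         v = XMVector3TransformCoord(v, world);
//     }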
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Perform a 4x4 matrix multiply by a 4x4 matrix
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
#if defined(_XM_NO_INTRINSICS_)
    // Cache the invariants in registers
    float x = M1.m[0][0];
    float y = M1.m[0][1];
    float z = M1.m[0][2];
    float w = M1.m[0][3];
    // Perform the operation on the first row
    mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    // Repeat for all the other rows
    mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float32x2_t VL = vget_low_f32( M1.r[0] );
    float32x2_t VH = vget_high_f32( M1.r[0] );
    // Perform the operation on the first row
    XMVECTOR vX = vmulq_lane_f32(M2.r[0], VL, 0);
    XMVECTOR vY = vmulq_lane_f32(M2.r[1], VL, 1);
    XMVECTOR vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    XMVECTOR vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    mResult.r[0] = vaddq_f32( vZ, vW );
    // Repeat for the other 3 rows
    VL = vget_low_f32( M1.r[1] );
    VH = vget_high_f32( M1.r[1] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    mResult.r[1] = vaddq_f32( vZ, vW );
    VL = vget_low_f32( M1.r[2] );
    VH = vget_high_f32( M1.r[2] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    mResult.r[2] = vaddq_f32( vZ, vW );
    VL = vget_low_f32( M1.r[3] );
    VH = vget_high_f32( M1.r[3] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    mResult.r[3] = vaddq_f32( vZ, vW );
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the component X,Y,Z then W
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
    XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
    XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
    XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
    // Use vW to hold the original row
    XMVECTOR vW = M1.r[0];
    XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    // Perform the operation on the first row
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    // Perform a binary add to reduce cumulative errors
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    // Repeat for the other 3 rows
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
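
// Illustrative usage (not part of the library; the names below are hypothetical):
// composing transforms. DirectXMath matrices are row-major and row vectors are
// multiplied on the left, so scale-then-translate is written in that order.
//
//     XMMATRIX S = XMMatrixScaling(2.0f, 2.0f, 2.0f);
//     XMMATRIX T = XMMatrixTranslation(0.0f, 5.0f, 0.0f);
//     XMMATRIX world = XMMatrixMultiply(S, T);   // scale first, then translate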
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
#if defined(_XM_NO_INTRINSICS_)
    // Cache the invariants in registers
    float x = M2.m[0][0];
    float y = M2.m[1][0];
    float z = M2.m[2][0];
    float w = M2.m[3][0];
    // Perform the operation on the first row
    mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    // Repeat for all the other rows
    mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float32x2_t VL = vget_low_f32( M1.r[0] );
    float32x2_t VH = vget_high_f32( M1.r[0] );
    // Perform the operation on the first row
    XMVECTOR vX = vmulq_lane_f32(M2.r[0], VL, 0);
    XMVECTOR vY = vmulq_lane_f32(M2.r[1], VL, 1);
    XMVECTOR vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    XMVECTOR vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    float32x4_t r0 = vaddq_f32( vZ, vW );
    // Repeat for the other 3 rows
    VL = vget_low_f32( M1.r[1] );
    VH = vget_high_f32( M1.r[1] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    float32x4_t r1 = vaddq_f32( vZ, vW );
    VL = vget_low_f32( M1.r[2] );
    VH = vget_high_f32( M1.r[2] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    float32x4_t r2 = vaddq_f32( vZ, vW );
    VL = vget_low_f32( M1.r[3] );
    VH = vget_high_f32( M1.r[3] );
    vX = vmulq_lane_f32(M2.r[0], VL, 0);
    vY = vmulq_lane_f32(M2.r[1], VL, 1);
    vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
    vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
    float32x4_t r3 = vaddq_f32( vZ, vW );
    float32x4x2_t P0 = vzipq_f32( r0, r2 );
    float32x4x2_t P1 = vzipq_f32( r1, r3 );
    float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
    float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
    mResult.r[0] = T0.val[0];
    mResult.r[1] = T0.val[1];
    mResult.r[2] = T1.val[0];
    mResult.r[3] = T1.val[1];
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the component X,Y,Z then W
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
    XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
    XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
    XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
    // Use vW to hold the original row
    XMVECTOR vW = M1.r[0];
    XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    // Perform the operation on the first row
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    // Perform a binary add to reduce cumulative errors
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    // Repeat for the other 3 rows
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
    vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
    vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
    vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
    vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
    XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
    XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
    XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
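
// Illustrative usage (not part of the library): XMMatrixMultiplyTranspose(A, B)
// yields the same result as XMMatrixTranspose(XMMatrixMultiply(A, B)) in a single
// call, which is handy when uploading to column-major HLSL constant buffers.
// The constant-buffer struct below is hypothetical.
//
//     XMMATRIX worldViewProj = XMMatrixMultiplyTranspose(world, viewProj);
//     XMStoreFloat4x4(&cbPerObject.worldViewProj, worldViewProj);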
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTranspose
#if defined(_XM_NO_INTRINSICS_)
    P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21
    P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31
    P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23
    P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33
    MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30
    MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31
    MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32
    MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
    float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
    float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
    float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
    mResult.r[0] = T0.val[0];
    mResult.r[1] = T0.val[1];
    mResult.r[2] = T1.val[0];
    mResult.r[3] = T1.val[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
    XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
    XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
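
// Illustrative usage (not part of the library): HLSL constant buffers default to
// column-major packing, so a row-major DirectXMath matrix is typically transposed
// before being stored. The destination variable below is hypothetical.
//
//     XMFLOAT4X4 dest;
//     XMStoreFloat4x4(&dest, XMMatrixTranspose(world));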
//------------------------------------------------------------------------------
// Return the inverse and the determinant of a 4x4 matrix
_Use_decl_annotations_
inline XMMATRIX XM_CALLCONV XMMatrixInverse
    XMVECTOR* pDeterminant,
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMMATRIX MT = XMMatrixTranspose(M);
    XMVECTOR V0[4], V1[4];
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
    V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
    V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
    V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);
    XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
    XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
    XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
    V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
    V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
    V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);
    D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
    D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
    D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);
    XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
    XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
    XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
    XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
    C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
    XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
    XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
    XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
    R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
    R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
    R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
    R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
    XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
    if (pDeterminant != nullptr)
        *pDeterminant = Determinant;
    XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
    Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
    Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
    Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
    Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX MT = XMMatrixTranspose(M);
    XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
    XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));
    XMVECTOR D0 = _mm_mul_ps(V00,V10);
    XMVECTOR D1 = _mm_mul_ps(V01,V11);
    XMVECTOR D2 = _mm_mul_ps(V02,V12);
    V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
    V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
    V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
    V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
    V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
    V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    D0 = _mm_sub_ps(D0,V00);
    D1 = _mm_sub_ps(D1,V01);
    D2 = _mm_sub_ps(D2,V02);
    // V11 = D0Y,D0W,D2Y,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
    V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
    V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
    V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
    V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
    // V13 = D1Y,D1W,D2W,D2W
    XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
    V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
    V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
    XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
    V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));
    XMVECTOR C0 = _mm_mul_ps(V00,V10);
    XMVECTOR C2 = _mm_mul_ps(V01,V11);
    XMVECTOR C4 = _mm_mul_ps(V02,V12);
    XMVECTOR C6 = _mm_mul_ps(V03,V13);
    // V11 = D0X,D0Y,D2X,D2X
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
    V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
    V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
    V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
    V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
    // V13 = D1X,D1Y,D2Z,D2Z
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
    V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
    V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
    V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
    V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    C0 = _mm_sub_ps(C0,V00);
    C2 = _mm_sub_ps(C2,V01);
    C4 = _mm_sub_ps(C4,V02);
    C6 = _mm_sub_ps(C6,V03);
    V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
    // V10 = D0Z,D0Z,D2X,D2Y
    V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
    V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
    V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
    // V11 = D0X,D0W,D2X,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
    V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
    V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
    // V12 = D1Z,D1Z,D2Z,D2W
    V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
    V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
    V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
    // V13 = D1X,D1W,D2Z,D2W
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
    V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    XMVECTOR C1 = _mm_sub_ps(C0,V00);
    C0 = _mm_add_ps(C0,V00);
    XMVECTOR C3 = _mm_add_ps(C2,V01);
    C2 = _mm_sub_ps(C2,V01);
    XMVECTOR C5 = _mm_sub_ps(C4,V02);
    C4 = _mm_add_ps(C4,V02);
    XMVECTOR C7 = _mm_add_ps(C6,V03);
    C6 = _mm_sub_ps(C6,V03);
    C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
    C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
    C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
    C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
    C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
    C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
    C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
    C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
    // Get the determinant
    XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
    if (pDeterminant != nullptr)
        *pDeterminant = vTemp;
    vTemp = _mm_div_ps(g_XMOne,vTemp);
    mResult.r[0] = _mm_mul_ps(C0,vTemp);
    mResult.r[1] = _mm_mul_ps(C2,vTemp);
    mResult.r[2] = _mm_mul_ps(C4,vTemp);
    mResult.r[3] = _mm_mul_ps(C6,vTemp);
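
// Illustrative usage (not part of the library; the names are hypothetical): invert
// a world matrix and guard against (near-)singular input via the determinant.
//
//     XMVECTOR det;
//     XMMATRIX invWorld = XMMatrixInverse(&det, world);
//     if (fabsf(XMVectorGetX(det)) > 1e-6f)
//     {
//         // invWorld is valid
//     }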
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMMatrixDeterminant
    static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
    XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
    XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
    XMVECTOR P0 = XMVectorMultiply(V0, V1);
    XMVECTOR P1 = XMVectorMultiply(V2, V3);
    XMVECTOR P2 = XMVectorMultiply(V4, V5);
    V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
    P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
    P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
    P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
    V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);
    XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
    XMVECTOR R = XMVectorMultiply(V0, P0);
    R = XMVectorNegativeMultiplySubtract(V1, P1, R);
    R = XMVectorMultiplyAdd(V2, P2, R);
    return XMVector4Dot(S, R);
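
// Illustrative usage (not part of the library): the determinant is replicated in
// every lane of the returned vector; its sign indicates whether the transform
// flips handedness (e.g. mirrors geometry). The matrix name is hypothetical.
//
//     float det = XMVectorGetX(XMMatrixDeterminant(world));
//     bool mirrored = (det < 0.0f);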
#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
#define XM3_DECOMP_EPSILON 0.0001f
_Use_decl_annotations_
inline bool XM_CALLCONV XMMatrixDecompose
    XMVECTOR *outRotQuat,
    static const XMVECTOR *pvCanonicalBasis[3] = {
    assert( outScale != nullptr );
    assert( outRotQuat != nullptr );
    assert( outTrans != nullptr );
    // Get the translation
    outTrans[0] = M.r[3];
    XMVECTOR *ppvBasis[3];
    ppvBasis[0] = &matTemp.r[0];
    ppvBasis[1] = &matTemp.r[1];
    ppvBasis[2] = &matTemp.r[2];
    matTemp.r[0] = M.r[0];
    matTemp.r[1] = M.r[1];
    matTemp.r[2] = M.r[2];
    matTemp.r[3] = g_XMIdentityR3.v;
    float *pfScales = (float *)outScale;
    XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
    XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
    XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
    XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])
    if(pfScales[a] < XM3_DECOMP_EPSILON)
        ppvBasis[a][0] = pvCanonicalBasis[a][0];
    ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);
    if(pfScales[b] < XM3_DECOMP_EPSILON)
        float fAbsX, fAbsY, fAbsZ;
        fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
        fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
        fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));
        XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)
        ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
    ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);
    if(pfScales[c] < XM3_DECOMP_EPSILON)
        ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
    ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);
    float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));
    // use Cramer's rule to check for handedness of coordinate system
        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
        pfScales[a] = -pfScales[a];
        ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);
    if(XM3_DECOMP_EPSILON < fDet)
        // Non-SRT matrix encountered
    // generate the quaternion from the matrix
    outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
#undef XM3_DECOMP_EPSILON
#undef XM3RANKDECOMPOSE
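
// Illustrative usage (not part of the library; the names are hypothetical): pulling
// scale / rotation / translation back out of an SRT world matrix.
//
//     XMVECTOR scale, rotQuat, trans;
//     if (XMMatrixDecompose(&scale, &rotQuat, &trans, world))
//     {
//         // world is equivalent to S * R * T built from the three components
//     }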
//------------------------------------------------------------------------------
// Transformation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixIdentity()
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixSet
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
#if defined(_XM_NO_INTRINSICS_)
    M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
    M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
    M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
    M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
    M.r[0] = XMVectorSet(m00, m01, m02, m03);
    M.r[1] = XMVectorSet(m10, m11, m12, m13);
    M.r[2] = XMVectorSet(m20, m21, m22, m23);
    M.r[3] = XMVectorSet(m30, m31, m32, m33);
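
// Illustrative usage (not part of the library): XMMatrixSet takes the sixteen
// elements in row-major order; the last row typically holds the translation.
//
//     XMMATRIX translate = XMMatrixSet(
//         1.0f, 0.0f, 0.0f, 0.0f,
//         0.0f, 1.0f, 0.0f, 0.0f,
//         0.0f, 0.0f, 1.0f, 0.0f,
//         3.0f, 4.0f, 5.0f, 1.0f);   // same as XMMatrixTranslation(3, 4, 5)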
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTranslation
#if defined(_XM_NO_INTRINSICS_)
    M.m[3][0] = OffsetX;
    M.m[3][1] = OffsetY;
    M.m[3][2] = OffsetZ;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTranslationFromVector
#if defined(_XM_NO_INTRINSICS_)
    M.m[3][0] = Offset.vector4_f32[0];
    M.m[3][1] = Offset.vector4_f32[1];
    M.m[3][2] = Offset.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixScaling
#if defined(_XM_NO_INTRINSICS_)
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    const XMVECTOR Zero = vdupq_n_f32(0);
    M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 );
    M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 );
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
    M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
    M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
    M.r[3] = g_XMIdentityR3.v;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixScalingFromVector
#if defined(_XM_NO_INTRINSICS_)
    M.m[0][0] = Scale.vector4_f32[0];
    M.m[1][1] = Scale.vector4_f32[1];
    M.m[2][2] = Scale.vector4_f32[2];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    M.r[0] = vandq_u32(Scale,g_XMMaskX);
    M.r[1] = vandq_u32(Scale,g_XMMaskY);
    M.r[2] = vandq_u32(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
    M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
    M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationX
#if defined(_XM_NO_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    M.m[1][1] = fCosAngle;
    M.m[1][2] = fSinAngle;
    M.m[2][1] = -fSinAngle;
    M.m[2][2] = fCosAngle;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 );
    T1 = vsetq_lane_f32( fSinAngle, T1, 2 );
    XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
    M.r[0] = g_XMIdentityR0.v;
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = 0,y = cos,z = sin, w = 0
    vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
    M.r[0] = g_XMIdentityR0;
    // x = 0,y = sin,z = cos, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
    // x = 0,y = -sin,z = cos, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateY);
    M.r[3] = g_XMIdentityR3;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationY
#if defined(_XM_NO_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    M.m[0][0] = fCosAngle;
    M.m[0][2] = -fSinAngle;
    M.m[2][0] = fSinAngle;
    M.m[2][2] = fCosAngle;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( -fSinAngle, T0, 2 );
    XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
    M.r[1] = g_XMIdentityR1.v;
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = sin,y = 0,z = cos, w = 0
    vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
    M.r[1] = g_XMIdentityR1;
    // x = cos,y = 0,z = sin, w = 0
    vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
    // x = cos,y = 0,z = -sin, w = 0
    vSin = _mm_mul_ps(vSin,g_XMNegateZ);
    M.r[3] = g_XMIdentityR3;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationZ
#if defined(_XM_NO_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    M.m[0][0] = fCosAngle;
    M.m[0][1] = fSinAngle;
    M.m[1][0] = -fSinAngle;
    M.m[1][1] = fCosAngle;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( fSinAngle, T0, 1 );
    XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 );
    T1 = vsetq_lane_f32( fCosAngle, T1, 1 );
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = cos,y = sin,z = 0, w = 0
    vCos = _mm_unpacklo_ps(vCos,vSin);
    // x = sin,y = cos,z = 0, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
    // x = -sin,y = cos,z = 0, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateX);
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw
    XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
    return XMMatrixRotationRollPitchYawFromVector(Angles);
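
// Illustrative usage (not part of the library): angles are in radians, and the
// rotations are applied in the order roll (z), then pitch (x), then yaw (y).
//
//     XMMATRIX orientation = XMMatrixRotationRollPitchYaw(
//         XMConvertToRadians(30.0f),   // pitch
//         XMConvertToRadians(45.0f),   // yaw
//         0.0f);                       // roll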
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector
    FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
    XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
    return XMMatrixRotationQuaternion(Q);
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal
    FXMVECTOR NormalAxis,
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);
    XMVECTOR C2 = XMVectorSplatZ(A);
    XMVECTOR C1 = XMVectorSplatY(A);
    XMVECTOR C0 = XMVectorSplatX(A);
    XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR V0 = XMVectorMultiply(C2, N0);
    V0 = XMVectorMultiply(V0, N1);
    XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
    R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);
    XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
    XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);
    V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
    XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
    XMVECTOR C1 = _mm_set_ps1(fCosAngle);
    XMVECTOR C0 = _mm_set_ps1(fSinAngle);
    XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
    XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));
    XMVECTOR V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);
    XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
    R0 = _mm_mul_ps(R0, NormalAxis);
    R0 = _mm_add_ps(R0, C1);
    XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
    R1 = _mm_add_ps(R1, V0);
    XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
    R2 = _mm_sub_ps(V0,R2);
    V0 = _mm_and_ps(R0,g_XMMask3);
    XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
    XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
    V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
    V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
    M.r[3] = g_XMIdentityR3.v;
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis
    assert(!XMVector3Equal(Axis, XMVectorZero()));
    assert(!XMVector3IsInfinite(Axis));
    XMVECTOR Normal = XMVector3Normalize(Axis);
    return XMMatrixRotationNormal(Normal, Angle);
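
// Illustrative usage (not part of the library): rotation about an arbitrary axis.
// XMMatrixRotationAxis normalizes the axis for you; use XMMatrixRotationNormal
// directly when the axis is already unit length.
//
//     XMVECTOR axis = XMVectorSet(1.0f, 1.0f, 0.0f, 0.0f);
//     XMMATRIX R = XMMatrixRotationAxis(axis, XM_PIDIV4);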
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion
    FXMVECTOR Quaternion
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
    XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
    XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);
    XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
    R0 = XMVectorSubtract(R0, V1);
    V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
    V0 = XMVectorMultiply(V0, V1);
    V1 = XMVectorSplatW(Quaternion);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
    V1 = XMVectorMultiply(V1, V2);
    XMVECTOR R1 = XMVectorAdd(V0, V1);
    XMVECTOR R2 = XMVectorSubtract(V0, V1);
    V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
    V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
    M.r[3] = g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
    XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
    XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);
    XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
    V0 = _mm_and_ps(V0,g_XMMask3);
    XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
    V1 = _mm_and_ps(V1,g_XMMask3);
    XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
    R0 = _mm_sub_ps(R0, V1);
    V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
    V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
    V0 = _mm_mul_ps(V0, V1);
    V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
    XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
    V1 = _mm_mul_ps(V1, V2);
    XMVECTOR R1 = _mm_add_ps(V0, V1);
    XMVECTOR R2 = _mm_sub_ps(V0, V1);
    V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
    V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
    Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
    M.r[3] = g_XMIdentityR3;
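
// Illustrative usage (not part of the library): quaternions are the usual choice
// for interpolating rotations; convert to a matrix only when building the final
// transform. The angle variable below is hypothetical.
//
//     XMVECTOR q = XMQuaternionRotationAxis(XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f), angle);
//     XMMATRIX R = XMMatrixRotationQuaternion(q);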
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTransformation2D
    FXMVECTOR ScalingOrigin,
    float ScalingOrientation,
    FXMVECTOR RotationOrigin,
    GXMVECTOR Translation
    // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
    //         MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
    XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);
    XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
    XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
    XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
    XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
    M = XMMatrixMultiply(M, MScaling);
    M = XMMatrixMultiply(M, MScalingOrientation);
    M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTransformation
    FXMVECTOR ScalingOrigin,
    FXMVECTOR ScalingOrientationQuaternion,
    GXMVECTOR RotationOrigin,
    HXMVECTOR RotationQuaternion,
    HXMVECTOR Translation
    // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
    //         MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
    XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
    XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
    XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
    XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
    XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
    XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
    M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
    M = XMMatrixMultiply(M, MScaling);
    M = XMMatrixMultiply(M, MScalingOrientation);
    M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D
    FXMVECTOR RotationOrigin,
    FXMVECTOR Translation
    // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation
    FXMVECTOR RotationOrigin,
    FXMVECTOR RotationQuaternion,
    GXMVECTOR Translation
    // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v);
    XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
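
// Illustrative usage (not part of the library; the names are hypothetical): the
// usual way to build an object's world matrix from a scale, a rotation quaternion
// applied about the object's origin, and a translation.
//
//     XMMATRIX world = XMMatrixAffineTransformation(
//         XMVectorReplicate(uniformScale),     // Scaling
//         XMVectorZero(),                      // RotationOrigin
//         rotationQuat,                        // RotationQuaternion
//         positionVec);                        // Translation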
1868 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixReflect
(
    FXMVECTOR ReflectionPlane
)
{
    assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ReflectionPlane));

    static const XMVECTORF32 NegativeTwo = { { { -2.0f, -2.0f, -2.0f, 0.0f } } };

    XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
    XMVECTOR S = XMVectorMultiply(P, NegativeTwo);

    XMVECTOR A = XMVectorSplatX(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR D = XMVectorSplatW(P);

    XMMATRIX M;
    M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
    M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
    M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
    M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
    return M;
}
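
// Usage sketch (illustrative only; names are hypothetical). The plane is given
// as (a, b, c, d) with ax + by + cz + d = 0 and is normalized internally.
// Mirroring the scene across the z = 0 plane:
//
//     XMVECTOR mirrorPlane = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
//     XMMATRIX reflectM    = XMMatrixReflect(mirrorPlane);
//     // Render the mirrored pass with world * reflectM (and flipped culling).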
1896 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixShadow
(
    FXMVECTOR ShadowPlane,
    FXMVECTOR LightPosition
)
{
    static const XMVECTORU32 Select0001 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1 } } };

    assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ShadowPlane));

    XMVECTOR P = XMPlaneNormalize(ShadowPlane);
    XMVECTOR Dot = XMPlaneDot(P, LightPosition);
    P = XMVectorNegate(P);
    XMVECTOR D = XMVectorSplatW(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR A = XMVectorSplatX(P);
    Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);

    XMMATRIX M;
    M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
    return M;
}
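
// Usage sketch (illustrative only; names are hypothetical). Flattens geometry
// onto the ground plane y = 0 for a point light (LightPosition w = 1.0f selects
// a point light, w = 0.0f a directional light):
//
//     XMVECTOR groundPlane = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR lightPos    = XMVectorSet(10.0f, 20.0f, -5.0f, 1.0f);
//     XMMATRIX shadowM     = XMMatrixShadow(groundPlane, lightPos);
//     // Draw the shadow pass with world * shadowM.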
1929 //------------------------------------------------------------------------------
1930 // View and projection initialization operations
1931 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookAtLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR FocusPosition,
    FXMVECTOR UpDirection
)
{
    XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
    return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
}
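
// Usage sketch (illustrative only; names are hypothetical). A camera at
// (0, 3, -8) looking at the origin with +Y up:
//
//     XMVECTOR eye    = XMVectorSet(0.0f, 3.0f, -8.0f, 1.0f);
//     XMVECTOR target = XMVectorZero();
//     XMVECTOR up     = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMMATRIX view   = XMMatrixLookAtLH(eye, target, up);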
1944 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookAtRH
(
    FXMVECTOR EyePosition,
    FXMVECTOR FocusPosition,
    FXMVECTOR UpDirection
)
{
    XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
    return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
1957 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookToLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(EyeDirection));
    assert(!XMVector3Equal(UpDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(UpDirection));

    XMVECTOR R2 = XMVector3Normalize(EyeDirection);

    XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
    R0 = XMVector3Normalize(R0);

    XMVECTOR R1 = XMVector3Cross(R2, R0);

    XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);

    XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
    XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
    XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);

    XMMATRIX M;
    M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
    M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
    M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
    M.r[3] = g_XMIdentityR3.v;

    M = XMMatrixTranspose(M);

    return M;
}
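
// Usage sketch (illustrative only; names are hypothetical). A free-look camera
// supplies a forward direction instead of a focus point; the direction must be
// non-zero and not parallel to the up vector:
//
//     XMVECTOR pos     = XMVectorSet(0.0f, 2.0f, 0.0f, 1.0f);
//     XMVECTOR forward = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
//     XMVECTOR up      = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMMATRIX view    = XMMatrixLookToLH(pos, forward, up);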
1995 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookToRH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
    return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
2008 //------------------------------------------------------------------------------
2011 #pragma prefast(push)
2012 #pragma prefast(disable:28931, "PREfast noise: Esp:1266")
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2028 #if defined(_XM_NO_INTRINSICS_)
2030 float TwoNearZ = NearZ + NearZ;
2031 float fRange = FarZ / (FarZ - NearZ);
2034 M.m[0][0] = TwoNearZ / ViewWidth;
2040 M.m[1][1] = TwoNearZ / ViewHeight;
2051 M.m[3][2] = -fRange * NearZ;
2055 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2056 float TwoNearZ = NearZ + NearZ;
2057 float fRange = FarZ / (FarZ - NearZ);
2058 const XMVECTOR Zero = vdupq_n_f32(0);
2060 M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
2061 M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
2062 M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
2063 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
2065 #elif defined(_XM_SSE_INTRINSICS_)
2067 float TwoNearZ = NearZ + NearZ;
2068 float fRange = FarZ / (FarZ - NearZ);
2069 // Note: This is recorded on the stack
2071 TwoNearZ / ViewWidth,
2072 TwoNearZ / ViewHeight,
2076 // Copy from memory to SSE register
2077 XMVECTOR vValues = rMem;
2078 XMVECTOR vTemp = _mm_setzero_ps();
2080 vTemp = _mm_move_ss(vTemp,vValues);
2081 // TwoNearZ / ViewWidth,0,0,0
2083 // 0,TwoNearZ / ViewHeight,0,0
2085 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2087 // x=fRange,y=-fRange * NearZ,0,1.0f
2088 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
2090 vTemp = _mm_setzero_ps();
2091 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
2093 // 0,0,-fRange * NearZ,0
2094 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
2100 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2115 #if defined(_XM_NO_INTRINSICS_)
2117 float TwoNearZ = NearZ + NearZ;
2118 float fRange = FarZ / (NearZ - FarZ);
2121 M.m[0][0] = TwoNearZ / ViewWidth;
2127 M.m[1][1] = TwoNearZ / ViewHeight;
2138 M.m[3][2] = fRange * NearZ;
2142 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2143 float TwoNearZ = NearZ + NearZ;
2144 float fRange = FarZ / (NearZ - FarZ);
2145 const XMVECTOR Zero = vdupq_n_f32(0);
2148 M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
2149 M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
2150 M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
2151 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
2153 #elif defined(_XM_SSE_INTRINSICS_)
2155 float TwoNearZ = NearZ + NearZ;
2156 float fRange = FarZ / (NearZ-FarZ);
2157 // Note: This is recorded on the stack
2159 TwoNearZ / ViewWidth,
2160 TwoNearZ / ViewHeight,
2164 // Copy from memory to SSE register
2165 XMVECTOR vValues = rMem;
2166 XMVECTOR vTemp = _mm_setzero_ps();
2168 vTemp = _mm_move_ss(vTemp,vValues);
2169 // TwoNearZ / ViewWidth,0,0,0
2171 // 0,TwoNearZ / ViewHeight,0,0
2173 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2175 // x=fRange,y=-fRange * NearZ,0,-1.0f
2176 vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
2178 vTemp = _mm_setzero_ps();
2179 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
2181 // 0,0,-fRange * NearZ,0
2182 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
2188 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH
(
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2203 #if defined(_XM_NO_INTRINSICS_)
2207 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2209 float Height = CosFov / SinFov;
2210 float Width = Height / AspectRatio;
2211 float fRange = FarZ / (FarZ-NearZ);
2231 M.m[3][2] = -fRange * NearZ;
2235 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2238 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2240 float fRange = FarZ / (FarZ-NearZ);
2241 float Height = CosFov / SinFov;
2242 float Width = Height / AspectRatio;
2243 const XMVECTOR Zero = vdupq_n_f32(0);
2246 M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
2247 M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
2248 M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
2249 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
2251 #elif defined(_XM_SSE_INTRINSICS_)
2254 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2256 float fRange = FarZ / (FarZ-NearZ);
2257 // Note: This is recorded on the stack
2258 float Height = CosFov / SinFov;
2260 Height / AspectRatio,
2265 // Copy from memory to SSE register
2266 XMVECTOR vValues = rMem;
2267 XMVECTOR vTemp = _mm_setzero_ps();
2269 vTemp = _mm_move_ss(vTemp,vValues);
2270 // CosFov / SinFov,0,0,0
2273 // 0,Height / AspectRatio,0,0
2275 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2277 // x=fRange,y=-fRange * NearZ,0,1.0f
2278 vTemp = _mm_setzero_ps();
2279 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
2281 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
2283 // 0,0,-fRange * NearZ,0.0f
2284 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
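
// Usage sketch (illustrative only; values are hypothetical). FovAngleY is the
// vertical field of view in radians and AspectRatio is width / height:
//
//     XMMATRIX proj = XMMatrixPerspectiveFovLH(
//         XMConvertToRadians(60.0f),   // vertical field of view
//         1280.0f / 720.0f,            // aspect ratio
//         0.1f, 100.0f);               // near and far planes (both > 0)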
2290 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH
(
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2305 #if defined(_XM_NO_INTRINSICS_)
2309 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2311 float Height = CosFov / SinFov;
2312 float Width = Height / AspectRatio;
2313 float fRange = FarZ / (NearZ-FarZ);
2333 M.m[3][2] = fRange * NearZ;
2337 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2340 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2341 float fRange = FarZ / (NearZ-FarZ);
2342 float Height = CosFov / SinFov;
2343 float Width = Height / AspectRatio;
2344 const XMVECTOR Zero = vdupq_n_f32(0);
2347 M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
2348 M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
2349 M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
2350 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
2352 #elif defined(_XM_SSE_INTRINSICS_)
2355 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
2356 float fRange = FarZ / (NearZ-FarZ);
2357 // Note: This is recorded on the stack
2358 float Height = CosFov / SinFov;
2360 Height / AspectRatio,
2365 // Copy from memory to SSE register
2366 XMVECTOR vValues = rMem;
2367 XMVECTOR vTemp = _mm_setzero_ps();
2369 vTemp = _mm_move_ss(vTemp,vValues);
2370 // CosFov / SinFov,0,0,0
2373 // 0,Height / AspectRatio,0,0
2375 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2377 // x=fRange,y=-fRange * NearZ,0,-1.0f
2378 vTemp = _mm_setzero_ps();
2379 vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
2381 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
2383 // 0,0,fRange * NearZ,0.0f
2384 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
2390 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2407 #if defined(_XM_NO_INTRINSICS_)
2409 float TwoNearZ = NearZ + NearZ;
2410 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2411 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2412 float fRange = FarZ / (FarZ-NearZ);
2415 M.m[0][0] = TwoNearZ * ReciprocalWidth;
2421 M.m[1][1] = TwoNearZ * ReciprocalHeight;
2425 M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
2426 M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
2432 M.m[3][2] = -fRange * NearZ;
2436 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2437 float TwoNearZ = NearZ + NearZ;
2438 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2439 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2440 float fRange = FarZ / (FarZ-NearZ);
2441 const XMVECTOR Zero = vdupq_n_f32(0);
2444 M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
2445 M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
2446 M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
2447 -(ViewTop + ViewBottom) * ReciprocalHeight,
2450 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
2452 #elif defined(_XM_SSE_INTRINSICS_)
2454 float TwoNearZ = NearZ+NearZ;
2455 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2456 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2457 float fRange = FarZ / (FarZ-NearZ);
2458 // Note: This is recorded on the stack
2460 TwoNearZ*ReciprocalWidth,
2461 TwoNearZ*ReciprocalHeight,
2465 // Copy from memory to SSE register
2466 XMVECTOR vValues = rMem;
2467 XMVECTOR vTemp = _mm_setzero_ps();
2469 vTemp = _mm_move_ss(vTemp,vValues);
2470 // TwoNearZ*ReciprocalWidth,0,0,0
2472 // 0,TwoNearZ*ReciprocalHeight,0,0
2474 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2477 M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
2478 -(ViewTop + ViewBottom) * ReciprocalHeight,
2481 // 0,0,-fRange * NearZ,0.0f
2482 vValues = _mm_and_ps(vValues,g_XMMaskZ);
2488 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2505 #if defined(_XM_NO_INTRINSICS_)
2507 float TwoNearZ = NearZ + NearZ;
2508 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2509 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2510 float fRange = FarZ / (NearZ-FarZ);
2513 M.m[0][0] = TwoNearZ * ReciprocalWidth;
2519 M.m[1][1] = TwoNearZ * ReciprocalHeight;
2523 M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
2524 M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
2530 M.m[3][2] = fRange * NearZ;
2534 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2535 float TwoNearZ = NearZ + NearZ;
2536 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2537 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2538 float fRange = FarZ / (NearZ-FarZ);
2539 const XMVECTOR Zero = vdupq_n_f32(0);
2542 M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
2543 M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
2544 M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
2545 (ViewTop + ViewBottom) * ReciprocalHeight,
2548 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
2550 #elif defined(_XM_SSE_INTRINSICS_)
2552 float TwoNearZ = NearZ+NearZ;
2553 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2554 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2555 float fRange = FarZ / (NearZ-FarZ);
2556 // Note: This is recorded on the stack
2558 TwoNearZ*ReciprocalWidth,
2559 TwoNearZ*ReciprocalHeight,
2563 // Copy from memory to SSE register
2564 XMVECTOR vValues = rMem;
2565 XMVECTOR vTemp = _mm_setzero_ps();
2567 vTemp = _mm_move_ss(vTemp,vValues);
2568 // TwoNearZ*ReciprocalWidth,0,0,0
2570 // 0,TwoNearZ*ReciprocalHeight,0,0
2572 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2575 M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
2576 (ViewTop + ViewBottom) * ReciprocalHeight,
2579 // 0,0,-fRange * NearZ,0.0f
2580 vValues = _mm_and_ps(vValues,g_XMMaskZ);
2586 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2600 #if defined(_XM_NO_INTRINSICS_)
2602 float fRange = 1.0f / (FarZ-NearZ);
2605 M.m[0][0] = 2.0f / ViewWidth;
2611 M.m[1][1] = 2.0f / ViewHeight;
2622 M.m[3][2] = -fRange * NearZ;
2626 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2627 float fRange = 1.0f / (FarZ-NearZ);
2629 const XMVECTOR Zero = vdupq_n_f32(0);
2631 M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
2632 M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
2633 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
2634 M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 );
2636 #elif defined(_XM_SSE_INTRINSICS_)
2638 float fRange = 1.0f / (FarZ-NearZ);
2639 // Note: This is recorded on the stack
2646 // Copy from memory to SSE register
2647 XMVECTOR vValues = rMem;
2648 XMVECTOR vTemp = _mm_setzero_ps();
2650 vTemp = _mm_move_ss(vTemp,vValues);
2651 // 2.0f / ViewWidth,0,0,0
2653 // 0,2.0f / ViewHeight,0,0
2655 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2657 // x=fRange,y=-fRange * NearZ,0,1.0f
2658 vTemp = _mm_setzero_ps();
2659 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
2661 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
2663 // 0,0,-fRange * NearZ,1.0f
2664 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
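
// Usage sketch (illustrative only; values are hypothetical). ViewWidth and
// ViewHeight are the dimensions of the view volume in view-space units:
//
//     XMMATRIX proj = XMMatrixOrthographicLH(20.0f, 11.25f, 0.1f, 100.0f);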
2670 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2684 #if defined(_XM_NO_INTRINSICS_)
2686 float fRange = 1.0f / (NearZ-FarZ);
2689 M.m[0][0] = 2.0f / ViewWidth;
2695 M.m[1][1] = 2.0f / ViewHeight;
2706 M.m[3][2] = fRange * NearZ;
2710 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2711 float fRange = 1.0f / (NearZ-FarZ);
2713 const XMVECTOR Zero = vdupq_n_f32(0);
2715 M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
2716 M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
2717 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
2718 M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 );
2720 #elif defined(_XM_SSE_INTRINSICS_)
2722 float fRange = 1.0f / (NearZ-FarZ);
2723 // Note: This is recorded on the stack
2730 // Copy from memory to SSE register
2731 XMVECTOR vValues = rMem;
2732 XMVECTOR vTemp = _mm_setzero_ps();
2734 vTemp = _mm_move_ss(vTemp,vValues);
2735 // 2.0f / ViewWidth,0,0,0
2737 // 0,2.0f / ViewHeight,0,0
2739 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2741 // x=fRange,y=fRange * NearZ,0,1.0f
2742 vTemp = _mm_setzero_ps();
2743 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
2745 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
2747 // 0,0,fRange * NearZ,1.0f
2748 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
2754 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2770 #if defined(_XM_NO_INTRINSICS_)
2772 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2773 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2774 float fRange = 1.0f / (FarZ-NearZ);
2777 M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
2783 M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
2792 M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
2793 M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
2794 M.m[3][2] = -fRange * NearZ;
2798 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2799 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2800 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2801 float fRange = 1.0f / (FarZ-NearZ);
2802 const XMVECTOR Zero = vdupq_n_f32(0);
2804 M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
2805 M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
2806 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
2807 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
2808 -(ViewTop + ViewBottom) * ReciprocalHeight,
2812 #elif defined(_XM_SSE_INTRINSICS_)
2814 float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2815 float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2816 float fRange = 1.0f / (FarZ-NearZ);
2817 // Note: This is recorded on the stack
2825 -(ViewLeft + ViewRight),
2826 -(ViewTop + ViewBottom),
2830 // Copy from memory to SSE register
2831 XMVECTOR vValues = rMem;
2832 XMVECTOR vTemp = _mm_setzero_ps();
2834 vTemp = _mm_move_ss(vTemp,vValues);
2835 // fReciprocalWidth*2,0,0,0
2836 vTemp = _mm_add_ss(vTemp,vTemp);
2838 // 0,fReciprocalHeight*2,0,0
2840 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2841 vTemp = _mm_add_ps(vTemp,vTemp);
2845 vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
2847 // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
2848 vValues = _mm_mul_ps(vValues,rMem2);
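
// Usage sketch (illustrative only; values are hypothetical). A common use is
// mapping pixel coordinates (origin at the top-left, y down) of a 1280x720
// render target directly to clip space for UI rendering:
//
//     XMMATRIX uiProj = XMMatrixOrthographicOffCenterLH(
//         0.0f, 1280.0f,    // ViewLeft, ViewRight
//         720.0f, 0.0f,     // ViewBottom, ViewTop
//         0.0f, 1.0f);      // NearZ, FarZ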
2854 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2870 #if defined(_XM_NO_INTRINSICS_)
2872 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2873 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2874 float fRange = 1.0f / (NearZ-FarZ);
2877 M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
2883 M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
2892 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
2893 -(ViewTop + ViewBottom) * ReciprocalHeight,
2898 #elif defined(_XM_ARM_NEON_INTRINSICS_)
2899 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2900 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2901 float fRange = 1.0f / (NearZ-FarZ);
2902 const XMVECTOR Zero = vdupq_n_f32(0);
2904 M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
2905 M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
2906 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
2907 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
2908 -(ViewTop + ViewBottom) * ReciprocalHeight,
2912 #elif defined(_XM_SSE_INTRINSICS_)
2914 float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
2915 float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
2916 float fRange = 1.0f / (NearZ-FarZ);
2917 // Note: This is recorded on the stack
2925 -(ViewLeft + ViewRight),
2926 -(ViewTop + ViewBottom),
2930 // Copy from memory to SSE register
2931 XMVECTOR vValues = rMem;
2932 XMVECTOR vTemp = _mm_setzero_ps();
2934 vTemp = _mm_move_ss(vTemp,vValues);
2935 // fReciprocalWidth*2,0,0,0
2936 vTemp = _mm_add_ss(vTemp,vTemp);
2938 // 0,fReciprocalHeight*2,0,0
2940 vTemp = _mm_and_ps(vTemp,g_XMMaskY);
2941 vTemp = _mm_add_ps(vTemp,vTemp);
2945 vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
2947 // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
2948 vValues = _mm_mul_ps(vValues,rMem2);
2955 #pragma prefast(pop)
2958 /****************************************************************************
2960 * XMMATRIX operators and methods
2962 ****************************************************************************/
2964 //------------------------------------------------------------------------------
inline XMMATRIX::XMMATRIX
(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
)
{
    r[0] = XMVectorSet(m00, m01, m02, m03);
    r[1] = XMVectorSet(m10, m11, m12, m13);
    r[2] = XMVectorSet(m20, m21, m22, m23);
    r[3] = XMVectorSet(m30, m31, m32, m33);
}
2980 //------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMMATRIX::XMMATRIX
(
    const float* pArray
)
{
    assert( pArray != nullptr );
    r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
    r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
    r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
    r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
}
2994 //------------------------------------------------------------------------------
inline XMMATRIX XMMATRIX::operator- () const
{
    XMMATRIX R;
    R.r[0] = XMVectorNegate( r[0] );
    R.r[1] = XMVectorNegate( r[1] );
    R.r[2] = XMVectorNegate( r[2] );
    R.r[3] = XMVectorNegate( r[3] );
    return R;
}
3006 //------------------------------------------------------------------------------
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator+= (FXMMATRIX M)
{
    r[0] = XMVectorAdd( r[0], M.r[0] );
    r[1] = XMVectorAdd( r[1], M.r[1] );
    r[2] = XMVectorAdd( r[2], M.r[2] );
    r[3] = XMVectorAdd( r[3], M.r[3] );
    return *this;
}
3017 //------------------------------------------------------------------------------
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator-= (FXMMATRIX M)
{
    r[0] = XMVectorSubtract( r[0], M.r[0] );
    r[1] = XMVectorSubtract( r[1], M.r[1] );
    r[2] = XMVectorSubtract( r[2], M.r[2] );
    r[3] = XMVectorSubtract( r[3], M.r[3] );
    return *this;
}
3028 //------------------------------------------------------------------------------
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator*=(FXMMATRIX M)
{
    *this = XMMatrixMultiply( *this, M );
    return *this;
}
3036 //------------------------------------------------------------------------------
inline XMMATRIX& XMMATRIX::operator*= (float S)
{
    r[0] = XMVectorScale( r[0], S );
    r[1] = XMVectorScale( r[1], S );
    r[2] = XMVectorScale( r[2], S );
    r[3] = XMVectorScale( r[3], S );
    return *this;
}
3047 //------------------------------------------------------------------------------
inline XMMATRIX& XMMATRIX::operator/= (float S)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vS = XMVectorReplicate( S );
    r[0] = XMVectorDivide( r[0], vS );
    r[1] = XMVectorDivide( r[1], vS );
    r[2] = XMVectorDivide( r[2], vS );
    r[3] = XMVectorDivide( r[3], vS );
    return *this;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
    float32x4_t vS = vdupq_n_f32( S );
    r[0] = vdivq_f32( r[0], vS );
    r[1] = vdivq_f32( r[1], vS );
    r[2] = vdivq_f32( r[2], vS );
    r[3] = vdivq_f32( r[3], vS );
#else
    // 2 iterations of Newton-Raphson refinement of reciprocal
    float32x2_t vS = vdup_n_f32( S );
    float32x2_t R0 = vrecpe_f32( vS );
    float32x2_t S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    float32x4_t Reciprocal = vcombine_f32( R0, R0 );
    r[0] = vmulq_f32( r[0], Reciprocal );
    r[1] = vmulq_f32( r[1], Reciprocal );
    r[2] = vmulq_f32( r[2], Reciprocal );
    r[3] = vmulq_f32( r[3], Reciprocal );
#endif
    return *this;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 vS = _mm_set_ps1( S );
    r[0] = _mm_div_ps( r[0], vS );
    r[1] = _mm_div_ps( r[1], vS );
    r[2] = _mm_div_ps( r[2], vS );
    r[3] = _mm_div_ps( r[3], vS );
    return *this;
#endif
}
3090 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMATRIX::operator+ (FXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorAdd( r[0], M.r[0] );
    R.r[1] = XMVectorAdd( r[1], M.r[1] );
    R.r[2] = XMVectorAdd( r[2], M.r[2] );
    R.r[3] = XMVectorAdd( r[3], M.r[3] );
    return R;
}
3102 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMATRIX::operator- (FXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorSubtract( r[0], M.r[0] );
    R.r[1] = XMVectorSubtract( r[1], M.r[1] );
    R.r[2] = XMVectorSubtract( r[2], M.r[2] );
    R.r[3] = XMVectorSubtract( r[3], M.r[3] );
    return R;
}
3114 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMATRIX::operator*(FXMMATRIX M) const
{
    return XMMatrixMultiply(*this, M);
}
3121 //------------------------------------------------------------------------------
inline XMMATRIX XMMATRIX::operator* (float S) const
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( r[0], S );
    R.r[1] = XMVectorScale( r[1], S );
    R.r[2] = XMVectorScale( r[2], S );
    R.r[3] = XMVectorScale( r[3], S );
    return R;
}
3133 //------------------------------------------------------------------------------
inline XMMATRIX XMMATRIX::operator/ (float S) const
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vS = XMVectorReplicate( S );
    XMMATRIX R;
    R.r[0] = XMVectorDivide( r[0], vS );
    R.r[1] = XMVectorDivide( r[1], vS );
    R.r[2] = XMVectorDivide( r[2], vS );
    R.r[3] = XMVectorDivide( r[3], vS );
    return R;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
    float32x4_t vS = vdupq_n_f32( S );
    XMMATRIX R;
    R.r[0] = vdivq_f32( r[0], vS );
    R.r[1] = vdivq_f32( r[1], vS );
    R.r[2] = vdivq_f32( r[2], vS );
    R.r[3] = vdivq_f32( r[3], vS );
    return R;
#else
    // 2 iterations of Newton-Raphson refinement of reciprocal
    float32x2_t vS = vdup_n_f32( S );
    float32x2_t R0 = vrecpe_f32( vS );
    float32x2_t S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    float32x4_t Reciprocal = vcombine_f32( R0, R0 );
    XMMATRIX R;
    R.r[0] = vmulq_f32( r[0], Reciprocal );
    R.r[1] = vmulq_f32( r[1], Reciprocal );
    R.r[2] = vmulq_f32( r[2], Reciprocal );
    R.r[3] = vmulq_f32( r[3], Reciprocal );
    return R;
#endif
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 vS = _mm_set_ps1( S );
    XMMATRIX R;
    R.r[0] = _mm_div_ps( r[0], vS );
    R.r[1] = _mm_div_ps( r[1], vS );
    R.r[2] = _mm_div_ps( r[2], vS );
    R.r[3] = _mm_div_ps( r[3], vS );
    return R;
#endif
}
3180 //------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV operator*
(
    float S,
    FXMMATRIX M
)
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( M.r[0], S );
    R.r[1] = XMVectorScale( M.r[1], S );
    R.r[2] = XMVectorScale( M.r[2], S );
    R.r[3] = XMVectorScale( M.r[3], S );
    return R;
}
3196 /****************************************************************************
3198 * XMFLOAT3X3 operators
3200 ****************************************************************************/
3202 //------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT3X3::XMFLOAT3X3
(
    const float* pArray
)
{
    assert( pArray != nullptr );
    for (size_t Row = 0; Row < 3; Row++)
    {
        for (size_t Column = 0; Column < 3; Column++)
        {
            m[Row][Column] = pArray[Row * 3 + Column];
        }
    }
}
3219 //------------------------------------------------------------------------------
3221 inline XMFLOAT3X3& XMFLOAT3X3::operator=
3223 const XMFLOAT3X3& Float3x3
3239 /****************************************************************************
3241 * XMFLOAT4X3 operators
3243 ****************************************************************************/
3245 //------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT4X3::XMFLOAT4X3
(
    const float* pArray
)
{
    assert( pArray != nullptr );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];

    m[1][0] = pArray[3];
    m[1][1] = pArray[4];
    m[1][2] = pArray[5];

    m[2][0] = pArray[6];
    m[2][1] = pArray[7];
    m[2][2] = pArray[8];

    m[3][0] = pArray[9];
    m[3][1] = pArray[10];
    m[3][2] = pArray[11];
}
3271 //------------------------------------------------------------------------------
inline XMFLOAT4X3& XMFLOAT4X3::operator=
(
    const XMFLOAT4X3& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_22, V2);
    XMStoreFloat4((XMFLOAT4*)&_33, V3);

    return *this;
}
3289 //------------------------------------------------------------------------------
inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
(
    const XMFLOAT4X3A& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_33, V3);

    return *this;
}
3307 /****************************************************************************
3309 * XMFLOAT4X4 operators
3311 ****************************************************************************/
3313 //------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT4X4::XMFLOAT4X4
(
    const float* pArray
)
{
    assert( pArray != nullptr );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];
    m[0][3] = pArray[3];

    m[1][0] = pArray[4];
    m[1][1] = pArray[5];
    m[1][2] = pArray[6];
    m[1][3] = pArray[7];

    m[2][0] = pArray[8];
    m[2][1] = pArray[9];
    m[2][2] = pArray[10];
    m[2][3] = pArray[11];

    m[3][0] = pArray[12];
    m[3][1] = pArray[13];
    m[3][2] = pArray[14];
    m[3][3] = pArray[15];
}
3343 //------------------------------------------------------------------------------
inline XMFLOAT4X4& XMFLOAT4X4::operator=
(
    const XMFLOAT4X4& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_21, V2);
    XMStoreFloat4((XMFLOAT4*)&_31, V3);
    XMStoreFloat4((XMFLOAT4*)&_41, V4);

    return *this;
}
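
// Usage sketch (illustrative only; names are hypothetical). XMFLOAT4X4 is the
// storage type; XMMATRIX is the SIMD working type. A typical round trip:
//
//     XMFLOAT4X4 stored;
//     XMStoreFloat4x4(&stored, XMMatrixIdentity());   // SIMD register -> memory
//     XMMATRIX m = XMLoadFloat4x4(&stored);           // memory -> SIMD register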
3363 //------------------------------------------------------------------------------
inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
(
    const XMFLOAT4X4A& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
    XMStoreFloat4A((XMFLOAT4A*)&_41, V4);

    return *this;
}