Updating built in Io code to use += instead of x = x + y
[io/quag.git] / libs / basekit / source / simd_cph / test / test_simd.c
blob8d7f88d6666ed024e188c1ed6e8e62ca29b6a325
1 /*
2 Copyright (c) 2004 Patrick Roberts
4 This software is provided 'as-is', without any express
5 or implied warranty. In no event will the authors be held
6 liable for any damages arising from the use of this software.
8 Permission is granted to anyone to use this software for any
9 purpose, including commercial applications, and to alter it
10 and redistribute it freely, subject to the following restrictions:
12 1. The origin of this software must not be misrepresented;
13 you must not claim that you wrote the original software.
14 If you use this software in a product, an acknowledgment in
15 the product documentation would be appreciated but is not required.
17 2. Altered source versions must be plainly marked as such,
18 and must not be misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
22 4. THIS LICENSE MAY NOT BE CHANGED, ASSIGNED, OR MIGRATED WITHOUT
23 THE AUTHOR'S WRITTEN PERMISSION, WITH THE FOLLOWING EXCEPTIONS:
25 a. This file may be included with GPL/LGPL licensed
26 software, but you may not change the license this file
27 is released under.
31 /* uncomment the line below to force SIMD emulation C code */
32 /*#define __SIMD_EMU__ */
34 #include <simd_cp.h>
35 #include <stdio.h>
37 int main(void) {
39 simd_m128 v0,v1,v2;
41 simd_load4Floats(v1,2.0,2.0,2.0,2.0);
42 simd_load4Floats(v2,10.0,20.0,30.0,40.0);
44 printf("\nChecking 4f commands\n");
46 simd_print4Floats("v1 ",v1);
47 simd_print4Floats("v2 ",v2);
48 puts("");
50 /* v0 = v1 + v2 */
51 simd_4f_add(v1,v2,v0);
52 simd_print4Floats("4f_add ",v0);
54 /* v0 = v1 - v2 */
55 simd_4f_sub(v1,v2,v0);
56 simd_print4Floats("4f_sub ",v0);
58 /* v0 = v1 * v2 */
59 simd_4f_mult(v1,v2,v0);
60 simd_print4Floats("4f_mult",v0);
62 /* v0 = v1 / v2 */
63 simd_4f_div(v1,v2,v0);
64 simd_print4Floats("4f_div",v0);
67 * If you look at the disassembly of this section on an X86 processor, it will be
68 * very tight, as X86 SSE/MMX only handles 2 regs, i.e. A+=B instead of C=A+B.
69 * For the best cross-platform performance, cater to the lowest common denominator
70 * and write your code like this.
73 printf("\ndisassembly test\n");
74 simd_4f_add(v1,v2,v1);
75 simd_4f_mult(v1,v2,v1);
76 simd_4f_sub(v1,v2,v1);
77 simd_4f_div(v1,v2,v1);
81 printf("\nChecking 4i commands\n");
83 simd_load4Ints(v1,20,30,40,50);
84 simd_load4Ints(v2,2,3,4,5);
86 simd_print4Ints("v1 ",v1);
87 simd_print4Ints("v2 ",v2);
88 puts("");
91 /* v0 = v1 + v2 */
92 simd_4i_add(v1,v2,v0);
93 simd_print4Ints("4i_add ",v0);
95 /* v0 = v1 - v2 */
96 simd_4i_sub(v1,v2,v0);
97 simd_print4Ints("4i_sub ",v0);
99 /* v0 = v1 * v2 */
100 simd_4i_mult(v1,v2,v0);
101 simd_print4Ints("4i_mult",v0);
103 /* v0 = v1 / v2 */
104 simd_4i_div(v1,v2,v0);
105 simd_print4Ints("4i_div ",v0);
108 return 0;