1 /* { dg-do run { target mipsisa64*-*-* } } */
2 /* { dg-mips-options "-mips64 -O2 -mips3d -mhard-float -mgp64" } */
4 /* Matrix Multiplications */
/* Paired-single vector type: an 8-byte vector of float.  Values of this
   type are what the __builtin_mips_* calls in this file take and return.  */
typedef float v2sf __attribute__ ((vector_size (8)));
/* Four-element input vector; read by every matrix_multiplyN() variant as
   the left-hand operand of the product.  */
float a[4] = {1.1, 2.2, 3.3, 4.4};
11 float b
[4][4] = {{1, 2, 3, 4},
/* Output vectors, one per implementation variant.  They are file-scope
   objects without initializers, so they start out as all zeros.  */
float c[4]; /* Result for matrix_multiply1() */
float d[4]; /* Result for matrix_multiply2() */
float e[4]; /* Result for matrix_multiply3() */
float f[4]; /* Result for matrix_multiply4() */
/* The four implementations of the same product.  None of them takes
   arguments or returns a value; they read and write file-scope arrays.
   "(void)" makes each declaration a real prototype instead of a declaration
   with unspecified parameters.  */
void matrix_multiply1 (void);
void matrix_multiply2 (void);
void matrix_multiply3 (void);
void matrix_multiply4 (void);
30 /* Version 1. Use float calculations */
33 /* Version 2. Use paired-single instructions inside the inner loop */
35 for (i
= 0; i
< 4; i
++)
39 /* Version 3. Use paired-single instructions and unroll the inner loop */
41 for (i
= 0; i
< 4; i
++)
45 /* Version 4. Use paired-single instructions and unroll all loops */
47 for (i
= 0; i
< 4; i
++)
51 printf ("Test Passes\n");
55 void matrix_multiply1()
59 for (i
= 0; i
< 4; i
++)
63 for (j
= 0; j
< 4; j
++)
64 c
[i
] += a
[j
] * b
[j
][i
];
68 void matrix_multiply2()
74 for (i
= 0; i
< 4; i
++)
76 result
= (v2sf
) {0.0, 0.0};
78 for (j
= 0; j
< 4; j
+=2)
80 /* Load two float values into m1 */
81 m1
= (v2sf
) {a
[j
], a
[j
+1]};
82 m2
= (v2sf
) {b
[j
][i
], b
[j
+1][i
]};
84 /* Multiply and add */
88 /* Reduction add at the end */
89 temp
= __builtin_mips_addr_ps (result
, result
);
90 d
[i
] = __builtin_mips_cvt_s_pl (temp
);
94 void matrix_multiply3()
100 m1
= (v2sf
) {a
[0], a
[1]};
101 m2
= (v2sf
) {a
[2], a
[3]};
103 for (i
= 0; i
< 4; i
++)
105 n1
= (v2sf
) {b
[0][i
], b
[1][i
]};
106 n2
= (v2sf
) {b
[2][i
], b
[3][i
]};
108 /* Multiply and add */
109 result
= m1
* n1
+ m2
* n2
;
111 /* Reduction add at the end */
112 temp
= __builtin_mips_addr_ps (result
, result
);
113 e
[i
] = __builtin_mips_cvt_s_pl (temp
);
117 void matrix_multiply4()
120 v2sf n1
, n2
, n3
, n4
, n5
, n6
, n7
, n8
;
121 v2sf temp1
, temp2
, temp3
, temp4
;
122 v2sf result1
, result2
;
124 /* Load a[0] a[1] values into m1
125 Load a[2] a[3] values into m2 */
126 m1
= (v2sf
) {a
[0], a
[1]};
127 m2
= (v2sf
) {a
[2], a
[3]};
129 /* Load b[0][0] b[1][0] values into n1
130 Load b[2][0] b[3][0] values into n2
131 Load b[0][1] b[1][1] values into n3
132 Load b[2][1] b[3][1] values into n4
133 Load b[0][2] b[1][2] values into n5
134 Load b[2][2] b[3][2] values into n6
135 Load b[0][3] b[1][3] values into n7
136 Load b[2][3] b[3][3] values into n8 */
137 n1
= (v2sf
) {b
[0][0], b
[1][0]};
138 n2
= (v2sf
) {b
[2][0], b
[3][0]};
139 n3
= (v2sf
) {b
[0][1], b
[1][1]};
140 n4
= (v2sf
) {b
[2][1], b
[3][1]};
141 n5
= (v2sf
) {b
[0][2], b
[1][2]};
142 n6
= (v2sf
) {b
[2][2], b
[3][2]};
143 n7
= (v2sf
) {b
[0][3], b
[1][3]};
144 n8
= (v2sf
) {b
[2][3], b
[3][3]};
146 temp1
= m1
* n1
+ m2
* n2
;
147 temp2
= m1
* n3
+ m2
* n4
;
148 temp3
= m1
* n5
+ m2
* n6
;
149 temp4
= m1
* n7
+ m2
* n8
;
151 result1
= __builtin_mips_addr_ps (temp1
, temp2
);
152 result2
= __builtin_mips_addr_ps (temp3
, temp4
);
154 f
[0] = __builtin_mips_cvt_s_pu (result1
);
155 f
[1] = __builtin_mips_cvt_s_pl (result1
);
156 f
[2] = __builtin_mips_cvt_s_pu (result2
);
157 f
[3] = __builtin_mips_cvt_s_pl (result2
);