1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
8 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256BW
10 @a64 = common global [8 x i64] zeroinitializer, align 64
11 @b64 = common global [8 x i64] zeroinitializer, align 64
12 @c64 = common global [8 x i64] zeroinitializer, align 64
13 @a32 = common global [16 x i32] zeroinitializer, align 64
14 @b32 = common global [16 x i32] zeroinitializer, align 64
15 @c32 = common global [16 x i32] zeroinitializer, align 64
16 @a16 = common global [32 x i16] zeroinitializer, align 64
17 @b16 = common global [32 x i16] zeroinitializer, align 64
18 @c16 = common global [32 x i16] zeroinitializer, align 64
19 @a8 = common global [64 x i8] zeroinitializer, align 64
20 @b8 = common global [64 x i8] zeroinitializer, align 64
21 @c8 = common global [64 x i8] zeroinitializer, align 64
23 declare i64 @llvm.smul.fix.i64(i64, i64, i32)
24 declare i32 @llvm.smul.fix.i32(i32, i32, i32)
25 declare i16 @llvm.smul.fix.i16(i16, i16, i32)
26 declare i8 @llvm.smul.fix.i8 (i8 , i8 , i32)
28 define void @smul_v8i64() {
29 ; SSE-LABEL: @smul_v8i64(
30 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
31 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
32 ; SSE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
33 ; SSE-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
34 ; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
35 ; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
36 ; SSE-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
37 ; SSE-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
38 ; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
39 ; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
40 ; SSE-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
41 ; SSE-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
42 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
43 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
44 ; SSE-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
45 ; SSE-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
48 ; SLM-LABEL: @smul_v8i64(
49 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
50 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
51 ; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
52 ; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
53 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
54 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
55 ; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
56 ; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
57 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
58 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
59 ; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
60 ; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
61 ; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
62 ; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
63 ; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
64 ; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
67 ; AVX1-LABEL: @smul_v8i64(
68 ; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
69 ; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
70 ; AVX1-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
71 ; AVX1-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
72 ; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
73 ; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
74 ; AVX1-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
75 ; AVX1-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
76 ; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
77 ; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
78 ; AVX1-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
79 ; AVX1-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
80 ; AVX1-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
81 ; AVX1-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
82 ; AVX1-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
83 ; AVX1-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
86 ; AVX2-LABEL: @smul_v8i64(
87 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
88 ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
89 ; AVX2-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
90 ; AVX2-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
91 ; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
92 ; AVX2-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
93 ; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
94 ; AVX2-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
97 ; AVX512-LABEL: @smul_v8i64(
98 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
99 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
100 ; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]], i32 3)
101 ; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
102 ; AVX512-NEXT: ret void
104 ; AVX256BW-LABEL: @smul_v8i64(
105 ; AVX256BW-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
106 ; AVX256BW-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
107 ; AVX256BW-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
108 ; AVX256BW-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
109 ; AVX256BW-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
110 ; AVX256BW-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
111 ; AVX256BW-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
112 ; AVX256BW-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
113 ; AVX256BW-NEXT: ret void
115 %a0 = load i64, ptr @a64, align 8
116 %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
117 %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
118 %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
119 %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
120 %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
121 %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
122 %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
123 %b0 = load i64, ptr @b64, align 8
124 %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
125 %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
126 %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
127 %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
128 %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
129 %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
130 %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
131 %r0 = call i64 @llvm.smul.fix.i64(i64 %a0, i64 %b0, i32 3)
132 %r1 = call i64 @llvm.smul.fix.i64(i64 %a1, i64 %b1, i32 3)
133 %r2 = call i64 @llvm.smul.fix.i64(i64 %a2, i64 %b2, i32 3)
134 %r3 = call i64 @llvm.smul.fix.i64(i64 %a3, i64 %b3, i32 3)
135 %r4 = call i64 @llvm.smul.fix.i64(i64 %a4, i64 %b4, i32 3)
136 %r5 = call i64 @llvm.smul.fix.i64(i64 %a5, i64 %b5, i32 3)
137 %r6 = call i64 @llvm.smul.fix.i64(i64 %a6, i64 %b6, i32 3)
138 %r7 = call i64 @llvm.smul.fix.i64(i64 %a7, i64 %b7, i32 3)
139 store i64 %r0, ptr @c64, align 8
140 store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
141 store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
142 store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
143 store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
144 store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
145 store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
146 store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
150 define void @smul_v16i32() {
151 ; SSE-LABEL: @smul_v16i32(
152 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
153 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
154 ; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 3)
155 ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
156 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
157 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
158 ; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3)
159 ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
160 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
161 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
162 ; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32 3)
163 ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
164 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
165 ; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
166 ; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 3)
167 ; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
170 ; SLM-LABEL: @smul_v16i32(
171 ; SLM-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4
172 ; SLM-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
173 ; SLM-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
174 ; SLM-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
175 ; SLM-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
176 ; SLM-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
177 ; SLM-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
178 ; SLM-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
179 ; SLM-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
180 ; SLM-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
181 ; SLM-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
182 ; SLM-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
183 ; SLM-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
184 ; SLM-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
185 ; SLM-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
186 ; SLM-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
187 ; SLM-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4
188 ; SLM-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
189 ; SLM-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
190 ; SLM-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
191 ; SLM-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
192 ; SLM-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
193 ; SLM-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
194 ; SLM-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
195 ; SLM-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
196 ; SLM-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
197 ; SLM-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
198 ; SLM-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
199 ; SLM-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
200 ; SLM-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
201 ; SLM-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
202 ; SLM-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
203 ; SLM-NEXT: [[R0:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3)
204 ; SLM-NEXT: [[R1:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3)
205 ; SLM-NEXT: [[R2:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3)
206 ; SLM-NEXT: [[R3:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A3]], i32 [[B3]], i32 3)
207 ; SLM-NEXT: [[R4:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A4]], i32 [[B4]], i32 3)
208 ; SLM-NEXT: [[R5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A5]], i32 [[B5]], i32 3)
209 ; SLM-NEXT: [[R6:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A6]], i32 [[B6]], i32 3)
210 ; SLM-NEXT: [[R7:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A7]], i32 [[B7]], i32 3)
211 ; SLM-NEXT: [[R8:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A8]], i32 [[B8]], i32 3)
212 ; SLM-NEXT: [[R9:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A9]], i32 [[B9]], i32 3)
213 ; SLM-NEXT: [[R10:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A10]], i32 [[B10]], i32 3)
214 ; SLM-NEXT: [[R11:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A11]], i32 [[B11]], i32 3)
215 ; SLM-NEXT: [[R12:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A12]], i32 [[B12]], i32 3)
216 ; SLM-NEXT: [[R13:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3)
217 ; SLM-NEXT: [[R14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3)
218 ; SLM-NEXT: [[R15:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3)
219 ; SLM-NEXT: store i32 [[R0]], ptr @c32, align 4
220 ; SLM-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
221 ; SLM-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
222 ; SLM-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
223 ; SLM-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
224 ; SLM-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
225 ; SLM-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
226 ; SLM-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
227 ; SLM-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
228 ; SLM-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
229 ; SLM-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
230 ; SLM-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
231 ; SLM-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
232 ; SLM-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
233 ; SLM-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
234 ; SLM-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
237 ; AVX-LABEL: @smul_v16i32(
238 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
239 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
240 ; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], i32 3)
241 ; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
242 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
243 ; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
244 ; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]], i32 3)
245 ; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
248 ; AVX512-LABEL: @smul_v16i32(
249 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
250 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
251 ; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 3)
252 ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
253 ; AVX512-NEXT: ret void
255 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
256 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
257 %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
258 %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
259 %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
260 %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
261 %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
262 %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
263 %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
264 %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
265 %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
266 %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
267 %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
268 %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
269 %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
270 %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
271 %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
272 %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
273 %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
274 %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
275 %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
276 %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
277 %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
278 %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
279 %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
280 %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
281 %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
282 %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
283 %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
284 %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
285 %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
286 %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
287 %r0 = call i32 @llvm.smul.fix.i32(i32 %a0 , i32 %b0 , i32 3)
288 %r1 = call i32 @llvm.smul.fix.i32(i32 %a1 , i32 %b1 , i32 3)
289 %r2 = call i32 @llvm.smul.fix.i32(i32 %a2 , i32 %b2 , i32 3)
290 %r3 = call i32 @llvm.smul.fix.i32(i32 %a3 , i32 %b3 , i32 3)
291 %r4 = call i32 @llvm.smul.fix.i32(i32 %a4 , i32 %b4 , i32 3)
292 %r5 = call i32 @llvm.smul.fix.i32(i32 %a5 , i32 %b5 , i32 3)
293 %r6 = call i32 @llvm.smul.fix.i32(i32 %a6 , i32 %b6 , i32 3)
294 %r7 = call i32 @llvm.smul.fix.i32(i32 %a7 , i32 %b7 , i32 3)
295 %r8 = call i32 @llvm.smul.fix.i32(i32 %a8 , i32 %b8 , i32 3)
296 %r9 = call i32 @llvm.smul.fix.i32(i32 %a9 , i32 %b9 , i32 3)
297 %r10 = call i32 @llvm.smul.fix.i32(i32 %a10, i32 %b10, i32 3)
298 %r11 = call i32 @llvm.smul.fix.i32(i32 %a11, i32 %b11, i32 3)
299 %r12 = call i32 @llvm.smul.fix.i32(i32 %a12, i32 %b12, i32 3)
300 %r13 = call i32 @llvm.smul.fix.i32(i32 %a13, i32 %b13, i32 3)
301 %r14 = call i32 @llvm.smul.fix.i32(i32 %a14, i32 %b14, i32 3)
302 %r15 = call i32 @llvm.smul.fix.i32(i32 %a15, i32 %b15, i32 3)
303 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
304 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
305 store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
306 store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
307 store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
308 store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
309 store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
310 store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
311 store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
312 store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
313 store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
314 store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
315 store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
316 store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
317 store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
318 store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
322 define void @smul_v32i16() {
323 ; SSE-LABEL: @smul_v32i16(
324 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
325 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
326 ; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
327 ; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
328 ; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
329 ; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
330 ; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
331 ; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
332 ; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
333 ; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
334 ; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
335 ; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
336 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
337 ; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
338 ; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
339 ; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
342 ; SLM-LABEL: @smul_v32i16(
343 ; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
344 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
345 ; SLM-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
346 ; SLM-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
347 ; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
348 ; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
349 ; SLM-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
350 ; SLM-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
351 ; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
352 ; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
353 ; SLM-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
354 ; SLM-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
355 ; SLM-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
356 ; SLM-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
357 ; SLM-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
358 ; SLM-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
361 ; AVX-LABEL: @smul_v32i16(
362 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
363 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
364 ; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]], i32 3)
365 ; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
366 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
367 ; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
368 ; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]], i32 3)
369 ; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
372 ; AVX512-LABEL: @smul_v32i16(
373 ; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
374 ; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
375 ; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]], i32 3)
376 ; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
377 ; AVX512-NEXT: ret void
379 %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
380 %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
381 %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
382 %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
383 %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
384 %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
385 %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
386 %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
387 %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
388 %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
389 %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
390 %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
391 %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
392 %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
393 %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
394 %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
395 %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
396 %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
397 %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
398 %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
399 %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
400 %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
401 %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
402 %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
403 %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
404 %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
405 %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
406 %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
407 %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
408 %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
409 %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
410 %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
411 %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
412 %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
413 %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
414 %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
415 %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
416 %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
417 %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
418 %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
419 %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
420 %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
421 %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
422 %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
423 %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
424 %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
425 %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
426 %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
427 %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
428 %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
429 %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
430 %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
431 %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
432 %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
433 %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
434 %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
435 %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
436 %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
437 %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
438 %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
439 %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
440 %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
441 %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
442 %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
443 %r0 = call i16 @llvm.smul.fix.i16(i16 %a0 , i16 %b0 , i32 3)
444 %r1 = call i16 @llvm.smul.fix.i16(i16 %a1 , i16 %b1 , i32 3)
445 %r2 = call i16 @llvm.smul.fix.i16(i16 %a2 , i16 %b2 , i32 3)
446 %r3 = call i16 @llvm.smul.fix.i16(i16 %a3 , i16 %b3 , i32 3)
447 %r4 = call i16 @llvm.smul.fix.i16(i16 %a4 , i16 %b4 , i32 3)
448 %r5 = call i16 @llvm.smul.fix.i16(i16 %a5 , i16 %b5 , i32 3)
449 %r6 = call i16 @llvm.smul.fix.i16(i16 %a6 , i16 %b6 , i32 3)
450 %r7 = call i16 @llvm.smul.fix.i16(i16 %a7 , i16 %b7 , i32 3)
451 %r8 = call i16 @llvm.smul.fix.i16(i16 %a8 , i16 %b8 , i32 3)
452 %r9 = call i16 @llvm.smul.fix.i16(i16 %a9 , i16 %b9 , i32 3)
453 %r10 = call i16 @llvm.smul.fix.i16(i16 %a10, i16 %b10, i32 3)
454 %r11 = call i16 @llvm.smul.fix.i16(i16 %a11, i16 %b11, i32 3)
455 %r12 = call i16 @llvm.smul.fix.i16(i16 %a12, i16 %b12, i32 3)
456 %r13 = call i16 @llvm.smul.fix.i16(i16 %a13, i16 %b13, i32 3)
457 %r14 = call i16 @llvm.smul.fix.i16(i16 %a14, i16 %b14, i32 3)
458 %r15 = call i16 @llvm.smul.fix.i16(i16 %a15, i16 %b15, i32 3)
459 %r16 = call i16 @llvm.smul.fix.i16(i16 %a16, i16 %b16, i32 3)
460 %r17 = call i16 @llvm.smul.fix.i16(i16 %a17, i16 %b17, i32 3)
461 %r18 = call i16 @llvm.smul.fix.i16(i16 %a18, i16 %b18, i32 3)
462 %r19 = call i16 @llvm.smul.fix.i16(i16 %a19, i16 %b19, i32 3)
463 %r20 = call i16 @llvm.smul.fix.i16(i16 %a20, i16 %b20, i32 3)
464 %r21 = call i16 @llvm.smul.fix.i16(i16 %a21, i16 %b21, i32 3)
465 %r22 = call i16 @llvm.smul.fix.i16(i16 %a22, i16 %b22, i32 3)
466 %r23 = call i16 @llvm.smul.fix.i16(i16 %a23, i16 %b23, i32 3)
467 %r24 = call i16 @llvm.smul.fix.i16(i16 %a24, i16 %b24, i32 3)
468 %r25 = call i16 @llvm.smul.fix.i16(i16 %a25, i16 %b25, i32 3)
469 %r26 = call i16 @llvm.smul.fix.i16(i16 %a26, i16 %b26, i32 3)
470 %r27 = call i16 @llvm.smul.fix.i16(i16 %a27, i16 %b27, i32 3)
471 %r28 = call i16 @llvm.smul.fix.i16(i16 %a28, i16 %b28, i32 3)
472 %r29 = call i16 @llvm.smul.fix.i16(i16 %a29, i16 %b29, i32 3)
473 %r30 = call i16 @llvm.smul.fix.i16(i16 %a30, i16 %b30, i32 3)
474 %r31 = call i16 @llvm.smul.fix.i16(i16 %a31, i16 %b31, i32 3)
475 store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
476 store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
477 store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
478 store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
479 store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
480 store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
481 store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
482 store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
483 store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
484 store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
485 store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
486 store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
487 store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
488 store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
489 store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
490 store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
491 store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
492 store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
493 store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
494 store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
495 store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
496 store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
497 store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
498 store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
499 store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
500 store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
501 store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
502 store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
503 store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
504 store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
505 store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
506 store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
510 define void @smul_v64i8() {
511 ; SSE-LABEL: @smul_v64i8(
512 ; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
513 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
514 ; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
515 ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
516 ; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
517 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
518 ; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
519 ; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
520 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
521 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
522 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
523 ; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
524 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
525 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
526 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
527 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
530 ; SLM-LABEL: @smul_v64i8(
531 ; SLM-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
532 ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
533 ; SLM-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
534 ; SLM-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
535 ; SLM-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
536 ; SLM-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
537 ; SLM-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
538 ; SLM-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
539 ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
540 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
541 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
542 ; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
543 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
544 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
545 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
546 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
549 ; AVX-LABEL: @smul_v64i8(
550 ; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
551 ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
552 ; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], i32 3)
553 ; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
554 ; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
555 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
556 ; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]], i32 3)
557 ; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
560 ; AVX512-LABEL: @smul_v64i8(
561 ; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
562 ; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
563 ; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]], i32 3)
564 ; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
565 ; AVX512-NEXT: ret void
567 %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
568 %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
569 %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
570 %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
571 %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
572 %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
573 %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
574 %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
575 %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
576 %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
577 %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
578 %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
579 %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
580 %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
581 %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
582 %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
583 %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
584 %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
585 %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
586 %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
587 %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
588 %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
589 %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
590 %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
591 %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
592 %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
593 %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
594 %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
595 %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
596 %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
597 %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
598 %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
599 %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
600 %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
601 %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
602 %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
603 %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
604 %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
605 %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
606 %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
607 %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
608 %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
609 %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
610 %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
611 %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
612 %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
613 %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
614 %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
615 %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
616 %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
617 %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
618 %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
619 %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
620 %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
621 %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
622 %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
623 %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
624 %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
625 %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
626 %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
627 %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
628 %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
629 %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
630 %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
631 %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
632 %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
633 %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
634 %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
635 %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
636 %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
637 %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
638 %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
639 %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
640 %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
641 %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
642 %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
643 %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
644 %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
645 %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
646 %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
647 %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
648 %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
649 %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
650 %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
651 %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
652 %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
653 %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
654 %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
655 %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
656 %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
657 %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
658 %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
659 %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
660 %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
661 %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
662 %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
663 %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
664 %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
665 %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
666 %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
667 %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
668 %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
669 %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
670 %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
671 %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
672 %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
673 %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
674 %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
675 %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
676 %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
677 %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
678 %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
679 %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
680 %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
681 %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
682 %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
683 %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
684 %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
685 %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
686 %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
687 %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
688 %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
689 %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
690 %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
691 %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
692 %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
693 %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
694 %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
695 %r0 = call i8 @llvm.smul.fix.i8(i8 %a0 , i8 %b0 , i32 3)
696 %r1 = call i8 @llvm.smul.fix.i8(i8 %a1 , i8 %b1 , i32 3)
697 %r2 = call i8 @llvm.smul.fix.i8(i8 %a2 , i8 %b2 , i32 3)
698 %r3 = call i8 @llvm.smul.fix.i8(i8 %a3 , i8 %b3 , i32 3)
699 %r4 = call i8 @llvm.smul.fix.i8(i8 %a4 , i8 %b4 , i32 3)
700 %r5 = call i8 @llvm.smul.fix.i8(i8 %a5 , i8 %b5 , i32 3)
701 %r6 = call i8 @llvm.smul.fix.i8(i8 %a6 , i8 %b6 , i32 3)
702 %r7 = call i8 @llvm.smul.fix.i8(i8 %a7 , i8 %b7 , i32 3)
703 %r8 = call i8 @llvm.smul.fix.i8(i8 %a8 , i8 %b8 , i32 3)
704 %r9 = call i8 @llvm.smul.fix.i8(i8 %a9 , i8 %b9 , i32 3)
705 %r10 = call i8 @llvm.smul.fix.i8(i8 %a10, i8 %b10, i32 3)
706 %r11 = call i8 @llvm.smul.fix.i8(i8 %a11, i8 %b11, i32 3)
707 %r12 = call i8 @llvm.smul.fix.i8(i8 %a12, i8 %b12, i32 3)
708 %r13 = call i8 @llvm.smul.fix.i8(i8 %a13, i8 %b13, i32 3)
709 %r14 = call i8 @llvm.smul.fix.i8(i8 %a14, i8 %b14, i32 3)
710 %r15 = call i8 @llvm.smul.fix.i8(i8 %a15, i8 %b15, i32 3)
711 %r16 = call i8 @llvm.smul.fix.i8(i8 %a16, i8 %b16, i32 3)
712 %r17 = call i8 @llvm.smul.fix.i8(i8 %a17, i8 %b17, i32 3)
713 %r18 = call i8 @llvm.smul.fix.i8(i8 %a18, i8 %b18, i32 3)
714 %r19 = call i8 @llvm.smul.fix.i8(i8 %a19, i8 %b19, i32 3)
715 %r20 = call i8 @llvm.smul.fix.i8(i8 %a20, i8 %b20, i32 3)
716 %r21 = call i8 @llvm.smul.fix.i8(i8 %a21, i8 %b21, i32 3)
717 %r22 = call i8 @llvm.smul.fix.i8(i8 %a22, i8 %b22, i32 3)
718 %r23 = call i8 @llvm.smul.fix.i8(i8 %a23, i8 %b23, i32 3)
719 %r24 = call i8 @llvm.smul.fix.i8(i8 %a24, i8 %b24, i32 3)
720 %r25 = call i8 @llvm.smul.fix.i8(i8 %a25, i8 %b25, i32 3)
721 %r26 = call i8 @llvm.smul.fix.i8(i8 %a26, i8 %b26, i32 3)
722 %r27 = call i8 @llvm.smul.fix.i8(i8 %a27, i8 %b27, i32 3)
723 %r28 = call i8 @llvm.smul.fix.i8(i8 %a28, i8 %b28, i32 3)
724 %r29 = call i8 @llvm.smul.fix.i8(i8 %a29, i8 %b29, i32 3)
725 %r30 = call i8 @llvm.smul.fix.i8(i8 %a30, i8 %b30, i32 3)
726 %r31 = call i8 @llvm.smul.fix.i8(i8 %a31, i8 %b31, i32 3)
727 %r32 = call i8 @llvm.smul.fix.i8(i8 %a32, i8 %b32, i32 3)
728 %r33 = call i8 @llvm.smul.fix.i8(i8 %a33, i8 %b33, i32 3)
729 %r34 = call i8 @llvm.smul.fix.i8(i8 %a34, i8 %b34, i32 3)
730 %r35 = call i8 @llvm.smul.fix.i8(i8 %a35, i8 %b35, i32 3)
731 %r36 = call i8 @llvm.smul.fix.i8(i8 %a36, i8 %b36, i32 3)
732 %r37 = call i8 @llvm.smul.fix.i8(i8 %a37, i8 %b37, i32 3)
733 %r38 = call i8 @llvm.smul.fix.i8(i8 %a38, i8 %b38, i32 3)
734 %r39 = call i8 @llvm.smul.fix.i8(i8 %a39, i8 %b39, i32 3)
735 %r40 = call i8 @llvm.smul.fix.i8(i8 %a40, i8 %b40, i32 3)
736 %r41 = call i8 @llvm.smul.fix.i8(i8 %a41, i8 %b41, i32 3)
737 %r42 = call i8 @llvm.smul.fix.i8(i8 %a42, i8 %b42, i32 3)
738 %r43 = call i8 @llvm.smul.fix.i8(i8 %a43, i8 %b43, i32 3)
739 %r44 = call i8 @llvm.smul.fix.i8(i8 %a44, i8 %b44, i32 3)
740 %r45 = call i8 @llvm.smul.fix.i8(i8 %a45, i8 %b45, i32 3)
741 %r46 = call i8 @llvm.smul.fix.i8(i8 %a46, i8 %b46, i32 3)
742 %r47 = call i8 @llvm.smul.fix.i8(i8 %a47, i8 %b47, i32 3)
743 %r48 = call i8 @llvm.smul.fix.i8(i8 %a48, i8 %b48, i32 3)
744 %r49 = call i8 @llvm.smul.fix.i8(i8 %a49, i8 %b49, i32 3)
745 %r50 = call i8 @llvm.smul.fix.i8(i8 %a50, i8 %b50, i32 3)
746 %r51 = call i8 @llvm.smul.fix.i8(i8 %a51, i8 %b51, i32 3)
747 %r52 = call i8 @llvm.smul.fix.i8(i8 %a52, i8 %b52, i32 3)
748 %r53 = call i8 @llvm.smul.fix.i8(i8 %a53, i8 %b53, i32 3)
749 %r54 = call i8 @llvm.smul.fix.i8(i8 %a54, i8 %b54, i32 3)
750 %r55 = call i8 @llvm.smul.fix.i8(i8 %a55, i8 %b55, i32 3)
751 %r56 = call i8 @llvm.smul.fix.i8(i8 %a56, i8 %b56, i32 3)
752 %r57 = call i8 @llvm.smul.fix.i8(i8 %a57, i8 %b57, i32 3)
753 %r58 = call i8 @llvm.smul.fix.i8(i8 %a58, i8 %b58, i32 3)
754 %r59 = call i8 @llvm.smul.fix.i8(i8 %a59, i8 %b59, i32 3)
755 %r60 = call i8 @llvm.smul.fix.i8(i8 %a60, i8 %b60, i32 3)
756 %r61 = call i8 @llvm.smul.fix.i8(i8 %a61, i8 %b61, i32 3)
757 %r62 = call i8 @llvm.smul.fix.i8(i8 %a62, i8 %b62, i32 3)
758 %r63 = call i8 @llvm.smul.fix.i8(i8 %a63, i8 %b63, i32 3)
759 store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
760 store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
761 store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
762 store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
763 store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
764 store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
765 store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
766 store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
767 store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
768 store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
769 store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
770 store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
771 store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
772 store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
773 store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
774 store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
775 store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
776 store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
777 store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
778 store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
779 store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
780 store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
781 store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
782 store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
783 store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
784 store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
785 store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
786 store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
787 store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
788 store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
789 store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
790 store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
791 store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
792 store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
793 store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
794 store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
795 store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
796 store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
797 store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
798 store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
799 store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
800 store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
801 store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
802 store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
803 store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
804 store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
805 store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
806 store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
807 store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
808 store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
809 store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
810 store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
811 store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
812 store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
813 store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
814 store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
815 store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
816 store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
817 store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
818 store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
819 store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
820 store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
821 store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
822 store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
826 declare i64 @llvm.umul.fix.i64(i64, i64, i32)
827 declare i32 @llvm.umul.fix.i32(i32, i32, i32)
828 declare i16 @llvm.umul.fix.i16(i16, i16, i32)
829 declare i8 @llvm.umul.fix.i8 (i8 , i8 , i32)
831 define void @umul_v8i64() {
832 ; SSE-LABEL: @umul_v8i64(
833 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
834 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
835 ; SSE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
836 ; SSE-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
837 ; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
838 ; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
839 ; SSE-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
840 ; SSE-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
841 ; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
842 ; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
843 ; SSE-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
844 ; SSE-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
845 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
846 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
847 ; SSE-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
848 ; SSE-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
851 ; SLM-LABEL: @umul_v8i64(
852 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
853 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
854 ; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
855 ; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
856 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
857 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
858 ; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
859 ; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
860 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
861 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
862 ; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
863 ; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
864 ; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
865 ; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
866 ; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
867 ; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
870 ; AVX1-LABEL: @umul_v8i64(
871 ; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
872 ; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
873 ; AVX1-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
874 ; AVX1-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
875 ; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
876 ; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
877 ; AVX1-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
878 ; AVX1-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
879 ; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
880 ; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
881 ; AVX1-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
882 ; AVX1-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
883 ; AVX1-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
884 ; AVX1-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
885 ; AVX1-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
886 ; AVX1-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
887 ; AVX1-NEXT: ret void
889 ; AVX2-LABEL: @umul_v8i64(
890 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
891 ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
892 ; AVX2-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
893 ; AVX2-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
894 ; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
895 ; AVX2-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
896 ; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
897 ; AVX2-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
898 ; AVX2-NEXT: ret void
900 ; AVX512-LABEL: @umul_v8i64(
901 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
902 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
903 ; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]], i32 3)
904 ; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
905 ; AVX512-NEXT: ret void
907 ; AVX256BW-LABEL: @umul_v8i64(
908 ; AVX256BW-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
909 ; AVX256BW-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
910 ; AVX256BW-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
911 ; AVX256BW-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
912 ; AVX256BW-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
913 ; AVX256BW-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
914 ; AVX256BW-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
915 ; AVX256BW-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
916 ; AVX256BW-NEXT: ret void
918 %a0 = load i64, ptr @a64, align 8
919 %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
920 %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
921 %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
922 %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
923 %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
924 %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
925 %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
926 %b0 = load i64, ptr @b64, align 8
927 %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
928 %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
929 %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
930 %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
931 %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
932 %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
933 %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
934 %r0 = call i64 @llvm.umul.fix.i64(i64 %a0, i64 %b0, i32 3)
935 %r1 = call i64 @llvm.umul.fix.i64(i64 %a1, i64 %b1, i32 3)
936 %r2 = call i64 @llvm.umul.fix.i64(i64 %a2, i64 %b2, i32 3)
937 %r3 = call i64 @llvm.umul.fix.i64(i64 %a3, i64 %b3, i32 3)
938 %r4 = call i64 @llvm.umul.fix.i64(i64 %a4, i64 %b4, i32 3)
939 %r5 = call i64 @llvm.umul.fix.i64(i64 %a5, i64 %b5, i32 3)
940 %r6 = call i64 @llvm.umul.fix.i64(i64 %a6, i64 %b6, i32 3)
941 %r7 = call i64 @llvm.umul.fix.i64(i64 %a7, i64 %b7, i32 3)
942 store i64 %r0, ptr @c64, align 8
943 store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
944 store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
945 store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
946 store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
947 store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
948 store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
949 store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
953 define void @umul_v16i32() {
954 ; SSE-LABEL: @umul_v16i32(
955 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
956 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
957 ; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 3)
958 ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
959 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
960 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
961 ; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3)
962 ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
963 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
964 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
965 ; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32 3)
966 ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
967 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
968 ; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
969 ; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 3)
970 ; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
973 ; SLM-LABEL: @umul_v16i32(
974 ; SLM-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4
975 ; SLM-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
976 ; SLM-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
977 ; SLM-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
978 ; SLM-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
979 ; SLM-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
980 ; SLM-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
981 ; SLM-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
982 ; SLM-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
983 ; SLM-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
984 ; SLM-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
985 ; SLM-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
986 ; SLM-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
987 ; SLM-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
988 ; SLM-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
989 ; SLM-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
990 ; SLM-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4
991 ; SLM-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
992 ; SLM-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
993 ; SLM-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
994 ; SLM-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
995 ; SLM-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
996 ; SLM-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
997 ; SLM-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
998 ; SLM-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
999 ; SLM-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
1000 ; SLM-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
1001 ; SLM-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
1002 ; SLM-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
1003 ; SLM-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
1004 ; SLM-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
1005 ; SLM-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
1006 ; SLM-NEXT: [[R0:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3)
1007 ; SLM-NEXT: [[R1:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3)
1008 ; SLM-NEXT: [[R2:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3)
1009 ; SLM-NEXT: [[R3:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A3]], i32 [[B3]], i32 3)
1010 ; SLM-NEXT: [[R4:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A4]], i32 [[B4]], i32 3)
1011 ; SLM-NEXT: [[R5:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A5]], i32 [[B5]], i32 3)
1012 ; SLM-NEXT: [[R6:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A6]], i32 [[B6]], i32 3)
1013 ; SLM-NEXT: [[R7:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A7]], i32 [[B7]], i32 3)
1014 ; SLM-NEXT: [[R8:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A8]], i32 [[B8]], i32 3)
1015 ; SLM-NEXT: [[R9:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A9]], i32 [[B9]], i32 3)
1016 ; SLM-NEXT: [[R10:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A10]], i32 [[B10]], i32 3)
1017 ; SLM-NEXT: [[R11:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A11]], i32 [[B11]], i32 3)
1018 ; SLM-NEXT: [[R12:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A12]], i32 [[B12]], i32 3)
1019 ; SLM-NEXT: [[R13:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3)
1020 ; SLM-NEXT: [[R14:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3)
1021 ; SLM-NEXT: [[R15:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3)
1022 ; SLM-NEXT: store i32 [[R0]], ptr @c32, align 4
1023 ; SLM-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
1024 ; SLM-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
1025 ; SLM-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
1026 ; SLM-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
1027 ; SLM-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
1028 ; SLM-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
1029 ; SLM-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
1030 ; SLM-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
1031 ; SLM-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
1032 ; SLM-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
1033 ; SLM-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
1034 ; SLM-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
1035 ; SLM-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
1036 ; SLM-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
1037 ; SLM-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
1038 ; SLM-NEXT: ret void
1040 ; AVX-LABEL: @umul_v16i32(
1041 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
1042 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
1043 ; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], i32 3)
1044 ; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
1045 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
1046 ; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
1047 ; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]], i32 3)
1048 ; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
1049 ; AVX-NEXT: ret void
1051 ; AVX512-LABEL: @umul_v16i32(
1052 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
1053 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
1054 ; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 3)
1055 ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
1056 ; AVX512-NEXT: ret void
1058 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
1059 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
1060 %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
1061 %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
1062 %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
1063 %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
1064 %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
1065 %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
1066 %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
1067 %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
1068 %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
1069 %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
1070 %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
1071 %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
1072 %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
1073 %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
1074 %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
1075 %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
1076 %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
1077 %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
1078 %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
1079 %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
1080 %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
1081 %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
1082 %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
1083 %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
1084 %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
1085 %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
1086 %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
1087 %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
1088 %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
1089 %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
1090 %r0 = call i32 @llvm.umul.fix.i32(i32 %a0 , i32 %b0 , i32 3)
1091 %r1 = call i32 @llvm.umul.fix.i32(i32 %a1 , i32 %b1 , i32 3)
1092 %r2 = call i32 @llvm.umul.fix.i32(i32 %a2 , i32 %b2 , i32 3)
1093 %r3 = call i32 @llvm.umul.fix.i32(i32 %a3 , i32 %b3 , i32 3)
1094 %r4 = call i32 @llvm.umul.fix.i32(i32 %a4 , i32 %b4 , i32 3)
1095 %r5 = call i32 @llvm.umul.fix.i32(i32 %a5 , i32 %b5 , i32 3)
1096 %r6 = call i32 @llvm.umul.fix.i32(i32 %a6 , i32 %b6 , i32 3)
1097 %r7 = call i32 @llvm.umul.fix.i32(i32 %a7 , i32 %b7 , i32 3)
1098 %r8 = call i32 @llvm.umul.fix.i32(i32 %a8 , i32 %b8 , i32 3)
1099 %r9 = call i32 @llvm.umul.fix.i32(i32 %a9 , i32 %b9 , i32 3)
1100 %r10 = call i32 @llvm.umul.fix.i32(i32 %a10, i32 %b10, i32 3)
1101 %r11 = call i32 @llvm.umul.fix.i32(i32 %a11, i32 %b11, i32 3)
1102 %r12 = call i32 @llvm.umul.fix.i32(i32 %a12, i32 %b12, i32 3)
1103 %r13 = call i32 @llvm.umul.fix.i32(i32 %a13, i32 %b13, i32 3)
1104 %r14 = call i32 @llvm.umul.fix.i32(i32 %a14, i32 %b14, i32 3)
1105 %r15 = call i32 @llvm.umul.fix.i32(i32 %a15, i32 %b15, i32 3)
1106 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
1107 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
1108 store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
1109 store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
1110 store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
1111 store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
1112 store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
1113 store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
1114 store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
1115 store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
1116 store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
1117 store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
1118 store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
1119 store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
1120 store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
1121 store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
1125 define void @umul_v32i16() {
1126 ; SSE-LABEL: @umul_v32i16(
1127 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
1128 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
1129 ; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
1130 ; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
1131 ; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
1132 ; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
1133 ; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
1134 ; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
1135 ; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
1136 ; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
1137 ; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
1138 ; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
1139 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
1140 ; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
1141 ; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
1142 ; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
1143 ; SSE-NEXT: ret void
1145 ; SLM-LABEL: @umul_v32i16(
1146 ; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
1147 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
1148 ; SLM-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
1149 ; SLM-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
1150 ; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
1151 ; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
1152 ; SLM-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
1153 ; SLM-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
1154 ; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
1155 ; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
1156 ; SLM-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
1157 ; SLM-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
1158 ; SLM-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
1159 ; SLM-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
1160 ; SLM-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
1161 ; SLM-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
1162 ; SLM-NEXT: ret void
1164 ; AVX-LABEL: @umul_v32i16(
1165 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
1166 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
1167 ; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]], i32 3)
1168 ; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
1169 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
1170 ; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
1171 ; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]], i32 3)
1172 ; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
1173 ; AVX-NEXT: ret void
1175 ; AVX512-LABEL: @umul_v32i16(
1176 ; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
1177 ; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
1178 ; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]], i32 3)
1179 ; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
1180 ; AVX512-NEXT: ret void
1182 %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
1183 %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
1184 %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
1185 %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
1186 %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
1187 %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
1188 %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
1189 %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
1190 %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
1191 %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
1192 %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
1193 %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
1194 %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
1195 %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
1196 %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
1197 %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
1198 %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
1199 %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
1200 %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
1201 %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
1202 %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
1203 %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
1204 %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
1205 %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
1206 %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
1207 %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
1208 %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
1209 %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
1210 %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
1211 %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
1212 %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
1213 %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
1214 %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
1215 %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
1216 %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
1217 %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
1218 %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
1219 %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
1220 %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
1221 %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
1222 %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
1223 %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
1224 %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
1225 %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
1226 %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
1227 %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
1228 %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
1229 %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
1230 %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
1231 %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
1232 %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
1233 %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
1234 %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
1235 %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
1236 %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
1237 %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
1238 %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
1239 %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
1240 %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
1241 %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
1242 %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
1243 %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
1244 %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
1245 %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
1246 %r0 = call i16 @llvm.umul.fix.i16(i16 %a0 , i16 %b0 , i32 3)
1247 %r1 = call i16 @llvm.umul.fix.i16(i16 %a1 , i16 %b1 , i32 3)
1248 %r2 = call i16 @llvm.umul.fix.i16(i16 %a2 , i16 %b2 , i32 3)
1249 %r3 = call i16 @llvm.umul.fix.i16(i16 %a3 , i16 %b3 , i32 3)
1250 %r4 = call i16 @llvm.umul.fix.i16(i16 %a4 , i16 %b4 , i32 3)
1251 %r5 = call i16 @llvm.umul.fix.i16(i16 %a5 , i16 %b5 , i32 3)
1252 %r6 = call i16 @llvm.umul.fix.i16(i16 %a6 , i16 %b6 , i32 3)
1253 %r7 = call i16 @llvm.umul.fix.i16(i16 %a7 , i16 %b7 , i32 3)
1254 %r8 = call i16 @llvm.umul.fix.i16(i16 %a8 , i16 %b8 , i32 3)
1255 %r9 = call i16 @llvm.umul.fix.i16(i16 %a9 , i16 %b9 , i32 3)
1256 %r10 = call i16 @llvm.umul.fix.i16(i16 %a10, i16 %b10, i32 3)
1257 %r11 = call i16 @llvm.umul.fix.i16(i16 %a11, i16 %b11, i32 3)
1258 %r12 = call i16 @llvm.umul.fix.i16(i16 %a12, i16 %b12, i32 3)
1259 %r13 = call i16 @llvm.umul.fix.i16(i16 %a13, i16 %b13, i32 3)
1260 %r14 = call i16 @llvm.umul.fix.i16(i16 %a14, i16 %b14, i32 3)
1261 %r15 = call i16 @llvm.umul.fix.i16(i16 %a15, i16 %b15, i32 3)
1262 %r16 = call i16 @llvm.umul.fix.i16(i16 %a16, i16 %b16, i32 3)
1263 %r17 = call i16 @llvm.umul.fix.i16(i16 %a17, i16 %b17, i32 3)
1264 %r18 = call i16 @llvm.umul.fix.i16(i16 %a18, i16 %b18, i32 3)
1265 %r19 = call i16 @llvm.umul.fix.i16(i16 %a19, i16 %b19, i32 3)
1266 %r20 = call i16 @llvm.umul.fix.i16(i16 %a20, i16 %b20, i32 3)
1267 %r21 = call i16 @llvm.umul.fix.i16(i16 %a21, i16 %b21, i32 3)
1268 %r22 = call i16 @llvm.umul.fix.i16(i16 %a22, i16 %b22, i32 3)
1269 %r23 = call i16 @llvm.umul.fix.i16(i16 %a23, i16 %b23, i32 3)
1270 %r24 = call i16 @llvm.umul.fix.i16(i16 %a24, i16 %b24, i32 3)
1271 %r25 = call i16 @llvm.umul.fix.i16(i16 %a25, i16 %b25, i32 3)
1272 %r26 = call i16 @llvm.umul.fix.i16(i16 %a26, i16 %b26, i32 3)
1273 %r27 = call i16 @llvm.umul.fix.i16(i16 %a27, i16 %b27, i32 3)
1274 %r28 = call i16 @llvm.umul.fix.i16(i16 %a28, i16 %b28, i32 3)
1275 %r29 = call i16 @llvm.umul.fix.i16(i16 %a29, i16 %b29, i32 3)
1276 %r30 = call i16 @llvm.umul.fix.i16(i16 %a30, i16 %b30, i32 3)
1277 %r31 = call i16 @llvm.umul.fix.i16(i16 %a31, i16 %b31, i32 3)
1278 store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
1279 store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
1280 store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
1281 store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
1282 store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
1283 store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
1284 store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
1285 store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
1286 store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
1287 store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
1288 store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
1289 store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
1290 store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
1291 store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
1292 store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
1293 store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
1294 store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
1295 store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
1296 store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
1297 store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
1298 store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
1299 store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
1300 store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
1301 store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
1302 store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
1303 store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
1304 store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
1305 store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
1306 store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
1307 store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
1308 store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
1309 store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
1313 define void @umul_v64i8() {
1314 ; SSE-LABEL: @umul_v64i8(
1315 ; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
1316 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
1317 ; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
1318 ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
1319 ; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
1320 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
1321 ; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
1322 ; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
1323 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
1324 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
1325 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
1326 ; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
1327 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
1328 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
1329 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
1330 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
1331 ; SSE-NEXT: ret void
1333 ; SLM-LABEL: @umul_v64i8(
1334 ; SLM-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
1335 ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
1336 ; SLM-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
1337 ; SLM-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
1338 ; SLM-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
1339 ; SLM-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
1340 ; SLM-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
1341 ; SLM-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
1342 ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
1343 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
1344 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
1345 ; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
1346 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
1347 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
1348 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
1349 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
1350 ; SLM-NEXT: ret void
1352 ; AVX-LABEL: @umul_v64i8(
1353 ; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
1354 ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
1355 ; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], i32 3)
1356 ; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
1357 ; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
1358 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
1359 ; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]], i32 3)
1360 ; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
1361 ; AVX-NEXT: ret void
1363 ; AVX512-LABEL: @umul_v64i8(
1364 ; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
1365 ; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
1366 ; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]], i32 3)
1367 ; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
1368 ; AVX512-NEXT: ret void
1370 %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
1371 %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
1372 %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
1373 %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
1374 %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
1375 %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
1376 %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
1377 %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
1378 %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
1379 %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
1380 %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
1381 %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
1382 %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
1383 %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
1384 %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
1385 %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
1386 %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
1387 %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
1388 %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
1389 %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
1390 %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
1391 %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
1392 %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
1393 %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
1394 %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
1395 %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
1396 %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
1397 %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
1398 %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
1399 %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
1400 %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
1401 %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
1402 %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
1403 %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
1404 %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
1405 %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
1406 %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
1407 %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
1408 %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
1409 %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
1410 %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
1411 %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
1412 %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
1413 %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
1414 %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
1415 %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
1416 %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
1417 %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
1418 %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
1419 %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
1420 %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
1421 %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
1422 %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
1423 %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
1424 %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
1425 %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
1426 %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
1427 %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
1428 %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
1429 %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
1430 %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
1431 %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
1432 %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
1433 %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
1434 %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
1435 %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
1436 %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
1437 %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
1438 %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
1439 %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
1440 %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
1441 %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
1442 %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
1443 %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
1444 %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
1445 %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
1446 %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
1447 %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
1448 %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
1449 %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
1450 %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
1451 %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
1452 %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
1453 %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
1454 %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
1455 %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
1456 %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
1457 %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
1458 %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
1459 %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
1460 %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
1461 %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
1462 %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
1463 %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
1464 %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
1465 %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
1466 %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
1467 %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
1468 %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
1469 %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
1470 %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
1471 %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
1472 %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
1473 %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
1474 %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
1475 %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
1476 %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
1477 %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
1478 %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
1479 %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
1480 %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
1481 %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
1482 %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
1483 %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
1484 %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
1485 %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
1486 %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
1487 %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
1488 %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
1489 %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
1490 %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
1491 %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
1492 %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
1493 %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
1494 %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
1495 %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
1496 %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
1497 %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
1498 %r0 = call i8 @llvm.umul.fix.i8(i8 %a0 , i8 %b0 , i32 3)
1499 %r1 = call i8 @llvm.umul.fix.i8(i8 %a1 , i8 %b1 , i32 3)
1500 %r2 = call i8 @llvm.umul.fix.i8(i8 %a2 , i8 %b2 , i32 3)
1501 %r3 = call i8 @llvm.umul.fix.i8(i8 %a3 , i8 %b3 , i32 3)
1502 %r4 = call i8 @llvm.umul.fix.i8(i8 %a4 , i8 %b4 , i32 3)
1503 %r5 = call i8 @llvm.umul.fix.i8(i8 %a5 , i8 %b5 , i32 3)
1504 %r6 = call i8 @llvm.umul.fix.i8(i8 %a6 , i8 %b6 , i32 3)
1505 %r7 = call i8 @llvm.umul.fix.i8(i8 %a7 , i8 %b7 , i32 3)
1506 %r8 = call i8 @llvm.umul.fix.i8(i8 %a8 , i8 %b8 , i32 3)
1507 %r9 = call i8 @llvm.umul.fix.i8(i8 %a9 , i8 %b9 , i32 3)
1508 %r10 = call i8 @llvm.umul.fix.i8(i8 %a10, i8 %b10, i32 3)
1509 %r11 = call i8 @llvm.umul.fix.i8(i8 %a11, i8 %b11, i32 3)
1510 %r12 = call i8 @llvm.umul.fix.i8(i8 %a12, i8 %b12, i32 3)
1511 %r13 = call i8 @llvm.umul.fix.i8(i8 %a13, i8 %b13, i32 3)
1512 %r14 = call i8 @llvm.umul.fix.i8(i8 %a14, i8 %b14, i32 3)
1513 %r15 = call i8 @llvm.umul.fix.i8(i8 %a15, i8 %b15, i32 3)
1514 %r16 = call i8 @llvm.umul.fix.i8(i8 %a16, i8 %b16, i32 3)
1515 %r17 = call i8 @llvm.umul.fix.i8(i8 %a17, i8 %b17, i32 3)
1516 %r18 = call i8 @llvm.umul.fix.i8(i8 %a18, i8 %b18, i32 3)
1517 %r19 = call i8 @llvm.umul.fix.i8(i8 %a19, i8 %b19, i32 3)
1518 %r20 = call i8 @llvm.umul.fix.i8(i8 %a20, i8 %b20, i32 3)
1519 %r21 = call i8 @llvm.umul.fix.i8(i8 %a21, i8 %b21, i32 3)
1520 %r22 = call i8 @llvm.umul.fix.i8(i8 %a22, i8 %b22, i32 3)
1521 %r23 = call i8 @llvm.umul.fix.i8(i8 %a23, i8 %b23, i32 3)
1522 %r24 = call i8 @llvm.umul.fix.i8(i8 %a24, i8 %b24, i32 3)
1523 %r25 = call i8 @llvm.umul.fix.i8(i8 %a25, i8 %b25, i32 3)
1524 %r26 = call i8 @llvm.umul.fix.i8(i8 %a26, i8 %b26, i32 3)
1525 %r27 = call i8 @llvm.umul.fix.i8(i8 %a27, i8 %b27, i32 3)
1526 %r28 = call i8 @llvm.umul.fix.i8(i8 %a28, i8 %b28, i32 3)
1527 %r29 = call i8 @llvm.umul.fix.i8(i8 %a29, i8 %b29, i32 3)
1528 %r30 = call i8 @llvm.umul.fix.i8(i8 %a30, i8 %b30, i32 3)
1529 %r31 = call i8 @llvm.umul.fix.i8(i8 %a31, i8 %b31, i32 3)
1530 %r32 = call i8 @llvm.umul.fix.i8(i8 %a32, i8 %b32, i32 3)
1531 %r33 = call i8 @llvm.umul.fix.i8(i8 %a33, i8 %b33, i32 3)
1532 %r34 = call i8 @llvm.umul.fix.i8(i8 %a34, i8 %b34, i32 3)
1533 %r35 = call i8 @llvm.umul.fix.i8(i8 %a35, i8 %b35, i32 3)
1534 %r36 = call i8 @llvm.umul.fix.i8(i8 %a36, i8 %b36, i32 3)
1535 %r37 = call i8 @llvm.umul.fix.i8(i8 %a37, i8 %b37, i32 3)
1536 %r38 = call i8 @llvm.umul.fix.i8(i8 %a38, i8 %b38, i32 3)
1537 %r39 = call i8 @llvm.umul.fix.i8(i8 %a39, i8 %b39, i32 3)
1538 %r40 = call i8 @llvm.umul.fix.i8(i8 %a40, i8 %b40, i32 3)
1539 %r41 = call i8 @llvm.umul.fix.i8(i8 %a41, i8 %b41, i32 3)
1540 %r42 = call i8 @llvm.umul.fix.i8(i8 %a42, i8 %b42, i32 3)
1541 %r43 = call i8 @llvm.umul.fix.i8(i8 %a43, i8 %b43, i32 3)
1542 %r44 = call i8 @llvm.umul.fix.i8(i8 %a44, i8 %b44, i32 3)
1543 %r45 = call i8 @llvm.umul.fix.i8(i8 %a45, i8 %b45, i32 3)
1544 %r46 = call i8 @llvm.umul.fix.i8(i8 %a46, i8 %b46, i32 3)
1545 %r47 = call i8 @llvm.umul.fix.i8(i8 %a47, i8 %b47, i32 3)
1546 %r48 = call i8 @llvm.umul.fix.i8(i8 %a48, i8 %b48, i32 3)
1547 %r49 = call i8 @llvm.umul.fix.i8(i8 %a49, i8 %b49, i32 3)
1548 %r50 = call i8 @llvm.umul.fix.i8(i8 %a50, i8 %b50, i32 3)
1549 %r51 = call i8 @llvm.umul.fix.i8(i8 %a51, i8 %b51, i32 3)
1550 %r52 = call i8 @llvm.umul.fix.i8(i8 %a52, i8 %b52, i32 3)
1551 %r53 = call i8 @llvm.umul.fix.i8(i8 %a53, i8 %b53, i32 3)
1552 %r54 = call i8 @llvm.umul.fix.i8(i8 %a54, i8 %b54, i32 3)
1553 %r55 = call i8 @llvm.umul.fix.i8(i8 %a55, i8 %b55, i32 3)
1554 %r56 = call i8 @llvm.umul.fix.i8(i8 %a56, i8 %b56, i32 3)
1555 %r57 = call i8 @llvm.umul.fix.i8(i8 %a57, i8 %b57, i32 3)
1556 %r58 = call i8 @llvm.umul.fix.i8(i8 %a58, i8 %b58, i32 3)
1557 %r59 = call i8 @llvm.umul.fix.i8(i8 %a59, i8 %b59, i32 3)
1558 %r60 = call i8 @llvm.umul.fix.i8(i8 %a60, i8 %b60, i32 3)
1559 %r61 = call i8 @llvm.umul.fix.i8(i8 %a61, i8 %b61, i32 3)
1560 %r62 = call i8 @llvm.umul.fix.i8(i8 %a62, i8 %b62, i32 3)
1561 %r63 = call i8 @llvm.umul.fix.i8(i8 %a63, i8 %b63, i32 3)
1562 store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
1563 store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
1564 store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
1565 store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
1566 store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
1567 store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
1568 store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
1569 store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
1570 store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
1571 store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
1572 store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
1573 store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
1574 store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
1575 store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
1576 store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
1577 store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
1578 store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
1579 store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
1580 store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
1581 store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
1582 store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
1583 store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
1584 store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
1585 store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
1586 store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
1587 store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
1588 store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
1589 store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
1590 store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
1591 store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
1592 store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
1593 store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
1594 store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
1595 store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
1596 store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
1597 store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
1598 store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
1599 store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
1600 store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
1601 store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
1602 store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
1603 store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
1604 store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
1605 store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
1606 store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
1607 store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
1608 store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
1609 store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
1610 store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
1611 store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
1612 store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
1613 store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
1614 store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
1615 store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
1616 store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
1617 store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
1618 store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
1619 store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
1620 store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
1621 store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
1622 store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
1623 store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
1624 store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
1625 store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1