1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
8 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
9 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
10 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
12 @a64 = common global [8 x i64] zeroinitializer, align 64
13 @b64 = common global [8 x i64] zeroinitializer, align 64
14 @c64 = common global [8 x i64] zeroinitializer, align 64
15 @a32 = common global [16 x i32] zeroinitializer, align 64
16 @b32 = common global [16 x i32] zeroinitializer, align 64
17 @c32 = common global [16 x i32] zeroinitializer, align 64
18 @a16 = common global [32 x i16] zeroinitializer, align 64
19 @b16 = common global [32 x i16] zeroinitializer, align 64
20 @c16 = common global [32 x i16] zeroinitializer, align 64
21 @a8 = common global [64 x i8] zeroinitializer, align 64
22 @b8 = common global [64 x i8] zeroinitializer, align 64
23 @c8 = common global [64 x i8] zeroinitializer, align 64
25 define void @sub_v8i64() {
26 ; SSE-LABEL: @sub_v8i64(
27 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
28 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
29 ; SSE-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
30 ; SSE-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
31 ; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
32 ; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
33 ; SSE-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
34 ; SSE-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
35 ; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
36 ; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
37 ; SSE-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP7]], [[TMP8]]
38 ; SSE-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
39 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
40 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
41 ; SSE-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[TMP10]], [[TMP11]]
42 ; SSE-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
45 ; SLM-LABEL: @sub_v8i64(
46 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
47 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
48 ; SLM-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
49 ; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
50 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
51 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
52 ; SLM-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
53 ; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
54 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
55 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
56 ; SLM-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP7]], [[TMP8]]
57 ; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
58 ; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
59 ; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
60 ; SLM-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[TMP10]], [[TMP11]]
61 ; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
64 ; AVX-LABEL: @sub_v8i64(
65 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
66 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
67 ; AVX-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
68 ; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
69 ; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
70 ; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
71 ; AVX-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[TMP4]], [[TMP5]]
72 ; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
75 ; AVX512-LABEL: @sub_v8i64(
76 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
77 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
78 ; AVX512-NEXT: [[TMP3:%.*]] = sub <8 x i64> [[TMP1]], [[TMP2]]
79 ; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
80 ; AVX512-NEXT: ret void
82 %a0 = load i64, ptr @a64, align 8
83 %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
84 %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
85 %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
86 %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
87 %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
88 %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
89 %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
90 %b0 = load i64, ptr @b64, align 8
91 %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
92 %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
93 %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
94 %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
95 %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
96 %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
97 %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
98 %r0 = sub i64 %a0, %b0
99 %r1 = sub i64 %a1, %b1
100 %r2 = sub i64 %a2, %b2
101 %r3 = sub i64 %a3, %b3
102 %r4 = sub i64 %a4, %b4
103 %r5 = sub i64 %a5, %b5
104 %r6 = sub i64 %a6, %b6
105 %r7 = sub i64 %a7, %b7
106 store i64 %r0, ptr @c64, align 8
107 store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
108 store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
109 store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
110 store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
111 store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
112 store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
113 store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
117 define void @sub_v16i32() {
118 ; SSE-LABEL: @sub_v16i32(
119 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
120 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
121 ; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
122 ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
123 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
124 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
125 ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]]
126 ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
127 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
128 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
129 ; SSE-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP7]], [[TMP8]]
130 ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
131 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
132 ; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
133 ; SSE-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP10]], [[TMP11]]
134 ; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
137 ; SLM-LABEL: @sub_v16i32(
138 ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
139 ; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
140 ; SLM-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
141 ; SLM-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
142 ; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
143 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
144 ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]]
145 ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
146 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
147 ; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
148 ; SLM-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP7]], [[TMP8]]
149 ; SLM-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
150 ; SLM-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
151 ; SLM-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
152 ; SLM-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP10]], [[TMP11]]
153 ; SLM-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
156 ; AVX-LABEL: @sub_v16i32(
157 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
158 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
159 ; AVX-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
160 ; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
161 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
162 ; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
163 ; AVX-NEXT: [[TMP6:%.*]] = sub <8 x i32> [[TMP4]], [[TMP5]]
164 ; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
167 ; AVX512-LABEL: @sub_v16i32(
168 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
169 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
170 ; AVX512-NEXT: [[TMP3:%.*]] = sub <16 x i32> [[TMP1]], [[TMP2]]
171 ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
172 ; AVX512-NEXT: ret void
174 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
175 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
176 %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
177 %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
178 %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
179 %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
180 %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
181 %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
182 %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
183 %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
184 %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
185 %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
186 %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
187 %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
188 %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
189 %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
190 %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
191 %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
192 %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
193 %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
194 %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
195 %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
196 %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
197 %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
198 %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
199 %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
200 %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
201 %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
202 %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
203 %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
204 %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
205 %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
206 %r0 = sub i32 %a0 , %b0
207 %r1 = sub i32 %a1 , %b1
208 %r2 = sub i32 %a2 , %b2
209 %r3 = sub i32 %a3 , %b3
210 %r4 = sub i32 %a4 , %b4
211 %r5 = sub i32 %a5 , %b5
212 %r6 = sub i32 %a6 , %b6
213 %r7 = sub i32 %a7 , %b7
214 %r8 = sub i32 %a8 , %b8
215 %r9 = sub i32 %a9 , %b9
216 %r10 = sub i32 %a10, %b10
217 %r11 = sub i32 %a11, %b11
218 %r12 = sub i32 %a12, %b12
219 %r13 = sub i32 %a13, %b13
220 %r14 = sub i32 %a14, %b14
221 %r15 = sub i32 %a15, %b15
222 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
223 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
224 store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
225 store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
226 store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
227 store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
228 store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
229 store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
230 store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
231 store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
232 store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
233 store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
234 store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
235 store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
236 store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
237 store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
241 define void @sub_v32i16() {
242 ; SSE-LABEL: @sub_v32i16(
243 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
244 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
245 ; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
246 ; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
247 ; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
248 ; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
249 ; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
250 ; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
251 ; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
252 ; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
253 ; SSE-NEXT: [[TMP9:%.*]] = sub <8 x i16> [[TMP7]], [[TMP8]]
254 ; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
255 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
256 ; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
257 ; SSE-NEXT: [[TMP12:%.*]] = sub <8 x i16> [[TMP10]], [[TMP11]]
258 ; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
261 ; SLM-LABEL: @sub_v32i16(
262 ; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
263 ; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
264 ; SLM-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
265 ; SLM-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
266 ; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
267 ; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
268 ; SLM-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
269 ; SLM-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
270 ; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
271 ; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
272 ; SLM-NEXT: [[TMP9:%.*]] = sub <8 x i16> [[TMP7]], [[TMP8]]
273 ; SLM-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
274 ; SLM-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
275 ; SLM-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
276 ; SLM-NEXT: [[TMP12:%.*]] = sub <8 x i16> [[TMP10]], [[TMP11]]
277 ; SLM-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
280 ; AVX-LABEL: @sub_v32i16(
281 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
282 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
283 ; AVX-NEXT: [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
284 ; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
285 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
286 ; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
287 ; AVX-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
288 ; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
291 ; AVX512-LABEL: @sub_v32i16(
292 ; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
293 ; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
294 ; AVX512-NEXT: [[TMP3:%.*]] = sub <32 x i16> [[TMP1]], [[TMP2]]
295 ; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
296 ; AVX512-NEXT: ret void
298 %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
299 %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
300 %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
301 %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
302 %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
303 %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
304 %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
305 %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
306 %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
307 %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
308 %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
309 %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
310 %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
311 %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
312 %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
313 %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
314 %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
315 %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
316 %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
317 %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
318 %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
319 %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
320 %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
321 %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
322 %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
323 %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
324 %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
325 %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
326 %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
327 %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
328 %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
329 %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
330 %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
331 %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
332 %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
333 %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
334 %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
335 %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
336 %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
337 %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
338 %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
339 %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
340 %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
341 %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
342 %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
343 %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
344 %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
345 %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
346 %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
347 %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
348 %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
349 %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
350 %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
351 %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
352 %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
353 %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
354 %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
355 %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
356 %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
357 %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
358 %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
359 %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
360 %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
361 %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
362 %r0 = sub i16 %a0 , %b0
363 %r1 = sub i16 %a1 , %b1
364 %r2 = sub i16 %a2 , %b2
365 %r3 = sub i16 %a3 , %b3
366 %r4 = sub i16 %a4 , %b4
367 %r5 = sub i16 %a5 , %b5
368 %r6 = sub i16 %a6 , %b6
369 %r7 = sub i16 %a7 , %b7
370 %r8 = sub i16 %a8 , %b8
371 %r9 = sub i16 %a9 , %b9
372 %r10 = sub i16 %a10, %b10
373 %r11 = sub i16 %a11, %b11
374 %r12 = sub i16 %a12, %b12
375 %r13 = sub i16 %a13, %b13
376 %r14 = sub i16 %a14, %b14
377 %r15 = sub i16 %a15, %b15
378 %r16 = sub i16 %a16, %b16
379 %r17 = sub i16 %a17, %b17
380 %r18 = sub i16 %a18, %b18
381 %r19 = sub i16 %a19, %b19
382 %r20 = sub i16 %a20, %b20
383 %r21 = sub i16 %a21, %b21
384 %r22 = sub i16 %a22, %b22
385 %r23 = sub i16 %a23, %b23
386 %r24 = sub i16 %a24, %b24
387 %r25 = sub i16 %a25, %b25
388 %r26 = sub i16 %a26, %b26
389 %r27 = sub i16 %a27, %b27
390 %r28 = sub i16 %a28, %b28
391 %r29 = sub i16 %a29, %b29
392 %r30 = sub i16 %a30, %b30
393 %r31 = sub i16 %a31, %b31
394 store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
395 store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
396 store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
397 store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
398 store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
399 store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
400 store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
401 store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
402 store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
403 store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
404 store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
405 store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
406 store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
407 store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
408 store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
409 store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
410 store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
411 store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
412 store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
413 store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
414 store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
415 store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
416 store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
417 store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
418 store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
419 store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
420 store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
421 store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
422 store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
423 store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
424 store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
425 store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
429 define void @sub_v64i8() {
430 ; SSE-LABEL: @sub_v64i8(
431 ; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
432 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
433 ; SSE-NEXT: [[TMP3:%.*]] = sub <16 x i8> [[TMP1]], [[TMP2]]
434 ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
435 ; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
436 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
437 ; SSE-NEXT: [[TMP6:%.*]] = sub <16 x i8> [[TMP4]], [[TMP5]]
438 ; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
439 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
440 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
441 ; SSE-NEXT: [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]]
442 ; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
443 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
444 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
445 ; SSE-NEXT: [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]]
446 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
449 ; SLM-LABEL: @sub_v64i8(
450 ; SLM-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
451 ; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
452 ; SLM-NEXT: [[TMP3:%.*]] = sub <16 x i8> [[TMP1]], [[TMP2]]
453 ; SLM-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
454 ; SLM-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
455 ; SLM-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
456 ; SLM-NEXT: [[TMP6:%.*]] = sub <16 x i8> [[TMP4]], [[TMP5]]
457 ; SLM-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
458 ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
459 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
460 ; SLM-NEXT: [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]]
461 ; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
462 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
463 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
464 ; SLM-NEXT: [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]]
465 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
468 ; AVX-LABEL: @sub_v64i8(
469 ; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
470 ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
471 ; AVX-NEXT: [[TMP3:%.*]] = sub <32 x i8> [[TMP1]], [[TMP2]]
472 ; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
473 ; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
474 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
475 ; AVX-NEXT: [[TMP6:%.*]] = sub <32 x i8> [[TMP4]], [[TMP5]]
476 ; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
479 ; AVX512-LABEL: @sub_v64i8(
480 ; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
481 ; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
482 ; AVX512-NEXT: [[TMP3:%.*]] = sub <64 x i8> [[TMP1]], [[TMP2]]
483 ; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
484 ; AVX512-NEXT: ret void
486 %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
487 %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
488 %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
489 %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
490 %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
491 %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
492 %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
493 %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
494 %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
495 %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
496 %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
497 %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
498 %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
499 %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
500 %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
501 %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
502 %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
503 %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
504 %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
505 %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
506 %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
507 %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
508 %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
509 %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
510 %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
511 %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
512 %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
513 %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
514 %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
515 %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
516 %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
517 %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
518 %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
519 %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
520 %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
521 %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
522 %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
523 %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
524 %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
525 %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
526 %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
527 %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
528 %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
529 %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
530 %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
531 %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
532 %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
533 %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
534 %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
535 %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
536 %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
537 %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
538 %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
539 %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
540 %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
541 %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
542 %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
543 %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
544 %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
545 %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
546 %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
547 %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
548 %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
549 %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
550 %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
551 %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
552 %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
553 %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
554 %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
555 %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
556 %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
557 %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
558 %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
559 %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
560 %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
561 %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
562 %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
563 %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
564 %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
565 %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
566 %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
567 %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
568 %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
569 %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
570 %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
571 %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
572 %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
573 %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
574 %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
575 %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
576 %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
577 %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
578 %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
579 %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
580 %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
581 %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
582 %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
583 %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
584 %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
585 %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
586 %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
587 %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
588 %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
589 %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
590 %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
591 %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
592 %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
593 %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
594 %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
595 %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
596 %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
597 %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
598 %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
599 %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
600 %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
601 %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
602 %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
603 %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
604 %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
605 %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
606 %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
607 %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
608 %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
609 %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
610 %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
611 %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
612 %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
613 %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
614 %r0 = sub i8 %a0 , %b0
615 %r1 = sub i8 %a1 , %b1
616 %r2 = sub i8 %a2 , %b2
617 %r3 = sub i8 %a3 , %b3
618 %r4 = sub i8 %a4 , %b4
619 %r5 = sub i8 %a5 , %b5
620 %r6 = sub i8 %a6 , %b6
621 %r7 = sub i8 %a7 , %b7
622 %r8 = sub i8 %a8 , %b8
623 %r9 = sub i8 %a9 , %b9
624 %r10 = sub i8 %a10, %b10
625 %r11 = sub i8 %a11, %b11
626 %r12 = sub i8 %a12, %b12
627 %r13 = sub i8 %a13, %b13
628 %r14 = sub i8 %a14, %b14
629 %r15 = sub i8 %a15, %b15
630 %r16 = sub i8 %a16, %b16
631 %r17 = sub i8 %a17, %b17
632 %r18 = sub i8 %a18, %b18
633 %r19 = sub i8 %a19, %b19
634 %r20 = sub i8 %a20, %b20
635 %r21 = sub i8 %a21, %b21
636 %r22 = sub i8 %a22, %b22
637 %r23 = sub i8 %a23, %b23
638 %r24 = sub i8 %a24, %b24
639 %r25 = sub i8 %a25, %b25
640 %r26 = sub i8 %a26, %b26
641 %r27 = sub i8 %a27, %b27
642 %r28 = sub i8 %a28, %b28
643 %r29 = sub i8 %a29, %b29
644 %r30 = sub i8 %a30, %b30
645 %r31 = sub i8 %a31, %b31
646 %r32 = sub i8 %a32, %b32
647 %r33 = sub i8 %a33, %b33
648 %r34 = sub i8 %a34, %b34
649 %r35 = sub i8 %a35, %b35
650 %r36 = sub i8 %a36, %b36
651 %r37 = sub i8 %a37, %b37
652 %r38 = sub i8 %a38, %b38
653 %r39 = sub i8 %a39, %b39
654 %r40 = sub i8 %a40, %b40
655 %r41 = sub i8 %a41, %b41
656 %r42 = sub i8 %a42, %b42
657 %r43 = sub i8 %a43, %b43
658 %r44 = sub i8 %a44, %b44
659 %r45 = sub i8 %a45, %b45
660 %r46 = sub i8 %a46, %b46
661 %r47 = sub i8 %a47, %b47
662 %r48 = sub i8 %a48, %b48
663 %r49 = sub i8 %a49, %b49
664 %r50 = sub i8 %a50, %b50
665 %r51 = sub i8 %a51, %b51
666 %r52 = sub i8 %a52, %b52
667 %r53 = sub i8 %a53, %b53
668 %r54 = sub i8 %a54, %b54
669 %r55 = sub i8 %a55, %b55
670 %r56 = sub i8 %a56, %b56
671 %r57 = sub i8 %a57, %b57
672 %r58 = sub i8 %a58, %b58
673 %r59 = sub i8 %a59, %b59
674 %r60 = sub i8 %a60, %b60
675 %r61 = sub i8 %a61, %b61
676 %r62 = sub i8 %a62, %b62
677 %r63 = sub i8 %a63, %b63
678 store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
679 store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
680 store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
681 store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
682 store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
683 store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
684 store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
685 store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
686 store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
687 store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
688 store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
689 store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
690 store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
691 store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
692 store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
693 store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
694 store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
695 store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
696 store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
697 store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
698 store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
699 store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
700 store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
701 store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
702 store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
703 store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
704 store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
705 store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
706 store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
707 store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
708 store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
709 store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
710 store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
711 store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
712 store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
713 store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
714 store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
715 store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
716 store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
717 store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
718 store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
719 store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
720 store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
721 store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
722 store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
723 store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
724 store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
725 store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
726 store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
727 store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
728 store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
729 store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
730 store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
731 store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
732 store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
733 store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
734 store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
735 store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
736 store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
737 store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
738 store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
739 store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
740 store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
741 store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1