1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
7 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
8 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
10 @a64 = common global [8 x i64] zeroinitializer, align 64
11 @b64 = common global [8 x i64] zeroinitializer, align 64
12 @c64 = common global [8 x i64] zeroinitializer, align 64
13 @a32 = common global [16 x i32] zeroinitializer, align 64
14 @b32 = common global [16 x i32] zeroinitializer, align 64
15 @c32 = common global [16 x i32] zeroinitializer, align 64
16 @a16 = common global [32 x i16] zeroinitializer, align 64
17 @b16 = common global [32 x i16] zeroinitializer, align 64
18 @c16 = common global [32 x i16] zeroinitializer, align 64
19 @a8 = common global [64 x i8] zeroinitializer, align 64
20 @b8 = common global [64 x i8] zeroinitializer, align 64
21 @c8 = common global [64 x i8] zeroinitializer, align 64
23 declare i64 @llvm.usub.sat.i64(i64, i64)
24 declare i32 @llvm.usub.sat.i32(i32, i32)
25 declare i16 @llvm.usub.sat.i16(i16, i16)
26 declare i8 @llvm.usub.sat.i8 (i8 , i8 )
28 define void @sub_v8i64() {
29 ; SSE2-LABEL: @sub_v8i64(
30 ; SSE2-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8
31 ; SSE2-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
32 ; SSE2-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
33 ; SSE2-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
34 ; SSE2-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
35 ; SSE2-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
36 ; SSE2-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
37 ; SSE2-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
38 ; SSE2-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8
39 ; SSE2-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
40 ; SSE2-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
41 ; SSE2-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
42 ; SSE2-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
43 ; SSE2-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
44 ; SSE2-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
45 ; SSE2-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
46 ; SSE2-NEXT: [[R0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A0]], i64 [[B0]])
47 ; SSE2-NEXT: [[R1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A1]], i64 [[B1]])
48 ; SSE2-NEXT: [[R2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A2]], i64 [[B2]])
49 ; SSE2-NEXT: [[R3:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A3]], i64 [[B3]])
50 ; SSE2-NEXT: [[R4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A4]], i64 [[B4]])
51 ; SSE2-NEXT: [[R5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A5]], i64 [[B5]])
52 ; SSE2-NEXT: [[R6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A6]], i64 [[B6]])
53 ; SSE2-NEXT: [[R7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A7]], i64 [[B7]])
54 ; SSE2-NEXT: store i64 [[R0]], ptr @c64, align 8
55 ; SSE2-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
56 ; SSE2-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
57 ; SSE2-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
58 ; SSE2-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
59 ; SSE2-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
60 ; SSE2-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
61 ; SSE2-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
64 ; SLM-LABEL: @sub_v8i64(
65 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
66 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
67 ; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
68 ; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
69 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
70 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
71 ; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
72 ; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
73 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
74 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
75 ; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
76 ; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
77 ; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
78 ; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
79 ; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
80 ; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
83 ; AVX-LABEL: @sub_v8i64(
84 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
85 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
86 ; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
87 ; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
88 ; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
89 ; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
90 ; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
91 ; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
94 ; AVX512-LABEL: @sub_v8i64(
95 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
96 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
97 ; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
98 ; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
99 ; AVX512-NEXT: ret void
101 %a0 = load i64, ptr @a64, align 8
102 %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
103 %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
104 %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
105 %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
106 %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
107 %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
108 %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
109 %b0 = load i64, ptr @b64, align 8
110 %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
111 %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
112 %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
113 %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
114 %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
115 %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
116 %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
117 %r0 = call i64 @llvm.usub.sat.i64(i64 %a0, i64 %b0)
118 %r1 = call i64 @llvm.usub.sat.i64(i64 %a1, i64 %b1)
119 %r2 = call i64 @llvm.usub.sat.i64(i64 %a2, i64 %b2)
120 %r3 = call i64 @llvm.usub.sat.i64(i64 %a3, i64 %b3)
121 %r4 = call i64 @llvm.usub.sat.i64(i64 %a4, i64 %b4)
122 %r5 = call i64 @llvm.usub.sat.i64(i64 %a5, i64 %b5)
123 %r6 = call i64 @llvm.usub.sat.i64(i64 %a6, i64 %b6)
124 %r7 = call i64 @llvm.usub.sat.i64(i64 %a7, i64 %b7)
125 store i64 %r0, ptr @c64, align 8
126 store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
127 store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
128 store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
129 store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
130 store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
131 store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
132 store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
136 define void @sub_v16i32() {
137 ; SSE-LABEL: @sub_v16i32(
138 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
139 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
140 ; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
141 ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
142 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
143 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
144 ; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
145 ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
146 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
147 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
148 ; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
149 ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
150 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
151 ; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
152 ; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
153 ; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
156 ; AVX-LABEL: @sub_v16i32(
157 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
158 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
159 ; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
160 ; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
161 ; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
162 ; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
163 ; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
164 ; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
167 ; AVX512-LABEL: @sub_v16i32(
168 ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
169 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
170 ; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
171 ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
172 ; AVX512-NEXT: ret void
174 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
175 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
176 %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
177 %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
178 %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
179 %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
180 %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
181 %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
182 %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
183 %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
184 %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
185 %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
186 %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
187 %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
188 %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
189 %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
190 %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
191 %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
192 %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
193 %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
194 %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
195 %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
196 %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
197 %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
198 %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
199 %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
200 %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
201 %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
202 %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
203 %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
204 %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
205 %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
206 %r0 = call i32 @llvm.usub.sat.i32(i32 %a0 , i32 %b0 )
207 %r1 = call i32 @llvm.usub.sat.i32(i32 %a1 , i32 %b1 )
208 %r2 = call i32 @llvm.usub.sat.i32(i32 %a2 , i32 %b2 )
209 %r3 = call i32 @llvm.usub.sat.i32(i32 %a3 , i32 %b3 )
210 %r4 = call i32 @llvm.usub.sat.i32(i32 %a4 , i32 %b4 )
211 %r5 = call i32 @llvm.usub.sat.i32(i32 %a5 , i32 %b5 )
212 %r6 = call i32 @llvm.usub.sat.i32(i32 %a6 , i32 %b6 )
213 %r7 = call i32 @llvm.usub.sat.i32(i32 %a7 , i32 %b7 )
214 %r8 = call i32 @llvm.usub.sat.i32(i32 %a8 , i32 %b8 )
215 %r9 = call i32 @llvm.usub.sat.i32(i32 %a9 , i32 %b9 )
216 %r10 = call i32 @llvm.usub.sat.i32(i32 %a10, i32 %b10)
217 %r11 = call i32 @llvm.usub.sat.i32(i32 %a11, i32 %b11)
218 %r12 = call i32 @llvm.usub.sat.i32(i32 %a12, i32 %b12)
219 %r13 = call i32 @llvm.usub.sat.i32(i32 %a13, i32 %b13)
220 %r14 = call i32 @llvm.usub.sat.i32(i32 %a14, i32 %b14)
221 %r15 = call i32 @llvm.usub.sat.i32(i32 %a15, i32 %b15)
222 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
223 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
224 store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
225 store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
226 store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
227 store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
228 store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
229 store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
230 store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
231 store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
232 store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
233 store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
234 store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
235 store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
236 store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
237 store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
241 define void @sub_v32i16() {
242 ; SSE-LABEL: @sub_v32i16(
243 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
244 ; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
245 ; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
246 ; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
247 ; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
248 ; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
249 ; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
250 ; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
251 ; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
252 ; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
253 ; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
254 ; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
255 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
256 ; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
257 ; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
258 ; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
261 ; AVX-LABEL: @sub_v32i16(
262 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
263 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
264 ; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
265 ; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
266 ; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
267 ; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
268 ; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
269 ; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
272 ; AVX512-LABEL: @sub_v32i16(
273 ; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
274 ; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
275 ; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
276 ; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
277 ; AVX512-NEXT: ret void
279 %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
280 %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
281 %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
282 %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
283 %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
284 %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
285 %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
286 %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
287 %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
288 %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
289 %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
290 %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
291 %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
292 %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
293 %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
294 %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
295 %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
296 %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
297 %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
298 %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
299 %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
300 %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
301 %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
302 %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
303 %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
304 %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
305 %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
306 %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
307 %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
308 %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
309 %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
310 %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
311 %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
312 %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
313 %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
314 %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
315 %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
316 %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
317 %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
318 %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
319 %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
320 %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
321 %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
322 %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
323 %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
324 %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
325 %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
326 %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
327 %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
328 %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
329 %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
330 %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
331 %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
332 %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
333 %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
334 %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
335 %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
336 %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
337 %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
338 %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
339 %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
340 %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
341 %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
342 %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
343 %r0 = call i16 @llvm.usub.sat.i16(i16 %a0 , i16 %b0 )
344 %r1 = call i16 @llvm.usub.sat.i16(i16 %a1 , i16 %b1 )
345 %r2 = call i16 @llvm.usub.sat.i16(i16 %a2 , i16 %b2 )
346 %r3 = call i16 @llvm.usub.sat.i16(i16 %a3 , i16 %b3 )
347 %r4 = call i16 @llvm.usub.sat.i16(i16 %a4 , i16 %b4 )
348 %r5 = call i16 @llvm.usub.sat.i16(i16 %a5 , i16 %b5 )
349 %r6 = call i16 @llvm.usub.sat.i16(i16 %a6 , i16 %b6 )
350 %r7 = call i16 @llvm.usub.sat.i16(i16 %a7 , i16 %b7 )
351 %r8 = call i16 @llvm.usub.sat.i16(i16 %a8 , i16 %b8 )
352 %r9 = call i16 @llvm.usub.sat.i16(i16 %a9 , i16 %b9 )
353 %r10 = call i16 @llvm.usub.sat.i16(i16 %a10, i16 %b10)
354 %r11 = call i16 @llvm.usub.sat.i16(i16 %a11, i16 %b11)
355 %r12 = call i16 @llvm.usub.sat.i16(i16 %a12, i16 %b12)
356 %r13 = call i16 @llvm.usub.sat.i16(i16 %a13, i16 %b13)
357 %r14 = call i16 @llvm.usub.sat.i16(i16 %a14, i16 %b14)
358 %r15 = call i16 @llvm.usub.sat.i16(i16 %a15, i16 %b15)
359 %r16 = call i16 @llvm.usub.sat.i16(i16 %a16, i16 %b16)
360 %r17 = call i16 @llvm.usub.sat.i16(i16 %a17, i16 %b17)
361 %r18 = call i16 @llvm.usub.sat.i16(i16 %a18, i16 %b18)
362 %r19 = call i16 @llvm.usub.sat.i16(i16 %a19, i16 %b19)
363 %r20 = call i16 @llvm.usub.sat.i16(i16 %a20, i16 %b20)
364 %r21 = call i16 @llvm.usub.sat.i16(i16 %a21, i16 %b21)
365 %r22 = call i16 @llvm.usub.sat.i16(i16 %a22, i16 %b22)
366 %r23 = call i16 @llvm.usub.sat.i16(i16 %a23, i16 %b23)
367 %r24 = call i16 @llvm.usub.sat.i16(i16 %a24, i16 %b24)
368 %r25 = call i16 @llvm.usub.sat.i16(i16 %a25, i16 %b25)
369 %r26 = call i16 @llvm.usub.sat.i16(i16 %a26, i16 %b26)
370 %r27 = call i16 @llvm.usub.sat.i16(i16 %a27, i16 %b27)
371 %r28 = call i16 @llvm.usub.sat.i16(i16 %a28, i16 %b28)
372 %r29 = call i16 @llvm.usub.sat.i16(i16 %a29, i16 %b29)
373 %r30 = call i16 @llvm.usub.sat.i16(i16 %a30, i16 %b30)
374 %r31 = call i16 @llvm.usub.sat.i16(i16 %a31, i16 %b31)
375 store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
376 store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
377 store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
378 store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
379 store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
380 store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
381 store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
382 store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
383 store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
384 store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
385 store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
386 store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
387 store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
388 store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
389 store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
390 store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
391 store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
392 store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
393 store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
394 store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
395 store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
396 store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
397 store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
398 store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
399 store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
400 store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
401 store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
402 store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
403 store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
404 store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
405 store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
406 store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
410 define void @sub_v64i8() {
411 ; SSE-LABEL: @sub_v64i8(
412 ; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
413 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
414 ; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
415 ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
416 ; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
417 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
418 ; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
419 ; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
420 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
421 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
422 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
423 ; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
424 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
425 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
426 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
427 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
430 ; AVX-LABEL: @sub_v64i8(
431 ; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
432 ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
433 ; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
434 ; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
435 ; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
436 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
437 ; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
438 ; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
441 ; AVX512-LABEL: @sub_v64i8(
442 ; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
443 ; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
444 ; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
445 ; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
446 ; AVX512-NEXT: ret void
448 %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
449 %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
450 %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
451 %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
452 %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
453 %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
454 %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
455 %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
456 %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
457 %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
458 %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
459 %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
460 %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
461 %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
462 %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
463 %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
464 %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
465 %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
466 %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
467 %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
468 %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
469 %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
470 %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
471 %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
472 %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
473 %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
474 %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
475 %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
476 %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
477 %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
478 %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
479 %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
480 %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
481 %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
482 %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
483 %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
484 %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
485 %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
486 %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
487 %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
488 %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
489 %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
490 %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
491 %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
492 %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
493 %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
494 %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
495 %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
496 %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
497 %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
498 %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
499 %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
500 %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
501 %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
502 %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
503 %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
504 %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
505 %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
506 %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
507 %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
508 %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
509 %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
510 %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
511 %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
512 %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
513 %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
514 %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
515 %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
516 %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
517 %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
518 %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
519 %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
520 %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
521 %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
522 %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
523 %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
524 %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
525 %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
526 %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
527 %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
528 %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
529 %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
530 %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
531 %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
532 %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
533 %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
534 %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
535 %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
536 %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
537 %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
538 %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
539 %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
540 %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
541 %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
542 %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
543 %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
544 %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
545 %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
546 %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
547 %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
548 %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
549 %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
550 %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
551 %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
552 %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
553 %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
554 %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
555 %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
556 %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
557 %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
558 %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
559 %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
560 %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
561 %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
562 %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
563 %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
564 %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
565 %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
566 %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
567 %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
568 %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
569 %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
570 %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
571 %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
572 %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
573 %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
574 %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
575 %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
576 %r0 = call i8 @llvm.usub.sat.i8(i8 %a0 , i8 %b0 )
577 %r1 = call i8 @llvm.usub.sat.i8(i8 %a1 , i8 %b1 )
578 %r2 = call i8 @llvm.usub.sat.i8(i8 %a2 , i8 %b2 )
579 %r3 = call i8 @llvm.usub.sat.i8(i8 %a3 , i8 %b3 )
580 %r4 = call i8 @llvm.usub.sat.i8(i8 %a4 , i8 %b4 )
581 %r5 = call i8 @llvm.usub.sat.i8(i8 %a5 , i8 %b5 )
582 %r6 = call i8 @llvm.usub.sat.i8(i8 %a6 , i8 %b6 )
583 %r7 = call i8 @llvm.usub.sat.i8(i8 %a7 , i8 %b7 )
584 %r8 = call i8 @llvm.usub.sat.i8(i8 %a8 , i8 %b8 )
585 %r9 = call i8 @llvm.usub.sat.i8(i8 %a9 , i8 %b9 )
586 %r10 = call i8 @llvm.usub.sat.i8(i8 %a10, i8 %b10)
587 %r11 = call i8 @llvm.usub.sat.i8(i8 %a11, i8 %b11)
588 %r12 = call i8 @llvm.usub.sat.i8(i8 %a12, i8 %b12)
589 %r13 = call i8 @llvm.usub.sat.i8(i8 %a13, i8 %b13)
590 %r14 = call i8 @llvm.usub.sat.i8(i8 %a14, i8 %b14)
591 %r15 = call i8 @llvm.usub.sat.i8(i8 %a15, i8 %b15)
592 %r16 = call i8 @llvm.usub.sat.i8(i8 %a16, i8 %b16)
593 %r17 = call i8 @llvm.usub.sat.i8(i8 %a17, i8 %b17)
594 %r18 = call i8 @llvm.usub.sat.i8(i8 %a18, i8 %b18)
595 %r19 = call i8 @llvm.usub.sat.i8(i8 %a19, i8 %b19)
596 %r20 = call i8 @llvm.usub.sat.i8(i8 %a20, i8 %b20)
597 %r21 = call i8 @llvm.usub.sat.i8(i8 %a21, i8 %b21)
598 %r22 = call i8 @llvm.usub.sat.i8(i8 %a22, i8 %b22)
599 %r23 = call i8 @llvm.usub.sat.i8(i8 %a23, i8 %b23)
600 %r24 = call i8 @llvm.usub.sat.i8(i8 %a24, i8 %b24)
601 %r25 = call i8 @llvm.usub.sat.i8(i8 %a25, i8 %b25)
602 %r26 = call i8 @llvm.usub.sat.i8(i8 %a26, i8 %b26)
603 %r27 = call i8 @llvm.usub.sat.i8(i8 %a27, i8 %b27)
604 %r28 = call i8 @llvm.usub.sat.i8(i8 %a28, i8 %b28)
605 %r29 = call i8 @llvm.usub.sat.i8(i8 %a29, i8 %b29)
606 %r30 = call i8 @llvm.usub.sat.i8(i8 %a30, i8 %b30)
607 %r31 = call i8 @llvm.usub.sat.i8(i8 %a31, i8 %b31)
608 %r32 = call i8 @llvm.usub.sat.i8(i8 %a32, i8 %b32)
609 %r33 = call i8 @llvm.usub.sat.i8(i8 %a33, i8 %b33)
610 %r34 = call i8 @llvm.usub.sat.i8(i8 %a34, i8 %b34)
611 %r35 = call i8 @llvm.usub.sat.i8(i8 %a35, i8 %b35)
612 %r36 = call i8 @llvm.usub.sat.i8(i8 %a36, i8 %b36)
613 %r37 = call i8 @llvm.usub.sat.i8(i8 %a37, i8 %b37)
614 %r38 = call i8 @llvm.usub.sat.i8(i8 %a38, i8 %b38)
615 %r39 = call i8 @llvm.usub.sat.i8(i8 %a39, i8 %b39)
616 %r40 = call i8 @llvm.usub.sat.i8(i8 %a40, i8 %b40)
617 %r41 = call i8 @llvm.usub.sat.i8(i8 %a41, i8 %b41)
618 %r42 = call i8 @llvm.usub.sat.i8(i8 %a42, i8 %b42)
619 %r43 = call i8 @llvm.usub.sat.i8(i8 %a43, i8 %b43)
620 %r44 = call i8 @llvm.usub.sat.i8(i8 %a44, i8 %b44)
621 %r45 = call i8 @llvm.usub.sat.i8(i8 %a45, i8 %b45)
622 %r46 = call i8 @llvm.usub.sat.i8(i8 %a46, i8 %b46)
623 %r47 = call i8 @llvm.usub.sat.i8(i8 %a47, i8 %b47)
624 %r48 = call i8 @llvm.usub.sat.i8(i8 %a48, i8 %b48)
625 %r49 = call i8 @llvm.usub.sat.i8(i8 %a49, i8 %b49)
626 %r50 = call i8 @llvm.usub.sat.i8(i8 %a50, i8 %b50)
627 %r51 = call i8 @llvm.usub.sat.i8(i8 %a51, i8 %b51)
628 %r52 = call i8 @llvm.usub.sat.i8(i8 %a52, i8 %b52)
629 %r53 = call i8 @llvm.usub.sat.i8(i8 %a53, i8 %b53)
630 %r54 = call i8 @llvm.usub.sat.i8(i8 %a54, i8 %b54)
631 %r55 = call i8 @llvm.usub.sat.i8(i8 %a55, i8 %b55)
632 %r56 = call i8 @llvm.usub.sat.i8(i8 %a56, i8 %b56)
633 %r57 = call i8 @llvm.usub.sat.i8(i8 %a57, i8 %b57)
634 %r58 = call i8 @llvm.usub.sat.i8(i8 %a58, i8 %b58)
635 %r59 = call i8 @llvm.usub.sat.i8(i8 %a59, i8 %b59)
636 %r60 = call i8 @llvm.usub.sat.i8(i8 %a60, i8 %b60)
637 %r61 = call i8 @llvm.usub.sat.i8(i8 %a61, i8 %b61)
638 %r62 = call i8 @llvm.usub.sat.i8(i8 %a62, i8 %b62)
639 %r63 = call i8 @llvm.usub.sat.i8(i8 %a63, i8 %b63)
640 store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
641 store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
642 store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
643 store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
644 store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
645 store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
646 store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
647 store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
648 store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
649 store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
650 store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
651 store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
652 store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
653 store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
654 store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
655 store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
656 store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
657 store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
658 store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
659 store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
660 store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
661 store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
662 store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
663 store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
664 store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
665 store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
666 store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
667 store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
668 store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
669 store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
670 store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
671 store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
672 store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
673 store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
674 store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
675 store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
676 store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
677 store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
678 store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
679 store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
680 store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
681 store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
682 store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
683 store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
684 store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
685 store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
686 store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
687 store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
688 store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
689 store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
690 store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
691 store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
692 store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
693 store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
694 store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
695 store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
696 store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
697 store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
698 store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
699 store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
700 store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
701 store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
702 store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
703 store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1