clang/test/CodeGen/LoongArch/lsx/builtin.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
   2 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
   3
   4 typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); // 16-byte vector of signed char, 16-byte aligned; the typedefs below repeat this pattern for each element type
   5 typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); // same vector type with alignment lowered to 1; the _b/_h/_w/_d variants use aligned(1)/(2)/(4)/(8)
   6 typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16)));
   7 typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1)));
   8 typedef short v8i16 __attribute__ ((vector_size(16), aligned(16)));
   9 typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2)));
  10 typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16)));
  11 typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2)));
  12 typedef int v4i32 __attribute__ ((vector_size(16), aligned(16)));
  13 typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4)));
  14 typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16)));
  15 typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4)));
  16 typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16)));
  17 typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8)));
  18 typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16)));
  19 typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8)));
  20 typedef float v4f32 __attribute__ ((vector_size(16), aligned(16)));
  21 typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4)));
  22 typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));
  23 typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));
  24
  25 typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); // 16-byte vector types declared with __may_alias__; not referenced by the functions visible in this part of the file
  26 typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
  27 typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
  28
  29
  30 // CHECK-LABEL: @vsll_b(
  31 // CHECK-NEXT:  entry:
  32 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
  33 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
  34 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
  35 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
  36 // CHECK-NEXT:    ret i128 [[TMP3]]
  37 //
  38 v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsll.b on two <16 x i8> operands
  39 // CHECK-LABEL: @vsll_h(
  40 // CHECK-NEXT:  entry:
  41 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
  42 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
  43 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
  44 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
  45 // CHECK-NEXT:    ret i128 [[TMP3]]
  46 //
  47 v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsll.h on two <8 x i16> operands
  48 // CHECK-LABEL: @vsll_w(
  49 // CHECK-NEXT:  entry:
  50 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
  51 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
  52 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
  53 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
  54 // CHECK-NEXT:    ret i128 [[TMP3]]
  55 //
  56 v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsll.w on two <4 x i32> operands
  57 // CHECK-LABEL: @vsll_d(
  58 // CHECK-NEXT:  entry:
  59 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
  60 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
  61 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
  62 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
  63 // CHECK-NEXT:    ret i128 [[TMP3]]
  64 //
  65 v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsll.d on two <2 x i64> operands
  66 // CHECK-LABEL: @vslli_b(
  67 // CHECK-NEXT:  entry:
  68 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
  69 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1)
  70 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
  71 // CHECK-NEXT:    ret i128 [[TMP2]]
  72 //
  73 v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vslli.b
  74 // CHECK-LABEL: @vslli_h(
  75 // CHECK-NEXT:  entry:
  76 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
  77 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1)
  78 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
  79 // CHECK-NEXT:    ret i128 [[TMP2]]
  80 //
  81 v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vslli.h
  82 // CHECK-LABEL: @vslli_w(
  83 // CHECK-NEXT:  entry:
  84 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
  85 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1)
  86 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
  87 // CHECK-NEXT:    ret i128 [[TMP2]]
  88 //
  89 v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vslli.w
  90 // CHECK-LABEL: @vslli_d(
  91 // CHECK-NEXT:  entry:
  92 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
  93 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1)
  94 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
  95 // CHECK-NEXT:    ret i128 [[TMP2]]
  96 //
  97 v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vslli.d
  98 // CHECK-LABEL: @vsra_b(
  99 // CHECK-NEXT:  entry:
 100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 101 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 102 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 103 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 104 // CHECK-NEXT:    ret i128 [[TMP3]]
 105 //
 106 v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsra.b on two <16 x i8> operands
 107 // CHECK-LABEL: @vsra_h(
 108 // CHECK-NEXT:  entry:
 109 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 110 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 111 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 112 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 113 // CHECK-NEXT:    ret i128 [[TMP3]]
 114 //
 115 v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsra.h on two <8 x i16> operands
 116 // CHECK-LABEL: @vsra_w(
 117 // CHECK-NEXT:  entry:
 118 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 119 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 120 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 121 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 122 // CHECK-NEXT:    ret i128 [[TMP3]]
 123 //
 124 v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsra.w on two <4 x i32> operands
 125 // CHECK-LABEL: @vsra_d(
 126 // CHECK-NEXT:  entry:
 127 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 128 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 129 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 130 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 131 // CHECK-NEXT:    ret i128 [[TMP3]]
 132 //
 133 v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsra.d on two <2 x i64> operands
 134 // CHECK-LABEL: @vsrai_b(
 135 // CHECK-NEXT:  entry:
 136 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 137 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1)
 138 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 139 // CHECK-NEXT:    ret i128 [[TMP2]]
 140 //
 141 v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrai.b
 142 // CHECK-LABEL: @vsrai_h(
 143 // CHECK-NEXT:  entry:
 144 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 145 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1)
 146 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 147 // CHECK-NEXT:    ret i128 [[TMP2]]
 148 //
 149 v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrai.h
 150 // CHECK-LABEL: @vsrai_w(
 151 // CHECK-NEXT:  entry:
 152 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 153 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
 154 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 155 // CHECK-NEXT:    ret i128 [[TMP2]]
 156 //
 157 v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrai.w
 158 // CHECK-LABEL: @vsrai_d(
 159 // CHECK-NEXT:  entry:
 160 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 161 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
 162 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 163 // CHECK-NEXT:    ret i128 [[TMP2]]
 164 //
 165 v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrai.d
 166 // CHECK-LABEL: @vsrar_b(
 167 // CHECK-NEXT:  entry:
 168 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 169 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 170 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 171 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 172 // CHECK-NEXT:    ret i128 [[TMP3]]
 173 //
 174 v16i8 vsrar_b(v16i8 _1, v16i8 _2) {
 175   return __builtin_lsx_vsrar_b(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrar.b on two <16 x i8> operands
 176 }
 177 // CHECK-LABEL: @vsrar_h(
 178 // CHECK-NEXT:  entry:
 179 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 180 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 181 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 182 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 183 // CHECK-NEXT:    ret i128 [[TMP3]]
 184 //
 185 v8i16 vsrar_h(v8i16 _1, v8i16 _2) {
 186   return __builtin_lsx_vsrar_h(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrar.h on two <8 x i16> operands
 187 }
 188 // CHECK-LABEL: @vsrar_w(
 189 // CHECK-NEXT:  entry:
 190 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 191 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 192 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 193 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 194 // CHECK-NEXT:    ret i128 [[TMP3]]
 195 //
 196 v4i32 vsrar_w(v4i32 _1, v4i32 _2) {
 197   return __builtin_lsx_vsrar_w(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrar.w on two <4 x i32> operands
 198 }
 199 // CHECK-LABEL: @vsrar_d(
 200 // CHECK-NEXT:  entry:
 201 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 202 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 203 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 204 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 205 // CHECK-NEXT:    ret i128 [[TMP3]]
 206 //
 207 v2i64 vsrar_d(v2i64 _1, v2i64 _2) {
 208   return __builtin_lsx_vsrar_d(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrar.d on two <2 x i64> operands
 209 }
 210 // CHECK-LABEL: @vsrari_b(
 211 // CHECK-NEXT:  entry:
 212 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 213 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
 214 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 215 // CHECK-NEXT:    ret i128 [[TMP2]]
 216 //
 217 v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrari.b
 218 // CHECK-LABEL: @vsrari_h(
 219 // CHECK-NEXT:  entry:
 220 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 221 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
 222 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 223 // CHECK-NEXT:    ret i128 [[TMP2]]
 224 //
 225 v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrari.h
 226 // CHECK-LABEL: @vsrari_w(
 227 // CHECK-NEXT:  entry:
 228 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 229 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
 230 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 231 // CHECK-NEXT:    ret i128 [[TMP2]]
 232 //
 233 v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrari.w
 234 // CHECK-LABEL: @vsrari_d(
 235 // CHECK-NEXT:  entry:
 236 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 237 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
 238 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 239 // CHECK-NEXT:    ret i128 [[TMP2]]
 240 //
 241 v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrari.d
 242 // CHECK-LABEL: @vsrl_b(
 243 // CHECK-NEXT:  entry:
 244 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 245 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 246 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 247 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 248 // CHECK-NEXT:    ret i128 [[TMP3]]
 249 //
 250 v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsrl.b on two <16 x i8> operands
 251 // CHECK-LABEL: @vsrl_h(
 252 // CHECK-NEXT:  entry:
 253 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 254 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 255 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 256 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 257 // CHECK-NEXT:    ret i128 [[TMP3]]
 258 //
 259 v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsrl.h on two <8 x i16> operands
 260 // CHECK-LABEL: @vsrl_w(
 261 // CHECK-NEXT:  entry:
 262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 263 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 264 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 265 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 266 // CHECK-NEXT:    ret i128 [[TMP3]]
 267 //
 268 v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsrl.w on two <4 x i32> operands
 269 // CHECK-LABEL: @vsrl_d(
 270 // CHECK-NEXT:  entry:
 271 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 272 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 273 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 274 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 275 // CHECK-NEXT:    ret i128 [[TMP3]]
 276 //
 277 v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } // expected to emit one call to @llvm.loongarch.lsx.vsrl.d on two <2 x i64> operands
 278 // CHECK-LABEL: @vsrli_b(
 279 // CHECK-NEXT:  entry:
 280 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 281 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
 282 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 283 // CHECK-NEXT:    ret i128 [[TMP2]]
 284 //
 285 v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrli.b
 286 // CHECK-LABEL: @vsrli_h(
 287 // CHECK-NEXT:  entry:
 288 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 289 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
 290 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 291 // CHECK-NEXT:    ret i128 [[TMP2]]
 292 //
 293 v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrli.h
 294 // CHECK-LABEL: @vsrli_w(
 295 // CHECK-NEXT:  entry:
 296 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 297 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
 298 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 299 // CHECK-NEXT:    ret i128 [[TMP2]]
 300 //
 301 v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrli.w
 302 // CHECK-LABEL: @vsrli_d(
 303 // CHECK-NEXT:  entry:
 304 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 305 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
 306 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 307 // CHECK-NEXT:    ret i128 [[TMP2]]
 308 //
 309 v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrli.d
 310 // CHECK-LABEL: @vsrlr_b(
 311 // CHECK-NEXT:  entry:
 312 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 313 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 314 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 315 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 316 // CHECK-NEXT:    ret i128 [[TMP3]]
 317 //
 318 v16i8 vsrlr_b(v16i8 _1, v16i8 _2) {
 319   return __builtin_lsx_vsrlr_b(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrlr.b on two <16 x i8> operands
 320 }
 321 // CHECK-LABEL: @vsrlr_h(
 322 // CHECK-NEXT:  entry:
 323 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 324 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 325 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 326 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 327 // CHECK-NEXT:    ret i128 [[TMP3]]
 328 //
 329 v8i16 vsrlr_h(v8i16 _1, v8i16 _2) {
 330   return __builtin_lsx_vsrlr_h(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrlr.h on two <8 x i16> operands
 331 }
 332 // CHECK-LABEL: @vsrlr_w(
 333 // CHECK-NEXT:  entry:
 334 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 335 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 336 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 337 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 338 // CHECK-NEXT:    ret i128 [[TMP3]]
 339 //
 340 v4i32 vsrlr_w(v4i32 _1, v4i32 _2) {
 341   return __builtin_lsx_vsrlr_w(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrlr.w on two <4 x i32> operands
 342 }
 343 // CHECK-LABEL: @vsrlr_d(
 344 // CHECK-NEXT:  entry:
 345 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 346 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 347 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 348 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 349 // CHECK-NEXT:    ret i128 [[TMP3]]
 350 //
 351 v2i64 vsrlr_d(v2i64 _1, v2i64 _2) {
 352   return __builtin_lsx_vsrlr_d(_1, _2); // expected to emit one call to @llvm.loongarch.lsx.vsrlr.d on two <2 x i64> operands
 353 }
 354 // CHECK-LABEL: @vsrlri_b(
 355 // CHECK-NEXT:  entry:
 356 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 357 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
 358 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 359 // CHECK-NEXT:    ret i128 [[TMP2]]
 360 //
 361 v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrlri.b
 362 // CHECK-LABEL: @vsrlri_h(
 363 // CHECK-NEXT:  entry:
 364 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 365 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
 366 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 367 // CHECK-NEXT:    ret i128 [[TMP2]]
 368 //
 369 v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrlri.h
 370 // CHECK-LABEL: @vsrlri_w(
 371 // CHECK-NEXT:  entry:
 372 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 373 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
 374 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 375 // CHECK-NEXT:    ret i128 [[TMP2]]
 376 //
 377 v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrlri.w
 378 // CHECK-LABEL: @vsrlri_d(
 379 // CHECK-NEXT:  entry:
 380 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 381 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
 382 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 383 // CHECK-NEXT:    ret i128 [[TMP2]]
 384 //
 385 v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vsrlri.d
 386 // CHECK-LABEL: @vbitclr_b(
 387 // CHECK-NEXT:  entry:
 388 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 389 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 390 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 391 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 392 // CHECK-NEXT:    ret i128 [[TMP3]]
 393 //
 394 v16u8 vbitclr_b(v16u8 _1, v16u8 _2) {
 395   return __builtin_lsx_vbitclr_b(_1, _2); // unsigned v16u8 in C; the expected IR call is @llvm.loongarch.lsx.vbitclr.b on two <16 x i8> operands
 396 }
 397 // CHECK-LABEL: @vbitclr_h(
 398 // CHECK-NEXT:  entry:
 399 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 400 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 401 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 402 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 403 // CHECK-NEXT:    ret i128 [[TMP3]]
 404 //
 405 v8u16 vbitclr_h(v8u16 _1, v8u16 _2) {
 406   return __builtin_lsx_vbitclr_h(_1, _2); // unsigned v8u16 in C; the expected IR call is @llvm.loongarch.lsx.vbitclr.h on two <8 x i16> operands
 407 }
 408 // CHECK-LABEL: @vbitclr_w(
 409 // CHECK-NEXT:  entry:
 410 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 411 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 412 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 413 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 414 // CHECK-NEXT:    ret i128 [[TMP3]]
 415 //
 416 v4u32 vbitclr_w(v4u32 _1, v4u32 _2) {
 417   return __builtin_lsx_vbitclr_w(_1, _2); // unsigned v4u32 in C; the expected IR call is @llvm.loongarch.lsx.vbitclr.w on two <4 x i32> operands
 418 }
 419 // CHECK-LABEL: @vbitclr_d(
 420 // CHECK-NEXT:  entry:
 421 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 422 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 423 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 424 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 425 // CHECK-NEXT:    ret i128 [[TMP3]]
 426 //
 427 v2u64 vbitclr_d(v2u64 _1, v2u64 _2) {
 428   return __builtin_lsx_vbitclr_d(_1, _2); // unsigned v2u64 in C; the expected IR call is @llvm.loongarch.lsx.vbitclr.d on two <2 x i64> operands
 429 }
 430 // CHECK-LABEL: @vbitclri_b(
 431 // CHECK-NEXT:  entry:
 432 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 433 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
 434 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 435 // CHECK-NEXT:    ret i128 [[TMP2]]
 436 //
 437 v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitclri.b
 438 // CHECK-LABEL: @vbitclri_h(
 439 // CHECK-NEXT:  entry:
 440 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 441 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
 442 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 443 // CHECK-NEXT:    ret i128 [[TMP2]]
 444 //
 445 v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitclri.h
 446 // CHECK-LABEL: @vbitclri_w(
 447 // CHECK-NEXT:  entry:
 448 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 449 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
 450 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 451 // CHECK-NEXT:    ret i128 [[TMP2]]
 452 //
 453 v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitclri.w
 454 // CHECK-LABEL: @vbitclri_d(
 455 // CHECK-NEXT:  entry:
 456 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 457 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
 458 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 459 // CHECK-NEXT:    ret i128 [[TMP2]]
 460 //
 461 v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitclri.d
 462 // CHECK-LABEL: @vbitset_b(
 463 // CHECK-NEXT:  entry:
 464 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 465 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 466 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 467 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 468 // CHECK-NEXT:    ret i128 [[TMP3]]
 469 //
 470 v16u8 vbitset_b(v16u8 _1, v16u8 _2) {
 471   return __builtin_lsx_vbitset_b(_1, _2); // unsigned v16u8 in C; the expected IR call is @llvm.loongarch.lsx.vbitset.b on two <16 x i8> operands
 472 }
 473 // CHECK-LABEL: @vbitset_h(
 474 // CHECK-NEXT:  entry:
 475 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 476 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 477 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 478 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 479 // CHECK-NEXT:    ret i128 [[TMP3]]
 480 //
 481 v8u16 vbitset_h(v8u16 _1, v8u16 _2) {
 482   return __builtin_lsx_vbitset_h(_1, _2); // unsigned v8u16 in C; the expected IR call is @llvm.loongarch.lsx.vbitset.h on two <8 x i16> operands
 483 }
 484 // CHECK-LABEL: @vbitset_w(
 485 // CHECK-NEXT:  entry:
 486 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 487 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 488 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 489 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 490 // CHECK-NEXT:    ret i128 [[TMP3]]
 491 //
 492 v4u32 vbitset_w(v4u32 _1, v4u32 _2) {
 493   return __builtin_lsx_vbitset_w(_1, _2); // unsigned v4u32 in C; the expected IR call is @llvm.loongarch.lsx.vbitset.w on two <4 x i32> operands
 494 }
 495 // CHECK-LABEL: @vbitset_d(
 496 // CHECK-NEXT:  entry:
 497 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 498 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 499 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 500 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 501 // CHECK-NEXT:    ret i128 [[TMP3]]
 502 //
 503 v2u64 vbitset_d(v2u64 _1, v2u64 _2) {
 504   return __builtin_lsx_vbitset_d(_1, _2); // unsigned v2u64 in C; the expected IR call is @llvm.loongarch.lsx.vbitset.d on two <2 x i64> operands
 505 }
 506 // CHECK-LABEL: @vbitseti_b(
 507 // CHECK-NEXT:  entry:
 508 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 509 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
 510 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 511 // CHECK-NEXT:    ret i128 [[TMP2]]
 512 //
 513 v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitseti.b
 514 // CHECK-LABEL: @vbitseti_h(
 515 // CHECK-NEXT:  entry:
 516 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 517 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
 518 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 519 // CHECK-NEXT:    ret i128 [[TMP2]]
 520 //
 521 v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitseti.h
 522 // CHECK-LABEL: @vbitseti_w(
 523 // CHECK-NEXT:  entry:
 524 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 525 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
 526 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 527 // CHECK-NEXT:    ret i128 [[TMP2]]
 528 //
 529 v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitseti.w
 530 // CHECK-LABEL: @vbitseti_d(
 531 // CHECK-NEXT:  entry:
 532 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 533 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
 534 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 535 // CHECK-NEXT:    ret i128 [[TMP2]]
 536 //
 537 v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } // immediate form: the literal 1 is expected as the i32 argument of @llvm.loongarch.lsx.vbitseti.d
 538 // CHECK-LABEL: @vbitrev_b(
 539 // CHECK-NEXT:  entry:
 540 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 541 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 542 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 543 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 544 // CHECK-NEXT:    ret i128 [[TMP3]]
 545 //
 546 v16u8 vbitrev_b(v16u8 _1, v16u8 _2) {
 547   return __builtin_lsx_vbitrev_b(_1, _2); // unsigned v16u8 in C; the expected IR call is @llvm.loongarch.lsx.vbitrev.b on two <16 x i8> operands
 548 }
 549 // CHECK-LABEL: @vbitrev_h(
 550 // CHECK-NEXT:  entry:
 551 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 552 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 553 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 554 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 555 // CHECK-NEXT:    ret i128 [[TMP3]]
 556 //
 557 v8u16 vbitrev_h(v8u16 _1, v8u16 _2) {
 558   return __builtin_lsx_vbitrev_h(_1, _2); // unsigned v8u16 in C; the expected IR call is @llvm.loongarch.lsx.vbitrev.h on two <8 x i16> operands
 559 }
 560 // CHECK-LABEL: @vbitrev_w(
 561 // CHECK-NEXT:  entry:
 562 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 563 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 564 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 565 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 566 // CHECK-NEXT:    ret i128 [[TMP3]]
 567 //
 568 v4u32 vbitrev_w(v4u32 _1, v4u32 _2) {
 569   return __builtin_lsx_vbitrev_w(_1, _2); // unsigned v4u32 in C; the expected IR call is @llvm.loongarch.lsx.vbitrev.w on two <4 x i32> operands
 570 }
 571 // CHECK-LABEL: @vbitrev_d(
 572 // CHECK-NEXT:  entry:
 573 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 574 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 575 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 576 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 577 // CHECK-NEXT:    ret i128 [[TMP3]]
 578 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vbitrev_d. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vbitrev.d on <2 x i64> operands, with arguments and
 // result coerced through i128.
 579 v2u64 vbitrev_d(v2u64 _1, v2u64 _2) {
 580   return __builtin_lsx_vbitrev_d(_1, _2);
 581 }
 582 // CHECK-LABEL: @vbitrevi_b(
 583 // CHECK-NEXT:  entry:
 584 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 585 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
 586 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 587 // CHECK-NEXT:    ret i128 [[TMP2]]
 588 //
 // Test wrapper: forwards _1 to __builtin_lsx_vbitrevi_b with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vbitrevi.b on <16 x i8> with i32 1, coerced through i128.
 589 v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); }
 590 // CHECK-LABEL: @vbitrevi_h(
 591 // CHECK-NEXT:  entry:
 592 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 593 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
 594 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 595 // CHECK-NEXT:    ret i128 [[TMP2]]
 596 //
 // Test wrapper: forwards _1 to __builtin_lsx_vbitrevi_h with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vbitrevi.h on <8 x i16> with i32 1, coerced through i128.
 597 v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); }
 598 // CHECK-LABEL: @vbitrevi_w(
 599 // CHECK-NEXT:  entry:
 600 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 601 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
 602 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 603 // CHECK-NEXT:    ret i128 [[TMP2]]
 604 //
 // Test wrapper: forwards _1 to __builtin_lsx_vbitrevi_w with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vbitrevi.w on <4 x i32> with i32 1, coerced through i128.
 605 v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); }
 606 // CHECK-LABEL: @vbitrevi_d(
 607 // CHECK-NEXT:  entry:
 608 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 609 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
 610 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 611 // CHECK-NEXT:    ret i128 [[TMP2]]
 612 //
 // Test wrapper: forwards _1 to __builtin_lsx_vbitrevi_d with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vbitrevi.d on <2 x i64> with i32 1, coerced through i128.
 613 v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); }
 614 // CHECK-LABEL: @vadd_b(
 615 // CHECK-NEXT:  entry:
 616 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 617 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 618 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 619 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 620 // CHECK-NEXT:    ret i128 [[TMP3]]
 621 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vadd_b. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vadd.b
 // on <16 x i8> operands, with arguments and result coerced through i128.
 622 v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); }
 623 // CHECK-LABEL: @vadd_h(
 624 // CHECK-NEXT:  entry:
 625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 626 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 627 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 628 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 629 // CHECK-NEXT:    ret i128 [[TMP3]]
 630 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vadd_h. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vadd.h
 // on <8 x i16> operands, with arguments and result coerced through i128.
 631 v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); }
 632 // CHECK-LABEL: @vadd_w(
 633 // CHECK-NEXT:  entry:
 634 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 635 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 636 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 637 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 638 // CHECK-NEXT:    ret i128 [[TMP3]]
 639 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vadd_w. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vadd.w
 // on <4 x i32> operands, with arguments and result coerced through i128.
 640 v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); }
 641 // CHECK-LABEL: @vadd_d(
 642 // CHECK-NEXT:  entry:
 643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 645 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 646 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 647 // CHECK-NEXT:    ret i128 [[TMP3]]
 648 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vadd_d. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vadd.d
 // on <2 x i64> operands, with arguments and result coerced through i128.
 649 v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); }
 650 // CHECK-LABEL: @vaddi_bu(
 651 // CHECK-NEXT:  entry:
 652 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 653 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
 654 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 655 // CHECK-NEXT:    ret i128 [[TMP2]]
 656 //
 // Test wrapper: forwards _1 to __builtin_lsx_vaddi_bu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vaddi.bu on <16 x i8> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v16i8 although the builtin name
 // ends in _bu; presumably this mirrors the builtin's declared signature - confirm.
 657 v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); }
 658 // CHECK-LABEL: @vaddi_hu(
 659 // CHECK-NEXT:  entry:
 660 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 661 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
 662 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 663 // CHECK-NEXT:    ret i128 [[TMP2]]
 664 //
 // Test wrapper: forwards _1 to __builtin_lsx_vaddi_hu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vaddi.hu on <8 x i16> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v8i16 although the builtin name
 // ends in _hu; presumably this mirrors the builtin's declared signature - confirm.
 665 v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); }
 666 // CHECK-LABEL: @vaddi_wu(
 667 // CHECK-NEXT:  entry:
 668 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 669 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
 670 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 671 // CHECK-NEXT:    ret i128 [[TMP2]]
 672 //
 // Test wrapper: forwards _1 to __builtin_lsx_vaddi_wu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vaddi.wu on <4 x i32> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v4i32 although the builtin name
 // ends in _wu; presumably this mirrors the builtin's declared signature - confirm.
 673 v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); }
 674 // CHECK-LABEL: @vaddi_du(
 675 // CHECK-NEXT:  entry:
 676 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 677 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
 678 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 679 // CHECK-NEXT:    ret i128 [[TMP2]]
 680 //
 // Test wrapper: forwards _1 to __builtin_lsx_vaddi_du with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vaddi.du on <2 x i64> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v2i64 although the builtin name
 // ends in _du; presumably this mirrors the builtin's declared signature - confirm.
 681 v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); }
 682 // CHECK-LABEL: @vsub_b(
 683 // CHECK-NEXT:  entry:
 684 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 685 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 686 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 687 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 688 // CHECK-NEXT:    ret i128 [[TMP3]]
 689 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vsub_b. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vsub.b
 // on <16 x i8> operands, with arguments and result coerced through i128.
 690 v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); }
 691 // CHECK-LABEL: @vsub_h(
 692 // CHECK-NEXT:  entry:
 693 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 694 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 695 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 696 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 697 // CHECK-NEXT:    ret i128 [[TMP3]]
 698 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vsub_h. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vsub.h
 // on <8 x i16> operands, with arguments and result coerced through i128.
 699 v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); }
 700 // CHECK-LABEL: @vsub_w(
 701 // CHECK-NEXT:  entry:
 702 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 703 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 704 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 705 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 706 // CHECK-NEXT:    ret i128 [[TMP3]]
 707 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vsub_w. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vsub.w
 // on <4 x i32> operands, with arguments and result coerced through i128.
 708 v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); }
 709 // CHECK-LABEL: @vsub_d(
 710 // CHECK-NEXT:  entry:
 711 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 712 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 713 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 714 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 715 // CHECK-NEXT:    ret i128 [[TMP3]]
 716 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vsub_d. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vsub.d
 // on <2 x i64> operands, with arguments and result coerced through i128.
 717 v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); }
 718 // CHECK-LABEL: @vsubi_bu(
 719 // CHECK-NEXT:  entry:
 720 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 721 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
 722 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 723 // CHECK-NEXT:    ret i128 [[TMP2]]
 724 //
 // Test wrapper: forwards _1 to __builtin_lsx_vsubi_bu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vsubi.bu on <16 x i8> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v16i8 although the builtin name
 // ends in _bu; presumably this mirrors the builtin's declared signature - confirm.
 725 v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); }
 726 // CHECK-LABEL: @vsubi_hu(
 727 // CHECK-NEXT:  entry:
 728 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 729 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
 730 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 731 // CHECK-NEXT:    ret i128 [[TMP2]]
 732 //
 // Test wrapper: forwards _1 to __builtin_lsx_vsubi_hu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vsubi.hu on <8 x i16> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v8i16 although the builtin name
 // ends in _hu; presumably this mirrors the builtin's declared signature - confirm.
 733 v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); }
 734 // CHECK-LABEL: @vsubi_wu(
 735 // CHECK-NEXT:  entry:
 736 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 737 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
 738 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 739 // CHECK-NEXT:    ret i128 [[TMP2]]
 740 //
 // Test wrapper: forwards _1 to __builtin_lsx_vsubi_wu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vsubi.wu on <4 x i32> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v4i32 although the builtin name
 // ends in _wu; presumably this mirrors the builtin's declared signature - confirm.
 741 v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); }
 742 // CHECK-LABEL: @vsubi_du(
 743 // CHECK-NEXT:  entry:
 744 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 745 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
 746 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 747 // CHECK-NEXT:    ret i128 [[TMP2]]
 748 //
 // Test wrapper: forwards _1 to __builtin_lsx_vsubi_du with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vsubi.du on <2 x i64> with i32 1, coerced through i128.
 // NOTE(review): the vector type is the signed v2i64 although the builtin name
 // ends in _du; presumably this mirrors the builtin's declared signature - confirm.
 749 v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); }
 750 // CHECK-LABEL: @vmax_b(
 751 // CHECK-NEXT:  entry:
 752 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 753 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 754 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 755 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 756 // CHECK-NEXT:    ret i128 [[TMP3]]
 757 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_b. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmax.b
 // on <16 x i8> operands, with arguments and result coerced through i128.
 758 v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); }
 759 // CHECK-LABEL: @vmax_h(
 760 // CHECK-NEXT:  entry:
 761 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 762 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 763 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 764 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 765 // CHECK-NEXT:    ret i128 [[TMP3]]
 766 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_h. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmax.h
 // on <8 x i16> operands, with arguments and result coerced through i128.
 767 v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); }
 768 // CHECK-LABEL: @vmax_w(
 769 // CHECK-NEXT:  entry:
 770 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 771 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 772 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 773 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 774 // CHECK-NEXT:    ret i128 [[TMP3]]
 775 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_w. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmax.w
 // on <4 x i32> operands, with arguments and result coerced through i128.
 776 v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); }
 777 // CHECK-LABEL: @vmax_d(
 778 // CHECK-NEXT:  entry:
 779 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 780 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 781 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 782 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 783 // CHECK-NEXT:    ret i128 [[TMP3]]
 784 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_d. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmax.d
 // on <2 x i64> operands, with arguments and result coerced through i128.
 785 v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); }
 786 // CHECK-LABEL: @vmaxi_b(
 787 // CHECK-NEXT:  entry:
 788 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 789 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
 790 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 791 // CHECK-NEXT:    ret i128 [[TMP2]]
 792 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_b with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.b on <16 x i8> with i32 1, coerced through i128.
 793 v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); }
 794 // CHECK-LABEL: @vmaxi_h(
 795 // CHECK-NEXT:  entry:
 796 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 797 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
 798 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 799 // CHECK-NEXT:    ret i128 [[TMP2]]
 800 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_h with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.h on <8 x i16> with i32 1, coerced through i128.
 801 v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); }
 802 // CHECK-LABEL: @vmaxi_w(
 803 // CHECK-NEXT:  entry:
 804 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 805 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
 806 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 807 // CHECK-NEXT:    ret i128 [[TMP2]]
 808 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_w with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.w on <4 x i32> with i32 1, coerced through i128.
 809 v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); }
 810 // CHECK-LABEL: @vmaxi_d(
 811 // CHECK-NEXT:  entry:
 812 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 813 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
 814 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 815 // CHECK-NEXT:    ret i128 [[TMP2]]
 816 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_d with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.d on <2 x i64> with i32 1, coerced through i128.
 817 v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); }
 818 // CHECK-LABEL: @vmax_bu(
 819 // CHECK-NEXT:  entry:
 820 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 821 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 822 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 823 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 824 // CHECK-NEXT:    ret i128 [[TMP3]]
 825 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_bu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmax.bu on <16 x i8> operands, with arguments and
 // result coerced through i128.
 826 v16u8 vmax_bu(v16u8 _1, v16u8 _2) {
 827   return __builtin_lsx_vmax_bu(_1, _2);
 828 }
 829 // CHECK-LABEL: @vmax_hu(
 830 // CHECK-NEXT:  entry:
 831 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 832 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 833 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 834 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 835 // CHECK-NEXT:    ret i128 [[TMP3]]
 836 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_hu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmax.hu on <8 x i16> operands, with arguments and
 // result coerced through i128.
 837 v8u16 vmax_hu(v8u16 _1, v8u16 _2) {
 838   return __builtin_lsx_vmax_hu(_1, _2);
 839 }
 840 // CHECK-LABEL: @vmax_wu(
 841 // CHECK-NEXT:  entry:
 842 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 843 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 844 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 845 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 846 // CHECK-NEXT:    ret i128 [[TMP3]]
 847 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_wu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmax.wu on <4 x i32> operands, with arguments and
 // result coerced through i128.
 848 v4u32 vmax_wu(v4u32 _1, v4u32 _2) {
 849   return __builtin_lsx_vmax_wu(_1, _2);
 850 }
 851 // CHECK-LABEL: @vmax_du(
 852 // CHECK-NEXT:  entry:
 853 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 854 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 855 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 856 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 857 // CHECK-NEXT:    ret i128 [[TMP3]]
 858 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmax_du. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmax.du on <2 x i64> operands, with arguments and
 // result coerced through i128.
 859 v2u64 vmax_du(v2u64 _1, v2u64 _2) {
 860   return __builtin_lsx_vmax_du(_1, _2);
 861 }
 862 // CHECK-LABEL: @vmaxi_bu(
 863 // CHECK-NEXT:  entry:
 864 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 865 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
 866 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 867 // CHECK-NEXT:    ret i128 [[TMP2]]
 868 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_bu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.bu on <16 x i8> with i32 1, coerced through i128.
 869 v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); }
 870 // CHECK-LABEL: @vmaxi_hu(
 871 // CHECK-NEXT:  entry:
 872 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 873 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
 874 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 875 // CHECK-NEXT:    ret i128 [[TMP2]]
 876 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_hu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.hu on <8 x i16> with i32 1, coerced through i128.
 877 v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); }
 878 // CHECK-LABEL: @vmaxi_wu(
 879 // CHECK-NEXT:  entry:
 880 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 881 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
 882 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 883 // CHECK-NEXT:    ret i128 [[TMP2]]
 884 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_wu with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.wu on <4 x i32> with i32 1, coerced through i128.
 885 v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); }
 886 // CHECK-LABEL: @vmaxi_du(
 887 // CHECK-NEXT:  entry:
 888 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 889 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
 890 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 891 // CHECK-NEXT:    ret i128 [[TMP2]]
 892 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmaxi_du with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmaxi.du on <2 x i64> with i32 1, coerced through i128.
 893 v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); }
 894 // CHECK-LABEL: @vmin_b(
 895 // CHECK-NEXT:  entry:
 896 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 897 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 898 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 899 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 900 // CHECK-NEXT:    ret i128 [[TMP3]]
 901 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_b. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmin.b
 // on <16 x i8> operands, with arguments and result coerced through i128.
 902 v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); }
 903 // CHECK-LABEL: @vmin_h(
 904 // CHECK-NEXT:  entry:
 905 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 906 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 907 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 908 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 909 // CHECK-NEXT:    ret i128 [[TMP3]]
 910 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_h. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmin.h
 // on <8 x i16> operands, with arguments and result coerced through i128.
 911 v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); }
 912 // CHECK-LABEL: @vmin_w(
 913 // CHECK-NEXT:  entry:
 914 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 915 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 916 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 917 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 918 // CHECK-NEXT:    ret i128 [[TMP3]]
 919 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_w. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmin.w
 // on <4 x i32> operands, with arguments and result coerced through i128.
 920 v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); }
 921 // CHECK-LABEL: @vmin_d(
 922 // CHECK-NEXT:  entry:
 923 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 924 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 925 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
 926 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 927 // CHECK-NEXT:    ret i128 [[TMP3]]
 928 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_d. The
 // FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vmin.d
 // on <2 x i64> operands, with arguments and result coerced through i128.
 929 v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); }
 930 // CHECK-LABEL: @vmini_b(
 931 // CHECK-NEXT:  entry:
 932 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 933 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
 934 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 935 // CHECK-NEXT:    ret i128 [[TMP2]]
 936 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmini_b with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmini.b on <16 x i8> with i32 1, coerced through i128.
 937 v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); }
 938 // CHECK-LABEL: @vmini_h(
 939 // CHECK-NEXT:  entry:
 940 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 941 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
 942 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
 943 // CHECK-NEXT:    ret i128 [[TMP2]]
 944 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmini_h with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmini.h on <8 x i16> with i32 1, coerced through i128.
 945 v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); }
 946 // CHECK-LABEL: @vmini_w(
 947 // CHECK-NEXT:  entry:
 948 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 949 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
 950 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 951 // CHECK-NEXT:    ret i128 [[TMP2]]
 952 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmini_w with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmini.w on <4 x i32> with i32 1, coerced through i128.
 953 v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); }
 954 // CHECK-LABEL: @vmini_d(
 955 // CHECK-NEXT:  entry:
 956 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 957 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
 958 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 959 // CHECK-NEXT:    ret i128 [[TMP2]]
 960 //
 // Test wrapper: forwards _1 to __builtin_lsx_vmini_d with the second operand
 // fixed at the literal 1. The FileCheck lines above expect a single tail call
 // to @llvm.loongarch.lsx.vmini.d on <2 x i64> with i32 1, coerced through i128.
 961 v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); }
 962 // CHECK-LABEL: @vmin_bu(
 963 // CHECK-NEXT:  entry:
 964 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
 965 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
 966 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 967 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 968 // CHECK-NEXT:    ret i128 [[TMP3]]
 969 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_bu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmin.bu on <16 x i8> operands, with arguments and
 // result coerced through i128.
 970 v16u8 vmin_bu(v16u8 _1, v16u8 _2) {
 971   return __builtin_lsx_vmin_bu(_1, _2);
 972 }
 973 // CHECK-LABEL: @vmin_hu(
 974 // CHECK-NEXT:  entry:
 975 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
 976 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
 977 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 978 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 979 // CHECK-NEXT:    ret i128 [[TMP3]]
 980 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_hu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmin.hu on <8 x i16> operands, with arguments and
 // result coerced through i128.
 981 v8u16 vmin_hu(v8u16 _1, v8u16 _2) {
 982   return __builtin_lsx_vmin_hu(_1, _2);
 983 }
 984 // CHECK-LABEL: @vmin_wu(
 985 // CHECK-NEXT:  entry:
 986 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
 987 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
 988 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 989 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 990 // CHECK-NEXT:    ret i128 [[TMP3]]
 991 //
 // Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_wu. The
 // FileCheck lines above expect a single tail call to
 // @llvm.loongarch.lsx.vmin.wu on <4 x i32> operands, with arguments and
 // result coerced through i128.
 992 v4u32 vmin_wu(v4u32 _1, v4u32 _2) {
 993   return __builtin_lsx_vmin_wu(_1, _2);
 994 }
 995 // CHECK-LABEL: @vmin_du(
 996 // CHECK-NEXT:  entry:
 997 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
 998 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
 999 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1000 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1001 // CHECK-NEXT:    ret i128 [[TMP3]]
1002 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vmin_du. The
// FileCheck lines above expect a single tail call to
// @llvm.loongarch.lsx.vmin.du on <2 x i64> operands, with arguments and
// result coerced through i128.
1003 v2u64 vmin_du(v2u64 _1, v2u64 _2) {
1004   return __builtin_lsx_vmin_du(_1, _2);
1005 }
1006 // CHECK-LABEL: @vmini_bu(
1007 // CHECK-NEXT:  entry:
1008 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1009 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
1010 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1011 // CHECK-NEXT:    ret i128 [[TMP2]]
1012 //
// Test wrapper: forwards _1 to __builtin_lsx_vmini_bu with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vmini.bu on <16 x i8> with i32 1, coerced through i128.
1013 v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); }
1014 // CHECK-LABEL: @vmini_hu(
1015 // CHECK-NEXT:  entry:
1016 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1017 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
1018 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1019 // CHECK-NEXT:    ret i128 [[TMP2]]
1020 //
// Test wrapper: forwards _1 to __builtin_lsx_vmini_hu with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vmini.hu on <8 x i16> with i32 1, coerced through i128.
1021 v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); }
1022 // CHECK-LABEL: @vmini_wu(
1023 // CHECK-NEXT:  entry:
1024 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1025 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
1026 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1027 // CHECK-NEXT:    ret i128 [[TMP2]]
1028 //
// Test wrapper: forwards _1 to __builtin_lsx_vmini_wu with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vmini.wu on <4 x i32> with i32 1, coerced through i128.
1029 v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); }
1030 // CHECK-LABEL: @vmini_du(
1031 // CHECK-NEXT:  entry:
1032 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1033 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
1034 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1035 // CHECK-NEXT:    ret i128 [[TMP2]]
1036 //
// Test wrapper: forwards _1 to __builtin_lsx_vmini_du with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vmini.du on <2 x i64> with i32 1, coerced through i128.
1037 v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); }
1038 // CHECK-LABEL: @vseq_b(
1039 // CHECK-NEXT:  entry:
1040 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1041 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1042 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1043 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1044 // CHECK-NEXT:    ret i128 [[TMP3]]
1045 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vseq_b. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vseq.b
// on <16 x i8> operands, with arguments and result coerced through i128.
1046 v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); }
1047 // CHECK-LABEL: @vseq_h(
1048 // CHECK-NEXT:  entry:
1049 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1050 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1051 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1052 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1053 // CHECK-NEXT:    ret i128 [[TMP3]]
1054 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vseq_h. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vseq.h
// on <8 x i16> operands, with arguments and result coerced through i128.
1055 v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); }
1056 // CHECK-LABEL: @vseq_w(
1057 // CHECK-NEXT:  entry:
1058 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1059 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1060 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1061 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1062 // CHECK-NEXT:    ret i128 [[TMP3]]
1063 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vseq_w. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vseq.w
// on <4 x i32> operands, with arguments and result coerced through i128.
1064 v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); }
1065 // CHECK-LABEL: @vseq_d(
1066 // CHECK-NEXT:  entry:
1067 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1068 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1069 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1070 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1071 // CHECK-NEXT:    ret i128 [[TMP3]]
1072 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vseq_d. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vseq.d
// on <2 x i64> operands, with arguments and result coerced through i128.
1073 v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); }
1074 // CHECK-LABEL: @vseqi_b(
1075 // CHECK-NEXT:  entry:
1076 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1077 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
1078 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1079 // CHECK-NEXT:    ret i128 [[TMP2]]
1080 //
// Test wrapper: forwards _1 to __builtin_lsx_vseqi_b with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vseqi.b on <16 x i8> with i32 1, coerced through i128.
1081 v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); }
1082 // CHECK-LABEL: @vseqi_h(
1083 // CHECK-NEXT:  entry:
1084 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1085 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
1086 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1087 // CHECK-NEXT:    ret i128 [[TMP2]]
1088 //
// Test wrapper: forwards _1 to __builtin_lsx_vseqi_h with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vseqi.h on <8 x i16> with i32 1, coerced through i128.
1089 v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); }
1090 // CHECK-LABEL: @vseqi_w(
1091 // CHECK-NEXT:  entry:
1092 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1093 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
1094 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1095 // CHECK-NEXT:    ret i128 [[TMP2]]
1096 //
// Test wrapper: forwards _1 to __builtin_lsx_vseqi_w with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vseqi.w on <4 x i32> with i32 1, coerced through i128.
1097 v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); }
1098 // CHECK-LABEL: @vseqi_d(
1099 // CHECK-NEXT:  entry:
1100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1101 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
1102 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1103 // CHECK-NEXT:    ret i128 [[TMP2]]
1104 //
// Test wrapper: forwards _1 to __builtin_lsx_vseqi_d with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vseqi.d on <2 x i64> with i32 1, coerced through i128.
1105 v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); }
1106 // CHECK-LABEL: @vslti_b(
1107 // CHECK-NEXT:  entry:
1108 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1109 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
1110 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1111 // CHECK-NEXT:    ret i128 [[TMP2]]
1112 //
// Test wrapper: forwards _1 to __builtin_lsx_vslti_b with the second operand
// fixed at the literal 1. The FileCheck lines above expect a single tail call
// to @llvm.loongarch.lsx.vslti.b on <16 x i8> with i32 1, coerced through i128.
1113 v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); }
1114 // CHECK-LABEL: @vslt_b(
1115 // CHECK-NEXT:  entry:
1116 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1117 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1118 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1119 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1120 // CHECK-NEXT:    ret i128 [[TMP3]]
1121 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vslt_b. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vslt.b
// on <16 x i8> operands, with arguments and result coerced through i128.
1122 v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); }
1123 // CHECK-LABEL: @vslt_h(
1124 // CHECK-NEXT:  entry:
1125 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1126 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1127 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1128 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1129 // CHECK-NEXT:    ret i128 [[TMP3]]
1130 //
// Test wrapper: passes _1 and _2 unchanged to __builtin_lsx_vslt_h. The
// FileCheck lines above expect a single tail call to @llvm.loongarch.lsx.vslt.h
// on <8 x i16> operands, with arguments and result coerced through i128.
1131 v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); }
1132 // CHECK-LABEL: @vslt_w(
1133 // CHECK-NEXT:  entry:
1134 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1135 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1136 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1137 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1138 // CHECK-NEXT:    ret i128 [[TMP3]]
1139 //
// Passes _1 and _2 to __builtin_lsx_vslt_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.w on two <4 x i32> operands.
1140 v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); }
1141 // CHECK-LABEL: @vslt_d(
1142 // CHECK-NEXT:  entry:
1143 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1144 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1145 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1146 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1147 // CHECK-NEXT:    ret i128 [[TMP3]]
1148 //
// Passes _1 and _2 to __builtin_lsx_vslt_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.d on two <2 x i64> operands.
1149 v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); }
1150 // CHECK-LABEL: @vslti_h(
1151 // CHECK-NEXT:  entry:
1152 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1153 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
1154 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1155 // CHECK-NEXT:    ret i128 [[TMP2]]
1156 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_h. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32 1).
1157 v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); }
1158 // CHECK-LABEL: @vslti_w(
1159 // CHECK-NEXT:  entry:
1160 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1161 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
1162 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1163 // CHECK-NEXT:    ret i128 [[TMP2]]
1164 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_w. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32 1).
1165 v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); }
1166 // CHECK-LABEL: @vslti_d(
1167 // CHECK-NEXT:  entry:
1168 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1169 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
1170 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1171 // CHECK-NEXT:    ret i128 [[TMP2]]
1172 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_d. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32 1).
1173 v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); }
1174 // CHECK-LABEL: @vslt_bu(
1175 // CHECK-NEXT:  entry:
1176 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1177 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1178 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1179 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1180 // CHECK-NEXT:    ret i128 [[TMP3]]
1181 //
// Passes _1 and _2 to __builtin_lsx_vslt_bu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.bu on two <16 x i8> operands.
// Operands are v16u8 while the declared return type is the signed v16i8.
1182 v16i8 vslt_bu(v16u8 _1, v16u8 _2) {
1183   return __builtin_lsx_vslt_bu(_1, _2);
1184 }
1185 // CHECK-LABEL: @vslt_hu(
1186 // CHECK-NEXT:  entry:
1187 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1188 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1189 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1190 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1191 // CHECK-NEXT:    ret i128 [[TMP3]]
1192 //
// Passes _1 and _2 to __builtin_lsx_vslt_hu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.hu on two <8 x i16> operands.
// Operands are v8u16 while the declared return type is the signed v8i16.
1193 v8i16 vslt_hu(v8u16 _1, v8u16 _2) {
1194   return __builtin_lsx_vslt_hu(_1, _2);
1195 }
1196 // CHECK-LABEL: @vslt_wu(
1197 // CHECK-NEXT:  entry:
1198 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1199 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1200 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1201 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1202 // CHECK-NEXT:    ret i128 [[TMP3]]
1203 //
// Passes _1 and _2 to __builtin_lsx_vslt_wu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.wu on two <4 x i32> operands.
// Operands are v4u32 while the declared return type is the signed v4i32.
1204 v4i32 vslt_wu(v4u32 _1, v4u32 _2) {
1205   return __builtin_lsx_vslt_wu(_1, _2);
1206 }
1207 // CHECK-LABEL: @vslt_du(
1208 // CHECK-NEXT:  entry:
1209 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1210 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1211 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1212 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1213 // CHECK-NEXT:    ret i128 [[TMP3]]
1214 //
// Passes _1 and _2 to __builtin_lsx_vslt_du. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vslt.du on two <2 x i64> operands.
// Operands are v2u64 while the declared return type is the signed v2i64.
1215 v2i64 vslt_du(v2u64 _1, v2u64 _2) {
1216   return __builtin_lsx_vslt_du(_1, _2);
1217 }
1218 // CHECK-LABEL: @vslti_bu(
1219 // CHECK-NEXT:  entry:
1220 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1221 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
1222 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1223 // CHECK-NEXT:    ret i128 [[TMP2]]
1224 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_bu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32 1).
// The operand is v16u8 while the declared return type is the signed v16i8.
1225 v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); }
1226 // CHECK-LABEL: @vslti_hu(
1227 // CHECK-NEXT:  entry:
1228 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1229 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
1230 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1231 // CHECK-NEXT:    ret i128 [[TMP2]]
1232 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_hu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32 1).
// The operand is v8u16 while the declared return type is the signed v8i16.
1233 v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); }
1234 // CHECK-LABEL: @vslti_wu(
1235 // CHECK-NEXT:  entry:
1236 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1237 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
1238 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1239 // CHECK-NEXT:    ret i128 [[TMP2]]
1240 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_wu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32 1).
// The operand is v4u32 while the declared return type is the signed v4i32.
1241 v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); }
1242 // CHECK-LABEL: @vslti_du(
1243 // CHECK-NEXT:  entry:
1244 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1245 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
1246 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1247 // CHECK-NEXT:    ret i128 [[TMP2]]
1248 //
// Passes _1 and the constant 1 to __builtin_lsx_vslti_du. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32 1).
// The operand is v2u64 while the declared return type is the signed v2i64.
1249 v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); }
1250 // CHECK-LABEL: @vsle_b(
1251 // CHECK-NEXT:  entry:
1252 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1253 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1254 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1255 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1256 // CHECK-NEXT:    ret i128 [[TMP3]]
1257 //
// Passes _1 and _2 to __builtin_lsx_vsle_b. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.b on two <16 x i8> operands.
1258 v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); }
1259 // CHECK-LABEL: @vsle_h(
1260 // CHECK-NEXT:  entry:
1261 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1262 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1263 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1264 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1265 // CHECK-NEXT:    ret i128 [[TMP3]]
1266 //
// Passes _1 and _2 to __builtin_lsx_vsle_h. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.h on two <8 x i16> operands.
1267 v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); }
1268 // CHECK-LABEL: @vsle_w(
1269 // CHECK-NEXT:  entry:
1270 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1271 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1272 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1273 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1274 // CHECK-NEXT:    ret i128 [[TMP3]]
1275 //
// Passes _1 and _2 to __builtin_lsx_vsle_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.w on two <4 x i32> operands.
1276 v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); }
1277 // CHECK-LABEL: @vsle_d(
1278 // CHECK-NEXT:  entry:
1279 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1280 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1281 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1282 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1283 // CHECK-NEXT:    ret i128 [[TMP3]]
1284 //
// Passes _1 and _2 to __builtin_lsx_vsle_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.d on two <2 x i64> operands.
1285 v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); }
1286 // CHECK-LABEL: @vslei_b(
1287 // CHECK-NEXT:  entry:
1288 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1289 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
1290 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1291 // CHECK-NEXT:    ret i128 [[TMP2]]
1292 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_b. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32 1).
1293 v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); }
1294 // CHECK-LABEL: @vslei_h(
1295 // CHECK-NEXT:  entry:
1296 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1297 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
1298 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1299 // CHECK-NEXT:    ret i128 [[TMP2]]
1300 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_h. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32 1).
1301 v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); }
1302 // CHECK-LABEL: @vslei_w(
1303 // CHECK-NEXT:  entry:
1304 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1305 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
1306 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1307 // CHECK-NEXT:    ret i128 [[TMP2]]
1308 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_w. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32 1).
1309 v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); }
1310 // CHECK-LABEL: @vslei_d(
1311 // CHECK-NEXT:  entry:
1312 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1313 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
1314 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1315 // CHECK-NEXT:    ret i128 [[TMP2]]
1316 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_d. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32 1).
1317 v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); }
1318 // CHECK-LABEL: @vsle_bu(
1319 // CHECK-NEXT:  entry:
1320 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1321 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1322 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1323 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1324 // CHECK-NEXT:    ret i128 [[TMP3]]
1325 //
// Passes _1 and _2 to __builtin_lsx_vsle_bu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.bu on two <16 x i8> operands.
// Operands are v16u8 while the declared return type is the signed v16i8.
1326 v16i8 vsle_bu(v16u8 _1, v16u8 _2) {
1327   return __builtin_lsx_vsle_bu(_1, _2);
1328 }
1329 // CHECK-LABEL: @vsle_hu(
1330 // CHECK-NEXT:  entry:
1331 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1332 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1333 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1334 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1335 // CHECK-NEXT:    ret i128 [[TMP3]]
1336 //
// Passes _1 and _2 to __builtin_lsx_vsle_hu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.hu on two <8 x i16> operands.
// Operands are v8u16 while the declared return type is the signed v8i16.
1337 v8i16 vsle_hu(v8u16 _1, v8u16 _2) {
1338   return __builtin_lsx_vsle_hu(_1, _2);
1339 }
1340 // CHECK-LABEL: @vsle_wu(
1341 // CHECK-NEXT:  entry:
1342 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1343 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1344 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1345 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1346 // CHECK-NEXT:    ret i128 [[TMP3]]
1347 //
// Passes _1 and _2 to __builtin_lsx_vsle_wu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.wu on two <4 x i32> operands.
// Operands are v4u32 while the declared return type is the signed v4i32.
1348 v4i32 vsle_wu(v4u32 _1, v4u32 _2) {
1349   return __builtin_lsx_vsle_wu(_1, _2);
1350 }
1351 // CHECK-LABEL: @vsle_du(
1352 // CHECK-NEXT:  entry:
1353 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1354 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1355 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1356 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1357 // CHECK-NEXT:    ret i128 [[TMP3]]
1358 //
// Passes _1 and _2 to __builtin_lsx_vsle_du. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsle.du on two <2 x i64> operands.
// Operands are v2u64 while the declared return type is the signed v2i64.
1359 v2i64 vsle_du(v2u64 _1, v2u64 _2) {
1360   return __builtin_lsx_vsle_du(_1, _2);
1361 }
1362 // CHECK-LABEL: @vslei_bu(
1363 // CHECK-NEXT:  entry:
1364 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1365 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
1366 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1367 // CHECK-NEXT:    ret i128 [[TMP2]]
1368 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_bu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32 1).
// The operand is v16u8 while the declared return type is the signed v16i8.
1369 v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); }
1370 // CHECK-LABEL: @vslei_hu(
1371 // CHECK-NEXT:  entry:
1372 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1373 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
1374 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1375 // CHECK-NEXT:    ret i128 [[TMP2]]
1376 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_hu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32 1).
// The operand is v8u16 while the declared return type is the signed v8i16.
1377 v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); }
1378 // CHECK-LABEL: @vslei_wu(
1379 // CHECK-NEXT:  entry:
1380 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1381 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
1382 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1383 // CHECK-NEXT:    ret i128 [[TMP2]]
1384 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_wu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32 1).
// The operand is v4u32 while the declared return type is the signed v4i32.
1385 v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); }
1386 // CHECK-LABEL: @vslei_du(
1387 // CHECK-NEXT:  entry:
1388 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1389 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
1390 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1391 // CHECK-NEXT:    ret i128 [[TMP2]]
1392 //
// Passes _1 and the constant 1 to __builtin_lsx_vslei_du. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32 1).
// The operand is v2u64 while the declared return type is the signed v2i64.
1393 v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); }
1394 // CHECK-LABEL: @vsat_b(
1395 // CHECK-NEXT:  entry:
1396 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1397 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
1398 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1399 // CHECK-NEXT:    ret i128 [[TMP2]]
1400 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_b. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32 1).
1401 v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); }
1402 // CHECK-LABEL: @vsat_h(
1403 // CHECK-NEXT:  entry:
1404 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1405 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
1406 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1407 // CHECK-NEXT:    ret i128 [[TMP2]]
1408 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_h. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32 1).
1409 v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); }
1410 // CHECK-LABEL: @vsat_w(
1411 // CHECK-NEXT:  entry:
1412 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1413 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
1414 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1415 // CHECK-NEXT:    ret i128 [[TMP2]]
1416 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_w. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32 1).
1417 v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); }
1418 // CHECK-LABEL: @vsat_d(
1419 // CHECK-NEXT:  entry:
1420 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1421 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
1422 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1423 // CHECK-NEXT:    ret i128 [[TMP2]]
1424 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_d. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32 1).
1425 v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); }
1426 // CHECK-LABEL: @vsat_bu(
1427 // CHECK-NEXT:  entry:
1428 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1429 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
1430 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1431 // CHECK-NEXT:    ret i128 [[TMP2]]
1432 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_bu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32 1).
1433 v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); }
1434 // CHECK-LABEL: @vsat_hu(
1435 // CHECK-NEXT:  entry:
1436 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1437 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
1438 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1439 // CHECK-NEXT:    ret i128 [[TMP2]]
1440 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_hu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32 1).
1441 v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); }
1442 // CHECK-LABEL: @vsat_wu(
1443 // CHECK-NEXT:  entry:
1444 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1445 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
1446 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1447 // CHECK-NEXT:    ret i128 [[TMP2]]
1448 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_wu. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32 1).
1449 v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); }
1450 // CHECK-LABEL: @vsat_du(
1451 // CHECK-NEXT:  entry:
1452 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1453 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
1454 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1455 // CHECK-NEXT:    ret i128 [[TMP2]]
1456 //
// Passes _1 and the constant 1 to __builtin_lsx_vsat_du. The assertions above
// expect a tail call to @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32 1).
1457 v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); }
1458 // CHECK-LABEL: @vadda_b(
1459 // CHECK-NEXT:  entry:
1460 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1461 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1462 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1463 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1464 // CHECK-NEXT:    ret i128 [[TMP3]]
1465 //
// Passes _1 and _2 to __builtin_lsx_vadda_b. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vadda.b on two <16 x i8> operands.
1466 v16i8 vadda_b(v16i8 _1, v16i8 _2) {
1467   return __builtin_lsx_vadda_b(_1, _2);
1468 }
1469 // CHECK-LABEL: @vadda_h(
1470 // CHECK-NEXT:  entry:
1471 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1472 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1473 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1474 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1475 // CHECK-NEXT:    ret i128 [[TMP3]]
1476 //
// Passes _1 and _2 to __builtin_lsx_vadda_h. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vadda.h on two <8 x i16> operands.
1477 v8i16 vadda_h(v8i16 _1, v8i16 _2) {
1478   return __builtin_lsx_vadda_h(_1, _2);
1479 }
1480 // CHECK-LABEL: @vadda_w(
1481 // CHECK-NEXT:  entry:
1482 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1483 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1484 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1485 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1486 // CHECK-NEXT:    ret i128 [[TMP3]]
1487 //
// Passes _1 and _2 to __builtin_lsx_vadda_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vadda.w on two <4 x i32> operands.
1488 v4i32 vadda_w(v4i32 _1, v4i32 _2) {
1489   return __builtin_lsx_vadda_w(_1, _2);
1490 }
1491 // CHECK-LABEL: @vadda_d(
1492 // CHECK-NEXT:  entry:
1493 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1494 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1495 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1496 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1497 // CHECK-NEXT:    ret i128 [[TMP3]]
1498 //
// Passes _1 and _2 to __builtin_lsx_vadda_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vadda.d on two <2 x i64> operands.
1499 v2i64 vadda_d(v2i64 _1, v2i64 _2) {
1500   return __builtin_lsx_vadda_d(_1, _2);
1501 }
1502 // CHECK-LABEL: @vsadd_b(
1503 // CHECK-NEXT:  entry:
1504 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1505 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1506 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1507 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1508 // CHECK-NEXT:    ret i128 [[TMP3]]
1509 //
// Passes _1 and _2 to __builtin_lsx_vsadd_b. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsadd.b on two <16 x i8> operands.
1510 v16i8 vsadd_b(v16i8 _1, v16i8 _2) {
1511   return __builtin_lsx_vsadd_b(_1, _2);
1512 }
1513 // CHECK-LABEL: @vsadd_h(
1514 // CHECK-NEXT:  entry:
1515 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1516 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1517 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1518 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1519 // CHECK-NEXT:    ret i128 [[TMP3]]
1520 //
// Passes _1 and _2 to __builtin_lsx_vsadd_h. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsadd.h on two <8 x i16> operands.
1521 v8i16 vsadd_h(v8i16 _1, v8i16 _2) {
1522   return __builtin_lsx_vsadd_h(_1, _2);
1523 }
1524 // CHECK-LABEL: @vsadd_w(
1525 // CHECK-NEXT:  entry:
1526 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1527 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1528 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1529 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1530 // CHECK-NEXT:    ret i128 [[TMP3]]
1531 //
// Passes _1 and _2 to __builtin_lsx_vsadd_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsadd.w on two <4 x i32> operands.
1532 v4i32 vsadd_w(v4i32 _1, v4i32 _2) {
1533   return __builtin_lsx_vsadd_w(_1, _2);
1534 }
1535 // CHECK-LABEL: @vsadd_d(
1536 // CHECK-NEXT:  entry:
1537 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1538 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1539 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1540 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1541 // CHECK-NEXT:    ret i128 [[TMP3]]
1542 //
// Passes _1 and _2 to __builtin_lsx_vsadd_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vsadd.d on two <2 x i64> operands.
1543 v2i64 vsadd_d(v2i64 _1, v2i64 _2) {
1544   return __builtin_lsx_vsadd_d(_1, _2);
1545 }
1546 // CHECK-LABEL: @vsadd_bu(
1547 // CHECK-NEXT:  entry:
1548 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1549 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1550 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1551 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1552 // CHECK-NEXT:    ret i128 [[TMP3]]
1553 //
// Passes _1 and _2 to __builtin_lsx_vsadd_bu. The assertions above expect a
// tail call to @llvm.loongarch.lsx.vsadd.bu on two <16 x i8> operands.
1554 v16u8 vsadd_bu(v16u8 _1, v16u8 _2) {
1555   return __builtin_lsx_vsadd_bu(_1, _2);
1556 }
1557 // CHECK-LABEL: @vsadd_hu(
1558 // CHECK-NEXT:  entry:
1559 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1560 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1561 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1562 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1563 // CHECK-NEXT:    ret i128 [[TMP3]]
1564 //
// Passes _1 and _2 to __builtin_lsx_vsadd_hu. The assertions above expect a
// tail call to @llvm.loongarch.lsx.vsadd.hu on two <8 x i16> operands.
1565 v8u16 vsadd_hu(v8u16 _1, v8u16 _2) {
1566   return __builtin_lsx_vsadd_hu(_1, _2);
1567 }
1568 // CHECK-LABEL: @vsadd_wu(
1569 // CHECK-NEXT:  entry:
1570 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1571 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1572 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1573 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1574 // CHECK-NEXT:    ret i128 [[TMP3]]
1575 //
// Passes _1 and _2 to __builtin_lsx_vsadd_wu. The assertions above expect a
// tail call to @llvm.loongarch.lsx.vsadd.wu on two <4 x i32> operands.
1576 v4u32 vsadd_wu(v4u32 _1, v4u32 _2) {
1577   return __builtin_lsx_vsadd_wu(_1, _2);
1578 }
1579 // CHECK-LABEL: @vsadd_du(
1580 // CHECK-NEXT:  entry:
1581 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1582 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1583 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1584 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1585 // CHECK-NEXT:    ret i128 [[TMP3]]
1586 //
// Passes _1 and _2 to __builtin_lsx_vsadd_du. The assertions above expect a
// tail call to @llvm.loongarch.lsx.vsadd.du on two <2 x i64> operands.
1587 v2u64 vsadd_du(v2u64 _1, v2u64 _2) {
1588   return __builtin_lsx_vsadd_du(_1, _2);
1589 }
1590 // CHECK-LABEL: @vavg_b(
1591 // CHECK-NEXT:  entry:
1592 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1593 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1594 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1595 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1596 // CHECK-NEXT:    ret i128 [[TMP3]]
1597 //
// Passes _1 and _2 to __builtin_lsx_vavg_b. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.b on two <16 x i8> operands.
1598 v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); }
1599 // CHECK-LABEL: @vavg_h(
1600 // CHECK-NEXT:  entry:
1601 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1602 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1603 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1604 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1605 // CHECK-NEXT:    ret i128 [[TMP3]]
1606 //
// Passes _1 and _2 to __builtin_lsx_vavg_h. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.h on two <8 x i16> operands.
1607 v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); }
1608 // CHECK-LABEL: @vavg_w(
1609 // CHECK-NEXT:  entry:
1610 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1611 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1612 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1613 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1614 // CHECK-NEXT:    ret i128 [[TMP3]]
1615 //
// Passes _1 and _2 to __builtin_lsx_vavg_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.w on two <4 x i32> operands.
1616 v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); }
1617 // CHECK-LABEL: @vavg_d(
1618 // CHECK-NEXT:  entry:
1619 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1620 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1621 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1622 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1623 // CHECK-NEXT:    ret i128 [[TMP3]]
1624 //
// Passes _1 and _2 to __builtin_lsx_vavg_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.d on two <2 x i64> operands.
1625 v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); }
1626 // CHECK-LABEL: @vavg_bu(
1627 // CHECK-NEXT:  entry:
1628 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1629 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1630 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1631 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1632 // CHECK-NEXT:    ret i128 [[TMP3]]
1633 //
// Passes _1 and _2 to __builtin_lsx_vavg_bu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.bu on two <16 x i8> operands.
1634 v16u8 vavg_bu(v16u8 _1, v16u8 _2) {
1635   return __builtin_lsx_vavg_bu(_1, _2);
1636 }
1637 // CHECK-LABEL: @vavg_hu(
1638 // CHECK-NEXT:  entry:
1639 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1640 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1641 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1642 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1643 // CHECK-NEXT:    ret i128 [[TMP3]]
1644 //
// Passes _1 and _2 to __builtin_lsx_vavg_hu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.hu on two <8 x i16> operands.
1645 v8u16 vavg_hu(v8u16 _1, v8u16 _2) {
1646   return __builtin_lsx_vavg_hu(_1, _2);
1647 }
1648 // CHECK-LABEL: @vavg_wu(
1649 // CHECK-NEXT:  entry:
1650 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1651 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1652 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1653 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1654 // CHECK-NEXT:    ret i128 [[TMP3]]
1655 //
// Passes _1 and _2 to __builtin_lsx_vavg_wu. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.wu on two <4 x i32> operands.
1656 v4u32 vavg_wu(v4u32 _1, v4u32 _2) {
1657   return __builtin_lsx_vavg_wu(_1, _2);
1658 }
1659 // CHECK-LABEL: @vavg_du(
1660 // CHECK-NEXT:  entry:
1661 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1662 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1663 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1664 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1665 // CHECK-NEXT:    ret i128 [[TMP3]]
1666 //
// Passes _1 and _2 to __builtin_lsx_vavg_du. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavg.du on two <2 x i64> operands.
1667 v2u64 vavg_du(v2u64 _1, v2u64 _2) {
1668   return __builtin_lsx_vavg_du(_1, _2);
1669 }
1670 // CHECK-LABEL: @vavgr_b(
1671 // CHECK-NEXT:  entry:
1672 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1673 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1674 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1675 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1676 // CHECK-NEXT:    ret i128 [[TMP3]]
1677 //
// Passes _1 and _2 to __builtin_lsx_vavgr_b. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavgr.b on two <16 x i8> operands.
1678 v16i8 vavgr_b(v16i8 _1, v16i8 _2) {
1679   return __builtin_lsx_vavgr_b(_1, _2);
1680 }
1681 // CHECK-LABEL: @vavgr_h(
1682 // CHECK-NEXT:  entry:
1683 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1684 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1685 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1686 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1687 // CHECK-NEXT:    ret i128 [[TMP3]]
1688 //
// Passes _1 and _2 to __builtin_lsx_vavgr_h. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavgr.h on two <8 x i16> operands.
1689 v8i16 vavgr_h(v8i16 _1, v8i16 _2) {
1690   return __builtin_lsx_vavgr_h(_1, _2);
1691 }
1692 // CHECK-LABEL: @vavgr_w(
1693 // CHECK-NEXT:  entry:
1694 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1695 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1696 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1697 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1698 // CHECK-NEXT:    ret i128 [[TMP3]]
1699 //
// Passes _1 and _2 to __builtin_lsx_vavgr_w. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavgr.w on two <4 x i32> operands.
1700 v4i32 vavgr_w(v4i32 _1, v4i32 _2) {
1701   return __builtin_lsx_vavgr_w(_1, _2);
1702 }
1703 // CHECK-LABEL: @vavgr_d(
1704 // CHECK-NEXT:  entry:
1705 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1706 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1707 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1708 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1709 // CHECK-NEXT:    ret i128 [[TMP3]]
1710 //
// Passes _1 and _2 to __builtin_lsx_vavgr_d. The assertions above expect a tail
// call to @llvm.loongarch.lsx.vavgr.d on two <2 x i64> operands.
1711 v2i64 vavgr_d(v2i64 _1, v2i64 _2) {
1712   return __builtin_lsx_vavgr_d(_1, _2);
1713 }
1714 // CHECK-LABEL: @vavgr_bu(
1715 // CHECK-NEXT:  entry:
1716 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1717 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1718 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1719 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1720 // CHECK-NEXT:    ret i128 [[TMP3]]
1721 //
1722 v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1723   return __builtin_lsx_vavgr_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vavgr.bu.
1724 }
1725 // CHECK-LABEL: @vavgr_hu(
1726 // CHECK-NEXT:  entry:
1727 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1728 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1729 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1730 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1731 // CHECK-NEXT:    ret i128 [[TMP3]]
1732 //
1733 v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1734   return __builtin_lsx_vavgr_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vavgr.hu.
1735 }
1736 // CHECK-LABEL: @vavgr_wu(
1737 // CHECK-NEXT:  entry:
1738 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1739 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1740 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1741 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1742 // CHECK-NEXT:    ret i128 [[TMP3]]
1743 //
1744 v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1745   return __builtin_lsx_vavgr_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vavgr.wu.
1746 }
1747 // CHECK-LABEL: @vavgr_du(
1748 // CHECK-NEXT:  entry:
1749 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1750 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1751 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1752 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1753 // CHECK-NEXT:    ret i128 [[TMP3]]
1754 //
1755 v2u64 vavgr_du(v2u64 _1, v2u64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1756   return __builtin_lsx_vavgr_du(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vavgr.du.
1757 }
1758 // CHECK-LABEL: @vssub_b(
1759 // CHECK-NEXT:  entry:
1760 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1761 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1762 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1763 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1764 // CHECK-NEXT:    ret i128 [[TMP3]]
1765 //
1766 v16i8 vssub_b(v16i8 _1, v16i8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1767   return __builtin_lsx_vssub_b(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.b.
1768 }
1769 // CHECK-LABEL: @vssub_h(
1770 // CHECK-NEXT:  entry:
1771 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1772 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1773 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1774 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1775 // CHECK-NEXT:    ret i128 [[TMP3]]
1776 //
1777 v8i16 vssub_h(v8i16 _1, v8i16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1778   return __builtin_lsx_vssub_h(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.h.
1779 }
1780 // CHECK-LABEL: @vssub_w(
1781 // CHECK-NEXT:  entry:
1782 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1783 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1784 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1785 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1786 // CHECK-NEXT:    ret i128 [[TMP3]]
1787 //
1788 v4i32 vssub_w(v4i32 _1, v4i32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1789   return __builtin_lsx_vssub_w(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.w.
1790 }
1791 // CHECK-LABEL: @vssub_d(
1792 // CHECK-NEXT:  entry:
1793 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1794 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1795 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1796 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1797 // CHECK-NEXT:    ret i128 [[TMP3]]
1798 //
1799 v2i64 vssub_d(v2i64 _1, v2i64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1800   return __builtin_lsx_vssub_d(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.d.
1801 }
1802 // CHECK-LABEL: @vssub_bu(
1803 // CHECK-NEXT:  entry:
1804 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1805 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1806 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1807 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1808 // CHECK-NEXT:    ret i128 [[TMP3]]
1809 //
1810 v16u8 vssub_bu(v16u8 _1, v16u8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1811   return __builtin_lsx_vssub_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.bu.
1812 }
1813 // CHECK-LABEL: @vssub_hu(
1814 // CHECK-NEXT:  entry:
1815 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1816 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1817 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1818 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1819 // CHECK-NEXT:    ret i128 [[TMP3]]
1820 //
1821 v8u16 vssub_hu(v8u16 _1, v8u16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1822   return __builtin_lsx_vssub_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.hu.
1823 }
1824 // CHECK-LABEL: @vssub_wu(
1825 // CHECK-NEXT:  entry:
1826 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1827 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1828 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1829 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1830 // CHECK-NEXT:    ret i128 [[TMP3]]
1831 //
1832 v4u32 vssub_wu(v4u32 _1, v4u32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1833   return __builtin_lsx_vssub_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.wu.
1834 }
1835 // CHECK-LABEL: @vssub_du(
1836 // CHECK-NEXT:  entry:
1837 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1838 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1839 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1840 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1841 // CHECK-NEXT:    ret i128 [[TMP3]]
1842 //
1843 v2u64 vssub_du(v2u64 _1, v2u64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1844   return __builtin_lsx_vssub_du(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vssub.du.
1845 }
1846 // CHECK-LABEL: @vabsd_b(
1847 // CHECK-NEXT:  entry:
1848 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1849 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1850 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1851 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1852 // CHECK-NEXT:    ret i128 [[TMP3]]
1853 //
1854 v16i8 vabsd_b(v16i8 _1, v16i8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1855   return __builtin_lsx_vabsd_b(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.b.
1856 }
1857 // CHECK-LABEL: @vabsd_h(
1858 // CHECK-NEXT:  entry:
1859 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1860 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1861 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1862 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1863 // CHECK-NEXT:    ret i128 [[TMP3]]
1864 //
1865 v8i16 vabsd_h(v8i16 _1, v8i16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1866   return __builtin_lsx_vabsd_h(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.h.
1867 }
1868 // CHECK-LABEL: @vabsd_w(
1869 // CHECK-NEXT:  entry:
1870 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1871 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1872 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1873 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1874 // CHECK-NEXT:    ret i128 [[TMP3]]
1875 //
1876 v4i32 vabsd_w(v4i32 _1, v4i32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1877   return __builtin_lsx_vabsd_w(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.w.
1878 }
1879 // CHECK-LABEL: @vabsd_d(
1880 // CHECK-NEXT:  entry:
1881 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1882 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1883 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1884 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1885 // CHECK-NEXT:    ret i128 [[TMP3]]
1886 //
1887 v2i64 vabsd_d(v2i64 _1, v2i64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1888   return __builtin_lsx_vabsd_d(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.d.
1889 }
1890 // CHECK-LABEL: @vabsd_bu(
1891 // CHECK-NEXT:  entry:
1892 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1893 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1894 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1895 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1896 // CHECK-NEXT:    ret i128 [[TMP3]]
1897 //
1898 v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1899   return __builtin_lsx_vabsd_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.bu.
1900 }
1901 // CHECK-LABEL: @vabsd_hu(
1902 // CHECK-NEXT:  entry:
1903 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1904 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1905 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1906 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1907 // CHECK-NEXT:    ret i128 [[TMP3]]
1908 //
1909 v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1910   return __builtin_lsx_vabsd_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.hu.
1911 }
1912 // CHECK-LABEL: @vabsd_wu(
1913 // CHECK-NEXT:  entry:
1914 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1915 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1916 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1917 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1918 // CHECK-NEXT:    ret i128 [[TMP3]]
1919 //
1920 v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1921   return __builtin_lsx_vabsd_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.wu.
1922 }
1923 // CHECK-LABEL: @vabsd_du(
1924 // CHECK-NEXT:  entry:
1925 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1926 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1927 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1928 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1929 // CHECK-NEXT:    ret i128 [[TMP3]]
1930 //
1931 v2u64 vabsd_du(v2u64 _1, v2u64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
1932   return __builtin_lsx_vabsd_du(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vabsd.du.
1933 }
1934 // CHECK-LABEL: @vmul_b(
1935 // CHECK-NEXT:  entry:
1936 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1937 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1938 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1939 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1940 // CHECK-NEXT:    ret i128 [[TMP3]]
1941 //
1942 v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmul.b.
1943 // CHECK-LABEL: @vmul_h(
1944 // CHECK-NEXT:  entry:
1945 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1946 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1947 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1948 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1949 // CHECK-NEXT:    ret i128 [[TMP3]]
1950 //
1951 v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmul.h.
1952 // CHECK-LABEL: @vmul_w(
1953 // CHECK-NEXT:  entry:
1954 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1955 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1956 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1957 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1958 // CHECK-NEXT:    ret i128 [[TMP3]]
1959 //
1960 v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmul.w.
1961 // CHECK-LABEL: @vmul_d(
1962 // CHECK-NEXT:  entry:
1963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1964 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1965 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1966 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1967 // CHECK-NEXT:    ret i128 [[TMP3]]
1968 //
1969 v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmul.d.
1970 // CHECK-LABEL: @vmadd_b(
1971 // CHECK-NEXT:  entry:
1972 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1973 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1974 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1975 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1976 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1977 // CHECK-NEXT:    ret i128 [[TMP4]]
1978 //
1979 v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
1980   return __builtin_lsx_vmadd_b(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmadd.b.
1981 }
1982 // CHECK-LABEL: @vmadd_h(
1983 // CHECK-NEXT:  entry:
1984 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1985 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1986 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1987 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1988 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1989 // CHECK-NEXT:    ret i128 [[TMP4]]
1990 //
1991 v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
1992   return __builtin_lsx_vmadd_h(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmadd.h.
1993 }
1994 // CHECK-LABEL: @vmadd_w(
1995 // CHECK-NEXT:  entry:
1996 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1997 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1998 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1999 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2000 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2001 // CHECK-NEXT:    ret i128 [[TMP4]]
2002 //
2003 v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2004   return __builtin_lsx_vmadd_w(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmadd.w.
2005 }
2006 // CHECK-LABEL: @vmadd_d(
2007 // CHECK-NEXT:  entry:
2008 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2009 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2010 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2011 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2012 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2013 // CHECK-NEXT:    ret i128 [[TMP4]]
2014 //
2015 v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2016   return __builtin_lsx_vmadd_d(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmadd.d.
2017 }
2018 // CHECK-LABEL: @vmsub_b(
2019 // CHECK-NEXT:  entry:
2020 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2021 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2022 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2023 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2024 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2025 // CHECK-NEXT:    ret i128 [[TMP4]]
2026 //
2027 v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2028   return __builtin_lsx_vmsub_b(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmsub.b.
2029 }
2030 // CHECK-LABEL: @vmsub_h(
2031 // CHECK-NEXT:  entry:
2032 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2033 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2034 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2035 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2036 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2037 // CHECK-NEXT:    ret i128 [[TMP4]]
2038 //
2039 v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2040   return __builtin_lsx_vmsub_h(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmsub.h.
2041 }
2042 // CHECK-LABEL: @vmsub_w(
2043 // CHECK-NEXT:  entry:
2044 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2045 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2046 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2047 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2048 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2049 // CHECK-NEXT:    ret i128 [[TMP4]]
2050 //
2051 v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2052   return __builtin_lsx_vmsub_w(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmsub.w.
2053 }
2054 // CHECK-LABEL: @vmsub_d(
2055 // CHECK-NEXT:  entry:
2056 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2057 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2058 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2059 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2060 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2061 // CHECK-NEXT:    ret i128 [[TMP4]]
2062 //
2063 v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { // Test wrapper: passes _1, _2 and _3 straight to the builtin.
2064   return __builtin_lsx_vmsub_d(_1, _2, _3); // Expected IR: call to @llvm.loongarch.lsx.vmsub.d.
2065 }
2066 // CHECK-LABEL: @vdiv_b(
2067 // CHECK-NEXT:  entry:
2068 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2069 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2070 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2071 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2072 // CHECK-NEXT:    ret i128 [[TMP3]]
2073 //
2074 v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vdiv.b.
2075 // CHECK-LABEL: @vdiv_h(
2076 // CHECK-NEXT:  entry:
2077 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2078 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2079 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2080 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2081 // CHECK-NEXT:    ret i128 [[TMP3]]
2082 //
2083 v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vdiv.h.
2084 // CHECK-LABEL: @vdiv_w(
2085 // CHECK-NEXT:  entry:
2086 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2087 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2088 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2089 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2090 // CHECK-NEXT:    ret i128 [[TMP3]]
2091 //
2092 v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vdiv.w.
2093 // CHECK-LABEL: @vdiv_d(
2094 // CHECK-NEXT:  entry:
2095 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2096 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2097 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2098 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2099 // CHECK-NEXT:    ret i128 [[TMP3]]
2100 //
2101 v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vdiv.d.
2102 // CHECK-LABEL: @vdiv_bu(
2103 // CHECK-NEXT:  entry:
2104 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2105 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2106 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2107 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2108 // CHECK-NEXT:    ret i128 [[TMP3]]
2109 //
2110 v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
2111   return __builtin_lsx_vdiv_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vdiv.bu.
2112 }
2113 // CHECK-LABEL: @vdiv_hu(
2114 // CHECK-NEXT:  entry:
2115 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2116 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2117 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2118 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2119 // CHECK-NEXT:    ret i128 [[TMP3]]
2120 //
2121 v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
2122   return __builtin_lsx_vdiv_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vdiv.hu.
2123 }
2124 // CHECK-LABEL: @vdiv_wu(
2125 // CHECK-NEXT:  entry:
2126 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2127 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2128 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2129 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2130 // CHECK-NEXT:    ret i128 [[TMP3]]
2131 //
2132 v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
2133   return __builtin_lsx_vdiv_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vdiv.wu.
2134 }
2135 // CHECK-LABEL: @vdiv_du(
2136 // CHECK-NEXT:  entry:
2137 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2138 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2139 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2140 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2141 // CHECK-NEXT:    ret i128 [[TMP3]]
2142 //
2143 v2u64 vdiv_du(v2u64 _1, v2u64 _2) { // Test wrapper: passes _1 and _2 straight to the builtin.
2144   return __builtin_lsx_vdiv_du(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vdiv.du.
2145 }
2146 // CHECK-LABEL: @vhaddw_h_b(
2147 // CHECK-NEXT:  entry:
2148 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2149 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2150 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2151 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2152 // CHECK-NEXT:    ret i128 [[TMP3]]
2153 //
2154 v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { // Test wrapper: v16i8 operands in, v8i16 result out.
2155   return __builtin_lsx_vhaddw_h_b(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.h.b.
2156 }
2157 // CHECK-LABEL: @vhaddw_w_h(
2158 // CHECK-NEXT:  entry:
2159 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2160 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2161 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2162 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2163 // CHECK-NEXT:    ret i128 [[TMP3]]
2164 //
2165 v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { // Test wrapper: v8i16 operands in, v4i32 result out.
2166   return __builtin_lsx_vhaddw_w_h(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.w.h.
2167 }
2168 // CHECK-LABEL: @vhaddw_d_w(
2169 // CHECK-NEXT:  entry:
2170 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2171 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2172 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2173 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2174 // CHECK-NEXT:    ret i128 [[TMP3]]
2175 //
2176 v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { // Test wrapper: v4i32 operands in, v2i64 result out.
2177   return __builtin_lsx_vhaddw_d_w(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.d.w.
2178 }
2179 // CHECK-LABEL: @vhaddw_hu_bu(
2180 // CHECK-NEXT:  entry:
2181 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2182 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2183 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2184 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2185 // CHECK-NEXT:    ret i128 [[TMP3]]
2186 //
2187 v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { // Test wrapper: v16u8 operands in, v8u16 result out.
2188   return __builtin_lsx_vhaddw_hu_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.hu.bu.
2189 }
2190 // CHECK-LABEL: @vhaddw_wu_hu(
2191 // CHECK-NEXT:  entry:
2192 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2193 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2194 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2195 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2196 // CHECK-NEXT:    ret i128 [[TMP3]]
2197 //
2198 v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { // Test wrapper: v8u16 operands in, v4u32 result out.
2199   return __builtin_lsx_vhaddw_wu_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.wu.hu.
2200 }
2201 // CHECK-LABEL: @vhaddw_du_wu(
2202 // CHECK-NEXT:  entry:
2203 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2204 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2205 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2206 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2207 // CHECK-NEXT:    ret i128 [[TMP3]]
2208 //
2209 v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { // Test wrapper: v4u32 operands in, v2u64 result out.
2210   return __builtin_lsx_vhaddw_du_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhaddw.du.wu.
2211 }
2212 // CHECK-LABEL: @vhsubw_h_b(
2213 // CHECK-NEXT:  entry:
2214 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2215 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2216 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2217 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2218 // CHECK-NEXT:    ret i128 [[TMP3]]
2219 //
2220 v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { // Test wrapper: v16i8 operands in, v8i16 result out.
2221   return __builtin_lsx_vhsubw_h_b(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.h.b.
2222 }
2223 // CHECK-LABEL: @vhsubw_w_h(
2224 // CHECK-NEXT:  entry:
2225 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2226 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2227 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2228 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2229 // CHECK-NEXT:    ret i128 [[TMP3]]
2230 //
2231 v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { // Test wrapper: v8i16 operands in, v4i32 result out.
2232   return __builtin_lsx_vhsubw_w_h(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.w.h.
2233 }
2234 // CHECK-LABEL: @vhsubw_d_w(
2235 // CHECK-NEXT:  entry:
2236 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2237 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2238 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2239 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2240 // CHECK-NEXT:    ret i128 [[TMP3]]
2241 //
2242 v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { // Test wrapper: v4i32 operands in, v2i64 result out.
2243   return __builtin_lsx_vhsubw_d_w(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.d.w.
2244 }
2245 // CHECK-LABEL: @vhsubw_hu_bu(
2246 // CHECK-NEXT:  entry:
2247 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2248 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2249 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2250 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2251 // CHECK-NEXT:    ret i128 [[TMP3]]
2252 //
2253 v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { // Test wrapper: unsigned v16u8 operands in, signed v8i16 result out.
2254   return __builtin_lsx_vhsubw_hu_bu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.hu.bu.
2255 }
2256 // CHECK-LABEL: @vhsubw_wu_hu(
2257 // CHECK-NEXT:  entry:
2258 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2259 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2260 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2261 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2262 // CHECK-NEXT:    ret i128 [[TMP3]]
2263 //
2264 v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { // Test wrapper: unsigned v8u16 operands in, signed v4i32 result out.
2265   return __builtin_lsx_vhsubw_wu_hu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.wu.hu.
2266 }
2267 // CHECK-LABEL: @vhsubw_du_wu(
2268 // CHECK-NEXT:  entry:
2269 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2270 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2271 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2272 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2273 // CHECK-NEXT:    ret i128 [[TMP3]]
2274 //
2275 v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { // Test wrapper: unsigned v4u32 operands in, signed v2i64 result out.
2276   return __builtin_lsx_vhsubw_du_wu(_1, _2); // Expected IR: call to @llvm.loongarch.lsx.vhsubw.du.wu.
2277 }
2278 // CHECK-LABEL: @vmod_b(
2279 // CHECK-NEXT:  entry:
2280 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2281 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2282 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2283 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2284 // CHECK-NEXT:    ret i128 [[TMP3]]
2285 //
2286 v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmod.b.
2287 // CHECK-LABEL: @vmod_h(
2288 // CHECK-NEXT:  entry:
2289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2290 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2291 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2292 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2293 // CHECK-NEXT:    ret i128 [[TMP3]]
2294 //
2295 v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmod.h.
2296 // CHECK-LABEL: @vmod_w(
2297 // CHECK-NEXT:  entry:
2298 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2299 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2300 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2301 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2302 // CHECK-NEXT:    ret i128 [[TMP3]]
2303 //
2304 v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } // Test wrapper; expected IR: call to @llvm.loongarch.lsx.vmod.w.
2305 // CHECK-LABEL: @vmod_d(
2306 // CHECK-NEXT:  entry:
2307 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2308 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2309 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2310 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2311 // CHECK-NEXT:    ret i128 [[TMP3]]
2312 //
// Test wrapper for __builtin_lsx_vmod_d: forwards both v2i64 operands unchanged
// and returns the builtin's result. The assertions above expect a single call
// to @llvm.loongarch.lsx.vmod.d on <2 x i64> values.
2313 v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); }
2314 // CHECK-LABEL: @vmod_bu(
2315 // CHECK-NEXT:  entry:
2316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2317 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2318 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2319 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2320 // CHECK-NEXT:    ret i128 [[TMP3]]
2321 //
// Test wrapper for __builtin_lsx_vmod_bu: forwards both v16u8 operands and
// returns the builtin's result. The unsigned element type appears only in the
// C signature; the expected IR calls @llvm.loongarch.lsx.vmod.bu on <16 x i8>.
2322 v16u8 vmod_bu(v16u8 _1, v16u8 _2) {
2323   return __builtin_lsx_vmod_bu(_1, _2);
2324 }
2325 // CHECK-LABEL: @vmod_hu(
2326 // CHECK-NEXT:  entry:
2327 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2328 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2329 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2330 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2331 // CHECK-NEXT:    ret i128 [[TMP3]]
2332 //
// Test wrapper for __builtin_lsx_vmod_hu: forwards both v8u16 operands and
// returns the builtin's result. The unsigned element type appears only in the
// C signature; the expected IR calls @llvm.loongarch.lsx.vmod.hu on <8 x i16>.
2333 v8u16 vmod_hu(v8u16 _1, v8u16 _2) {
2334   return __builtin_lsx_vmod_hu(_1, _2);
2335 }
2336 // CHECK-LABEL: @vmod_wu(
2337 // CHECK-NEXT:  entry:
2338 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2339 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2340 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2341 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2342 // CHECK-NEXT:    ret i128 [[TMP3]]
2343 //
// Test wrapper for __builtin_lsx_vmod_wu: forwards both v4u32 operands and
// returns the builtin's result. The unsigned element type appears only in the
// C signature; the expected IR calls @llvm.loongarch.lsx.vmod.wu on <4 x i32>.
2344 v4u32 vmod_wu(v4u32 _1, v4u32 _2) {
2345   return __builtin_lsx_vmod_wu(_1, _2);
2346 }
2347 // CHECK-LABEL: @vmod_du(
2348 // CHECK-NEXT:  entry:
2349 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2350 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2351 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2352 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2353 // CHECK-NEXT:    ret i128 [[TMP3]]
2354 //
// Test wrapper for __builtin_lsx_vmod_du: forwards both v2u64 operands and
// returns the builtin's result. The unsigned element type appears only in the
// C signature; the expected IR calls @llvm.loongarch.lsx.vmod.du on <2 x i64>.
2355 v2u64 vmod_du(v2u64 _1, v2u64 _2) {
2356   return __builtin_lsx_vmod_du(_1, _2);
2357 }
2358 // CHECK-LABEL: @vreplve_b(
2359 // CHECK-NEXT:  entry:
2360 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2361 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
2362 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2363 // CHECK-NEXT:    ret i128 [[TMP2]]
2364 //
// Test wrapper for __builtin_lsx_vreplve_b: passes the v16i8 vector _1 and the
// run-time int _2 straight through. The assertions above expect _2 to reach
// @llvm.loongarch.lsx.vreplve.b as a plain i32 argument (not a constant).
2365 v16i8 vreplve_b(v16i8 _1, int _2) {
2366   return __builtin_lsx_vreplve_b(_1, _2);
2367 }
2368 // CHECK-LABEL: @vreplve_h(
2369 // CHECK-NEXT:  entry:
2370 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2371 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
2372 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2373 // CHECK-NEXT:    ret i128 [[TMP2]]
2374 //
// Test wrapper for __builtin_lsx_vreplve_h: passes the v8i16 vector _1 and the
// run-time int _2 straight through. The assertions above expect _2 to reach
// @llvm.loongarch.lsx.vreplve.h as a plain i32 argument (not a constant).
2375 v8i16 vreplve_h(v8i16 _1, int _2) {
2376   return __builtin_lsx_vreplve_h(_1, _2);
2377 }
2378 // CHECK-LABEL: @vreplve_w(
2379 // CHECK-NEXT:  entry:
2380 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2381 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
2382 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2383 // CHECK-NEXT:    ret i128 [[TMP2]]
2384 //
// Test wrapper for __builtin_lsx_vreplve_w: passes the v4i32 vector _1 and the
// run-time int _2 straight through. The assertions above expect _2 to reach
// @llvm.loongarch.lsx.vreplve.w as a plain i32 argument (not a constant).
2385 v4i32 vreplve_w(v4i32 _1, int _2) {
2386   return __builtin_lsx_vreplve_w(_1, _2);
2387 }
2388 // CHECK-LABEL: @vreplve_d(
2389 // CHECK-NEXT:  entry:
2390 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2391 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
2392 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2393 // CHECK-NEXT:    ret i128 [[TMP2]]
2394 //
// Test wrapper for __builtin_lsx_vreplve_d: passes the v2i64 vector _1 and the
// run-time int _2 straight through. The assertions above expect _2 to reach
// @llvm.loongarch.lsx.vreplve.d as a plain i32 argument (not a constant).
2395 v2i64 vreplve_d(v2i64 _1, int _2) {
2396   return __builtin_lsx_vreplve_d(_1, _2);
2397 }
2398 // CHECK-LABEL: @vreplvei_b(
2399 // CHECK-NEXT:  entry:
2400 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2401 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
2402 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2403 // CHECK-NEXT:    ret i128 [[TMP2]]
2404 //
// Test wrapper for __builtin_lsx_vreplvei_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vreplvei.b.
2405 v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); }
2406 // CHECK-LABEL: @vreplvei_h(
2407 // CHECK-NEXT:  entry:
2408 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2409 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
2410 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2411 // CHECK-NEXT:    ret i128 [[TMP2]]
2412 //
// Test wrapper for __builtin_lsx_vreplvei_h: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vreplvei.h.
2413 v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); }
2414 // CHECK-LABEL: @vreplvei_w(
2415 // CHECK-NEXT:  entry:
2416 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2417 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
2418 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2419 // CHECK-NEXT:    ret i128 [[TMP2]]
2420 //
// Test wrapper for __builtin_lsx_vreplvei_w: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vreplvei.w.
2421 v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); }
2422 // CHECK-LABEL: @vreplvei_d(
2423 // CHECK-NEXT:  entry:
2424 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2425 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
2426 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2427 // CHECK-NEXT:    ret i128 [[TMP2]]
2428 //
// Test wrapper for __builtin_lsx_vreplvei_d: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vreplvei.d.
2429 v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); }
2430 // CHECK-LABEL: @vpickev_b(
2431 // CHECK-NEXT:  entry:
2432 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2433 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2434 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2435 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2436 // CHECK-NEXT:    ret i128 [[TMP3]]
2437 //
// Test wrapper for __builtin_lsx_vpickev_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickev.b.
2438 v16i8 vpickev_b(v16i8 _1, v16i8 _2) {
2439   return __builtin_lsx_vpickev_b(_1, _2);
2440 }
2441 // CHECK-LABEL: @vpickev_h(
2442 // CHECK-NEXT:  entry:
2443 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2444 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2445 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2446 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2447 // CHECK-NEXT:    ret i128 [[TMP3]]
2448 //
// Test wrapper for __builtin_lsx_vpickev_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickev.h.
2449 v8i16 vpickev_h(v8i16 _1, v8i16 _2) {
2450   return __builtin_lsx_vpickev_h(_1, _2);
2451 }
2452 // CHECK-LABEL: @vpickev_w(
2453 // CHECK-NEXT:  entry:
2454 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2455 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2456 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2457 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2458 // CHECK-NEXT:    ret i128 [[TMP3]]
2459 //
// Test wrapper for __builtin_lsx_vpickev_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickev.w.
2460 v4i32 vpickev_w(v4i32 _1, v4i32 _2) {
2461   return __builtin_lsx_vpickev_w(_1, _2);
2462 }
2463 // CHECK-LABEL: @vpickev_d(
2464 // CHECK-NEXT:  entry:
2465 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2466 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2467 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2468 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2469 // CHECK-NEXT:    ret i128 [[TMP3]]
2470 //
// Test wrapper for __builtin_lsx_vpickev_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickev.d.
2471 v2i64 vpickev_d(v2i64 _1, v2i64 _2) {
2472   return __builtin_lsx_vpickev_d(_1, _2);
2473 }
2474 // CHECK-LABEL: @vpickod_b(
2475 // CHECK-NEXT:  entry:
2476 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2477 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2478 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2479 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2480 // CHECK-NEXT:    ret i128 [[TMP3]]
2481 //
// Test wrapper for __builtin_lsx_vpickod_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickod.b.
2482 v16i8 vpickod_b(v16i8 _1, v16i8 _2) {
2483   return __builtin_lsx_vpickod_b(_1, _2);
2484 }
2485 // CHECK-LABEL: @vpickod_h(
2486 // CHECK-NEXT:  entry:
2487 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2488 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2489 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2490 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2491 // CHECK-NEXT:    ret i128 [[TMP3]]
2492 //
// Test wrapper for __builtin_lsx_vpickod_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickod.h.
2493 v8i16 vpickod_h(v8i16 _1, v8i16 _2) {
2494   return __builtin_lsx_vpickod_h(_1, _2);
2495 }
2496 // CHECK-LABEL: @vpickod_w(
2497 // CHECK-NEXT:  entry:
2498 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2499 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2500 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2501 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2502 // CHECK-NEXT:    ret i128 [[TMP3]]
2503 //
// Test wrapper for __builtin_lsx_vpickod_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickod.w.
2504 v4i32 vpickod_w(v4i32 _1, v4i32 _2) {
2505   return __builtin_lsx_vpickod_w(_1, _2);
2506 }
2507 // CHECK-LABEL: @vpickod_d(
2508 // CHECK-NEXT:  entry:
2509 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2510 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2511 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2512 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2513 // CHECK-NEXT:    ret i128 [[TMP3]]
2514 //
// Test wrapper for __builtin_lsx_vpickod_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpickod.d.
2515 v2i64 vpickod_d(v2i64 _1, v2i64 _2) {
2516   return __builtin_lsx_vpickod_d(_1, _2);
2517 }
2518 // CHECK-LABEL: @vilvh_b(
2519 // CHECK-NEXT:  entry:
2520 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2521 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2522 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2523 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2524 // CHECK-NEXT:    ret i128 [[TMP3]]
2525 //
// Test wrapper for __builtin_lsx_vilvh_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvh.b.
2526 v16i8 vilvh_b(v16i8 _1, v16i8 _2) {
2527   return __builtin_lsx_vilvh_b(_1, _2);
2528 }
2529 // CHECK-LABEL: @vilvh_h(
2530 // CHECK-NEXT:  entry:
2531 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2532 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2533 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2534 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2535 // CHECK-NEXT:    ret i128 [[TMP3]]
2536 //
// Test wrapper for __builtin_lsx_vilvh_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvh.h.
2537 v8i16 vilvh_h(v8i16 _1, v8i16 _2) {
2538   return __builtin_lsx_vilvh_h(_1, _2);
2539 }
2540 // CHECK-LABEL: @vilvh_w(
2541 // CHECK-NEXT:  entry:
2542 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2543 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2544 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2545 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2546 // CHECK-NEXT:    ret i128 [[TMP3]]
2547 //
// Test wrapper for __builtin_lsx_vilvh_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvh.w.
2548 v4i32 vilvh_w(v4i32 _1, v4i32 _2) {
2549   return __builtin_lsx_vilvh_w(_1, _2);
2550 }
2551 // CHECK-LABEL: @vilvh_d(
2552 // CHECK-NEXT:  entry:
2553 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2554 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2555 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2556 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2557 // CHECK-NEXT:    ret i128 [[TMP3]]
2558 //
// Test wrapper for __builtin_lsx_vilvh_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvh.d.
2559 v2i64 vilvh_d(v2i64 _1, v2i64 _2) {
2560   return __builtin_lsx_vilvh_d(_1, _2);
2561 }
2562 // CHECK-LABEL: @vilvl_b(
2563 // CHECK-NEXT:  entry:
2564 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2565 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2566 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2567 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2568 // CHECK-NEXT:    ret i128 [[TMP3]]
2569 //
// Test wrapper for __builtin_lsx_vilvl_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvl.b.
2570 v16i8 vilvl_b(v16i8 _1, v16i8 _2) {
2571   return __builtin_lsx_vilvl_b(_1, _2);
2572 }
2573 // CHECK-LABEL: @vilvl_h(
2574 // CHECK-NEXT:  entry:
2575 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2576 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2577 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2578 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2579 // CHECK-NEXT:    ret i128 [[TMP3]]
2580 //
// Test wrapper for __builtin_lsx_vilvl_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvl.h.
2581 v8i16 vilvl_h(v8i16 _1, v8i16 _2) {
2582   return __builtin_lsx_vilvl_h(_1, _2);
2583 }
2584 // CHECK-LABEL: @vilvl_w(
2585 // CHECK-NEXT:  entry:
2586 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2587 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2588 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2589 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2590 // CHECK-NEXT:    ret i128 [[TMP3]]
2591 //
// Test wrapper for __builtin_lsx_vilvl_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvl.w.
2592 v4i32 vilvl_w(v4i32 _1, v4i32 _2) {
2593   return __builtin_lsx_vilvl_w(_1, _2);
2594 }
2595 // CHECK-LABEL: @vilvl_d(
2596 // CHECK-NEXT:  entry:
2597 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2598 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2599 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2600 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2601 // CHECK-NEXT:    ret i128 [[TMP3]]
2602 //
// Test wrapper for __builtin_lsx_vilvl_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vilvl.d.
2603 v2i64 vilvl_d(v2i64 _1, v2i64 _2) {
2604   return __builtin_lsx_vilvl_d(_1, _2);
2605 }
2606 // CHECK-LABEL: @vpackev_b(
2607 // CHECK-NEXT:  entry:
2608 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2609 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2610 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2611 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2612 // CHECK-NEXT:    ret i128 [[TMP3]]
2613 //
// Test wrapper for __builtin_lsx_vpackev_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackev.b.
2614 v16i8 vpackev_b(v16i8 _1, v16i8 _2) {
2615   return __builtin_lsx_vpackev_b(_1, _2);
2616 }
2617 // CHECK-LABEL: @vpackev_h(
2618 // CHECK-NEXT:  entry:
2619 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2620 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2621 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2622 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2623 // CHECK-NEXT:    ret i128 [[TMP3]]
2624 //
// Test wrapper for __builtin_lsx_vpackev_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackev.h.
2625 v8i16 vpackev_h(v8i16 _1, v8i16 _2) {
2626   return __builtin_lsx_vpackev_h(_1, _2);
2627 }
2628 // CHECK-LABEL: @vpackev_w(
2629 // CHECK-NEXT:  entry:
2630 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2631 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2632 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2633 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2634 // CHECK-NEXT:    ret i128 [[TMP3]]
2635 //
// Test wrapper for __builtin_lsx_vpackev_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackev.w.
2636 v4i32 vpackev_w(v4i32 _1, v4i32 _2) {
2637   return __builtin_lsx_vpackev_w(_1, _2);
2638 }
2639 // CHECK-LABEL: @vpackev_d(
2640 // CHECK-NEXT:  entry:
2641 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2642 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2643 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2644 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2645 // CHECK-NEXT:    ret i128 [[TMP3]]
2646 //
// Test wrapper for __builtin_lsx_vpackev_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackev.d.
2647 v2i64 vpackev_d(v2i64 _1, v2i64 _2) {
2648   return __builtin_lsx_vpackev_d(_1, _2);
2649 }
2650 // CHECK-LABEL: @vpackod_b(
2651 // CHECK-NEXT:  entry:
2652 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2653 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2654 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2655 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2656 // CHECK-NEXT:    ret i128 [[TMP3]]
2657 //
// Test wrapper for __builtin_lsx_vpackod_b: forwards both v16i8 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackod.b.
2658 v16i8 vpackod_b(v16i8 _1, v16i8 _2) {
2659   return __builtin_lsx_vpackod_b(_1, _2);
2660 }
2661 // CHECK-LABEL: @vpackod_h(
2662 // CHECK-NEXT:  entry:
2663 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2664 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2665 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2666 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2667 // CHECK-NEXT:    ret i128 [[TMP3]]
2668 //
// Test wrapper for __builtin_lsx_vpackod_h: forwards both v8i16 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackod.h.
2669 v8i16 vpackod_h(v8i16 _1, v8i16 _2) {
2670   return __builtin_lsx_vpackod_h(_1, _2);
2671 }
2672 // CHECK-LABEL: @vpackod_w(
2673 // CHECK-NEXT:  entry:
2674 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2675 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2676 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2677 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2678 // CHECK-NEXT:    ret i128 [[TMP3]]
2679 //
// Test wrapper for __builtin_lsx_vpackod_w: forwards both v4i32 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackod.w.
2680 v4i32 vpackod_w(v4i32 _1, v4i32 _2) {
2681   return __builtin_lsx_vpackod_w(_1, _2);
2682 }
2683 // CHECK-LABEL: @vpackod_d(
2684 // CHECK-NEXT:  entry:
2685 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2686 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2687 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2688 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2689 // CHECK-NEXT:    ret i128 [[TMP3]]
2690 //
// Test wrapper for __builtin_lsx_vpackod_d: forwards both v2i64 operands in
// order (_1, _2) and returns the builtin's result. The assertions above expect
// a single call to @llvm.loongarch.lsx.vpackod.d.
2691 v2i64 vpackod_d(v2i64 _1, v2i64 _2) {
2692   return __builtin_lsx_vpackod_d(_1, _2);
2693 }
2694 // CHECK-LABEL: @vshuf_h(
2695 // CHECK-NEXT:  entry:
2696 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2697 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2698 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2699 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2700 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2701 // CHECK-NEXT:    ret i128 [[TMP4]]
2702 //
// Test wrapper for the three-operand __builtin_lsx_vshuf_h: forwards _1, _2
// and _3 (all v8i16) in that order and returns the builtin's result. The
// assertions above expect three bitcasts feeding @llvm.loongarch.lsx.vshuf.h.
2703 v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
2704   return __builtin_lsx_vshuf_h(_1, _2, _3);
2705 }
2706 // CHECK-LABEL: @vshuf_w(
2707 // CHECK-NEXT:  entry:
2708 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2709 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2710 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2711 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2712 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2713 // CHECK-NEXT:    ret i128 [[TMP4]]
2714 //
// Test wrapper for the three-operand __builtin_lsx_vshuf_w: forwards _1, _2
// and _3 (all v4i32) in that order and returns the builtin's result. The
// assertions above expect three bitcasts feeding @llvm.loongarch.lsx.vshuf.w.
2715 v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2716   return __builtin_lsx_vshuf_w(_1, _2, _3);
2717 }
2718 // CHECK-LABEL: @vshuf_d(
2719 // CHECK-NEXT:  entry:
2720 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2721 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2722 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2723 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2724 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2725 // CHECK-NEXT:    ret i128 [[TMP4]]
2726 //
// Test wrapper for the three-operand __builtin_lsx_vshuf_d: forwards _1, _2
// and _3 (all v2i64) in that order and returns the builtin's result. The
// assertions above expect three bitcasts feeding @llvm.loongarch.lsx.vshuf.d.
2727 v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2728   return __builtin_lsx_vshuf_d(_1, _2, _3);
2729 }
2730 // CHECK-LABEL: @vand_v(
2731 // CHECK-NEXT:  entry:
2732 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2733 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2734 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2735 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2736 // CHECK-NEXT:    ret i128 [[TMP3]]
2737 //
// Test wrapper for __builtin_lsx_vand_v: forwards both v16u8 operands and
// returns the builtin's result. The assertions above expect a single call to
// @llvm.loongarch.lsx.vand.v on <16 x i8> values.
2738 v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); }
2739 // CHECK-LABEL: @vandi_b(
2740 // CHECK-NEXT:  entry:
2741 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2742 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
2743 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2744 // CHECK-NEXT:    ret i128 [[TMP2]]
2745 //
// Test wrapper for __builtin_lsx_vandi_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vandi.b.
2746 v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); }
2747 // CHECK-LABEL: @vor_v(
2748 // CHECK-NEXT:  entry:
2749 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2750 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2751 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2752 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2753 // CHECK-NEXT:    ret i128 [[TMP3]]
2754 //
// Test wrapper for __builtin_lsx_vor_v: forwards both v16u8 operands and
// returns the builtin's result. The assertions above expect a single call to
// @llvm.loongarch.lsx.vor.v on <16 x i8> values.
2755 v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); }
2756 // CHECK-LABEL: @vori_b(
2757 // CHECK-NEXT:  entry:
2758 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2759 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
2760 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2761 // CHECK-NEXT:    ret i128 [[TMP2]]
2762 //
// Test wrapper for __builtin_lsx_vori_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vori.b.
2763 v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); }
2764 // CHECK-LABEL: @vnor_v(
2765 // CHECK-NEXT:  entry:
2766 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2767 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2768 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2769 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2770 // CHECK-NEXT:    ret i128 [[TMP3]]
2771 //
// Test wrapper for __builtin_lsx_vnor_v: forwards both v16u8 operands and
// returns the builtin's result. The assertions above expect a single call to
// @llvm.loongarch.lsx.vnor.v on <16 x i8> values.
2772 v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); }
2773 // CHECK-LABEL: @vnori_b(
2774 // CHECK-NEXT:  entry:
2775 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2776 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
2777 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2778 // CHECK-NEXT:    ret i128 [[TMP2]]
2779 //
// Test wrapper for __builtin_lsx_vnori_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vnori.b.
2780 v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); }
2781 // CHECK-LABEL: @vxor_v(
2782 // CHECK-NEXT:  entry:
2783 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2784 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2785 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2786 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2787 // CHECK-NEXT:    ret i128 [[TMP3]]
2788 //
// Test wrapper for __builtin_lsx_vxor_v: forwards both v16u8 operands and
// returns the builtin's result. The assertions above expect a single call to
// @llvm.loongarch.lsx.vxor.v on <16 x i8> values.
2789 v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); }
2790 // CHECK-LABEL: @vxori_b(
2791 // CHECK-NEXT:  entry:
2792 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2793 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
2794 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2795 // CHECK-NEXT:    ret i128 [[TMP2]]
2796 //
// Test wrapper for __builtin_lsx_vxori_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vxori.b.
2797 v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); }
2798 // CHECK-LABEL: @vbitsel_v(
2799 // CHECK-NEXT:  entry:
2800 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2801 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2802 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2803 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2804 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2805 // CHECK-NEXT:    ret i128 [[TMP4]]
2806 //
// Test wrapper for the three-operand __builtin_lsx_vbitsel_v: forwards _1, _2
// and _3 (all v16u8) in that order and returns the builtin's result. The
// assertions above expect three bitcasts feeding @llvm.loongarch.lsx.vbitsel.v.
2807 v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
2808   return __builtin_lsx_vbitsel_v(_1, _2, _3);
2809 }
2810 // CHECK-LABEL: @vbitseli_b(
2811 // CHECK-NEXT:  entry:
2812 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2813 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2814 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
2815 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2816 // CHECK-NEXT:    ret i128 [[TMP3]]
2817 //
// Test wrapper for __builtin_lsx_vbitseli_b: forwards the two v16u8 operands
// followed by the literal 1. The assertions above expect that literal as the
// trailing `i32 1` argument of @llvm.loongarch.lsx.vbitseli.b.
2818 v16u8 vbitseli_b(v16u8 _1, v16u8 _2) {
2819   return __builtin_lsx_vbitseli_b(_1, _2, 1);
2820 }
2821 // CHECK-LABEL: @vshuf4i_b(
2822 // CHECK-NEXT:  entry:
2823 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2824 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
2825 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2826 // CHECK-NEXT:    ret i128 [[TMP2]]
2827 //
// Test wrapper for __builtin_lsx_vshuf4i_b: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vshuf4i.b.
2828 v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); }
2829 // CHECK-LABEL: @vshuf4i_h(
2830 // CHECK-NEXT:  entry:
2831 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2832 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
2833 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2834 // CHECK-NEXT:    ret i128 [[TMP2]]
2835 //
// Test wrapper for __builtin_lsx_vshuf4i_h: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vshuf4i.h.
2836 v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); }
2837 // CHECK-LABEL: @vshuf4i_w(
2838 // CHECK-NEXT:  entry:
2839 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2840 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
2841 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2842 // CHECK-NEXT:    ret i128 [[TMP2]]
2843 //
// Test wrapper for __builtin_lsx_vshuf4i_w: calls the builtin with _1 and the
// literal 1. The assertions above expect that literal to show up as `i32 1` in
// the call to @llvm.loongarch.lsx.vshuf4i.w.
2844 v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); }
2845 // CHECK-LABEL: @vreplgr2vr_b(
2846 // CHECK-NEXT:  entry:
2847 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
2848 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
2849 // CHECK-NEXT:    ret i128 [[TMP1]]
2850 //
// Test wrapper for __builtin_lsx_vreplgr2vr_b: takes a scalar int and returns
// the v16i8 produced by the builtin. The assertions above expect the scalar to
// be passed as i32 to @llvm.loongarch.lsx.vreplgr2vr.b with no input bitcast.
2851 v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); }
2852 // CHECK-LABEL: @vreplgr2vr_h(
2853 // CHECK-NEXT:  entry:
2854 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
2855 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
2856 // CHECK-NEXT:    ret i128 [[TMP1]]
2857 //
// Test wrapper for __builtin_lsx_vreplgr2vr_h: takes a scalar int and returns
// the v8i16 produced by the builtin. The assertions above expect the scalar to
// be passed as i32 to @llvm.loongarch.lsx.vreplgr2vr.h with no input bitcast.
2858 v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); }
2859 // CHECK-LABEL: @vreplgr2vr_w(
2860 // CHECK-NEXT:  entry:
2861 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
2862 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
2863 // CHECK-NEXT:    ret i128 [[TMP1]]
2864 //
// Test wrapper for __builtin_lsx_vreplgr2vr_w: takes a scalar int and returns
// the v4i32 produced by the builtin. The assertions above expect the scalar to
// be passed as i32 to @llvm.loongarch.lsx.vreplgr2vr.w with no input bitcast.
2865 v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); }
2866 // CHECK-LABEL: @vreplgr2vr_d(
2867 // CHECK-NEXT:  entry:
2868 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
2869 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
2870 // CHECK-NEXT:    ret i128 [[TMP1]]
2871 //
// Test wrapper for __builtin_lsx_vreplgr2vr_d: unlike the b/h/w variants it
// takes a `long` scalar, which the assertions above expect to be passed as i64
// to @llvm.loongarch.lsx.vreplgr2vr.d.
2872 v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); }
2873 // CHECK-LABEL: @vpcnt_b(
2874 // CHECK-NEXT:  entry:
2875 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2876 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
2877 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2878 // CHECK-NEXT:    ret i128 [[TMP2]]
2879 //
// Test wrapper for the unary __builtin_lsx_vpcnt_b: forwards the single v16i8
// operand and returns the builtin's result. The assertions above expect a
// single call to @llvm.loongarch.lsx.vpcnt.b.
2880 v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); }
2881 // CHECK-LABEL: @vpcnt_h(
2882 // CHECK-NEXT:  entry:
2883 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2884 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
2885 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2886 // CHECK-NEXT:    ret i128 [[TMP2]]
2887 //
// Test wrapper for the unary __builtin_lsx_vpcnt_h: forwards the single v8i16
// operand and returns the builtin's result. The assertions above expect a
// single call to @llvm.loongarch.lsx.vpcnt.h.
2888 v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); }
2889 // CHECK-LABEL: @vpcnt_w(
2890 // CHECK-NEXT:  entry:
2891 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2892 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
2893 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2894 // CHECK-NEXT:    ret i128 [[TMP2]]
2895 //
// Test wrapper for the unary __builtin_lsx_vpcnt_w: forwards the single v4i32
// operand and returns the builtin's result. The assertions above expect a
// single call to @llvm.loongarch.lsx.vpcnt.w.
2896 v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); }
2897 // CHECK-LABEL: @vpcnt_d(
2898 // CHECK-NEXT:  entry:
2899 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2900 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
2901 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2902 // CHECK-NEXT:    ret i128 [[TMP2]]
2903 //
// Test wrapper for the unary __builtin_lsx_vpcnt_d: forwards the single v2i64
// operand and returns the builtin's result. The assertions above expect a
// single call to @llvm.loongarch.lsx.vpcnt.d.
2904 v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); }
2905 // CHECK-LABEL: @vclo_b(
2906 // CHECK-NEXT:  entry:
2907 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2908 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
2909 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2910 // CHECK-NEXT:    ret i128 [[TMP2]]
2911 //
2912 v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); }
2913 // CHECK-LABEL: @vclo_h(
2914 // CHECK-NEXT:  entry:
2915 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2916 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
2917 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2918 // CHECK-NEXT:    ret i128 [[TMP2]]
2919 //
2920 v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); }
2921 // CHECK-LABEL: @vclo_w(
2922 // CHECK-NEXT:  entry:
2923 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2924 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
2925 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2926 // CHECK-NEXT:    ret i128 [[TMP2]]
2927 //
2928 v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); }
2929 // CHECK-LABEL: @vclo_d(
2930 // CHECK-NEXT:  entry:
2931 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2932 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
2933 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2934 // CHECK-NEXT:    ret i128 [[TMP2]]
2935 //
2936 v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); }
2937 // CHECK-LABEL: @vclz_b(
2938 // CHECK-NEXT:  entry:
2939 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2940 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
2941 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2942 // CHECK-NEXT:    ret i128 [[TMP2]]
2943 //
2944 v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); }
2945 // CHECK-LABEL: @vclz_h(
2946 // CHECK-NEXT:  entry:
2947 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2948 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
2949 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2950 // CHECK-NEXT:    ret i128 [[TMP2]]
2951 //
2952 v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); }
2953 // CHECK-LABEL: @vclz_w(
2954 // CHECK-NEXT:  entry:
2955 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2956 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
2957 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2958 // CHECK-NEXT:    ret i128 [[TMP2]]
2959 //
2960 v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); }
2961 // CHECK-LABEL: @vclz_d(
2962 // CHECK-NEXT:  entry:
2963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2964 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
2965 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2966 // CHECK-NEXT:    ret i128 [[TMP2]]
2967 //
2968 v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); }
2969 // CHECK-LABEL: @vpickve2gr_b(
2970 // CHECK-NEXT:  entry:
2971 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2972 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
2973 // CHECK-NEXT:    ret i32 [[TMP1]]
2974 //
2975 int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); }
2976 // CHECK-LABEL: @vpickve2gr_h(
2977 // CHECK-NEXT:  entry:
2978 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2979 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
2980 // CHECK-NEXT:    ret i32 [[TMP1]]
2981 //
2982 int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); }
2983 // CHECK-LABEL: @vpickve2gr_w(
2984 // CHECK-NEXT:  entry:
2985 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2986 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
2987 // CHECK-NEXT:    ret i32 [[TMP1]]
2988 //
2989 int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); }
2990 // CHECK-LABEL: @vpickve2gr_d(
2991 // CHECK-NEXT:  entry:
2992 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2993 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
2994 // CHECK-NEXT:    ret i64 [[TMP1]]
2995 //
2996 long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); }
2997 // CHECK-LABEL: @vpickve2gr_bu(
2998 // CHECK-NEXT:  entry:
2999 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3000 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
3001 // CHECK-NEXT:    ret i32 [[TMP1]]
3002 //
3003 unsigned int vpickve2gr_bu(v16i8 _1) {
3004   return __builtin_lsx_vpickve2gr_bu(_1, 1);
3005 }
3006 // CHECK-LABEL: @vpickve2gr_hu(
3007 // CHECK-NEXT:  entry:
3008 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3009 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
3010 // CHECK-NEXT:    ret i32 [[TMP1]]
3011 //
3012 unsigned int vpickve2gr_hu(v8i16 _1) {
3013   return __builtin_lsx_vpickve2gr_hu(_1, 1);
3014 }
3015 // CHECK-LABEL: @vpickve2gr_wu(
3016 // CHECK-NEXT:  entry:
3017 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3018 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
3019 // CHECK-NEXT:    ret i32 [[TMP1]]
3020 //
3021 unsigned int vpickve2gr_wu(v4i32 _1) {
3022   return __builtin_lsx_vpickve2gr_wu(_1, 1);
3023 }
3024 // CHECK-LABEL: @vpickve2gr_du(
3025 // CHECK-NEXT:  entry:
3026 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3027 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
3028 // CHECK-NEXT:    ret i64 [[TMP1]]
3029 //
3030 unsigned long int vpickve2gr_du(v2i64 _1) {
3031   return __builtin_lsx_vpickve2gr_du(_1, 1);
3032 }
3033 // CHECK-LABEL: @vinsgr2vr_b(
3034 // CHECK-NEXT:  entry:
3035 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3036 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
3037 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3038 // CHECK-NEXT:    ret i128 [[TMP2]]
3039 //
3040 v16i8 vinsgr2vr_b(v16i8 _1) {
3041   return __builtin_lsx_vinsgr2vr_b(_1, 1, 1);
3042 }
3043 // CHECK-LABEL: @vinsgr2vr_h(
3044 // CHECK-NEXT:  entry:
3045 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3046 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
3047 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3048 // CHECK-NEXT:    ret i128 [[TMP2]]
3049 //
3050 v8i16 vinsgr2vr_h(v8i16 _1) {
3051   return __builtin_lsx_vinsgr2vr_h(_1, 1, 1);
3052 }
3053 // CHECK-LABEL: @vinsgr2vr_w(
3054 // CHECK-NEXT:  entry:
3055 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3056 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
3057 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3058 // CHECK-NEXT:    ret i128 [[TMP2]]
3059 //
3060 v4i32 vinsgr2vr_w(v4i32 _1) {
3061   return __builtin_lsx_vinsgr2vr_w(_1, 1, 1);
3062 }
3063 // CHECK-LABEL: @vinsgr2vr_d(
3064 // CHECK-NEXT:  entry:
3065 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3066 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
3067 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3068 // CHECK-NEXT:    ret i128 [[TMP2]]
3069 //
3070 v2i64 vinsgr2vr_d(v2i64 _1) {
3071   return __builtin_lsx_vinsgr2vr_d(_1, 1, 1);
3072 }
3073 // CHECK-LABEL: @vfadd_s(
3074 // CHECK-NEXT:  entry:
3075 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3076 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3077 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3078 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3079 // CHECK-NEXT:    ret i128 [[TMP3]]
3080 //
3081 v4f32 vfadd_s(v4f32 _1, v4f32 _2) {
3082   return __builtin_lsx_vfadd_s(_1, _2);
3083 }
3084 // CHECK-LABEL: @vfadd_d(
3085 // CHECK-NEXT:  entry:
3086 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3087 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3088 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3089 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3090 // CHECK-NEXT:    ret i128 [[TMP3]]
3091 //
3092 v2f64 vfadd_d(v2f64 _1, v2f64 _2) {
3093   return __builtin_lsx_vfadd_d(_1, _2);
3094 }
3095 // CHECK-LABEL: @vfsub_s(
3096 // CHECK-NEXT:  entry:
3097 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3098 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3099 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3100 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3101 // CHECK-NEXT:    ret i128 [[TMP3]]
3102 //
3103 v4f32 vfsub_s(v4f32 _1, v4f32 _2) {
3104   return __builtin_lsx_vfsub_s(_1, _2);
3105 }
3106 // CHECK-LABEL: @vfsub_d(
3107 // CHECK-NEXT:  entry:
3108 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3109 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3110 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3111 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3112 // CHECK-NEXT:    ret i128 [[TMP3]]
3113 //
3114 v2f64 vfsub_d(v2f64 _1, v2f64 _2) {
3115   return __builtin_lsx_vfsub_d(_1, _2);
3116 }
3117 // CHECK-LABEL: @vfmul_s(
3118 // CHECK-NEXT:  entry:
3119 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3120 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3121 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3122 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3123 // CHECK-NEXT:    ret i128 [[TMP3]]
3124 //
3125 v4f32 vfmul_s(v4f32 _1, v4f32 _2) {
3126   return __builtin_lsx_vfmul_s(_1, _2);
3127 }
3128 // CHECK-LABEL: @vfmul_d(
3129 // CHECK-NEXT:  entry:
3130 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3131 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3132 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3133 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3134 // CHECK-NEXT:    ret i128 [[TMP3]]
3135 //
3136 v2f64 vfmul_d(v2f64 _1, v2f64 _2) {
3137   return __builtin_lsx_vfmul_d(_1, _2);
3138 }
3139 // CHECK-LABEL: @vfdiv_s(
3140 // CHECK-NEXT:  entry:
3141 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3142 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3143 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3144 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3145 // CHECK-NEXT:    ret i128 [[TMP3]]
3146 //
3147 v4f32 vfdiv_s(v4f32 _1, v4f32 _2) {
3148   return __builtin_lsx_vfdiv_s(_1, _2);
3149 }
3150 // CHECK-LABEL: @vfdiv_d(
3151 // CHECK-NEXT:  entry:
3152 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3153 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3154 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3155 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3156 // CHECK-NEXT:    ret i128 [[TMP3]]
3157 //
3158 v2f64 vfdiv_d(v2f64 _1, v2f64 _2) {
3159   return __builtin_lsx_vfdiv_d(_1, _2);
3160 }
3161 // CHECK-LABEL: @vfcvt_h_s(
3162 // CHECK-NEXT:  entry:
3163 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3164 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3165 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3166 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3167 // CHECK-NEXT:    ret i128 [[TMP3]]
3168 //
3169 v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) {
3170   return __builtin_lsx_vfcvt_h_s(_1, _2);
3171 }
3172 // CHECK-LABEL: @vfcvt_s_d(
3173 // CHECK-NEXT:  entry:
3174 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3175 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3176 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3177 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3178 // CHECK-NEXT:    ret i128 [[TMP3]]
3179 //
3180 v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) {
3181   return __builtin_lsx_vfcvt_s_d(_1, _2);
3182 }
3183 // CHECK-LABEL: @vfmin_s(
3184 // CHECK-NEXT:  entry:
3185 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3186 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3187 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3188 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3189 // CHECK-NEXT:    ret i128 [[TMP3]]
3190 //
3191 v4f32 vfmin_s(v4f32 _1, v4f32 _2) {
3192   return __builtin_lsx_vfmin_s(_1, _2);
3193 }
3194 // CHECK-LABEL: @vfmin_d(
3195 // CHECK-NEXT:  entry:
3196 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3197 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3198 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3199 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3200 // CHECK-NEXT:    ret i128 [[TMP3]]
3201 //
3202 v2f64 vfmin_d(v2f64 _1, v2f64 _2) {
3203   return __builtin_lsx_vfmin_d(_1, _2);
3204 }
3205 // CHECK-LABEL: @vfmina_s(
3206 // CHECK-NEXT:  entry:
3207 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3208 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3209 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3210 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3211 // CHECK-NEXT:    ret i128 [[TMP3]]
3212 //
3213 v4f32 vfmina_s(v4f32 _1, v4f32 _2) {
3214   return __builtin_lsx_vfmina_s(_1, _2);
3215 }
3216 // CHECK-LABEL: @vfmina_d(
3217 // CHECK-NEXT:  entry:
3218 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3219 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3220 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3221 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3222 // CHECK-NEXT:    ret i128 [[TMP3]]
3223 //
3224 v2f64 vfmina_d(v2f64 _1, v2f64 _2) {
3225   return __builtin_lsx_vfmina_d(_1, _2);
3226 }
3227 // CHECK-LABEL: @vfmax_s(
3228 // CHECK-NEXT:  entry:
3229 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3230 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3231 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3232 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3233 // CHECK-NEXT:    ret i128 [[TMP3]]
3234 //
3235 v4f32 vfmax_s(v4f32 _1, v4f32 _2) {
3236   return __builtin_lsx_vfmax_s(_1, _2);
3237 }
3238 // CHECK-LABEL: @vfmax_d(
3239 // CHECK-NEXT:  entry:
3240 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3241 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3242 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3243 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3244 // CHECK-NEXT:    ret i128 [[TMP3]]
3245 //
3246 v2f64 vfmax_d(v2f64 _1, v2f64 _2) {
3247   return __builtin_lsx_vfmax_d(_1, _2);
3248 }
3249 // CHECK-LABEL: @vfmaxa_s(
3250 // CHECK-NEXT:  entry:
3251 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3252 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3253 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3254 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3255 // CHECK-NEXT:    ret i128 [[TMP3]]
3256 //
3257 v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) {
3258   return __builtin_lsx_vfmaxa_s(_1, _2);
3259 }
3260 // CHECK-LABEL: @vfmaxa_d(
3261 // CHECK-NEXT:  entry:
3262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3263 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3264 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3265 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3266 // CHECK-NEXT:    ret i128 [[TMP3]]
3267 //
3268 v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) {
3269   return __builtin_lsx_vfmaxa_d(_1, _2);
3270 }
3271 // CHECK-LABEL: @vfclass_s(
3272 // CHECK-NEXT:  entry:
3273 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3274 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
3275 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3276 // CHECK-NEXT:    ret i128 [[TMP2]]
3277 //
3278 v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); }
3279 // CHECK-LABEL: @vfclass_d(
3280 // CHECK-NEXT:  entry:
3281 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3282 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
3283 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3284 // CHECK-NEXT:    ret i128 [[TMP2]]
3285 //
3286 v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); }
3287 // CHECK-LABEL: @vfsqrt_s(
3288 // CHECK-NEXT:  entry:
3289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3290 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
3291 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3292 // CHECK-NEXT:    ret i128 [[TMP2]]
3293 //
3294 v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); }
3295 // CHECK-LABEL: @vfsqrt_d(
3296 // CHECK-NEXT:  entry:
3297 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3298 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
3299 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3300 // CHECK-NEXT:    ret i128 [[TMP2]]
3301 //
3302 v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); }
3303 // CHECK-LABEL: @vfrecip_s(
3304 // CHECK-NEXT:  entry:
3305 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3306 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
3307 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3308 // CHECK-NEXT:    ret i128 [[TMP2]]
3309 //
3310 v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); }
3311 // CHECK-LABEL: @vfrecip_d(
3312 // CHECK-NEXT:  entry:
3313 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3314 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
3315 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3316 // CHECK-NEXT:    ret i128 [[TMP2]]
3317 //
3318 v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); }
3319 // CHECK-LABEL: @vfrint_s(
3320 // CHECK-NEXT:  entry:
3321 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3322 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
3323 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3324 // CHECK-NEXT:    ret i128 [[TMP2]]
3325 //
3326 v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); }
3327 // CHECK-LABEL: @vfrint_d(
3328 // CHECK-NEXT:  entry:
3329 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3330 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
3331 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3332 // CHECK-NEXT:    ret i128 [[TMP2]]
3333 //
3334 v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); }
3335 // CHECK-LABEL: @vfrsqrt_s(
3336 // CHECK-NEXT:  entry:
3337 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3338 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
3339 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3340 // CHECK-NEXT:    ret i128 [[TMP2]]
3341 //
3342 v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); }
3343 // CHECK-LABEL: @vfrsqrt_d(
3344 // CHECK-NEXT:  entry:
3345 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3346 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
3347 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3348 // CHECK-NEXT:    ret i128 [[TMP2]]
3349 //
3350 v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); }
3351 // CHECK-LABEL: @vflogb_s(
3352 // CHECK-NEXT:  entry:
3353 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3354 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
3355 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3356 // CHECK-NEXT:    ret i128 [[TMP2]]
3357 //
3358 v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); }
3359 // CHECK-LABEL: @vflogb_d(
3360 // CHECK-NEXT:  entry:
3361 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3362 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
3363 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3364 // CHECK-NEXT:    ret i128 [[TMP2]]
3365 //
3366 v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); }
3367 // CHECK-LABEL: @vfcvth_s_h(
3368 // CHECK-NEXT:  entry:
3369 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3370 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
3371 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3372 // CHECK-NEXT:    ret i128 [[TMP2]]
3373 //
3374 v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); }
3375 // CHECK-LABEL: @vfcvth_d_s(
3376 // CHECK-NEXT:  entry:
3377 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3378 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
3379 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3380 // CHECK-NEXT:    ret i128 [[TMP2]]
3381 //
3382 v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); }
3383 // CHECK-LABEL: @vfcvtl_s_h(
3384 // CHECK-NEXT:  entry:
3385 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3386 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
3387 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3388 // CHECK-NEXT:    ret i128 [[TMP2]]
3389 //
3390 v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); }
3391 // CHECK-LABEL: @vfcvtl_d_s(
3392 // CHECK-NEXT:  entry:
3393 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3394 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
3395 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3396 // CHECK-NEXT:    ret i128 [[TMP2]]
3397 //
3398 v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); }
3399 // CHECK-LABEL: @vftint_w_s(
3400 // CHECK-NEXT:  entry:
3401 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3402 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
3403 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3404 // CHECK-NEXT:    ret i128 [[TMP2]]
3405 //
3406 v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); }
3407 // CHECK-LABEL: @vftint_l_d(
3408 // CHECK-NEXT:  entry:
3409 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3410 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
3411 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3412 // CHECK-NEXT:    ret i128 [[TMP2]]
3413 //
3414 v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); }
3415 // CHECK-LABEL: @vftint_wu_s(
3416 // CHECK-NEXT:  entry:
3417 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3418 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
3419 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3420 // CHECK-NEXT:    ret i128 [[TMP2]]
3421 //
3422 v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); }
3423 // CHECK-LABEL: @vftint_lu_d(
3424 // CHECK-NEXT:  entry:
3425 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3426 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
3427 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3428 // CHECK-NEXT:    ret i128 [[TMP2]]
3429 //
3430 v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); }
3431 // CHECK-LABEL: @vftintrz_w_s(
3432 // CHECK-NEXT:  entry:
3433 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3434 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
3435 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3436 // CHECK-NEXT:    ret i128 [[TMP2]]
3437 //
3438 v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); }
3439 // CHECK-LABEL: @vftintrz_l_d(
3440 // CHECK-NEXT:  entry:
3441 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3442 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
3443 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3444 // CHECK-NEXT:    ret i128 [[TMP2]]
3445 //
3446 v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); }
3447 // CHECK-LABEL: @vftintrz_wu_s(
3448 // CHECK-NEXT:  entry:
3449 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3450 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
3451 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3452 // CHECK-NEXT:    ret i128 [[TMP2]]
3453 //
3454 v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); }
3455 // CHECK-LABEL: @vftintrz_lu_d(
3456 // CHECK-NEXT:  entry:
3457 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3458 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
3459 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3460 // CHECK-NEXT:    ret i128 [[TMP2]]
3461 //
3462 v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); }
3463 // CHECK-LABEL: @vffint_s_w(
3464 // CHECK-NEXT:  entry:
3465 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3466 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
3467 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3468 // CHECK-NEXT:    ret i128 [[TMP2]]
3469 //
3470 v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); }
3471 // CHECK-LABEL: @vffint_d_l(
3472 // CHECK-NEXT:  entry:
3473 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3474 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
3475 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3476 // CHECK-NEXT:    ret i128 [[TMP2]]
3477 //
3478 v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); }
3479 // CHECK-LABEL: @vffint_s_wu(
3480 // CHECK-NEXT:  entry:
3481 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3482 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
3483 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3484 // CHECK-NEXT:    ret i128 [[TMP2]]
3485 //
3486 v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); }
3487 // CHECK-LABEL: @vffint_d_lu(
3488 // CHECK-NEXT:  entry:
3489 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3490 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
3491 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3492 // CHECK-NEXT:    ret i128 [[TMP2]]
3493 //
3494 v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); }
3495 // CHECK-LABEL: @vandn_v(
3496 // CHECK-NEXT:  entry:
3497 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3498 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3499 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3500 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3501 // CHECK-NEXT:    ret i128 [[TMP3]]
3502 //
3503 v16u8 vandn_v(v16u8 _1, v16u8 _2) {
3504   return __builtin_lsx_vandn_v(_1, _2);
3505 }
3506 // CHECK-LABEL: @vneg_b(
3507 // CHECK-NEXT:  entry:
3508 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3509 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
3510 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3511 // CHECK-NEXT:    ret i128 [[TMP2]]
3512 //
// Test wrapper: passes _1 straight to __builtin_lsx_vneg_b and returns the
// result. The assertions above expect one call to @llvm.loongarch.lsx.vneg.b
// on a <16 x i8> operand.
3513 v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); }
3514 // CHECK-LABEL: @vneg_h(
3515 // CHECK-NEXT:  entry:
3516 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3517 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
3518 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3519 // CHECK-NEXT:    ret i128 [[TMP2]]
3520 //
// Test wrapper: passes _1 straight to __builtin_lsx_vneg_h and returns the
// result. The assertions above expect one call to @llvm.loongarch.lsx.vneg.h
// on an <8 x i16> operand.
3521 v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); }
3522 // CHECK-LABEL: @vneg_w(
3523 // CHECK-NEXT:  entry:
3524 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3525 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
3526 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3527 // CHECK-NEXT:    ret i128 [[TMP2]]
3528 //
// Test wrapper: passes _1 straight to __builtin_lsx_vneg_w and returns the
// result. The assertions above expect one call to @llvm.loongarch.lsx.vneg.w
// on a <4 x i32> operand.
3529 v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); }
3530 // CHECK-LABEL: @vneg_d(
3531 // CHECK-NEXT:  entry:
3532 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3533 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
3534 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3535 // CHECK-NEXT:    ret i128 [[TMP2]]
3536 //
// Test wrapper: passes _1 straight to __builtin_lsx_vneg_d and returns the
// result. The assertions above expect one call to @llvm.loongarch.lsx.vneg.d
// on a <2 x i64> operand.
3537 v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); }
3538 // CHECK-LABEL: @vmuh_b(
3539 // CHECK-NEXT:  entry:
3540 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3541 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3542 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3543 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3544 // CHECK-NEXT:    ret i128 [[TMP3]]
3545 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_b and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.b on <16 x i8> operands.
3546 v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); }
3547 // CHECK-LABEL: @vmuh_h(
3548 // CHECK-NEXT:  entry:
3549 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3550 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3551 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3552 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3553 // CHECK-NEXT:    ret i128 [[TMP3]]
3554 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_h and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.h on <8 x i16> operands.
3555 v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); }
3556 // CHECK-LABEL: @vmuh_w(
3557 // CHECK-NEXT:  entry:
3558 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3559 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3560 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3561 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3562 // CHECK-NEXT:    ret i128 [[TMP3]]
3563 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_w and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.w on <4 x i32> operands.
3564 v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); }
3565 // CHECK-LABEL: @vmuh_d(
3566 // CHECK-NEXT:  entry:
3567 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3568 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3569 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3570 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3571 // CHECK-NEXT:    ret i128 [[TMP3]]
3572 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_d and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.d on <2 x i64> operands.
3573 v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); }
3574 // CHECK-LABEL: @vmuh_bu(
3575 // CHECK-NEXT:  entry:
3576 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3577 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3578 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3579 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3580 // CHECK-NEXT:    ret i128 [[TMP3]]
3581 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_bu and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.bu on <16 x i8> operands.
3582 v16u8 vmuh_bu(v16u8 _1, v16u8 _2) {
3583   return __builtin_lsx_vmuh_bu(_1, _2);
3584 }
3585 // CHECK-LABEL: @vmuh_hu(
3586 // CHECK-NEXT:  entry:
3587 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3588 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3589 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3590 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3591 // CHECK-NEXT:    ret i128 [[TMP3]]
3592 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_hu and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.hu on <8 x i16> operands.
3593 v8u16 vmuh_hu(v8u16 _1, v8u16 _2) {
3594   return __builtin_lsx_vmuh_hu(_1, _2);
3595 }
3596 // CHECK-LABEL: @vmuh_wu(
3597 // CHECK-NEXT:  entry:
3598 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3599 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3600 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3601 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3602 // CHECK-NEXT:    ret i128 [[TMP3]]
3603 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_wu and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.wu on <4 x i32> operands.
3604 v4u32 vmuh_wu(v4u32 _1, v4u32 _2) {
3605   return __builtin_lsx_vmuh_wu(_1, _2);
3606 }
3607 // CHECK-LABEL: @vmuh_du(
3608 // CHECK-NEXT:  entry:
3609 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3610 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3611 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3612 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3613 // CHECK-NEXT:    ret i128 [[TMP3]]
3614 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vmuh_du and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vmuh.du on <2 x i64> operands.
3615 v2u64 vmuh_du(v2u64 _1, v2u64 _2) {
3616   return __builtin_lsx_vmuh_du(_1, _2);
3617 }
3618 // CHECK-LABEL: @vsllwil_h_b(
3619 // CHECK-NEXT:  entry:
3620 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3621 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
3622 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3623 // CHECK-NEXT:    ret i128 [[TMP2]]
3624 //
// Test wrapper: calls __builtin_lsx_vsllwil_h_b with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.h.b
// taking <16 x i8> plus `i32 1` and yielding <8 x i16>.
3625 v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); }
3626 // CHECK-LABEL: @vsllwil_w_h(
3627 // CHECK-NEXT:  entry:
3628 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3629 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
3630 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3631 // CHECK-NEXT:    ret i128 [[TMP2]]
3632 //
// Test wrapper: calls __builtin_lsx_vsllwil_w_h with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.w.h
// taking <8 x i16> plus `i32 1` and yielding <4 x i32>.
3633 v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); }
3634 // CHECK-LABEL: @vsllwil_d_w(
3635 // CHECK-NEXT:  entry:
3636 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3637 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
3638 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3639 // CHECK-NEXT:    ret i128 [[TMP2]]
3640 //
// Test wrapper: calls __builtin_lsx_vsllwil_d_w with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.d.w
// taking <4 x i32> plus `i32 1` and yielding <2 x i64>.
3641 v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); }
3642 // CHECK-LABEL: @vsllwil_hu_bu(
3643 // CHECK-NEXT:  entry:
3644 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3645 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
3646 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3647 // CHECK-NEXT:    ret i128 [[TMP2]]
3648 //
// Test wrapper: calls __builtin_lsx_vsllwil_hu_bu with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.hu.bu
// taking <16 x i8> plus `i32 1` and yielding <8 x i16>.
3649 v8u16 vsllwil_hu_bu(v16u8 _1) {
3650   return __builtin_lsx_vsllwil_hu_bu(_1, 1);
3651 }
3652 // CHECK-LABEL: @vsllwil_wu_hu(
3653 // CHECK-NEXT:  entry:
3654 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3655 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
3656 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3657 // CHECK-NEXT:    ret i128 [[TMP2]]
3658 //
// Test wrapper: calls __builtin_lsx_vsllwil_wu_hu with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.wu.hu
// taking <8 x i16> plus `i32 1` and yielding <4 x i32>.
3659 v4u32 vsllwil_wu_hu(v8u16 _1) {
3660   return __builtin_lsx_vsllwil_wu_hu(_1, 1);
3661 }
3662 // CHECK-LABEL: @vsllwil_du_wu(
3663 // CHECK-NEXT:  entry:
3664 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3665 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
3666 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3667 // CHECK-NEXT:    ret i128 [[TMP2]]
3668 //
// Test wrapper: calls __builtin_lsx_vsllwil_du_wu with _1 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vsllwil.du.wu
// taking <4 x i32> plus `i32 1` and yielding <2 x i64>.
3669 v2u64 vsllwil_du_wu(v4u32 _1) {
3670   return __builtin_lsx_vsllwil_du_wu(_1, 1);
3671 }
3672 // CHECK-LABEL: @vsran_b_h(
3673 // CHECK-NEXT:  entry:
3674 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3675 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3676 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3677 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3678 // CHECK-NEXT:    ret i128 [[TMP3]]
3679 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsran_b_h and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsran.b.h taking two <8 x i16> and yielding <16 x i8>.
3680 v16i8 vsran_b_h(v8i16 _1, v8i16 _2) {
3681   return __builtin_lsx_vsran_b_h(_1, _2);
3682 }
3683 // CHECK-LABEL: @vsran_h_w(
3684 // CHECK-NEXT:  entry:
3685 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3686 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3687 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3688 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3689 // CHECK-NEXT:    ret i128 [[TMP3]]
3690 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsran_h_w and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsran.h.w taking two <4 x i32> and yielding <8 x i16>.
3691 v8i16 vsran_h_w(v4i32 _1, v4i32 _2) {
3692   return __builtin_lsx_vsran_h_w(_1, _2);
3693 }
3694 // CHECK-LABEL: @vsran_w_d(
3695 // CHECK-NEXT:  entry:
3696 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3697 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3698 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3699 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3700 // CHECK-NEXT:    ret i128 [[TMP3]]
3701 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsran_w_d and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsran.w.d taking two <2 x i64> and yielding <4 x i32>.
3702 v4i32 vsran_w_d(v2i64 _1, v2i64 _2) {
3703   return __builtin_lsx_vsran_w_d(_1, _2);
3704 }
3705 // CHECK-LABEL: @vssran_b_h(
3706 // CHECK-NEXT:  entry:
3707 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3708 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3709 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3710 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3711 // CHECK-NEXT:    ret i128 [[TMP3]]
3712 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_b_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.b.h taking two <8 x i16> and yielding <16 x i8>.
3713 v16i8 vssran_b_h(v8i16 _1, v8i16 _2) {
3714   return __builtin_lsx_vssran_b_h(_1, _2);
3715 }
3716 // CHECK-LABEL: @vssran_h_w(
3717 // CHECK-NEXT:  entry:
3718 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3719 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3720 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3721 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3722 // CHECK-NEXT:    ret i128 [[TMP3]]
3723 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_h_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.h.w taking two <4 x i32> and yielding <8 x i16>.
3724 v8i16 vssran_h_w(v4i32 _1, v4i32 _2) {
3725   return __builtin_lsx_vssran_h_w(_1, _2);
3726 }
3727 // CHECK-LABEL: @vssran_w_d(
3728 // CHECK-NEXT:  entry:
3729 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3730 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3731 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3732 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3733 // CHECK-NEXT:    ret i128 [[TMP3]]
3734 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_w_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.w.d taking two <2 x i64> and yielding <4 x i32>.
3735 v4i32 vssran_w_d(v2i64 _1, v2i64 _2) {
3736   return __builtin_lsx_vssran_w_d(_1, _2);
3737 }
3738 // CHECK-LABEL: @vssran_bu_h(
3739 // CHECK-NEXT:  entry:
3740 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3741 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3742 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3743 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3744 // CHECK-NEXT:    ret i128 [[TMP3]]
3745 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_bu_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.bu.h taking two <8 x i16> and yielding <16 x i8>.
3746 v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) {
3747   return __builtin_lsx_vssran_bu_h(_1, _2);
3748 }
3749 // CHECK-LABEL: @vssran_hu_w(
3750 // CHECK-NEXT:  entry:
3751 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3752 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3753 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3754 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3755 // CHECK-NEXT:    ret i128 [[TMP3]]
3756 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_hu_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.hu.w taking two <4 x i32> and yielding <8 x i16>.
3757 v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) {
3758   return __builtin_lsx_vssran_hu_w(_1, _2);
3759 }
3760 // CHECK-LABEL: @vssran_wu_d(
3761 // CHECK-NEXT:  entry:
3762 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3763 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3764 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3765 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3766 // CHECK-NEXT:    ret i128 [[TMP3]]
3767 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssran_wu_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssran.wu.d taking two <2 x i64> and yielding <4 x i32>.
3768 v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) {
3769   return __builtin_lsx_vssran_wu_d(_1, _2);
3770 }
3771 // CHECK-LABEL: @vsrarn_b_h(
3772 // CHECK-NEXT:  entry:
3773 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3774 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3775 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3776 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3777 // CHECK-NEXT:    ret i128 [[TMP3]]
3778 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrarn_b_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrarn.b.h taking two <8 x i16> and yielding <16 x i8>.
3779 v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) {
3780   return __builtin_lsx_vsrarn_b_h(_1, _2);
3781 }
3782 // CHECK-LABEL: @vsrarn_h_w(
3783 // CHECK-NEXT:  entry:
3784 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3785 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3786 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3787 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3788 // CHECK-NEXT:    ret i128 [[TMP3]]
3789 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrarn_h_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrarn.h.w taking two <4 x i32> and yielding <8 x i16>.
3790 v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
3791   return __builtin_lsx_vsrarn_h_w(_1, _2);
3792 }
3793 // CHECK-LABEL: @vsrarn_w_d(
3794 // CHECK-NEXT:  entry:
3795 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3796 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3797 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3798 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3799 // CHECK-NEXT:    ret i128 [[TMP3]]
3800 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrarn_w_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrarn.w.d taking two <2 x i64> and yielding <4 x i32>.
3801 v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
3802   return __builtin_lsx_vsrarn_w_d(_1, _2);
3803 }
3804 // CHECK-LABEL: @vssrarn_b_h(
3805 // CHECK-NEXT:  entry:
3806 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3807 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3808 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3809 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3810 // CHECK-NEXT:    ret i128 [[TMP3]]
3811 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_b_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.b.h taking two <8 x i16> and yielding <16 x i8>.
3812 v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
3813   return __builtin_lsx_vssrarn_b_h(_1, _2);
3814 }
3815 // CHECK-LABEL: @vssrarn_h_w(
3816 // CHECK-NEXT:  entry:
3817 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3818 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3819 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3820 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3821 // CHECK-NEXT:    ret i128 [[TMP3]]
3822 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_h_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.h.w taking two <4 x i32> and yielding <8 x i16>.
3823 v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
3824   return __builtin_lsx_vssrarn_h_w(_1, _2);
3825 }
3826 // CHECK-LABEL: @vssrarn_w_d(
3827 // CHECK-NEXT:  entry:
3828 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3829 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3830 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3831 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3832 // CHECK-NEXT:    ret i128 [[TMP3]]
3833 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_w_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.w.d taking two <2 x i64> and yielding <4 x i32>.
3834 v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
3835   return __builtin_lsx_vssrarn_w_d(_1, _2);
3836 }
3837 // CHECK-LABEL: @vssrarn_bu_h(
3838 // CHECK-NEXT:  entry:
3839 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3840 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3841 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3842 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3843 // CHECK-NEXT:    ret i128 [[TMP3]]
3844 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_bu_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.bu.h taking two <8 x i16> and yielding <16 x i8>.
3845 v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
3846   return __builtin_lsx_vssrarn_bu_h(_1, _2);
3847 }
3848 // CHECK-LABEL: @vssrarn_hu_w(
3849 // CHECK-NEXT:  entry:
3850 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3851 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3852 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3853 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3854 // CHECK-NEXT:    ret i128 [[TMP3]]
3855 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_hu_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.hu.w taking two <4 x i32> and yielding <8 x i16>.
3856 v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
3857   return __builtin_lsx_vssrarn_hu_w(_1, _2);
3858 }
3859 // CHECK-LABEL: @vssrarn_wu_d(
3860 // CHECK-NEXT:  entry:
3861 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3862 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3863 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3864 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3865 // CHECK-NEXT:    ret i128 [[TMP3]]
3866 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrarn_wu_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrarn.wu.d taking two <2 x i64> and yielding <4 x i32>.
3867 v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
3868   return __builtin_lsx_vssrarn_wu_d(_1, _2);
3869 }
3870 // CHECK-LABEL: @vsrln_b_h(
3871 // CHECK-NEXT:  entry:
3872 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3873 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3874 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3875 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3876 // CHECK-NEXT:    ret i128 [[TMP3]]
3877 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrln_b_h and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrln.b.h taking two <8 x i16> and yielding <16 x i8>.
3878 v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
3879   return __builtin_lsx_vsrln_b_h(_1, _2);
3880 }
3881 // CHECK-LABEL: @vsrln_h_w(
3882 // CHECK-NEXT:  entry:
3883 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3884 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3885 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3886 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3887 // CHECK-NEXT:    ret i128 [[TMP3]]
3888 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrln_h_w and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrln.h.w taking two <4 x i32> and yielding <8 x i16>.
3889 v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
3890   return __builtin_lsx_vsrln_h_w(_1, _2);
3891 }
3892 // CHECK-LABEL: @vsrln_w_d(
3893 // CHECK-NEXT:  entry:
3894 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3895 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3896 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3897 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3898 // CHECK-NEXT:    ret i128 [[TMP3]]
3899 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrln_w_d and
// returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrln.w.d taking two <2 x i64> and yielding <4 x i32>.
3900 v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
3901   return __builtin_lsx_vsrln_w_d(_1, _2);
3902 }
3903 // CHECK-LABEL: @vssrln_bu_h(
3904 // CHECK-NEXT:  entry:
3905 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3906 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3907 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3908 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3909 // CHECK-NEXT:    ret i128 [[TMP3]]
3910 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrln_bu_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrln.bu.h taking two <8 x i16> and yielding <16 x i8>.
3911 v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
3912   return __builtin_lsx_vssrln_bu_h(_1, _2);
3913 }
3914 // CHECK-LABEL: @vssrln_hu_w(
3915 // CHECK-NEXT:  entry:
3916 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3917 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3918 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3919 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3920 // CHECK-NEXT:    ret i128 [[TMP3]]
3921 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrln_hu_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrln.hu.w taking two <4 x i32> and yielding <8 x i16>.
3922 v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
3923   return __builtin_lsx_vssrln_hu_w(_1, _2);
3924 }
3925 // CHECK-LABEL: @vssrln_wu_d(
3926 // CHECK-NEXT:  entry:
3927 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3928 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3929 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3930 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3931 // CHECK-NEXT:    ret i128 [[TMP3]]
3932 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrln_wu_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrln.wu.d taking two <2 x i64> and yielding <4 x i32>.
3933 v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
3934   return __builtin_lsx_vssrln_wu_d(_1, _2);
3935 }
3936 // CHECK-LABEL: @vsrlrn_b_h(
3937 // CHECK-NEXT:  entry:
3938 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3939 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3940 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3941 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3942 // CHECK-NEXT:    ret i128 [[TMP3]]
3943 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrlrn_b_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrlrn.b.h taking two <8 x i16> and yielding <16 x i8>.
3944 v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
3945   return __builtin_lsx_vsrlrn_b_h(_1, _2);
3946 }
3947 // CHECK-LABEL: @vsrlrn_h_w(
3948 // CHECK-NEXT:  entry:
3949 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3950 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3951 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3952 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3953 // CHECK-NEXT:    ret i128 [[TMP3]]
3954 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrlrn_h_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrlrn.h.w taking two <4 x i32> and yielding <8 x i16>.
3955 v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
3956   return __builtin_lsx_vsrlrn_h_w(_1, _2);
3957 }
3958 // CHECK-LABEL: @vsrlrn_w_d(
3959 // CHECK-NEXT:  entry:
3960 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3961 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3962 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3963 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3964 // CHECK-NEXT:    ret i128 [[TMP3]]
3965 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vsrlrn_w_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vsrlrn.w.d taking two <2 x i64> and yielding <4 x i32>.
3966 v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
3967   return __builtin_lsx_vsrlrn_w_d(_1, _2);
3968 }
3969 // CHECK-LABEL: @vssrlrn_bu_h(
3970 // CHECK-NEXT:  entry:
3971 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3972 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3973 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3974 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3975 // CHECK-NEXT:    ret i128 [[TMP3]]
3976 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrlrn_bu_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrlrn.bu.h taking two <8 x i16> and yielding <16 x i8>.
3977 v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
3978   return __builtin_lsx_vssrlrn_bu_h(_1, _2);
3979 }
3980 // CHECK-LABEL: @vssrlrn_hu_w(
3981 // CHECK-NEXT:  entry:
3982 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3983 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3984 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3985 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3986 // CHECK-NEXT:    ret i128 [[TMP3]]
3987 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrlrn_hu_w
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrlrn.hu.w taking two <4 x i32> and yielding <8 x i16>.
3988 v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
3989   return __builtin_lsx_vssrlrn_hu_w(_1, _2);
3990 }
3991 // CHECK-LABEL: @vssrlrn_wu_d(
3992 // CHECK-NEXT:  entry:
3993 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3994 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3995 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3996 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3997 // CHECK-NEXT:    ret i128 [[TMP3]]
3998 //
// Test wrapper: passes _1 and _2, in that order, to __builtin_lsx_vssrlrn_wu_d
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vssrlrn.wu.d taking two <2 x i64> and yielding <4 x i32>.
3999 v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
4000   return __builtin_lsx_vssrlrn_wu_d(_1, _2);
4001 }
4002 // CHECK-LABEL: @vfrstpi_b(
4003 // CHECK-NEXT:  entry:
4004 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4005 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4006 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
4007 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4008 // CHECK-NEXT:    ret i128 [[TMP3]]
4009 //
// Test wrapper: calls __builtin_lsx_vfrstpi_b with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vfrstpi.b on two
// <16 x i8> operands followed by `i32 1`.
4010 v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
4011   return __builtin_lsx_vfrstpi_b(_1, _2, 1);
4012 }
4013 // CHECK-LABEL: @vfrstpi_h(
4014 // CHECK-NEXT:  entry:
4015 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4016 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4017 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
4018 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4019 // CHECK-NEXT:    ret i128 [[TMP3]]
4020 //
// Test wrapper: calls __builtin_lsx_vfrstpi_h with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vfrstpi.h on two
// <8 x i16> operands followed by `i32 1`.
4021 v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
4022   return __builtin_lsx_vfrstpi_h(_1, _2, 1);
4023 }
4024 // CHECK-LABEL: @vfrstp_b(
4025 // CHECK-NEXT:  entry:
4026 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4027 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4028 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4029 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4030 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
4031 // CHECK-NEXT:    ret i128 [[TMP4]]
4032 //
// Test wrapper: passes _1, _2 and _3, in that order, to __builtin_lsx_vfrstp_b
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vfrstp.b on three <16 x i8> operands.
4033 v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
4034   return __builtin_lsx_vfrstp_b(_1, _2, _3);
4035 }
4036 // CHECK-LABEL: @vfrstp_h(
4037 // CHECK-NEXT:  entry:
4038 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4039 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4040 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4041 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4042 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4043 // CHECK-NEXT:    ret i128 [[TMP4]]
4044 //
// Test wrapper: passes _1, _2 and _3, in that order, to __builtin_lsx_vfrstp_h
// and returns the result. The assertions above expect one call to
// @llvm.loongarch.lsx.vfrstp.h on three <8 x i16> operands.
4045 v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
4046   return __builtin_lsx_vfrstp_h(_1, _2, _3);
4047 }
4048 // CHECK-LABEL: @vshuf4i_d(
4049 // CHECK-NEXT:  entry:
4050 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4051 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4052 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
4053 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4054 // CHECK-NEXT:    ret i128 [[TMP3]]
4055 //
// Test wrapper: calls __builtin_lsx_vshuf4i_d with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vshuf4i.d on two
// <2 x i64> operands followed by `i32 1`.
4056 v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
4057   return __builtin_lsx_vshuf4i_d(_1, _2, 1);
4058 }
4059 // CHECK-LABEL: @vbsrl_v(
4060 // CHECK-NEXT:  entry:
4061 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4062 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
4063 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4064 // CHECK-NEXT:    ret i128 [[TMP2]]
4065 //
// Test wrapper: calls __builtin_lsx_vbsrl_v with _1 and the constant 1. The
// assertions above expect one call to @llvm.loongarch.lsx.vbsrl.v on a
// <16 x i8> operand followed by `i32 1`.
4066 v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
4067 // CHECK-LABEL: @vbsll_v(
4068 // CHECK-NEXT:  entry:
4069 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4070 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
4071 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4072 // CHECK-NEXT:    ret i128 [[TMP2]]
4073 //
// Test wrapper: calls __builtin_lsx_vbsll_v with _1 and the constant 1. The
// assertions above expect one call to @llvm.loongarch.lsx.vbsll.v on a
// <16 x i8> operand followed by `i32 1`.
4074 v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
4075 // CHECK-LABEL: @vextrins_b(
4076 // CHECK-NEXT:  entry:
4077 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4078 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4079 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
4080 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4081 // CHECK-NEXT:    ret i128 [[TMP3]]
4082 //
// Test wrapper: calls __builtin_lsx_vextrins_b with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vextrins.b on
// two <16 x i8> operands followed by `i32 1`.
4083 v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
4084   return __builtin_lsx_vextrins_b(_1, _2, 1);
4085 }
4086 // CHECK-LABEL: @vextrins_h(
4087 // CHECK-NEXT:  entry:
4088 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4089 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4090 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
4091 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4092 // CHECK-NEXT:    ret i128 [[TMP3]]
4093 //
// Test wrapper: calls __builtin_lsx_vextrins_h with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vextrins.h on
// two <8 x i16> operands followed by `i32 1`.
4094 v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
4095   return __builtin_lsx_vextrins_h(_1, _2, 1);
4096 }
4097 // CHECK-LABEL: @vextrins_w(
4098 // CHECK-NEXT:  entry:
4099 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4100 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4101 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
4102 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4103 // CHECK-NEXT:    ret i128 [[TMP3]]
4104 //
// Test wrapper: calls __builtin_lsx_vextrins_w with _1, _2 and the constant 1.
// The assertions above expect one call to @llvm.loongarch.lsx.vextrins.w on
// two <4 x i32> operands followed by `i32 1`.
4105 v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
4106   return __builtin_lsx_vextrins_w(_1, _2, 1);
4107 }
4108 // CHECK-LABEL: @vextrins_d(
4109 // CHECK-NEXT:  entry:
4110 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4111 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4112 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
4113 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4114 // CHECK-NEXT:    ret i128 [[TMP3]]
4115 //
4116 v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
4117   return __builtin_lsx_vextrins_d(_1, _2, 1);
4118 }
4119 // CHECK-LABEL: @vmskltz_b(
4120 // CHECK-NEXT:  entry:
4121 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4122 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
4123 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4124 // CHECK-NEXT:    ret i128 [[TMP2]]
4125 //
4126 v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
4127 // CHECK-LABEL: @vmskltz_h(
4128 // CHECK-NEXT:  entry:
4129 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4130 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
4131 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
4132 // CHECK-NEXT:    ret i128 [[TMP2]]
4133 //
4134 v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
4135 // CHECK-LABEL: @vmskltz_w(
4136 // CHECK-NEXT:  entry:
4137 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4138 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
4139 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4140 // CHECK-NEXT:    ret i128 [[TMP2]]
4141 //
4142 v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
4143 // CHECK-LABEL: @vmskltz_d(
4144 // CHECK-NEXT:  entry:
4145 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4146 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
4147 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4148 // CHECK-NEXT:    ret i128 [[TMP2]]
4149 //
4150 v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
4151 // CHECK-LABEL: @vsigncov_b(
4152 // CHECK-NEXT:  entry:
4153 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4154 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4155 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4156 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4157 // CHECK-NEXT:    ret i128 [[TMP3]]
4158 //
4159 v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
4160   return __builtin_lsx_vsigncov_b(_1, _2);
4161 }
4162 // CHECK-LABEL: @vsigncov_h(
4163 // CHECK-NEXT:  entry:
4164 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4165 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4166 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4167 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4168 // CHECK-NEXT:    ret i128 [[TMP3]]
4169 //
4170 v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
4171   return __builtin_lsx_vsigncov_h(_1, _2);
4172 }
4173 // CHECK-LABEL: @vsigncov_w(
4174 // CHECK-NEXT:  entry:
4175 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4176 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4177 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4178 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4179 // CHECK-NEXT:    ret i128 [[TMP3]]
4180 //
4181 v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
4182   return __builtin_lsx_vsigncov_w(_1, _2);
4183 }
4184 // CHECK-LABEL: @vsigncov_d(
4185 // CHECK-NEXT:  entry:
4186 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4187 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4188 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4189 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4190 // CHECK-NEXT:    ret i128 [[TMP3]]
4191 //
4192 v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
4193   return __builtin_lsx_vsigncov_d(_1, _2);
4194 }
4195 // CHECK-LABEL: @vfmadd_s(
4196 // CHECK-NEXT:  entry:
4197 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4198 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4199 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4200 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4201 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4202 // CHECK-NEXT:    ret i128 [[TMP4]]
4203 //
4204 v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4205   return __builtin_lsx_vfmadd_s(_1, _2, _3);
4206 }
4207 // CHECK-LABEL: @vfmadd_d(
4208 // CHECK-NEXT:  entry:
4209 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4210 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4211 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4212 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4213 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4214 // CHECK-NEXT:    ret i128 [[TMP4]]
4215 //
4216 v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4217   return __builtin_lsx_vfmadd_d(_1, _2, _3);
4218 }
4219 // CHECK-LABEL: @vfmsub_s(
4220 // CHECK-NEXT:  entry:
4221 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4222 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4223 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4224 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4225 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4226 // CHECK-NEXT:    ret i128 [[TMP4]]
4227 //
4228 v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4229   return __builtin_lsx_vfmsub_s(_1, _2, _3);
4230 }
4231 // CHECK-LABEL: @vfmsub_d(
4232 // CHECK-NEXT:  entry:
4233 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4234 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4235 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4236 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4237 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4238 // CHECK-NEXT:    ret i128 [[TMP4]]
4239 //
4240 v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4241   return __builtin_lsx_vfmsub_d(_1, _2, _3);
4242 }
4243 // CHECK-LABEL: @vfnmadd_s(
4244 // CHECK-NEXT:  entry:
4245 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4246 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4247 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4248 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4249 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4250 // CHECK-NEXT:    ret i128 [[TMP4]]
4251 //
4252 v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4253   return __builtin_lsx_vfnmadd_s(_1, _2, _3);
4254 }
4255 // CHECK-LABEL: @vfnmadd_d(
4256 // CHECK-NEXT:  entry:
4257 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4258 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4259 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4260 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4261 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4262 // CHECK-NEXT:    ret i128 [[TMP4]]
4263 //
4264 v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4265   return __builtin_lsx_vfnmadd_d(_1, _2, _3);
4266 }
4267 // CHECK-LABEL: @vfnmsub_s(
4268 // CHECK-NEXT:  entry:
4269 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4270 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4271 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4272 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4273 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4274 // CHECK-NEXT:    ret i128 [[TMP4]]
4275 //
4276 v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4277   return __builtin_lsx_vfnmsub_s(_1, _2, _3);
4278 }
4279 // CHECK-LABEL: @vfnmsub_d(
4280 // CHECK-NEXT:  entry:
4281 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4282 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4283 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4284 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4285 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4286 // CHECK-NEXT:    ret i128 [[TMP4]]
4287 //
4288 v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4289   return __builtin_lsx_vfnmsub_d(_1, _2, _3);
4290 }
4291 // CHECK-LABEL: @vftintrne_w_s(
4292 // CHECK-NEXT:  entry:
4293 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4294 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
4295 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4296 // CHECK-NEXT:    ret i128 [[TMP2]]
4297 //
4298 v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
4299 // CHECK-LABEL: @vftintrne_l_d(
4300 // CHECK-NEXT:  entry:
4301 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4302 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
4303 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4304 // CHECK-NEXT:    ret i128 [[TMP2]]
4305 //
4306 v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
4307 // CHECK-LABEL: @vftintrp_w_s(
4308 // CHECK-NEXT:  entry:
4309 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4310 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
4311 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4312 // CHECK-NEXT:    ret i128 [[TMP2]]
4313 //
4314 v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
4315 // CHECK-LABEL: @vftintrp_l_d(
4316 // CHECK-NEXT:  entry:
4317 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4318 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
4319 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4320 // CHECK-NEXT:    ret i128 [[TMP2]]
4321 //
4322 v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
4323 // CHECK-LABEL: @vftintrm_w_s(
4324 // CHECK-NEXT:  entry:
4325 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4326 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
4327 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4328 // CHECK-NEXT:    ret i128 [[TMP2]]
4329 //
4330 v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
4331 // CHECK-LABEL: @vftintrm_l_d(
4332 // CHECK-NEXT:  entry:
4333 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4334 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
4335 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4336 // CHECK-NEXT:    ret i128 [[TMP2]]
4337 //
4338 v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
4339 // CHECK-LABEL: @vftint_w_d(
4340 // CHECK-NEXT:  entry:
4341 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4342 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4343 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4344 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4345 // CHECK-NEXT:    ret i128 [[TMP3]]
4346 //
4347 v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
4348   return __builtin_lsx_vftint_w_d(_1, _2);
4349 }
4350 // CHECK-LABEL: @vffint_s_l(
4351 // CHECK-NEXT:  entry:
4352 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4353 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4354 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4355 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
4356 // CHECK-NEXT:    ret i128 [[TMP3]]
4357 //
4358 v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
4359   return __builtin_lsx_vffint_s_l(_1, _2);
4360 }
4361 // CHECK-LABEL: @vftintrz_w_d(
4362 // CHECK-NEXT:  entry:
4363 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4364 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4365 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4366 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4367 // CHECK-NEXT:    ret i128 [[TMP3]]
4368 //
4369 v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
4370   return __builtin_lsx_vftintrz_w_d(_1, _2);
4371 }
4372 // CHECK-LABEL: @vftintrp_w_d(
4373 // CHECK-NEXT:  entry:
4374 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4375 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4376 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4377 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4378 // CHECK-NEXT:    ret i128 [[TMP3]]
4379 //
4380 v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
4381   return __builtin_lsx_vftintrp_w_d(_1, _2);
4382 }
4383 // CHECK-LABEL: @vftintrm_w_d(
4384 // CHECK-NEXT:  entry:
4385 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4386 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4387 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4388 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4389 // CHECK-NEXT:    ret i128 [[TMP3]]
4390 //
4391 v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
4392   return __builtin_lsx_vftintrm_w_d(_1, _2);
4393 }
4394 // CHECK-LABEL: @vftintrne_w_d(
4395 // CHECK-NEXT:  entry:
4396 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4397 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4398 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4399 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4400 // CHECK-NEXT:    ret i128 [[TMP3]]
4401 //
4402 v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
4403   return __builtin_lsx_vftintrne_w_d(_1, _2);
4404 }
4405 // CHECK-LABEL: @vftintl_l_s(
4406 // CHECK-NEXT:  entry:
4407 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4408 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
4409 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4410 // CHECK-NEXT:    ret i128 [[TMP2]]
4411 //
4412 v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
4413 // CHECK-LABEL: @vftinth_l_s(
4414 // CHECK-NEXT:  entry:
4415 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4416 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
4417 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4418 // CHECK-NEXT:    ret i128 [[TMP2]]
4419 //
4420 v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
4421 // CHECK-LABEL: @vffinth_d_w(
4422 // CHECK-NEXT:  entry:
4423 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4424 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
4425 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4426 // CHECK-NEXT:    ret i128 [[TMP2]]
4427 //
4428 v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); }
4429 // CHECK-LABEL: @vffintl_d_w(
4430 // CHECK-NEXT:  entry:
4431 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4432 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
4433 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4434 // CHECK-NEXT:    ret i128 [[TMP2]]
4435 //
4436 v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); }
4437 // CHECK-LABEL: @vftintrzl_l_s(
4438 // CHECK-NEXT:  entry:
4439 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4440 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
4441 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4442 // CHECK-NEXT:    ret i128 [[TMP2]]
4443 //
4444 v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); }
4445 // CHECK-LABEL: @vftintrzh_l_s(
4446 // CHECK-NEXT:  entry:
4447 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4448 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
4449 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4450 // CHECK-NEXT:    ret i128 [[TMP2]]
4451 //
4452 v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); }
4453 // CHECK-LABEL: @vftintrpl_l_s(
4454 // CHECK-NEXT:  entry:
4455 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4456 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
4457 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4458 // CHECK-NEXT:    ret i128 [[TMP2]]
4459 //
4460 v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); }
4461 // CHECK-LABEL: @vftintrph_l_s(
4462 // CHECK-NEXT:  entry:
4463 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4464 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
4465 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4466 // CHECK-NEXT:    ret i128 [[TMP2]]
4467 //
4468 v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); }
4469 // CHECK-LABEL: @vftintrml_l_s(
4470 // CHECK-NEXT:  entry:
4471 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4472 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
4473 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4474 // CHECK-NEXT:    ret i128 [[TMP2]]
4475 //
4476 v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); }
4477 // CHECK-LABEL: @vftintrmh_l_s(
4478 // CHECK-NEXT:  entry:
4479 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4480 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
4481 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4482 // CHECK-NEXT:    ret i128 [[TMP2]]
4483 //
4484 v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); }
4485 // CHECK-LABEL: @vftintrnel_l_s(
4486 // CHECK-NEXT:  entry:
4487 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4488 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
4489 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4490 // CHECK-NEXT:    ret i128 [[TMP2]]
4491 //
4492 v2i64 vftintrnel_l_s(v4f32 _1) {
4493   return __builtin_lsx_vftintrnel_l_s(_1);
4494 }
4495 // CHECK-LABEL: @vftintrneh_l_s(
4496 // CHECK-NEXT:  entry:
4497 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4498 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
4499 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4500 // CHECK-NEXT:    ret i128 [[TMP2]]
4501 //
4502 v2i64 vftintrneh_l_s(v4f32 _1) {
4503   return __builtin_lsx_vftintrneh_l_s(_1);
4504 }
4505 // CHECK-LABEL: @vfrintrne_s(
4506 // CHECK-NEXT:  entry:
4507 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4508 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
4509 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4510 // CHECK-NEXT:    ret i128 [[TMP2]]
4511 //
4512 v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); }
4513 // CHECK-LABEL: @vfrintrne_d(
4514 // CHECK-NEXT:  entry:
4515 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4516 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
4517 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4518 // CHECK-NEXT:    ret i128 [[TMP2]]
4519 //
4520 v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); }
4521 // CHECK-LABEL: @vfrintrz_s(
4522 // CHECK-NEXT:  entry:
4523 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4524 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
4525 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4526 // CHECK-NEXT:    ret i128 [[TMP2]]
4527 //
4528 v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); }
4529 // CHECK-LABEL: @vfrintrz_d(
4530 // CHECK-NEXT:  entry:
4531 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4532 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
4533 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4534 // CHECK-NEXT:    ret i128 [[TMP2]]
4535 //
4536 v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); }
4537 // CHECK-LABEL: @vfrintrp_s(
4538 // CHECK-NEXT:  entry:
4539 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4540 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
4541 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4542 // CHECK-NEXT:    ret i128 [[TMP2]]
4543 //
4544 v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); }
4545 // CHECK-LABEL: @vfrintrp_d(
4546 // CHECK-NEXT:  entry:
4547 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4548 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
4549 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4550 // CHECK-NEXT:    ret i128 [[TMP2]]
4551 //
4552 v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); }
4553 // CHECK-LABEL: @vfrintrm_s(
4554 // CHECK-NEXT:  entry:
4555 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4556 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
4557 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4558 // CHECK-NEXT:    ret i128 [[TMP2]]
4559 //
4560 v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); }
4561 // CHECK-LABEL: @vfrintrm_d(
4562 // CHECK-NEXT:  entry:
4563 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4564 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
4565 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4566 // CHECK-NEXT:    ret i128 [[TMP2]]
4567 //
4568 v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
4569 // CHECK-LABEL: @vstelm_b(
4570 // CHECK-NEXT:  entry:
4571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4572 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
4573 // CHECK-NEXT:    ret void
4574 //
4575 void vstelm_b(v16i8 _1, void *_2) {
4576   return __builtin_lsx_vstelm_b(_1, _2, 1, 1);
4577 }
4578 // CHECK-LABEL: @vstelm_h(
4579 // CHECK-NEXT:  entry:
4580 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4581 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
4582 // CHECK-NEXT:    ret void
4583 //
4584 void vstelm_h(v8i16 _1, void *_2) {
4585   return __builtin_lsx_vstelm_h(_1, _2, 2, 1);
4586 }
4587 // CHECK-LABEL: @vstelm_w(
4588 // CHECK-NEXT:  entry:
4589 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4590 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
4591 // CHECK-NEXT:    ret void
4592 //
4593 void vstelm_w(v4i32 _1, void *_2) {
4594   return __builtin_lsx_vstelm_w(_1, _2, 4, 1);
4595 }
4596 // CHECK-LABEL: @vstelm_d(
4597 // CHECK-NEXT:  entry:
4598 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4599 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
4600 // CHECK-NEXT:    ret void
4601 //
4602 void vstelm_d(v2i64 _1, void *_2) {
4603   return __builtin_lsx_vstelm_d(_1, _2, 8, 1);
4604 }
4605 // CHECK-LABEL: @vaddwev_d_w(
4606 // CHECK-NEXT:  entry:
4607 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4609 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4610 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4611 // CHECK-NEXT:    ret i128 [[TMP3]]
4612 //
4613 v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) {
4614   return __builtin_lsx_vaddwev_d_w(_1, _2);
4615 }
4616 // CHECK-LABEL: @vaddwev_w_h(
4617 // CHECK-NEXT:  entry:
4618 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4619 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4620 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4621 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4622 // CHECK-NEXT:    ret i128 [[TMP3]]
4623 //
4624 v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) {
4625   return __builtin_lsx_vaddwev_w_h(_1, _2);
4626 }
4627 // CHECK-LABEL: @vaddwev_h_b(
4628 // CHECK-NEXT:  entry:
4629 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4630 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4631 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4632 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4633 // CHECK-NEXT:    ret i128 [[TMP3]]
4634 //
4635 v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) {
4636   return __builtin_lsx_vaddwev_h_b(_1, _2);
4637 }
4638 // CHECK-LABEL: @vaddwod_d_w(
4639 // CHECK-NEXT:  entry:
4640 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4641 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4642 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4643 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4644 // CHECK-NEXT:    ret i128 [[TMP3]]
4645 //
4646 v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) {
4647   return __builtin_lsx_vaddwod_d_w(_1, _2);
4648 }
4649 // CHECK-LABEL: @vaddwod_w_h(
4650 // CHECK-NEXT:  entry:
4651 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4652 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4653 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4654 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4655 // CHECK-NEXT:    ret i128 [[TMP3]]
4656 //
4657 v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) {
4658   return __builtin_lsx_vaddwod_w_h(_1, _2);
4659 }
4660 // CHECK-LABEL: @vaddwod_h_b(
4661 // CHECK-NEXT:  entry:
4662 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4663 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4664 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4665 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4666 // CHECK-NEXT:    ret i128 [[TMP3]]
4667 //
4668 v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) {
4669   return __builtin_lsx_vaddwod_h_b(_1, _2);
4670 }
4671 // CHECK-LABEL: @vaddwev_d_wu(
4672 // CHECK-NEXT:  entry:
4673 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4674 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4675 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4676 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4677 // CHECK-NEXT:    ret i128 [[TMP3]]
4678 //
4679 v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) {
4680   return __builtin_lsx_vaddwev_d_wu(_1, _2);
4681 }
4682 // CHECK-LABEL: @vaddwev_w_hu(
4683 // CHECK-NEXT:  entry:
4684 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4685 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4686 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4687 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4688 // CHECK-NEXT:    ret i128 [[TMP3]]
4689 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vaddwev_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vaddwev.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
4690 v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) {
4691   return __builtin_lsx_vaddwev_w_hu(_1, _2);
4692 }
4693 // CHECK-LABEL: @vaddwev_h_bu(
4694 // CHECK-NEXT:  entry:
4695 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4696 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4697 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4698 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4699 // CHECK-NEXT:    ret i128 [[TMP3]]
4700 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vaddwev_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vaddwev.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
4701 v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) {
4702   return __builtin_lsx_vaddwev_h_bu(_1, _2);
4703 }
4704 // CHECK-LABEL: @vaddwod_d_wu(
4705 // CHECK-NEXT:  entry:
4706 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4707 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4708 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4709 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4710 // CHECK-NEXT:    ret i128 [[TMP3]]
4711 //
// Test stimulus: forwards both v4u32 operands to __builtin_lsx_vaddwod_d_wu
// and returns its v2i64 result (one @llvm.loongarch.lsx.vaddwod.d.wu call).
// The unsignedness lives only in the C types; the expected IR uses <4 x i32>.
4712 v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) {
4713   return __builtin_lsx_vaddwod_d_wu(_1, _2);
4714 }
4715 // CHECK-LABEL: @vaddwod_w_hu(
4716 // CHECK-NEXT:  entry:
4717 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4718 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4719 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4720 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4721 // CHECK-NEXT:    ret i128 [[TMP3]]
4722 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vaddwod_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vaddwod.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
4723 v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) {
4724   return __builtin_lsx_vaddwod_w_hu(_1, _2);
4725 }
4726 // CHECK-LABEL: @vaddwod_h_bu(
4727 // CHECK-NEXT:  entry:
4728 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4729 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4730 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4731 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4732 // CHECK-NEXT:    ret i128 [[TMP3]]
4733 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vaddwod_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vaddwod.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
4734 v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) {
4735   return __builtin_lsx_vaddwod_h_bu(_1, _2);
4736 }
4737 // CHECK-LABEL: @vaddwev_d_wu_w(
4738 // CHECK-NEXT:  entry:
4739 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4740 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4741 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4742 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4743 // CHECK-NEXT:    ret i128 [[TMP3]]
4744 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v4u32), _2 is signed
// (v4i32); both go to __builtin_lsx_vaddwev_d_wu_w, which returns v2i64. In
// the expected IR both operands are <4 x i32> (vaddwev.d.wu.w intrinsic).
4745 v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
4746   return __builtin_lsx_vaddwev_d_wu_w(_1, _2);
4747 }
4748 // CHECK-LABEL: @vaddwev_w_hu_h(
4749 // CHECK-NEXT:  entry:
4750 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4751 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4752 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4753 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4754 // CHECK-NEXT:    ret i128 [[TMP3]]
4755 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v8u16), _2 is signed
// (v8i16); both go to __builtin_lsx_vaddwev_w_hu_h, which returns v4i32. In
// the expected IR both operands are <8 x i16> (vaddwev.w.hu.h intrinsic).
4756 v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
4757   return __builtin_lsx_vaddwev_w_hu_h(_1, _2);
4758 }
4759 // CHECK-LABEL: @vaddwev_h_bu_b(
4760 // CHECK-NEXT:  entry:
4761 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4762 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4763 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4764 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4765 // CHECK-NEXT:    ret i128 [[TMP3]]
4766 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v16u8), _2 is signed
// (v16i8); both go to __builtin_lsx_vaddwev_h_bu_b, which returns v8i16. In
// the expected IR both operands are <16 x i8> (vaddwev.h.bu.b intrinsic).
4767 v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
4768   return __builtin_lsx_vaddwev_h_bu_b(_1, _2);
4769 }
4770 // CHECK-LABEL: @vaddwod_d_wu_w(
4771 // CHECK-NEXT:  entry:
4772 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4773 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4774 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4775 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4776 // CHECK-NEXT:    ret i128 [[TMP3]]
4777 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v4u32), _2 is signed
// (v4i32); both go to __builtin_lsx_vaddwod_d_wu_w, which returns v2i64. In
// the expected IR both operands are <4 x i32> (vaddwod.d.wu.w intrinsic).
4778 v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
4779   return __builtin_lsx_vaddwod_d_wu_w(_1, _2);
4780 }
4781 // CHECK-LABEL: @vaddwod_w_hu_h(
4782 // CHECK-NEXT:  entry:
4783 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4784 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4785 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4786 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4787 // CHECK-NEXT:    ret i128 [[TMP3]]
4788 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v8u16), _2 is signed
// (v8i16); both go to __builtin_lsx_vaddwod_w_hu_h, which returns v4i32. In
// the expected IR both operands are <8 x i16> (vaddwod.w.hu.h intrinsic).
4789 v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
4790   return __builtin_lsx_vaddwod_w_hu_h(_1, _2);
4791 }
4792 // CHECK-LABEL: @vaddwod_h_bu_b(
4793 // CHECK-NEXT:  entry:
4794 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4795 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4796 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4797 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4798 // CHECK-NEXT:    ret i128 [[TMP3]]
4799 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v16u8), _2 is signed
// (v16i8); both go to __builtin_lsx_vaddwod_h_bu_b, which returns v8i16. In
// the expected IR both operands are <16 x i8> (vaddwod.h.bu.b intrinsic).
4800 v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
4801   return __builtin_lsx_vaddwod_h_bu_b(_1, _2);
4802 }
4803 // CHECK-LABEL: @vsubwev_d_w(
4804 // CHECK-NEXT:  entry:
4805 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4806 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4807 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4808 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4809 // CHECK-NEXT:    ret i128 [[TMP3]]
4810 //
// Test stimulus: forwards both v4i32 operands unchanged to
// __builtin_lsx_vsubwev_d_w and returns its v2i64 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwev.d.w call.
4811 v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) {
4812   return __builtin_lsx_vsubwev_d_w(_1, _2);
4813 }
4814 // CHECK-LABEL: @vsubwev_w_h(
4815 // CHECK-NEXT:  entry:
4816 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4817 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4818 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4819 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4820 // CHECK-NEXT:    ret i128 [[TMP3]]
4821 //
// Test stimulus: forwards both v8i16 operands unchanged to
// __builtin_lsx_vsubwev_w_h and returns its v4i32 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwev.w.h call.
4822 v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) {
4823   return __builtin_lsx_vsubwev_w_h(_1, _2);
4824 }
4825 // CHECK-LABEL: @vsubwev_h_b(
4826 // CHECK-NEXT:  entry:
4827 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4828 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4829 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4830 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4831 // CHECK-NEXT:    ret i128 [[TMP3]]
4832 //
// Test stimulus: forwards both v16i8 operands unchanged to
// __builtin_lsx_vsubwev_h_b and returns its v8i16 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwev.h.b call.
4833 v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) {
4834   return __builtin_lsx_vsubwev_h_b(_1, _2);
4835 }
4836 // CHECK-LABEL: @vsubwod_d_w(
4837 // CHECK-NEXT:  entry:
4838 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4839 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4840 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4841 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4842 // CHECK-NEXT:    ret i128 [[TMP3]]
4843 //
// Test stimulus: forwards both v4i32 operands unchanged to
// __builtin_lsx_vsubwod_d_w and returns its v2i64 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwod.d.w call.
4844 v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) {
4845   return __builtin_lsx_vsubwod_d_w(_1, _2);
4846 }
4847 // CHECK-LABEL: @vsubwod_w_h(
4848 // CHECK-NEXT:  entry:
4849 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4850 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4851 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4852 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4853 // CHECK-NEXT:    ret i128 [[TMP3]]
4854 //
// Test stimulus: forwards both v8i16 operands unchanged to
// __builtin_lsx_vsubwod_w_h and returns its v4i32 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwod.w.h call.
4855 v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) {
4856   return __builtin_lsx_vsubwod_w_h(_1, _2);
4857 }
4858 // CHECK-LABEL: @vsubwod_h_b(
4859 // CHECK-NEXT:  entry:
4860 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4861 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4862 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4863 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4864 // CHECK-NEXT:    ret i128 [[TMP3]]
4865 //
// Test stimulus: forwards both v16i8 operands unchanged to
// __builtin_lsx_vsubwod_h_b and returns its v8i16 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vsubwod.h.b call.
4866 v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) {
4867   return __builtin_lsx_vsubwod_h_b(_1, _2);
4868 }
4869 // CHECK-LABEL: @vsubwev_d_wu(
4870 // CHECK-NEXT:  entry:
4871 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4872 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4873 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4874 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4875 // CHECK-NEXT:    ret i128 [[TMP3]]
4876 //
// Test stimulus: forwards both v4u32 operands to __builtin_lsx_vsubwev_d_wu
// and returns its v2i64 result (one @llvm.loongarch.lsx.vsubwev.d.wu call).
// The unsignedness lives only in the C types; the expected IR uses <4 x i32>.
4877 v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) {
4878   return __builtin_lsx_vsubwev_d_wu(_1, _2);
4879 }
4880 // CHECK-LABEL: @vsubwev_w_hu(
4881 // CHECK-NEXT:  entry:
4882 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4883 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4884 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4885 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4886 // CHECK-NEXT:    ret i128 [[TMP3]]
4887 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vsubwev_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vsubwev.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
4888 v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) {
4889   return __builtin_lsx_vsubwev_w_hu(_1, _2);
4890 }
4891 // CHECK-LABEL: @vsubwev_h_bu(
4892 // CHECK-NEXT:  entry:
4893 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4894 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4895 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4896 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4897 // CHECK-NEXT:    ret i128 [[TMP3]]
4898 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vsubwev_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vsubwev.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
4899 v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) {
4900   return __builtin_lsx_vsubwev_h_bu(_1, _2);
4901 }
4902 // CHECK-LABEL: @vsubwod_d_wu(
4903 // CHECK-NEXT:  entry:
4904 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4905 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4906 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4907 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4908 // CHECK-NEXT:    ret i128 [[TMP3]]
4909 //
// Test stimulus: forwards both v4u32 operands to __builtin_lsx_vsubwod_d_wu
// and returns its v2i64 result (one @llvm.loongarch.lsx.vsubwod.d.wu call).
// The unsignedness lives only in the C types; the expected IR uses <4 x i32>.
4910 v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) {
4911   return __builtin_lsx_vsubwod_d_wu(_1, _2);
4912 }
4913 // CHECK-LABEL: @vsubwod_w_hu(
4914 // CHECK-NEXT:  entry:
4915 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4916 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4917 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4918 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4919 // CHECK-NEXT:    ret i128 [[TMP3]]
4920 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vsubwod_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vsubwod.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
4921 v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) {
4922   return __builtin_lsx_vsubwod_w_hu(_1, _2);
4923 }
4924 // CHECK-LABEL: @vsubwod_h_bu(
4925 // CHECK-NEXT:  entry:
4926 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4927 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4928 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4929 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4930 // CHECK-NEXT:    ret i128 [[TMP3]]
4931 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vsubwod_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vsubwod.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
4932 v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) {
4933   return __builtin_lsx_vsubwod_h_bu(_1, _2);
4934 }
4935 // CHECK-LABEL: @vaddwev_q_d(
4936 // CHECK-NEXT:  entry:
4937 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4938 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4939 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4940 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4941 // CHECK-NEXT:    ret i128 [[TMP3]]
4942 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vaddwev_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
4943 v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) {
4944   return __builtin_lsx_vaddwev_q_d(_1, _2);
4945 }
4946 // CHECK-LABEL: @vaddwod_q_d(
4947 // CHECK-NEXT:  entry:
4948 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4949 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4950 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4951 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4952 // CHECK-NEXT:    ret i128 [[TMP3]]
4953 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vaddwod_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
4954 v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) {
4955   return __builtin_lsx_vaddwod_q_d(_1, _2);
4956 }
4957 // CHECK-LABEL: @vaddwev_q_du(
4958 // CHECK-NEXT:  entry:
4959 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4960 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4961 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4962 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4963 // CHECK-NEXT:    ret i128 [[TMP3]]
4964 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vaddwev_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vaddwev.q.du call).
4965 v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) {
4966   return __builtin_lsx_vaddwev_q_du(_1, _2);
4967 }
4968 // CHECK-LABEL: @vaddwod_q_du(
4969 // CHECK-NEXT:  entry:
4970 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4971 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4972 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4973 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4974 // CHECK-NEXT:    ret i128 [[TMP3]]
4975 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vaddwod_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vaddwod.q.du call).
4976 v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) {
4977   return __builtin_lsx_vaddwod_q_du(_1, _2);
4978 }
4979 // CHECK-LABEL: @vsubwev_q_d(
4980 // CHECK-NEXT:  entry:
4981 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4982 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4983 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4984 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4985 // CHECK-NEXT:    ret i128 [[TMP3]]
4986 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vsubwev_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
4987 v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) {
4988   return __builtin_lsx_vsubwev_q_d(_1, _2);
4989 }
4990 // CHECK-LABEL: @vsubwod_q_d(
4991 // CHECK-NEXT:  entry:
4992 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4993 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4994 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4995 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4996 // CHECK-NEXT:    ret i128 [[TMP3]]
4997 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vsubwod_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
4998 v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) {
4999   return __builtin_lsx_vsubwod_q_d(_1, _2);
5000 }
5001 // CHECK-LABEL: @vsubwev_q_du(
5002 // CHECK-NEXT:  entry:
5003 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5004 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5005 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5006 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5007 // CHECK-NEXT:    ret i128 [[TMP3]]
5008 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vsubwev_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vsubwev.q.du call).
5009 v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) {
5010   return __builtin_lsx_vsubwev_q_du(_1, _2);
5011 }
5012 // CHECK-LABEL: @vsubwod_q_du(
5013 // CHECK-NEXT:  entry:
5014 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5015 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5016 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5017 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5018 // CHECK-NEXT:    ret i128 [[TMP3]]
5019 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vsubwod_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vsubwod.q.du call).
5020 v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) {
5021   return __builtin_lsx_vsubwod_q_du(_1, _2);
5022 }
5023 // CHECK-LABEL: @vaddwev_q_du_d(
5024 // CHECK-NEXT:  entry:
5025 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5026 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5027 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5028 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5029 // CHECK-NEXT:    ret i128 [[TMP3]]
5030 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v2u64), _2 is signed
// (v2i64); both go to __builtin_lsx_vaddwev_q_du_d, which returns v2i64. In
// the expected IR both operands are <2 x i64> (vaddwev.q.du.d intrinsic).
5031 v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
5032   return __builtin_lsx_vaddwev_q_du_d(_1, _2);
5033 }
5034 // CHECK-LABEL: @vaddwod_q_du_d(
5035 // CHECK-NEXT:  entry:
5036 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5037 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5038 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5039 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5040 // CHECK-NEXT:    ret i128 [[TMP3]]
5041 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v2u64), _2 is signed
// (v2i64); both go to __builtin_lsx_vaddwod_q_du_d, which returns v2i64. In
// the expected IR both operands are <2 x i64> (vaddwod.q.du.d intrinsic).
5042 v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
5043   return __builtin_lsx_vaddwod_q_du_d(_1, _2);
5044 }
5045 // CHECK-LABEL: @vmulwev_d_w(
5046 // CHECK-NEXT:  entry:
5047 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5048 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5049 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5050 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5051 // CHECK-NEXT:    ret i128 [[TMP3]]
5052 //
// Test stimulus: forwards both v4i32 operands unchanged to
// __builtin_lsx_vmulwev_d_w and returns its v2i64 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwev.d.w call.
5053 v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) {
5054   return __builtin_lsx_vmulwev_d_w(_1, _2);
5055 }
5056 // CHECK-LABEL: @vmulwev_w_h(
5057 // CHECK-NEXT:  entry:
5058 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5059 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5060 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5061 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5062 // CHECK-NEXT:    ret i128 [[TMP3]]
5063 //
// Test stimulus: forwards both v8i16 operands unchanged to
// __builtin_lsx_vmulwev_w_h and returns its v4i32 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwev.w.h call.
5064 v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) {
5065   return __builtin_lsx_vmulwev_w_h(_1, _2);
5066 }
5067 // CHECK-LABEL: @vmulwev_h_b(
5068 // CHECK-NEXT:  entry:
5069 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5070 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5071 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5072 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5073 // CHECK-NEXT:    ret i128 [[TMP3]]
5074 //
// Test stimulus: forwards both v16i8 operands unchanged to
// __builtin_lsx_vmulwev_h_b and returns its v8i16 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwev.h.b call.
5075 v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) {
5076   return __builtin_lsx_vmulwev_h_b(_1, _2);
5077 }
5078 // CHECK-LABEL: @vmulwod_d_w(
5079 // CHECK-NEXT:  entry:
5080 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5081 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5082 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5083 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5084 // CHECK-NEXT:    ret i128 [[TMP3]]
5085 //
// Test stimulus: forwards both v4i32 operands unchanged to
// __builtin_lsx_vmulwod_d_w and returns its v2i64 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwod.d.w call.
5086 v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) {
5087   return __builtin_lsx_vmulwod_d_w(_1, _2);
5088 }
5089 // CHECK-LABEL: @vmulwod_w_h(
5090 // CHECK-NEXT:  entry:
5091 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5092 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5093 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5094 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5095 // CHECK-NEXT:    ret i128 [[TMP3]]
5096 //
// Test stimulus: forwards both v8i16 operands unchanged to
// __builtin_lsx_vmulwod_w_h and returns its v4i32 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwod.w.h call.
5097 v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) {
5098   return __builtin_lsx_vmulwod_w_h(_1, _2);
5099 }
5100 // CHECK-LABEL: @vmulwod_h_b(
5101 // CHECK-NEXT:  entry:
5102 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5103 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5104 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5105 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5106 // CHECK-NEXT:    ret i128 [[TMP3]]
5107 //
// Test stimulus: forwards both v16i8 operands unchanged to
// __builtin_lsx_vmulwod_h_b and returns its v8i16 result. The expectations
// above pin this to one @llvm.loongarch.lsx.vmulwod.h.b call.
5108 v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) {
5109   return __builtin_lsx_vmulwod_h_b(_1, _2);
5110 }
5111 // CHECK-LABEL: @vmulwev_d_wu(
5112 // CHECK-NEXT:  entry:
5113 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5114 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5115 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5116 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5117 // CHECK-NEXT:    ret i128 [[TMP3]]
5118 //
// Test stimulus: forwards both v4u32 operands to __builtin_lsx_vmulwev_d_wu
// and returns its v2i64 result (one @llvm.loongarch.lsx.vmulwev.d.wu call).
// The unsignedness lives only in the C types; the expected IR uses <4 x i32>.
5119 v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) {
5120   return __builtin_lsx_vmulwev_d_wu(_1, _2);
5121 }
5122 // CHECK-LABEL: @vmulwev_w_hu(
5123 // CHECK-NEXT:  entry:
5124 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5125 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5126 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5127 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5128 // CHECK-NEXT:    ret i128 [[TMP3]]
5129 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vmulwev_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vmulwev.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
5130 v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) {
5131   return __builtin_lsx_vmulwev_w_hu(_1, _2);
5132 }
5133 // CHECK-LABEL: @vmulwev_h_bu(
5134 // CHECK-NEXT:  entry:
5135 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5136 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5137 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5138 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5139 // CHECK-NEXT:    ret i128 [[TMP3]]
5140 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vmulwev_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vmulwev.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
5141 v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) {
5142   return __builtin_lsx_vmulwev_h_bu(_1, _2);
5143 }
5144 // CHECK-LABEL: @vmulwod_d_wu(
5145 // CHECK-NEXT:  entry:
5146 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5147 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5148 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5149 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5150 // CHECK-NEXT:    ret i128 [[TMP3]]
5151 //
// Test stimulus: forwards both v4u32 operands to __builtin_lsx_vmulwod_d_wu
// and returns its v2i64 result (one @llvm.loongarch.lsx.vmulwod.d.wu call).
// The unsignedness lives only in the C types; the expected IR uses <4 x i32>.
5152 v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) {
5153   return __builtin_lsx_vmulwod_d_wu(_1, _2);
5154 }
5155 // CHECK-LABEL: @vmulwod_w_hu(
5156 // CHECK-NEXT:  entry:
5157 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5158 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5159 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5160 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5161 // CHECK-NEXT:    ret i128 [[TMP3]]
5162 //
// Test stimulus: forwards both v8u16 operands to __builtin_lsx_vmulwod_w_hu
// and returns its v4i32 result (one @llvm.loongarch.lsx.vmulwod.w.hu call).
// The unsignedness lives only in the C types; the expected IR uses <8 x i16>.
5163 v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) {
5164   return __builtin_lsx_vmulwod_w_hu(_1, _2);
5165 }
5166 // CHECK-LABEL: @vmulwod_h_bu(
5167 // CHECK-NEXT:  entry:
5168 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5169 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5170 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5171 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5172 // CHECK-NEXT:    ret i128 [[TMP3]]
5173 //
// Test stimulus: forwards both v16u8 operands to __builtin_lsx_vmulwod_h_bu
// and returns its v8i16 result (one @llvm.loongarch.lsx.vmulwod.h.bu call).
// The unsignedness lives only in the C types; the expected IR uses <16 x i8>.
5174 v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) {
5175   return __builtin_lsx_vmulwod_h_bu(_1, _2);
5176 }
5177 // CHECK-LABEL: @vmulwev_d_wu_w(
5178 // CHECK-NEXT:  entry:
5179 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5180 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5181 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5182 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5183 // CHECK-NEXT:    ret i128 [[TMP3]]
5184 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v4u32), _2 is signed
// (v4i32); both go to __builtin_lsx_vmulwev_d_wu_w, which returns v2i64. In
// the expected IR both operands are <4 x i32> (vmulwev.d.wu.w intrinsic).
5185 v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
5186   return __builtin_lsx_vmulwev_d_wu_w(_1, _2);
5187 }
5188 // CHECK-LABEL: @vmulwev_w_hu_h(
5189 // CHECK-NEXT:  entry:
5190 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5191 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5192 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5193 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5194 // CHECK-NEXT:    ret i128 [[TMP3]]
5195 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v8u16), _2 is signed
// (v8i16); both go to __builtin_lsx_vmulwev_w_hu_h, which returns v4i32. In
// the expected IR both operands are <8 x i16> (vmulwev.w.hu.h intrinsic).
5196 v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
5197   return __builtin_lsx_vmulwev_w_hu_h(_1, _2);
5198 }
5199 // CHECK-LABEL: @vmulwev_h_bu_b(
5200 // CHECK-NEXT:  entry:
5201 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5202 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5203 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5204 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5205 // CHECK-NEXT:    ret i128 [[TMP3]]
5206 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v16u8), _2 is signed
// (v16i8); both go to __builtin_lsx_vmulwev_h_bu_b, which returns v8i16. In
// the expected IR both operands are <16 x i8> (vmulwev.h.bu.b intrinsic).
5207 v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
5208   return __builtin_lsx_vmulwev_h_bu_b(_1, _2);
5209 }
5210 // CHECK-LABEL: @vmulwod_d_wu_w(
5211 // CHECK-NEXT:  entry:
5212 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5213 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5214 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5215 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5216 // CHECK-NEXT:    ret i128 [[TMP3]]
5217 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v4u32), _2 is signed
// (v4i32); both go to __builtin_lsx_vmulwod_d_wu_w, which returns v2i64. In
// the expected IR both operands are <4 x i32> (vmulwod.d.wu.w intrinsic).
5218 v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
5219   return __builtin_lsx_vmulwod_d_wu_w(_1, _2);
5220 }
5221 // CHECK-LABEL: @vmulwod_w_hu_h(
5222 // CHECK-NEXT:  entry:
5223 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5224 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5225 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5226 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5227 // CHECK-NEXT:    ret i128 [[TMP3]]
5228 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v8u16), _2 is signed
// (v8i16); both go to __builtin_lsx_vmulwod_w_hu_h, which returns v4i32. In
// the expected IR both operands are <8 x i16> (vmulwod.w.hu.h intrinsic).
5229 v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
5230   return __builtin_lsx_vmulwod_w_hu_h(_1, _2);
5231 }
5232 // CHECK-LABEL: @vmulwod_h_bu_b(
5233 // CHECK-NEXT:  entry:
5234 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5235 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5236 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5237 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5238 // CHECK-NEXT:    ret i128 [[TMP3]]
5239 //
// Test stimulus for the mixed-sign form: _1 is unsigned (v16u8), _2 is signed
// (v16i8); both go to __builtin_lsx_vmulwod_h_bu_b, which returns v8i16. In
// the expected IR both operands are <16 x i8> (vmulwod.h.bu.b intrinsic).
5240 v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
5241   return __builtin_lsx_vmulwod_h_bu_b(_1, _2);
5242 }
5243 // CHECK-LABEL: @vmulwev_q_d(
5244 // CHECK-NEXT:  entry:
5245 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5246 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5247 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5248 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5249 // CHECK-NEXT:    ret i128 [[TMP3]]
5250 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vmulwev_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
5251 v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) {
5252   return __builtin_lsx_vmulwev_q_d(_1, _2);
5253 }
5254 // CHECK-LABEL: @vmulwod_q_d(
5255 // CHECK-NEXT:  entry:
5256 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5257 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5258 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5259 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5260 // CHECK-NEXT:    ret i128 [[TMP3]]
5261 //
// Test stimulus: forwards both v2i64 operands to __builtin_lsx_vmulwod_q_d;
// operands and result are all <2 x i64> in the expected IR.
// NOTE(review): the "q" presumably means a 128-bit result - confirm in manual.
5262 v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) {
5263   return __builtin_lsx_vmulwod_q_d(_1, _2);
5264 }
5265 // CHECK-LABEL: @vmulwev_q_du(
5266 // CHECK-NEXT:  entry:
5267 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5268 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5269 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5270 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5271 // CHECK-NEXT:    ret i128 [[TMP3]]
5272 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vmulwev_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vmulwev.q.du call).
5273 v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) {
5274   return __builtin_lsx_vmulwev_q_du(_1, _2);
5275 }
5276 // CHECK-LABEL: @vmulwod_q_du(
5277 // CHECK-NEXT:  entry:
5278 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5279 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5280 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5281 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5282 // CHECK-NEXT:    ret i128 [[TMP3]]
5283 //
// Test stimulus: forwards both v2u64 operands to __builtin_lsx_vmulwod_q_du
// and returns a signed v2i64, as declared here. The expected IR uses
// <2 x i64> throughout (one @llvm.loongarch.lsx.vmulwod.q.du call).
5284 v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) {
5285   return __builtin_lsx_vmulwod_q_du(_1, _2);
5286 }
5287 // CHECK-LABEL: @vmulwev_q_du_d(
5288 // CHECK-NEXT:  entry:
5289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5290 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5291 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5292 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5293 // CHECK-NEXT:    ret i128 [[TMP3]]
5294 //
// Test wrapper for __builtin_lsx_vmulwev_q_du_d: the first operand is v2u64,
// the second is v2i64, and the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmulwev.q.du.d call with both operands as
// <2 x i64>.
5295 v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
5296   return __builtin_lsx_vmulwev_q_du_d(_1, _2);
5297 }
5298 // CHECK-LABEL: @vmulwod_q_du_d(
5299 // CHECK-NEXT:  entry:
5300 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5301 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5302 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5303 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5304 // CHECK-NEXT:    ret i128 [[TMP3]]
5305 //
// Test wrapper for __builtin_lsx_vmulwod_q_du_d: the first operand is v2u64,
// the second is v2i64, and the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmulwod.q.du.d call with both operands as
// <2 x i64>.
5306 v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
5307   return __builtin_lsx_vmulwod_q_du_d(_1, _2);
5308 }
5309 // CHECK-LABEL: @vhaddw_q_d(
5310 // CHECK-NEXT:  entry:
5311 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5312 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5313 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5314 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5315 // CHECK-NEXT:    ret i128 [[TMP3]]
5316 //
// Test wrapper for __builtin_lsx_vhaddw_q_d: forwards two v2i64 operands and
// returns the builtin's result. The assertions above expect a single
// @llvm.loongarch.lsx.vhaddw.q.d call on <2 x i64> values.
5317 v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) {
5318   return __builtin_lsx_vhaddw_q_d(_1, _2);
5319 }
5320 // CHECK-LABEL: @vhaddw_qu_du(
5321 // CHECK-NEXT:  entry:
5322 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5323 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5324 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5325 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5326 // CHECK-NEXT:    ret i128 [[TMP3]]
5327 //
// Test wrapper for __builtin_lsx_vhaddw_qu_du: forwards two v2u64 operands and
// returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vhaddw.qu.du call on <2 x i64> values.
5328 v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) {
5329   return __builtin_lsx_vhaddw_qu_du(_1, _2);
5330 }
5331 // CHECK-LABEL: @vhsubw_q_d(
5332 // CHECK-NEXT:  entry:
5333 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5334 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5335 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5336 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5337 // CHECK-NEXT:    ret i128 [[TMP3]]
5338 //
// Test wrapper for __builtin_lsx_vhsubw_q_d: forwards two v2i64 operands and
// returns the builtin's result. The assertions above expect a single
// @llvm.loongarch.lsx.vhsubw.q.d call on <2 x i64> values.
5339 v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) {
5340   return __builtin_lsx_vhsubw_q_d(_1, _2);
5341 }
5342 // CHECK-LABEL: @vhsubw_qu_du(
5343 // CHECK-NEXT:  entry:
5344 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5345 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5346 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5347 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5348 // CHECK-NEXT:    ret i128 [[TMP3]]
5349 //
// Test wrapper for __builtin_lsx_vhsubw_qu_du: forwards two v2u64 operands and
// returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vhsubw.qu.du call on <2 x i64> values.
5350 v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) {
5351   return __builtin_lsx_vhsubw_qu_du(_1, _2);
5352 }
5353 // CHECK-LABEL: @vmaddwev_d_w(
5354 // CHECK-NEXT:  entry:
5355 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5356 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5357 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5358 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5359 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5360 // CHECK-NEXT:    ret i128 [[TMP4]]
5361 //
// Test wrapper for __builtin_lsx_vmaddwev_d_w: the first operand and the
// result are v2i64, the second and third operands are v4i32. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.d.w call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5362 v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
5363   return __builtin_lsx_vmaddwev_d_w(_1, _2, _3);
5364 }
5365 // CHECK-LABEL: @vmaddwev_w_h(
5366 // CHECK-NEXT:  entry:
5367 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5368 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5369 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5370 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5371 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5372 // CHECK-NEXT:    ret i128 [[TMP4]]
5373 //
// Test wrapper for __builtin_lsx_vmaddwev_w_h: the first operand and the
// result are v4i32, the second and third operands are v8i16. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.w.h call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5374 v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
5375   return __builtin_lsx_vmaddwev_w_h(_1, _2, _3);
5376 }
5377 // CHECK-LABEL: @vmaddwev_h_b(
5378 // CHECK-NEXT:  entry:
5379 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5380 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5381 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5382 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5383 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5384 // CHECK-NEXT:    ret i128 [[TMP4]]
5385 //
// Test wrapper for __builtin_lsx_vmaddwev_h_b: the first operand and the
// result are v8i16, the second and third operands are v16i8. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.h.b call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5386 v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
5387   return __builtin_lsx_vmaddwev_h_b(_1, _2, _3);
5388 }
5389 // CHECK-LABEL: @vmaddwev_d_wu(
5390 // CHECK-NEXT:  entry:
5391 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5392 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5393 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5394 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5395 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5396 // CHECK-NEXT:    ret i128 [[TMP4]]
5397 //
// Test wrapper for __builtin_lsx_vmaddwev_d_wu: the first operand and the
// result are v2u64, the second and third operands are v4u32. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.d.wu call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5398 v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
5399   return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3);
5400 }
5401 // CHECK-LABEL: @vmaddwev_w_hu(
5402 // CHECK-NEXT:  entry:
5403 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5404 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5405 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5406 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5407 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5408 // CHECK-NEXT:    ret i128 [[TMP4]]
5409 //
// Test wrapper for __builtin_lsx_vmaddwev_w_hu: the first operand and the
// result are v4u32, the second and third operands are v8u16. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.w.hu call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5410 v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
5411   return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3);
5412 }
5413 // CHECK-LABEL: @vmaddwev_h_bu(
5414 // CHECK-NEXT:  entry:
5415 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5416 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5417 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5418 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5419 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5420 // CHECK-NEXT:    ret i128 [[TMP4]]
5421 //
// Test wrapper for __builtin_lsx_vmaddwev_h_bu: the first operand and the
// result are v8u16, the second and third operands are v16u8. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwev.h.bu call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5422 v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
5423   return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3);
5424 }
5425 // CHECK-LABEL: @vmaddwod_d_w(
5426 // CHECK-NEXT:  entry:
5427 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5428 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5429 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5430 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5431 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5432 // CHECK-NEXT:    ret i128 [[TMP4]]
5433 //
// Test wrapper for __builtin_lsx_vmaddwod_d_w: the first operand and the
// result are v2i64, the second and third operands are v4i32. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.d.w call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5434 v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
5435   return __builtin_lsx_vmaddwod_d_w(_1, _2, _3);
5436 }
5437 // CHECK-LABEL: @vmaddwod_w_h(
5438 // CHECK-NEXT:  entry:
5439 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5440 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5441 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5442 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5443 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5444 // CHECK-NEXT:    ret i128 [[TMP4]]
5445 //
// Test wrapper for __builtin_lsx_vmaddwod_w_h: the first operand and the
// result are v4i32, the second and third operands are v8i16. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.w.h call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5446 v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
5447   return __builtin_lsx_vmaddwod_w_h(_1, _2, _3);
5448 }
5449 // CHECK-LABEL: @vmaddwod_h_b(
5450 // CHECK-NEXT:  entry:
5451 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5452 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5453 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5454 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5455 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5456 // CHECK-NEXT:    ret i128 [[TMP4]]
5457 //
// Test wrapper for __builtin_lsx_vmaddwod_h_b: the first operand and the
// result are v8i16, the second and third operands are v16i8. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.h.b call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5458 v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
5459   return __builtin_lsx_vmaddwod_h_b(_1, _2, _3);
5460 }
5461 // CHECK-LABEL: @vmaddwod_d_wu(
5462 // CHECK-NEXT:  entry:
5463 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5464 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5465 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5466 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5467 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5468 // CHECK-NEXT:    ret i128 [[TMP4]]
5469 //
// Test wrapper for __builtin_lsx_vmaddwod_d_wu: the first operand and the
// result are v2u64, the second and third operands are v4u32. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.d.wu call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5470 v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
5471   return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3);
5472 }
5473 // CHECK-LABEL: @vmaddwod_w_hu(
5474 // CHECK-NEXT:  entry:
5475 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5476 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5477 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5478 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5479 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5480 // CHECK-NEXT:    ret i128 [[TMP4]]
5481 //
// Test wrapper for __builtin_lsx_vmaddwod_w_hu: the first operand and the
// result are v4u32, the second and third operands are v8u16. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.w.hu call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5482 v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
5483   return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3);
5484 }
5485 // CHECK-LABEL: @vmaddwod_h_bu(
5486 // CHECK-NEXT:  entry:
5487 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5488 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5489 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5490 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5491 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5492 // CHECK-NEXT:    ret i128 [[TMP4]]
5493 //
// Test wrapper for __builtin_lsx_vmaddwod_h_bu: the first operand and the
// result are v8u16, the second and third operands are v16u8. The assertions
// above expect a single @llvm.loongarch.lsx.vmaddwod.h.bu call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5494 v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
5495   return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3);
5496 }
5497 // CHECK-LABEL: @vmaddwev_d_wu_w(
5498 // CHECK-NEXT:  entry:
5499 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5500 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5501 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5502 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5503 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5504 // CHECK-NEXT:    ret i128 [[TMP4]]
5505 //
// Test wrapper for __builtin_lsx_vmaddwev_d_wu_w: operands are v2i64, v4u32
// and v4i32 in that order; the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwev.d.wu.w call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5506 v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
5507   return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3);
5508 }
5509 // CHECK-LABEL: @vmaddwev_w_hu_h(
5510 // CHECK-NEXT:  entry:
5511 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5512 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5513 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5514 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5515 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5516 // CHECK-NEXT:    ret i128 [[TMP4]]
5517 //
// Test wrapper for __builtin_lsx_vmaddwev_w_hu_h: operands are v4i32, v8u16
// and v8i16 in that order; the result is v4i32. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwev.w.hu.h call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5518 v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5519   return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3);
5520 }
5521 // CHECK-LABEL: @vmaddwev_h_bu_b(
5522 // CHECK-NEXT:  entry:
5523 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5524 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5525 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5526 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5527 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5528 // CHECK-NEXT:    ret i128 [[TMP4]]
5529 //
// Test wrapper for __builtin_lsx_vmaddwev_h_bu_b: operands are v8i16, v16u8
// and v16i8 in that order; the result is v8i16. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwev.h.bu.b call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5530 v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5531   return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3);
5532 }
5533 // CHECK-LABEL: @vmaddwod_d_wu_w(
5534 // CHECK-NEXT:  entry:
5535 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5536 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5537 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5538 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5539 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5540 // CHECK-NEXT:    ret i128 [[TMP4]]
5541 //
// Test wrapper for __builtin_lsx_vmaddwod_d_wu_w: operands are v2i64, v4u32
// and v4i32 in that order; the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwod.d.wu.w call taking
// (<2 x i64>, <4 x i32>, <4 x i32>).
5542 v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
5543   return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3);
5544 }
5545 // CHECK-LABEL: @vmaddwod_w_hu_h(
5546 // CHECK-NEXT:  entry:
5547 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5548 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5549 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5550 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5551 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5552 // CHECK-NEXT:    ret i128 [[TMP4]]
5553 //
// Test wrapper for __builtin_lsx_vmaddwod_w_hu_h: operands are v4i32, v8u16
// and v8i16 in that order; the result is v4i32. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwod.w.hu.h call taking
// (<4 x i32>, <8 x i16>, <8 x i16>).
5554 v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5555   return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3);
5556 }
5557 // CHECK-LABEL: @vmaddwod_h_bu_b(
5558 // CHECK-NEXT:  entry:
5559 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5560 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5561 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5562 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5563 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5564 // CHECK-NEXT:    ret i128 [[TMP4]]
5565 //
// Test wrapper for __builtin_lsx_vmaddwod_h_bu_b: operands are v8i16, v16u8
// and v16i8 in that order; the result is v8i16. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwod.h.bu.b call taking
// (<8 x i16>, <16 x i8>, <16 x i8>).
5566 v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5567   return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3);
5568 }
5569 // CHECK-LABEL: @vmaddwev_q_d(
5570 // CHECK-NEXT:  entry:
5571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5572 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5573 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5574 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5575 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5576 // CHECK-NEXT:    ret i128 [[TMP4]]
5577 //
// Test wrapper for __builtin_lsx_vmaddwev_q_d: forwards three v2i64 operands
// and returns v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vmaddwev.q.d call on <2 x i64> values.
5578 v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5579   return __builtin_lsx_vmaddwev_q_d(_1, _2, _3);
5580 }
5581 // CHECK-LABEL: @vmaddwod_q_d(
5582 // CHECK-NEXT:  entry:
5583 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5584 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5585 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5586 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5587 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5588 // CHECK-NEXT:    ret i128 [[TMP4]]
5589 //
// Test wrapper for __builtin_lsx_vmaddwod_q_d: forwards three v2i64 operands
// and returns v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vmaddwod.q.d call on <2 x i64> values.
5590 v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5591   return __builtin_lsx_vmaddwod_q_d(_1, _2, _3);
5592 }
5593 // CHECK-LABEL: @vmaddwev_q_du(
5594 // CHECK-NEXT:  entry:
5595 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5596 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5597 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5598 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5599 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5600 // CHECK-NEXT:    ret i128 [[TMP4]]
5601 //
// Test wrapper for __builtin_lsx_vmaddwev_q_du: forwards three v2u64 operands
// and returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vmaddwev.q.du call on <2 x i64> values.
5602 v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5603   return __builtin_lsx_vmaddwev_q_du(_1, _2, _3);
5604 }
5605 // CHECK-LABEL: @vmaddwod_q_du(
5606 // CHECK-NEXT:  entry:
5607 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5609 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5610 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5611 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5612 // CHECK-NEXT:    ret i128 [[TMP4]]
5613 //
// Test wrapper for __builtin_lsx_vmaddwod_q_du: forwards three v2u64 operands
// and returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vmaddwod.q.du call on <2 x i64> values.
5614 v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5615   return __builtin_lsx_vmaddwod_q_du(_1, _2, _3);
5616 }
5617 // CHECK-LABEL: @vmaddwev_q_du_d(
5618 // CHECK-NEXT:  entry:
5619 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5620 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5621 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5622 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5623 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5624 // CHECK-NEXT:    ret i128 [[TMP4]]
5625 //
// Test wrapper for __builtin_lsx_vmaddwev_q_du_d: operands are v2i64, v2u64
// and v2i64 in that order; the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwev.q.du.d call with all operands as
// <2 x i64>.
5626 v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5627   return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3);
5628 }
5629 // CHECK-LABEL: @vmaddwod_q_du_d(
5630 // CHECK-NEXT:  entry:
5631 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5632 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5633 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5634 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5635 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5636 // CHECK-NEXT:    ret i128 [[TMP4]]
5637 //
// Test wrapper for __builtin_lsx_vmaddwod_q_du_d: operands are v2i64, v2u64
// and v2i64 in that order; the result is v2i64. The assertions above expect a
// single @llvm.loongarch.lsx.vmaddwod.q.du.d call with all operands as
// <2 x i64>.
5638 v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5639   return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3);
5640 }
5641 // CHECK-LABEL: @vrotr_b(
5642 // CHECK-NEXT:  entry:
5643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5645 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5646 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5647 // CHECK-NEXT:    ret i128 [[TMP3]]
5648 //
// Test wrapper for __builtin_lsx_vrotr_b: forwards two v16i8 operands and
// returns v16i8. The assertions above expect a single
// @llvm.loongarch.lsx.vrotr.b call on <16 x i8> values.
5649 v16i8 vrotr_b(v16i8 _1, v16i8 _2) {
5650   return __builtin_lsx_vrotr_b(_1, _2);
5651 }
5652 // CHECK-LABEL: @vrotr_h(
5653 // CHECK-NEXT:  entry:
5654 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5655 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5656 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5657 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5658 // CHECK-NEXT:    ret i128 [[TMP3]]
5659 //
// Test wrapper for __builtin_lsx_vrotr_h: forwards two v8i16 operands and
// returns v8i16. The assertions above expect a single
// @llvm.loongarch.lsx.vrotr.h call on <8 x i16> values.
5660 v8i16 vrotr_h(v8i16 _1, v8i16 _2) {
5661   return __builtin_lsx_vrotr_h(_1, _2);
5662 }
5663 // CHECK-LABEL: @vrotr_w(
5664 // CHECK-NEXT:  entry:
5665 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5666 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5667 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5668 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5669 // CHECK-NEXT:    ret i128 [[TMP3]]
5670 //
// Test wrapper for __builtin_lsx_vrotr_w: forwards two v4i32 operands and
// returns v4i32. The assertions above expect a single
// @llvm.loongarch.lsx.vrotr.w call on <4 x i32> values.
5671 v4i32 vrotr_w(v4i32 _1, v4i32 _2) {
5672   return __builtin_lsx_vrotr_w(_1, _2);
5673 }
5674 // CHECK-LABEL: @vrotr_d(
5675 // CHECK-NEXT:  entry:
5676 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5677 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5678 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5679 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5680 // CHECK-NEXT:    ret i128 [[TMP3]]
5681 //
// Test wrapper for __builtin_lsx_vrotr_d: forwards two v2i64 operands and
// returns v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vrotr.d call on <2 x i64> values.
5682 v2i64 vrotr_d(v2i64 _1, v2i64 _2) {
5683   return __builtin_lsx_vrotr_d(_1, _2);
5684 }
5685 // CHECK-LABEL: @vadd_q(
5686 // CHECK-NEXT:  entry:
5687 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5688 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5689 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5690 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5691 // CHECK-NEXT:    ret i128 [[TMP3]]
5692 //
// Test wrapper for __builtin_lsx_vadd_q: forwards two v2i64 operands and
// returns v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vadd.q call on <2 x i64> values.
5693 v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); }
5694 // CHECK-LABEL: @vsub_q(
5695 // CHECK-NEXT:  entry:
5696 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5697 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5698 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5699 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5700 // CHECK-NEXT:    ret i128 [[TMP3]]
5701 //
// Test wrapper for __builtin_lsx_vsub_q: forwards two v2i64 operands and
// returns v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vsub.q call on <2 x i64> values.
5702 v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); }
5703 // CHECK-LABEL: @vldrepl_b(
5704 // CHECK-NEXT:  entry:
5705 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
5706 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5707 // CHECK-NEXT:    ret i128 [[TMP1]]
5708 //
// Test wrapper for __builtin_lsx_vldrepl_b: passes the pointer argument and
// the constant 1, returning v16i8. The assertions above expect a single
// @llvm.loongarch.lsx.vldrepl.b call taking (ptr, i32 1).
5709 v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); }
5710 // CHECK-LABEL: @vldrepl_h(
5711 // CHECK-NEXT:  entry:
5712 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
5713 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
5714 // CHECK-NEXT:    ret i128 [[TMP1]]
5715 //
// Test wrapper for __builtin_lsx_vldrepl_h: passes the pointer argument and
// the constant 2, returning v8i16. The assertions above expect a single
// @llvm.loongarch.lsx.vldrepl.h call taking (ptr, i32 2).
5716 v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); }
5717 // CHECK-LABEL: @vldrepl_w(
5718 // CHECK-NEXT:  entry:
5719 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
5720 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
5721 // CHECK-NEXT:    ret i128 [[TMP1]]
5722 //
// Test wrapper for __builtin_lsx_vldrepl_w: passes the pointer argument and
// the constant 4, returning v4i32. The assertions above expect a single
// @llvm.loongarch.lsx.vldrepl.w call taking (ptr, i32 4).
5723 v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); }
5724 // CHECK-LABEL: @vldrepl_d(
5725 // CHECK-NEXT:  entry:
5726 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
5727 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5728 // CHECK-NEXT:    ret i128 [[TMP1]]
5729 //
// Test wrapper for __builtin_lsx_vldrepl_d: passes the pointer argument and
// the constant 8, returning v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vldrepl.d call taking (ptr, i32 8).
5730 v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); }
5731 // CHECK-LABEL: @vmskgez_b(
5732 // CHECK-NEXT:  entry:
5733 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5734 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
5735 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5736 // CHECK-NEXT:    ret i128 [[TMP2]]
5737 //
// Test wrapper for __builtin_lsx_vmskgez_b: forwards one v16i8 operand and
// returns v16i8. The assertions above expect a single
// @llvm.loongarch.lsx.vmskgez.b call on a <16 x i8> value.
5738 v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); }
5739 // CHECK-LABEL: @vmsknz_b(
5740 // CHECK-NEXT:  entry:
5741 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5742 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
5743 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5744 // CHECK-NEXT:    ret i128 [[TMP2]]
5745 //
// Test wrapper for __builtin_lsx_vmsknz_b: forwards one v16i8 operand and
// returns v16i8. The assertions above expect a single
// @llvm.loongarch.lsx.vmsknz.b call on a <16 x i8> value.
5746 v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); }
5747 // CHECK-LABEL: @vexth_h_b(
5748 // CHECK-NEXT:  entry:
5749 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5750 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
5751 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5752 // CHECK-NEXT:    ret i128 [[TMP2]]
5753 //
// Test wrapper for __builtin_lsx_vexth_h_b: takes a v16i8 operand and returns
// v8i16. The assertions above expect a single @llvm.loongarch.lsx.vexth.h.b
// call from <16 x i8> to <8 x i16>.
5754 v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); }
5755 // CHECK-LABEL: @vexth_w_h(
5756 // CHECK-NEXT:  entry:
5757 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5758 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
5759 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5760 // CHECK-NEXT:    ret i128 [[TMP2]]
5761 //
// Test wrapper for __builtin_lsx_vexth_w_h: takes a v8i16 operand and returns
// v4i32. The assertions above expect a single @llvm.loongarch.lsx.vexth.w.h
// call from <8 x i16> to <4 x i32>.
5762 v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); }
5763 // CHECK-LABEL: @vexth_d_w(
5764 // CHECK-NEXT:  entry:
5765 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5766 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
5767 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5768 // CHECK-NEXT:    ret i128 [[TMP2]]
5769 //
// Test wrapper for __builtin_lsx_vexth_d_w: takes a v4i32 operand and returns
// v2i64. The assertions above expect a single @llvm.loongarch.lsx.vexth.d.w
// call from <4 x i32> to <2 x i64>.
5770 v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); }
5771 // CHECK-LABEL: @vexth_q_d(
5772 // CHECK-NEXT:  entry:
5773 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5774 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
5775 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5776 // CHECK-NEXT:    ret i128 [[TMP2]]
5777 //
// Test wrapper for __builtin_lsx_vexth_q_d: takes a v2i64 operand and returns
// v2i64. The assertions above expect a single @llvm.loongarch.lsx.vexth.q.d
// call from <2 x i64> to <2 x i64>.
5778 v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); }
5779 // CHECK-LABEL: @vexth_hu_bu(
5780 // CHECK-NEXT:  entry:
5781 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5782 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
5783 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5784 // CHECK-NEXT:    ret i128 [[TMP2]]
5785 //
// Test wrapper for __builtin_lsx_vexth_hu_bu: takes a v16u8 operand and
// returns v8u16. The assertions above expect a single
// @llvm.loongarch.lsx.vexth.hu.bu call from <16 x i8> to <8 x i16>.
5786 v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); }
5787 // CHECK-LABEL: @vexth_wu_hu(
5788 // CHECK-NEXT:  entry:
5789 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5790 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
5791 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5792 // CHECK-NEXT:    ret i128 [[TMP2]]
5793 //
// Test wrapper for __builtin_lsx_vexth_wu_hu: takes a v8u16 operand and
// returns v4u32. The assertions above expect a single
// @llvm.loongarch.lsx.vexth.wu.hu call from <8 x i16> to <4 x i32>.
5794 v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); }
5795 // CHECK-LABEL: @vexth_du_wu(
5796 // CHECK-NEXT:  entry:
5797 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5798 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
5799 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5800 // CHECK-NEXT:    ret i128 [[TMP2]]
5801 //
// Test wrapper for __builtin_lsx_vexth_du_wu: takes a v4u32 operand and
// returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vexth.du.wu call from <4 x i32> to <2 x i64>.
5802 v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); }
5803 // CHECK-LABEL: @vexth_qu_du(
5804 // CHECK-NEXT:  entry:
5805 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5806 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
5807 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5808 // CHECK-NEXT:    ret i128 [[TMP2]]
5809 //
// Test wrapper for __builtin_lsx_vexth_qu_du: takes a v2u64 operand and
// returns v2u64. The assertions above expect a single
// @llvm.loongarch.lsx.vexth.qu.du call from <2 x i64> to <2 x i64>.
5810 v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); }
5811 // CHECK-LABEL: @vrotri_b(
5812 // CHECK-NEXT:  entry:
5813 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5814 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
5815 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5816 // CHECK-NEXT:    ret i128 [[TMP2]]
5817 //
// Test wrapper for __builtin_lsx_vrotri_b: passes a v16i8 operand and the
// constant 1, returning v16i8. The assertions above expect a single
// @llvm.loongarch.lsx.vrotri.b call taking (<16 x i8>, i32 1).
5818 v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); }
5819 // CHECK-LABEL: @vrotri_h(
5820 // CHECK-NEXT:  entry:
5821 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5822 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
5823 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5824 // CHECK-NEXT:    ret i128 [[TMP2]]
5825 //
// Test wrapper for __builtin_lsx_vrotri_h: passes a v8i16 operand and the
// constant 1, returning v8i16. The assertions above expect a single
// @llvm.loongarch.lsx.vrotri.h call taking (<8 x i16>, i32 1).
5826 v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); }
5827 // CHECK-LABEL: @vrotri_w(
5828 // CHECK-NEXT:  entry:
5829 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5830 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
5831 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5832 // CHECK-NEXT:    ret i128 [[TMP2]]
5833 //
// Test wrapper for __builtin_lsx_vrotri_w: passes a v4i32 operand and the
// constant 1, returning v4i32. The assertions above expect a single
// @llvm.loongarch.lsx.vrotri.w call taking (<4 x i32>, i32 1).
5834 v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); }
5835 // CHECK-LABEL: @vrotri_d(
5836 // CHECK-NEXT:  entry:
5837 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5838 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
5839 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5840 // CHECK-NEXT:    ret i128 [[TMP2]]
5841 //
// Test wrapper for __builtin_lsx_vrotri_d: passes a v2i64 operand and the
// constant 1, returning v2i64. The assertions above expect a single
// @llvm.loongarch.lsx.vrotri.d call taking (<2 x i64>, i32 1).
5842 v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); }
5843 // CHECK-LABEL: @vextl_q_d(
5844 // CHECK-NEXT:  entry:
5845 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5846 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
5847 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5848 // CHECK-NEXT:    ret i128 [[TMP2]]
5849 //
// Test wrapper for __builtin_lsx_vextl_q_d: takes a v2i64 operand and returns
// v2i64. The assertions above expect a single @llvm.loongarch.lsx.vextl.q.d
// call on a <2 x i64> value.
5850 v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); }
5851 // CHECK-LABEL: @vsrlni_b_h(
5852 // CHECK-NEXT:  entry:
5853 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5854 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5855 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5856 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5857 // CHECK-NEXT:    ret i128 [[TMP3]]
5858 //
5859 v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) {
5860   return __builtin_lsx_vsrlni_b_h(_1, _2, 1);
5861 }
5862 // CHECK-LABEL: @vsrlni_h_w(
5863 // CHECK-NEXT:  entry:
5864 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5865 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5866 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5867 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5868 // CHECK-NEXT:    ret i128 [[TMP3]]
5869 //
5870 v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) {
5871   return __builtin_lsx_vsrlni_h_w(_1, _2, 1);
5872 }
5873 // CHECK-LABEL: @vsrlni_w_d(
5874 // CHECK-NEXT:  entry:
5875 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5876 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5877 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5878 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5879 // CHECK-NEXT:    ret i128 [[TMP3]]
5880 //
5881 v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) {
5882   return __builtin_lsx_vsrlni_w_d(_1, _2, 1);
5883 }
5884 // CHECK-LABEL: @vsrlni_d_q(
5885 // CHECK-NEXT:  entry:
5886 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5887 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5888 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5889 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5890 // CHECK-NEXT:    ret i128 [[TMP3]]
5891 //
5892 v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) {
5893   return __builtin_lsx_vsrlni_d_q(_1, _2, 1);
5894 }
5895 // CHECK-LABEL: @vsrlrni_b_h(
5896 // CHECK-NEXT:  entry:
5897 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5898 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5899 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5900 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5901 // CHECK-NEXT:    ret i128 [[TMP3]]
5902 //
5903 v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) {
5904   return __builtin_lsx_vsrlrni_b_h(_1, _2, 1);
5905 }
5906 // CHECK-LABEL: @vsrlrni_h_w(
5907 // CHECK-NEXT:  entry:
5908 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5909 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5910 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5911 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5912 // CHECK-NEXT:    ret i128 [[TMP3]]
5913 //
5914 v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) {
5915   return __builtin_lsx_vsrlrni_h_w(_1, _2, 1);
5916 }
5917 // CHECK-LABEL: @vsrlrni_w_d(
5918 // CHECK-NEXT:  entry:
5919 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5920 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5921 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5922 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5923 // CHECK-NEXT:    ret i128 [[TMP3]]
5924 //
5925 v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) {
5926   return __builtin_lsx_vsrlrni_w_d(_1, _2, 1);
5927 }
5928 // CHECK-LABEL: @vsrlrni_d_q(
5929 // CHECK-NEXT:  entry:
5930 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5931 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5932 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5933 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5934 // CHECK-NEXT:    ret i128 [[TMP3]]
5935 //
5936 v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) {
5937   return __builtin_lsx_vsrlrni_d_q(_1, _2, 1);
5938 }
5939 // CHECK-LABEL: @vssrlni_b_h(
5940 // CHECK-NEXT:  entry:
5941 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5942 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5943 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5944 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5945 // CHECK-NEXT:    ret i128 [[TMP3]]
5946 //
5947 v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) {
5948   return __builtin_lsx_vssrlni_b_h(_1, _2, 1);
5949 }
5950 // CHECK-LABEL: @vssrlni_h_w(
5951 // CHECK-NEXT:  entry:
5952 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5953 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5954 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5955 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5956 // CHECK-NEXT:    ret i128 [[TMP3]]
5957 //
5958 v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) {
5959   return __builtin_lsx_vssrlni_h_w(_1, _2, 1);
5960 }
5961 // CHECK-LABEL: @vssrlni_w_d(
5962 // CHECK-NEXT:  entry:
5963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5964 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5965 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5966 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5967 // CHECK-NEXT:    ret i128 [[TMP3]]
5968 //
5969 v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) {
5970   return __builtin_lsx_vssrlni_w_d(_1, _2, 1);
5971 }
5972 // CHECK-LABEL: @vssrlni_d_q(
5973 // CHECK-NEXT:  entry:
5974 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5975 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5976 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5977 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5978 // CHECK-NEXT:    ret i128 [[TMP3]]
5979 //
5980 v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) {
5981   return __builtin_lsx_vssrlni_d_q(_1, _2, 1);
5982 }
5983 // CHECK-LABEL: @vssrlni_bu_h(
5984 // CHECK-NEXT:  entry:
5985 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5986 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5987 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5988 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5989 // CHECK-NEXT:    ret i128 [[TMP3]]
5990 //
5991 v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) {
5992   return __builtin_lsx_vssrlni_bu_h(_1, _2, 1);
5993 }
5994 // CHECK-LABEL: @vssrlni_hu_w(
5995 // CHECK-NEXT:  entry:
5996 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5997 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5998 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5999 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6000 // CHECK-NEXT:    ret i128 [[TMP3]]
6001 //
6002 v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) {
6003   return __builtin_lsx_vssrlni_hu_w(_1, _2, 1);
6004 }
6005 // CHECK-LABEL: @vssrlni_wu_d(
6006 // CHECK-NEXT:  entry:
6007 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6008 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6009 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6010 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6011 // CHECK-NEXT:    ret i128 [[TMP3]]
6012 //
6013 v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) {
6014   return __builtin_lsx_vssrlni_wu_d(_1, _2, 1);
6015 }
6016 // CHECK-LABEL: @vssrlni_du_q(
6017 // CHECK-NEXT:  entry:
6018 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6019 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6020 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6021 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6022 // CHECK-NEXT:    ret i128 [[TMP3]]
6023 //
6024 v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) {
6025   return __builtin_lsx_vssrlni_du_q(_1, _2, 1);
6026 }
6027 // CHECK-LABEL: @vssrlrni_b_h(
6028 // CHECK-NEXT:  entry:
6029 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6030 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6031 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6032 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6033 // CHECK-NEXT:    ret i128 [[TMP3]]
6034 //
6035 v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) {
6036   return __builtin_lsx_vssrlrni_b_h(_1, _2, 1);
6037 }
6038 // CHECK-LABEL: @vssrlrni_h_w(
6039 // CHECK-NEXT:  entry:
6040 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6041 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6042 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6043 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6044 // CHECK-NEXT:    ret i128 [[TMP3]]
6045 //
6046 v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) {
6047   return __builtin_lsx_vssrlrni_h_w(_1, _2, 1);
6048 }
6049 // CHECK-LABEL: @vssrlrni_w_d(
6050 // CHECK-NEXT:  entry:
6051 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6052 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6053 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6054 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6055 // CHECK-NEXT:    ret i128 [[TMP3]]
6056 //
6057 v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) {
6058   return __builtin_lsx_vssrlrni_w_d(_1, _2, 1);
6059 }
6060 // CHECK-LABEL: @vssrlrni_d_q(
6061 // CHECK-NEXT:  entry:
6062 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6063 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6064 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6065 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6066 // CHECK-NEXT:    ret i128 [[TMP3]]
6067 //
6068 v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) {
6069   return __builtin_lsx_vssrlrni_d_q(_1, _2, 1);
6070 }
6071 // CHECK-LABEL: @vssrlrni_bu_h(
6072 // CHECK-NEXT:  entry:
6073 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6074 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6075 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6076 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6077 // CHECK-NEXT:    ret i128 [[TMP3]]
6078 //
6079 v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
6080   return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1);
6081 }
6082 // CHECK-LABEL: @vssrlrni_hu_w(
6083 // CHECK-NEXT:  entry:
6084 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6085 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6086 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6087 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6088 // CHECK-NEXT:    ret i128 [[TMP3]]
6089 //
6090 v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
6091   return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1);
6092 }
6093 // CHECK-LABEL: @vssrlrni_wu_d(
6094 // CHECK-NEXT:  entry:
6095 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6096 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6097 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6098 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6099 // CHECK-NEXT:    ret i128 [[TMP3]]
6100 //
6101 v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
6102   return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1);
6103 }
6104 // CHECK-LABEL: @vssrlrni_du_q(
6105 // CHECK-NEXT:  entry:
6106 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6107 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6108 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6109 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6110 // CHECK-NEXT:    ret i128 [[TMP3]]
6111 //
6112 v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
6113   return __builtin_lsx_vssrlrni_du_q(_1, _2, 1);
6114 }
6115 // CHECK-LABEL: @vsrani_b_h(
6116 // CHECK-NEXT:  entry:
6117 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6118 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6119 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6120 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6121 // CHECK-NEXT:    ret i128 [[TMP3]]
6122 //
6123 v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) {
6124   return __builtin_lsx_vsrani_b_h(_1, _2, 1);
6125 }
6126 // CHECK-LABEL: @vsrani_h_w(
6127 // CHECK-NEXT:  entry:
6128 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6129 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6130 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6131 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6132 // CHECK-NEXT:    ret i128 [[TMP3]]
6133 //
6134 v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) {
6135   return __builtin_lsx_vsrani_h_w(_1, _2, 1);
6136 }
6137 // CHECK-LABEL: @vsrani_w_d(
6138 // CHECK-NEXT:  entry:
6139 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6140 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6141 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6142 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6143 // CHECK-NEXT:    ret i128 [[TMP3]]
6144 //
6145 v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) {
6146   return __builtin_lsx_vsrani_w_d(_1, _2, 1);
6147 }
6148 // CHECK-LABEL: @vsrani_d_q(
6149 // CHECK-NEXT:  entry:
6150 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6151 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6152 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6153 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6154 // CHECK-NEXT:    ret i128 [[TMP3]]
6155 //
6156 v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) {
6157   return __builtin_lsx_vsrani_d_q(_1, _2, 1);
6158 }
6159 // CHECK-LABEL: @vsrarni_b_h(
6160 // CHECK-NEXT:  entry:
6161 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6162 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6163 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6164 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6165 // CHECK-NEXT:    ret i128 [[TMP3]]
6166 //
6167 v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) {
6168   return __builtin_lsx_vsrarni_b_h(_1, _2, 1);
6169 }
6170 // CHECK-LABEL: @vsrarni_h_w(
6171 // CHECK-NEXT:  entry:
6172 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6173 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6174 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6175 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6176 // CHECK-NEXT:    ret i128 [[TMP3]]
6177 //
6178 v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) {
6179   return __builtin_lsx_vsrarni_h_w(_1, _2, 1);
6180 }
6181 // CHECK-LABEL: @vsrarni_w_d(
6182 // CHECK-NEXT:  entry:
6183 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6184 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6185 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6186 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6187 // CHECK-NEXT:    ret i128 [[TMP3]]
6188 //
6189 v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) {
6190   return __builtin_lsx_vsrarni_w_d(_1, _2, 1);
6191 }
6192 // CHECK-LABEL: @vsrarni_d_q(
6193 // CHECK-NEXT:  entry:
6194 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6195 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6196 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6197 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6198 // CHECK-NEXT:    ret i128 [[TMP3]]
6199 //
6200 v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) {
6201   return __builtin_lsx_vsrarni_d_q(_1, _2, 1);
6202 }
6203 // CHECK-LABEL: @vssrani_b_h(
6204 // CHECK-NEXT:  entry:
6205 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6206 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6207 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6208 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6209 // CHECK-NEXT:    ret i128 [[TMP3]]
6210 //
6211 v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) {
6212   return __builtin_lsx_vssrani_b_h(_1, _2, 1);
6213 }
6214 // CHECK-LABEL: @vssrani_h_w(
6215 // CHECK-NEXT:  entry:
6216 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6217 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6218 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6219 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6220 // CHECK-NEXT:    ret i128 [[TMP3]]
6221 //
6222 v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) {
6223   return __builtin_lsx_vssrani_h_w(_1, _2, 1);
6224 }
6225 // CHECK-LABEL: @vssrani_w_d(
6226 // CHECK-NEXT:  entry:
6227 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6228 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6229 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6230 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6231 // CHECK-NEXT:    ret i128 [[TMP3]]
6232 //
6233 v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) {
6234   return __builtin_lsx_vssrani_w_d(_1, _2, 1);
6235 }
6236 // CHECK-LABEL: @vssrani_d_q(
6237 // CHECK-NEXT:  entry:
6238 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6239 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6240 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6241 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6242 // CHECK-NEXT:    ret i128 [[TMP3]]
6243 //
6244 v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) {
6245   return __builtin_lsx_vssrani_d_q(_1, _2, 1);
6246 }
6247 // CHECK-LABEL: @vssrani_bu_h(
6248 // CHECK-NEXT:  entry:
6249 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6250 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6251 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6252 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6253 // CHECK-NEXT:    ret i128 [[TMP3]]
6254 //
6255 v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) {
6256   return __builtin_lsx_vssrani_bu_h(_1, _2, 1);
6257 }
6258 // CHECK-LABEL: @vssrani_hu_w(
6259 // CHECK-NEXT:  entry:
6260 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6261 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6262 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6263 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6264 // CHECK-NEXT:    ret i128 [[TMP3]]
6265 //
6266 v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) {
6267   return __builtin_lsx_vssrani_hu_w(_1, _2, 1);
6268 }
6269 // CHECK-LABEL: @vssrani_wu_d(
6270 // CHECK-NEXT:  entry:
6271 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6272 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6273 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6274 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6275 // CHECK-NEXT:    ret i128 [[TMP3]]
6276 //
6277 v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) {
6278   return __builtin_lsx_vssrani_wu_d(_1, _2, 1);
6279 }
6280 // CHECK-LABEL: @vssrani_du_q(
6281 // CHECK-NEXT:  entry:
6282 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6283 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6284 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6285 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6286 // CHECK-NEXT:    ret i128 [[TMP3]]
6287 //
6288 v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) {
6289   return __builtin_lsx_vssrani_du_q(_1, _2, 1);
6290 }
6291 // CHECK-LABEL: @vssrarni_b_h(
6292 // CHECK-NEXT:  entry:
6293 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6294 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6295 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6296 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6297 // CHECK-NEXT:    ret i128 [[TMP3]]
6298 //
6299 v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) {
6300   return __builtin_lsx_vssrarni_b_h(_1, _2, 1);
6301 }
6302 // CHECK-LABEL: @vssrarni_h_w(
6303 // CHECK-NEXT:  entry:
6304 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6305 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6306 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6307 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6308 // CHECK-NEXT:    ret i128 [[TMP3]]
6309 //
6310 v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) {
6311   return __builtin_lsx_vssrarni_h_w(_1, _2, 1);
6312 }
6313 // CHECK-LABEL: @vssrarni_w_d(
6314 // CHECK-NEXT:  entry:
6315 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6316 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6317 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6318 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6319 // CHECK-NEXT:    ret i128 [[TMP3]]
6320 //
6321 v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) {
6322   return __builtin_lsx_vssrarni_w_d(_1, _2, 1);
6323 }
6324 // CHECK-LABEL: @vssrarni_d_q(
6325 // CHECK-NEXT:  entry:
6326 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6327 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6328 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6329 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6330 // CHECK-NEXT:    ret i128 [[TMP3]]
6331 //
6332 v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) {
6333   return __builtin_lsx_vssrarni_d_q(_1, _2, 1);
6334 }
6335 // CHECK-LABEL: @vssrarni_bu_h(
6336 // CHECK-NEXT:  entry:
6337 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6338 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6339 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6340 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6341 // CHECK-NEXT:    ret i128 [[TMP3]]
6342 //
6343 v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
6344   return __builtin_lsx_vssrarni_bu_h(_1, _2, 1);
6345 }
6346 // CHECK-LABEL: @vssrarni_hu_w(
6347 // CHECK-NEXT:  entry:
6348 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6349 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6350 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6351 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6352 // CHECK-NEXT:    ret i128 [[TMP3]]
6353 //
6354 v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
6355   return __builtin_lsx_vssrarni_hu_w(_1, _2, 1);
6356 }
6357 // CHECK-LABEL: @vssrarni_wu_d(
6358 // CHECK-NEXT:  entry:
6359 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6360 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6361 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6362 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6363 // CHECK-NEXT:    ret i128 [[TMP3]]
6364 //
6365 v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
6366   return __builtin_lsx_vssrarni_wu_d(_1, _2, 1);
6367 }
6368 // CHECK-LABEL: @vssrarni_du_q(
6369 // CHECK-NEXT:  entry:
6370 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6371 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6372 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6373 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6374 // CHECK-NEXT:    ret i128 [[TMP3]]
6375 //
6376 v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
6377   return __builtin_lsx_vssrarni_du_q(_1, _2, 1);
6378 }
6379 // CHECK-LABEL: @vpermi_w(
6380 // CHECK-NEXT:  entry:
6381 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6382 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6383 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6384 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6385 // CHECK-NEXT:    ret i128 [[TMP3]]
6386 //
6387 v4i32 vpermi_w(v4i32 _1, v4i32 _2) {
6388   return __builtin_lsx_vpermi_w(_1, _2, 1);
6389 }
6390 // CHECK-LABEL: @vld(
6391 // CHECK-NEXT:  entry:
6392 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
6393 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6394 // CHECK-NEXT:    ret i128 [[TMP1]]
6395 //
6396 v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); }
6397 // CHECK-LABEL: @vst(
6398 // CHECK-NEXT:  entry:
6399 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6400 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
6401 // CHECK-NEXT:    ret void
6402 //
6403 void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); }
6404 // CHECK-LABEL: @vssrlrn_b_h(
6405 // CHECK-NEXT:  entry:
6406 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6407 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6408 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
6409 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6410 // CHECK-NEXT:    ret i128 [[TMP3]]
6411 //
6412 v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) {
6413   return __builtin_lsx_vssrlrn_b_h(_1, _2);
6414 }
6415 // CHECK-LABEL: @vssrlrn_h_w(
6416 // CHECK-NEXT:  entry:
6417 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6418 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6419 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
6420 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6421 // CHECK-NEXT:    ret i128 [[TMP3]]
6422 //
6423 v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) {
6424   return __builtin_lsx_vssrlrn_h_w(_1, _2);
6425 }
6426 // CHECK-LABEL: @vssrlrn_w_d(
6427 // CHECK-NEXT:  entry:
6428 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6429 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6430 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
6431 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6432 // CHECK-NEXT:    ret i128 [[TMP3]]
6433 //
6434 v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) {
6435   return __builtin_lsx_vssrlrn_w_d(_1, _2);
6436 }
6437 // CHECK-LABEL: @vssrln_b_h(
6438 // CHECK-NEXT:  entry:
6439 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6440 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6441 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
6442 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6443 // CHECK-NEXT:    ret i128 [[TMP3]]
6444 //
6445 v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) {
6446   return __builtin_lsx_vssrln_b_h(_1, _2);
6447 }
6448 // CHECK-LABEL: @vssrln_h_w(
6449 // CHECK-NEXT:  entry:
6450 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6451 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6452 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
6453 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6454 // CHECK-NEXT:    ret i128 [[TMP3]]
6455 //
6456 v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) {
6457   return __builtin_lsx_vssrln_h_w(_1, _2);
6458 }
6459 // CHECK-LABEL: @vssrln_w_d(
6460 // CHECK-NEXT:  entry:
6461 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6462 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6463 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
6464 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6465 // CHECK-NEXT:    ret i128 [[TMP3]]
6466 //
6467 v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) {
6468   return __builtin_lsx_vssrln_w_d(_1, _2);
6469 }
6470 // CHECK-LABEL: @vorn_v(
6471 // CHECK-NEXT:  entry:
6472 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6473 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6474 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
6475 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6476 // CHECK-NEXT:    ret i128 [[TMP3]]
6477 //
6478 v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); }
6479 // CHECK-LABEL: @vldi(
6480 // CHECK-NEXT:  entry:
6481 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
6482 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
6483 // CHECK-NEXT:    ret i128 [[TMP1]]
6484 //
6485 v2i64 vldi() { return __builtin_lsx_vldi(1); }
6486 // CHECK-LABEL: @vshuf_b(
6487 // CHECK-NEXT:  entry:
6488 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6489 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6490 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
6491 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
6492 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
6493 // CHECK-NEXT:    ret i128 [[TMP4]]
6494 //
6495 v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
6496   return __builtin_lsx_vshuf_b(_1, _2, _3);
6497 }
6498 // CHECK-LABEL: @vldx(
6499 // CHECK-NEXT:  entry:
6500 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
6501 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6502 // CHECK-NEXT:    ret i128 [[TMP1]]
6503 //
6504 v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); }
6505 // CHECK-LABEL: @vstx(
6506 // CHECK-NEXT:  entry:
6507 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6508 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
6509 // CHECK-NEXT:    ret void
6510 //
6511 void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); }
6512 // CHECK-LABEL: @vextl_qu_du(
6513 // CHECK-NEXT:  entry:
6514 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6515 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
6516 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
6517 // CHECK-NEXT:    ret i128 [[TMP2]]
6518 //
6519 v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); }
6520 // CHECK-LABEL: @bnz_b(
6521 // CHECK-NEXT:  entry:
6522 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6523 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
6524 // CHECK-NEXT:    ret i32 [[TMP1]]
6525 //
6526 int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); }
6527 // CHECK-LABEL: @bnz_d(
6528 // CHECK-NEXT:  entry:
6529 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6530 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
6531 // CHECK-NEXT:    ret i32 [[TMP1]]
6532 //
6533 int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); }
6534 // CHECK-LABEL: @bnz_h(
6535 // CHECK-NEXT:  entry:
6536 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6537 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
6538 // CHECK-NEXT:    ret i32 [[TMP1]]
6539 //
6540 int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); }
6541 // CHECK-LABEL: @bnz_v(
6542 // CHECK-NEXT:  entry:
6543 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6544 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
6545 // CHECK-NEXT:    ret i32 [[TMP1]]
6546 //
6547 int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); }
6548 // CHECK-LABEL: @bnz_w(
6549 // CHECK-NEXT:  entry:
6550 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6551 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
6552 // CHECK-NEXT:    ret i32 [[TMP1]]
6553 //
6554 int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); }
6555 // CHECK-LABEL: @bz_b(
6556 // CHECK-NEXT:  entry:
6557 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6558 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
6559 // CHECK-NEXT:    ret i32 [[TMP1]]
6560 //
6561 int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); }
6562 // CHECK-LABEL: @bz_d(
6563 // CHECK-NEXT:  entry:
6564 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6565 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
6566 // CHECK-NEXT:    ret i32 [[TMP1]]
6567 //
6568 int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); }
6569 // CHECK-LABEL: @bz_h(
6570 // CHECK-NEXT:  entry:
6571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6572 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
6573 // CHECK-NEXT:    ret i32 [[TMP1]]
6574 //
6575 int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); }
6576 // CHECK-LABEL: @bz_v(
6577 // CHECK-NEXT:  entry:
6578 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6579 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
6580 // CHECK-NEXT:    ret i32 [[TMP1]]
6581 //
6582 int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); }
6583 // CHECK-LABEL: @bz_w(
6584 // CHECK-NEXT:  entry:
6585 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6586 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
6587 // CHECK-NEXT:    ret i32 [[TMP1]]
6588 //
6589 int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); }
6590 // CHECK-LABEL: @vfcmp_caf_d(
6591 // CHECK-NEXT:  entry:
6592 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6593 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6594 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6595 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6596 // CHECK-NEXT:    ret i128 [[TMP3]]
6597 //
6598 v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) {
6599   return __builtin_lsx_vfcmp_caf_d(_1, _2);
6600 }
6601 // CHECK-LABEL: @vfcmp_caf_s(
6602 // CHECK-NEXT:  entry:
6603 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6604 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6605 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6606 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6607 // CHECK-NEXT:    ret i128 [[TMP3]]
6608 //
6609 v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) {
6610   return __builtin_lsx_vfcmp_caf_s(_1, _2);
6611 }
6612 // CHECK-LABEL: @vfcmp_ceq_d(
6613 // CHECK-NEXT:  entry:
6614 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6615 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6616 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6617 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6618 // CHECK-NEXT:    ret i128 [[TMP3]]
6619 //
6620 v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) {
6621   return __builtin_lsx_vfcmp_ceq_d(_1, _2);
6622 }
6623 // CHECK-LABEL: @vfcmp_ceq_s(
6624 // CHECK-NEXT:  entry:
6625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6626 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6627 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6628 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6629 // CHECK-NEXT:    ret i128 [[TMP3]]
6630 //
6631 v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) {
6632   return __builtin_lsx_vfcmp_ceq_s(_1, _2);
6633 }
6634 // CHECK-LABEL: @vfcmp_cle_d(
6635 // CHECK-NEXT:  entry:
6636 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6637 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6638 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6639 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6640 // CHECK-NEXT:    ret i128 [[TMP3]]
6641 //
6642 v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) {
6643   return __builtin_lsx_vfcmp_cle_d(_1, _2);
6644 }
6645 // CHECK-LABEL: @vfcmp_cle_s(
6646 // CHECK-NEXT:  entry:
6647 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6648 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6649 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6650 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6651 // CHECK-NEXT:    ret i128 [[TMP3]]
6652 //
6653 v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) {
6654   return __builtin_lsx_vfcmp_cle_s(_1, _2);
6655 }
6656 // CHECK-LABEL: @vfcmp_clt_d(
6657 // CHECK-NEXT:  entry:
6658 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6659 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6660 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6661 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6662 // CHECK-NEXT:    ret i128 [[TMP3]]
6663 //
6664 v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) {
6665   return __builtin_lsx_vfcmp_clt_d(_1, _2);
6666 }
6667 // CHECK-LABEL: @vfcmp_clt_s(
6668 // CHECK-NEXT:  entry:
6669 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6670 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6671 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6672 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6673 // CHECK-NEXT:    ret i128 [[TMP3]]
6674 //
6675 v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) {
6676   return __builtin_lsx_vfcmp_clt_s(_1, _2);
6677 }
6678 // CHECK-LABEL: @vfcmp_cne_d(
6679 // CHECK-NEXT:  entry:
6680 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6681 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6682 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6683 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6684 // CHECK-NEXT:    ret i128 [[TMP3]]
6685 //
6686 v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) {
6687   return __builtin_lsx_vfcmp_cne_d(_1, _2);
6688 }
6689 // CHECK-LABEL: @vfcmp_cne_s(
6690 // CHECK-NEXT:  entry:
6691 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6692 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6693 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6694 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6695 // CHECK-NEXT:    ret i128 [[TMP3]]
6696 //
6697 v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) {
6698   return __builtin_lsx_vfcmp_cne_s(_1, _2);
6699 }
6700 // CHECK-LABEL: @vfcmp_cor_d(
6701 // CHECK-NEXT:  entry:
6702 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6703 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6704 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6705 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6706 // CHECK-NEXT:    ret i128 [[TMP3]]
6707 //
6708 v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) {
6709   return __builtin_lsx_vfcmp_cor_d(_1, _2);
6710 }
6711 // CHECK-LABEL: @vfcmp_cor_s(
6712 // CHECK-NEXT:  entry:
6713 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6714 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6715 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6716 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6717 // CHECK-NEXT:    ret i128 [[TMP3]]
6718 //
6719 v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) {
6720   return __builtin_lsx_vfcmp_cor_s(_1, _2);
6721 }
6722 // CHECK-LABEL: @vfcmp_cueq_d(
6723 // CHECK-NEXT:  entry:
6724 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6725 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6726 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6727 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6728 // CHECK-NEXT:    ret i128 [[TMP3]]
6729 //
6730 v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) {
6731   return __builtin_lsx_vfcmp_cueq_d(_1, _2);
6732 }
6733 // CHECK-LABEL: @vfcmp_cueq_s(
6734 // CHECK-NEXT:  entry:
6735 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6736 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6737 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6738 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6739 // CHECK-NEXT:    ret i128 [[TMP3]]
6740 //
6741 v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) {
6742   return __builtin_lsx_vfcmp_cueq_s(_1, _2);
6743 }
6744 // CHECK-LABEL: @vfcmp_cule_d(
6745 // CHECK-NEXT:  entry:
6746 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6747 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6748 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6749 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6750 // CHECK-NEXT:    ret i128 [[TMP3]]
6751 //
6752 v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) {
6753   return __builtin_lsx_vfcmp_cule_d(_1, _2);
6754 }
6755 // CHECK-LABEL: @vfcmp_cule_s(
6756 // CHECK-NEXT:  entry:
6757 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6758 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6759 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6760 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6761 // CHECK-NEXT:    ret i128 [[TMP3]]
6762 //
6763 v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) {
6764   return __builtin_lsx_vfcmp_cule_s(_1, _2);
6765 }
6766 // CHECK-LABEL: @vfcmp_cult_d(
6767 // CHECK-NEXT:  entry:
6768 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6769 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6770 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6771 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6772 // CHECK-NEXT:    ret i128 [[TMP3]]
6773 //
6774 v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) {
6775   return __builtin_lsx_vfcmp_cult_d(_1, _2);
6776 }
6777 // CHECK-LABEL: @vfcmp_cult_s(
6778 // CHECK-NEXT:  entry:
6779 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6780 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6781 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6782 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6783 // CHECK-NEXT:    ret i128 [[TMP3]]
6784 //
6785 v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) {
6786   return __builtin_lsx_vfcmp_cult_s(_1, _2);
6787 }
6788 // CHECK-LABEL: @vfcmp_cun_d(
6789 // CHECK-NEXT:  entry:
6790 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6791 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6792 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6793 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6794 // CHECK-NEXT:    ret i128 [[TMP3]]
6795 //
6796 v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) {
6797   return __builtin_lsx_vfcmp_cun_d(_1, _2);
6798 }
6799 // CHECK-LABEL: @vfcmp_cune_d(
6800 // CHECK-NEXT:  entry:
6801 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6802 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6803 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6804 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6805 // CHECK-NEXT:    ret i128 [[TMP3]]
6806 //
6807 v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) {
6808   return __builtin_lsx_vfcmp_cune_d(_1, _2);
6809 }
6810 // CHECK-LABEL: @vfcmp_cune_s(
6811 // CHECK-NEXT:  entry:
6812 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6813 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6814 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6815 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6816 // CHECK-NEXT:    ret i128 [[TMP3]]
6817 //
6818 v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) {
6819   return __builtin_lsx_vfcmp_cune_s(_1, _2);
6820 }
6821 // CHECK-LABEL: @vfcmp_cun_s(
6822 // CHECK-NEXT:  entry:
6823 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6824 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6825 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6826 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6827 // CHECK-NEXT:    ret i128 [[TMP3]]
6828 //
6829 v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) {
6830   return __builtin_lsx_vfcmp_cun_s(_1, _2);
6831 }
6832 // CHECK-LABEL: @vfcmp_saf_d(
6833 // CHECK-NEXT:  entry:
6834 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6835 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6836 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6837 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6838 // CHECK-NEXT:    ret i128 [[TMP3]]
6839 //
6840 v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) {
6841   return __builtin_lsx_vfcmp_saf_d(_1, _2);
6842 }
6843 // CHECK-LABEL: @vfcmp_saf_s(
6844 // CHECK-NEXT:  entry:
6845 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6846 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6847 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6848 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6849 // CHECK-NEXT:    ret i128 [[TMP3]]
6850 //
6851 v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) {
6852   return __builtin_lsx_vfcmp_saf_s(_1, _2);
6853 }
6854 // CHECK-LABEL: @vfcmp_seq_d(
6855 // CHECK-NEXT:  entry:
6856 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6857 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6858 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6859 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6860 // CHECK-NEXT:    ret i128 [[TMP3]]
6861 //
6862 v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) {
6863   return __builtin_lsx_vfcmp_seq_d(_1, _2);
6864 }
6865 // CHECK-LABEL: @vfcmp_seq_s(
6866 // CHECK-NEXT:  entry:
6867 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6868 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6869 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6870 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6871 // CHECK-NEXT:    ret i128 [[TMP3]]
6872 //
6873 v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) {
6874   return __builtin_lsx_vfcmp_seq_s(_1, _2);
6875 }
6876 // CHECK-LABEL: @vfcmp_sle_d(
6877 // CHECK-NEXT:  entry:
6878 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6879 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6880 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6881 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6882 // CHECK-NEXT:    ret i128 [[TMP3]]
6883 //
6884 v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) {
6885   return __builtin_lsx_vfcmp_sle_d(_1, _2);
6886 }
6887 // CHECK-LABEL: @vfcmp_sle_s(
6888 // CHECK-NEXT:  entry:
6889 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6890 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6891 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6892 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6893 // CHECK-NEXT:    ret i128 [[TMP3]]
6894 //
6895 v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) {
6896   return __builtin_lsx_vfcmp_sle_s(_1, _2);
6897 }
6898 // CHECK-LABEL: @vfcmp_slt_d(
6899 // CHECK-NEXT:  entry:
6900 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6901 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6902 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6903 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6904 // CHECK-NEXT:    ret i128 [[TMP3]]
6905 //
6906 v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) {
6907   return __builtin_lsx_vfcmp_slt_d(_1, _2);
6908 }
6909 // CHECK-LABEL: @vfcmp_slt_s(
6910 // CHECK-NEXT:  entry:
6911 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6912 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6913 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6914 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6915 // CHECK-NEXT:    ret i128 [[TMP3]]
6916 //
6917 v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) {
6918   return __builtin_lsx_vfcmp_slt_s(_1, _2);
6919 }
6920 // CHECK-LABEL: @vfcmp_sne_d(
6921 // CHECK-NEXT:  entry:
6922 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6923 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6924 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6925 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6926 // CHECK-NEXT:    ret i128 [[TMP3]]
6927 //
6928 v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) {
6929   return __builtin_lsx_vfcmp_sne_d(_1, _2);
6930 }
6931 // CHECK-LABEL: @vfcmp_sne_s(
6932 // CHECK-NEXT:  entry:
6933 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6934 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6935 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6936 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6937 // CHECK-NEXT:    ret i128 [[TMP3]]
6938 //
6939 v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) {
6940   return __builtin_lsx_vfcmp_sne_s(_1, _2);
6941 }
6942 // CHECK-LABEL: @vfcmp_sor_d(
6943 // CHECK-NEXT:  entry:
6944 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6945 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6946 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6947 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6948 // CHECK-NEXT:    ret i128 [[TMP3]]
6949 //
6950 v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) {
6951   return __builtin_lsx_vfcmp_sor_d(_1, _2);
6952 }
6953 // CHECK-LABEL: @vfcmp_sor_s(
6954 // CHECK-NEXT:  entry:
6955 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6956 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6957 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6958 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6959 // CHECK-NEXT:    ret i128 [[TMP3]]
6960 //
6961 v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) {
6962   return __builtin_lsx_vfcmp_sor_s(_1, _2);
6963 }
6964 // CHECK-LABEL: @vfcmp_sueq_d(
6965 // CHECK-NEXT:  entry:
6966 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6967 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6968 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6969 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6970 // CHECK-NEXT:    ret i128 [[TMP3]]
6971 //
6972 v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) {
6973   return __builtin_lsx_vfcmp_sueq_d(_1, _2);
6974 }
6975 // CHECK-LABEL: @vfcmp_sueq_s(
6976 // CHECK-NEXT:  entry:
6977 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6978 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6979 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6980 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6981 // CHECK-NEXT:    ret i128 [[TMP3]]
6982 //
6983 v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) {
6984   return __builtin_lsx_vfcmp_sueq_s(_1, _2);
6985 }
6986 // CHECK-LABEL: @vfcmp_sule_d(
6987 // CHECK-NEXT:  entry:
6988 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6989 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6990 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6991 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6992 // CHECK-NEXT:    ret i128 [[TMP3]]
6993 //
6994 v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) {
6995   return __builtin_lsx_vfcmp_sule_d(_1, _2);
6996 }
6997 // CHECK-LABEL: @vfcmp_sule_s(
6998 // CHECK-NEXT:  entry:
6999 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7000 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7001 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7002 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7003 // CHECK-NEXT:    ret i128 [[TMP3]]
7004 //
7005 v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) {
7006   return __builtin_lsx_vfcmp_sule_s(_1, _2);
7007 }
7008 // CHECK-LABEL: @vfcmp_sult_d(
7009 // CHECK-NEXT:  entry:
7010 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7011 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7012 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7013 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7014 // CHECK-NEXT:    ret i128 [[TMP3]]
7015 //
7016 v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) {
7017   return __builtin_lsx_vfcmp_sult_d(_1, _2);
7018 }
7019 // CHECK-LABEL: @vfcmp_sult_s(
7020 // CHECK-NEXT:  entry:
7021 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7022 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7023 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7024 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7025 // CHECK-NEXT:    ret i128 [[TMP3]]
7026 //
7027 v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) {
7028   return __builtin_lsx_vfcmp_sult_s(_1, _2);
7029 }
7030 // CHECK-LABEL: @vfcmp_sun_d(
7031 // CHECK-NEXT:  entry:
7032 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7033 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7034 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7035 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7036 // CHECK-NEXT:    ret i128 [[TMP3]]
7037 //
7038 v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) {
7039   return __builtin_lsx_vfcmp_sun_d(_1, _2);
7040 }
7041 // CHECK-LABEL: @vfcmp_sune_d(
7042 // CHECK-NEXT:  entry:
7043 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7044 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7045 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7046 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7047 // CHECK-NEXT:    ret i128 [[TMP3]]
7048 //
7049 v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) {
7050   return __builtin_lsx_vfcmp_sune_d(_1, _2);
7051 }
7052 // CHECK-LABEL: @vfcmp_sune_s(
7053 // CHECK-NEXT:  entry:
7054 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7055 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7056 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7057 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7058 // CHECK-NEXT:    ret i128 [[TMP3]]
7059 //
7060 v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { // Codegen test wrapper for the LSX vfcmp.sune.s builtin.
7061   return __builtin_lsx_vfcmp_sune_s(_1, _2); // Expected IR: @llvm.loongarch.lsx.vfcmp.sune.s on two <4 x float>, giving <4 x i32>.
7062 }
7063 // CHECK-LABEL: @vfcmp_sun_s(
7064 // CHECK-NEXT:  entry:
7065 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7066 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7067 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7068 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7069 // CHECK-NEXT:    ret i128 [[TMP3]]
7070 //
7071 v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { // Codegen test wrapper for the LSX vfcmp.sun.s builtin.
7072   return __builtin_lsx_vfcmp_sun_s(_1, _2); // Expected IR: @llvm.loongarch.lsx.vfcmp.sun.s on two <4 x float>, giving <4 x i32>.
7073 }
7074 // CHECK-LABEL: @vrepli_b(
7075 // CHECK-NEXT:  entry:
7076 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
7077 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
7078 // CHECK-NEXT:    ret i128 [[TMP1]]
7079 //
7080 v16i8 vrepli_b(void) { return __builtin_lsx_vrepli_b(1); } // Codegen test: expected IR is @llvm.loongarch.lsx.vrepli.b(i32 1) giving <16 x i8>; (void) makes this a prototyped definition.
7081 // CHECK-LABEL: @vrepli_d(
7082 // CHECK-NEXT:  entry:
7083 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
7084 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
7085 // CHECK-NEXT:    ret i128 [[TMP1]]
7086 //
7087 v2i64 vrepli_d(void) { return __builtin_lsx_vrepli_d(1); } // Codegen test: expected IR is @llvm.loongarch.lsx.vrepli.d(i32 1) giving <2 x i64>; (void) makes this a prototyped definition.
7088 // CHECK-LABEL: @vrepli_h(
7089 // CHECK-NEXT:  entry:
7090 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
7091 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
7092 // CHECK-NEXT:    ret i128 [[TMP1]]
7093 //
7094 v8i16 vrepli_h(void) { return __builtin_lsx_vrepli_h(1); } // Codegen test: expected IR is @llvm.loongarch.lsx.vrepli.h(i32 1) giving <8 x i16>; (void) makes this a prototyped definition.
7095 // CHECK-LABEL: @vrepli_w(
7096 // CHECK-NEXT:  entry:
7097 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
7098 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
7099 // CHECK-NEXT:    ret i128 [[TMP1]]
7100 //
7101 v4i32 vrepli_w(void) { return __builtin_lsx_vrepli_w(1); } // Codegen test: expected IR is @llvm.loongarch.lsx.vrepli.w(i32 1) giving <4 x i32>; (void) makes this a prototyped definition.