// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
// RUN: -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg \
// RUN: | FileCheck %s

// REQUIRES: arm-registered-target

#include <arm_neon.h>

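// This file checks the IR that Clang emits for the ARMv8.2-A NEON FP16 vector
// intrinsics: plain arithmetic and comparisons lower to native IR instructions
// on <4 x half> and <8 x half>, while operations with no generic IR equivalent
// keep their target-specific @llvm.arm.neon.* intrinsic calls.
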
// CHECK-LABEL: test_vabs_f16
// CHECK: [[ABS:%.*]] = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[ABS]]
float16x4_t test_vabs_f16(float16x4_t a) {
  return vabs_f16(a);
}

// CHECK-LABEL: test_vabsq_f16
// CHECK: [[ABS:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[ABS]]
float16x8_t test_vabsq_f16(float16x8_t a) {
  return vabsq_f16(a);
}

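// Comparisons against zero lower to an fcmp followed by a sign extension of
// the <N x i1> mask to the unsigned 16-bit result vector.
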
// CHECK-LABEL: test_vceqz_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vceqz_f16(float16x4_t a) {
  return vceqz_f16(a);
}

// CHECK-LABEL: test_vceqzq_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vceqzq_f16(float16x8_t a) {
  return vceqzq_f16(a);
}

// CHECK-LABEL: test_vcgez_f16
// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcgez_f16(float16x4_t a) {
  return vcgez_f16(a);
}

// CHECK-LABEL: test_vcgezq_f16
// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcgezq_f16(float16x8_t a) {
  return vcgezq_f16(a);
}

// CHECK-LABEL: test_vcgtz_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcgtz_f16(float16x4_t a) {
  return vcgtz_f16(a);
}

// CHECK-LABEL: test_vcgtzq_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcgtzq_f16(float16x8_t a) {
  return vcgtzq_f16(a);
}

// CHECK-LABEL: test_vclez_f16
// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vclez_f16(float16x4_t a) {
  return vclez_f16(a);
}

// CHECK-LABEL: test_vclezq_f16
// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vclezq_f16(float16x8_t a) {
  return vclezq_f16(a);
}

// CHECK-LABEL: test_vcltz_f16
// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcltz_f16(float16x4_t a) {
  return vcltz_f16(a);
}

// CHECK-LABEL: test_vcltzq_f16
// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, zeroinitializer
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcltzq_f16(float16x8_t a) {
  return vcltzq_f16(a);
}

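// Conversions between 16-bit integers and half precision map onto the native
// sitofp/uitofp/fptosi/fptoui instructions; the rounding-mode variants
// (vcvta/vcvtm/vcvtn/vcvtp) have no generic IR equivalent and call the
// corresponding @llvm.arm.neon.vcvt* intrinsics instead.
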
// CHECK-LABEL: test_vcvt_f16_s16
// CHECK: [[VCVT:%.*]] = sitofp <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[VCVT]]
float16x4_t test_vcvt_f16_s16(int16x4_t a) {
  return vcvt_f16_s16(a);
}

// CHECK-LABEL: test_vcvtq_f16_s16
// CHECK: [[VCVT:%.*]] = sitofp <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[VCVT]]
float16x8_t test_vcvtq_f16_s16(int16x8_t a) {
  return vcvtq_f16_s16(a);
}

// CHECK-LABEL: test_vcvt_f16_u16
// CHECK: [[VCVT:%.*]] = uitofp <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[VCVT]]
float16x4_t test_vcvt_f16_u16(uint16x4_t a) {
  return vcvt_f16_u16(a);
}

// CHECK-LABEL: test_vcvtq_f16_u16
// CHECK: [[VCVT:%.*]] = uitofp <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[VCVT]]
float16x8_t test_vcvtq_f16_u16(uint16x8_t a) {
  return vcvtq_f16_u16(a);
}

// CHECK-LABEL: test_vcvt_s16_f16
// CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvt_s16_f16(float16x4_t a) {
  return vcvt_s16_f16(a);
}

// CHECK-LABEL: test_vcvtq_s16_f16
// CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtq_s16_f16(float16x8_t a) {
  return vcvtq_s16_f16(a);
}

// CHECK-LABEL: test_vcvt_u16_f16
// CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvt_u16_f16(float16x4_t a) {
  return vcvt_u16_f16(a);
}

// CHECK-LABEL: test_vcvtq_u16_f16
// CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtq_u16_f16(float16x8_t a) {
  return vcvtq_u16_f16(a);
}

// CHECK-LABEL: test_vcvta_s16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvta_s16_f16(float16x4_t a) {
  return vcvta_s16_f16(a);
}

// CHECK-LABEL: test_vcvta_u16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvta_u16_f16(float16x4_t a) {
  return vcvta_u16_f16(a);
}

// CHECK-LABEL: test_vcvtaq_s16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtaq_s16_f16(float16x8_t a) {
  return vcvtaq_s16_f16(a);
}

// CHECK-LABEL: test_vcvtm_s16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvtm_s16_f16(float16x4_t a) {
  return vcvtm_s16_f16(a);
}

// CHECK-LABEL: test_vcvtmq_s16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtmq_s16_f16(float16x8_t a) {
  return vcvtmq_s16_f16(a);
}

// CHECK-LABEL: test_vcvtm_u16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvtm_u16_f16(float16x4_t a) {
  return vcvtm_u16_f16(a);
}

// CHECK-LABEL: test_vcvtmq_u16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtmq_u16_f16(float16x8_t a) {
  return vcvtmq_u16_f16(a);
}

// CHECK-LABEL: test_vcvtn_s16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvtn_s16_f16(float16x4_t a) {
  return vcvtn_s16_f16(a);
}

// CHECK-LABEL: test_vcvtnq_s16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtnq_s16_f16(float16x8_t a) {
  return vcvtnq_s16_f16(a);
}

// CHECK-LABEL: test_vcvtn_u16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvtn_u16_f16(float16x4_t a) {
  return vcvtn_u16_f16(a);
}

// CHECK-LABEL: test_vcvtnq_u16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtnq_u16_f16(float16x8_t a) {
  return vcvtnq_u16_f16(a);
}

// CHECK-LABEL: test_vcvtp_s16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvtp_s16_f16(float16x4_t a) {
  return vcvtp_s16_f16(a);
}

// CHECK-LABEL: test_vcvtpq_s16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtpq_s16_f16(float16x8_t a) {
  return vcvtpq_s16_f16(a);
}

// CHECK-LABEL: test_vcvtp_u16_f16
// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvtp_u16_f16(float16x4_t a) {
  return vcvtp_u16_f16(a);
}

// CHECK-LABEL: test_vcvtpq_u16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtpq_u16_f16(float16x8_t a) {
  return vcvtpq_u16_f16(a);
}

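// Unary arithmetic. vneg lowers to the IR fneg instruction; the reciprocal
// and reciprocal-square-root estimates and the round-to-integral family keep
// their target-specific @llvm.arm.neon.* intrinsics.
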
// CHECK-LABEL: test_vneg_f16
// CHECK: [[NEG:%.*]] = fneg <4 x half> %a
// CHECK: ret <4 x half> [[NEG]]
float16x4_t test_vneg_f16(float16x4_t a) {
  return vneg_f16(a);
}

// CHECK-LABEL: test_vnegq_f16
// CHECK: [[NEG:%.*]] = fneg <8 x half> %a
// CHECK: ret <8 x half> [[NEG]]
float16x8_t test_vnegq_f16(float16x8_t a) {
  return vnegq_f16(a);
}

// CHECK-LABEL: test_vrecpe_f16
// CHECK: [[RCP:%.*]] = call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RCP]]
float16x4_t test_vrecpe_f16(float16x4_t a) {
  return vrecpe_f16(a);
}

// CHECK-LABEL: test_vrecpeq_f16
// CHECK: [[RCP:%.*]] = call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RCP]]
float16x8_t test_vrecpeq_f16(float16x8_t a) {
  return vrecpeq_f16(a);
}

// CHECK-LABEL: test_vrnd_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrnd_f16(float16x4_t a) {
  return vrnd_f16(a);
}

// CHECK-LABEL: test_vrndq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndq_f16(float16x8_t a) {
  return vrndq_f16(a);
}

// CHECK-LABEL: test_vrnda_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrnda_f16(float16x4_t a) {
  return vrnda_f16(a);
}

// CHECK-LABEL: test_vrndaq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndaq_f16(float16x8_t a) {
  return vrndaq_f16(a);
}

// CHECK-LABEL: test_vrndm_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndm_f16(float16x4_t a) {
  return vrndm_f16(a);
}

// CHECK-LABEL: test_vrndmq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndmq_f16(float16x8_t a) {
  return vrndmq_f16(a);
}

// CHECK-LABEL: test_vrndn_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndn_f16(float16x4_t a) {
  return vrndn_f16(a);
}

// CHECK-LABEL: test_vrndnq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndnq_f16(float16x8_t a) {
  return vrndnq_f16(a);
}

// CHECK-LABEL: test_vrndp_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndp_f16(float16x4_t a) {
  return vrndp_f16(a);
}

// CHECK-LABEL: test_vrndpq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndpq_f16(float16x8_t a) {
  return vrndpq_f16(a);
}

// CHECK-LABEL: test_vrndx_f16
// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndx_f16(float16x4_t a) {
  return vrndx_f16(a);
}

// CHECK-LABEL: test_vrndxq_f16
// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndxq_f16(float16x8_t a) {
  return vrndxq_f16(a);
}

// CHECK-LABEL: test_vrsqrte_f16
// CHECK: [[RSQ:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RSQ]]
float16x4_t test_vrsqrte_f16(float16x4_t a) {
  return vrsqrte_f16(a);
}

// CHECK-LABEL: test_vrsqrteq_f16
// CHECK: [[RSQ:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RSQ]]
float16x8_t test_vrsqrteq_f16(float16x8_t a) {
  return vrsqrteq_f16(a);
}

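// Binary operations. Plain arithmetic (vadd, vsub, vmul) lowers to native IR
// instructions; absolute difference, min/max, and the Newton-Raphson step
// operations remain @llvm.arm.neon.* calls.
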
// CHECK-LABEL: test_vadd_f16
// CHECK: [[ADD:%.*]] = fadd <4 x half> %a, %b
// CHECK: ret <4 x half> [[ADD]]
float16x4_t test_vadd_f16(float16x4_t a, float16x4_t b) {
  return vadd_f16(a, b);
}

// CHECK-LABEL: test_vaddq_f16
// CHECK: [[ADD:%.*]] = fadd <8 x half> %a, %b
// CHECK: ret <8 x half> [[ADD]]
float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
  return vaddq_f16(a, b);
}

// CHECK-LABEL: test_vabd_f16
// CHECK: [[ABD:%.*]] = call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[ABD]]
float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
  return vabd_f16(a, b);
}

// CHECK-LABEL: test_vabdq_f16
// CHECK: [[ABD:%.*]] = call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[ABD]]
float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
  return vabdq_f16(a, b);
}

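// Absolute comparisons. There are no "absolute less-equal/less-than"
// intrinsics in the IR, so vcale/vcalt are emitted as vacge/vacgt with the
// operands swapped; the checks below expect the swapped %b, %a order.
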
// CHECK-LABEL: test_vcage_f16
// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x i16> [[ABS]]
uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
  return vcage_f16(a, b);
}

// CHECK-LABEL: test_vcageq_f16
// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x i16> [[ABS]]
uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
  return vcageq_f16(a, b);
}

// CHECK-LABEL: test_vcagt_f16
// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x i16> [[ABS]]
uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
  return vcagt_f16(a, b);
}

// CHECK-LABEL: test_vcagtq_f16
// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x i16> [[ABS]]
uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
  return vcagtq_f16(a, b);
}

// CHECK-LABEL: test_vcale_f16
// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
// CHECK: ret <4 x i16> [[ABS]]
uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
  return vcale_f16(a, b);
}

// CHECK-LABEL: test_vcaleq_f16
// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
// CHECK: ret <8 x i16> [[ABS]]
uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
  return vcaleq_f16(a, b);
}

// CHECK-LABEL: test_vcalt_f16
// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
// CHECK: ret <4 x i16> [[ABS]]
uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
  return vcalt_f16(a, b);
}

// CHECK-LABEL: test_vcaltq_f16
// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
// CHECK: ret <8 x i16> [[ABS]]
uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
  return vcaltq_f16(a, b);
}

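// Elementwise ordered comparisons, again lowered as fcmp plus sign extension.
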
// CHECK-LABEL: test_vceq_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vceq_f16(float16x4_t a, float16x4_t b) {
  return vceq_f16(a, b);
}

// CHECK-LABEL: test_vceqq_f16
// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vceqq_f16(float16x8_t a, float16x8_t b) {
  return vceqq_f16(a, b);
}

// CHECK-LABEL: test_vcge_f16
// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcge_f16(float16x4_t a, float16x4_t b) {
  return vcge_f16(a, b);
}

// CHECK-LABEL: test_vcgeq_f16
// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcgeq_f16(float16x8_t a, float16x8_t b) {
  return vcgeq_f16(a, b);
}

// CHECK-LABEL: test_vcgt_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcgt_f16(float16x4_t a, float16x4_t b) {
  return vcgt_f16(a, b);
}

// CHECK-LABEL: test_vcgtq_f16
// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcgtq_f16(float16x8_t a, float16x8_t b) {
  return vcgtq_f16(a, b);
}

// CHECK-LABEL: test_vcle_f16
// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcle_f16(float16x4_t a, float16x4_t b) {
  return vcle_f16(a, b);
}

// CHECK-LABEL: test_vcleq_f16
// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcleq_f16(float16x8_t a, float16x8_t b) {
  return vcleq_f16(a, b);
}

// CHECK-LABEL: test_vclt_f16
// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vclt_f16(float16x4_t a, float16x4_t b) {
  return vclt_f16(a, b);
}

// CHECK-LABEL: test_vcltq_f16
// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, %b
// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vcltq_f16(float16x8_t a, float16x8_t b) {
  return vcltq_f16(a, b);
}

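// Fixed-point conversions. The _n_ variants carry the number of fraction bits
// as an immediate (2 in these tests), which becomes the trailing i32 argument
// of the @llvm.arm.neon.vcvt* fixed-point intrinsics.
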
// CHECK-LABEL: test_vcvt_n_f16_s16
// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
// CHECK: ret <4 x half> [[CVT]]
float16x4_t test_vcvt_n_f16_s16(int16x4_t a) {
  return vcvt_n_f16_s16(a, 2);
}

// CHECK-LABEL: test_vcvtq_n_f16_s16
// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
// CHECK: ret <8 x half> [[CVT]]
float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) {
  return vcvtq_n_f16_s16(a, 2);
}

// CHECK-LABEL: test_vcvt_n_f16_u16
// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
// CHECK: ret <4 x half> [[CVT]]
float16x4_t test_vcvt_n_f16_u16(uint16x4_t a) {
  return vcvt_n_f16_u16(a, 2);
}

// CHECK-LABEL: test_vcvtq_n_f16_u16
// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
// CHECK: ret <8 x half> [[CVT]]
float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
  return vcvtq_n_f16_u16(a, 2);
}

// CHECK-LABEL: test_vcvt_n_s16_f16
// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
// CHECK: ret <4 x i16> [[CVT]]
int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
  return vcvt_n_s16_f16(a, 2);
}

// CHECK-LABEL: test_vcvtq_n_s16_f16
// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
// CHECK: ret <8 x i16> [[CVT]]
int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
  return vcvtq_n_s16_f16(a, 2);
}

// CHECK-LABEL: test_vcvt_n_u16_f16
// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
// CHECK: ret <4 x i16> [[CVT]]
uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
  return vcvt_n_u16_f16(a, 2);
}

// CHECK-LABEL: test_vcvtq_n_u16_f16
// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
// CHECK: ret <8 x i16> [[CVT]]
uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) {
  return vcvtq_n_u16_f16(a, 2);
}

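// Minimum/maximum. The vmaxnm/vminnm forms follow IEEE 754-2008 maxNum/minNum
// semantics, preferring a number over a quiet NaN.
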
// CHECK-LABEL: test_vmax_f16
// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MAX]]
float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
  return vmax_f16(a, b);
}

// CHECK-LABEL: test_vmaxq_f16
// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[MAX]]
float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
  return vmaxq_f16(a, b);
}

// CHECK-LABEL: test_vmaxnm_f16
// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MAX]]
float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
  return vmaxnm_f16(a, b);
}

// CHECK-LABEL: test_vmaxnmq_f16
// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[MAX]]
float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
  return vmaxnmq_f16(a, b);
}

// CHECK-LABEL: test_vmin_f16
// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MIN]]
float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
  return vmin_f16(a, b);
}

// CHECK-LABEL: test_vminq_f16
// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[MIN]]
float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
  return vminq_f16(a, b);
}

// CHECK-LABEL: test_vminnm_f16
// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MIN]]
float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
  return vminnm_f16(a, b);
}

// CHECK-LABEL: test_vminnmq_f16
// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[MIN]]
float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
  return vminnmq_f16(a, b);
}

// CHECK-LABEL: test_vmul_f16
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, %b
// CHECK: ret <4 x half> [[MUL]]
float16x4_t test_vmul_f16(float16x4_t a, float16x4_t b) {
  return vmul_f16(a, b);
}

// CHECK-LABEL: test_vmulq_f16
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, %b
// CHECK: ret <8 x half> [[MUL]]
float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
  return vmulq_f16(a, b);
}

// CHECK-LABEL: test_vpadd_f16
// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[ADD]]
float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
  return vpadd_f16(a, b);
}

// CHECK-LABEL: test_vpmax_f16
// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MAX]]
float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
  return vpmax_f16(a, b);
}

// CHECK-LABEL: test_vpmin_f16
// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[MIN]]
float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
  return vpmin_f16(a, b);
}

// CHECK-LABEL: test_vrecps_f16
// CHECK: [[RECPS:%.*]] = call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[RECPS]]
float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
  return vrecps_f16(a, b);
}

// CHECK-LABEL: test_vrecpsq_f16
// CHECK: [[RECPS:%.*]] = call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[RECPS]]
float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
  return vrecpsq_f16(a, b);
}

// CHECK-LABEL: test_vrsqrts_f16
// CHECK: [[RSQRTS:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
// CHECK: ret <4 x half> [[RSQRTS]]
float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
  return vrsqrts_f16(a, b);
}

// CHECK-LABEL: test_vrsqrtsq_f16
// CHECK: [[RSQRTS:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
// CHECK: ret <8 x half> [[RSQRTS]]
float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
  return vrsqrtsq_f16(a, b);
}

// CHECK-LABEL: test_vsub_f16
// CHECK: [[SUB:%.*]] = fsub <4 x half> %a, %b
// CHECK: ret <4 x half> [[SUB]]
float16x4_t test_vsub_f16(float16x4_t a, float16x4_t b) {
  return vsub_f16(a, b);
}

// CHECK-LABEL: test_vsubq_f16
// CHECK: [[SUB:%.*]] = fsub <8 x half> %a, %b
// CHECK: ret <8 x half> [[SUB]]
float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
  return vsubq_f16(a, b);
}

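// Fused multiply-accumulate. vfma(a, b, c) computes a + b*c as a single
// @llvm.fma call; vfms negates b first, so the checks expect an fneg feeding
// the fma.
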
// CHECK-LABEL: test_vfma_f16
// CHECK: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
// CHECK: ret <4 x half> [[FMA]]
float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfma_f16(a, b, c);
}

// CHECK-LABEL: test_vfmaq_f16
// CHECK: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
// CHECK: ret <8 x half> [[FMA]]
float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmaq_f16(a, b, c);
}

// CHECK-LABEL: test_vfms_f16
// CHECK: [[NEG:%.*]] = fneg <4 x half> %b
// CHECK: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[NEG]], <4 x half> %c, <4 x half> %a)
// CHECK: ret <4 x half> [[FMA]]
float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
  return vfms_f16(a, b, c);
}

// CHECK-LABEL: test_vfmsq_f16
// CHECK: [[NEG:%.*]] = fneg <8 x half> %b
// CHECK: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[NEG]], <8 x half> %c, <8 x half> %a)
// CHECK: ret <8 x half> [[FMA]]
float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
  return vfmsq_f16(a, b, c);
}

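// Multiplication by a single lane or by a scalar. The lane forms broadcast
// one element with a shufflevector; the _n_ forms splat the scalar with a
// chain of insertelement instructions before the fmul.
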
// CHECK-LABEL: test_vmul_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x half> [[MUL]]
float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
  return vmul_lane_f16(a, b, 3);
}

// CHECK-LABEL: test_vmulq_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <8 x half> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x half> [[MUL]]
float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
  return vmulq_lane_f16(a, b, 3);
}

// CHECK-LABEL: test_vmul_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[b:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[b]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[b]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[b]], i32 3
// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP3]]
// CHECK: ret <4 x half> [[MUL]]
float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
  return vmul_n_f16(a, b);
}

// CHECK-LABEL: test_vmulq_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[b:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[b]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[b]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[b]], i32 3
// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[b]], i32 4
// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[b]], i32 5
// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[b]], i32 6
// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[b]], i32 7
// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP7]]
// CHECK: ret <8 x half> [[MUL]]
float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
  return vmulq_n_f16(a, b);
}

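// Bitwise select has no half-precision form in the IR, so the operands are
// bitcast to byte vectors around the @llvm.arm.neon.vbsl call.
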
// CHECK-LABEL: test_vbsl_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// CHECK: [[VBSL:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL]] to <4 x half>
// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
  return vbsl_f16(a, b, c);
}

// CHECK-LABEL: test_vbslq_f16
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// CHECK: [[VBSL:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL]] to <8 x half>
// CHECK: ret <8 x half> [[TMP3]]
float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
  return vbslq_f16(a, b, c);
}

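// Interleave (vzip), de-interleave (vuzp), and transpose (vtrn) each return a
// two-vector struct; both halves are built with shufflevector and stored into
// the returned aggregate.
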
// CHECK-LABEL: test_vzip_f16
// CHECK: [[VZIP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x half> [[VZIP0]], ptr [[addr1:%.*]]
// CHECK: [[VZIP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x half> [[VZIP1]], ptr [[addr2:%.*]]
float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
  return vzip_f16(a, b);
}

// CHECK-LABEL: test_vzipq_f16
// CHECK: [[VZIP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x half> [[VZIP0]], ptr [[addr1:%.*]]
// CHECK: [[VZIP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x half> [[VZIP1]], ptr [[addr2:%.*]]
float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
  return vzipq_f16(a, b);
}

// CHECK-LABEL: test_vuzp_f16
// CHECK: [[VUZP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x half> [[VUZP0]], ptr [[addr1:%.*]]
// CHECK: [[VUZP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x half> [[VUZP1]], ptr [[addr2:%.*]]
float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
  return vuzp_f16(a, b);
}

// CHECK-LABEL: test_vuzpq_f16
// CHECK: [[VUZP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x half> [[VUZP0]], ptr [[addr1:%.*]]
// CHECK: [[VUZP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x half> [[VUZP1]], ptr [[addr2:%.*]]
float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
  return vuzpq_f16(a, b);
}

// CHECK-LABEL: test_vtrn_f16
// CHECK: [[VTRN0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x half> [[VTRN0]], ptr [[addr1:%.*]]
// CHECK: [[VTRN1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x half> [[VTRN1]], ptr [[addr2:%.*]]
float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
  return vtrn_f16(a, b);
}

// CHECK-LABEL: test_vtrnq_f16
// CHECK: [[VTRN0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x half> [[VTRN0]], ptr [[addr1:%.*]]
// CHECK: [[VTRN1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x half> [[VTRN1]], ptr [[addr2:%.*]]
float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
  return vtrnq_f16(a, b);
}

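// Splats and lane rearrangements. vmov_n/vdup_n build the splat with an
// insertelement chain; vdup_lane, vext, and vrev64 all lower to shufflevector
// with the appropriate mask.
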
// CHECK-LABEL: test_vmov_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vmov_n_f16(float16_t a) {
  return vmov_n_f16(a);
}

// CHECK-LABEL: test_vmovq_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
// CHECK: ret <8 x half> [[TMP7]]
float16x8_t test_vmovq_n_f16(float16_t a) {
  return vmovq_n_f16(a);
}

// CHECK-LABEL: test_vdup_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vdup_n_f16(float16_t a) {
  return vdup_n_f16(a);
}

// CHECK-LABEL: test_vdupq_n_f16
// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
// CHECK: ret <8 x half> [[TMP7]]
float16x8_t test_vdupq_n_f16(float16_t a) {
  return vdupq_n_f16(a);
}

// CHECK-LABEL: test_vdup_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x half> [[LANE]]
float16x4_t test_vdup_lane_f16(float16x4_t a) {
  return vdup_lane_f16(a, 3);
}

// CHECK-LABEL: test_vdupq_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x half> [[LANE]]
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
  return vdupq_lane_f16(a, 3);
}

// CHECK-LABEL: @test_vext_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
// CHECK: ret <4 x half> [[VEXT]]
float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
  return vext_f16(a, b, 2);
}

// CHECK-LABEL: @test_vextq_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
// CHECK: ret <8 x half> [[VEXT]]
float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
  return vextq_f16(a, b, 5);
}

// CHECK-LABEL: @test_vrev64_f16(
// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x half> [[SHFL]]
float16x4_t test_vrev64_f16(float16x4_t a) {
  return vrev64_f16(a);
}

// CHECK-LABEL: @test_vrev64q_f16(
// CHECK: [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x half> [[SHFL]]
float16x8_t test_vrev64q_f16(float16x8_t a) {
  return vrev64q_f16(a);
}