test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

   1 ; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s
   2
   3 %struct.float16x4x2_t = type { [2 x <4 x half>] } ; aggregate holding an array of two <4 x half> vectors
   4 %struct.float16x8x2_t = type { [2 x <8 x half>] } ; aggregate holding an array of two <8 x half> vectors
   5
   6 define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) { ; llvm.fabs on <4 x half>: expects vabs.f16 on d0
   7 ; CHECK-LABEL: test_vabs_f16:
   8 ; CHECK:         vabs.f16 d0, d0
   9 ; CHECK-NEXT:    bx lr
  10 entry:
  11   %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  12   ret <4 x half> %vabs1.i
  13 }
  14
  15 define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) { ; llvm.fabs on <8 x half>: expects vabs.f16 on q0
  16 ; CHECK-LABEL: test_vabsq_f16:
  17 ; CHECK:         vabs.f16 q0, q0
  18 ; CHECK-NEXT:    bx lr
  19 entry:
  20   %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  21   ret <8 x half> %vabs1.i
  22 }
  23
  24 define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) { ; fcmp oeq against zero, sign-extended to i16 lanes: expects vceq.f16 with #0
  25 ; CHECK-LABEL: test_vceqz_f16:
  26 ; CHECK:         vceq.f16 d0, d0, #0
  27 ; CHECK-NEXT:    bx lr
  28 entry:
  29   %0 = fcmp oeq <4 x half> %a, zeroinitializer
  30   %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  31   ret <4 x i16> %vceqz.i
  32 }
  33
  34 define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) { ; fcmp oeq against zero on <8 x half>: expects vceq.f16 q0 with #0
  35 ; CHECK-LABEL: test_vceqzq_f16:
  36 ; CHECK:         vceq.f16 q0, q0, #0
  37 ; CHECK-NEXT:    bx lr
  38 entry:
  39   %0 = fcmp oeq <8 x half> %a, zeroinitializer
  40   %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  41   ret <8 x i16> %vceqz.i
  42 }
  43
  44 define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) { ; fcmp oge against zero on <4 x half>: expects vcge.f16 d0 with #0
  45 ; CHECK-LABEL: test_vcgez_f16:
  46 ; CHECK:         vcge.f16 d0, d0, #0
  47 ; CHECK-NEXT:    bx lr
  48 entry:
  49   %0 = fcmp oge <4 x half> %a, zeroinitializer
  50   %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  51   ret <4 x i16> %vcgez.i
  52 }
  53
  54 define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) { ; fcmp oge against zero on <8 x half>: expects vcge.f16 q0 with #0
  55 ; CHECK-LABEL: test_vcgezq_f16:
  56 ; CHECK:         vcge.f16 q0, q0, #0
  57 ; CHECK-NEXT:    bx lr
  58 entry:
  59   %0 = fcmp oge <8 x half> %a, zeroinitializer
  60   %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  61   ret <8 x i16> %vcgez.i
  62 }
  63
  64 define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) { ; fcmp ogt against zero on <4 x half>: expects vcgt.f16 d0 with #0
  65 ; CHECK-LABEL: test_vcgtz_f16:
  66 ; CHECK:         vcgt.f16 d0, d0, #0
  67 ; CHECK-NEXT:    bx lr
  68 entry:
  69   %0 = fcmp ogt <4 x half> %a, zeroinitializer
  70   %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  71   ret <4 x i16> %vcgtz.i
  72 }
  73
  74 define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) { ; fcmp ogt against zero on <8 x half>: expects vcgt.f16 q0 with #0
  75 ; CHECK-LABEL: test_vcgtzq_f16:
  76 ; CHECK:         vcgt.f16 q0, q0, #0
  77 ; CHECK-NEXT:    bx lr
  78 entry:
  79   %0 = fcmp ogt <8 x half> %a, zeroinitializer
  80   %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  81   ret <8 x i16> %vcgtz.i
  82 }
  83
  84 define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) { ; fcmp ole against zero on <4 x half>: expects vcle.f16 d0 with #0
  85 ; CHECK-LABEL: test_vclez_f16:
  86 ; CHECK:         vcle.f16 d0, d0, #0
  87 ; CHECK-NEXT:    bx lr
  88 entry:
  89   %0 = fcmp ole <4 x half> %a, zeroinitializer
  90   %vclez.i = sext <4 x i1> %0 to <4 x i16>
  91   ret <4 x i16> %vclez.i
  92 }
  93
  94 define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) { ; fcmp ole against zero on <8 x half>: expects vcle.f16 q0 with #0
  95 ; CHECK-LABEL: test_vclezq_f16:
  96 ; CHECK:         vcle.f16 q0, q0, #0
  97 ; CHECK-NEXT:    bx lr
  98 entry:
  99   %0 = fcmp ole <8 x half> %a, zeroinitializer
 100   %vclez.i = sext <8 x i1> %0 to <8 x i16>
 101   ret <8 x i16> %vclez.i
 102 }
 103
 104 define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) { ; fcmp olt against zero on <4 x half>: expects vclt.f16 d0 with #0
 105 ; CHECK-LABEL: test_vcltz_f16:
 106 ; CHECK:         vclt.f16 d0, d0, #0
 107 ; CHECK-NEXT:    bx lr
 108 entry:
 109   %0 = fcmp olt <4 x half> %a, zeroinitializer
 110   %vcltz.i = sext <4 x i1> %0 to <4 x i16>
 111   ret <4 x i16> %vcltz.i
 112 }
 113
 114 define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) { ; fcmp olt against zero on <8 x half>: expects vclt.f16 q0 with #0
 115 ; CHECK-LABEL: test_vcltzq_f16:
 116 ; CHECK:         vclt.f16 q0, q0, #0
 117 ; CHECK-NEXT:    bx lr
 118 entry:
 119   %0 = fcmp olt <8 x half> %a, zeroinitializer
 120   %vcltz.i = sext <8 x i1> %0 to <8 x i16>
 121   ret <8 x i16> %vcltz.i
 122 }
 123
 124 define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) { ; sitofp <4 x i16> to <4 x half>: expects vcvt.f16.s16 on d0
 125 ; CHECK-LABEL: test_vcvt_f16_s16:
 126 ; CHECK:         vcvt.f16.s16 d0, d0
 127 ; CHECK-NEXT:    bx lr
 128 entry:
 129   %as_half = sitofp <4 x i16> %a to <4 x half>
 130   ret <4 x half> %as_half
 131 }
 132
 133 define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) { ; sitofp <8 x i16> to <8 x half>: expects vcvt.f16.s16 on q0
 134 ; CHECK-LABEL: test_vcvtq_f16_s16:
 135 ; CHECK:         vcvt.f16.s16 q0, q0
 136 ; CHECK-NEXT:    bx lr
 137 entry:
 138   %as_half = sitofp <8 x i16> %a to <8 x half>
 139   ret <8 x half> %as_half
 140 }
 141
 142 define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) { ; uitofp <4 x i16> to <4 x half>: expects vcvt.f16.u16 on d0
 143 ; CHECK-LABEL: test_vcvt_f16_u16:
 144 ; CHECK:         vcvt.f16.u16 d0, d0
 145 ; CHECK-NEXT:    bx lr
 146 entry:
 147   %as_half = uitofp <4 x i16> %a to <4 x half>
 148   ret <4 x half> %as_half
 149 }
 150
 151 define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) { ; uitofp <8 x i16> to <8 x half>: expects vcvt.f16.u16 on q0
 152 ; CHECK-LABEL: test_vcvtq_f16_u16:
 153 ; CHECK:         vcvt.f16.u16 q0, q0
 154 ; CHECK-NEXT:    bx lr
 155 entry:
 156   %as_half = uitofp <8 x i16> %a to <8 x half>
 157   ret <8 x half> %as_half
 158 }
 159
 160 define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) { ; fptosi <4 x half> to <4 x i16>: expects vcvt.s16.f16 on d0
 161 ; CHECK-LABEL: test_vcvt_s16_f16:
 162 ; CHECK:         vcvt.s16.f16 d0, d0
 163 ; CHECK-NEXT:    bx lr
 164 entry:
 165   %as_int = fptosi <4 x half> %a to <4 x i16>
 166   ret <4 x i16> %as_int
 167 }
 168
 169 define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) { ; fptosi <8 x half> to <8 x i16>: expects vcvt.s16.f16 on q0
 170 ; CHECK-LABEL: test_vcvtq_s16_f16:
 171 ; CHECK:         vcvt.s16.f16 q0, q0
 172 ; CHECK-NEXT:    bx lr
 173 entry:
 174   %as_int = fptosi <8 x half> %a to <8 x i16>
 175   ret <8 x i16> %as_int
 176 }
 177
 178 define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) { ; fptoui <4 x half> to <4 x i16>: expects vcvt.u16.f16 on d0
 179 ; CHECK-LABEL: test_vcvt_u16_f16:
 180 ; CHECK:         vcvt.u16.f16 d0, d0
 181 ; CHECK-NEXT:    bx lr
 182 entry:
 183   %as_int = fptoui <4 x half> %a to <4 x i16>
 184   ret <4 x i16> %as_int
 185 }
 186
 187 define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) { ; fptoui <8 x half> to <8 x i16>: expects vcvt.u16.f16 on q0
 188 ; CHECK-LABEL: test_vcvtq_u16_f16:
 189 ; CHECK:         vcvt.u16.f16 q0, q0
 190 ; CHECK-NEXT:    bx lr
 191 entry:
 192   %as_int = fptoui <8 x half> %a to <8 x i16>
 193   ret <8 x i16> %as_int
 194 }
 195
 196 define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) { ; vcvtas intrinsic on <4 x half>: expects vcvta.s16.f16 on d0
 197 ; CHECK-LABEL: test_vcvta_s16_f16:
 198 ; CHECK:         vcvta.s16.f16 d0, d0
 199 ; CHECK-NEXT:    bx lr
 200 entry:
 201   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
 202   ret <4 x i16> %converted
 203 }
 204
 205 define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) { ; vcvtau intrinsic on <4 x half>: expects vcvta.u16.f16 on d0
 206 ; CHECK-LABEL: test_vcvta_u16_f16:
 207 ; CHECK:         vcvta.u16.f16 d0, d0
 208 ; CHECK-NEXT:    bx lr
 209 entry:
 210   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
 211   ret <4 x i16> %converted
 212 }
 213
 214 define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) { ; vcvtas intrinsic on <8 x half>: expects vcvta.s16.f16 on q0
 215 ; CHECK-LABEL: test_vcvtaq_s16_f16:
 216 ; CHECK:         vcvta.s16.f16 q0, q0
 217 ; CHECK-NEXT:    bx lr
 218 entry:
 219   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
 220   ret <8 x i16> %converted
 221 }
 222
 223 define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) { ; vcvtms intrinsic on <4 x half>: expects vcvtm.s16.f16 on d0
 224 ; CHECK-LABEL: test_vcvtm_s16_f16:
 225 ; CHECK:         vcvtm.s16.f16 d0, d0
 226 ; CHECK-NEXT:    bx lr
 227 entry:
 228   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
 229   ret <4 x i16> %converted
 230 }
 231
 232 define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) { ; vcvtms intrinsic on <8 x half>: expects vcvtm.s16.f16 on q0
 233 ; CHECK-LABEL: test_vcvtmq_s16_f16:
 234 ; CHECK:         vcvtm.s16.f16 q0, q0
 235 ; CHECK-NEXT:    bx lr
 236 entry:
 237   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
 238   ret <8 x i16> %converted
 239 }
 240
 241 define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) { ; vcvtmu intrinsic on <4 x half>: expects vcvtm.u16.f16 on d0
 242 ; CHECK-LABEL: test_vcvtm_u16_f16:
 243 ; CHECK:         vcvtm.u16.f16 d0, d0
 244 ; CHECK-NEXT:    bx lr
 245 entry:
 246   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
 247   ret <4 x i16> %converted
 248 }
 249
 250 define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) { ; vcvtmu intrinsic on <8 x half>: expects vcvtm.u16.f16 on q0
 251 ; CHECK-LABEL: test_vcvtmq_u16_f16:
 252 ; CHECK:         vcvtm.u16.f16 q0, q0
 253 ; CHECK-NEXT:    bx lr
 254 entry:
 255   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
 256   ret <8 x i16> %converted
 257 }
 258
 259 define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) { ; vcvtns intrinsic on <4 x half>: expects vcvtn.s16.f16 on d0
 260 ; CHECK-LABEL: test_vcvtn_s16_f16:
 261 ; CHECK:         vcvtn.s16.f16 d0, d0
 262 ; CHECK-NEXT:    bx lr
 263 entry:
 264   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
 265   ret <4 x i16> %converted
 266 }
 267
 268 define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) { ; vcvtns intrinsic on <8 x half>: expects vcvtn.s16.f16 on q0
 269 ; CHECK-LABEL: test_vcvtnq_s16_f16:
 270 ; CHECK:         vcvtn.s16.f16 q0, q0
 271 ; CHECK-NEXT:    bx lr
 272 entry:
 273   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
 274   ret <8 x i16> %converted
 275 }
 276
 277 define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) { ; vcvtnu intrinsic on <4 x half>: expects vcvtn.u16.f16 on d0
 278 ; CHECK-LABEL: test_vcvtn_u16_f16:
 279 ; CHECK:         vcvtn.u16.f16 d0, d0
 280 ; CHECK-NEXT:    bx lr
 281 entry:
 282   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
 283   ret <4 x i16> %converted
 284 }
 285
 286 define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) { ; vcvtnu intrinsic on <8 x half>: expects vcvtn.u16.f16 on q0
 287 ; CHECK-LABEL: test_vcvtnq_u16_f16:
 288 ; CHECK:         vcvtn.u16.f16 q0, q0
 289 ; CHECK-NEXT:    bx lr
 290 entry:
 291   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
 292   ret <8 x i16> %converted
 293 }
 294
 295 define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) { ; vcvtps intrinsic on <4 x half>: expects vcvtp.s16.f16 on d0
 296 ; CHECK-LABEL: test_vcvtp_s16_f16:
 297 ; CHECK:         vcvtp.s16.f16 d0, d0
 298 ; CHECK-NEXT:    bx lr
 299 entry:
 300   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
 301   ret <4 x i16> %converted
 302 }
 303
 304 define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) { ; vcvtps intrinsic on <8 x half>: expects vcvtp.s16.f16 on q0
 305 ; CHECK-LABEL: test_vcvtpq_s16_f16:
 306 ; CHECK:         vcvtp.s16.f16 q0, q0
 307 ; CHECK-NEXT:    bx lr
 308 entry:
 309   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
 310   ret <8 x i16> %converted
 311 }
 312
 313 define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) { ; vcvtpu intrinsic on <4 x half>: expects vcvtp.u16.f16 on d0
 314 ; CHECK-LABEL: test_vcvtp_u16_f16:
 315 ; CHECK:         vcvtp.u16.f16 d0, d0
 316 ; CHECK-NEXT:    bx lr
 317 entry:
 318   %converted = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
 319   ret <4 x i16> %converted
 320 }
 321
 322 define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) { ; vcvtpu intrinsic on <8 x half>: expects vcvtp.u16.f16 on q0
 323 ; CHECK-LABEL: test_vcvtpq_u16_f16:
 324 ; CHECK:         vcvtp.u16.f16 q0, q0
 325 ; CHECK-NEXT:    bx lr
 326 entry:
 327   %converted = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
 328   ret <8 x i16> %converted
 329 }
 330
 331 define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) { ; fsub of %a from a 0xH8000 splat on <4 x half>: expects vneg.f16 on d0
 332 ; CHECK-LABEL: test_vneg_f16:
 333 ; CHECK:         vneg.f16 d0, d0
 334 ; CHECK-NEXT:    bx lr
 335 entry:
 336   %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
 337   ret <4 x half> %sub.i
 338 }
 339
 340 define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) { ; fsub of %a from a 0xH8000 splat on <8 x half>: expects vneg.f16 on q0
 341 ; CHECK-LABEL: test_vnegq_f16:
 342 ; CHECK:         vneg.f16 q0, q0
 343 ; CHECK-NEXT:    bx lr
 344 entry:
 345   %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
 346   ret <8 x half> %sub.i
 347 }
 348
 349 define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) { ; vrecpe intrinsic on <4 x half>: expects vrecpe.f16 on d0
 350 ; CHECK-LABEL: test_vrecpe_f16:
 351 ; CHECK:         vrecpe.f16 d0, d0
 352 ; CHECK-NEXT:    bx lr
 353 entry:
 354   %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
 355   ret <4 x half> %vrecpe_v1.i
 356 }
 357
 358 define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) { ; vrecpe intrinsic on <8 x half>: expects vrecpe.f16 on q0
 359 ; CHECK-LABEL: test_vrecpeq_f16:
 360 ; CHECK:         vrecpe.f16 q0, q0
 361 ; CHECK-NEXT:    bx lr
 362 entry:
 363   %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
 364   ret <8 x half> %vrecpeq_v1.i
 365 }
 366
 367 define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) { ; vrintz intrinsic on <4 x half>: expects vrintz.f16 on d0
 368 ; CHECK-LABEL: test_vrnd_f16:
 369 ; CHECK:         vrintz.f16 d0, d0
 370 ; CHECK-NEXT:    bx lr
 371 entry:
 372   %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
 373   ret <4 x half> %vrnd_v1.i
 374 }
 375
 376 define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) { ; vrintz intrinsic on <8 x half>: expects vrintz.f16 on q0
 377 ; CHECK-LABEL: test_vrndq_f16:
 378 ; CHECK:         vrintz.f16 q0, q0
 379 ; CHECK-NEXT:    bx lr
 380 entry:
 381   %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
 382   ret <8 x half> %vrndq_v1.i
 383 }
 384
 385 define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) { ; vrinta intrinsic on <4 x half>: expects vrinta.f16 on d0
 386 ; CHECK-LABEL: test_vrnda_f16:
 387 ; CHECK:         vrinta.f16 d0, d0
 388 ; CHECK-NEXT:    bx lr
 389 entry:
 390   %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
 391   ret <4 x half> %vrnda_v1.i
 392 }
 393
 394 define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) { ; vrinta intrinsic on <8 x half>: expects vrinta.f16 on q0
 395 ; CHECK-LABEL: test_vrndaq_f16:
 396 ; CHECK:         vrinta.f16 q0, q0
 397 ; CHECK-NEXT:    bx lr
 398 entry:
 399   %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
 400   ret <8 x half> %vrndaq_v1.i
 401 }
 402
 403 define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) { ; vrintm intrinsic on <4 x half>: expects vrintm.f16 on d0
 404 ; CHECK-LABEL: test_vrndm_f16:
 405 ; CHECK:         vrintm.f16 d0, d0
 406 ; CHECK-NEXT:    bx lr
 407 entry:
 408   %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
 409   ret <4 x half> %vrndm_v1.i
 410 }
 411
 412 define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) { ; vrintm intrinsic on <8 x half>: expects vrintm.f16 on q0
 413 ; CHECK-LABEL: test_vrndmq_f16:
 414 ; CHECK:         vrintm.f16 q0, q0
 415 ; CHECK-NEXT:    bx lr
 416 entry:
 417   %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
 418   ret <8 x half> %vrndmq_v1.i
 419 }
 420
 421 define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) { ; vrintn intrinsic on <4 x half>: expects vrintn.f16 on d0
 422 ; CHECK-LABEL: test_vrndn_f16:
 423 ; CHECK:         vrintn.f16 d0, d0
 424 ; CHECK-NEXT:    bx lr
 425 entry:
 426   %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
 427   ret <4 x half> %vrndn_v1.i
 428 }
 429
 430 define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) { ; vrintn intrinsic on <8 x half>: expects vrintn.f16 on q0
 431 ; CHECK-LABEL: test_vrndnq_f16:
 432 ; CHECK:         vrintn.f16 q0, q0
 433 ; CHECK-NEXT:    bx lr
 434 entry:
 435   %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
 436   ret <8 x half> %vrndnq_v1.i
 437 }
 438
 439 define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) { ; vrintp intrinsic on <4 x half>: expects vrintp.f16 on d0
 440 ; CHECK-LABEL: test_vrndp_f16:
 441 ; CHECK:         vrintp.f16 d0, d0
 442 ; CHECK-NEXT:    bx lr
 443 entry:
 444   %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
 445   ret <4 x half> %vrndp_v1.i
 446 }
 447
 448 define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) { ; vrintp intrinsic on <8 x half>: expects vrintp.f16 on q0
 449 ; CHECK-LABEL: test_vrndpq_f16:
 450 ; CHECK:         vrintp.f16 q0, q0
 451 ; CHECK-NEXT:    bx lr
 452 entry:
 453   %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
 454   ret <8 x half> %vrndpq_v1.i
 455 }
 456
 457 define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) { ; vrintx intrinsic on <4 x half>: expects vrintx.f16 on d0
 458 ; CHECK-LABEL: test_vrndx_f16:
 459 ; CHECK:         vrintx.f16 d0, d0
 460 ; CHECK-NEXT:    bx lr
 461 entry:
 462   %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
 463   ret <4 x half> %vrndx_v1.i
 464 }
 465
 466 define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) { ; vrintx intrinsic on <8 x half>: expects vrintx.f16 on q0
 467 ; CHECK-LABEL: test_vrndxq_f16:
 468 ; CHECK:         vrintx.f16 q0, q0
 469 ; CHECK-NEXT:    bx lr
 470 entry:
 471   %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
 472   ret <8 x half> %vrndxq_v1.i
 473 }
 474
 475 define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) { ; vrsqrte intrinsic on <4 x half>: expects vrsqrte.f16 on d0
 476 ; CHECK-LABEL: test_vrsqrte_f16:
 477 ; CHECK:         vrsqrte.f16 d0, d0
 478 ; CHECK-NEXT:    bx lr
 479 entry:
 480   %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
 481   ret <4 x half> %vrsqrte_v1.i
 482 }
 483
 484 define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) { ; vrsqrte intrinsic on <8 x half>: expects vrsqrte.f16 on q0
 485 ; CHECK-LABEL: test_vrsqrteq_f16:
 486 ; CHECK:         vrsqrte.f16 q0, q0
 487 ; CHECK-NEXT:    bx lr
 488 entry:
 489   %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
 490   ret <8 x half> %vrsqrteq_v1.i
 491 }
 492
 493 define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) { ; fadd on <4 x half>: expects vadd.f16 on D registers
 494 ; CHECK-LABEL: test_vadd_f16:
 495 ; CHECK:         vadd.f16 d0, d0, d1
 496 ; CHECK-NEXT:    bx lr
 497 entry:
 498   %add.i = fadd <4 x half> %a, %b
 499   ret <4 x half> %add.i
 500 }
 501
 502 define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) { ; fadd on <8 x half>: expects vadd.f16 on Q registers
 503 ; CHECK-LABEL: test_vaddq_f16:
 504 ; CHECK:         vadd.f16 q0, q0, q1
 505 ; CHECK-NEXT:    bx lr
 506 entry:
 507   %add.i = fadd <8 x half> %a, %b
 508   ret <8 x half> %add.i
 509 }
 510
 511 define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) { ; vabds intrinsic on <4 x half>: expects vabd.f16 on D registers
 512 ; CHECK-LABEL: test_vabd_f16:
 513 ; CHECK:         vabd.f16 d0, d0, d1
 514 ; CHECK-NEXT:    bx lr
 515 entry:
 516   %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
 517   ret <4 x half> %vabd_v2.i
 518 }
 519
 520 define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) { ; vabds intrinsic on <8 x half>: expects vabd.f16 on Q registers
 521 ; CHECK-LABEL: test_vabdq_f16:
 522 ; CHECK:         vabd.f16 q0, q0, q1
 523 ; CHECK-NEXT:    bx lr
 524 entry:
 525   %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
 526   ret <8 x half> %vabdq_v2.i
 527 }
 528
 529 define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) { ; vacge intrinsic on (%a, %b): expects vacge.f16 d0, d0, d1
 530 ; CHECK-LABEL: test_vcage_f16:
 531 ; CHECK:         vacge.f16 d0, d0, d1
 532 ; CHECK-NEXT:    bx lr
 533 entry:
 534   %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
 535   ret <4 x i16> %vcage_v2.i
 536 }
 537
 538 define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) { ; vacge intrinsic on (%a, %b): expects vacge.f16 q0, q0, q1
 539 ; CHECK-LABEL: test_vcageq_f16:
 540 ; CHECK:         vacge.f16 q0, q0, q1
 541 ; CHECK-NEXT:    bx lr
 542 entry:
 543   %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
 544   ret <8 x i16> %vcageq_v2.i
 545 }
 546
 547 define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) { ; vacgt intrinsic on (%a, %b): expects vacgt.f16 d0, d0, d1
 548 ; CHECK-LABEL: test_vcagt_f16:
 549 ; CHECK:         vacgt.f16 d0, d0, d1
 550 ; CHECK-NEXT:    bx lr
 551 entry:
 552   %mask = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
 553   ret <4 x i16> %mask
 554 }
 555
 556 define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) { ; vacgt intrinsic on (%a, %b): expects vacgt.f16 q0, q0, q1
 557 ; CHECK-LABEL: test_vcagtq_f16:
 558 ; CHECK:         vacgt.f16 q0, q0, q1
 559 ; CHECK-NEXT:    bx lr
 560 entry:
 561   %mask = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
 562   ret <8 x i16> %mask
 563 }
 564
 565 define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) { ; vacge intrinsic called with (%b, %a): expects swapped sources d1, d0
 566 ; CHECK-LABEL: test_vcale_f16:
 567 ; CHECK:         vacge.f16 d0, d1, d0
 568 ; CHECK-NEXT:    bx lr
 569 entry:
 570   %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
 571   ret <4 x i16> %vcale_v2.i
 572 }
 573
 574 define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) { ; vacge intrinsic called with (%b, %a): expects swapped sources q1, q0
 575 ; CHECK-LABEL: test_vcaleq_f16:
 576 ; CHECK:         vacge.f16 q0, q1, q0
 577 ; CHECK-NEXT:    bx lr
 578 entry:
 579   %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
 580   ret <8 x i16> %vcaleq_v2.i
 581 }
 582
 583 define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) { ; fcmp oeq + sext on <4 x half>: expects vceq.f16 d0, d0, d1
 584 ; CHECK-LABEL: test_vceq_f16:
 585 ; CHECK:         vceq.f16 d0, d0, d1
 586 ; CHECK-NEXT:    bx lr
 587 entry:
 588   %cmp.i = fcmp oeq <4 x half> %a, %b
 589   %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
 590   ret <4 x i16> %sext.i
 591 }
 592
 593 define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) { ; fcmp oeq + sext on <8 x half>: expects vceq.f16 q0, q0, q1
 594 ; CHECK-LABEL: test_vceqq_f16:
 595 ; CHECK:         vceq.f16 q0, q0, q1
 596 ; CHECK-NEXT:    bx lr
 597 entry:
 598   %cmp.i = fcmp oeq <8 x half> %a, %b
 599   %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
 600   ret <8 x i16> %sext.i
 601 }
 602
 603 define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) { ; fcmp oge + sext on <4 x half>: expects vcge.f16 d0, d0, d1
 604 ; CHECK-LABEL: test_vcge_f16:
 605 ; CHECK:         vcge.f16 d0, d0, d1
 606 ; CHECK-NEXT:    bx lr
 607 entry:
 608   %cmp.i = fcmp oge <4 x half> %a, %b
 609   %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
 610   ret <4 x i16> %sext.i
 611 }
 612
 613 define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) { ; fcmp oge + sext on <8 x half>: expects vcge.f16 q0, q0, q1
 614 ; CHECK-LABEL: test_vcgeq_f16:
 615 ; CHECK:         vcge.f16 q0, q0, q1
 616 ; CHECK-NEXT:    bx lr
 617 entry:
 618   %cmp.i = fcmp oge <8 x half> %a, %b
 619   %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
 620   ret <8 x i16> %sext.i
 621 }
 622
 623 define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) { ; fcmp ogt + sext on <4 x half>: expects vcgt.f16 d0, d0, d1
 624 ; CHECK-LABEL: test_vcgt_f16:
 625 ; CHECK:         vcgt.f16 d0, d0, d1
 626 ; CHECK-NEXT:    bx lr
 627 entry:
 628   %cmp.i = fcmp ogt <4 x half> %a, %b
 629   %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
 630   ret <4 x i16> %sext.i
 631 }
 632
 633 define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) { ; fcmp ogt + sext on <8 x half>: expects vcgt.f16 q0, q0, q1
 634 ; CHECK-LABEL: test_vcgtq_f16:
 635 ; CHECK:         vcgt.f16 q0, q0, q1
 636 ; CHECK-NEXT:    bx lr
 637 entry:
 638   %cmp.i = fcmp ogt <8 x half> %a, %b
 639   %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
 640   ret <8 x i16> %sext.i
 641 }
 642
 643 define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) { ; fcmp ole + sext on <4 x half>: expects vcge.f16 with sources swapped (d1, d0)
 644 ; CHECK-LABEL: test_vcle_f16:
 645 ; CHECK:         vcge.f16 d0, d1, d0
 646 ; CHECK-NEXT:    bx lr
 647 entry:
 648   %cmp.i = fcmp ole <4 x half> %a, %b
 649   %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
 650   ret <4 x i16> %sext.i
 651 }
 652
 653 define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) { ; fcmp ole + sext on <8 x half>: expects vcge.f16 with sources swapped (q1, q0)
 654 ; CHECK-LABEL: test_vcleq_f16:
 655 ; CHECK:         vcge.f16 q0, q1, q0
 656 ; CHECK-NEXT:    bx lr
 657 entry:
 658   %cmp.i = fcmp ole <8 x half> %a, %b
 659   %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
 660   ret <8 x i16> %sext.i
 661 }
 662
 663 define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) { ; fcmp olt + sext on <4 x half>: expects vcgt.f16 with sources swapped (d1, d0)
 664 ; CHECK-LABEL: test_vclt_f16:
 665 ; CHECK:         vcgt.f16 d0, d1, d0
 666 ; CHECK-NEXT:    bx lr
 667 entry:
 668   %cmp.i = fcmp olt <4 x half> %a, %b
 669   %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
 670   ret <4 x i16> %sext.i
 671 }
 672
 673 define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) { ; fcmp olt + sext on <8 x half>: expects vcgt.f16 with sources swapped (q1, q0)
 674 ; CHECK-LABEL: test_vcltq_f16:
 675 ; CHECK:         vcgt.f16 q0, q1, q0
 676 ; CHECK-NEXT:    bx lr
 677 entry:
 678   %cmp.i = fcmp olt <8 x half> %a, %b
 679   %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
 680   ret <8 x i16> %sext.i
 681 }
 682
 683 define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) { ; vcvtfxs2fp intrinsic with i32 2: expects vcvt.f16.s16 d0 with #2
 684 ; CHECK-LABEL: test_vcvt_n_f16_s16:
 685 ; CHECK:         vcvt.f16.s16 d0, d0, #2
 686 ; CHECK-NEXT:    bx lr
 687 entry:
 688   %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
 689   ret <4 x half> %vcvt_n1
 690 }
 691
 692 declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2
 693
 694 define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) { ; vcvtfxs2fp intrinsic with i32 2: expects vcvt.f16.s16 q0 with #2
 695 ; CHECK-LABEL: test_vcvtq_n_f16_s16:
 696 ; CHECK:         vcvt.f16.s16 q0, q0, #2
 697 ; CHECK-NEXT:    bx lr
 698 entry:
 699   %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
 700   ret <8 x half> %vcvt_n1
 701 }
 702
 703 declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2
 704
 705 define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) { ; vcvtfxu2fp intrinsic with i32 2: expects vcvt.f16.u16 d0 with #2
 706 ; CHECK-LABEL: test_vcvt_n_f16_u16:
 707 ; CHECK:         vcvt.f16.u16 d0, d0, #2
 708 ; CHECK-NEXT:    bx lr
 709 entry:
 710   %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
 711   ret <4 x half> %vcvt_n1
 712 }
 713
 714 declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2
 715
 716 define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) { ; vcvtfxu2fp intrinsic with i32 2: expects vcvt.f16.u16 q0 with #2
 717 ; CHECK-LABEL: test_vcvtq_n_f16_u16:
 718 ; CHECK:         vcvt.f16.u16 q0, q0, #2
 719 ; CHECK-NEXT:    bx lr
 720 entry:
 721   %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
 722   ret <8 x half> %vcvt_n1
 723 }
 724
 725 declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2
 726
 727 define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) { ; vcvtfp2fxs intrinsic with i32 2: expects vcvt.s16.f16 d0 with #2
 728 ; CHECK-LABEL: test_vcvt_n_s16_f16:
 729 ; CHECK:         vcvt.s16.f16 d0, d0, #2
 730 ; CHECK-NEXT:    bx lr
 731 entry:
 732   %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
 733   ret <4 x i16> %vcvt_n1
 734 }
 735
 736 declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2
 737
 738 define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) { ; vcvtfp2fxs intrinsic with i32 2: expects vcvt.s16.f16 q0 with #2
 739 ; CHECK-LABEL: test_vcvtq_n_s16_f16:
 740 ; CHECK:         vcvt.s16.f16 q0, q0, #2
 741 ; CHECK-NEXT:    bx lr
 742 entry:
 743   %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
 744   ret <8 x i16> %vcvt_n1
 745 }
 746
 747 declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2
 748
 749 define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) { ; vcvtfp2fxu intrinsic with i32 2: expects vcvt.u16.f16 d0 with #2
 750 ; CHECK-LABEL: test_vcvt_n_u16_f16:
 751 ; CHECK:         vcvt.u16.f16 d0, d0, #2
 752 ; CHECK-NEXT:    bx lr
 753 entry:
 754   %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
 755   ret <4 x i16> %vcvt_n1
 756 }
 757
 758 declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2
 759
 760 define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) { ; vcvtfp2fxu intrinsic with i32 2: expects vcvt.u16.f16 q0 with #2
 761 ; CHECK-LABEL: test_vcvtq_n_u16_f16:
 762 ; CHECK:         vcvt.u16.f16 q0, q0, #2
 763 ; CHECK-NEXT:    bx lr
 764 entry:
 765   %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
 766   ret <8 x i16> %vcvt_n1
 767 }
 768
 769 declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2
 770
 771 define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) { ; vmaxs intrinsic on <4 x half>: expects vmax.f16 on D registers
 772 ; CHECK-LABEL: test_vmax_f16:
 773 ; CHECK:         vmax.f16 d0, d0, d1
 774 ; CHECK-NEXT:    bx lr
 775 entry:
 776   %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
 777   ret <4 x half> %vmax_v2.i
 778 }
 779
 780 define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) { ; vmaxs intrinsic on <8 x half>: expects vmax.f16 on Q registers
 781 ; CHECK-LABEL: test_vmaxq_f16:
 782 ; CHECK:         vmax.f16 q0, q0, q1
 783 ; CHECK-NEXT:    bx lr
 784 entry:
 785   %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
 786   ret <8 x half> %vmaxq_v2.i
 787 }
 788
 789 define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) { ; vmaxnm intrinsic on <4 x half>: expects vmaxnm.f16 on D registers
 790 ; CHECK-LABEL: test_vmaxnm_f16:
 791 ; CHECK:         vmaxnm.f16 d0, d0, d1
 792 ; CHECK-NEXT:    bx lr
 793 entry:
 794   %larger = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
 795   ret <4 x half> %larger
 796 }
 797
 798 define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) { ; vmaxnm intrinsic on <8 x half>: expects vmaxnm.f16 on Q registers
 799 ; CHECK-LABEL: test_vmaxnmq_f16:
 800 ; CHECK:         vmaxnm.f16 q0, q0, q1
 801 ; CHECK-NEXT:    bx lr
 802 entry:
 803   %larger = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
 804   ret <8 x half> %larger
 805 }
 806
 807 define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) { ; vmins intrinsic on <4 x half>: expects vmin.f16 on D registers
 808 ; CHECK-LABEL: test_vmin_f16:
 809 ; CHECK:         vmin.f16 d0, d0, d1
 810 ; CHECK-NEXT:    bx lr
 811 entry:
 812   %smaller = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
 813   ret <4 x half> %smaller
 814 }
 815
 816 define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) { ; vmins intrinsic on <8 x half>: expects vmin.f16 on Q registers
 817 ; CHECK-LABEL: test_vminq_f16:
 818 ; CHECK:         vmin.f16 q0, q0, q1
 819 ; CHECK-NEXT:    bx lr
 820 entry:
 821   %smaller = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
 822   ret <8 x half> %smaller
 823 }
 824
 825 define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) { ; vminnm intrinsic on <4 x half>: expects vminnm.f16 on D registers
 826 ; CHECK-LABEL: test_vminnm_f16:
 827 ; CHECK:         vminnm.f16 d0, d0, d1
 828 ; CHECK-NEXT:    bx lr
 829 entry:
 830   %smaller = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
 831   ret <4 x half> %smaller
 832 }
 833
 834 define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) { ; vminnm intrinsic on <8 x half>: expects vminnm.f16 on Q registers
 835 ; CHECK-LABEL: test_vminnmq_f16:
 836 ; CHECK:         vminnm.f16 q0, q0, q1
 837 ; CHECK-NEXT:    bx lr
 838 entry:
 839   %smaller = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
 840   ret <8 x half> %smaller
 841 }
 842
 843 define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) { ; fmul on <4 x half>: expects vmul.f16 on D registers
 844 ; CHECK-LABEL: test_vmul_f16:
 845 ; CHECK:         vmul.f16 d0, d0, d1
 846 ; CHECK-NEXT:    bx lr
 847 entry:
 848   %mul.i = fmul <4 x half> %a, %b
 849   ret <4 x half> %mul.i
 850 }
 851
 852 define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) { ; fmul on <8 x half>: expects vmul.f16 on Q registers
 853 ; CHECK-LABEL: test_vmulq_f16:
 854 ; CHECK:         vmul.f16 q0, q0, q1
 855 ; CHECK-NEXT:    bx lr
 856 entry:
 857   %mul.i = fmul <8 x half> %a, %b
 858   ret <8 x half> %mul.i
 859 }
 860
 861 define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) { ; vpadd intrinsic on <4 x half>: expects vpadd.f16 on D registers
 862 ; CHECK-LABEL: test_vpadd_f16:
 863 ; CHECK:         vpadd.f16 d0, d0, d1
 864 ; CHECK-NEXT:    bx lr
 865 entry:
 866   %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
 867   ret <4 x half> %vpadd_v2.i
 868 }
 869
 870 define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) { ; vpmaxs intrinsic on <4 x half>: expects vpmax.f16 on D registers
 871 ; CHECK-LABEL: test_vpmax_f16:
 872 ; CHECK:         vpmax.f16 d0, d0, d1
 873 ; CHECK-NEXT:    bx lr
 874 entry:
 875   %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
 876   ret <4 x half> %vpmax_v2.i
 877 }
 878
 879 define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) { ; vpmins intrinsic on <4 x half>: expects vpmin.f16 on D registers
 880 ; CHECK-LABEL: test_vpmin_f16:
 881 ; CHECK:         vpmin.f16 d0, d0, d1
 882 ; CHECK-NEXT:    bx lr
 883 entry:
 884   %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
 885   ret <4 x half> %vpmin_v2.i
 886 }
 887
 888 define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) { ; vrecps intrinsic on <4 x half>: expects vrecps.f16 on D registers
 889 ; CHECK-LABEL: test_vrecps_f16:
 890 ; CHECK:         vrecps.f16 d0, d0, d1
 891 ; CHECK-NEXT:    bx lr
 892 entry:
 893   %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
 894   ret <4 x half> %vrecps_v2.i
 895 }
 896
 897 define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) { ; vrecps intrinsic on <8 x half>: expects vrecps.f16 on Q registers
 898 ; CHECK-LABEL: test_vrecpsq_f16:
 899 ; CHECK:         vrecps.f16 q0, q0, q1
 900 ; CHECK-NEXT:    bx lr
 901 entry:
 902   %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
 903   ret <8 x half> %vrecpsq_v2.i
 904 }
 905
 906 define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
     ; Calls the NEON vrsqrts intrinsic on <4 x half>; a single D-register
     ; vrsqrts.f16 is expected. The label directive is spelled with its hyphen
     ; so FileCheck anchors the checks to this function (the hyphen-less
     ; spelling is silently skipped).
 907 ; CHECK-LABEL: test_vrsqrts_f16:
 908 ; CHECK:         vrsqrts.f16 d0, d0, d1
 909 ; CHECK-NEXT:    bx lr
 910 entry:
 911   %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
 912   ret <4 x half> %vrsqrts_v2.i
 913 }
 914
 915 define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
     ; Calls the NEON vrsqrts intrinsic on <8 x half>; a single Q-register
     ; vrsqrts.f16 is expected. The label directive is spelled with its hyphen
     ; so FileCheck anchors the checks to this function (the hyphen-less
     ; spelling is silently skipped).
 916 ; CHECK-LABEL: test_vrsqrtsq_f16:
 917 ; CHECK:         vrsqrts.f16 q0, q0, q1
 918 ; CHECK-NEXT:    bx lr
 919 entry:
 920   %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
 921   ret <8 x half> %vrsqrtsq_v2.i
 922 }
 923
 924 define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
     ; Lane-wise fsub of two <4 x half> vectors; a single D-register vsub.f16
     ; is expected. The label directive is spelled with its hyphen so FileCheck
     ; anchors the checks to this function (the hyphen-less spelling is
     ; silently skipped).
 925 ; CHECK-LABEL: test_vsub_f16:
 926 ; CHECK:         vsub.f16 d0, d0, d1
 927 ; CHECK-NEXT:    bx lr
 928 entry:
 929   %sub.i = fsub <4 x half> %a, %b
 930   ret <4 x half> %sub.i
 931 }
 932
 933 define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
     ; Lane-wise fsub of two <8 x half> vectors; a single Q-register vsub.f16
     ; is expected. The label directive is spelled with its hyphen so FileCheck
     ; anchors the checks to this function (the hyphen-less spelling is
     ; silently skipped).
 934 ; CHECK-LABEL: test_vsubq_f16:
 935 ; CHECK:         vsub.f16 q0, q0, q1
 936 ; CHECK-NEXT:    bx lr
 937 entry:
 938   %sub.i = fsub <8 x half> %a, %b
 939   ret <8 x half> %sub.i
 940 }
 941
 942 define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
     ; llvm.fma with %b and %c as the multiplicands and %a as the addend;
     ; expected to become one D-register vfma.f16 accumulating into d0.
 943 ; CHECK-LABEL: test_vfma_f16:
 944 ; CHECK: vfma.f16 d0, d1, d2
 945 ; CHECK-NEXT: bx lr
 946 entry:
 947   %fma = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
 948   ret <4 x half> %fma
 949 }
 950
 951 define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
     ; llvm.fma with %b and %c as the multiplicands and %a as the addend;
     ; expected to become one Q-register vfma.f16 accumulating into q0.
 952 ; CHECK-LABEL: test_vfmaq_f16:
 953 ; CHECK: vfma.f16 q0, q1, q2
 954 ; CHECK-NEXT: bx lr
 955 entry:
 956   %fma = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
 957   ret <8 x half> %fma
 958 }
 959
 960 define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
     ; %b is negated by subtracting it from a splat of 0xH8000 and the result
     ; is fed to llvm.fma. The expected output is a separate vneg.f16 into a
     ; scratch D register followed by vfma.f16, not a fused vfms.
 961 ; CHECK-LABEL: test_vfms_f16:
 962 ; CHECK: vneg.f16 [[D16:d[0-9]+]], d1
 963 ; CHECK-NEXT: vfma.f16 d0, [[D16]], d2
 964 ; CHECK-NEXT: bx lr
 965 entry:
 966   %neg.b = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
 967   %fms = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %neg.b, <4 x half> %c, <4 x half> %a)
 968   ret <4 x half> %fms
 969 }
 970
 971 define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
     ; Q-register form: %b is negated by subtracting it from a splat of
     ; 0xH8000, then fed to llvm.fma. Expected output is vneg.f16 into a
     ; scratch Q register followed by vfma.f16.
 972 ; CHECK-LABEL: test_vfmsq_f16:
 973 ; CHECK: vneg.f16 [[Q8:q[0-9]+]], q1
 974 ; CHECK-NEXT: vfma.f16 q0, [[Q8]], q2
 975 ; CHECK-NEXT: bx lr
 976 entry:
 977   %neg.b = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
 978   %fms = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %neg.b, <8 x half> %c, <8 x half> %a)
 979   ret <8 x half> %fms
 980 }
 981
 982 define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
     ; Splat of lane 3 of %b multiplied by %a; the splat is expected to fold
     ; into the by-scalar form of vmul.f16 (d1[3]).
 983 ; CHECK-LABEL: test_vmul_lane_f16:
 984 ; CHECK: vmul.f16 d0, d0, d1[3]
 985 ; CHECK-NEXT: bx lr
 986 entry:
 987   %lane3 = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 988   %prod = fmul <4 x half> %lane3, %a
 989   ret <4 x half> %prod
 990 }
 991
 992 define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
     ; Lane 3 of the <4 x half> %b is broadcast to eight lanes and multiplied
     ; by %a; expected to fold into the by-scalar Q-register vmul.f16 (d2[3]).
 993 ; CHECK-LABEL: test_vmulq_lane_f16:
 994 ; CHECK: vmul.f16 q0, q0, d2[3]
 995 ; CHECK-NEXT: bx lr
 996 entry:
 997   %lane3 = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 998   %prod = fmul <8 x half> %lane3, %a
 999   ret <8 x half> %prod
1000 }
1001
1002 define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
     ; The half scalar arrives coerced in a float: its low 16 bits are
     ; reinterpreted as half, splatted to four lanes and multiplied by %a.
     ; Expected to fold into the by-scalar vmul.f16 reading d1[0].
1003 ; CHECK-LABEL: test_vmul_n_f16:
1004 ; CHECK: vmul.f16 d0, d0, d1[0]
1005 ; CHECK-NEXT: bx lr
1006 entry:
1007   %bits = bitcast float %b.coerce to i32
1008   %lo16 = trunc i32 %bits to i16
1009   %scalar = bitcast i16 %lo16 to half
1010   %ins = insertelement <4 x half> undef, half %scalar, i32 0
1011   %splat = shufflevector <4 x half> %ins, <4 x half> undef, <4 x i32> zeroinitializer
1012   %prod = fmul <4 x half> %splat, %a
1013   ret <4 x half> %prod
1014 }
1015
1016 define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
     ; The half scalar arrives coerced in a float: its low 16 bits are
     ; reinterpreted as half, splatted to eight lanes and multiplied by %a.
     ; Expected to fold into the by-scalar Q-register vmul.f16 reading d2[0].
1017 ; CHECK-LABEL: test_vmulq_n_f16:
1018 ; CHECK: vmul.f16 q0, q0, d2[0]
1019 ; CHECK-NEXT: bx lr
1020 entry:
1021   %bits = bitcast float %b.coerce to i32
1022   %lo16 = trunc i32 %bits to i16
1023   %scalar = bitcast i16 %lo16 to half
1024   %ins = insertelement <8 x half> undef, half %scalar, i32 0
1025   %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
1026   %prod = fmul <8 x half> %splat, %a
1027   ret <8 x half> %prod
1028 }
1029
1030 define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
     ; All three operands are bitcast to <8 x i8>, passed to the NEON vbsl
     ; intrinsic, and the result is bitcast back to <4 x half>; one
     ; D-register vbsl is expected. The label directive is spelled with its
     ; hyphen so FileCheck anchors the checks to this function (the
     ; hyphen-less spelling is silently skipped).
1031 ; CHECK-LABEL: test_vbsl_f16:
1032 ; CHECK:         vbsl d0, d1, d2
1033 ; CHECK-NEXT:    bx lr
1034 entry:
1035   %0 = bitcast <4 x i16> %a to <8 x i8>
1036   %1 = bitcast <4 x half> %b to <8 x i8>
1037   %2 = bitcast <4 x half> %c to <8 x i8>
1038   %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
1039   %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
1040   ret <4 x half> %3
1041 }
1042
1043 define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
     ; All three operands are bitcast to <16 x i8>, passed to the NEON vbsl
     ; intrinsic, and the result is bitcast back to <8 x half>; one
     ; Q-register vbsl is expected. The label directive is spelled with its
     ; hyphen so FileCheck anchors the checks to this function (the
     ; hyphen-less spelling is silently skipped).
1044 ; CHECK-LABEL: test_vbslq_f16:
1045 ; CHECK:         vbsl q0, q1, q2
1046 ; CHECK-NEXT:    bx lr
1047 entry:
1048   %0 = bitcast <8 x i16> %a to <16 x i8>
1049   %1 = bitcast <8 x half> %b to <16 x i8>
1050   %2 = bitcast <8 x half> %c to <16 x i8>
1051   %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
1052   %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
1053   ret <8 x half> %3
1054 }
1055
1056 define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
     ; Builds both interleave shuffles of %a and %b (low halves, then high
     ; halves) and returns them as a pair; one in-place vzip.16 on d0/d1 is
     ; expected to produce both results.
1057 ; CHECK-LABEL: test_vzip_f16:
1058 ; CHECK: vzip.16 d0, d1
1059 ; CHECK-NEXT: bx lr
1060 entry:
1061   %zip.lo = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1062   %zip.hi = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1063   %pair.0 = insertvalue %struct.float16x4x2_t undef, <4 x half> %zip.lo, 0, 0
1064   %pair.1 = insertvalue %struct.float16x4x2_t %pair.0, <4 x half> %zip.hi, 0, 1
1065   ret %struct.float16x4x2_t %pair.1
1066 }
1067
1068 define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
     ; Q-register variant: both interleave shuffles of %a and %b are returned
     ; as a pair; one in-place vzip.16 on q0/q1 is expected.
1069 ; CHECK-LABEL: test_vzipq_f16:
1070 ; CHECK: vzip.16 q0, q1
1071 ; CHECK-NEXT: bx lr
1072 entry:
1073   %zip.lo = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1074   %zip.hi = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1075   %pair.0 = insertvalue %struct.float16x8x2_t undef, <8 x half> %zip.lo, 0, 0
1076   %pair.1 = insertvalue %struct.float16x8x2_t %pair.0, <8 x half> %zip.hi, 0, 1
1077   ret %struct.float16x8x2_t %pair.1
1078 }
1079
1080 define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
     ; Gathers the even-indexed and the odd-indexed lanes of the %a:%b
     ; concatenation and returns both vectors; one in-place vuzp.16 on d0/d1
     ; is expected.
1081 ; CHECK-LABEL: test_vuzp_f16:
1082 ; CHECK: vuzp.16 d0, d1
1083 ; CHECK-NEXT: bx lr
1084 entry:
1085   %even = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1086   %odd = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1087   %pair.0 = insertvalue %struct.float16x4x2_t undef, <4 x half> %even, 0, 0
1088   %pair.1 = insertvalue %struct.float16x4x2_t %pair.0, <4 x half> %odd, 0, 1
1089   ret %struct.float16x4x2_t %pair.1
1090 }
1091
1092 define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
     ; Q-register variant: even-indexed and odd-indexed lanes of the %a:%b
     ; concatenation are returned as a pair; one in-place vuzp.16 on q0/q1 is
     ; expected.
1093 ; CHECK-LABEL: test_vuzpq_f16:
1094 ; CHECK: vuzp.16 q0, q1
1095 ; CHECK-NEXT: bx lr
1096 entry:
1097   %even = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1098   %odd = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1099   %pair.0 = insertvalue %struct.float16x8x2_t undef, <8 x half> %even, 0, 0
1100   %pair.1 = insertvalue %struct.float16x8x2_t %pair.0, <8 x half> %odd, 0, 1
1101   ret %struct.float16x8x2_t %pair.1
1102 }
1103
1104 define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
     ; Pairs up the even lanes of %a and %b, then the odd lanes, and returns
     ; both vectors; one in-place vtrn.16 on d0/d1 is expected.
1105 ; CHECK-LABEL: test_vtrn_f16:
1106 ; CHECK: vtrn.16 d0, d1
1107 ; CHECK-NEXT: bx lr
1108 entry:
1109   %trn.even = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1110   %trn.odd = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1111   %pair.0 = insertvalue %struct.float16x4x2_t undef, <4 x half> %trn.even, 0, 0
1112   %pair.1 = insertvalue %struct.float16x4x2_t %pair.0, <4 x half> %trn.odd, 0, 1
1113   ret %struct.float16x4x2_t %pair.1
1114 }
1115
1116 define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
     ; Q-register variant: even lanes of %a and %b paired up, then the odd
     ; lanes, returned as a pair; one in-place vtrn.16 on q0/q1 is expected.
1117 ; CHECK-LABEL: test_vtrnq_f16:
1118 ; CHECK: vtrn.16 q0, q1
1119 ; CHECK-NEXT: bx lr
1120 entry:
1121   %trn.even = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1122   %trn.odd = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1123   %pair.0 = insertvalue %struct.float16x8x2_t undef, <8 x half> %trn.even, 0, 0
1124   %pair.1 = insertvalue %struct.float16x8x2_t %pair.0, <8 x half> %trn.odd, 0, 1
1125   ret %struct.float16x8x2_t %pair.1
1126 }
1127
1128 define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
     ; The low 16 bits of the coerced float argument are reinterpreted as a
     ; half and splatted to four lanes; one vdup.16 from d0[0] is expected.
1129 ; CHECK-LABEL: test_vmov_n_f16:
1130 ; CHECK: vdup.16 d0, d0[0]
1131 ; CHECK-NEXT: bx lr
1132 entry:
1133   %bits = bitcast float %a.coerce to i32
1134   %lo16 = trunc i32 %bits to i16
1135   %scalar = bitcast i16 %lo16 to half
1136   %ins = insertelement <4 x half> undef, half %scalar, i32 0
1137   %splat = shufflevector <4 x half> %ins, <4 x half> undef, <4 x i32> zeroinitializer
1138   ret <4 x half> %splat
1139 }
1140
1141 define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
     ; The low 16 bits of the coerced float argument are reinterpreted as a
     ; half and splatted to eight lanes; one Q-register vdup.16 from d0[0] is
     ; expected.
1142 ; CHECK-LABEL: test_vmovq_n_f16:
1143 ; CHECK: vdup.16 q0, d0[0]
1144 ; CHECK-NEXT: bx lr
1145 entry:
1146   %bits = bitcast float %a.coerce to i32
1147   %lo16 = trunc i32 %bits to i16
1148   %scalar = bitcast i16 %lo16 to half
1149   %ins = insertelement <8 x half> undef, half %scalar, i32 0
1150   %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
1151   ret <8 x half> %splat
1152 }
1153
1154 define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
     ; Same IR shape as test_vmov_n_f16: the half carried in the coerced
     ; float is splatted to four lanes; one vdup.16 from d0[0] is expected.
1155 ; CHECK-LABEL: test_vdup_n_f16:
1156 ; CHECK: vdup.16 d0, d0[0]
1157 ; CHECK-NEXT: bx lr
1158 entry:
1159   %bits = bitcast float %a.coerce to i32
1160   %lo16 = trunc i32 %bits to i16
1161   %scalar = bitcast i16 %lo16 to half
1162   %ins = insertelement <4 x half> undef, half %scalar, i32 0
1163   %splat = shufflevector <4 x half> %ins, <4 x half> undef, <4 x i32> zeroinitializer
1164   ret <4 x half> %splat
1165 }
1166
1167 define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
     ; Same IR shape as test_vmovq_n_f16: the half carried in the coerced
     ; float is splatted to eight lanes; one Q-register vdup.16 from d0[0] is
     ; expected.
1168 ; CHECK-LABEL: test_vdupq_n_f16:
1169 ; CHECK: vdup.16 q0, d0[0]
1170 ; CHECK-NEXT: bx lr
1171 entry:
1172   %bits = bitcast float %a.coerce to i32
1173   %lo16 = trunc i32 %bits to i16
1174   %scalar = bitcast i16 %lo16 to half
1175   %ins = insertelement <8 x half> undef, half %scalar, i32 0
1176   %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
1177   ret <8 x half> %splat
1178 }
1179
1180 define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
     ; Broadcasts 16-bit lane 3 of %a to all four lanes. The expected
     ; instruction must use the .16 element size: a D register only has two
     ; 32-bit lanes, so a .32 duplicate of lane 3 cannot exist, and the
     ; Q-register sibling test_vdupq_lane_f16 expects vdup.16 as well.
1181 ; CHECK-LABEL: test_vdup_lane_f16:
1182 ; CHECK:         vdup.16 d0, d0[3]
1183 ; CHECK-NEXT:    bx lr
1184 entry:
1185   %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1186   ret <4 x half> %shuffle
1187 }
1188
1189 define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
     ; Broadcasts lane 3 of the <4 x half> input to all eight result lanes;
     ; one Q-register vdup.16 reading d0[3] is expected.
1190 ; CHECK-LABEL: test_vdupq_lane_f16:
1191 ; CHECK: vdup.16 q0, d0[3]
1192 ; CHECK-NEXT: bx lr
1193 entry:
1194   %splat = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1195   ret <8 x half> %splat
1196 }
1197
1198 define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
     ; Selects four consecutive lanes starting at index 2 of the %a:%b
     ; concatenation; one vext.16 with immediate #2 is expected.
1199 ; CHECK-LABEL: test_vext_f16:
1200 ; CHECK: vext.16 d0, d0, d1, #2
1201 ; CHECK-NEXT: bx lr
1202 entry:
1203   %window = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1204   ret <4 x half> %window
1205 }
1206
1207 define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
     ; Selects eight consecutive lanes starting at index 5 of the %a:%b
     ; concatenation; one Q-register vext.16 with immediate #5 is expected.
1208 ; CHECK-LABEL: test_vextq_f16:
1209 ; CHECK: vext.16 q0, q0, q1, #5
1210 ; CHECK-NEXT: bx lr
1211 entry:
1212   %window = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1213   ret <8 x half> %window
1214 }
1215
1216 define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
     ; Reverses the four half lanes of %a. The function previously had no
     ; FileCheck directives, so it only proved that llc does not crash; the
     ; label below at least verifies the function is emitted and keeps the
     ; neighbouring tests' match regions separate.
     ; NOTE(review): no instruction is pinned yet. Presumably this should
     ; select "vrev64.16 d0, d0" -- confirm against llc output before adding
     ; an instruction-level directive.
     ; CHECK-LABEL: test_vrev64_f16:
1217 entry:
1218   %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1219   ret <4 x half> %shuffle.i
1220 }
1221
1222 define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
     ; Reverses the half lanes within each 64-bit half of %a. The function
     ; previously had no FileCheck directives, so it only proved that llc does
     ; not crash; the label below at least verifies the function is emitted
     ; and keeps the neighbouring tests' match regions separate.
     ; NOTE(review): no instruction is pinned yet. Presumably this should
     ; select "vrev64.16 q0, q0" -- confirm against llc output before adding
     ; an instruction-level directive.
     ; CHECK-LABEL: test_vrev64q_f16:
1223 entry:
1224   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1225   ret <8 x half> %shuffle.i
1226 }
1227
1228 define <4 x half> @test_vld_dup1_4xhalf(half* %b) {
     ; Loads one half (align 2) and inserts it into every lane one at a time;
     ; the load and inserts are expected to merge into a single all-lanes
     ; vld1.16 with a 16-bit address alignment hint.
1229 ; CHECK-LABEL: test_vld_dup1_4xhalf:
1230 ; CHECK: vld1.16 {d0[]}, [r0:16]
1231 ; CHECK-NEXT: bx lr
1232
1233 entry:
1234   %elt = load half, half* %b, align 2
1235   %v0 = insertelement <4 x half> undef, half %elt, i32 0
1236   %v1 = insertelement <4 x half> %v0, half %elt, i32 1
1237   %v2 = insertelement <4 x half> %v1, half %elt, i32 2
1238   %v3 = insertelement <4 x half> %v2, half %elt, i32 3
1239   ret <4 x half> %v3
1240 }
1241
1242 define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr {
     ; Loads one half (align 2) and splats it to eight lanes via a
     ; zero-mask shuffle; expected to become a single all-lanes vld1.16
     ; filling d0 and d1.
1243 ; CHECK-LABEL: test_vld_dup1_8xhalf:
1244 ; CHECK: vld1.16 {d0[], d1[]}, [r0:16]
1245 ; CHECK-NEXT: bx lr
1246
1247 entry:
1248   %elt = load half, half* %b, align 2
1249   %ins = insertelement <8 x half> undef, half %elt, i32 0
1250   %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
1251   ret <8 x half> %splat
1252 }
1253
1254 define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
     ; Concatenates %a with itself into an <8 x half>; only a copy of d0 into
     ; d1 is expected.
1255 ; CHECK-LABEL: test_shufflevector8xhalf:
1256 ; CHECK: vmov.f64 d1, d0
1257 ; CHECK-NEXT: bx lr
1258
1259 entry:
1260   %concat = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1261   ret <8 x half> %concat
1262 }
1263
1264 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
1265 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
1266 declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
1267 declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
1268 declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
1269 declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
1270 declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
1271 declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
1272 declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
1273 declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
1274 declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
1275 declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
1276 declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
1277 declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
1278 declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
1279 declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
1280 declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
1281 declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
1282 declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
1283 declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
1284 declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
1285 declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
1286 declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
1287 declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
1288 declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
1289 declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
1290 declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
1291 declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
1292 declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
1293 declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
1294 declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
1295 declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
1296 declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)
1297 declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
1298 declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
1299 declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
1300 declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
1301 declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
1302 declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
1303 declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
1304 declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
1305 declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
1306 declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
1307 declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
1308 declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
1309 declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
1310 declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
1311 declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
1312 declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
1313 declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
1314 declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
1315 declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
1316 declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
1317 declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)
1318 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
1319 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
1320 declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
1321 declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
1322 declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, i32, i32)
1323 declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, i32, i32)
1324 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1325 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1326 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1327 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1328 declare void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, i32, i32)
1329 declare void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, i32, i32)
1330 declare void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1331 declare void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1332 declare void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1333 declare void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1334
1335 define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(i8*, <8 x half>, <8 x half>) {
     ; Two-register lane load into lane 7 of each <8 x half> operand; lane 7
     ; is expected to appear as element [3] of the odd D registers d1 and d3.
1336 ; CHECK-LABEL: test_vld2q_lane_f16:
1337 ; CHECK: vld2.16 {d1[3], d3[3]}, [r0]
1338 ; CHECK-NEXT: bx lr
1339 entry:
1340   %ld = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2)
1341   ret { <8 x half>, <8 x half> } %ld
1342 }
1343
1344 define { <4 x half>, <4 x half> } @test_vld2_lane_f16(i8*, <4 x half>, <4 x half>) {
     ; Two-register lane load into lane 3 of each <4 x half> operand; one
     ; vld2.16 targeting d0[3] and d1[3] is expected.
1345 ; CHECK-LABEL: test_vld2_lane_f16:
1346 ; CHECK: vld2.16 {d0[3], d1[3]}, [r0]
1347 ; CHECK-NEXT: bx lr
1348 entry:
1349   %ld = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
1350   ret { <4 x half>, <4 x half> } %ld
1351 }
1352
1353 define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(i8*, <8 x half>, <8 x half>, <8 x half>) {
     ; Three-register lane load into lane 7 of each <8 x half> operand; lane 7
     ; is expected to appear as element [3] of d1, d3 and d5.
1354 ; CHECK-LABEL: test_vld3q_lane_f16:
1355 ; CHECK: vld3.16 {d1[3], d3[3], d5[3]}, [r0]
1356 ; CHECK-NEXT: bx lr
1357 entry:
1358   %ld = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2)
1359   ret { <8 x half>, <8 x half>, <8 x half> } %ld
1360 }
1361
1362 define { <4 x half>, <4 x half>, <4 x half> } @test_vld3_lane_f16(i8*, <4 x half>, <4 x half>, <4 x half>) {
     ; Three-register lane load into lane 3 of each <4 x half> operand; one
     ; vld3.16 targeting d0[3], d1[3] and d2[3] is expected.
1363 ; CHECK-LABEL: test_vld3_lane_f16:
1364 ; CHECK: vld3.16 {d0[3], d1[3], d2[3]}, [r0]
1365 ; CHECK-NEXT: bx lr
1366 entry:
1367   %ld = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2)
1368   ret { <4 x half>, <4 x half>, <4 x half> } %ld
1369 }
1370 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
     ; Four-register lane load into lane 7 of each <8 x half> operand; lane 7
     ; is expected to appear as element [3] of d1, d3, d5 and d7.
1371 ; CHECK-LABEL: test_vld4lane_v8f16_p0i8:
1372 ; CHECK: vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0]
1373 ; CHECK-NEXT: bx lr
1374 entry:
1375   %ld = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2)
1376   ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %ld
1377 }
1378 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
     ; Four-register lane load into lane 3 of each <4 x half> operand; one
     ; vld4.16 targeting d0[3] through d3[3] is expected.
1379 ; CHECK-LABEL: test_vld4lane_v4f16_p0i8:
1380 ; CHECK: vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0]
1381 ; CHECK-NEXT: bx lr
1382 entry:
1383   %ld = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2)
1384   ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %ld
1385 }
1386 define void @test_vst2lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>) {
     ; Two-register lane store of lane 0 from each <8 x half> operand; lane 0
     ; is expected as element [0] of the even D registers d0 and d2, with no
     ; alignment qualifier on the address. The final i32 1 is presumably the
     ; alignment operand of the intrinsic -- confirm against the intrinsic
     ; definition.
1387 ; CHECK-LABEL: test_vst2lane_p0i8_v8f16:
1388 ; CHECK:       vst2.16 {d0[0], d2[0]}, [r0]
1389 ; CHECK-NEXT:  bx lr
1390 entry:
1391   tail call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1)
1392   ret void
1393 }
1394 define void @test_vst2lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>) {
     ; Two-register lane store of lane 0 from each <4 x half> operand; one
     ; vst2.16 from d0[0] and d1[0] with a :32 address alignment qualifier is
     ; expected.
1395 ; CHECK-LABEL: test_vst2lane_p0i8_v4f16:
1396 ; CHECK:       vst2.16 {d0[0], d1[0]}, [r0:32]
1397 ; CHECK-NEXT:  bx lr
1398 entry:
1399   tail call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, i32 0, i32 0)
1400   ret void
1401 }
1402 define void @test_vst3lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>) {
     ; Three-register lane store of lane 0 from each <8 x half> operand; lane
     ; 0 is expected as element [0] of d0, d2 and d4, with no alignment
     ; qualifier on the address.
1403 ; CHECK-LABEL: test_vst3lane_p0i8_v8f16:
1404 ; CHECK:       vst3.16 {d0[0], d2[0], d4[0]}, [r0]
1405 ; CHECK-NEXT:  bx lr
1406 entry:
1407   tail call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0)
1408   ret void
1409 }
1410 define void @test_vst3lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>) {
     ; Three-register lane store of lane 0 from each <4 x half> operand; one
     ; vst3.16 from d0[0], d1[0] and d2[0] with no alignment qualifier is
     ; expected.
1411 ; CHECK-LABEL: test_vst3lane_p0i8_v4f16:
1412 ; CHECK:       vst3.16 {d0[0], d1[0], d2[0]}, [r0]
1413 ; CHECK-NEXT:  bx lr
1414 entry:
1415   tail call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0)
1416   ret void
1417 }
1418 define void @test_vst4lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
     ; Four-register lane store of lane 0 from each <8 x half> operand; lane 0
     ; is expected as element [0] of d0, d2, d4 and d6, with a :64 address
     ; alignment qualifier.
1419 ; CHECK-LABEL: test_vst4lane_p0i8_v8f16:
1420 ; CHECK:       vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64]
1421 ; CHECK-NEXT:  bx lr
1422 entry:
1423   tail call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0)
1424   ret void
1425 }
1426 define void @test_vst4lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
     ; Four-register lane store of lane 0 from each <4 x half> operand; one
     ; vst4.16 from d0[0] through d3[0] with a :64 address alignment qualifier
     ; is expected.
1427 ; CHECK-LABEL: test_vst4lane_p0i8_v4f16:
1428 ; CHECK:       vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64]
1429 ; CHECK-NEXT:  bx lr
1430 entry:
1431   tail call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0)
1432   ret void
1433 }