llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
   3 ; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
   4 ; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
   5 ; RUN:          -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
   6 ; RUN:          | FileCheck %s --check-prefixes=GISEL,FALLBACK
   7
   8 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
   9 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
  10 define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
  11 ; CHECK-LABEL: test_vcvt_f64_f32:
  12 ; CHECK:       // %bb.0:
  13 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
  14 ; CHECK-NEXT:    ret
  15 ;
  16 ; GISEL-LABEL: test_vcvt_f64_f32:
  17 ; GISEL:       // %bb.0:
  18 ; GISEL-NEXT:    fcvtl v0.2d, v0.2s
  19 ; GISEL-NEXT:    ret
  20   %widened = fpext <2 x float> %x to <2 x double> ; plain v2f32 -> v2f64 widening; the assertions above expect a single fcvtl
  21   ret <2 x double> %widened
  22 }
  23
  24 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
  25 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
  26 define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
  27 ; CHECK-LABEL: test_vcvt_high_f64_f32:
  28 ; CHECK:       // %bb.0:
  29 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  30 ; CHECK-NEXT:    ret
  31 ;
  32 ; GISEL-LABEL: test_vcvt_high_f64_f32:
  33 ; GISEL:       // %bb.0:
  34 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  35 ; GISEL-NEXT:    ret
  36   %hi_lanes = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; keep only lanes 2 and 3 of %x
  37   %widened = fpext <2 x float> %hi_lanes to <2 x double> ; extract + widen is expected to fold into one fcvtl2 (see assertions above)
  38   ret <2 x double> %widened
  39 }
  40
  41 define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp {
  42 ; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast:
  43 ; CHECK:       // %bb.0:
  44 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  45 ; CHECK-NEXT:    ret
  46 ;
  47 ; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
  48 ; GISEL:       // %bb.0:
  49 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  50 ; GISEL-NEXT:    ret
  51   %as_v2f64 = bitcast <4 x float> %x to <2 x double> ; view the 128-bit input as two doubles
  52   %hi_elt = shufflevector <2 x double> %as_v2f64, <2 x double> undef, <1 x i32> <i32 1> ; pick element 1 as a <1 x double>
  53   %hi_v2f32 = bitcast <1 x double> %hi_elt to <2 x float> ; back to a pair of floats
  54   %widened = fpext <2 x float> %hi_v2f32 to <2 x double> ; the bitcasts around the extract should not block fcvtl2 selection
  55   ret <2 x double> %widened
  56 }
  57
  58 define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp {
  59 ; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast:
  60 ; CHECK:       // %bb.0:
  61 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  62 ; CHECK-NEXT:    ret
  63 ;
  64 ; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
  65 ; GISEL:       // %bb.0:
  66 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  67 ; GISEL-NEXT:    ret
  68   %hi_elt = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> ; element 1 of the i64 pair
  69   %hi_v2f32 = bitcast <1 x i64> %hi_elt to <2 x float> ; reinterpret those 64 bits as two floats
  70   %widened = fpext <2 x float> %hi_v2f32 to <2 x double>
  71   ret <2 x double> %widened
  72 }
  73
  74 define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp {
  75 ; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast:
  76 ; CHECK:       // %bb.0:
  77 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  78 ; CHECK-NEXT:    ret
  79 ;
  80 ; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
  81 ; GISEL:       // %bb.0:
  82 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  83 ; GISEL-NEXT:    ret
  84   %hi_lanes = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; lanes 2 and 3 of the i32 vector
  85   %hi_v2f32 = bitcast <2 x i32> %hi_lanes to <2 x float> ; same lane width, integer -> float reinterpretation
  86   %widened = fpext <2 x float> %hi_v2f32 to <2 x double>
  87   ret <2 x double> %widened
  88 }
  89
  90 define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp {
  91 ; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast:
  92 ; CHECK:       // %bb.0:
  93 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  94 ; CHECK-NEXT:    ret
  95 ;
  96 ; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
  97 ; GISEL:       // %bb.0:
  98 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  99 ; GISEL-NEXT:    ret
 100   %hi_lanes = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4..7 of the i16 vector
 101   %hi_v2f32 = bitcast <4 x i16> %hi_lanes to <2 x float> ; four i16 lanes reinterpreted as two floats
 102   %widened = fpext <2 x float> %hi_v2f32 to <2 x double>
 103   ret <2 x double> %widened
 104 }
 105
 106 define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp {
 107 ; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast:
 108 ; CHECK:       // %bb.0:
 109 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
 110 ; CHECK-NEXT:    ret
 111 ;
 112 ; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
 113 ; GISEL:       // %bb.0:
 114 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
 115 ; GISEL-NEXT:    ret
 116   %hi_lanes = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes 8..15 of the byte vector
 117   %hi_v2f32 = bitcast <8 x i8> %hi_lanes to <2 x float> ; eight bytes reinterpreted as two floats
 118   %widened = fpext <2 x float> %hi_v2f32 to <2 x double>
 119   ret <2 x double> %widened
 120 }
 121
 122 define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp {
 123 ; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast:
 124 ; CHECK:       // %bb.0:
 125 ; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
 126 ; CHECK-NEXT:    ret
 127 ;
 128 ; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
 129 ; GISEL:       // %bb.0:
 130 ; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 131 ; GISEL-NEXT:    ret
 132   %hi_elt = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> ; element 1 of the i64 pair
 133   %hi_v4f16 = bitcast <1 x i64> %hi_elt to <4 x half> ; reinterpret those 64 bits as four halves
 134   %widened = fpext <4 x half> %hi_v4f16 to <4 x float> ; half -> float variant; assertions above expect the .4s/.8h form of fcvtl2
 135   ret <4 x float> %widened
 136 }
 137
 138 define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp {
 139 ; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast:
 140 ; CHECK:       // %bb.0:
 141 ; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
 142 ; CHECK-NEXT:    ret
 143 ;
 144 ; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
 145 ; GISEL:       // %bb.0:
 146 ; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 147 ; GISEL-NEXT:    ret
 148   %hi_lanes = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; lanes 2 and 3 of the i32 vector
 149   %hi_v4f16 = bitcast <2 x i32> %hi_lanes to <4 x half> ; two i32 lanes reinterpreted as four halves
 150   %widened = fpext <4 x half> %hi_v4f16 to <4 x float>
 151   ret <4 x float> %widened
 152 }
 153
 154 define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp {
 155 ; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast:
 156 ; CHECK:       // %bb.0:
 157 ; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
 158 ; CHECK-NEXT:    ret
 159 ;
 160 ; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
 161 ; GISEL:       // %bb.0:
 162 ; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 163 ; GISEL-NEXT:    ret
 164   %hi_lanes = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4..7 of the i16 vector
 165   %hi_v4f16 = bitcast <4 x i16> %hi_lanes to <4 x half> ; same lane width, integer -> half reinterpretation
 166   %widened = fpext <4 x half> %hi_v4f16 to <4 x float>
 167   ret <4 x float> %widened
 168 }
 169
 170 define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp {
 171 ; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast:
 172 ; CHECK:       // %bb.0:
 173 ; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
 174 ; CHECK-NEXT:    ret
 175 ;
 176 ; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
 177 ; GISEL:       // %bb.0:
 178 ; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
 179 ; GISEL-NEXT:    ret
 180   %hi_lanes = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes 8..15 of the byte vector
 181   %hi_v4f16 = bitcast <8 x i8> %hi_lanes to <4 x half> ; eight bytes reinterpreted as four halves
 182   %widened = fpext <4 x half> %hi_v4f16 to <4 x float>
 183   ret <4 x float> %widened
 184 }
 185
 186 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f32_f64)
 187 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f32_f64)
 188 define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
 189 ; CHECK-LABEL: test_vcvt_f32_f64:
 190 ; CHECK:       // %bb.0:
 191 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 192 ; CHECK-NEXT:    ret
 193 ;
 194 ; GISEL-LABEL: test_vcvt_f32_f64:
 195 ; GISEL:       // %bb.0:
 196 ; GISEL-NEXT:    fcvtn v0.2s, v0.2d
 197 ; GISEL-NEXT:    ret
 198   %narrowed = fptrunc <2 x double> %v to <2 x float> ; plain v2f64 -> v2f32 narrowing; the assertions above expect a single fcvtn
 199   ret <2 x float> %narrowed
 200 }
 201
 202 define half @test_vcvt_f16_f32(<1 x float> %x) {
 203 ; GENERIC-LABEL: test_vcvt_f16_f32:
 204 ; GENERIC:       // %bb.0:
 205 ; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 206 ; GENERIC-NEXT:    fcvt h0, s0
 207 ; GENERIC-NEXT:    ret
 208 ;
 209 ; FAST-LABEL: test_vcvt_f16_f32:
 210 ; FAST:       // %bb.0:
 211 ; FAST-NEXT:    fmov d1, d0
 212 ; FAST-NEXT:    // implicit-def: $q0
 213 ; FAST-NEXT:    fmov d0, d1
 214 ; FAST-NEXT:    // kill: def $s0 killed $s0 killed $q0
 215 ; FAST-NEXT:    fcvt h0, s0
 216 ; FAST-NEXT:    ret
 217 ;
 218 ; GISEL-LABEL: test_vcvt_f16_f32:
 219 ; GISEL:       // %bb.0:
 220 ; GISEL-NEXT:    fmov x8, d0
 221 ; GISEL-NEXT:    fmov s0, w8
 222 ; GISEL-NEXT:    fcvt h0, s0
 223 ; GISEL-NEXT:    ret
 224   %narrowed = fptrunc <1 x float> %x to <1 x half> ; single-element vector narrowing, float -> half
 225   %scalar = extractelement <1 x half> %narrowed, i32 0 ; unwrap the only lane so a scalar half is returned
 226   ret half %scalar
 227 }
 228
 229 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64)
 230 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64)
 231 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 232 ; GENERIC-LABEL: test_vcvt_high_f32_f64:
 233 ; GENERIC:       // %bb.0:
 234 ; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 235 ; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
 236 ; GENERIC-NEXT:    ret
 237 ;
 238 ; FAST-LABEL: test_vcvt_high_f32_f64:
 239 ; FAST:       // %bb.0:
 240 ; FAST-NEXT:    fmov d2, d0
 241 ; FAST-NEXT:    // implicit-def: $q0
 242 ; FAST-NEXT:    fmov d0, d2
 243 ; FAST-NEXT:    fcvtn2 v0.4s, v1.2d
 244 ; FAST-NEXT:    ret
 245 ;
 246 ; GISEL-LABEL: test_vcvt_high_f32_f64:
 247 ; GISEL:       // %bb.0:
 248 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 249 ; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
 250 ; GISEL-NEXT:    ret
 251   %narrowed = fptrunc <2 x double> %v to <2 x float> ; v2f64 -> v2f32
 252   %joined = shufflevector <2 x float> %x, <2 x float> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenate: %x supplies lanes 0-1, %narrowed lanes 2-3
 253   ret <4 x float> %joined
 254 }
 255
 256 define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
 257 ; CHECK-LABEL: test_vcvtx_f32_f64:
 258 ; CHECK:       // %bb.0:
 259 ; CHECK-NEXT:    fcvtxn v0.2s, v0.2d
 260 ; CHECK-NEXT:    ret
 261 ;
 262 ; GISEL-LABEL: test_vcvtx_f32_f64:
 263 ; GISEL:       // %bb.0:
 264 ; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
 265 ; GISEL-NEXT:    ret
 266   %narrowed = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; direct intrinsic call; expected to map 1:1 onto fcvtxn
 267   ret <2 x float> %narrowed
 268 }
 269
 270 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 271 ; GENERIC-LABEL: test_vcvtx_high_f32_f64:
 272 ; GENERIC:       // %bb.0:
 273 ; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 274 ; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
 275 ; GENERIC-NEXT:    ret
 276 ;
 277 ; FAST-LABEL: test_vcvtx_high_f32_f64:
 278 ; FAST:       // %bb.0:
 279 ; FAST-NEXT:    fmov d2, d0
 280 ; FAST-NEXT:    // implicit-def: $q0
 281 ; FAST-NEXT:    fmov d0, d2
 282 ; FAST-NEXT:    fcvtxn2 v0.4s, v1.2d
 283 ; FAST-NEXT:    ret
 284 ;
 285 ; GISEL-LABEL: test_vcvtx_high_f32_f64:
 286 ; GISEL:       // %bb.0:
 287 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 288 ; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
 289 ; GISEL-NEXT:    ret
 290   %narrowed = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; intrinsic narrowing of %v
 291   %joined = shufflevector <2 x float> %x, <2 x float> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenate: %x supplies lanes 0-1, %narrowed lanes 2-3
 292   ret <4 x float> %joined
 293 }
 294
 295
 296 declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone ; NOTE(review): not called by any function visible in this file - confirm whether it is still needed
 297 declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 298
 299 declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 300 declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 301
 302 declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone ; called by test_vcvtx_f32_f64 and test_vcvtx_high_f32_f64
 303
 304 define i16 @to_half(float %in) {
 305 ; GENERIC-LABEL: to_half:
 306 ; GENERIC:       // %bb.0:
 307 ; GENERIC-NEXT:    fcvt h0, s0
 308 ; GENERIC-NEXT:    fmov w0, s0
 309 ; GENERIC-NEXT:    ret
 310 ;
 311 ; FAST-LABEL: to_half:
 312 ; FAST:       // %bb.0:
 313 ; FAST-NEXT:    fcvt h1, s0
 314 ; FAST-NEXT:    // implicit-def: $w0
 315 ; FAST-NEXT:    fmov s0, w0
 316 ; FAST-NEXT:    fmov s0, s1
 317 ; FAST-NEXT:    fmov w0, s0
 318 ; FAST-NEXT:    // kill: def $w1 killed $w0
 319 ; FAST-NEXT:    ret
 320 ;
 321 ; GISEL-LABEL: to_half:
 322 ; GISEL:       // %bb.0:
 323 ; GISEL-NEXT:    fcvt h0, s0
 324 ; GISEL-NEXT:    fmov w0, s0
 325 ; GISEL-NEXT:    ret
 326   %half_bits = call i16 @llvm.convert.to.fp16.f32(float %in) ; float -> half, result carried as an i16 bit pattern
 327   ret i16 %half_bits
 328 }
 329
 330 define float @from_half(i16 %in) {
 331 ; GENERIC-LABEL: from_half:
 332 ; GENERIC:       // %bb.0:
 333 ; GENERIC-NEXT:    fmov s0, w0
 334 ; GENERIC-NEXT:    fcvt s0, h0
 335 ; GENERIC-NEXT:    ret
 336 ;
 337 ; FAST-LABEL: from_half:
 338 ; FAST:       // %bb.0:
 339 ; FAST-NEXT:    fmov s0, w0
 340 ; FAST-NEXT:    // kill: def $h0 killed $h0 killed $s0
 341 ; FAST-NEXT:    fcvt s0, h0
 342 ; FAST-NEXT:    ret
 343 ;
 344 ; GISEL-LABEL: from_half:
 345 ; GISEL:       // %bb.0:
 346 ; GISEL-NEXT:    fmov s0, w0
 347 ; GISEL-NEXT:    fcvt s0, h0
 348 ; GISEL-NEXT:    ret
 349   %as_float = call float @llvm.convert.from.fp16.f32(i16 %in) ; i16 bit pattern interpreted as half, then widened to float
 350   ret float %as_float
 351 }
 352
 353 declare float @llvm.convert.from.fp16.f32(i16) #1 ; called by from_half; NOTE(review): attribute group #1 is not defined in the visible part of this file
 354 declare i16 @llvm.convert.to.fp16.f32(float) #1 ; called by to_half; same undefined #1 reference