test/CodeGen/AArch64/arm64-vcvt_f.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
   3 ; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
   4 ; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
   5 ; RUN:          -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
   6 ; RUN:          2>&1 | FileCheck %s --check-prefixes=GISEL,FALLBACK
     ; The GlobalISel invocation merges stderr into the pipe: missed-pass remarks are printed on stderr, so the fallback guards in this file can only observe them when stderr is redirected.
   7
   8 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
   9 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
  10 define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; widening convert <2 x float> -> <2 x double>; every llc run is expected to emit a single fcvtl
  11 ; CHECK-LABEL: test_vcvt_f64_f32:
  12 ; CHECK:       // %bb.0:
  13 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
  14 ; CHECK-NEXT:    ret
  15 ;
  16 ; GISEL-LABEL: test_vcvt_f64_f32:
  17 ; GISEL:       // %bb.0:
  18 ; GISEL-NEXT:    fcvtl v0.2d, v0.2s
  19 ; GISEL-NEXT:    ret
  20   %vcvt1.i = fpext <2 x float> %x to <2 x double> ; the operation under test
  21   ret <2 x double> %vcvt1.i
  22 }
  23
  24 ; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
  25 ; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
  26 define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp { ; widens lanes 2 and 3 of %x to double; every llc run is expected to fold extract + extend into one fcvtl2
  27 ; CHECK-LABEL: test_vcvt_high_f64_f32:
  28 ; CHECK:       // %bb.0:
  29 ; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
  30 ; CHECK-NEXT:    ret
  31 ;
  32 ; GISEL-LABEL: test_vcvt_high_f64_f32:
  33 ; GISEL:       // %bb.0:
  34 ; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
  35 ; GISEL-NEXT:    ret
  36   %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; mask <2, 3> picks the upper two lanes of %x
  37   %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double> ; extend the extracted half
  38   ret <2 x double> %vcvt1.i
  39 }
  40
  41 ; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_f32_f64)
  42 ; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_f32_f64)
  43 define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp { ; narrowing convert <2 x double> -> <2 x float>; every llc run is expected to emit a single fcvtn
  44 ; CHECK-LABEL: test_vcvt_f32_f64:
  45 ; CHECK:       // %bb.0:
  46 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
  47 ; CHECK-NEXT:    ret
  48 ;
  49 ; GISEL-LABEL: test_vcvt_f32_f64:
  50 ; GISEL:       // %bb.0:
  51 ; GISEL-NEXT:    fcvtn v0.2s, v0.2d
  52 ; GISEL-NEXT:    ret
  53   %vcvt1.i = fptrunc <2 x double> %v to <2 x float> ; the only operation in this function, so the fallback guards above match on fptrunc rather than fpext
  54   ret <2 x float> %vcvt1.i
  55 }
  56
  57 ; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_high_f32_f64)
  58 ; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_high_f32_f64)
  59 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { ; narrows %v to float and appends it after %x; every llc run is expected to use fcvtn2
  60 ; GENERIC-LABEL: test_vcvt_high_f32_f64:
  61 ; GENERIC:       // %bb.0:
  62 ; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
  63 ; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
  64 ; GENERIC-NEXT:    ret
  65 ;
  66 ; FAST-LABEL: test_vcvt_high_f32_f64:
  67 ; FAST:       // %bb.0:
  68 ; FAST-NEXT:    // implicit-def: $q2
  69 ; FAST-NEXT:    mov.16b v2, v0
  70 ; FAST-NEXT:    fcvtn2 v2.4s, v1.2d
  71 ; FAST-NEXT:    mov.16b v0, v2
  72 ; FAST-NEXT:    ret
  73 ;
  74 ; GISEL-LABEL: test_vcvt_high_f32_f64:
  75 ; GISEL:       // %bb.0:
  76 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
  77 ; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
  78 ; GISEL-NEXT:    ret
  79   %cvt = fptrunc <2 x double> %v to <2 x float> ; the conversion in this function is a truncation, so the fallback guards above match on fptrunc rather than fpext
  80   %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both operands: %x fills lanes 0-1, %cvt fills lanes 2-3
  81   ret <4 x float> %vcvt2.i
  82 }
  83
  84 define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { ; calls the fcvtxn intrinsic on %v; every llc run is expected to emit a single fcvtxn
  85 ; CHECK-LABEL: test_vcvtx_f32_f64:
  86 ; CHECK:       // %bb.0:
  87 ; CHECK-NEXT:    fcvtxn v0.2s, v0.2d
  88 ; CHECK-NEXT:    ret
  89 ;
  90 ; GISEL-LABEL: test_vcvtx_f32_f64:
  91 ; GISEL:       // %bb.0:
  92 ; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
  93 ; GISEL-NEXT:    ret
  94   %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; the intrinsic call under test
  95   ret <2 x float> %vcvtx1.i
  96 }
  97
  98 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { ; fcvtxn intrinsic result appended after %x; every llc run is expected to use fcvtxn2
  99 ; GENERIC-LABEL: test_vcvtx_high_f32_f64:
 100 ; GENERIC:       // %bb.0:
 101 ; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 102 ; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
 103 ; GENERIC-NEXT:    ret
 104 ;
 105 ; FAST-LABEL: test_vcvtx_high_f32_f64:
 106 ; FAST:       // %bb.0:
 107 ; FAST-NEXT:    // implicit-def: $q2
 108 ; FAST-NEXT:    mov.16b v2, v0
 109 ; FAST-NEXT:    fcvtxn2 v2.4s, v1.2d
 110 ; FAST-NEXT:    mov.16b v0, v2
 111 ; FAST-NEXT:    ret
 112 ;
 113 ; GISEL-LABEL: test_vcvtx_high_f32_f64:
 114 ; GISEL:       // %bb.0:
 115 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 116 ; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
 117 ; GISEL-NEXT:    ret
 118   %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; narrow %v through the intrinsic
 119   %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both operands: %x fills lanes 0-1, the intrinsic result fills lanes 2-3
 120   ret <4 x float> %res
 121 }
 122
 123
 124 declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 125 declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 126
 127 declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 128 declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone ; NOTE(review): not called by any function visible in this file
 129
 130 declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone ; called by test_vcvtx_f32_f64 and test_vcvtx_high_f32_f64
 131
 132 define i16 @to_half(float %in) { ; converts a float to half precision, returned as its i16 bit pattern via llvm.convert.to.fp16.f32
 133 ; GENERIC-LABEL: to_half:
 134 ; GENERIC:       // %bb.0:
 135 ; GENERIC-NEXT:    fcvt h0, s0
 136 ; GENERIC-NEXT:    fmov w0, s0
 137 ; GENERIC-NEXT:    ret
 138 ;
 139 ; FAST-LABEL: to_half:
 140 ; FAST:       // %bb.0:
 141 ; FAST-NEXT:    sub sp, sp, #16 // =16
 142 ; FAST-NEXT:    .cfi_def_cfa_offset 16
 143 ; FAST-NEXT:    fcvt h1, s0
 144 ; FAST-NEXT:    // implicit-def: $w0
 145 ; FAST-NEXT:    fmov s0, w0
 146 ; FAST-NEXT:    mov.16b v0, v1
 147 ; FAST-NEXT:    fmov w8, s0
 148 ; FAST-NEXT:    mov w0, w8
 149 ; FAST-NEXT:    str w0, [sp, #12] // 4-byte Folded Spill
 150 ; FAST-NEXT:    mov w0, w8
 151 ; FAST-NEXT:    add sp, sp, #16 // =16
 152 ; FAST-NEXT:    ret
 153 ;
 154 ; GISEL-LABEL: to_half:
 155 ; GISEL:       // %bb.0:
 156 ; GISEL-NEXT:    fcvt h0, s0
 157 ; GISEL-NEXT:    fmov w0, s0
 158 ; GISEL-NEXT:    ret
 159   %res = call i16 @llvm.convert.to.fp16.f32(float %in) ; the intrinsic call under test; the optimized runs expect fcvt + fmov, the -O0 fast-isel run pins a longer sequence with a stack spill
 160   ret i16 %res
 161 }
 162
 163 define float @from_half(i16 %in) { ; converts an i16 half-precision bit pattern to float via llvm.convert.from.fp16.f32
 164 ; GENERIC-LABEL: from_half:
 165 ; GENERIC:       // %bb.0:
 166 ; GENERIC-NEXT:    fmov s0, w0
 167 ; GENERIC-NEXT:    fcvt s0, h0
 168 ; GENERIC-NEXT:    ret
 169 ;
 170 ; FAST-LABEL: from_half:
 171 ; FAST:       // %bb.0:
 172 ; FAST-NEXT:    fmov s0, w0
 173 ; FAST-NEXT:    // kill: def $h0 killed $h0 killed $s0
 174 ; FAST-NEXT:    fcvt s0, h0
 175 ; FAST-NEXT:    ret
 176 ;
 177 ; GISEL-LABEL: from_half:
 178 ; GISEL:       // %bb.0:
 179 ; GISEL-NEXT:    fmov s0, w0
 180 ; GISEL-NEXT:    fcvt s0, h0
 181 ; GISEL-NEXT:    ret
 182   %res = call float @llvm.convert.from.fp16.f32(i16 %in) ; the intrinsic call under test; every run expects fmov into s0 followed by fcvt s0, h0
 183   ret float %res
 184 }
 185
 186 declare float @llvm.convert.from.fp16.f32(i16) #1 ; called by from_half; NOTE(review): attribute group #1 is not defined in the visible part of this file - confirm
 187 declare i16 @llvm.convert.to.fp16.f32(float) #1 ; called by to_half; NOTE(review): attribute group #1 is not defined in the visible part of this file - confirm