test/CodeGen/AArch64/vector-fcopysign.ll

   1 ; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
     ; The command above feeds this file to llc for the aarch64-apple-darwin triple
     ; and pipes the generated assembly into FileCheck, which matches it against
     ; the expected-instruction comments embedded in each function below.
     ; -asm-verbose=false keeps explanatory comments out of the assembly output.
     ; NOTE(review): -disable-post-ra presumably turns off the post-register-allocation
     ; scheduler so the expected instruction order stays stable — confirm against llc's option help.
   2
     ; Data layout string used for every function in this file; the "m:o" component
     ; selects Mach-O name mangling, consistent with the Darwin triple above.
   3 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   4
   5 ;============ v1f32
   6
   7 ; WidenVecRes same
     ; copysign on <1 x float> where the magnitude (%a) and sign (%b) operands
     ; already share the result type, so no conversion is expected.
     ; Expected code: materialise 128 << 24 (0x80000000, the f32 sign bit) in each
     ; 32-bit lane of v2, then use `bit` to copy only those masked bits from v1 into v0.
     ; NOTE(review): "WidenVecRes" presumably names the type-legalizer path this
     ; test is meant to exercise — confirm against the legalizer sources.
   8 define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
   9 ; CHECK-LABEL: test_copysign_v1f32_v1f32:
  10 ; CHECK-NEXT:    movi.2s v2, #128, lsl #24
  11 ; CHECK-NEXT:    bit.8b v0, v1, v2
  12 ; CHECK-NEXT:    ret
  13   %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
  14   ret <1 x float> %r
  15 }
  16
  17 ; WidenVecRes mismatched
     ; copysign on <1 x float> where the sign operand arrives as <1 x double> and is
     ; narrowed with fptrunc before the intrinsic call.
     ; Expected code: a scalar fcvt narrows the sign source (d1 -> s1), the
     ; 0x80000000 sign-bit mask is built in v2, and `bit` copies the masked bits
     ; from v1 into v0. Note the expected mask/insert use the 128-bit forms
     ; (movi.4s / bit.16b) here.
   18 define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
  19 ; CHECK-LABEL: test_copysign_v1f32_v1f64:
  20 ; CHECK-NEXT:    fcvt s1, d1
  21 ; CHECK-NEXT:    movi.4s v2, #128, lsl #24
  22 ; CHECK-NEXT:    bit.16b v0, v1, v2
  23 ; CHECK-NEXT:    ret
  24   %tmp0 = fptrunc <1 x double> %b to <1 x float>
  25   %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
  26   ret <1 x float> %r
  27 }
  28
  29 declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0 ; copysign intrinsic overload for <1 x float>, called by the v1f32 tests above
  30
  31 ;============ v1f64
  32
  33 ; WidenVecOp #1
     ; copysign on <1 x double> where the sign operand arrives as <1 x float> and is
     ; widened with fpext before the intrinsic call.
     ; Expected code: a scalar fcvt widens the sign source (s1 -> d1); the mask is
     ; built by zeroing v2 and negating it as two doubles, which leaves -0.0 (only
     ; the sign bit set) in each 64-bit lane; `bit` then copies the masked bits
     ; from v1 into v0.
     ; NOTE(review): "WidenVecOp #1" presumably refers to widening operand 1 in the
     ; type legalizer — confirm.
  34 define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
  35 ; CHECK-LABEL: test_copysign_v1f64_v1f32:
  36 ; CHECK-NEXT:    fcvt d1, s1
  37 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
  38 ; CHECK-NEXT:    fneg.2d v2, v2
  39 ; CHECK-NEXT:    bit.16b v0, v1, v2
  40 ; CHECK-NEXT:    ret
  41   %tmp0 = fpext <1 x float> %b to <1 x double>
  42   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
  43   ret <1 x double> %r
  44 }
  45
  46 define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
     ; copysign on <1 x double> with both operands already of the result type.
     ; Expected code: zero v2 and negate it as two doubles to obtain the 64-bit
     ; sign-bit mask (-0.0 per lane), then `bit` copies the masked bits from v1
     ; into v0. No conversion instruction is expected.
  47 ; CHECK-LABEL: test_copysign_v1f64_v1f64:
  48 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
  49 ; CHECK-NEXT:    fneg.2d v2, v2
  50 ; CHECK-NEXT:    bit.16b v0, v1, v2
  51 ; CHECK-NEXT:    ret
  52   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
  53   ret <1 x double> %r
  54 }
  55
  56 declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0 ; copysign intrinsic overload for <1 x double>, called by the v1f64 tests above
  57
  58 ;============ v2f32
  59
  60 define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
     ; copysign on <2 x float> with matching operand types.
     ; Expected code: build the 0x80000000 sign-bit mask (128 << 24) in both 32-bit
     ; lanes of v2, then a 64-bit `bit` copies the masked bits from v1 into v0.
  61 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
  62 ; CHECK-NEXT:    movi.2s v2, #128, lsl #24
  63 ; CHECK-NEXT:    bit.8b v0, v1, v2
  64 ; CHECK-NEXT:    ret
  65   %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  66   ret <2 x float> %r
  67 }
  68
  69 define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
     ; copysign on <2 x float> where the sign operand is a <2 x double> narrowed
     ; with fptrunc.
     ; Expected code: a single vector fcvtn narrows both doubles of v1 to floats,
     ; then the 32-bit sign-bit mask is built in v2 and `bit` copies the masked
     ; bits from v1 into v0.
  70 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
  71 ; CHECK-NEXT:    fcvtn v1.2s, v1.2d
  72 ; CHECK-NEXT:    movi.2s v2, #128, lsl #24
  73 ; CHECK-NEXT:    bit.8b v0, v1, v2
  74 ; CHECK-NEXT:    ret
  75   %tmp0 = fptrunc <2 x double> %b to <2 x float>
  76   %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  77   ret <2 x float> %r
  78 }
  79
  80 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0 ; copysign intrinsic overload for <2 x float>, called by the v2f32 tests above
  81
  82 ;============ v4f32
  83
  84 define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
     ; copysign on <4 x float> with matching operand types.
     ; Expected code: build the 0x80000000 sign-bit mask (128 << 24) in all four
     ; 32-bit lanes of v2, then a 128-bit `bit` copies the masked bits from v1 into v0.
  85 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
  86 ; CHECK-NEXT:    movi.4s v2, #128, lsl #24
  87 ; CHECK-NEXT:    bit.16b v0, v1, v2
  88 ; CHECK-NEXT:    ret
  89   %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  90   ret <4 x float> %r
  91 }
  92
  93 ; SplitVecOp #1
     ; copysign on <4 x float> where the sign operand is a <4 x double> narrowed
     ; with fptrunc. The sign operand spans two registers in the expected output
     ; (v1 and v2 are both read as sources).
     ; Expected code is fully scalarised: lanes 1..3 of %a are extracted from v0
     ; into s3/s6/s7, each double lane of the sign source is narrowed with a
     ; scalar fcvt, the shared 0x80000000 mask in v4 drives one `bit` per lane,
     ; and the three upper results are re-inserted into v0 with lane moves.
     ; The exact interleaving below is what the test pins down, so the order of
     ; the expectation lines matters.
     ; NOTE(review): "SplitVecOp #1" presumably refers to splitting operand 1 in
     ; the type legalizer — confirm.
  94 define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
  95 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
  96 ; CHECK-NEXT:    mov s3, v0[1]
  97 ; CHECK-NEXT:    movi.4s v4, #128, lsl #24
  98 ; CHECK-NEXT:    fcvt s5, d1
  99 ; CHECK-NEXT:    mov s6, v0[2]
 100 ; CHECK-NEXT:    mov s7, v0[3]
 101 ; CHECK-NEXT:    bit.16b v0, v5, v4
 102 ; CHECK-NEXT:    fcvt s5, d2
 103 ; CHECK-NEXT:    bit.16b v6, v5, v4
 104 ; CHECK-NEXT:    mov d1, v1[1]
 105 ; CHECK-NEXT:    fcvt s1, d1
 106 ; CHECK-NEXT:    bit.16b v3, v1, v4
 107 ; CHECK-NEXT:    mov d1, v2[1]
 108 ; CHECK-NEXT:    fcvt s1, d1
 109 ; CHECK-NEXT:    mov.s v0[1], v3[0]
 110 ; CHECK-NEXT:    mov.s v0[2], v6[0]
 111 ; CHECK-NEXT:    bit.16b v7, v1, v4
 112 ; CHECK-NEXT:    mov.s v0[3], v7[0]
 113 ; CHECK-NEXT:    ret
 114   %tmp0 = fptrunc <4 x double> %b to <4 x float>
 115   %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
 116   ret <4 x float> %r
 117 }
 118
 119 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0 ; copysign intrinsic overload for <4 x float>, called by the v4f32 tests above
 120
 121 ;============ v2f64
 122
 123 define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
     ; copysign on <2 x double> where the sign operand is a <2 x float> widened
     ; with fpext.
     ; Expected code: zero v2 and negate it as two doubles to obtain the 64-bit
     ; sign-bit mask (-0.0 per lane), widen both floats of v1 with fcvtl, then
     ; `bit` copies the masked bits from v1 into v0.
     ; NOTE(review): the "v232" suffix looks like a typo for "v2f32" (the second
     ; operand is <2 x float>); the name is left as-is so the function and its
     ; label expectation stay in sync.
 124 ; CHECK-LABEL: test_copysign_v2f64_v232:
 125 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
 126 ; CHECK-NEXT:    fneg.2d v2, v2
 127 ; CHECK-NEXT:    fcvtl v1.2d, v1.2s
 128 ; CHECK-NEXT:    bit.16b v0, v1, v2
 129 ; CHECK-NEXT:    ret
 130   %tmp0 = fpext <2 x float> %b to <2 x double>
 131   %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
 132   ret <2 x double> %r
 133 }
 134
 135 define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
     ; copysign on <2 x double> with matching operand types.
     ; Expected code: zero v2 and negate it as two doubles to obtain the 64-bit
     ; sign-bit mask (-0.0 per lane), then `bit` copies the masked bits from v1
     ; into v0. No conversion instruction is expected.
 136 ; CHECK-LABEL: test_copysign_v2f64_v2f64:
 137 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
 138 ; CHECK-NEXT:    fneg.2d v2, v2
 139 ; CHECK-NEXT:    bit.16b v0, v1, v2
 140 ; CHECK-NEXT:    ret
 141   %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
 142   ret <2 x double> %r
 143 }
 144
 145 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0 ; copysign intrinsic overload for <2 x double>, called by the v2f64 tests above
 146
 147 ;============ v4f64
 148
 149 ; SplitVecRes mismatched
     ; copysign on <4 x double> where the sign operand is a <4 x float> widened
     ; with fpext. The result spans two registers in the expected output (v0 and
     ; v1 are both written), while the float sign source is read from v2.
     ; Expected code: fcvtl2 widens the upper two floats of v2 into v4 and fcvtl
     ; widens the lower two in place; the 64-bit sign-bit mask (-0.0 per lane) is
     ; built in v3 via zero + fneg; one `bit` per result half applies the mask.
     ; NOTE(review): "SplitVecRes" presumably names the type-legalizer path for
     ; splitting a too-wide result vector — confirm.
 150 define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
 151 ; CHECK-LABEL: test_copysign_v4f64_v4f32:
 152 ; CHECK-NEXT:    movi.2d v3, #0000000000000000
 153 ; CHECK-NEXT:    fcvtl2 v4.2d, v2.4s
 154 ; CHECK-NEXT:    fcvtl v2.2d, v2.2s
 155 ; CHECK-NEXT:    fneg.2d v3, v3
 156 ; CHECK-NEXT:    bit.16b v1, v4, v3
 157 ; CHECK-NEXT:    bit.16b v0, v2, v3
 158 ; CHECK-NEXT:    ret
 159   %tmp0 = fpext <4 x float> %b to <4 x double>
 160   %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
 161   ret <4 x double> %r
 162 }
 163
 164 ; SplitVecRes same
     ; copysign on <4 x double> with matching operand types. Both operands span
     ; two registers in the expected output (v0/v1 are written, v2/v3 are read).
     ; Expected code: the 64-bit sign-bit mask (-0.0 per lane) is built once in v4
     ; via zero + fneg and shared by two `bit` instructions, one per 128-bit half.
 165 define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
 166 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
 167 ; CHECK-NEXT:    movi.2d v4, #0000000000000000
 168 ; CHECK-NEXT:    fneg.2d v4, v4
 169 ; CHECK-NEXT:    bit.16b v0, v2, v4
 170 ; CHECK-NEXT:    bit.16b v1, v3, v4
 171 ; CHECK-NEXT:    ret
 172   %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
 173   ret <4 x double> %r
 174 }
 175
 176 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0 ; copysign intrinsic overload for <4 x double>, called by the v4f64 tests above
 177
 178 attributes #0 = { nounwind } ; attribute group #0, attached to the test functions and intrinsic declarations above