test/CodeGen/AArch64/fcvt_combine.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
   2
   3 ; CHECK-LABEL: test1:
   4 ; CHECK-NOT: fmul.2s
   5 ; CHECK: fcvtzs.2s v0, v0, #4
   6 ; CHECK: ret
   7 define <2 x i32> @test1(<2 x float> %f) { ; v2f32 * 2^4 -> signed v2i32
   8   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; splat of 16.0 == 2^4
   9   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; expected to absorb the fmul as fixed-point scale #4
  10   ret <2 x i32> %vcvt.i
  11 }
  12
  13 ; CHECK-LABEL: test2:
  14 ; CHECK-NOT: fmul.4s
  15 ; CHECK: fcvtzs.4s v0, v0, #3
  16 ; CHECK: ret
  17 define <4 x i32> @test2(<4 x float> %f) { ; v4f32 * 2^3 -> signed v4i32
  18   %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> ; splat of 8.0 == 2^3
  19   %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32> ; expected to absorb the fmul as fixed-point scale #3
  20   ret <4 x i32> %vcvt.i
  21 }
  22
  23 ; CHECK-LABEL: test3:
  24 ; CHECK-NOT: fmul.2d
  25 ; CHECK: fcvtzs.2d v0, v0, #5
  26 ; CHECK: ret
  27 define <2 x i64> @test3(<2 x double> %d) { ; v2f64 * 2^5 -> signed v2i64
  28   %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00> ; splat of 32.0 == 2^5
  29   %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64> ; expected to absorb the fmul as fixed-point scale #5
  30   ret <2 x i64> %vcvt.i
  31 }
  32
  33 ; Truncate double to i32: the fmul folds into the conversion, then xtn narrows the result.
  34 ; CHECK-LABEL: test4:
  35 ; CHECK-NOT: fmul.2d
  36 ; CHECK: fcvtzs.2d v0, v0, #4
  37 ; CHECK: xtn.2s
  38 ; CHECK: ret
  39 define <2 x i32> @test4(<2 x double> %d) { ; v2f64 * 2^4 -> signed v2i32
  40   %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00> ; splat of 16.0 == 2^4
  41   %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32> ; 64-bit lanes converted with scale #4, then narrowed to 32-bit
  42   ret <2 x i32> %vcvt.i
  43 }
  44
  45 ; Truncate float to i16
  46 ; CHECK-LABEL: test5:
  47 ; CHECK-NOT: fmul.2s
  48 ; CHECK: fcvtzs.2s v0, v0, #4
  49 ; CHECK: ret
  50 define <2 x i16> @test5(<2 x float> %f) { ; v2f32 * 2^4 -> signed v2i16
  51   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; splat of 16.0 == 2^4
  52   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16> ; result narrower than the source; fold with scale #4 still expected
  53   ret <2 x i16> %vcvt.i
  54 }
  55
  56 ; Don't convert float to i64: the fmul must survive and the input is widened first.
  57 ; CHECK-LABEL: test6:
  58 ; CHECK: fmov.2s v1, #16.00000000
  59 ; CHECK: fmul.2s v0, v0, v1
  60 ; CHECK: fcvtl v0.2d, v0.2s
  61 ; CHECK: fcvtzs.2d v0, v0
  62 ; CHECK: ret
  63 define <2 x i64> @test6(<2 x float> %f) { ; v2f32 * 2^4 -> signed v2i64 (negative test)
  64   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; splat of 16.0, expected to stay a real fmul.2s
  65   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64> ; integer lanes are wider than the float lanes
  66   ret <2 x i64> %vcvt.i
  67 }
  68
  69 ; Check unsigned conversion.
  70 ; CHECK-LABEL: test7:
  71 ; CHECK-NOT: fmul.2s
  72 ; CHECK: fcvtzu.2s v0, v0, #4
  73 ; CHECK: ret
  74 define <2 x i32> @test7(<2 x float> %f) { ; v2f32 * 2^4 -> unsigned v2i32
  75   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; splat of 16.0 == 2^4
  76   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; fptoui selects the unsigned fcvtzu form, scale #4
  77   ret <2 x i32> %vcvt.i
  78 }
  79
  80 ; Test which should not fold due to non-power of 2.
  81 ; CHECK-LABEL: test8:
  82 ; CHECK: fmov.2s v1, #17.00000000
  83 ; CHECK: fmul.2s v0, v0, v1
  84 ; CHECK: fcvtzu.2s v0, v0
  85 ; CHECK: ret
  86 define <2 x i32> @test8(<2 x float> %f) { ; v2f32 * 17.0 -> unsigned v2i32 (negative test)
  87   %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00> ; 17.0 is not a power of two
  88   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected as a separate conversion after the fmul.2s
  89   ret <2 x i32> %vcvt.i
  90 }
  91
  92 ; Test which should not fold due to non-matching power of 2.
  93 ; CHECK-LABEL: test9:
  94 ; CHECK: fmul.2s v0, v0, v1
  95 ; CHECK: fcvtzu.2s v0, v0
  96 ; CHECK: ret
  97 define <2 x i32> @test9(<2 x float> %f) { ; v2f32 * <2^4, 2^3> -> unsigned v2i32 (negative test)
  98   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00> ; lanes differ (16.0 vs 8.0), so there is no single scale
  99   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected as a separate conversion after the fmul.2s
 100   ret <2 x i32> %vcvt.i
 101 }
 102
 103 ; Combine all undefs.
 104 ; CHECK-LABEL: test10:
 105 ; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
 106 ; CHECK: ret
 107 define <2 x i32> @test10(<2 x float> %f) { ; v2f32 * undef -> unsigned v2i32
 108   %mul.i = fmul <2 x float> %f, <float undef, float undef> ; every multiplier lane is undef
 109   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; the check only requires an fcvtzu.2s; registers and scale are not pinned
 110   ret <2 x i32> %vcvt.i
 111 }
 112
 113 ; Combine if mix of undef and pow2.
 114 ; CHECK-LABEL: test11:
 115 ; CHECK: fcvtzu.2s v0, v0, #3
 116 ; CHECK: ret
 117 define <2 x i32> @test11(<2 x float> %f) { ; v2f32 * <undef, 2^3> -> unsigned v2i32
 118   %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00> ; the undef lane is expected to be treated as matching 8.0 == 2^3
 119   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected to absorb the fmul as fixed-point scale #3
 120   ret <2 x i32> %vcvt.i
 121 }
 122
 123 ; Don't combine when multiplied by 0.0.
 124 ; CHECK-LABEL: test12:
 125 ; CHECK: fmul.2s v0, v0, v1
 126 ; CHECK: fcvtzs.2s v0, v0
 127 ; CHECK: ret
 128 define <2 x i32> @test12(<2 x float> %f) { ; v2f32 * 0.0 -> signed v2i32 (negative test)
 129   %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00> ; 0.0 is not a power of two, so no scale exists
 130   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; expected as a separate conversion after the fmul.2s
 131   ret <2 x i32> %vcvt.i
 132 }
 133
 134 ; Test which should not fold due to power of 2 out of range (i.e., 2^33).
 135 ; CHECK-LABEL: test13:
 136 ; CHECK: fmul.2s v0, v0, v1
 137 ; CHECK: fcvtzs.2s v0, v0
 138 ; CHECK: ret
 139 define <2 x i32> @test13(<2 x float> %f) { ; v2f32 * 2^33 -> signed v2i32 (negative test)
 140   %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000> ; biased exponent 0x420 = 1056, i.e. 2^(1056-1023) = 2^33
 141   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; scale 33 exceeds the 32-bit lane width, so the fmul.2s must remain
 142   ret <2 x i32> %vcvt.i
 143 }
 144
 145 ; Test case where const is max power of 2 (i.e., 2^32).
 146 ; CHECK-LABEL: test14:
 147 ; CHECK: fcvtzs.2s v0, v0, #32
 148 ; CHECK: ret
 149 define <2 x i32> @test14(<2 x float> %f) { ; v2f32 * 2^32 -> signed v2i32
 150   %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000> ; biased exponent 0x41F = 1055, i.e. 2^(1055-1023) = 2^32
 151   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; scale #32 equals the lane width and is still expected to fold
 152   ret <2 x i32> %vcvt.i
 153 }
 154
 155 ; CHECK-LABEL: test_illegal_fp_to_int:
 156 ; CHECK: fcvtzs.4s v0, v0, #2
 157 define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) { ; 3-element vectors in and out
 158   %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0> ; splat of 4.0 == 2^2
 159   %val = fptosi <3 x float> %scale to <3 x i32> ; expected as a 4-lane (.4s) conversion with fixed-point scale #2
 160   ret <3 x i32> %val
 161 }