test/CodeGen/ARM/vfcmp.ll

   1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
   2
   3 ; This tests fcmp operations that do not map directly to NEON instructions.
   4
   5 ; fcmp une (unordered or not-equal): expected lowering is VCEQ followed by VMVN.
   6 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
   7 ;CHECK: vcunef32:
   8 ;CHECK: vceq.f32
   9 ;CHECK-NEXT: vmvn
  10   %lhs = load <2 x float>* %A
  11   %rhs = load <2 x float>* %B
  12   %cmp = fcmp une <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  13   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  14   ret <2 x i32> %mask
  15 }
  16
  17 ; fcmp olt (ordered less-than): expected lowering is a single VCGT.
  18 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  19 ;CHECK: vcoltf32:
  20 ;CHECK: vcgt.f32
  21   %lhs = load <2 x float>* %A
  22   %rhs = load <2 x float>* %B
  23   %cmp = fcmp olt <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  24   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  25   ret <2 x i32> %mask
  26 }
  27
  28 ; fcmp ole (ordered less-or-equal): expected lowering is a single VCGE.
  29 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
  30 ;CHECK: vcolef32:
  31 ;CHECK: vcge.f32
  32   %lhs = load <2 x float>* %A
  33   %rhs = load <2 x float>* %B
  34   %cmp = fcmp ole <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  35   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  36   ret <2 x i32> %mask
  37 }
  38
  39 ; fcmp uge (unordered or >=), i.e. NOT olt: expected lowering is VCGT then VMVN.
  40 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
  41 ;CHECK: vcugef32:
  42 ;CHECK: vcgt.f32
  43 ;CHECK-NEXT: vmvn
  44   %lhs = load <2 x float>* %A
  45   %rhs = load <2 x float>* %B
  46   %cmp = fcmp uge <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  47   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  48   ret <2 x i32> %mask
  49 }
  50
  51 ; fcmp ule (unordered or <=), i.e. NOT ogt: expected lowering is VCGT then VMVN.
  52 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
  53 ;CHECK: vculef32:
  54 ;CHECK: vcgt.f32
  55 ;CHECK-NEXT: vmvn
  56   %lhs = load <2 x float>* %A
  57   %rhs = load <2 x float>* %B
  58   %cmp = fcmp ule <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  59   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  60   ret <2 x i32> %mask
  61 }
  62
  63 ; fcmp ugt (unordered or >), i.e. NOT ole: expected lowering is VCGE then VMVN.
  64 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  65 ;CHECK: vcugtf32:
  66 ;CHECK: vcge.f32
  67 ;CHECK-NEXT: vmvn
  68   %lhs = load <2 x float>* %A
  69   %rhs = load <2 x float>* %B
  70   %cmp = fcmp ugt <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  71   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  72   ret <2 x i32> %mask
  73 }
  74
  75 ; fcmp ult (unordered or <), i.e. NOT oge: expected lowering is VCGE then VMVN.
  76 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  77 ;CHECK: vcultf32:
  78 ;CHECK: vcge.f32
  79 ;CHECK-NEXT: vmvn
  80   %lhs = load <2 x float>* %A
  81   %rhs = load <2 x float>* %B
  82   %cmp = fcmp ult <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  83   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  84   ret <2 x i32> %mask
  85 }
  86
  87 ; fcmp ueq (unordered or equal), i.e. NOT one: expected lowering is VCGT, VCGT, VORR, VMVN.
  88 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  89 ;CHECK: vcueqf32:
  90 ;CHECK: vcgt.f32
  91 ;CHECK-NEXT: vcgt.f32
  92 ;CHECK-NEXT: vorr
  93 ;CHECK-NEXT: vmvn
  94   %lhs = load <2 x float>* %A
  95   %rhs = load <2 x float>* %B
  96   %cmp = fcmp ueq <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
  97   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
  98   ret <2 x i32> %mask
  99 }
 100
 101 ; fcmp one (ordered and not-equal), i.e. ogt OR olt: expected lowering is VCGT, VCGT, VORR.
 102 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 103 ;CHECK: vconef32:
 104 ;CHECK: vcgt.f32
 105 ;CHECK-NEXT: vcgt.f32
 106 ;CHECK-NEXT: vorr
 107   %lhs = load <2 x float>* %A
 108   %rhs = load <2 x float>* %B
 109   %cmp = fcmp one <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
 110   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
 111   ret <2 x i32> %mask
 112 }
 113
 114 ; fcmp uno (either operand is NaN), i.e. NOT ord: expected lowering is VCGE, VCGT, VORR, VMVN.
 115 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
 116 ;CHECK: vcunof32:
 117 ;CHECK: vcge.f32
 118 ;CHECK-NEXT: vcgt.f32
 119 ;CHECK-NEXT: vorr
 120 ;CHECK-NEXT: vmvn
 121   %lhs = load <2 x float>* %A
 122   %rhs = load <2 x float>* %B
 123   %cmp = fcmp uno <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
 124   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
 125   ret <2 x i32> %mask
 126 }
 127
 128 ; fcmp ord (neither operand is NaN): expected lowering is VCGE, VCGT, VORR.
 129 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 130 ;CHECK: vcordf32:
 131 ;CHECK: vcge.f32
 132 ;CHECK-NEXT: vcgt.f32
 133 ;CHECK-NEXT: vorr
 134   %lhs = load <2 x float>* %A
 135   %rhs = load <2 x float>* %B
 136   %cmp = fcmp ord <2 x float> %lhs, %rhs      ; per-lane <2 x i1> predicate
 137   %mask = sext <2 x i1> %cmp to <2 x i32>     ; true lanes -> all-ones, false lanes -> 0
 138   ret <2 x i32> %mask
 139 }