; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vceq\\.f32} %t | count 1
; RUN: grep {vcgt\\.f32} %t | count 9
; RUN: grep {vcge\\.f32} %t | count 5
; RUN: grep vorr %t | count 4
; RUN: grep vmvn %t | count 7

; This tests fcmp operations that do not map directly to NEON instructions.
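; NEON's VCEQ, VCGE, and VCGT comparisons are all ordered, so each predicate below
; is built from them by swapping operands, OR-ing two compares with VORR, and/or
; inverting the result with VMVN, as noted before each function.
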
; une is implemented with VCEQ/VMVN
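; (VCEQ is true only for ordered, equal lanes; inverting it with VMVN gives true for unordered or unequal lanes)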
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; olt is implemented with VCGT
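; (a < b is evaluated as b > a, i.e. VCGT with the operands swapped)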
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ole is implemented with VCGE
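; (a <= b is evaluated as b >= a, i.e. VCGE with the operands swapped)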
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; uge is implemented with VCGT/VMVN
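; (uge is the complement of olt, so the VCGT result is inverted with VMVN)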
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ule is implemented with VCGT/VMVN
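; (ule is the complement of ogt, so the VCGT result is inverted with VMVN)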
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ugt is implemented with VCGE/VMVN
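; (ugt is the complement of ole, so the VCGE result is inverted with VMVN)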
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ult is implemented with VCGE/VMVN
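; (ult is the complement of oge, so the VCGE result is inverted with VMVN)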
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ueq is implemented with VCGT/VCGT/VORR/VMVN
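; (ueq is the complement of one: the two VCGT results are ORed with VORR, then inverted with VMVN)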
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; one is implemented with VCGT/VCGT/VORR
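; (one is true when a > b or a < b, so two VCGT results are ORed with VORR)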
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; uno is implemented with VCGT/VCGE/VORR/VMVN
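; (uno is the complement of ord: the VCGT and VCGE results are ORed with VORR, then inverted with VMVN)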
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

; ord is implemented with VCGT/VCGE/VORR
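; (ord is true when neither operand is NaN; the VCGT and VCGE results ORed with VORR cover exactly the ordered lanes)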
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}