llvm/test/CodeGen/X86/pseudo_cmov_lower.ll

   1 ; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s
   2
   3 ; This test checks that only a single js gets generated in the final code
   4 ; for lowering the CMOV pseudos that get created for this IR.
   5 ; CHECK-LABEL: foo1:
   6 ; CHECK: js
   7 ; CHECK-NOT: js
   8 define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind { ; two i32 selects driven by one sign test on %v1
   9 entry:
  10   %cmp = icmp slt i32 %v1, 0 ; signed compare: true when %v1 is negative
  11   %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3 ; first select on %cmp
  12   %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2 ; second select reuses the same %cmp
  13   %sub = sub i32 %v1.v2, %v2.v3 ; both select results feed the returned value
  14   ret i32 %sub
  15 }
  16
  17 ; This test checks that only a single js gets generated in the final code
  18 ; for lowering the CMOV pseudos that get created for this IR. This makes
  19 ; sure the code for the lowering for opposite conditions gets tested.
  20 ; CHECK-LABEL: foo11:
  21 ; CHECK: js
  22 ; CHECK-NOT: js
  23 ; CHECK-NOT: jns
  24 define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind { ; two i32 selects whose conditions are logical opposites
  25 entry:
  26   %cmp1 = icmp slt i32 %v1, 0 ; true when %v1 is negative
  27   %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3 ; select on the "negative" condition
  28   %cmp2 = icmp sge i32 %v1, 0 ; same operands as %cmp1 with the inverse predicate (sge vs. slt)
  29   %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2 ; select on the inverted condition
  30   %sub = sub i32 %v1.v2, %v2.v3 ; both select results feed the returned value
  31   ret i32 %sub
  32 }
  33
  34 ; This test checks that only a single js gets generated in the final code
  35 ; for lowering the CMOV pseudos that get created for this IR.
  36 ; CHECK-LABEL: foo2:
  37 ; CHECK: js
  38 ; CHECK-NOT: js
  39 define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind { ; i8 variant: two selects on one sign test, widened to i32 afterwards
  40 entry:
  41   %cmp = icmp slt i8 %v1, 0 ; true when the i8 value %v1 is negative
  42   %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3 ; first i8 select on %cmp
  43   %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2 ; second i8 select reuses %cmp
  44   %t1 = sext i8 %v2.v3 to i32 ; the selects operate on i8; sign extension happens afterwards
  45   %t2 = sext i8 %v1.v2 to i32
  46   %sub = sub i32 %t1, %t2 ; both extended select results feed the returned value
  47   ret i32 %sub
  48 }
  49
  50 ; This test checks that only a single js gets generated in the final code
  51 ; for lowering the CMOV pseudos that get created for this IR.
  52 ; CHECK-LABEL: foo3:
  53 ; CHECK: js
  54 ; CHECK-NOT: js
  55 define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind { ; i16 variant: two selects on one sign test, widened to i32 afterwards
  56 entry:
  57   %cmp = icmp slt i16 %v1, 0 ; true when the i16 value %v1 is negative
  58   %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3 ; first i16 select on %cmp
  59   %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2 ; second i16 select reuses %cmp
  60   %t1 = sext i16 %v2.v3 to i32 ; the selects operate on i16; sign extension happens afterwards
  61   %t2 = sext i16 %v1.v2 to i32
  62   %sub = sub i32 %t1, %t2 ; both extended select results feed the returned value
  63   ret i32 %sub
  64 }
  65
  66 ; This test checks that only a single js gets generated in the final code
  67 ; for lowering the CMOV pseudos that get created for this IR.
  68 ; CHECK-LABEL: foo4:
  69 ; CHECK: js
  70 ; CHECK-NOT: js
  71 define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind { ; float variant: integer condition, floating-point select operands
  72 entry:
  73   %cmp = icmp slt i32 %v1, 0 ; true when %v1 is negative
  74   %t1 = select i1 %cmp, float %v2, float %v3 ; first float select on %cmp
  75   %t2 = select i1 %cmp, float %v3, float %v4 ; second float select reuses %cmp; %v3 appears in both selects
  76   %sub = fsub float %t1, %t2 ; both select results feed the returned value
  77   ret float %sub
  78 }
  79
  80 ; This test checks that only a single je gets generated in the final code
  81 ; for lowering the CMOV pseudos that get created for this IR.
  82 ; CHECK-LABEL: foo5:
  83 ; CHECK: je
  84 ; CHECK-NOT: je
  85 define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind { ; double variant; the condition is an equality test
  86 entry:
  87   %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
  88   %t1 = select i1 %cmp, double %v2, double %v3 ; first double select on %cmp
  89   %t2 = select i1 %cmp, double %v3, double %v4 ; second double select reuses %cmp; %v3 appears in both selects
  90   %sub = fsub double %t1, %t2 ; both select results feed the returned value
  91   ret double %sub
  92 }
  93
  94 ; This test checks that only a single je gets generated in the final code
  95 ; for lowering the CMOV pseudos that get created for this IR.
  96 ; CHECK-LABEL: foo6:
  97 ; CHECK: je
  98 ; CHECK-NOT: je
  99 define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind { ; <4 x float> variant: scalar i1 condition selecting whole vectors
 100 entry:
 101   %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
 102   %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3 ; first vector select on the scalar %cmp
 103   %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4 ; second vector select reuses %cmp; %v3 appears in both selects
 104   %sub = fsub <4 x float> %t1, %t2 ; both select results feed the returned value
 105   ret <4 x float> %sub
 106 }
 107
 108 ; This test checks that only a single je gets generated in the final code
 109 ; for lowering the CMOV pseudos that get created for this IR.
 110 ; CHECK-LABEL: foo7:
 111 ; CHECK: je
 112 ; CHECK-NOT: je
 113 define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind { ; <2 x double> variant: scalar i1 condition selecting whole vectors
 114 entry:
 115   %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
 116   %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3 ; first vector select on the scalar %cmp
 117   %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4 ; second vector select reuses %cmp; %v3 appears in both selects
 118   %sub = fsub <2 x double> %t1, %t2 ; both select results feed the returned value
 119   ret <2 x double> %sub
 120 }
 121
 122 ; This test checks that only a single ja gets generated in the final code
 123 ; for lowering the CMOV pseudos that get created for this IR. This combines
 124 ; all the supported types together into one long string of selects based
 125 ; on the same condition.
 126 ; CHECK-LABEL: foo8:
 127 ; CHECK: ja
 128 ; CHECK-NOT: ja
 129 define void @foo8(i32 %v1, ; %v1 is used only to form the shared select condition
 130                   i8 %v2, i8 %v3, ; NOTE(review): this i8 pair is never referenced in the body - confirm whether an i8 select was intended
 131                   i16 %v12, i16 %v13,
 132                   i32 %v22, i32 %v23,
 133                   float %v32, float %v33,
 134                   double %v42, double %v43,
 135                   <4 x float> %v52, <4 x float> %v53,
 136                   <2 x double> %v62, <2 x double> %v63,
 137                   <8 x float> %v72, <8 x float> %v73,
 138                   <4 x double> %v82, <4 x double> %v83,
 139                   <16 x float> %v92, <16 x float> %v93,
 140                   <8 x double> %v102, <8 x double> %v103,
 141                   ptr %dst) nounwind { ; %dst is the base address for every store below
 142 entry:
 143   %add.ptr11 = getelementptr inbounds i8, ptr %dst, i32 2 ; byte offset 2: destination of the i16 result
 144
 145   %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4 ; byte offset 4: destination of the i32 result
 146
 147   %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8 ; byte offset 8: destination of the float result
 148
 149   %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16 ; byte offset 16: destination of the double result
 150
 151   %add.ptr51 = getelementptr inbounds i8, ptr %dst, i32 32 ; byte offset 32: destination of the <4 x float> result
 152
 153   %add.ptr61 = getelementptr inbounds i8, ptr %dst, i32 48 ; byte offset 48: destination of the <2 x double> result
 154
 155   %add.ptr71 = getelementptr inbounds i8, ptr %dst, i32 64 ; byte offset 64: destination of the <8 x float> result
 156
 157   %add.ptr81 = getelementptr inbounds i8, ptr %dst, i32 128 ; byte offset 128: destination of the <4 x double> result
 158
 159   %add.ptr91 = getelementptr inbounds i8, ptr %dst, i32 64 ; NOTE(review): same offset as %add.ptr71, so the later 64-byte <16 x float> store lands on top of the 32-byte <8 x float> store - confirm intended
 160
 161   %add.ptr101 = getelementptr inbounds i8, ptr %dst, i32 128 ; NOTE(review): same offset as %add.ptr81, so the later 64-byte <8 x double> store lands on top of the 32-byte <4 x double> store - confirm intended
 162
 163   ; These operations are necessary, because select of two single use loads
 164   ; ends up getting optimized into a select of two leas, followed by a
 165   ; single load of the selected address.
 166   %t13 = xor i16 %v13, 11 ; each %tN3 below is a computed value used as the false operand of the matching select
 167   %t23 = xor i32 %v23, 1234
 168   %t33 = fadd float %v33, %v32
 169   %t43 = fadd double %v43, %v42
 170   %t53 = fadd <4 x float> %v53, %v52
 171   %t63 = fadd <2 x double> %v63, %v62
 172   %t73 = fsub <8 x float> %v73, %v72
 173   %t83 = fsub <4 x double> %v83, %v82
 174   %t93 = fsub <16 x float> %v93, %v92
 175   %t103 = fsub <8 x double> %v103, %v102
 176
 177   %cmp = icmp ugt i32 %v1, 31 ; unsigned compare: true when %v1 > 31; shared by every select below
 178   %t11 = select i1 %cmp, i16 %v12, i16 %t13 ; ten selects follow, one per type, all on the same %cmp
 179   %t21 = select i1 %cmp, i32 %v22, i32 %t23
 180   %t31 = select i1 %cmp, float %v32, float %t33
 181   %t41 = select i1 %cmp, double %v42, double %t43
 182   %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
 183   %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
 184   %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
 185   %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
 186   %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
 187   %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103
 188
 189   store i16 %t11, ptr %add.ptr11, align 2 ; every select result is stored through its %add.ptrN1 pointer
 190   store i32 %t21, ptr %add.ptr21, align 4
 191   store float %t31, ptr %add.ptr31, align 4
 192   store double %t41, ptr %add.ptr41, align 8
 193   store <4 x float> %t51, ptr %add.ptr51, align 16
 194   store <2 x double> %t61, ptr %add.ptr61, align 16
 195   store <8 x float> %t71, ptr %add.ptr71, align 32 ; written at offset 64; see the note on %add.ptr91
 196   store <4 x double> %t81, ptr %add.ptr81, align 32 ; written at offset 128; see the note on %add.ptr101
 197   store <16 x float> %t91, ptr %add.ptr91, align 32 ; also written at offset 64
 198   store <8 x double> %t101, ptr %add.ptr101, align 32 ; also written at offset 128
 199
 200   ret void
 201 }
 202
 203 ; This test checks that only a single ja gets generated in the final code
 204 ; for lowering the CMOV pseudos that get created for this IR.
 205 ; All of the selects are based on the same condition.
 206 ; Contrary to my expectations, this doesn't exercise the code for
 207 ; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1.  Instead the selects all
 208 ; get lowered into vector length number of selects, which all eventually turn
 209 ; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
 210 ; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get
 211 ; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*I1
 212 ; pseudo-opcodes to be generated, this test should be replaced with one that
 213 ; tests those opcodes.
 214 ;
 215 ; CHECK-LABEL: foo9:
 216 ; CHECK: ja
 217 ; CHECK-NOT: ja
 218 define void @foo9(i32 %v1, ; %v1 is used only to form the shared select condition
 219                   <8 x i1> %v12, <8 x i1> %v13,
 220                   <16 x i1> %v22, <16 x i1> %v23,
 221                   <32 x i1> %v32, <32 x i1> %v33,
 222                   <64 x i1> %v42, <64 x i1> %v43,
 223                   ptr %dst) nounwind { ; %dst is the base address for every store below
 224 entry:
 225
 226   %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4 ; byte offset 4: destination of the <16 x i1> result
 227
 228   %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8 ; byte offset 8: destination of the <32 x i1> result
 229
 230   %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16 ; byte offset 16: destination of the <64 x i1> result
 231
 232   ; These operations are necessary, because select of two single use loads
 233   ; ends up getting optimized into a select of two leas, followed by a
 234   ; single load of the selected address.
 235   %t13 = xor <8 x i1> %v13, %v12 ; each %tN3 below is a computed value used as the false operand of the matching select
 236   %t23 = xor <16 x i1> %v23, %v22
 237   %t33 = xor <32 x i1> %v33, %v32
 238   %t43 = xor <64 x i1> %v43, %v42
 239
 240   %cmp = icmp ugt i32 %v1, 31 ; unsigned compare: true when %v1 > 31; shared by every select below
 241   %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13 ; four mask-vector selects follow, all on the same scalar %cmp
 242   %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
 243   %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
 244   %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43
 245
 246   store <8 x i1> %t11, ptr %dst, align 16 ; the <8 x i1> result goes to %dst itself; no offset pointer is computed for it
 247   store <16 x i1> %t21, ptr %add.ptr21, align 4
 248   store <32 x i1> %t31, ptr %add.ptr31, align 8
 249   store <64 x i1> %t41, ptr %add.ptr41, align 16
 250
 251   ret void
 252 }