llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll

   1 ; RUN: opt -S -instcombine < %s | FileCheck %s
   2
   3 target triple = "aarch64"
   4
   5 define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) #0 {
     ; Round trip nxv8i1 -> svbool -> nxv8i1. Both conversions are expected to
     ; fold away so that %a is returned directly. #0 attaches the "+sve"
     ; attribute group defined at the end of this file.
   6 ; CHECK-LABEL: @reinterpret_test_h(
   7 ; CHECK-NOT: convert
   8 ; CHECK: ret <vscale x 8 x i1> %a
   9   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
  10   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  11   ret <vscale x 8 x i1> %2
  12 }
  13
  14 ; Reinterprets are not redundant because the second reinterpret zeros the
  15 ; lanes that don't exist within its input.
  16 define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) #0 {
     ; svbool -> nxv8i1 -> svbool: both calls are expected to survive unchanged.
     ; #0 attaches the "+sve" attribute group defined at the end of this file.
  17 ; CHECK-LABEL: @reinterpret_test_h_rev(
  18 ; CHECK: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
  19 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
  20 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
  21   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
  22   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
  23   ret <vscale x 16 x i1> %2
  24 }
  25
  26 define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) #0 {
     ; Round trip nxv4i1 -> svbool -> nxv4i1. Both conversions are expected to
     ; fold away so that %a is returned directly. #0 attaches the "+sve"
     ; attribute group defined at the end of this file.
  27 ; CHECK-LABEL: @reinterpret_test_w(
  28 ; CHECK-NOT: convert
  29 ; CHECK: ret <vscale x 4 x i1> %a
  30   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
  31   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  32   ret <vscale x 4 x i1> %2
  33 }
  34
  35 ; Reinterprets are not redundant because the second reinterpret zeros the
  36 ; lanes that don't exist within its input.
  37 define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) #0 {
     ; svbool -> nxv4i1 -> svbool: both calls are expected to survive unchanged.
     ; #0 attaches the "+sve" attribute group defined at the end of this file.
  38 ; CHECK-LABEL: @reinterpret_test_w_rev(
  39 ; CHECK: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
  40 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
  41 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
  42   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
  43   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
  44   ret <vscale x 16 x i1> %2
  45 }
  46
  47 define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) #0 {
     ; Round trip nxv2i1 -> svbool -> nxv2i1. Both conversions are expected to
     ; fold away so that %a is returned directly. #0 attaches the "+sve"
     ; attribute group defined at the end of this file.
  48 ; CHECK-LABEL: @reinterpret_test_d(
  49 ; CHECK-NOT: convert
  50 ; CHECK: ret <vscale x 2 x i1> %a
  51   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  52   %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
  53   ret <vscale x 2 x i1> %2
  54 }
  55
  56 ; Reinterprets are not redundant because the second reinterpret zeros the
  57 ; lanes that don't exist within its input.
  58 define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) #0 {
     ; svbool -> nxv2i1 -> svbool: both calls are expected to survive unchanged.
     ; #0 attaches the "+sve" attribute group defined at the end of this file.
  59 ; CHECK-LABEL: @reinterpret_test_d_rev(
  60 ; CHECK: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
  61 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
  62 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
  63   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
  64   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
  65   ret <vscale x 16 x i1> %2
  66 }
  67
  68 define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) #0 {
     ; Six-step chain that starts from nxv2i1, passes through nxv4i1 twice and
     ; ends back at nxv2i1. The whole chain is expected to collapse so that %a
     ; is returned directly. #0 attaches the "+sve" attribute group defined at
     ; the end of this file.
  69 ; CHECK-LABEL: @reinterpret_test_full_chain(
  70 ; CHECK: ret <vscale x 2 x i1> %a
  71   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  72   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  73   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  74   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
  75   %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
  76   %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
  77   ret <vscale x 2 x i1> %6
  78 }
  79
  80 ; The last two reinterprets are not necessary, since they are doing the same
  81 ; work as the first two.
  82 define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) #0 {
     ; Expected result: only %1 and %2 remain and %2 is returned in place of %4.
     ; #0 attaches the "+sve" attribute group defined at the end of this file.
  83 ; CHECK-LABEL: @reinterpret_test_partial_chain(
  84 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  85 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  86 ; CHECK-NEXT: ret <vscale x 4 x i1> %2
  87   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  88   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  89   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  90   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
  91   ret <vscale x 4 x i1> %4
  92 }
  93
  94 ; The chain cannot be reduced because of the second reinterpret, which causes
  95 ; zeroing.
  96 define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) #0 {
     ; nxv8i1 -> svbool -> nxv4i1 -> svbool -> nxv8i1: all four calls are
     ; expected to survive unchanged. #0 attaches the "+sve" attribute group
     ; defined at the end of this file.
  97 ; CHECK-LABEL: @reinterpret_test_irreducible_chain(
  98 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
  99 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 100 ; CHECK-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
 101 ; CHECK-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
 102 ; CHECK-NEXT: ret <vscale x 8 x i1> %4
 103   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
 104   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 105   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
 106   %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
 107   ret <vscale x 8 x i1> %4
 108 }
 109
 110 ; Here, the candidate list is larger than the number of instructions that we
 111 ; end up removing.
 112 define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) #0 {
     ; Expected result: %1 and %2 are kept, %3 and %4 disappear and %2 is
     ; returned. #0 attaches the "+sve" attribute group defined at the end of
     ; this file.
 113 ; CHECK-LABEL: @reinterpret_test_keep_some_candidates(
 114 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
 115 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 116 ; CHECK-NEXT: ret <vscale x 4 x i1> %2
 117   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
 118   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 119   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
 120   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
 121   ret <vscale x 4 x i1> %4
 122 }
 123
 124 define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) #0 {
     ; Each predecessor widens an nxv2i1 value to svbool, the svbool values meet
     ; in a phi, and the phi is narrowed back to nxv2i1. Expected result: a phi
     ; over the original nxv2i1 values with no conversion calls left.
     ; The label below is anchored as "@name(" because the bare name is also a
     ; prefix of the following test functions. #0 attaches the "+sve" attribute
     ; group defined at the end of this file.
 125 ; CHECK-LABEL: @reinterpret_reductions(
 126 ; CHECK-NOT: convert
 127 ; CHECK-NOT: phi <vscale x 16 x i1>
 128 ; CHECK: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
 129 ; CHECK-NOT: convert
 130 ; CHECK: ret
 131
 132 entry:
 133   switch i32 %cond, label %br_phi_c [
 134          i32 43, label %br_phi_a
 135          i32 45, label %br_phi_b
 136   ]
 137
 138 br_phi_a:
 139   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
 140   br label %join
 141
 142 br_phi_b:
 143   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
 144   br label %join
 145
 146 br_phi_c:
 147   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
 148   br label %join
 149
 150 join:
 151   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 152   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 153   ret <vscale x 2 x i1> %pg1
 154 }
 155
 156 ; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
 157 ; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
 158 define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) #0 {
     ; %b is nxv4i1 while %a and %c are nxv2i1, so the svbool phi and the final
     ; narrowing call are expected to stay. The label below is anchored as
     ; "@name(" to match the other tests in this file. #0 attaches the "+sve"
     ; attribute group defined at the end of this file.
 159 ; CHECK-LABEL: @reinterpret_reductions_1(
 160 ; CHECK: convert
 161 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 162 ; CHECK-NOT: phi <vscale x 2 x i1>
 163 ; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 164 ; CHECK: ret
 165
 166 entry:
 167   switch i32 %cond, label %br_phi_c [
 168          i32 43, label %br_phi_a
 169          i32 45, label %br_phi_b
 170   ]
 171
 172 br_phi_a:
 173   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
 174   br label %join
 175
 176 br_phi_b:
 177   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
 178   br label %join
 179
 180 br_phi_c:
 181   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
 182   br label %join
 183
 184 join:
 185   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 186   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 187   ret <vscale x 2 x i1> %pg1
 188 }
 189
 190 ; No transform. Similar to the test above, but here only two of the arguments need to
 191 ; be converted to svbool.
 192 define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) #0 {
     ; %b already has the svbool type and feeds the phi directly from br_phi_b,
     ; so the svbool phi and the final narrowing call are expected to stay.
     ; The label below is anchored as "@name(" to match the other tests in this
     ; file. #0 attaches the "+sve" attribute group defined at the end of this
     ; file.
 193 ; CHECK-LABEL: @reinterpret_reductions_2(
 194 ; CHECK: convert
 195 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
 196 ; CHECK-NOT: phi <vscale x 2 x i1>
 197 ; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 198 ; CHECK: ret
 199
 200 entry:
 201   switch i32 %cond, label %br_phi_c [
 202          i32 43, label %br_phi_a
 203          i32 45, label %br_phi_b
 204   ]
 205
 206 br_phi_a:
 207   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
 208   br label %join
 209
 210 br_phi_b:
 211   br label %join
 212
 213 br_phi_c:
 214   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
 215   br label %join
 216
 217 join:
 218   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
 219   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 220   ret <vscale x 2 x i1> %pg1
 221 }
 222
 223 ; Similar to reinterpret_reductions but the reinterprets remain because the
 224 ; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
 225 define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) #0 {
     ; The function returns the svbool phi %pg itself; %pg1 has no users. The
     ; svbool phi is expected to stay and no nxv2i1 phi may be introduced.
     ; The label below is anchored as "@name(" to match the other tests in this
     ; file. #0 attaches the "+sve" attribute group defined at the end of this
     ; file.
 226 ; CHECK-LABEL: @reinterpret_reductions3(
 227 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 228 ; CHECK-NOT: phi <vscale x 2 x i1>
 229 ; CHECK: ret <vscale x 16 x i1> %pg
 230
 231 entry:
 232   switch i32 %cond, label %br_phi_c [
 233          i32 43, label %br_phi_a
 234          i32 45, label %br_phi_b
 235   ]
 236
 237 br_phi_a:
 238   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
 239   br label %join
 240
 241 br_phi_b:
 242   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
 243   br label %join
 244
 245 br_phi_c:
 246   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
 247   br label %join
 248
 249 join:
 250   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 251   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 252   ret <vscale x 16 x i1> %pg
 253 }
 254
 255 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
 256 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
 257 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
 258 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
 259 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 260 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
 261
 262 attributes #0 = { "target-features"="+sve" }