1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Scalar byte swap, 32-bit: llvm.bswap.i32 should select the AArch64 REV
; instruction. NOTE(review): the CHECK body, ret and closing brace of these
; functions are not visible in this (gapped) listing.
3 define i32 @test_rev_w(i32 %a) nounwind {
5 ; CHECK-LABEL: test_rev_w:
7 %0 = tail call i32 @llvm.bswap.i32(i32 %a)
; Scalar byte swap, 64-bit: llvm.bswap.i64 on the full X register.
11 define i64 @test_rev_x(i64 %a) nounwind {
13 ; CHECK-LABEL: test_rev_x:
15 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
19 ; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16-bits
20 ; of %a are zero. This optimizes rev + lsr 16 to rev16.
21 define i32 @test_rev_w_srl16(i16 %a) {
23 ; CHECK-LABEL: test_rev_w_srl16:
24 ; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
25 ; CHECK: rev16 w0, [[REG]]
; The zext guarantees bits 31:16 are zero, which is what legitimizes the
; srl -> rotr canonicalization described above.
27 %0 = zext i16 %a to i32
28 %1 = tail call i32 @llvm.bswap.i32(i32 %0)
; NOTE(review): the lshr/ret lines of this function are not visible in this listing.
33 ; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32-bits
34 ; of %a are zero. This optimizes rev + lsr 32 to rev32.
35 define i64 @test_rev_x_srl32(i32 %a) {
37 ; CHECK-LABEL: test_rev_x_srl32:
38 ; CHECK: rev32 x0, {{x[0-9]+}}
; zext from i32 zeroes bits 63:32, allowing rev + lsr 32 to fold into rev32.
40 %0 = zext i32 %a to i64
41 %1 = tail call i64 @llvm.bswap.i64(i64 %0)
; Declarations of the scalar byte-swap intrinsics exercised above.
46 declare i32 @llvm.bswap.i32(i32) nounwind readnone
47 declare i64 @llvm.bswap.i64(i64) nounwind readnone
; rev16 pattern written out as shifts, masks and ors: each byte of %X is moved
; to its swapped position within its 16-bit halfword.
49 define i32 @test_rev16_w(i32 %X) nounwind {
51 ; CHECK-LABEL: test_rev16_w:
53 %tmp1 = lshr i32 %X, 8
; No-op bitcast (i32 -> i32); kept byte-identical so the DAG the matcher sees
; matches the original test's shape.
54 %X15 = bitcast i32 %X to i32
55 %tmp4 = shl i32 %X15, 8
; Masks select one byte from each shifted value:
; 16711680 = 0x00FF0000, -16777216 = 0xFF000000, 65280 = 0x0000FF00.
56 %tmp2 = and i32 %tmp1, 16711680
57 %tmp5 = and i32 %tmp4, -16777216
58 %tmp9 = and i32 %tmp1, 255
59 %tmp13 = and i32 %tmp4, 65280
; Recombine the four bytes; the whole dag should match a single REV16.
60 %tmp6 = or i32 %tmp5, %tmp2
61 %tmp10 = or i32 %tmp6, %tmp13
62 %tmp14 = or i32 %tmp10, %tmp9
66 ; 64-bit REV16 is *not* a swap then a 16-bit rotation:
67 ; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
68 ; 01234567 ->(rev16) 10325476
; Negative test: bswap.i64 followed by a 16-bit rotate must NOT be matched to
; a 64-bit REV16 (the CHECK-NOT below guards against that miscompile).
69 define i64 @test_rev16_x(i64 %a) nounwind {
71 ; CHECK-LABEL: test_rev16_x:
72 ; CHECK-NOT: rev16 x0, x0
73 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
; NOTE(review): the shift/or/ret lines of this function are not visible in this listing.
; rev32 on a 64-bit value: starts from bswap.i64. NOTE(review): the combining
; shift/or, the CHECK body and the ret are not visible in this listing —
; presumably they form the rotr-by-32 that selects REV32; confirm upstream.
80 define i64 @test_rev32_x(i64 %a) nounwind {
82 ; CHECK-LABEL: test_rev32_x:
84 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
; VREV64, 64-bit (D) vectors: each shuffle mask reverses the element order of
; the whole 64-bit register, which should select a vector REV64-style reverse.
; NOTE(review): per-function CHECK bodies and closing braces are not visible
; in this gapped listing.
91 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
92 ;CHECK-LABEL: test_vrev64D8:
94 %tmp1 = load <8 x i8>, <8 x i8>* %A
95 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Same pattern with 4 x i16 lanes.
99 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
100 ;CHECK-LABEL: test_vrev64D16:
102 %tmp1 = load <4 x i16>, <4 x i16>* %A
103 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Same pattern with 2 x i32 lanes.
107 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
108 ;CHECK-LABEL: test_vrev64D32:
110 %tmp1 = load <2 x i32>, <2 x i32>* %A
111 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; Same pattern with 2 x float lanes (checks FP vectors take the same path).
115 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
116 ;CHECK-LABEL: test_vrev64Df:
118 %tmp1 = load <2 x float>, <2 x float>* %A
119 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
120 ret <2 x float> %tmp2
; VREV64, 128-bit (Q) vectors: the masks reverse element order within each
; 64-bit half of the register independently.
; NOTE(review): CHECK bodies / closing braces are not visible in this listing.
123 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
124 ;CHECK-LABEL: test_vrev64Q8:
126 %tmp1 = load <16 x i8>, <16 x i8>* %A
127 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; 8 x i16: <3,2,1,0 | 7,6,5,4> — reversal within each doubleword.
131 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
132 ;CHECK-LABEL: test_vrev64Q16:
134 %tmp1 = load <8 x i16>, <8 x i16>* %A
135 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; 4 x i32: <1,0 | 3,2> — swap the i32 pair inside each doubleword.
139 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
140 ;CHECK-LABEL: test_vrev64Q32:
142 %tmp1 = load <4 x i32>, <4 x i32>* %A
143 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; 4 x float: same mask as the i32 case, on FP lanes.
147 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
148 ;CHECK-LABEL: test_vrev64Qf:
150 %tmp1 = load <4 x float>, <4 x float>* %A
151 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
152 ret <4 x float> %tmp2
; VREV32: the masks reverse element order within each 32-bit word.
; NOTE(review): CHECK bodies / closing braces are not visible in this listing.
155 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
156 ;CHECK-LABEL: test_vrev32D8:
158 %tmp1 = load <8 x i8>, <8 x i8>* %A
159 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; 4 x i16 in a D register: swap the i16 pair inside each word.
163 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
164 ;CHECK-LABEL: test_vrev32D16:
166 %tmp1 = load <4 x i16>, <4 x i16>* %A
167 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; 16 x i8 in a Q register: bytes reversed within each of the four words.
171 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
172 ;CHECK-LABEL: test_vrev32Q8:
174 %tmp1 = load <16 x i8>, <16 x i8>* %A
175 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; 8 x i16 in a Q register: halfword pairs swapped within each word.
179 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
180 ;CHECK-LABEL: test_vrev32Q16:
182 %tmp1 = load <8 x i16>, <8 x i16>* %A
183 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; VREV16: byte pairs swapped within each 16-bit halfword.
; NOTE(review): CHECK bodies / closing braces are not visible in this listing.
187 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
188 ;CHECK-LABEL: test_vrev16D8:
190 %tmp1 = load <8 x i8>, <8 x i8>* %A
191 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Same pattern widened to a 128-bit register.
195 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
196 ;CHECK-LABEL: test_vrev16Q8:
198 %tmp1 = load <16 x i8>, <16 x i8>* %A
199 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
203 ; Undef shuffle indices should not prevent matching to VREV:
; Mask is <7,u,u,4,3,2,1,0> — the defined lanes are consistent with a full
; 64-bit reversal, so the matcher should still pick the REV form.
205 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
206 ;CHECK-LABEL: test_vrev64D8_undef:
208 %tmp1 = load <8 x i8>, <8 x i8>* %A
209 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
; Same idea for the within-word (vrev32) halfword swap with undef lanes.
213 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
214 ;CHECK-LABEL: test_vrev32Q16_undef:
216 %tmp1 = load <8 x i16>, <8 x i16>* %A
217 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
221 ; vrev <4 x i16> should use REV32 and not REV64
222 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
223 ; CHECK-LABEL: test_vrev64:
224 ; CHECK: ldr [[DEST:q[0-9]+]],
; Load reinterpreted as 8 x i16, then elements 6 and 5 are extracted in
; reversed order — an adjacent-element swap the backend can do with a
; within-word reverse (REV32) rather than a doubleword reverse.
228 %0 = bitcast <4 x i16>* %source to <8 x i16>*
229 %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
230 %tmp3 = extractelement <8 x i16> %tmp2, i32 6
231 %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
232 %tmp9 = extractelement <8 x i16> %tmp2, i32 5
233 %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
234 store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
; NOTE(review): further CHECK lines, ret and closing brace are not visible here.
238 ; Test vrev of float4
239 define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
; Use CHECK-LABEL (with trailing colon), consistent with every other test in
; this file; a bare "CHECK: float_vrev64" does not partition FileCheck's match
; scope, so subsequent CHECKs could match output from a different function.
240 ; CHECK-LABEL: float_vrev64:
241 ; CHECK: ldr [[DEST:q[0-9]+]],
; Shuffle pulls lane 0 of a zero vector and lane 3 (index 7) of the loaded
; data — a cross-operand pick the backend must lower without scalarizing.
244 %0 = bitcast float* %source to <4 x float>*
245 %tmp2 = load <4 x float>, <4 x float>* %0, align 4
246 %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
247 %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
248 store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
; NOTE(review): the store CHECK, ret and closing brace are not visible in this listing.
; Vector byte swap: bswap on each i32 lane — presumably expected to select a
; byte-reverse-within-word vector instruction (CHECK body not visible here;
; confirm upstream).
253 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
254 ; CHECK-LABEL: test_vrev32_bswap:
258 %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
; Declaration of the vector byte-swap intrinsic used above.
262 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone