test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=mips64el-linux-gnu -mcpu=mips64r6 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL64R6
   3 ; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=MIPSEL32R5
   4
   5 declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32) ; MSA shift-left by immediate on 64-bit lanes; the i32 operand is the shift amount
   6 declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32) ; MSA logical shift-right by immediate on 64-bit lanes
   7
   8 declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32) ; MSA shift-left by immediate on 32-bit lanes
   9 declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32) ; MSA logical shift-right by immediate on 32-bit lanes
  10
   11 ; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
   12 ; MASK_TYPE1 = C1-C2 0s | 1s | ends with C2 0s
   13 define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
   14 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
   15 ; MIPSEL64R6:       # %bb.0: # %entry
   16 ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
   17 ; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 52
   18 ; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 51
   19 ; MIPSEL64R6-NEXT:    jr $ra
   20 ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
   21 ;
   22 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
   23 ; MIPSEL32R5:       # %bb.0: # %entry
   24 ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
   25 ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 52
   26 ; MIPSEL32R5-NEXT:    slli.d $w0, $w0, 51
   27 ; MIPSEL32R5-NEXT:    jr $ra
   28 ; MIPSEL32R5-NEXT:    st.d $w0, 0($5)
   29 entry:
   30   %0 = load <2 x i64>, <2 x i64>* %a ; x = vector loaded from %a
   31   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 52) ; logical right shift of x by c1 = 52
   32   %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 51) ; left shift by c2 = 51 (c1 > c2); expected output keeps srli.d 52 then slli.d 51
   33   store <2 x i64> %2, <2 x i64>* %b ; result written to %b
   34   ret void
   35 }
  36
   37 ; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
   38 define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
   39 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
   40 ; MIPSEL64R6:       # %bb.0: # %entry
   41 ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
   42 ; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 6
   43 ; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 4
   44 ; MIPSEL64R6-NEXT:    jr $ra
   45 ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
   46 ;
   47 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
   48 ; MIPSEL32R5:       # %bb.0: # %entry
   49 ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
   50 ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 6
   51 ; MIPSEL32R5-NEXT:    slli.d $w0, $w0, 4
   52 ; MIPSEL32R5-NEXT:    jr $ra
   53 ; MIPSEL32R5-NEXT:    st.d $w0, 0($5)
   54 entry:
   55   %0 = load <2 x i64>, <2 x i64>* %a ; x = vector loaded from %a
   56   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 6) ; logical right shift of x by c1 = 6
   57   %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 4) ; left shift by c2 = 4 (c1 > c2); expected output keeps srli.d 6 then slli.d 4
   58   store <2 x i64> %2, <2 x i64>* %b ; result written to %b
   59   ret void
   60 }
  61
   62 ; do not fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C1 < C2
   63 ; MASK_TYPE2 = 1s | C2 zeros
   64 define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
   65 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
   66 ; MIPSEL64R6:       # %bb.0: # %entry
   67 ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
   68 ; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 4
   69 ; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 6
   70 ; MIPSEL64R6-NEXT:    jr $ra
   71 ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
   72 ;
   73 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
   74 ; MIPSEL32R5:       # %bb.0: # %entry
   75 ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
   76 ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 4
   77 ; MIPSEL32R5-NEXT:    slli.d $w0, $w0, 6
   78 ; MIPSEL32R5-NEXT:    jr $ra
   79 ; MIPSEL32R5-NEXT:    st.d $w0, 0($5)
   80 entry:
   81   %0 = load <2 x i64>, <2 x i64>* %a ; NOTE(review): the function name says i32, but this test operates on <2 x i64> - confirm intent
   82   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 4) ; logical right shift of x by c1 = 4
   83   %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 6) ; left shift by c2 = 6 (c1 < c2); expected output keeps srli.d 4 then slli.d 6
   84   store <2 x i64> %2, <2 x i64>* %b ; result written to %b
   85   ret void
   86 }
  87
   88 ; do not fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
   89 define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
   90 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
   91 ; MIPSEL64R6:       # %bb.0: # %entry
   92 ; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
   93 ; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 7
   94 ; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 3
   95 ; MIPSEL64R6-NEXT:    jr $ra
   96 ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
   97 ;
   98 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
   99 ; MIPSEL32R5:       # %bb.0: # %entry
  100 ; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
  101 ; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 7
  102 ; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 3
  103 ; MIPSEL32R5-NEXT:    jr $ra
  104 ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  105 entry:
  106   %0 = load <4 x i32>, <4 x i32>* %a ; x = vector loaded from %a
  107   %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7) ; logical right shift of x by c1 = 7
  108   %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 3) ; left shift by c2 = 3 (c1 > c2); expected output keeps srli.w 7 then slli.w 3
  109   store <4 x i32> %2, <4 x i32>* %b ; result written to %b
  110   ret void
  111 }
 112
  113 ; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
  114 define void @avoid_to_combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
  115 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
  116 ; MIPSEL64R6:       # %bb.0: # %entry
  117 ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
  118 ; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 38
  119 ; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 38
  120 ; MIPSEL64R6-NEXT:    jr $ra
  121 ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  122 ;
  123 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
  124 ; MIPSEL32R5:       # %bb.0: # %entry
  125 ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  126 ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 38
  127 ; MIPSEL32R5-NEXT:    slli.d $w0, $w0, 38
  128 ; MIPSEL32R5-NEXT:    jr $ra
  129 ; MIPSEL32R5-NEXT:    st.d $w0, 0($5)
  130 entry:
  131   %0 = load <2 x i64>, <2 x i64>* %a ; x = vector loaded from %a
  132   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 38) ; logical right shift of x by c1 = 38
  133   %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 38) ; left shift by the same amount (38); expected output keeps both shifts, with no and-with-mask
  134   store <2 x i64> %2, <2 x i64>* %b ; result written to %b
  135   ret void
  136 }
 137
  138 ; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
  139 define void @avoid_to_combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
  140 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
  141 ; MIPSEL64R6:       # %bb.0: # %entry
  142 ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
  143 ; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 3
  144 ; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 3
  145 ; MIPSEL64R6-NEXT:    jr $ra
  146 ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  147 ;
  148 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
  149 ; MIPSEL32R5:       # %bb.0: # %entry
  150 ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  151 ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 3
  152 ; MIPSEL32R5-NEXT:    slli.d $w0, $w0, 3
  153 ; MIPSEL32R5-NEXT:    jr $ra
  154 ; MIPSEL32R5-NEXT:    st.d $w0, 0($5)
  155 entry:
  156   %0 = load <2 x i64>, <2 x i64>* %a ; x = vector loaded from %a
  157   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 3) ; logical right shift of x by c1 = 3
  158   %2 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %1, i32 3) ; left shift by the same amount (3); expected output keeps both shifts, with no and-with-mask
  159   store <2 x i64> %2, <2 x i64>* %b ; result written to %b
  160   ret void
  161 }
 162
  163 ; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
  164 define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
  165 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
  166 ; MIPSEL64R6:       # %bb.0: # %entry
  167 ; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
  168 ; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 5
  169 ; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 5
  170 ; MIPSEL64R6-NEXT:    jr $ra
  171 ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
  172 ;
  173 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
  174 ; MIPSEL32R5:       # %bb.0: # %entry
  175 ; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
  176 ; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 5
  177 ; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 5
  178 ; MIPSEL32R5-NEXT:    jr $ra
  179 ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  180 entry:
  181   %0 = load <4 x i32>, <4 x i32>* %a ; x = vector loaded from %a
  182   %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 5) ; logical right shift of x by c1 = 5
  183   %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 5) ; left shift by the same amount (5); expected output keeps both shifts, with no and-with-mask
  184   store <4 x i32> %2, <4 x i32>* %b ; result written to %b
  185   ret void
  186 }
 187
  188 ; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
  189 define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
  190 ; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
  191 ; MIPSEL64R6:       # %bb.0: # %entry
  192 ; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
  193 ; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 30
  194 ; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 30
  195 ; MIPSEL64R6-NEXT:    jr $ra
  196 ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
  197 ;
  198 ; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
  199 ; MIPSEL32R5:       # %bb.0: # %entry
  200 ; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
  201 ; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 30
  202 ; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 30
  203 ; MIPSEL32R5-NEXT:    jr $ra
  204 ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  205 entry:
  206   %0 = load <4 x i32>, <4 x i32>* %a ; x = vector loaded from %a
  207   %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 30) ; logical right shift of x by c1 = 30
  208   %2 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %1, i32 30) ; left shift by the same amount (30); expected output keeps both shifts, with no and-with-mask
  209   store <4 x i32> %2, <4 x i32>* %b ; result written to %b
  210   ret void
  211 }