llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll

   1 ; RUN: llc < %s | FileCheck %s
   2 ; llc gets no -mtriple flag above; the target is taken from the triple below.
   3 target triple = "aarch64-unknown-linux-gnu"
   4
   5 ; ANDV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
   6 define <vscale x 16 x i8> @andv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
   7 ; CHECK-LABEL: andv_zero_fill:
   8 ; CHECK: andv b0, p0, z0.b
   9 ; CHECK-NEXT: ret
  10   %scalar = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  11   %vec = insertelement <vscale x 16 x i8> zeroinitializer, i8 %scalar, i64 0
  12   ret <vscale x 16 x i8> %vec
  13 }
  14
  15 ; EORV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  16 define <vscale x 8 x i16> @eorv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
  17 ; CHECK-LABEL: eorv_zero_fill:
  18 ; CHECK: eorv h0, p0, z0.h
  19 ; CHECK-NEXT: ret
  20   %scalar = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  21   %vec = insertelement <vscale x 8 x i16> zeroinitializer, i16 %scalar, i64 0
  22   ret <vscale x 8 x i16> %vec
  23 }
  24
  25 ; FADDA (seeded with %init) zero-fills the rest of its result, so the lane-0 insert into zeros is free.
  26 define <vscale x 2 x double> @fadda_zero_fill(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) #0 {
  27 ; CHECK-LABEL: fadda_zero_fill:
  28 ; CHECK: fadda d0, p0, d0, z1.d
  29 ; CHECK-NEXT: ret
  30   %scalar = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a)
  31   %vec = insertelement <vscale x 2 x double> zeroinitializer, double %scalar, i64 0
  32   ret <vscale x 2 x double> %vec
  33 }
  34
  35 ; FADDV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  36 define <vscale x 4 x float> @faddv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
  37 ; CHECK-LABEL: faddv_zero_fill:
  38 ; CHECK: faddv s0, p0, z0.s
  39 ; CHECK-NEXT: ret
  40   %scalar = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  41   %vec = insertelement <vscale x 4 x float> zeroinitializer, float %scalar, i64 0
  42   ret <vscale x 4 x float> %vec
  43 }
  44
  45 ; FMAXV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  46 define <vscale x 8 x half> @fmaxv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
  47 ; CHECK-LABEL: fmaxv_zero_fill:
  48 ; CHECK: fmaxv h0, p0, z0.h
  49 ; CHECK-NEXT: ret
  50   %scalar = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  51   %vec = insertelement <vscale x 8 x half> zeroinitializer, half %scalar, i64 0
  52   ret <vscale x 8 x half> %vec
  53 }
  54
  55 ; FMAXNMV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  56 define <vscale x 2 x float> @fmaxnmv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  57 ; CHECK-LABEL: fmaxnmv_zero_fill:
  58 ; CHECK: fmaxnmv s0, p0, z0.s
  59 ; CHECK-NEXT: ret
  60   %scalar = call float @llvm.aarch64.sve.fmaxnmv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  61   %vec = insertelement <vscale x 2 x float> zeroinitializer, float %scalar, i64 0
  62   ret <vscale x 2 x float> %vec
  63 }
  64
  65 ; FMINNMV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  66 define <vscale x 2 x float> @fminnmv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  67 ; CHECK-LABEL: fminnmv_zero_fill:
  68 ; CHECK: fminnmv s0, p0, z0.s
  69 ; CHECK-NEXT: ret
  70   %scalar = call float @llvm.aarch64.sve.fminnmv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  71   %vec = insertelement <vscale x 2 x float> zeroinitializer, float %scalar, i64 0
  72   ret <vscale x 2 x float> %vec
  73 }
  74
  75 ; FMINV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  76 define <vscale x 2 x float> @fminv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  77 ; CHECK-LABEL: fminv_zero_fill:
  78 ; CHECK: fminv s0, p0, z0.s
  79 ; CHECK-NEXT: ret
  80   %scalar = call float @llvm.aarch64.sve.fminv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  81   %vec = insertelement <vscale x 2 x float> zeroinitializer, float %scalar, i64 0
  82   ret <vscale x 2 x float> %vec
  83 }
  84
  85 ; ORV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
  86 define <vscale x 4 x i32> @orv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
  87 ; CHECK-LABEL: orv_zero_fill:
  88 ; CHECK: orv s0, p0, z0.s
  89 ; CHECK-NEXT: ret
  90   %scalar = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  91   %vec = insertelement <vscale x 4 x i32> zeroinitializer, i32 %scalar, i64 0
  92   ret <vscale x 4 x i32> %vec
  93 }
  94
  95 ; SADDV sums i8 lanes into an i64 and zero-fills the rest, so the lane-0 insert into zeros is free.
  96 define <vscale x 2 x i64> @saddv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
  97 ; CHECK-LABEL: saddv_zero_fill:
  98 ; CHECK: saddv d0, p0, z0.b
  99 ; CHECK-NEXT: ret
 100   %scalar = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
 101   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 0
 102   ret <vscale x 2 x i64> %vec
 103 }
 104
 105 ; SMAXV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
 106 define <vscale x 2 x i64> @smaxv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 107 ; CHECK-LABEL: smaxv_zero_fill:
 108 ; CHECK: smaxv d0, p0, z0.d
 109 ; CHECK-NEXT: ret
 110   %scalar = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 111   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 0
 112   ret <vscale x 2 x i64> %vec
 113 }
 114
 115 ; SMINV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
 116 define <vscale x 4 x i32> @sminv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
 117 ; CHECK-LABEL: sminv_zero_fill:
 118 ; CHECK: sminv s0, p0, z0.s
 119 ; CHECK-NEXT: ret
 120   %scalar = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
 121   %vec = insertelement <vscale x 4 x i32> zeroinitializer, i32 %scalar, i64 0
 122   ret <vscale x 4 x i32> %vec
 123 }
 124
 125 ; UADDV sums i16 lanes into an i64 and zero-fills the rest, so the lane-0 insert into zeros is free.
 126 define <vscale x 2 x i64> @uaddv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
 127 ; CHECK-LABEL: uaddv_zero_fill:
 128 ; CHECK: uaddv d0, p0, z0.h
 129 ; CHECK-NEXT: ret
 130   %scalar = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
 131   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 0
 132   ret <vscale x 2 x i64> %vec
 133 }
 134
 135 ; UMAXV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
 136 define <vscale x 16 x i8> @umaxv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
 137 ; CHECK-LABEL: umaxv_zero_fill:
 138 ; CHECK: umaxv b0, p0, z0.b
 139 ; CHECK-NEXT: ret
 140   %scalar = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
 141   %vec = insertelement <vscale x 16 x i8> zeroinitializer, i8 %scalar, i64 0
 142   ret <vscale x 16 x i8> %vec
 143 }
 144
 145 ; UMINV zero-fills the rest of its result register, so the lane-0 insert into zeros must cost nothing.
 146 define <vscale x 2 x i64> @uminv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 147 ; CHECK-LABEL: uminv_zero_fill:
 148 ; CHECK: uminv d0, p0, z0.d
 149 ; CHECK-NEXT: ret
 150   %scalar = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 151   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 0
 152   ret <vscale x 2 x i64> %vec
 153 }
 154
 155 ; A lane other than 0 is the insert target here, so explicit zeroing is expected.
 156 ; NOTE: how the insert gets lowered is not what this test is about, so only a
 157 ; single instruction of the expected sequence is matched.
 158 define <vscale x 2 x i64> @zero_fill_non_zero_index(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 159 ; CHECK-LABEL: zero_fill_non_zero_index:
 160 ; CHECK: uminv d{{[0-9]+}}, p0, z0.d
 161 ; CHECK: mov z{{[0-9]+}}.d, p{{[0-9]+}}/m, x{{[0-9]+}}
 162 ; CHECK: ret
 163   %scalar = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 164   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 1
 165   ret <vscale x 2 x i64> %vec
 166 }
 167
 168 ; The nxv4i64 result is wider than what the reduction instruction produces, so
 169 ; the part it does not cover has to be zeroed explicitly.
 170 define <vscale x 4 x i64> @zero_fill_type_mismatch(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 171 ; CHECK-LABEL: zero_fill_type_mismatch:
 172 ; CHECK: uminv d0, p0, z0.d
 173 ; CHECK-NEXT: mov z1.d, #0
 174 ; CHECK-NEXT: ret
 175   %scalar = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 176   %vec = insertelement <vscale x 4 x i64> zeroinitializer, i64 %scalar, i64 0
 177   ret <vscale x 4 x i64> %vec
 178 }
 179
 180 ; The scalar is lane 0 of a predicated UMIN, an operation that cannot guarantee
 181 ; lanes 1-N are zero, so explicit zeroing is expected.
 182 ; NOTE: how the insert gets lowered is not what this test is about, so only a
 183 ; single instruction of the expected sequence is matched.
 184 define <vscale x 2 x i64> @zero_fill_no_zero_upper_lanes(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 185 ; CHECK-LABEL: zero_fill_no_zero_upper_lanes:
 186 ; CHECK: umin z{{[0-9]+}}.d, p0/m, z0.d, z0.d
 187 ; CHECK: mov z{{[0-9]+}}.d, p{{[0-9]+}}/m, x{{[0-9]+}}
 188 ; CHECK: ret
 189   %lanes = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %a)
 190   %scalar = extractelement <vscale x 2 x i64> %lanes, i64 0
 191   %vec = insertelement <vscale x 2 x i64> zeroinitializer, i64 %scalar, i64 0
 192   ret <vscale x 2 x i64> %vec
 193 }
 194 ; Intrinsic declarations; some overloads (e.g. the nxv2i8 ones) have no caller visible in this file.
 195 declare i8 @llvm.aarch64.sve.andv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 196 declare i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 197
 198 declare i8 @llvm.aarch64.sve.eorv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 199 declare i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
 200
 201 declare float @llvm.aarch64.sve.fadda.nxv2f32(<vscale x 2 x i1>, float, <vscale x 2 x float>)
 202 declare double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 203
 204 declare float @llvm.aarch64.sve.faddv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 205 declare float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 206
 207 declare float @llvm.aarch64.sve.fmaxnmv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 208
 209 declare half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
 210 declare float @llvm.aarch64.sve.fmaxv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 211
 212 declare float @llvm.aarch64.sve.fminv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 213
 214 declare float @llvm.aarch64.sve.fminnmv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 215
 216 declare i8 @llvm.aarch64.sve.orv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 217 declare i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 218
 219 declare i64 @llvm.aarch64.sve.saddv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 220 declare i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 221
 222 declare i8 @llvm.aarch64.sve.smaxv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 223 declare i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 224
 225 declare i8 @llvm.aarch64.sve.sminv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 226 declare i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 227
 228 declare i64 @llvm.aarch64.sve.uaddv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 229 declare i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
 230
 231 declare i8 @llvm.aarch64.sve.umaxv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 232 declare i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 233
 234 declare i8 @llvm.aarch64.sve.uminv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 235 declare i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 236
 237 declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 238 ; Attribute group #0 is attached to every function above and turns on the SVE target feature.
 239 attributes #0 = { "target-features"="+sve" }