llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s | FileCheck %s
   3
   4 target triple = "aarch64-unknown-linux-gnu"
   5
   6 ; ANDV: the i8 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is andv + ret only).
   7 define <vscale x 16 x i8> @andv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
   8 ; CHECK-LABEL: andv_zero_fill:
   9 ; CHECK:       // %bb.0:
  10 ; CHECK-NEXT:    andv b0, p0, z0.b
  11 ; CHECK-NEXT:    ret
  12   %t1 = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  13   %t2 = insertelement <vscale x 16 x i8> zeroinitializer, i8 %t1, i64 0
  14   ret <vscale x 16 x i8> %t2
  15 }
  16
  17 ; EORV: the i16 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is eorv + ret only).
  18 define <vscale x 8 x i16> @eorv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
  19 ; CHECK-LABEL: eorv_zero_fill:
  20 ; CHECK:       // %bb.0:
  21 ; CHECK-NEXT:    eorv h0, p0, z0.h
  22 ; CHECK-NEXT:    ret
  23   %t1 = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  24   %t2 = insertelement <vscale x 8 x i16> zeroinitializer, i16 %t1, i64 0
  25   ret <vscale x 8 x i16> %t2
  26 }
  27
  28 ; FADDA (takes an extra scalar %init operand): ensure we rely on the reduction's implicit zero filling; besides a register-kill annotation the expected code is fadda + ret only.
  29 define <vscale x 2 x double> @fadda_zero_fill(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) #0 {
  30 ; CHECK-LABEL: fadda_zero_fill:
  31 ; CHECK:       // %bb.0:
  32 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
  33 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
  34 ; CHECK-NEXT:    ret
  35   %t1 = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a)
  36   %t2 = insertelement <vscale x 2 x double> zeroinitializer, double %t1, i64 0
  37   ret <vscale x 2 x double> %t2
  38 }
  39
  40 ; FADDV: the float reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is faddv + ret only).
  41 define <vscale x 4 x float> @faddv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
  42 ; CHECK-LABEL: faddv_zero_fill:
  43 ; CHECK:       // %bb.0:
  44 ; CHECK-NEXT:    faddv s0, p0, z0.s
  45 ; CHECK-NEXT:    ret
  46   %t1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  47   %t2 = insertelement <vscale x 4 x float> zeroinitializer, float %t1, i64 0
  48   ret <vscale x 4 x float> %t2
  49 }
  50
  51 ; FMAXV: the half reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is fmaxv + ret only).
  52 define <vscale x 8 x half> @fmaxv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
  53 ; CHECK-LABEL: fmaxv_zero_fill:
  54 ; CHECK:       // %bb.0:
  55 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
  56 ; CHECK-NEXT:    ret
  57   %t1 = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  58   %t2 = insertelement <vscale x 8 x half> zeroinitializer, half %t1, i64 0
  59   ret <vscale x 8 x half> %t2
  60 }
  61
  62 ; FMAXNMV on <vscale x 2 x float>: the float result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is fmaxnmv + ret only).
  63 define <vscale x 2 x float> @fmaxnmv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  64 ; CHECK-LABEL: fmaxnmv_zero_fill:
  65 ; CHECK:       // %bb.0:
  66 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
  67 ; CHECK-NEXT:    ret
  68   %t1 = call float @llvm.aarch64.sve.fmaxnmv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  69   %t2 = insertelement <vscale x 2 x float> zeroinitializer, float %t1, i64 0
  70   ret <vscale x 2 x float> %t2
  71 }
  72
  73 ; FMINNMV on <vscale x 2 x float>: the float result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is fminnmv + ret only).
  74 define <vscale x 2 x float> @fminnmv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  75 ; CHECK-LABEL: fminnmv_zero_fill:
  76 ; CHECK:       // %bb.0:
  77 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
  78 ; CHECK-NEXT:    ret
  79   %t1 = call float @llvm.aarch64.sve.fminnmv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  80   %t2 = insertelement <vscale x 2 x float> zeroinitializer, float %t1, i64 0
  81   ret <vscale x 2 x float> %t2
  82 }
  83
  84 ; FMINV on <vscale x 2 x float>: the float result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is fminv + ret only).
  85 define <vscale x 2 x float> @fminv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) #0 {
  86 ; CHECK-LABEL: fminv_zero_fill:
  87 ; CHECK:       // %bb.0:
  88 ; CHECK-NEXT:    fminv s0, p0, z0.s
  89 ; CHECK-NEXT:    ret
  90   %t1 = call float @llvm.aarch64.sve.fminv.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  91   %t2 = insertelement <vscale x 2 x float> zeroinitializer, float %t1, i64 0
  92   ret <vscale x 2 x float> %t2
  93 }
  94
  95 ; ORV: the i32 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is orv + ret only).
  96 define <vscale x 4 x i32> @orv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
  97 ; CHECK-LABEL: orv_zero_fill:
  98 ; CHECK:       // %bb.0:
  99 ; CHECK-NEXT:    orv s0, p0, z0.s
 100 ; CHECK-NEXT:    ret
 101   %t1 = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
 102   %t2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %t1, i64 0
 103   ret <vscale x 4 x i32> %t2
 104 }
 105
 106 ; SADDV of i8 elements returns an i64, inserted into lane 0 of a zero <vscale x 2 x i64>. Ensure we rely on the reduction's implicit zero filling (expected code is saddv + ret only).
 107 define <vscale x 2 x i64> @saddv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
 108 ; CHECK-LABEL: saddv_zero_fill:
 109 ; CHECK:       // %bb.0:
 110 ; CHECK-NEXT:    saddv d0, p0, z0.b
 111 ; CHECK-NEXT:    ret
 112   %t1 = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
 113   %t2 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t1, i64 0
 114   ret <vscale x 2 x i64> %t2
 115 }
 116
 117 ; SMAXV: the i64 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is smaxv + ret only).
 118 define <vscale x 2 x i64> @smaxv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 119 ; CHECK-LABEL: smaxv_zero_fill:
 120 ; CHECK:       // %bb.0:
 121 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 122 ; CHECK-NEXT:    ret
 123   %t1 = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 124   %t2 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t1, i64 0
 125   ret <vscale x 2 x i64> %t2
 126 }
 127
 128 ; SMINV: the i32 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is sminv + ret only).
 129 define <vscale x 4 x i32> @sminv_zero_fill(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
 130 ; CHECK-LABEL: sminv_zero_fill:
 131 ; CHECK:       // %bb.0:
 132 ; CHECK-NEXT:    sminv s0, p0, z0.s
 133 ; CHECK-NEXT:    ret
 134   %t1 = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
 135   %t2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %t1, i64 0
 136   ret <vscale x 4 x i32> %t2
 137 }
 138
 139 ; UADDV of i16 elements returns an i64, inserted into lane 0 of a zero <vscale x 2 x i64>. Ensure we rely on the reduction's implicit zero filling (expected code is uaddv + ret only).
 140 define <vscale x 2 x i64> @uaddv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
 141 ; CHECK-LABEL: uaddv_zero_fill:
 142 ; CHECK:       // %bb.0:
 143 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 144 ; CHECK-NEXT:    ret
 145   %t1 = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
 146   %t2 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t1, i64 0
 147   ret <vscale x 2 x i64> %t2
 148 }
 149
 150 ; UMAXV: the i8 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is umaxv + ret only).
 151 define <vscale x 16 x i8> @umaxv_zero_fill(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
 152 ; CHECK-LABEL: umaxv_zero_fill:
 153 ; CHECK:       // %bb.0:
 154 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 155 ; CHECK-NEXT:    ret
 156   %t1 = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
 157   %t2 = insertelement <vscale x 16 x i8> zeroinitializer, i8 %t1, i64 0
 158   ret <vscale x 16 x i8> %t2
 159 }
 160
 161 ; UMINV: the i64 reduction result is inserted into lane 0 of zeroinitializer. Ensure we rely on the reduction's implicit zero filling (expected code is uminv + ret only).
 162 define <vscale x 2 x i64> @uminv_zero_fill(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 163 ; CHECK-LABEL: uminv_zero_fill:
 164 ; CHECK:       // %bb.0:
 165 ; CHECK-NEXT:    uminv d0, p0, z0.d
 166 ; CHECK-NEXT:    ret
 167   %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 168   %t2 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t1, i64 0
 169   ret <vscale x 2 x i64> %t2
 170 }
 171
 172 ; Ensure explicit zeroing (mov z0.d, #0) when the reduction result is inserted into a lane other than 0 (lane 1 here).
 173 ; NOTE: The intent is only that an explicit zero-and-insert sequence is present, but the
 174 ; autogenerated CHECK lines below pin the complete expected instruction sequence.
 175 define <vscale x 2 x i64> @zero_fill_non_zero_index(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 176 ; CHECK-LABEL: zero_fill_non_zero_index:
 177 ; CHECK:       // %bb.0:
 178 ; CHECK-NEXT:    mov w8, #1 // =0x1
 179 ; CHECK-NEXT:    index z1.d, #0, #1
 180 ; CHECK-NEXT:    uminv d3, p0, z0.d
 181 ; CHECK-NEXT:    mov z2.d, x8
 182 ; CHECK-NEXT:    ptrue p1.d
 183 ; CHECK-NEXT:    mov z0.d, #0 // =0x0
 184 ; CHECK-NEXT:    cmpeq p0.d, p1/z, z1.d, z2.d
 185 ; CHECK-NEXT:    fmov x8, d3
 186 ; CHECK-NEXT:    mov z0.d, p0/m, x8
 187 ; CHECK-NEXT:    ret
 188   %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 189   %t2 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t1, i64 1
 190   ret <vscale x 2 x i64> %t2
 191 }
 192
 193 ; Ensure explicit zeroing when the result vector (<vscale x 4 x i64>) is larger than that
 194 ; produced by the reduction instruction: the expected code zeroes z1 with a separate mov.
 195 define <vscale x 4 x i64> @zero_fill_type_mismatch(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 196 ; CHECK-LABEL: zero_fill_type_mismatch:
 197 ; CHECK:       // %bb.0:
 198 ; CHECK-NEXT:    uminv d0, p0, z0.d
 199 ; CHECK-NEXT:    mov z1.d, #0 // =0x0
 200 ; CHECK-NEXT:    ret
 201   %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
 202   %t2 = insertelement <vscale x 4 x i64> zeroinitializer, i64 %t1, i64 0
 203   ret <vscale x 4 x i64> %t2
 204 }
 205
 206 ; Ensure explicit zeroing when the inserted scalar is extracted from an operation (here the
 207 ; predicated vector umin) that cannot guarantee lanes 1-N are zero.
 208 ; NOTE: The intent is only that an explicit zero-and-insert sequence is present, but the
 209 ; autogenerated CHECK lines below pin the complete expected instruction sequence.
 210 define <vscale x 2 x i64> @zero_fill_no_zero_upper_lanes(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 211 ; CHECK-LABEL: zero_fill_no_zero_upper_lanes:
 212 ; CHECK:       // %bb.0:
 213 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z0.d
 214 ; CHECK-NEXT:    mov z1.d, #0 // =0x0
 215 ; CHECK-NEXT:    ptrue p0.d, vl1
 216 ; CHECK-NEXT:    fmov x8, d0
 217 ; CHECK-NEXT:    mov z1.d, p0/m, x8
 218 ; CHECK-NEXT:    mov z0.d, z1.d
 219 ; CHECK-NEXT:    ret
 220   %t1 = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %a)
 221   %t2 = extractelement <vscale x 2 x i64> %t1, i64 0
 222   %t3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 %t2, i64 0
 223   ret <vscale x 2 x i64> %t3
 224 }
 225
 226 declare i8 @llvm.aarch64.sve.andv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 227 declare i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 228
 229 declare i8 @llvm.aarch64.sve.eorv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 230 declare i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
 231
 232 declare float @llvm.aarch64.sve.fadda.nxv2f32(<vscale x 2 x i1>, float, <vscale x 2 x float>)
 233 declare double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 234
 235 declare float @llvm.aarch64.sve.faddv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 236 declare float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 237
 238 declare float @llvm.aarch64.sve.fmaxnmv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 239
 240 declare half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
 241 declare float @llvm.aarch64.sve.fmaxv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 242
 243 declare float @llvm.aarch64.sve.fminv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 244
 245 declare float @llvm.aarch64.sve.fminnmv.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 246
 247 declare i8 @llvm.aarch64.sve.orv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 248 declare i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 249
 250 declare i64 @llvm.aarch64.sve.saddv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 251 declare i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 252
 253 declare i8 @llvm.aarch64.sve.smaxv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 254 declare i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 255
 256 declare i8 @llvm.aarch64.sve.sminv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 257 declare i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 258
 259 declare i64 @llvm.aarch64.sve.uaddv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 260 declare i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
 261
 262 declare i8 @llvm.aarch64.sve.umaxv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 263 declare i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 264
 265 declare i8 @llvm.aarch64.sve.uminv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
 266 declare i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 267
 268 declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 269
 270 attributes #0 = { "target-features"="+sve" }