llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll

   1 ; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | FileCheck %s -D#VBYTES=16  -check-prefix=NO_SVE
   2 ; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
   3 ; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
   4 ; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
   5 ; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
   6 ; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
   7 ; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
   8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
   9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
  16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
     ; How the invocations above map onto the expectations below:
     ;  * The first invocation (minimum SVE width of 128 bits) enables a single
     ;    prefix of its own; the only expectation written with it in this file is
     ;    that no ptrue instruction appears in the output.
     ;  * Every other invocation enables the common prefix plus one cumulative
     ;    width prefix for each threshold its minimum width reaches (256, 512,
     ;    1024 and 2048 bits). Expectations written with a width prefix are
     ;    therefore only verified by invocations at or above that width.
     ;  * NOTE(review): the numeric variable VBYTES defined on the first invocation
     ;    does not appear to be referenced by any check line in this file.
  17
  18 target triple = "aarch64-unknown-linux-gnu"
  19
  20 ; Don't use SVE when its registers are no bigger than NEON.
  21 ; NO_SVE-NOT: ptrue
  22
  23 ;
  24 ; insertelement
  25 ;
  26
  27 ; Don't use SVE for 64-bit vectors.
  28 define <4 x half> @insertelement_v4f16(<4 x half> %op1) #0 {
  29 ; CHECK-LABEL: insertelement_v4f16:
  30 ; CHECK:         fmov h1, #5.00000000
  31 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
  32 ; CHECK-NEXT:    ret
  33   %res = insertelement <4 x half> %op1, half 5.0, i64 3 ; write 5.0 into the last lane (index 3)
  34   ret <4 x half> %res
  35 }
  36
  37 ; Don't use SVE for 128-bit vectors.
  38 define <8 x half> @insertelement_v8f16(<8 x half> %op1) #0 {
  39 ; CHECK-LABEL: insertelement_v8f16:
  40 ; CHECK:         fmov h1, #5.00000000
  41 ; CHECK-NEXT:    mov v0.h[7], v1.h[0]
  42 ; CHECK-NEXT:    ret
  43   %res = insertelement <8 x half> %op1, half 5.0, i64 7 ; write 5.0 into the last lane (index 7)
  44   ret <8 x half> %res
  45 }
  46
  47 define <16 x half> @insertelement_v16f16(<16 x half>* %a) #0 {
  48 ; CHECK-LABEL: insertelement_v16f16:
  49 ; VBITS_GE_256:         ptrue p0.h, vl16
  50 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
  51 ; VBITS_GE_256-NEXT:    mov w9, #15
  52 ; VBITS_GE_256-NEXT:    mov z1.h, w9
  53 ; VBITS_GE_256-NEXT:    index z2.h, #0, #1
  54 ; VBITS_GE_256-NEXT:    ptrue p1.h
  55 ; VBITS_GE_256-NEXT:    cmpeq p1.h, p1/z, z2.h, z1.h
  56 ; VBITS_GE_256-NEXT:    fmov h1, #5.00000000
  57 ; VBITS_GE_256-NEXT:    mov z0.h, p1/m, h1
  58 ; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x8]
  59 ; VBITS_GE_256-NEXT:    ret
  60   %vec = load <16 x half>, <16 x half>* %a ; 16 x 16-bit lanes = 256-bit source vector
  61   %res = insertelement <16 x half> %vec, half 5.0, i64 15 ; write 5.0 into the last lane (index 15)
  62   ret <16 x half> %res
  63 }
  64
  65 define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
  66 ; CHECK-LABEL: insertelement_v32f16:
  67 ; VBITS_GE_512:         ptrue p0.h, vl32
  68 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
  69 ; VBITS_GE_512-NEXT:    mov w9, #31
  70 ; VBITS_GE_512-NEXT:    mov z1.h, w9
  71 ; VBITS_GE_512-NEXT:    index z2.h, #0, #1
  72 ; VBITS_GE_512-NEXT:    ptrue p1.h
  73 ; VBITS_GE_512-NEXT:    cmpeq p1.h, p1/z, z2.h, z1.h
  74 ; VBITS_GE_512-NEXT:    fmov h1, #5.00000000
  75 ; VBITS_GE_512-NEXT:    mov z0.h, p1/m, h1
  76 ; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x8]
  77 ; VBITS_GE_512-NEXT:    ret
  78   %vec = load <32 x half>, <32 x half>* %a ; 32 x 16-bit lanes = 512-bit source vector
  79   %res = insertelement <32 x half> %vec, half 5.0, i64 31 ; write 5.0 into the last lane (index 31)
  80   ret <32 x half> %res
  81 }
  82
  83 define <64 x half> @insertelement_v64f16(<64 x half>* %a) #0 {
  84 ; CHECK-LABEL: insertelement_v64f16:
  85 ; VBITS_GE_1024:         ptrue p0.h, vl64
  86 ; VBITS_GE_1024-NEXT:    ld1h { z0.h }, p0/z, [x0]
  87 ; VBITS_GE_1024-NEXT:    mov w9, #63
  88 ; VBITS_GE_1024-NEXT:    mov z1.h, w9
  89 ; VBITS_GE_1024-NEXT:    index z2.h, #0, #1
  90 ; VBITS_GE_1024-NEXT:    ptrue p1.h
  91 ; VBITS_GE_1024-NEXT:    cmpeq p1.h, p1/z, z2.h, z1.h
  92 ; VBITS_GE_1024-NEXT:    fmov h1, #5.00000000
  93 ; VBITS_GE_1024-NEXT:    mov z0.h, p1/m, h1
  94 ; VBITS_GE_1024-NEXT:    st1h { z0.h }, p0, [x8]
  95 ; VBITS_GE_1024-NEXT:    ret
  96   %vec = load <64 x half>, <64 x half>* %a ; 64 x 16-bit lanes = 1024-bit source vector
  97   %res = insertelement <64 x half> %vec, half 5.0, i64 63 ; write 5.0 into the last lane (index 63)
  98   ret <64 x half> %res
  99 }
 100
 101 define <128 x half> @insertelement_v128f16(<128 x half>* %a) #0 {
 102 ; CHECK-LABEL: insertelement_v128f16:
 103 ; VBITS_GE_2048:         ptrue p0.h, vl128
 104 ; VBITS_GE_2048-NEXT:    ld1h { z0.h }, p0/z, [x0]
 105 ; VBITS_GE_2048-NEXT:    mov w9, #127
 106 ; VBITS_GE_2048-NEXT:    mov z1.h, w9
 107 ; VBITS_GE_2048-NEXT:    index z2.h, #0, #1
 108 ; VBITS_GE_2048-NEXT:    ptrue p1.h
 109 ; VBITS_GE_2048-NEXT:    cmpeq p1.h, p1/z, z2.h, z1.h
 110 ; VBITS_GE_2048-NEXT:    fmov h1, #5.00000000
 111 ; VBITS_GE_2048-NEXT:    mov z0.h, p1/m, h1
 112 ; VBITS_GE_2048-NEXT:    st1h { z0.h }, p0, [x8]
 113 ; VBITS_GE_2048-NEXT:    ret
 114   %vec = load <128 x half>, <128 x half>* %a ; 128 x 16-bit lanes = 2048-bit source vector
 115   %res = insertelement <128 x half> %vec, half 5.0, i64 127 ; write 5.0 into the last lane (index 127)
 116   ret <128 x half> %res
 117 }
 118
 119 ; Don't use SVE for 64-bit vectors.
 120 define <2 x float> @insertelement_v2f32(<2 x float> %op1) #0 {
 121 ; CHECK-LABEL: insertelement_v2f32:
 122 ; CHECK:         fmov s1, #5.00000000
 123 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 124 ; CHECK-NEXT:    ret
 125   %res = insertelement <2 x float> %op1, float 5.0, i64 1 ; write 5.0 into the last lane (index 1)
 126   ret <2 x float> %res
 127 }
 128
 129 ; Don't use SVE for 128-bit vectors.
 130 define <4 x float> @insertelement_v4f32(<4 x float> %op1) #0 {
 131 ; CHECK-LABEL: insertelement_v4f32:
 132 ; CHECK:         fmov s1, #5.00000000
 133 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 134 ; CHECK-NEXT:    ret
 135   %res = insertelement <4 x float> %op1, float 5.0, i64 3 ; write 5.0 into the last lane (index 3)
 136   ret <4 x float> %res
 137 }
 138
 139 define <8 x float> @insertelement_v8f32(<8 x float>* %a) #0 {
 140 ; CHECK-LABEL: insertelement_v8f32:
 141 ; VBITS_GE_256:         ptrue p0.s, vl8
 142 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
 143 ; VBITS_GE_256-NEXT:    mov w9, #7
 144 ; VBITS_GE_256-NEXT:    mov z1.s, w9
 145 ; VBITS_GE_256-NEXT:    index z2.s, #0, #1
 146 ; VBITS_GE_256-NEXT:    ptrue p1.s
 147 ; VBITS_GE_256-NEXT:    cmpeq p1.s, p1/z, z2.s, z1.s
 148 ; VBITS_GE_256-NEXT:    fmov s1, #5.00000000
 149 ; VBITS_GE_256-NEXT:    mov z0.s, p1/m, s1
 150 ; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x8]
 151 ; VBITS_GE_256-NEXT:    ret
 152   %vec = load <8 x float>, <8 x float>* %a ; 8 x 32-bit lanes = 256-bit source vector
 153   %res = insertelement <8 x float> %vec, float 5.0, i64 7 ; write 5.0 into the last lane (index 7)
 154   ret <8 x float> %res
 155 }
 156
 157 define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
 158 ; CHECK-LABEL: insertelement_v16f32:
 159 ; VBITS_GE_512:         ptrue p0.s, vl16
 160 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 161 ; VBITS_GE_512-NEXT:    mov w9, #15
 162 ; VBITS_GE_512-NEXT:    mov z1.s, w9
 163 ; VBITS_GE_512-NEXT:    index z2.s, #0, #1
 164 ; VBITS_GE_512-NEXT:    ptrue p1.s
 165 ; VBITS_GE_512-NEXT:    cmpeq p1.s, p1/z, z2.s, z1.s
 166 ; VBITS_GE_512-NEXT:    fmov s1, #5.00000000
 167 ; VBITS_GE_512-NEXT:    mov z0.s, p1/m, s1
 168 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x8]
 169 ; VBITS_GE_512-NEXT:    ret
 170   %vec = load <16 x float>, <16 x float>* %a ; 16 x 32-bit lanes = 512-bit source vector
 171   %res = insertelement <16 x float> %vec, float 5.0, i64 15 ; write 5.0 into the last lane (index 15)
 172   ret <16 x float> %res
 173 }
 174
 175 define <32 x float> @insertelement_v32f32(<32 x float>* %a) #0 {
 176 ; CHECK-LABEL: insertelement_v32f32:
 177 ; VBITS_GE_1024:         ptrue p0.s, vl32
 178 ; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
 179 ; VBITS_GE_1024-NEXT:    mov w9, #31
 180 ; VBITS_GE_1024-NEXT:    mov z1.s, w9
 181 ; VBITS_GE_1024-NEXT:    index z2.s, #0, #1
 182 ; VBITS_GE_1024-NEXT:    ptrue p1.s
 183 ; VBITS_GE_1024-NEXT:    cmpeq p1.s, p1/z, z2.s, z1.s
 184 ; VBITS_GE_1024-NEXT:    fmov s1, #5.00000000
 185 ; VBITS_GE_1024-NEXT:    mov z0.s, p1/m, s1
 186 ; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x8]
 187 ; VBITS_GE_1024-NEXT:    ret
 188   %vec = load <32 x float>, <32 x float>* %a ; 32 x 32-bit lanes = 1024-bit source vector
 189   %res = insertelement <32 x float> %vec, float 5.0, i64 31 ; write 5.0 into the last lane (index 31)
 190   ret <32 x float> %res
 191 }
 192
 193 define <64 x float> @insertelement_v64f32(<64 x float>* %a) #0 {
 194 ; CHECK-LABEL: insertelement_v64f32:
 195 ; VBITS_GE_2048:         ptrue p0.s, vl64
 196 ; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
 197 ; VBITS_GE_2048-NEXT:    mov w9, #63
 198 ; VBITS_GE_2048-NEXT:    mov z1.s, w9
 199 ; VBITS_GE_2048-NEXT:    index z2.s, #0, #1
 200 ; VBITS_GE_2048-NEXT:    ptrue p1.s
 201 ; VBITS_GE_2048-NEXT:    cmpeq p1.s, p1/z, z2.s, z1.s
 202 ; VBITS_GE_2048-NEXT:    fmov s1, #5.00000000
 203 ; VBITS_GE_2048-NEXT:    mov z0.s, p1/m, s1
 204 ; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x8]
 205 ; VBITS_GE_2048-NEXT:    ret
 206   %vec = load <64 x float>, <64 x float>* %a ; 64 x 32-bit lanes = 2048-bit source vector
 207   %res = insertelement <64 x float> %vec, float 5.0, i64 63 ; write 5.0 into the last lane (index 63)
 208   ret <64 x float> %res
 209 }
 210
 211 ; Don't use SVE for 64-bit vectors.
 212 define <1 x double> @insertelement_v1f64(<1 x double> %op1) #0 {
 213 ; CHECK-LABEL: insertelement_v1f64:
 214 ; CHECK:         fmov d0, #5.00000000
 215 ; CHECK-NEXT:    ret
 216   %res = insertelement <1 x double> %op1, double 5.0, i64 0 ; replace the only lane (index 0) with 5.0
 217   ret <1 x double> %res
 218 }
 219
 220 ; Don't use SVE for 128-bit vectors.
 221 define <2 x double> @insertelement_v2f64(<2 x double> %op1) #0 {
 222 ; CHECK-LABEL: insertelement_v2f64:
 223 ; CHECK:         fmov d1, #5.00000000
 224 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 225 ; CHECK-NEXT:    ret
 226   %res = insertelement <2 x double> %op1, double 5.0, i64 1 ; write 5.0 into the last lane (index 1)
 227   ret <2 x double> %res
 228 }
 229
 230 define <4 x double> @insertelement_v4f64(<4 x double>* %a) #0 {
 231 ; CHECK-LABEL: insertelement_v4f64:
 232 ; VBITS_GE_256:         ptrue p0.d, vl4
 233 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0]
 234 ; VBITS_GE_256-NEXT:    mov w9, #3
 235 ; VBITS_GE_256-NEXT:    mov z1.d, x9
 236 ; VBITS_GE_256-NEXT:    index z2.d, #0, #1
 237 ; VBITS_GE_256-NEXT:    ptrue p1.d
 238 ; VBITS_GE_256-NEXT:    cmpeq p1.d, p1/z, z2.d, z1.d
 239 ; VBITS_GE_256-NEXT:    fmov d1, #5.00000000
 240 ; VBITS_GE_256-NEXT:    mov z0.d, p1/m, d1
 241 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x8]
 242 ; VBITS_GE_256-NEXT:    ret
 243   %vec = load <4 x double>, <4 x double>* %a ; 4 x 64-bit lanes = 256-bit source vector
 244   %res = insertelement <4 x double> %vec, double 5.0, i64 3 ; write 5.0 into the last lane (index 3)
 245   ret <4 x double> %res
 246 }
 247
 248 define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
 249 ; CHECK-LABEL: insertelement_v8f64:
 250 ; VBITS_GE_512:         ptrue p0.d, vl8
 251 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 252 ; VBITS_GE_512-NEXT:    mov w9, #7
 253 ; VBITS_GE_512-NEXT:    mov z1.d, x9
 254 ; VBITS_GE_512-NEXT:    index z2.d, #0, #1
 255 ; VBITS_GE_512-NEXT:    ptrue p1.d
 256 ; VBITS_GE_512-NEXT:    cmpeq p1.d, p1/z, z2.d, z1.d
 257 ; VBITS_GE_512-NEXT:    fmov d1, #5.00000000
 258 ; VBITS_GE_512-NEXT:    mov z0.d, p1/m, d1
 259 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x8]
 260 ; VBITS_GE_512-NEXT:    ret
 261   %vec = load <8 x double>, <8 x double>* %a ; 8 x 64-bit lanes = 512-bit source vector
 262   %res = insertelement <8 x double> %vec, double 5.0, i64 7 ; write 5.0 into the last lane (index 7)
 263   ret <8 x double> %res
 264 }
 265
 266 define <16 x double> @insertelement_v16f64(<16 x double>* %a) #0 {
 267 ; CHECK-LABEL: insertelement_v16f64:
 268 ; VBITS_GE_1024:         ptrue p0.d, vl16
 269 ; VBITS_GE_1024-NEXT:    ld1d { z0.d }, p0/z, [x0]
 270 ; VBITS_GE_1024-NEXT:    mov w9, #15
 271 ; VBITS_GE_1024-NEXT:    mov z1.d, x9
 272 ; VBITS_GE_1024-NEXT:    index z2.d, #0, #1
 273 ; VBITS_GE_1024-NEXT:    ptrue p1.d
 274 ; VBITS_GE_1024-NEXT:    cmpeq p1.d, p1/z, z2.d, z1.d
 275 ; VBITS_GE_1024-NEXT:    fmov d1, #5.00000000
 276 ; VBITS_GE_1024-NEXT:    mov z0.d, p1/m, d1
 277 ; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x8]
 278 ; VBITS_GE_1024-NEXT:    ret
 279   %vec = load <16 x double>, <16 x double>* %a ; 16 x 64-bit lanes = 1024-bit source vector
 280   %res = insertelement <16 x double> %vec, double 5.0, i64 15 ; write 5.0 into the last lane (index 15)
 281   ret <16 x double> %res
 282 }
 283
 284 define <32 x double> @insertelement_v32f64(<32 x double>* %a) #0 {
 285 ; CHECK-LABEL: insertelement_v32f64:
 286 ; VBITS_GE_2048:         ptrue p0.d, vl32
 287 ; VBITS_GE_2048-NEXT:    ld1d { z0.d }, p0/z, [x0]
 288 ; VBITS_GE_2048-NEXT:    mov w9, #31
 289 ; VBITS_GE_2048-NEXT:    mov z1.d, x9
 290 ; VBITS_GE_2048-NEXT:    index z2.d, #0, #1
 291 ; VBITS_GE_2048-NEXT:    ptrue p1.d
 292 ; VBITS_GE_2048-NEXT:    cmpeq p1.d, p1/z, z2.d, z1.d
 293 ; VBITS_GE_2048-NEXT:    fmov d1, #5.00000000
 294 ; VBITS_GE_2048-NEXT:    mov z0.d, p1/m, d1
 295 ; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x8]
 296 ; VBITS_GE_2048-NEXT:    ret
 297   %vec = load <32 x double>, <32 x double>* %a ; 32 x 64-bit lanes = 2048-bit source vector
 298   %res = insertelement <32 x double> %vec, double 5.0, i64 31 ; write 5.0 into the last lane (index 31)
 299   ret <32 x double> %res
 300 }
 301
 302 attributes #0 = { "target-features"="+sve" } ; attribute group #0, attached to every function defined in this file: adds the +sve target feature