; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s

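; llvm.aarch64.sve.ld1uwq: 32-bit loads using the quadword (.q) form of LD1W;
; the <vscale x 1 x i1> predicate carries one element per 128-bit quadword.
; The scalar+scalar form scales the index by the element size (lsl #2).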
define <vscale x 4 x i32> @test_svld1uwq_i32_ss(<vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
; CHECK-LABEL: test_svld1uwq_i32_ss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.q }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr i32, ptr %base, i64 %offset
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1uwq.nxv4i32(<vscale x 1 x i1> %pred, ptr %gep)
  ret <vscale x 4 x i32> %res
}

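; The scalar+immediate form encodes offsets in the range [-8, 7] (scaled
; "mul vl"); both ends of the range are exercised below.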
define <vscale x 4 x i32> @test_svld1uwq_i32_si(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1uwq_i32_si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.q }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ld1w { z1.q }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %gep1 = getelementptr inbounds <vscale x 1 x i32>, ptr %base, i64 -8
  %res1 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1uwq.nxv4i32(<vscale x 1 x i1> %pred, ptr %gep1)

  %gep2 = getelementptr inbounds <vscale x 1 x i32>, ptr %base, i64 7
  %res2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1uwq.nxv4i32(<vscale x 1 x i1> %pred, ptr %gep2)

  %res = add <vscale x 4 x i32> %res1, %res2
  ret <vscale x 4 x i32> %res
}

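; An index of 8 falls outside the [-8, 7] immediate range, so the address is
; materialized first: 8 x (vscale x 4) bytes is two vector lengths (addvl #2).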
define <vscale x 4 x i32> @test_svld1uwq_i32_out_of_bound(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1uwq_i32_out_of_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x8, x0, #2
; CHECK-NEXT:    ld1w { z0.q }, p0/z, [x8]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds <vscale x 1 x i32>, ptr %base, i64 8
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1uwq.nxv4i32(<vscale x 1 x i1> %pred, ptr %gep)

  ret <vscale x 4 x i32> %res
}

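; The floating-point variants lower to the same instructions.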
define <vscale x 4 x float> @test_svld1uwq_f32_ss(<vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
; CHECK-LABEL: test_svld1uwq_f32_ss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.q }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %gep = getelementptr float, ptr %base, i64 %offset
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1uwq.nxv4f32(<vscale x 1 x i1> %pred, ptr %gep)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @test_svld1uwq_f32_si(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1uwq_f32_si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.q }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ld1w { z1.q }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %gep1 = getelementptr inbounds <vscale x 1 x float>, ptr %base, i64 -8
  %res1 = call <vscale x 4 x float> @llvm.aarch64.sve.ld1uwq.nxv4f32(<vscale x 1 x i1> %pred, ptr %gep1)

  %gep2 = getelementptr inbounds <vscale x 1 x float>, ptr %base, i64 7
  %res2 = call <vscale x 4 x float> @llvm.aarch64.sve.ld1uwq.nxv4f32(<vscale x 1 x i1> %pred, ptr %gep2)

  %res = fadd <vscale x 4 x float> %res1, %res2
  ret <vscale x 4 x float> %res
}

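; llvm.aarch64.sve.ld1udq: 64-bit loads using the quadword (.q) form of LD1D;
; the scalar+scalar form scales the index by the element size (lsl #3).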
define <vscale x 2 x i64> @test_svld1udq_i64_ss(<vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
; CHECK-LABEL: test_svld1udq_i64_ss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.q }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %gep = getelementptr i64, ptr %base, i64 %offset
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1udq.nxv2i64(<vscale x 1 x i1> %pred, ptr %gep)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @test_svld1udq_i64_si(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1udq_i64_si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.q }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ld1d { z1.q }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %gep1 = getelementptr inbounds <vscale x 1 x i64>, ptr %base, i64 -8
  %res1 = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1udq.nxv2i64(<vscale x 1 x i1> %pred, ptr %gep1)

  %gep2 = getelementptr inbounds <vscale x 1 x i64>, ptr %base, i64 7
  %res2 = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1udq.nxv2i64(<vscale x 1 x i1> %pred, ptr %gep2)

  %res = add <vscale x 2 x i64> %res1, %res2
  ret <vscale x 2 x i64> %res
}

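; An index of -10 falls outside the [-8, 7] immediate range: -10 x (vscale x 8)
; bytes is minus five vector lengths (addvl #-5).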
define <vscale x 2 x i64> @test_svld1udq_i64_out_of_bound(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1udq_i64_out_of_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x8, x0, #-5
; CHECK-NEXT:    ld1d { z0.q }, p0/z, [x8]
; CHECK-NEXT:    ret
  %gep = getelementptr inbounds <vscale x 1 x i64>, ptr %base, i64 -10
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1udq.nxv2i64(<vscale x 1 x i1> %pred, ptr %gep)

  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @test_svld1udq_f64_ss(<vscale x 1 x i1> %pred, ptr %base, i64 %offset) {
; CHECK-LABEL: test_svld1udq_f64_ss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.q }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %gep = getelementptr double, ptr %base, i64 %offset
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1udq.nxv2f64(<vscale x 1 x i1> %pred, ptr %gep)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @test_svld1udq_f64_si(<vscale x 1 x i1> %pred, ptr %base) {
; CHECK-LABEL: test_svld1udq_f64_si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.q }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ld1d { z1.q }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %gep1 = getelementptr inbounds <vscale x 1 x double>, ptr %base, i64 -8
  %res1 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1udq.nxv2f64(<vscale x 1 x i1> %pred, ptr %gep1)

  %gep2 = getelementptr inbounds <vscale x 1 x double>, ptr %base, i64 7
  %res2 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1udq.nxv2f64(<vscale x 1 x i1> %pred, ptr %gep2)

  %res = fadd <vscale x 2 x double> %res1, %res2
  ret <vscale x 2 x double> %res
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1uwq.nxv4i32(<vscale x 1 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1uwq.nxv4f32(<vscale x 1 x i1>, ptr)

declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1udq.nxv2i64(<vscale x 1 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1udq.nxv2f64(<vscale x 1 x i1>, ptr)