1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG
5 target triple = "aarch64-unknown-linux-gnu"
; func_vscale_none: loads a <16 x i32> from %a and from %b, adds them, and
; stores the sum back to %a. Its attribute group (#0) enables SVE but gives no
; vscale_range, so the expected code depends on which llc invocation is used.
;   * Without the vector-bits option, the checks expect NEON q-register code
;     (ldp / four 4s adds / stp).
;   * With -aarch64-sve-vector-bits-min=512, the checks expect one SVE
;     ld1w/add/st1w sequence governed by a vl16 predicate.
7 define void @func_vscale_none(ptr %a, ptr %b) #0 {
8 ; CHECK-NOARG-LABEL: func_vscale_none:
9 ; CHECK-NOARG: // %bb.0:
10 ; CHECK-NOARG-NEXT: ldp q0, q3, [x1, #32]
11 ; CHECK-NOARG-NEXT: ldp q1, q2, [x0, #32]
12 ; CHECK-NOARG-NEXT: ldp q4, q6, [x1]
13 ; CHECK-NOARG-NEXT: add v0.4s, v1.4s, v0.4s
14 ; CHECK-NOARG-NEXT: ldp q1, q5, [x0]
15 ; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v3.4s
16 ; CHECK-NOARG-NEXT: add v1.4s, v1.4s, v4.4s
17 ; CHECK-NOARG-NEXT: add v3.4s, v5.4s, v6.4s
18 ; CHECK-NOARG-NEXT: stp q0, q2, [x0, #32]
19 ; CHECK-NOARG-NEXT: stp q1, q3, [x0]
20 ; CHECK-NOARG-NEXT: ret
22 ; CHECK-ARG-LABEL: func_vscale_none:
23 ; CHECK-ARG: // %bb.0:
24 ; CHECK-ARG-NEXT: ptrue p0.s, vl16
25 ; CHECK-ARG-NEXT: ld1w { z0.s }, p0/z, [x0]
26 ; CHECK-ARG-NEXT: ld1w { z1.s }, p0/z, [x1]
27 ; CHECK-ARG-NEXT: add z0.s, z0.s, z1.s
28 ; CHECK-ARG-NEXT: st1w { z0.s }, p0, [x0]
30 %op1 = load <16 x i32>, ptr %a
31 %op2 = load <16 x i32>, ptr %b
32 %res = add <16 x i32> %op1, %op2
33 store <16 x i32> %res, ptr %a
; SVE enabled; vscale deliberately left unconstrained for this function.
37 attributes #0 = { "target-features"="+sve" }
; func_vscale1_1: same <16 x i32> load/add/store as the other functions in this
; file, but attribute group #1 fixes vscale to exactly 1 via vscale_range(1,1).
; The checks use the prefix shared by both llc invocations, so the same output
; is expected with and without -aarch64-sve-vector-bits-min=512: NEON
; q-register code (ldp / four 4s adds / stp) rather than SVE instructions.
39 define void @func_vscale1_1(ptr %a, ptr %b) #1 {
40 ; CHECK-LABEL: func_vscale1_1:
42 ; CHECK-NEXT: ldp q0, q3, [x1, #32]
43 ; CHECK-NEXT: ldp q1, q2, [x0, #32]
44 ; CHECK-NEXT: ldp q4, q6, [x1]
45 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
46 ; CHECK-NEXT: ldp q1, q5, [x0]
47 ; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
48 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
49 ; CHECK-NEXT: add v3.4s, v5.4s, v6.4s
50 ; CHECK-NEXT: stp q0, q2, [x0, #32]
51 ; CHECK-NEXT: stp q1, q3, [x0]
53 %op1 = load <16 x i32>, ptr %a
54 %op2 = load <16 x i32>, ptr %b
55 %res = add <16 x i32> %op1, %op2
56 store <16 x i32> %res, ptr %a
; SVE enabled with vscale pinned to 1 (minimum == maximum == 1).
60 attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
; func_vscale2_2: same <16 x i32> load/add/store, with attribute group #2
; fixing vscale to exactly 2 via vscale_range(2,2). Both llc invocations are
; expected to produce the same output: the operation is split into two SVE
; ld1w/add/st1w halves. The second half is addressed at element index 8
; (x8 = 8, scaled by lsl #2), and both halves use an all-lanes `ptrue p0.s`.
; NOTE(review): the all-lanes predicate presumably reflects that vscale == 2
; gives exactly 8 x i32 per SVE register - confirm against the SVE lowering.
62 define void @func_vscale2_2(ptr %a, ptr %b) #2 {
63 ; CHECK-LABEL: func_vscale2_2:
65 ; CHECK-NEXT: ptrue p0.s
66 ; CHECK-NEXT: mov x8, #8 // =0x8
67 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
68 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
69 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0]
70 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1]
71 ; CHECK-NEXT: add z0.s, z0.s, z1.s
72 ; CHECK-NEXT: add z1.s, z2.s, z3.s
73 ; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
74 ; CHECK-NEXT: st1w { z1.s }, p0, [x0]
76 %op1 = load <16 x i32>, ptr %a
77 %op2 = load <16 x i32>, ptr %b
78 %res = add <16 x i32> %op1, %op2
79 store <16 x i32> %res, ptr %a
; SVE enabled with vscale pinned to 2 (minimum == maximum == 2).
83 attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
; func_vscale2_4: same <16 x i32> load/add/store, with attribute group #3
; bounding vscale to the range [2, 4] via vscale_range(2,4). Both llc
; invocations are expected to produce the same output: two SVE ld1w/add/st1w
; halves, the second at element index 8 (x8 = 8, scaled by lsl #2). Unlike
; func_vscale2_2, the predicate is `ptrue p0.s, vl8`, limiting each half to
; 8 lanes.
; NOTE(review): the explicit vl8 is presumably needed because a register may
; hold more than 8 x i32 when vscale > 2 - confirm against the SVE lowering.
85 define void @func_vscale2_4(ptr %a, ptr %b) #3 {
86 ; CHECK-LABEL: func_vscale2_4:
88 ; CHECK-NEXT: ptrue p0.s, vl8
89 ; CHECK-NEXT: mov x8, #8 // =0x8
90 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
91 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
92 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0]
93 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1]
94 ; CHECK-NEXT: add z0.s, z0.s, z1.s
95 ; CHECK-NEXT: add z1.s, z2.s, z3.s
96 ; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
97 ; CHECK-NEXT: st1w { z1.s }, p0, [x0]
99 %op1 = load <16 x i32>, ptr %a
100 %op2 = load <16 x i32>, ptr %b
101 %res = add <16 x i32> %op1, %op2
102 store <16 x i32> %res, ptr %a
; SVE enabled with vscale bounded to the inclusive range 2..4.
106 attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
; func_vscale4_4: same <16 x i32> load/add/store, with attribute group #4
; fixing vscale to exactly 4 via vscale_range(4,4). Both llc invocations are
; expected to produce the same output: a single SVE ld1w/add/st1w sequence
; under an all-lanes `ptrue p0.s`, with no splitting and no vl-limited
; predicate.
; NOTE(review): this presumably reflects that vscale == 4 gives exactly
; 16 x i32 per SVE register - confirm against the SVE lowering.
108 define void @func_vscale4_4(ptr %a, ptr %b) #4 {
109 ; CHECK-LABEL: func_vscale4_4:
111 ; CHECK-NEXT: ptrue p0.s
112 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
113 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
114 ; CHECK-NEXT: add z0.s, z0.s, z1.s
115 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
117 %op1 = load <16 x i32>, ptr %a
118 %op2 = load <16 x i32>, ptr %b
119 %res = add <16 x i32> %op1, %op2
120 store <16 x i32> %res, ptr %a
; SVE enabled with vscale pinned to 4 (minimum == maximum == 4).
124 attributes #4 = { "target-features"="+sve" vscale_range(4,4) }
; func_vscale8_8: same <16 x i32> load/add/store, with attribute group #5
; fixing vscale to exactly 8 via vscale_range(8,8). Both llc invocations are
; expected to produce the same output: a single SVE ld1w/add/st1w sequence
; whose predicate is `ptrue p0.s, vl16`, limiting the operation to 16 lanes.
; NOTE(review): the vl16 limit is presumably required because a register
; holds more than 16 x i32 when vscale == 8 - confirm against the SVE lowering.
126 define void @func_vscale8_8(ptr %a, ptr %b) #5 {
127 ; CHECK-LABEL: func_vscale8_8:
129 ; CHECK-NEXT: ptrue p0.s, vl16
130 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
131 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
132 ; CHECK-NEXT: add z0.s, z0.s, z1.s
133 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
135 %op1 = load <16 x i32>, ptr %a
136 %op2 = load <16 x i32>, ptr %b
137 %res = add <16 x i32> %op1, %op2
138 store <16 x i32> %res, ptr %a
; SVE enabled with vscale pinned to 8 (minimum == maximum == 8).
142 attributes #5 = { "target-features"="+sve" vscale_range(8,8) }