1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: opt -S -loop-reduce < %s | FileCheck %s --check-prefix=IR
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=ASM
4 ; Note: To update this test, run utils/update_test_checks.py on the opt RUN line and utils/update_llc_test_checks.py on the llc RUN line, separately.
6 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
7 target triple = "aarch64-linux-gnu"
9 ; These tests check that the IR coming out of LSR addresses the input/output pointers directly through i8 GEPs, without introducing pointer casts.
10 ; They also check that the scaled-index addressing mode is used in the generated assembly, e.g. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].
; ld_st_nxv8i16: loop using plain (unpredicated) scalable-vector load/store.
; Each iteration loads a <vscale x 8 x i16> from %in at element index %indvar,
; adds a splat of the constant 3, and stores the result to %out at the same
; index. The index advances by vscale * 8 until it equals 1024.
; IR checks: LSR output uses i8 GEPs on the incoming pointers with the index
; shifted left by 1. ASM checks: ld1h/st1h use [base, index, lsl #1].
12 define void @ld_st_nxv8i16(ptr %in, ptr %out) {
13 ; IR-LABEL: @ld_st_nxv8i16(
15 ; IR-NEXT: br label [[LOOP_PH:%.*]]
17 ; IR-NEXT: [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
18 ; IR-NEXT: [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
19 ; IR-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
20 ; IR-NEXT: [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
21 ; IR-NEXT: br label [[LOOP:%.*]]
23 ; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
24 ; IR-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
25 ; IR-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
26 ; IR-NEXT: [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
27 ; IR-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
28 ; IR-NEXT: [[VAL:%.*]] = load <vscale x 8 x i16>, ptr [[UGLYGEP1]], align 16
29 ; IR-NEXT: [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
30 ; IR-NEXT: store <vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], align 16
31 ; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
32 ; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
33 ; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
35 ; IR-NEXT: br label [[EXIT:%.*]]
39 ; ASM-LABEL: ld_st_nxv8i16:
40 ; ASM: // %bb.0: // %entry
41 ; ASM-NEXT: ptrue p0.h
42 ; ASM-NEXT: mov z0.h, #3 // =0x3
43 ; ASM-NEXT: mov x8, xzr
45 ; ASM-NEXT: .LBB0_1: // %loop
46 ; ASM-NEXT: // =>This Inner Loop Header: Depth=1
47 ; ASM-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
48 ; ASM-NEXT: add z1.h, z1.h, z0.h
49 ; ASM-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
50 ; ASM-NEXT: add x8, x8, x9
51 ; ASM-NEXT: cmp x8, #1024
52 ; ASM-NEXT: b.ne .LBB0_1
53 ; ASM-NEXT: // %bb.2: // %exit
; Splat of the i16 constant 3 across all lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask).
59 %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
60 %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; Loop step: vscale << 3 == vscale * 8, matching the 8 in <vscale x 8 x i16>.
61 %vscale = call i64 @llvm.vscale.i64()
62 %scaled_vf = shl i64 %vscale, 3
65 loop: ; preds = %loop, %loop.ph
66 %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
; Both pointers are indexed by %indvar in units of i16; this is the scaling
; that shows up as `shl ..., 1` in the IR checks and `lsl #1` in the ASM checks.
67 %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
68 %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
69 %val = load <vscale x 8 x i16>, ptr %ptr.in, align 16
70 %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
71 store <vscale x 8 x i16> %addp_vec, ptr %ptr.out, align 16
; Advance by the scaled step and leave the loop once the index equals 1024.
72 %indvar.next = add nsw i64 %indvar, %scaled_vf
73 %exit.cond = icmp eq i64 %indvar.next, 1024
74 br i1 %exit.cond, label %loop.exit, label %loop
76 loop.exit: ; preds = %loop
; masked_ld_st_nxv8i16: same load/add/store loop shape as @ld_st_nxv8i16, but
; memory is accessed through llvm.masked.load / llvm.masked.store with an
; all-true predicate, and the exit bound is the runtime argument %n instead of
; a constant.
; IR checks: LSR output uses i8 GEPs on the incoming pointers with the index
; shifted left by 1. ASM checks: ld1h/st1h use [base, index, lsl #1].
83 define void @masked_ld_st_nxv8i16(ptr %in, ptr %out, i64 %n) {
84 ; IR-LABEL: @masked_ld_st_nxv8i16(
86 ; IR-NEXT: br label [[LOOP_PH:%.*]]
88 ; IR-NEXT: [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
89 ; IR-NEXT: [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
90 ; IR-NEXT: [[PTRUE_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
91 ; IR-NEXT: [[PTRUE_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[PTRUE_VEC_SPLATINSERT]], <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
92 ; IR-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
93 ; IR-NEXT: [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
94 ; IR-NEXT: br label [[LOOP:%.*]]
96 ; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
97 ; IR-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
98 ; IR-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
99 ; IR-NEXT: [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
100 ; IR-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
101 ; IR-NEXT: [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[UGLYGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
102 ; IR-NEXT: [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
103 ; IR-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
104 ; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
105 ; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]]
106 ; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
108 ; IR-NEXT: br label [[EXIT:%.*]]
112 ; ASM-LABEL: masked_ld_st_nxv8i16:
113 ; ASM: // %bb.0: // %entry
114 ; ASM-NEXT: ptrue p0.h
115 ; ASM-NEXT: mov z0.h, #3 // =0x3
116 ; ASM-NEXT: mov x8, xzr
118 ; ASM-NEXT: .LBB1_1: // %loop
119 ; ASM-NEXT: // =>This Inner Loop Header: Depth=1
120 ; ASM-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
121 ; ASM-NEXT: add z1.h, z1.h, z0.h
122 ; ASM-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
123 ; ASM-NEXT: add x8, x8, x9
124 ; ASM-NEXT: cmp x2, x8
125 ; ASM-NEXT: b.ne .LBB1_1
126 ; ASM-NEXT: // %bb.2: // %exit
; Splat of the i16 constant 3 across all lanes (insertelement into lane 0,
; then shufflevector with an all-zero mask).
132 %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
133 %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; All-true predicate, built the same way as the value splat above; it is the
; mask operand of both masked intrinsic calls in the loop.
134 %ptrue_vec.splatinsert = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
135 %ptrue_vec.splat = shufflevector <vscale x 8 x i1> %ptrue_vec.splatinsert, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
; Loop step: vscale << 3 == vscale * 8, matching the 8 in <vscale x 8 x i16>.
136 %vscale = call i64 @llvm.vscale.i64()
137 %scaled_vf = shl i64 %vscale, 3
140 loop: ; preds = %loop, %loop.ph
141 %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
; Both pointers are indexed by %indvar in units of i16; this is the scaling
; that shows up as `shl ..., 1` in the IR checks and `lsl #1` in the ASM checks.
142 %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
143 %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
; Masked load operands: pointer, alignment (4), mask, passthru (undef).
144 %val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %ptr.in, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
145 %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
; Masked store operands: value, pointer, alignment (4), mask.
146 call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %addp_vec, ptr %ptr.out, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
; Advance by the scaled step and leave the loop once the index equals %n.
; The IR checks expect this compare with its operands in the opposite order
; (icmp eq [[N]], [[INDVAR_NEXT]]), and the ASM checks expect `cmp x2, x8`.
147 %indvar.next = add nsw i64 %indvar, %scaled_vf
148 %exit.cond = icmp eq i64 %indvar.next, %n
149 br i1 %exit.cond, label %loop.exit, label %loop
151 loop.exit: ; preds = %loop
; Intrinsic declarations for the calls made in this file.
; llvm.vscale.i64 is called by both test functions to compute the loop step.
158 declare i64 @llvm.vscale.i64()
; Masked load/store on <vscale x 8 x i16>, called only by @masked_ld_st_nxv8i16.
; The i32 immarg operand is the alignment (see the LLVM Language Reference).
160 declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
162 declare void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, i32 immarg, <vscale x 8 x i1>)