1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
; Strided-store VP intrinsic for <256 x float> data. Going by the parameter
; names, the operands are: value to store, base pointer, i64 stride,
; <256 x i1> lane mask, and i32 explicit vector length.
4 declare void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x float> strided store where %stride and %mask are both forwarded
; unchanged from the function arguments. The checks expect a `vstu` with two
; scalar-register operands and a trailing %vm1 mask operand.
; NOTE(review): the leading `and %s2, %s2, (32)0` presumably clears the upper
; half of the register holding the i32 %evl - confirm against the VE backend.
6 define fastcc void @vp_strided_store_v256f32_rrm(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
7 ; CHECK-LABEL: vp_strided_store_v256f32_rrm:
9 ; CHECK-NEXT: and %s2, %s2, (32)0
11 ; CHECK-NEXT: vstu %v0, %s1, %s0, %vm1
12 ; CHECK-NEXT: b.l.t (, %s10)
13 call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x float> strided store with a run-time %stride and an all-true mask.
; The mask is built inside the function, so the expected `vstu` carries no
; vector-mask operand.
17 define fastcc void @vp_strided_store_v256f32_rr(<256 x float> %val, ptr %ptr, i64 %stride, i32 %evl) {
18 ; CHECK-LABEL: vp_strided_store_v256f32_rr:
20 ; CHECK-NEXT: and %s2, %s2, (32)0
22 ; CHECK-NEXT: vstu %v0, %s1, %s0
23 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes: put it in lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
24 %one = insertelement <256 x i1> undef, i1 1, i32 0
25 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
26 call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
; <256 x float> strided store with the constant stride 24 and an all-true
; mask. The checks expect the constant to appear as an immediate operand of
; `vstu` (`vstu %v0, 24, %s0`) and no vector-mask operand.
30 define fastcc void @vp_strided_store_v256f32_ri(<256 x float> %val, ptr %ptr, i32 %evl) {
31 ; CHECK-LABEL: vp_strided_store_v256f32_ri:
33 ; CHECK-NEXT: and %s1, %s1, (32)0
35 ; CHECK-NEXT: vstu %v0, 24, %s0
36 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes to form the all-true mask.
37 %one = insertelement <256 x i1> undef, i1 1, i32 0
38 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
39 call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
; Strided-store VP intrinsic for <256 x i32> data. Going by the parameter
; names, the operands are: value to store, base pointer, i64 stride,
; <256 x i1> lane mask, and i32 explicit vector length.
43 declare void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x i32> strided store where %stride and %mask are both forwarded
; unchanged from the function arguments. The checks expect a `vstl` with two
; scalar-register operands and a trailing %vm1 mask operand.
; NOTE(review): the leading `and %s2, %s2, (32)0` presumably clears the upper
; half of the register holding the i32 %evl - confirm against the VE backend.
45 define fastcc void @vp_strided_store_v256i32_rrm(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
46 ; CHECK-LABEL: vp_strided_store_v256i32_rrm:
48 ; CHECK-NEXT: and %s2, %s2, (32)0
50 ; CHECK-NEXT: vstl %v0, %s1, %s0, %vm1
51 ; CHECK-NEXT: b.l.t (, %s10)
52 call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x i32> strided store with a run-time %stride and an all-true mask.
; The mask is built inside the function, so the expected `vstl` carries no
; vector-mask operand.
56 define fastcc void @vp_strided_store_v256i32_rr(<256 x i32> %val, ptr %ptr, i64 %stride, i32 %evl) {
57 ; CHECK-LABEL: vp_strided_store_v256i32_rr:
59 ; CHECK-NEXT: and %s2, %s2, (32)0
61 ; CHECK-NEXT: vstl %v0, %s1, %s0
62 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes: put it in lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
63 %one = insertelement <256 x i1> undef, i1 1, i32 0
64 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
65 call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
; <256 x i32> strided store with the constant stride 24 and an all-true
; mask. The checks expect the constant to appear as an immediate operand of
; `vstl` (`vstl %v0, 24, %s0`) and no vector-mask operand.
69 define fastcc void @vp_strided_store_v256i32_ri(<256 x i32> %val, ptr %ptr, i32 %evl) {
70 ; CHECK-LABEL: vp_strided_store_v256i32_ri:
72 ; CHECK-NEXT: and %s1, %s1, (32)0
74 ; CHECK-NEXT: vstl %v0, 24, %s0
75 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes to form the all-true mask.
76 %one = insertelement <256 x i1> undef, i1 1, i32 0
77 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
78 call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
; Strided-store VP intrinsic for <256 x double> data. Going by the parameter
; names, the operands are: value to store, base pointer, i64 stride,
; <256 x i1> lane mask, and i32 explicit vector length.
82 declare void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x double> strided store where %stride and %mask are both forwarded
; unchanged from the function arguments. The checks expect a plain `vst` with
; two scalar-register operands and a trailing %vm1 mask operand.
; NOTE(review): the leading `and %s2, %s2, (32)0` presumably clears the upper
; half of the register holding the i32 %evl - confirm against the VE backend.
84 define fastcc void @vp_strided_store_v256f64_rrm(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
85 ; CHECK-LABEL: vp_strided_store_v256f64_rrm:
87 ; CHECK-NEXT: and %s2, %s2, (32)0
89 ; CHECK-NEXT: vst %v0, %s1, %s0, %vm1
90 ; CHECK-NEXT: b.l.t (, %s10)
91 call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x double> strided store with a run-time %stride and an all-true mask.
; The mask is built inside the function, so the expected `vst` carries no
; vector-mask operand.
95 define fastcc void @vp_strided_store_v256f64_rr(<256 x double> %val, ptr %ptr, i64 %stride, i32 %evl) {
96 ; CHECK-LABEL: vp_strided_store_v256f64_rr:
98 ; CHECK-NEXT: and %s2, %s2, (32)0
100 ; CHECK-NEXT: vst %v0, %s1, %s0
101 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes: put it in lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
102 %one = insertelement <256 x i1> undef, i1 1, i32 0
103 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
104 call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
; <256 x double> strided store with the constant stride 24 and an all-true
; mask. The checks expect `lvl %s1` right after the `and`, then a `vst` that
; takes 24 as an immediate operand and has no vector-mask operand.
; NOTE(review): `lvl` presumably loads the vector-length register from the
; masked %evl value - confirm against the VE ISA manual.
108 define fastcc void @vp_strided_store_v256f64_ri(<256 x double> %val, ptr %ptr, i32 %evl) {
109 ; CHECK-LABEL: vp_strided_store_v256f64_ri:
111 ; CHECK-NEXT: and %s1, %s1, (32)0
112 ; CHECK-NEXT: lvl %s1
113 ; CHECK-NEXT: vst %v0, 24, %s0
114 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes to form the all-true mask.
115 %one = insertelement <256 x i1> undef, i1 1, i32 0
116 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
117 call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
; Strided-store VP intrinsic for <256 x i64> data. Going by the parameter
; names, the operands are: value to store, base pointer, i64 stride,
; <256 x i1> lane mask, and i32 explicit vector length.
121 declare void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x i64> strided store where %stride and %mask are both forwarded
; unchanged from the function arguments. The checks expect `and` on %s2,
; `lvl %s2`, then a `vst` with two scalar-register operands and a trailing
; %vm1 mask operand.
; NOTE(review): `and ... (32)0` followed by `lvl` presumably zero-extends the
; i32 %evl and loads it as the vector length - confirm against the VE ISA.
123 define fastcc void @vp_strided_store_v256i64_rrm(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
124 ; CHECK-LABEL: vp_strided_store_v256i64_rrm:
126 ; CHECK-NEXT: and %s2, %s2, (32)0
127 ; CHECK-NEXT: lvl %s2
128 ; CHECK-NEXT: vst %v0, %s1, %s0, %vm1
129 ; CHECK-NEXT: b.l.t (, %s10)
130 call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
; <256 x i64> strided store with a run-time %stride and an all-true mask.
; The mask is built inside the function; the checks expect `and` on %s2,
; `lvl %s2`, then a `vst` with no vector-mask operand.
134 define fastcc void @vp_strided_store_v256i64_rr(<256 x i64> %val, ptr %ptr, i64 %stride, i32 %evl) {
135 ; CHECK-LABEL: vp_strided_store_v256i64_rr:
137 ; CHECK-NEXT: and %s2, %s2, (32)0
138 ; CHECK-NEXT: lvl %s2
139 ; CHECK-NEXT: vst %v0, %s1, %s0
140 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes: put it in lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
141 %one = insertelement <256 x i1> undef, i1 1, i32 0
142 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
143 call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
; <256 x i64> strided store with the constant stride 24 and an all-true
; mask. The checks expect `and` on %s1, `lvl %s1`, then a `vst` that takes 24
; as an immediate operand and has no vector-mask operand.
147 define fastcc void @vp_strided_store_v256i64_ri(<256 x i64> %val, ptr %ptr, i32 %evl) {
148 ; CHECK-LABEL: vp_strided_store_v256i64_ri:
150 ; CHECK-NEXT: and %s1, %s1, (32)0
151 ; CHECK-NEXT: lvl %s1
152 ; CHECK-NEXT: vst %v0, 24, %s0
153 ; CHECK-NEXT: b.l.t (, %s10)
; Splat `i1 1` across all 256 lanes to form the all-true mask.
154 %one = insertelement <256 x i1> undef, i1 1, i32 0
155 %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
156 call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)