1 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s -check-prefixes=CHECK,Z14
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s -check-prefixes=CHECK,Z15
4 ; Check that int-to-fp conversions from a narrower type get a vector extension.
6 define void @fun0(<2 x i8> %Src, <2 x double>* %Dst) {
; Signed <2 x i8> -> <2 x double> conversion; the result is stored through %Dst.
; The expectations below use the prefix shared by both -mcpu settings: three
; widening steps (vuphb, vuphh, vuphf) on the source in %v24, then a single
; vcdgb conversion and a vst to the address in %r2.
8 ; CHECK: vuphb %v0, %v24
9 ; CHECK-NEXT: vuphh %v0, %v0
10 ; CHECK-NEXT: vuphf %v0, %v0
11 ; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
12 ; CHECK-NEXT: vst %v0, 0(%r2), 3
14 %c = sitofp <2 x i8> %Src to <2 x double>
15 store <2 x double> %c, <2 x double>* %Dst
19 define void @fun1(<2 x i16> %Src, <2 x double>* %Dst) {
; Signed <2 x i16> -> <2 x double> conversion; the result is stored through %Dst.
; Expected for both -mcpu settings: two widening steps (vuphh, vuphf) on the
; source in %v24, then a single vcdgb conversion and a vst.
21 ; CHECK: vuphh %v0, %v24
22 ; CHECK-NEXT: vuphf %v0, %v0
23 ; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
24 ; CHECK-NEXT: vst %v0, 0(%r2), 3
26 %c = sitofp <2 x i16> %Src to <2 x double>
27 store <2 x double> %c, <2 x double>* %Dst
31 define void @fun2(<2 x i32> %Src, <2 x double>* %Dst) {
; Signed <2 x i32> -> <2 x double> conversion; the result is stored through %Dst.
; Expected for both -mcpu settings: one widening step (vuphf) on the source in
; %v24, then a single vcdgb conversion and a vst.
33 ; CHECK: vuphf %v0, %v24
34 ; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
35 ; CHECK-NEXT: vst %v0, 0(%r2), 3
37 %c = sitofp <2 x i32> %Src to <2 x double>
38 store <2 x double> %c, <2 x double>* %Dst
42 define void @fun3(<4 x i16> %Src, <4 x float>* %Dst) {
; Signed <4 x i16> -> <4 x float> conversion; the result is stored through %Dst.
; The two -mcpu settings have different expectations after the initial vuphh.
;
; z14 expectations: each of the four elements is extracted with vlgvf
; (indices 3, 2, 1, 0), converted on its own with cefbr, and the results are
; merged back together with vmrhf / vmrhg before the vst.
45 ; Z14: vuphh %v0, %v24
46 ; Z14-NEXT: vlgvf %r0, %v0, 3
47 ; Z14-NEXT: cefbr %f1, %r0
48 ; Z14-NEXT: vlgvf %r0, %v0, 2
49 ; Z14-NEXT: cefbr %f2, %r0
50 ; Z14-NEXT: vlgvf %r0, %v0, 1
51 ; Z14-NEXT: vmrhf %v1, %v2, %v1
52 ; Z14-NEXT: cefbr %f2, %r0
53 ; Z14-NEXT: vlgvf %r0, %v0, 0
54 ; Z14-NEXT: cefbr %f0, %r0
55 ; Z14-NEXT: vmrhf %v0, %v0, %v2
56 ; Z14-NEXT: vmrhg %v0, %v0, %v1
57 ; Z14-NEXT: vst %v0, 0(%r2), 3
; z15 expectations: the whole vector is converted by one vcefb.
60 ; Z15: vuphh %v0, %v24
61 ; Z15-NEXT: vcefb %v0, %v0, 0, 0
62 ; Z15-NEXT: vst %v0, 0(%r2), 3
64 %c = sitofp <4 x i16> %Src to <4 x float>
65 store <4 x float> %c, <4 x float>* %Dst
69 define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
; Unsigned <2 x i8> -> <2 x double> conversion; the result is stored through %Dst.
; Expected for both -mcpu settings: a vector is loaded from .LCPI4_0 (larl + vl)
; and used as an operand of a vperm that also reads the source in %v24; the
; vperm result is converted by a single vcdlgb and stored with vst.
71 ; CHECK: larl %r1, .LCPI4_0
72 ; CHECK-NEXT: vl %v0, 0(%r1), 3
73 ; CHECK-NEXT: vperm %v0, %v0, %v24, %v0
74 ; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
75 ; CHECK-NEXT: vst %v0, 0(%r2), 3
77 %c = uitofp <2 x i8> %Src to <2 x double>
78 store <2 x double> %c, <2 x double>* %Dst
82 define void @fun5(<2 x i16> %Src, <2 x double>* %Dst) {
; Unsigned <2 x i16> -> <2 x double> conversion; the result is stored through %Dst.
; Expected for both -mcpu settings: the same shape as the <2 x i8> case, i.e. a
; vector loaded from .LCPI5_0 feeds a vperm together with the source in %v24,
; followed by a single vcdlgb conversion and a vst.
84 ; CHECK: larl %r1, .LCPI5_0
85 ; CHECK-NEXT: vl %v0, 0(%r1), 3
86 ; CHECK-NEXT: vperm %v0, %v0, %v24, %v0
87 ; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
88 ; CHECK-NEXT: vst %v0, 0(%r2), 3
90 %c = uitofp <2 x i16> %Src to <2 x double>
91 store <2 x double> %c, <2 x double>* %Dst
95 define void @fun6(<2 x i32> %Src, <2 x double>* %Dst) {
; Unsigned <2 x i32> -> <2 x double> conversion; the result is stored through %Dst.
; Expected for both -mcpu settings: one widening step (vuplhf) on the source in
; %v24, a single vcdlgb conversion, a vst, and then the return (br %r14).
97 ; CHECK: vuplhf %v0, %v24
98 ; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
99 ; CHECK-NEXT: vst %v0, 0(%r2), 3
100 ; CHECK-NEXT: br %r14
101 %c = uitofp <2 x i32> %Src to <2 x double>
102 store <2 x double> %c, <2 x double>* %Dst
106 define void @fun7(<4 x i16> %Src, <4 x float>* %Dst) {
; Unsigned <4 x i16> -> <4 x float> conversion; the result is stored through %Dst.
; The two -mcpu settings have different expectations after the initial vuplhh.
;
; z14 expectations: each of the four elements is extracted with vlgvf
; (indices 3, 2, 1, 0), converted on its own with celfbr, and the results are
; merged back together with vmrhf / vmrhg before the vst.
109 ; Z14: vuplhh %v0, %v24
110 ; Z14-NEXT: vlgvf %r0, %v0, 3
111 ; Z14-NEXT: celfbr %f1, 0, %r0, 0
112 ; Z14-NEXT: vlgvf %r0, %v0, 2
113 ; Z14-NEXT: celfbr %f2, 0, %r0, 0
114 ; Z14-NEXT: vlgvf %r0, %v0, 1
115 ; Z14-NEXT: vmrhf %v1, %v2, %v1
116 ; Z14-NEXT: celfbr %f2, 0, %r0, 0
117 ; Z14-NEXT: vlgvf %r0, %v0, 0
118 ; Z14-NEXT: celfbr %f0, 0, %r0, 0
119 ; Z14-NEXT: vmrhf %v0, %v0, %v2
120 ; Z14-NEXT: vmrhg %v0, %v0, %v1
121 ; Z14-NEXT: vst %v0, 0(%r2), 3
; z15 expectations: the whole vector is converted by one vcelfb.
124 ; Z15: vuplhh %v0, %v24
125 ; Z15-NEXT: vcelfb %v0, %v0, 0, 0
126 ; Z15-NEXT: vst %v0, 0(%r2), 3
128 %c = uitofp <4 x i16> %Src to <4 x float>
129 store <4 x float> %c, <4 x float>* %Dst
133 ; Test that this does not crash but results in scalarized conversions.
134 define void @fun8(<2 x i64> %dwords, <2 x fp128> *%ptr) {
; Unsigned <2 x i64> -> <2 x fp128> conversion; the converted vector is stored
; through %ptr.
138 %conv = uitofp <2 x i64> %dwords to <2 x fp128>
139 store <2 x fp128> %conv, <2 x fp128> *%ptr
143 ; Test that this results in vectorized conversions.
144 define void @fun9(<10 x i16> *%Src, <10 x float> *%ptr) {
; Loads a <10 x i16> from %Src, converts it unsigned to <10 x float>, and stores
; the result through %ptr.
;
; z15 expectations: the source is read with two vl loads (offsets 0 and 16 from
; %r2). One vperm (using a vector loaded from .LCPI9_0) and two vuplhh steps
; produce three inputs, each converted by its own vcelfb. The results are
; written with a vsteg at offset 32 and two vst stores at offsets 16 and 0
; from %r3.
146 ; Z15: larl %r1, .LCPI9_0
147 ; Z15-NEXT: vl %v0, 16(%r2), 4
148 ; Z15-NEXT: vl %v1, 0(%r2), 4
149 ; Z15-NEXT: vl %v2, 0(%r1), 3
150 ; Z15-NEXT: vperm %v2, %v2, %v1, %v2
151 ; Z15-NEXT: vuplhh %v1, %v1
152 ; Z15-NEXT: vuplhh %v0, %v0
153 ; Z15-NEXT: vcelfb %v2, %v2, 0, 0
154 ; Z15-NEXT: vcelfb %v1, %v1, 0, 0
155 ; Z15-NEXT: vcelfb %v0, %v0, 0, 0
156 ; Z15-NEXT: vsteg %v0, 32(%r3), 0
157 ; Z15-NEXT: vst %v2, 16(%r3), 4
158 ; Z15-NEXT: vst %v1, 0(%r3), 4
161 %Val = load <10 x i16>, <10 x i16> *%Src
162 %conv = uitofp <10 x i16> %Val to <10 x float>
163 store <10 x float> %conv, <10 x float> *%ptr