1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
; @f: load a <4 x i16> vector from %in, convert the whole vector to
; <4 x float> with uitofp, then add lanes 0, 1 and 2 together as scalars.
; The expected assembly keeps the conversion in vector form: the i16 lanes
; are widened with vmovl.u16 and converted with a single q-register
; vcvt.f32.u32; only the two additions are scalar (s-register) operations.
4 define float @f(ptr nocapture %in) {
7 ; CHECK-NEXT: vld1.16 {d16}, [r0:64]
8 ; CHECK-NEXT: vmovl.u16 q8, d16
9 ; CHECK-NEXT: vcvt.f32.u32 q0, q8
10 ; CHECK-NEXT: vadd.f32 s4, s0, s1
11 ; CHECK-NEXT: vadd.f32 s0, s4, s2
12 ; CHECK-NEXT: vmov r0, s0
; Lane 3 of the converted vector is never extracted below.
14 %1 = load <4 x i16>, ptr %in
15 %2 = uitofp <4 x i16> %1 to <4 x float>
16 %3 = extractelement <4 x float> %2, i32 0
17 %4 = extractelement <4 x float> %2, i32 1
18 %5 = extractelement <4 x float> %2, i32 2
; (lane0 + lane1) + lane2, in that association order.
20 %6 = fadd float %3, %4
21 %7 = fadd float %6, %5
; @g: load a <4 x i16> vector from %in, extract lane 0 and convert that
; single i16 to float with uitofp (unsigned).
; The expected assembly moves the lane to a core register with the unsigned
; lane move vmov.u16, transfers it to s0, and converts with the scalar
; unsigned vcvt.f32.u32; no separate extension instruction is expected.
26 define float @g(ptr nocapture %in) {
29 ; CHECK-NEXT: vldr d16, [r0]
30 ; CHECK-NEXT: vmov.u16 r0, d16[0]
31 ; CHECK-NEXT: vmov s0, r0
32 ; CHECK-NEXT: vcvt.f32.u32 s0, s0
33 ; CHECK-NEXT: vmov r0, s0
35 %1 = load <4 x i16>, ptr %in
36 %2 = extractelement <4 x i16> %1, i32 0
37 %3 = uitofp i16 %2 to float
41 ; Make sure we generate zext from <4 x i8> to <4 x i32>.
; @h: load a 4-byte-aligned <4 x i8> vector from %in and build a <4 x i32>
; by extracting each i8 lane, zero-extending it to i32, and inserting it at
; the same lane index of the result (starting from undef).
; The expected assembly loads the four bytes as one 32-bit lane (vld1.32),
; widens them with vmovl.u8, then moves each 16-bit lane to r0-r3 and
; applies uxtb to each register.
42 define <4 x i32> @h(ptr %in) {
45 ; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
46 ; CHECK-NEXT: vmovl.u8 q8, d16
47 ; CHECK-NEXT: vmov.u16 r0, d16[0]
48 ; CHECK-NEXT: vmov.u16 r1, d16[1]
49 ; CHECK-NEXT: vmov.u16 r2, d16[2]
50 ; CHECK-NEXT: vmov.u16 r3, d16[3]
51 ; CHECK-NEXT: uxtb r0, r0
52 ; CHECK-NEXT: uxtb r1, r1
53 ; CHECK-NEXT: uxtb r2, r2
54 ; CHECK-NEXT: uxtb r3, r3
56 %1 = load <4 x i8>, ptr %in, align 4
; Lane 0: extract, zero-extend, insert.
57 %2 = extractelement <4 x i8> %1, i32 0
58 %3 = zext i8 %2 to i32
59 %4 = insertelement <4 x i32> undef, i32 %3, i32 0
; Lane 1.
60 %5 = extractelement <4 x i8> %1, i32 1
61 %6 = zext i8 %5 to i32
62 %7 = insertelement <4 x i32> %4, i32 %6, i32 1
; Lane 2.
63 %8 = extractelement <4 x i8> %1, i32 2
64 %9 = zext i8 %8 to i32
65 %10 = insertelement <4 x i32> %7, i32 %9, i32 2
; Lane 3; %13 has all four lanes defined.
66 %11 = extractelement <4 x i8> %1, i32 3
67 %12 = zext i8 %11 to i32
68 %13 = insertelement <4 x i32> %10, i32 %12, i32 3
; @i: load a <4 x i16> vector from %in, extract lane 0 and convert that
; single i16 to float with sitofp (signed).
; The expected assembly uses the signed lane move vmov.s16 into a core
; register followed by the scalar signed vcvt.f32.s32; no separate
; sign-extension instruction is expected.
72 define float @i(ptr nocapture %in) {
75 ; CHECK-NEXT: vldr d16, [r0]
76 ; CHECK-NEXT: vmov.s16 r0, d16[0]
77 ; CHECK-NEXT: vmov s0, r0
78 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
79 ; CHECK-NEXT: vmov r0, s0
81 %1 = load <4 x i16>, ptr %in
82 %2 = extractelement <4 x i16> %1, i32 0
83 %3 = sitofp i16 %2 to float
; @j: load an <8 x i8> vector from %in, extract the last lane (index 7) and
; convert that single i8 to float with uitofp (unsigned).
; The expected assembly uses the unsigned byte lane move vmov.u8 on d16[7]
; followed by the scalar unsigned vcvt.f32.u32.
87 define float @j(ptr nocapture %in) {
90 ; CHECK-NEXT: vldr d16, [r0]
91 ; CHECK-NEXT: vmov.u8 r0, d16[7]
92 ; CHECK-NEXT: vmov s0, r0
93 ; CHECK-NEXT: vcvt.f32.u32 s0, s0
94 ; CHECK-NEXT: vmov r0, s0
96 %1 = load <8 x i8>, ptr %in
97 %2 = extractelement <8 x i8> %1, i32 7
98 %3 = uitofp i8 %2 to float
; @k: load an <8 x i8> vector from %in, extract the last lane (index 7) and
; convert that single i8 to float with sitofp (signed).
; The expected assembly uses the signed byte lane move vmov.s8 on d16[7]
; followed by the scalar signed vcvt.f32.s32.
102 define float @k(ptr nocapture %in) {
105 ; CHECK-NEXT: vldr d16, [r0]
106 ; CHECK-NEXT: vmov.s8 r0, d16[7]
107 ; CHECK-NEXT: vmov s0, r0
108 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
109 ; CHECK-NEXT: vmov r0, s0
111 %1 = load <8 x i8>, ptr %in
112 %2 = extractelement <8 x i8> %1, i32 7
113 %3 = sitofp i8 %2 to float
; @KnownUpperZero: mask every lane of the <4 x i16> argument with 3, extract
; lanes 3 and 0, convert each to float with sitofp, and add the two floats.
; After the mask each lane is in the range 0..3, so signed and unsigned
; extension of a lane give the same value.
; NOTE(review): judging by the name, this presumably exercises the backend's
; knowledge that the upper bits are zero - confirm against the commit that
; added the test.
; The expected assembly extracts both lanes with the unsigned lane move
; vmov.u16, applies the mask in core registers (and #3), and still converts
; with the signed vcvt.f32.s32.
117 define float @KnownUpperZero(<4 x i16> %v) {
118 ; CHECK-LABEL: KnownUpperZero:
120 ; CHECK-NEXT: vmov d16, r0, r1
121 ; CHECK-NEXT: vmov.u16 r0, d16[0]
122 ; CHECK-NEXT: vmov.u16 r1, d16[3]
123 ; CHECK-NEXT: and r0, r0, #3
124 ; CHECK-NEXT: vmov s0, r0
125 ; CHECK-NEXT: and r0, r1, #3
126 ; CHECK-NEXT: vmov s2, r0
127 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
128 ; CHECK-NEXT: vcvt.f32.s32 s2, s2
129 ; CHECK-NEXT: vadd.f32 s0, s2, s0
130 ; CHECK-NEXT: vmov r0, s0
; Splat mask: every lane is AND-ed with 3.
132 %1 = and <4 x i16> %v, <i16 3,i16 3,i16 3,i16 3>
133 %2 = extractelement <4 x i16> %1, i32 3
134 %3 = extractelement <4 x i16> %1, i32 0
; %sinf1 comes from lane 3, %sinf2 from lane 0.
135 %sinf1 = sitofp i16 %2 to float
136 %sinf2 = sitofp i16 %3 to float
137 %sum = fadd float %sinf1, %sinf2