1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s
; Module-level target configuration: the triple selects 32-bit x86 (i386) on
; Linux. In the datalayout string, "e" declares a little-endian target and
; "p:32:32" gives 32-bit pointers in the default address space.
4 target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
5 target triple = "i386-unknown-linux-unknown"
7 define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
8 ; CHECK-LABEL: doTheTestMod:
9 ; CHECK: # %bb.0: # %Entry
10 ; CHECK-NEXT: pushl %ebp
11 ; CHECK-NEXT: pushl %ebx
12 ; CHECK-NEXT: pushl %edi
13 ; CHECK-NEXT: pushl %esi
14 ; CHECK-NEXT: subl $124, %esp
15 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
16 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si
19 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx
20 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx
21 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax
22 ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
23 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di
24 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx
25 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp
26 ; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax
27 ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
28 ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
29 ; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp)
30 ; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp)
31 ; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp)
32 ; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp)
33 ; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
34 ; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
35 ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
36 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
37 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
38 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
39 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
40 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
41 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
42 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
43 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
44 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
45 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
46 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
47 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
48 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
49 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
50 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
51 ; CHECK-NEXT: movl %esp, %eax
52 ; CHECK-NEXT: movl %ecx, (%eax)
53 ; CHECK-NEXT: calll __gnu_h2f_ieee
54 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
55 ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
56 ; CHECK-NEXT: movl %esp, %eax
57 ; CHECK-NEXT: movl %ecx, (%eax)
58 ; CHECK-NEXT: calll __gnu_h2f_ieee
59 ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
60 ; CHECK-NEXT: movl %esp, %eax
61 ; CHECK-NEXT: fxch %st(1)
62 ; CHECK-NEXT: fstps 4(%eax)
63 ; CHECK-NEXT: fstps (%eax)
64 ; CHECK-NEXT: calll fmodf
65 ; CHECK-NEXT: movl %esp, %eax
66 ; CHECK-NEXT: fstps (%eax)
67 ; CHECK-NEXT: calll __gnu_f2h_ieee
68 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
69 ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
70 ; CHECK-NEXT: movl %esp, %eax
71 ; CHECK-NEXT: movl %ecx, (%eax)
72 ; CHECK-NEXT: calll __gnu_h2f_ieee
73 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
74 ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
75 ; CHECK-NEXT: movl %esp, %eax
76 ; CHECK-NEXT: movl %ecx, (%eax)
77 ; CHECK-NEXT: calll __gnu_h2f_ieee
78 ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
79 ; CHECK-NEXT: movl %esp, %eax
80 ; CHECK-NEXT: fxch %st(1)
81 ; CHECK-NEXT: fstps 4(%eax)
82 ; CHECK-NEXT: fstps (%eax)
83 ; CHECK-NEXT: calll fmodf
84 ; CHECK-NEXT: movl %esp, %eax
85 ; CHECK-NEXT: fstps (%eax)
86 ; CHECK-NEXT: calll __gnu_f2h_ieee
87 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
88 ; CHECK-NEXT: movw %ax, %si
89 ; CHECK-NEXT: movl %esp, %eax
90 ; CHECK-NEXT: movl %ecx, (%eax)
91 ; CHECK-NEXT: calll __gnu_h2f_ieee
92 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
93 ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
94 ; CHECK-NEXT: movl %esp, %eax
95 ; CHECK-NEXT: movl %ecx, (%eax)
96 ; CHECK-NEXT: calll __gnu_h2f_ieee
97 ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
98 ; CHECK-NEXT: movl %esp, %eax
99 ; CHECK-NEXT: fxch %st(1)
100 ; CHECK-NEXT: fstps 4(%eax)
101 ; CHECK-NEXT: fstps (%eax)
102 ; CHECK-NEXT: calll fmodf
103 ; CHECK-NEXT: movl %esp, %eax
104 ; CHECK-NEXT: fstps (%eax)
105 ; CHECK-NEXT: calll __gnu_f2h_ieee
106 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
107 ; CHECK-NEXT: movw %ax, %di
108 ; CHECK-NEXT: movl %esp, %eax
109 ; CHECK-NEXT: movl %ecx, (%eax)
110 ; CHECK-NEXT: calll __gnu_h2f_ieee
111 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
112 ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
113 ; CHECK-NEXT: movl %esp, %eax
114 ; CHECK-NEXT: movl %ecx, (%eax)
115 ; CHECK-NEXT: calll __gnu_h2f_ieee
116 ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
117 ; CHECK-NEXT: movl %esp, %eax
118 ; CHECK-NEXT: fxch %st(1)
119 ; CHECK-NEXT: fstps 4(%eax)
120 ; CHECK-NEXT: fstps (%eax)
121 ; CHECK-NEXT: calll fmodf
122 ; CHECK-NEXT: movl %esp, %eax
123 ; CHECK-NEXT: fstps (%eax)
124 ; CHECK-NEXT: calll __gnu_f2h_ieee
125 ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload
126 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
127 ; CHECK-NEXT: movw %ax, %bx
128 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
129 ; CHECK-NEXT: movw %bx, 6(%ecx)
130 ; CHECK-NEXT: movw %di, 4(%ecx)
131 ; CHECK-NEXT: movw %si, 2(%ecx)
132 ; CHECK-NEXT: movw %dx, (%ecx)
133 ; CHECK-NEXT: addl $124, %esp
134 ; CHECK-NEXT: popl %esi
135 ; CHECK-NEXT: popl %edi
136 ; CHECK-NEXT: popl %ebx
137 ; CHECK-NEXT: popl %ebp
138 ; CHECK-NEXT: retl $4
140 %x = alloca <4 x half>, align 8
141 %y = alloca <4 x half>, align 8
142 store <4 x half> %0, <4 x half>* %x, align 8
143 store <4 x half> %1, <4 x half>* %y, align 8
144 %2 = load <4 x half>, <4 x half>* %x, align 8
145 %3 = load <4 x half>, <4 x half>* %y, align 8
146 %4 = frem <4 x half> %2, %3