1 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Declarations of the llvm.ceil intrinsic overloads called by the kernels in
; this file: the scalar float form and the 2-, 3-, 4-, 8- and 16-element
; float vector forms. Each declaration carries nounwind and readnone.
5 declare float @llvm.ceil.f32(float) nounwind readnone
6 declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
7 declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
8 declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
9 declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
10 declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
; Scalar case: the kernel passes its float argument %x to llvm.ceil.f32 and
; stores the returned value through %out, an addrspace(1) pointer.
; For the r600/cypress run (the EG prefix), the checks below capture the whole
; register-plus-channel operand of the MEM_RAT_CACHELESS STORE_RAW line as
; RESULT and then require a later CEIL line that names the same operand, with
; an optional '*' and optional spaces allowed before it.
12 ; FUNC-LABEL: {{^}}fceil_f32:
14 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
15 ; EG: CEIL {{\*? *}}[[RESULT]]
16 define amdgpu_kernel void @fceil_f32(ptr addrspace(1) %out, float %x) {
17 %y = call float @llvm.ceil.f32(float %x) nounwind readnone
18 store float %y, ptr addrspace(1) %out
; Two-element vector case: the kernel calls llvm.ceil.v2f32 on %x and stores
; the <2 x float> result through %out.
; Unlike the scalar test, RESULT captures only the T-register name; the channel
; suffix on the store line is matched separately and not remembered. The two
; ordered CEIL checks therefore require two successive CEIL lines that write
; that register, without pinning which channels they use.
22 ; FUNC-LABEL: {{^}}fceil_v2f32:
25 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
26 ; EG: CEIL {{\*? *}}[[RESULT]]
27 ; EG: CEIL {{\*? *}}[[RESULT]]
28 define amdgpu_kernel void @fceil_v2f32(ptr addrspace(1) %out, <2 x float> %x) {
29 %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
30 store <2 x float> %y, ptr addrspace(1) %out
; Three-element vector case: the kernel calls llvm.ceil.v3f32 on %x and stores
; the <3 x float> result through %out.
; NOTE(review): FIXME-SI is not among the prefixes enabled by the RUN lines of
; this file, so the three v_ceil_f32_e32 lines below look like disabled
; expectations rather than active checks -- confirm before relying on them.
; The r600 checks expect two STORE_RAW lines (RESULT1, then RESULT2) followed
; by three CEIL lines in any order: one writing RESULT1 and two writing
; RESULT2.
34 ; FUNC-LABEL: {{^}}fceil_v3f32:
35 ; FIXME-SI: v_ceil_f32_e32
36 ; FIXME-SI: v_ceil_f32_e32
37 ; FIXME-SI: v_ceil_f32_e32
38 ; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
39 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
41 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
42 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
43 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
44 define amdgpu_kernel void @fceil_v3f32(ptr addrspace(1) %out, <3 x float> %x) {
45 %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
46 store <3 x float> %y, ptr addrspace(1) %out
; Four-element vector case: the kernel calls llvm.ceil.v4f32 on %x and stores
; the <4 x float> result through %out.
; The r600 checks expect one STORE_RAW line whose T-register is captured as
; RESULT, followed by four successive CEIL lines that each write that register
; (one per vector element; channels are not pinned).
50 ; FUNC-LABEL: {{^}}fceil_v4f32:
55 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
56 ; EG: CEIL {{\*? *}}[[RESULT]]
57 ; EG: CEIL {{\*? *}}[[RESULT]]
58 ; EG: CEIL {{\*? *}}[[RESULT]]
59 ; EG: CEIL {{\*? *}}[[RESULT]]
60 define amdgpu_kernel void @fceil_v4f32(ptr addrspace(1) %out, <4 x float> %x) {
61 %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
62 store <4 x float> %y, ptr addrspace(1) %out
; Eight-element vector case: the kernel calls llvm.ceil.v8f32 on %x and stores
; the <8 x float> result through %out.
; The r600 checks expect two STORE_RAW lines, capturing their T-registers as
; RESULT1 and RESULT2 in that order. The eight CEIL checks that follow use the
; DAG form, so they may match in any order, but each needs its own CEIL line:
; four writing RESULT1 and four writing RESULT2.
66 ; FUNC-LABEL: {{^}}fceil_v8f32:
75 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
76 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
77 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
78 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
79 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
80 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
81 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
82 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
83 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
84 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
85 define amdgpu_kernel void @fceil_v8f32(ptr addrspace(1) %out, <8 x float> %x) {
86 %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
87 store <8 x float> %y, ptr addrspace(1) %out
; Sixteen-element vector case: the kernel calls llvm.ceil.v16f32 on %x and
; stores the <16 x float> result through %out.
; The r600 checks expect four STORE_RAW lines, capturing their T-registers as
; RESULT1 through RESULT4 in that order. The sixteen CEIL checks that follow
; use the DAG form, so they may match in any order, but each needs its own
; CEIL line: four writing each of the four captured registers.
91 ; FUNC-LABEL: {{^}}fceil_v16f32:
108 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
109 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
110 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
111 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
112 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
113 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
114 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
115 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
116 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
117 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
118 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
119 ; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
120 ; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
121 ; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
122 ; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
123 ; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
124 ; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
125 ; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
126 ; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
127 ; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
128 define amdgpu_kernel void @fceil_v16f32(ptr addrspace(1) %out, <16 x float> %x) {
129 %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
130 store <16 x float> %y, ptr addrspace(1) %out