; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

declare i32 @llvm.r600.read.tidig.x() readnone
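
; The tests below cover integer subtraction on the R600/Evergreen (redwood)
; target for scalar and vector i16, i32, and i64 types; the EG check lines
; match the SUB_INT instructions the backend emits.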

; FUNC-LABEL: {{^}}s_sub_i32:
define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %result = sub i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_sub_imm_i32:
define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
  %result = sub i32 1234, %a
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = sub i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_imm_i32:
define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %a = load i32, i32 addrspace(1)* %in
  %result = sub i32 123, %a
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_v2i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = sub <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_v4i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = sub <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}
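
; The sub-dword (i16) cases below load their operands per workitem via
; tidig.x; the scalar case uses volatile loads to keep the two loads distinct.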

; FUNC-LABEL: {{^}}test_sub_i16:
define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
  %a = load volatile i16, i16 addrspace(1)* %gep
  %b = load volatile i16, i16 addrspace(1)* %b_ptr
  %result = sub i16 %a, %b
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_v2i16:
define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
  %result = sub <2 x i16> %a, %b
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_sub_v4i16:
define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
  %result = sub <4 x i16> %a, %b
  store <4 x i16> %result, <4 x i16> addrspace(1)* %out
  ret void
}
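
; The i64 cases below are legalized into 32-bit operations on EG; the
; EG-DAG checks only pin down the SUB_INT pieces of that expansion.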

; FUNC-LABEL: {{^}}s_sub_i64:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG-DAG: SUB_INT {{[* ]*}}
; EG-DAG: SUB_INT {{[* ]*}}
define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
  %result = sub i64 %a, %b
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sub_i64:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG-DAG: SUB_INT {{[* ]*}}
; EG-DAG: SUB_INT {{[* ]*}}
define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() readnone
  %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
  %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
  %a = load i64, i64 addrspace(1)* %a_ptr
  %b = load i64, i64 addrspace(1)* %b_ptr
  %result = sub i64 %a, %b
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_test_sub_v2i64:
define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
  %tid = call i32 @llvm.r600.read.tidig.x() readnone
  %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
  %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
  %result = sub <2 x i64> %a, %b
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_test_sub_v4i64:
define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
  %tid = call i32 @llvm.r600.read.tidig.x() readnone
  %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
  %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
  %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
  %result = sub <4 x i64> %a, %b
  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
  ret void
}