1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; Work-item id intrinsic (x dimension). In this file it is only called by
; masked_load_i1_to_i32_trunc_cmp_ne_neg1, which uses the result as a byte
; offset into its input pointer.
4 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Case: (sext i1 to i32) == 0, result stored as i1.
; The checks below expect the loaded byte to be masked down to bit 0, that
; bit to be compared against 0, and the condition to be materialized with
; v_cndmask before the byte store.
6 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
7 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
8 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
9 ; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}}
10 ; SI: v_cndmask_b32_e64
11 ; SI: buffer_store_byte
12 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
13 %load = load i1, i1 addrspace(1)* %in
14 %ext = sext i1 %load to i32
15 %cmp = icmp eq i32 %ext, 0
16 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) == 0, result stored as i1.
; The currently checked sequence compares the masked bit against 1 and then
; inverts vcc with s_xor_b64 before v_cndmask. The FIXME below asks for that
; inversion to be folded into the compare itself.
20 ; FIXME: The negate should be inverting the compare.
21 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
22 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
23 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
24 ; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
25 ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
26 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
27 ; SI: buffer_store_byte [[RESULT]]
28 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
29 %load = load i1, i1 addrspace(1)* %in
30 %ext = zext i1 %load to i32
31 %cmp = icmp eq i32 %ext, 0
32 store i1 %cmp, i1 addrspace(1)* %out
; Case: (sext i1 to i32) == 1, result stored as i1.
; A sign-extended i1 is either 0 or -1, so it can never equal 1; the checks
; expect the stored value to be the constant 0.
36 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
37 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
38 ; SI: buffer_store_byte [[RESULT]]
39 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
40 %load = load i1, i1 addrspace(1)* %in
41 %ext = sext i1 %load to i32
42 %cmp = icmp eq i32 %ext, 1
43 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) == 1, result stored as i1.
; A zero-extended i1 equals 1 exactly when the original bit is set, so the
; checks expect no compare at all: the masked bit is stored directly.
47 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
48 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
49 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
50 ; SI: buffer_store_byte [[RESULT]]
51 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
52 %load = load i1, i1 addrspace(1)* %in
53 %ext = zext i1 %load to i32
54 %cmp = icmp eq i32 %ext, 1
55 store i1 %cmp, i1 addrspace(1)* %out
; Case: (sext i1 to i32) == -1, result stored as i1.
; A sign-extended i1 equals -1 exactly when the original bit is set, so the
; checks expect the masked bit to be stored directly with no compare.
59 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
60 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
61 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
62 ; SI: buffer_store_byte [[RESULT]]
63 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
64 %load = load i1, i1 addrspace(1)* %in
65 %ext = sext i1 %load to i32
66 %cmp = icmp eq i32 %ext, -1
67 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) == -1, result stored as i1.
; A zero-extended i1 is either 0 or 1, so it can never equal -1; the checks
; expect the stored value to be the constant 0.
71 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
72 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
73 ; SI: buffer_store_byte [[RESULT]]
74 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
75 %load = load i1, i1 addrspace(1)* %in
76 %ext = zext i1 %load to i32
77 %cmp = icmp eq i32 %ext, -1
78 store i1 %cmp, i1 addrspace(1)* %out
; Case: (sext i1 to i32) != 0, result stored as i1.
; The sign-extended value is non-zero exactly when the original bit is set,
; so the checks expect the masked bit to be stored directly with no compare.
83 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
84 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
85 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
86 ; SI: buffer_store_byte [[RESULT]]
87 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
88 %load = load i1, i1 addrspace(1)* %in
89 %ext = sext i1 %load to i32
90 %cmp = icmp ne i32 %ext, 0
91 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) != 0, result stored as i1.
; The zero-extended value is non-zero exactly when the original bit is set,
; so the checks expect the masked bit to be stored directly with no compare.
95 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
96 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
97 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
98 ; SI: buffer_store_byte [[RESULT]]
99 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
100 %load = load i1, i1 addrspace(1)* %in
101 %ext = zext i1 %load to i32
102 %cmp = icmp ne i32 %ext, 0
103 store i1 %cmp, i1 addrspace(1)* %out
; Case: (sext i1 to i32) != 1, result stored as i1.
; A sign-extended i1 is either 0 or -1, so it always differs from 1; the
; checks expect the stored value to be the constant 1.
107 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
108 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
109 ; SI: buffer_store_byte [[RESULT]]
110 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
111 %load = load i1, i1 addrspace(1)* %in
112 %ext = sext i1 %load to i32
113 %cmp = icmp ne i32 %ext, 1
114 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) != 1, result stored as i1.
; The checked sequence compares the masked bit for equality with 1, inverts
; vcc with s_xor_b64, and materializes the result with v_cndmask. This is the
; same instruction pattern that zextload_i1_to_i32_trunc_cmp_eq_0 checks.
118 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
119 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
120 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
121 ; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
122 ; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
123 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
124 ; SI: buffer_store_byte [[RESULT]]
125 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
126 %load = load i1, i1 addrspace(1)* %in
127 %ext = zext i1 %load to i32
128 %cmp = icmp ne i32 %ext, 1
129 store i1 %cmp, i1 addrspace(1)* %out
; Case: (sext i1 to i32) != -1, result stored as i1.
; NOTE(review): the instruction checks for this function carry the XSI
; prefix, which neither invocation at the top of the file enables, so only
; the label line is actually verified here. They appear to record the
; desired single-compare output described by the FIXME below; confirm
; before re-enabling them.
133 ; FIXME: This should be one compare.
134 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
135 ; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
136 ; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
137 ; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
138 ; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
139 ; XSI-NEXT: buffer_store_byte [[RESULT]]
140 define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
141 %load = load i1, i1 addrspace(1)* %in
142 %ext = sext i1 %load to i32
143 %cmp = icmp ne i32 %ext, -1
144 store i1 %cmp, i1 addrspace(1)* %out
; Case: (zext i1 to i32) != -1, result stored as i1.
; A zero-extended i1 is either 0 or 1, so it always differs from -1; the
; checks expect the stored value to be the constant 1.
148 ; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
149 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
150 ; SI: buffer_store_byte [[RESULT]]
151 define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
152 %load = load i1, i1 addrspace(1)* %in
153 %ext = zext i1 %load to i32
154 %cmp = icmp ne i32 %ext, -1
155 store i1 %cmp, i1 addrspace(1)* %out
; Case: a full i8 load (not an i1 load) that is masked, sign-extended to i32
; and compared != -1. Unlike the i1 cases above, the value is not limited to
; two states, so the checks expect a real compare: a sign-extending byte
; load followed by v_cmp_ne_u32 against -1 and v_cndmask.
; NOTE(review): the FIXME below mentions a "load without gep", but the body
; visible here indexes %in with the work-item id through a gep; confirm
; which variant the FIXME refers to.
159 ; FIXME: Need to handle non-uniform case for function below (load without gep).
160 ; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
161 ; SI: {{buffer|flat}}_load_sbyte [[LOAD:v[0-9]+]]
162 ; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}}
163 ; SI-NEXT: v_cndmask_b32_e64
164 ; SI: {{buffer|flat}}_store_byte
165 define amdgpu_kernel void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
; Each work-item reads the byte at %in + its x id.
166 %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
167 %in.ptr = getelementptr i8, i8 addrspace(1)* %in, i32 %tid.x
168 %load = load i8, i8 addrspace(1)* %in.ptr
; 255 is the all-ones mask for i8, so this 'and' keeps every bit of %load.
169 %masked = and i8 %load, 255
170 %ext = sext i8 %masked to i32
171 %cmp = icmp ne i32 %ext, -1
172 store i1 %cmp, i1 addrspace(1)* %out