1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; Test: 1D image store with a 16-bit coordinate and a <4 x half> value.
; %x is element 0 of %coords; the stored value is %val reinterpreted from
; <2 x i32> to <4 x half>. The intrinsic's second operand is i32 1, and the
; check below expects an image_store with dmask:0x1 and the a16 and d16
; modifiers, using v[1:2], v0 and s[0:7] as its operands.
3 ; GCN-LABEL: {{^}}store.f16.1d:
4 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
5 define amdgpu_ps void @store.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
7 %x = extractelement <2 x i16> %coords, i32 0
8 %bitcast = bitcast <2 x i32> %val to <4 x half>
9 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 1D image store with a 16-bit coordinate and a <4 x half> value.
; Same IR shape as the other 1D cases in this file, but the intrinsic's second
; operand is i32 3; the check below expects dmask:0x3 together with the a16
; and d16 modifiers, using v[1:2], v0 and s[0:7] as operands.
13 ; GCN-LABEL: {{^}}store.v2f16.1d:
14 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
15 define amdgpu_ps void @store.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
17 %x = extractelement <2 x i16> %coords, i32 0
18 %bitcast = bitcast <2 x i32> %val to <4 x half>
19 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 1D image store with a 16-bit coordinate and a <4 x half> value.
; The intrinsic's second operand is i32 7; the check below expects dmask:0x7
; together with the a16 and d16 modifiers, using v[1:2], v0 and s[0:7] as
; operands.
23 ; GCN-LABEL: {{^}}store.v3f16.1d:
24 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
25 define amdgpu_ps void @store.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
27 %x = extractelement <2 x i16> %coords, i32 0
28 %bitcast = bitcast <2 x i32> %val to <4 x half>
29 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 1D image store with a 16-bit coordinate and a <4 x half> value.
; The intrinsic's second operand is i32 15; the check below expects dmask:0xf
; together with the a16 and d16 modifiers, using v[1:2], v0 and s[0:7] as
; operands.
33 ; GCN-LABEL: {{^}}store.v4f16.1d:
34 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
35 define amdgpu_ps void @store.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
37 %x = extractelement <2 x i16> %coords, i32 0
38 %bitcast = bitcast <2 x i32> %val to <4 x half>
39 call void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image store with two 16-bit coordinates and a <4 x half> value.
; %x and %y are elements 0 and 1 of the same <2 x i16> argument %coords; the
; stored value is %val reinterpreted as <4 x half>. The intrinsic's second
; operand is i32 1, and the check below expects dmask:0x1 with a16 and d16.
; The expected second operand is the single register v0 (presumably both
; coordinates stay packed in it - confirm against the backend's a16 lowering).
43 ; GCN-LABEL: {{^}}store.f16.2d:
44 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
45 define amdgpu_ps void @store.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
47 %x = extractelement <2 x i16> %coords, i32 0
48 %y = extractelement <2 x i16> %coords, i32 1
49 %bitcast = bitcast <2 x i32> %val to <4 x half>
50 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image store with two 16-bit coordinates and a <4 x half> value.
; %x and %y are elements 0 and 1 of %coords. The intrinsic's second operand
; is i32 3; the check below expects dmask:0x3 with the a16 and d16 modifiers,
; using v[1:2], v0 and s[0:7] as operands.
54 ; GCN-LABEL: {{^}}store.v2f16.2d:
55 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
56 define amdgpu_ps void @store.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
58 %x = extractelement <2 x i16> %coords, i32 0
59 %y = extractelement <2 x i16> %coords, i32 1
60 %bitcast = bitcast <2 x i32> %val to <4 x half>
61 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image store with two 16-bit coordinates and a <4 x half> value.
; %x and %y are elements 0 and 1 of %coords. The intrinsic's second operand
; is i32 7; the check below expects dmask:0x7 with the a16 and d16 modifiers,
; using v[1:2], v0 and s[0:7] as operands.
65 ; GCN-LABEL: {{^}}store.v3f16.2d:
66 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
67 define amdgpu_ps void @store.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
69 %x = extractelement <2 x i16> %coords, i32 0
70 %y = extractelement <2 x i16> %coords, i32 1
71 %bitcast = bitcast <2 x i32> %val to <4 x half>
72 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image store with two 16-bit coordinates and a <4 x half> value.
; %x and %y are elements 0 and 1 of %coords. The intrinsic's second operand
; is i32 15; the check below expects dmask:0xf with the a16 and d16
; modifiers, using v[1:2], v0 and s[0:7] as operands.
76 ; GCN-LABEL: {{^}}store.v4f16.2d:
77 ; GCN: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
78 define amdgpu_ps void @store.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
80 %x = extractelement <2 x i16> %coords, i32 0
81 %y = extractelement <2 x i16> %coords, i32 1
82 %bitcast = bitcast <2 x i32> %val to <4 x half>
83 call void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 3D image store with three 16-bit coordinates and a <4 x half> value.
; %x and %y are elements 0 and 1 of %coords_lo; %z is element 0 of
; %coords_hi. The stored value is %val reinterpreted as <4 x half>. The
; intrinsic's second operand is i32 1, and the check below expects dmask:0x1
; with a16 and d16. Compared with the 1D/2D cases in this file, the expected
; second operand widens to v[0:1] and the first operand moves to v[2:3].
87 ; GCN-LABEL: {{^}}store.f16.3d:
88 ; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 unorm a16 d16
89 define amdgpu_ps void @store.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
91 %x = extractelement <2 x i16> %coords_lo, i32 0
92 %y = extractelement <2 x i16> %coords_lo, i32 1
93 %z = extractelement <2 x i16> %coords_hi, i32 0
94 %bitcast = bitcast <2 x i32> %val to <4 x half>
95 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 3D image store with three 16-bit coordinates and a <4 x half> value.
; %x and %y come from %coords_lo, %z from element 0 of %coords_hi. The
; intrinsic's second operand is i32 3; the check below expects dmask:0x3 with
; the a16 and d16 modifiers, using v[2:3], v[0:1] and s[0:7] as operands.
99 ; GCN-LABEL: {{^}}store.v2f16.3d:
100 ; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm a16 d16
101 define amdgpu_ps void @store.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
103 %x = extractelement <2 x i16> %coords_lo, i32 0
104 %y = extractelement <2 x i16> %coords_lo, i32 1
105 %z = extractelement <2 x i16> %coords_hi, i32 0
106 %bitcast = bitcast <2 x i32> %val to <4 x half>
107 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 3D image store with three 16-bit coordinates and a <4 x half> value.
; %x and %y come from %coords_lo, %z from element 0 of %coords_hi. The
; intrinsic's second operand is i32 7; the check below expects dmask:0x7 with
; the a16 and d16 modifiers, using v[2:3], v[0:1] and s[0:7] as operands.
111 ; GCN-LABEL: {{^}}store.v3f16.3d:
112 ; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
113 define amdgpu_ps void @store.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
115 %x = extractelement <2 x i16> %coords_lo, i32 0
116 %y = extractelement <2 x i16> %coords_lo, i32 1
117 %z = extractelement <2 x i16> %coords_hi, i32 0
118 %bitcast = bitcast <2 x i32> %val to <4 x half>
119 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 3D image store with three 16-bit coordinates and a <4 x half> value.
; %x and %y come from %coords_lo, %z from element 0 of %coords_hi. The
; intrinsic's second operand is i32 15; the check below expects dmask:0xf
; with the a16 and d16 modifiers, using v[2:3], v[0:1] and s[0:7] as operands.
123 ; GCN-LABEL: {{^}}store.v4f16.3d:
124 ; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm a16 d16
125 define amdgpu_ps void @store.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
127 %x = extractelement <2 x i16> %coords_lo, i32 0
128 %y = extractelement <2 x i16> %coords_lo, i32 1
129 %z = extractelement <2 x i16> %coords_hi, i32 0
130 %bitcast = bitcast <2 x i32> %val to <4 x half>
131 call void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half> %bitcast, i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Declarations of the image-store intrinsics called by the tests above, one
; per dimensionality (1d, 2d, 3d). Each takes a <4 x half> value, an i32, one
; to three i16 coordinates, an <8 x i32> resource, and two trailing i32
; operands.
; NOTE(review): these declarations reference attribute group #2, but only #0
; and #1 are defined in the lines visible here - confirm #2 is defined further
; down in the file.
135 declare void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half>, i32, i16, <8 x i32>, i32, i32) #2
136 declare void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half>, i32, i16, i16, <8 x i32>, i32, i32) #2
137 declare void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
; Attribute groups. Neither #0 nor #1 is referenced by any line visible in
; this part of the file.
139 attributes #0 = { nounwind }
140 attributes #1 = { nounwind readonly }