// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// Enable the extensions required by the `double` and `half` declarations below.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// `char` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef char __attribute__((ext_vector_type(2))) char2;
typedef char __attribute__((ext_vector_type(3))) char3;
typedef char __attribute__((ext_vector_type(4))) char4;
typedef char __attribute__((ext_vector_type(8))) char8;
typedef char __attribute__((ext_vector_type(16))) char16;
// `short` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef short __attribute__((ext_vector_type(2))) short2;
typedef short __attribute__((ext_vector_type(3))) short3;
typedef short __attribute__((ext_vector_type(4))) short4;
typedef short __attribute__((ext_vector_type(8))) short8;
typedef short __attribute__((ext_vector_type(16))) short16;
// `int` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef int __attribute__((ext_vector_type(2))) int2;
typedef int __attribute__((ext_vector_type(3))) int3;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef int __attribute__((ext_vector_type(8))) int8;
typedef int __attribute__((ext_vector_type(16))) int16;
// `long` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef long __attribute__((ext_vector_type(2))) long2;
typedef long __attribute__((ext_vector_type(3))) long3;
typedef long __attribute__((ext_vector_type(4))) long4;
typedef long __attribute__((ext_vector_type(8))) long8;
typedef long __attribute__((ext_vector_type(16))) long16;
// `half` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef half __attribute__((ext_vector_type(2))) half2;
typedef half __attribute__((ext_vector_type(3))) half3;
typedef half __attribute__((ext_vector_type(4))) half4;
typedef half __attribute__((ext_vector_type(8))) half8;
typedef half __attribute__((ext_vector_type(16))) half16;
// `float` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef float __attribute__((ext_vector_type(2))) float2;
typedef float __attribute__((ext_vector_type(3))) float3;
typedef float __attribute__((ext_vector_type(4))) float4;
typedef float __attribute__((ext_vector_type(8))) float8;
typedef float __attribute__((ext_vector_type(16))) float16;
// `double` vectors of 2, 3, 4, 8 and 16 elements (clang ext_vector_type).
typedef double __attribute__((ext_vector_type(2))) double2;
typedef double __attribute__((ext_vector_type(3))) double3;
typedef double __attribute__((ext_vector_type(4))) double4;
typedef double __attribute__((ext_vector_type(8))) double8;
typedef double __attribute__((ext_vector_type(16))) double16;
// CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1
// CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128
// CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128
// CHECK-LABEL: @local_memory_alignment_global(
// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16
// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32
// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64
// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128
// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128
// Declares a four-element volatile `local` array for every scalar and vector
// element type named in the @local_memory_alignment_global CHECK lines.
// NOTE(review): the kernel's braces and any statements that write to these
// arrays are not visible in this excerpt — TODO confirm against the complete file.
kernel void local_memory_alignment_global()
  volatile local char lds_i8[4];
  volatile local char2 lds_v2i8[4];
  volatile local char3 lds_v3i8[4];
  volatile local char4 lds_v4i8[4];
  volatile local char8 lds_v8i8[4];
  volatile local char16 lds_v16i8[4];

  volatile local short lds_i16[4];
  volatile local short2 lds_v2i16[4];
  volatile local short3 lds_v3i16[4];
  volatile local short4 lds_v4i16[4];
  volatile local short8 lds_v8i16[4];
  volatile local short16 lds_v16i16[4];

  volatile local int lds_i32[4];
  volatile local int2 lds_v2i32[4];
  volatile local int3 lds_v3i32[4];
  volatile local int4 lds_v4i32[4];
  volatile local int8 lds_v8i32[4];
  volatile local int16 lds_v16i32[4];

  volatile local long lds_i64[4];
  volatile local long2 lds_v2i64[4];
  volatile local long3 lds_v3i64[4];
  volatile local long4 lds_v4i64[4];
  volatile local long8 lds_v8i64[4];
  volatile local long16 lds_v16i64[4];

  volatile local half lds_f16[4];
  volatile local half2 lds_v2f16[4];
  volatile local half3 lds_v3f16[4];
  volatile local half4 lds_v4f16[4];
  volatile local half8 lds_v8f16[4];
  volatile local half16 lds_v16f16[4];

  volatile local float lds_f32[4];
  volatile local float2 lds_v2f32[4];
  volatile local float3 lds_v3f32[4];
  volatile local float4 lds_v4f32[4];
  volatile local float8 lds_v8f32[4];
  volatile local float16 lds_v16f32[4];

  volatile local double lds_f64[4];
  volatile local double2 lds_v2f64[4];
  volatile local double3 lds_v3f64[4];
  volatile local double4 lds_v4f64[4];
  volatile local double8 lds_v8f64[4];
  volatile local double16 lds_v16f64[4];
// Kernel taking one volatile `local` pointer parameter per scalar and vector
// element type.
// NOTE(review): only the parameter list is visible in this excerpt; the kernel
// body is not shown — TODO confirm against the complete file.
kernel void local_memory_alignment_arg(
  volatile local char *lds_i8,
  volatile local char2 *lds_v2i8,
  volatile local char3 *lds_v3i8,
  volatile local char4 *lds_v4i8,
  volatile local char8 *lds_v8i8,
  volatile local char16 *lds_v16i8,

  volatile local short *lds_i16,
  volatile local short2 *lds_v2i16,
  volatile local short3 *lds_v3i16,
  volatile local short4 *lds_v4i16,
  volatile local short8 *lds_v8i16,
  volatile local short16 *lds_v16i16,

  volatile local int *lds_i32,
  volatile local int2 *lds_v2i32,
  volatile local int3 *lds_v3i32,
  volatile local int4 *lds_v4i32,
  volatile local int8 *lds_v8i32,
  volatile local int16 *lds_v16i32,

  volatile local long *lds_i64,
  volatile local long2 *lds_v2i64,
  volatile local long3 *lds_v3i64,
  volatile local long4 *lds_v4i64,
  volatile local long8 *lds_v8i64,
  volatile local long16 *lds_v16i64,

  volatile local half *lds_f16,
  volatile local half2 *lds_v2f16,
  volatile local half3 *lds_v3f16,
  volatile local half4 *lds_v4f16,
  volatile local half8 *lds_v8f16,
  volatile local half16 *lds_v16f16,

  volatile local float *lds_f32,
  volatile local float2 *lds_v2f32,
  volatile local float3 *lds_v3f32,
  volatile local float4 *lds_v4f32,
  volatile local float8 *lds_v8f32,
  volatile local float16 *lds_v16f32,

  volatile local double *lds_f64,
  volatile local double2 *lds_v2f64,
  volatile local double3 *lds_v3f64,
  volatile local double4 *lds_v4f64,
  volatile local double8 *lds_v8f64,
  volatile local double16 *lds_v16f64)
// CHECK-LABEL: @private_memory_alignment_alloca(
// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)
// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp16, align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp23, align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay34, align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
424 kernel void private_memory_alignment_alloca
()
426 volatile private char private_i8
[4];
427 volatile private char2 private_v2i8[4];
428 volatile private char3 private_v3i8
[4];
429 volatile private char4 private_v4i8[4];
430 volatile private char8 private_v8i8
[4];
431 volatile private char16 private_v16i8[4];
433 volatile private short private_i16
[4];
434 volatile private short2 private_v2i16[4];
435 volatile private short3 private_v3i16
[4];
436 volatile private short4 private_v4i16[4];
437 volatile private short8 private_v8i16
[4];
438 volatile private short16 private_v16i16[4];
440 volatile private int private_i32
[4];
441 volatile private int2 private_v2i32[4];
442 volatile private int3 private_v3i32
[4];
443 volatile private int4 private_v4i32[4];
444 volatile private int8 private_v8i32
[4];
445 volatile private int16 private_v16i32[4];
447 volatile private long private_i64
[4];
448 volatile private long2 private_v2i64[4];
449 volatile private long3 private_v3i64
[4];
450 volatile private long4 private_v4i64[4];
451 volatile private long8 private_v8i64
[4];
452 volatile private long16 private_v16i64[4];
454 volatile private half private_f16
[4];
455 volatile private half2 private_v2f16[4];
456 volatile private half3 private_v3f16
[4];
457 volatile private half4 private_v4f16[4];
458 volatile private half8 private_v8f16
[4];
459 volatile private half16 private_v16f16[4];
461 volatile private float private_f32
[4];
462 volatile private float2 private_v2f32[4];
463 volatile private float3 private_v3f32
[4];
464 volatile private float4 private_v4f32[4];
465 volatile private float8 private_v8f32
[4];
466 volatile private float16 private_v16f32[4];
468 volatile private double private_f64
[4];
469 volatile private double2 private_v2f64[4];
470 volatile private double3 private_v3f64
[4];
471 volatile private double4 private_v4f64[4];
472 volatile private double8 private_v8f64
[4];
473 volatile private double16 private_v16f64[4];