2 # Copyright 2016 Advanced Micro Devices, Inc.
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 from modules
import utils
# Scalar element type names (as strings) that the generated kernels are
# parameterised over; ext_req() and begin_test() special-case 'half',
# 'float' and 'double' from this list.
29 TYPES
= ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong', 'half', 'float', 'double']
# Vector widths, kept as strings because they are concatenated onto type and
# function names; passed to the generators as their vec_sizes argument.
30 VEC_SIZES
= ['2', '3', '4', '8', '16']
# Relative output directory: begin_test() creates its files here and the
# driver code creates the directory via utils.safe_makedirs().
32 DIR_NAME
= os
.path
.join("cl", "vstore")
37 return ' '.join([str(random
.randint(0, 255)) for i
in range(size
)])
def ext_req(type_name):
    """Return the device-extension requirement line for *type_name*.

    Args:
        type_name: Scalar or vector type name such as ``"double"``,
            ``"double4"``, ``"half8"`` or ``"int"``. Only the prefix is
            inspected, so vector variants match their scalar base type.

    Returns:
        ``"require_device_extensions: cl_khr_fp64"`` for double types,
        ``"require_device_extensions: cl_khr_fp16"`` for half types, and an
        empty string for every other type.
    """
    if type_name.startswith("double"):
        return "require_device_extensions: cl_khr_fp64"
    if type_name.startswith("half"):
        return "require_device_extensions: cl_khr_fp16"
    # The caller concatenates this result onto the header template, so a
    # string must be returned even when no extension is required.
    return ""
# begin_test: create the output file for one (suffix, type_name, addr_space)
# combination and write its header sections.
#
# The file is DIR_NAME/vstore{suffix}-{type_name}-{addr_space}.cl, opened in
# 'w' mode (an existing file of that name is truncated). The first write emits
# the "name:" header followed by the extension requirement from
# ext_req(type_name). The later writes emit two test descriptions per vector
# size (plain and "offset") with arg_out/arg_in buffers, and finally the
# "#pragma OPENCL EXTENSION" line when type_name is "double" or "half".
#
# NOTE(review): the loop over vec_sizes that binds `s`, the terminators of
# several template strings and the final return are not visible in this view.
# Callers assign the result to `f` and keep writing to it, so this presumably
# returns the open file object -- confirm against the full file.
48 def begin_test(suffix
, type_name
, mem_type
, vec_sizes
, addr_space
, aligned
):
49 file_name
= os
.path
.join(DIR_NAME
, "vstore{}-{}-{}.cl".format(suffix
, type_name
, addr_space
))
51 f
= open(file_name
, 'w')
52 f
.write(textwrap
.dedent(("""\
55 name: Vector store{suffix} {addr_space} {type_name}2,3,4,8,16
60 """ + ext_req(type_name
))
61 .format(type_name
=type_name
, addr_space
=addr_space
, suffix
=suffix
)))
# An empty size string denotes the scalar variant and counts as one element.
63 size
= int(s
) if s
!= '' else 1
# With aligned stores a 3-element vector is treated as 4 elements wide.
64 modsize
= 4 if size
== 3 and aligned
else size
# Number of leading elements skipped before the stored data: one full
# (padded) vector when aligned, otherwise a single element.
65 offset
= modsize
if aligned
else 1
# The canary literal written after the data differs for float/double
# versus every other element type.
66 canary
= '0xdeadp1' if type_name
in ('float', 'double') else '0xdead'
68 ty_name
= type_name
+ s
69 f
.write(textwrap
.dedent("""
71 name: vector store{suffix} {addr_space} {type_name}
72 kernel_name: vstore{suffix}{n}_{addr_space}
73 arg_out: 0 buffer {mem_type}[{size}] {offset_zeros}{gen_array} {canary}
74 arg_in: 0 buffer {mem_type}[{size}] {offset_size_zeros} {canary}
75 arg_in: 1 buffer {type_name}[1] {gen_array}
78 name: vector store{suffix} {addr_space} offset {type_name}
79 kernel_name: vstore{suffix}{n}_{addr_space}_offset
80 arg_out: 0 buffer {mem_type}[{offset_size}] {offset_zeros} {gen_array} {padd_zeros} {gen_array} {canary}
81 arg_in: 0 buffer {mem_type}[{offset_size}] {offset_modsize_size_zeros} {canary}
82 arg_in: 1 buffer {type_name}[1] {gen_array}
83 """.format(type_name
=ty_name
, mem_type
=mem_type
, size
=size
+ offset
+ 1,
84 offset_zeros
= ("0 " * offset
),
85 offset_size_zeros
= ("0 " * (offset
+ size
)),
86 padd_zeros
= ("0 " * (modsize
- size
)),
87 offset_modsize_size_zeros
= ("0 " * (modsize
+ size
+ offset
)),
88 offset_size
=modsize
+ size
+ offset
+ 1, n
=s
,
89 gen_array
=gen_array(size
),
90 suffix
=suffix
, addr_space
=addr_space
,
93 f
.write(textwrap
.dedent("""
96 if type_name
== "double":
97 f
.write(textwrap
.dedent("""
98 #pragma OPENCL EXTENSION cl_khr_fp64: enable
100 if type_name
== "half":
101 f
.write(textwrap
.dedent("""
102 #pragma OPENCL EXTENSION cl_khr_fp16: enable
# gen_test_global: write the global-address-space kernels for type `t`.
#
# Obtains the output file from begin_test(..., 'global', aligned) and appends
# two kernels per vector size: vstore{suffix}{n}_global, which stores in[0]
# once at out + offset, and vstore{suffix}{n}_global_offset, which stores it
# at vector indices 0 and 1 relative to out + offset.
#
# NOTE(review): the loop binding `s`, the assignment of `type_name`, the end
# of the format() call and any close of `f` are not visible in this view.
107 def gen_test_global(suffix
, t
, mem_type
, vec_sizes
, aligned
):
108 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, 'global', aligned
)
# Aligned stores skip one whole vector; unaligned ones skip one element.
# int(s) requires a non-empty size string whenever aligned is true.
110 offset
= int(s
) if aligned
else 1
# A 3-wide vector is treated as 4 elements wide.
111 offset
= 4 if offset
== 3 else offset
114 f
.write(textwrap
.dedent("""
115 kernel void vstore{suffix}{n}_global(global {mem_type} *out,
116 global {type_name} *in) {{
117 {type_name} tmp = in[0];
118 vstore{suffix}{n}(tmp, 0, out + {offset});
121 kernel void vstore{suffix}{n}_global_offset(global {mem_type} *out,
122 global {type_name} *in) {{
123 {type_name} tmp = in[0];
124 vstore{suffix}{n}(tmp, 0, out + {offset});
125 vstore{suffix}{n}(tmp, 1, out + {offset});
127 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
# gen_test_local_private: write kernels that store into a local or private
# scratch array (addr_space selects which) for type `t`.
#
# Obtains the output file from begin_test() and appends two kernels per vector
# size. Each declares a volatile {addr_space} {mem_type} array `loc`, zeroes
# it, calls vstore on loc + offset (the "_offset" kernel stores at vector
# indices 0 and 1), and then loops over the array again.
#
# NOTE(review): the loop binding `s`, the assignment of `type_name` and the
# body of the final copy-out loops are not visible in this view.
133 def gen_test_local_private(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
134 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
# An empty size string denotes the scalar variant and counts as one element.
136 size
= int(s
) if s
!= '' else 1
# With aligned stores a 3-element vector is treated as 4 elements wide.
137 modsize
= 4 if size
== 3 and aligned
else size
# Leading elements skipped before the stored data.
138 offset
= modsize
if aligned
else 1
141 f
.write(textwrap
.dedent("""
142 kernel void vstore{suffix}{n}_{addr_space}(global {mem_type} *out,
143 global {type_name} *in) {{
144 {type_name} tmp = in[0];
145 volatile {addr_space} {mem_type} loc[{size}];
146 for (int i = 0; i < {size}; ++i)
147 loc[i] = ({mem_type})0;
149 vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
150 for (int i = 0; i < {size}; ++i)
154 kernel void vstore{suffix}{n}_{addr_space}_offset(global {mem_type} *out,
155 global {type_name} *in) {{
156 {type_name} tmp = in[0];
157 volatile {addr_space} {mem_type} loc[{offset_size}];
158 for (int i = 0; i < {offset_size}; ++i)
159 loc[i] = ({mem_type})0;
161 vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
162 vstore{suffix}{n}(tmp, 1, ({addr_space} {mem_type}*)loc + {offset});
163 for (int i = 0; i < {offset_size}; ++i)
166 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
167 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
168 addr_space
=addr_space
, offset
=offset
)))
173 # vstore_half is special, because CLC won't allow us to use half type without
# gen_test_local_private_half: variant of the local/private generator for
# stores whose memory type is half.
#
# begin_test() is called with mem_type fixed to 'half'. The generated kernels
# declare the scratch array as `short`, cast it to `{addr_space} half*` for
# the vstore call, and copy the result out through a `global short *` view of
# `out`, so no variable of type half is declared in the kernel.
#
# NOTE(review): the loop binding `s`, the assignment of `type_name` and the
# bodies of the zero-initialisation loops are not visible in this view.
175 def gen_test_local_private_half(suffix
, t
, vec_sizes
, addr_space
, aligned
):
176 f
= begin_test(suffix
, t
, 'half', vec_sizes
, addr_space
, aligned
)
# An empty size string denotes the scalar variant and counts as one element.
178 size
= int(s
) if s
!= '' else 1
# With aligned stores a 3-element vector is treated as 4 elements wide.
179 modsize
= 4 if size
== 3 and aligned
else size
# Leading elements skipped before the stored data.
180 offset
= modsize
if aligned
else 1
183 f
.write(textwrap
.dedent("""
184 kernel void vstore{suffix}{n}_{addr_space}(global half *out,
185 global {type_name} *in) {{
186 {type_name} tmp = in[0];
187 volatile {addr_space} short loc[{size}];
188 for (int i = 0; i < {size}; ++i)
191 vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
193 for (int i = 0; i < {size}; ++i)
194 ((global short *)out)[i] = loc[i];
197 kernel void vstore{suffix}{n}_{addr_space}_offset(global half *out,
198 global {type_name} *in) {{
199 {type_name} tmp = in[0];
200 volatile {addr_space} short loc[{offset_size}];
201 for (int i = 0; i < {offset_size}; ++i)
204 vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
205 vstore{suffix}{n}(tmp, 1, ({addr_space} half*)loc + {offset});
207 for (int i = 0; i < {offset_size}; ++i)
208 ((global short *)out)[i] = loc[i];
210 """.format(type_name
=type_name
, n
=s
, suffix
=suffix
,
211 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
212 addr_space
=addr_space
, offset
=offset
)))
def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``local`` address-space vstore tests for type *t*.

    Args:
        suffix: Suffix appended to ``vstore`` in kernel and file names
            (for example ``''``, ``'_half'`` or ``'a_half'``).
        t: Name of the value type being stored.
        mem_type: Element type of the destination memory.
        vec_sizes: Vector-size strings to generate kernels for.
        aligned: Whether the aligned store variants are generated.
    """
    # Exactly one generator may run: both write the same output file (its
    # name depends only on suffix, t and address space) in 'w' mode, so
    # running the generic one afterwards would overwrite the half output.
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local',
                               aligned)
def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``private`` address-space vstore tests for type *t*.

    Args:
        suffix: Suffix appended to ``vstore`` in kernel and file names
            (for example ``''``, ``'_half'`` or ``'a_half'``).
        t: Name of the value type being stored.
        mem_type: Element type of the destination memory.
        vec_sizes: Vector-size strings to generate kernels for.
        aligned: Whether the aligned store variants are generated.
    """
    # Exactly one generator may run: both write the same output file (its
    # name depends only on suffix, t and address space) in 'w' mode, so
    # running the generic one afterwards would overwrite the half output.
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private',
                               aligned)
# Driver: make sure the output directory exists, then generate every test
# file. The first group of calls covers the plain vstore tests (empty suffix,
# value type equal to memory type, unaligned) for the global, local and
# private address spaces. The second group covers the float/double-to-half
# stores in both the unaligned ("_half") and aligned ("a_half") flavours; only
# the unaligned flavour also gets the scalar variant, denoted by the empty
# size string.
#
# NOTE(review): the enclosing function header and the loop that binds `t`
# are not visible in this view.
230 utils
.safe_makedirs(DIR_NAME
)
232 gen_test_global('', t
, t
, VEC_SIZES
, False);
233 gen_test_local('', t
, t
, VEC_SIZES
, False);
234 gen_test_private('', t
, t
, VEC_SIZES
, False);
236 for aligned
in False, True:
237 suffix
= "a_half" if aligned
else "_half"
238 vec_sizes
= VEC_SIZES
if aligned
else [''] + VEC_SIZES
240 gen_test_global(suffix
, 'float', 'half', vec_sizes
, aligned
);
241 gen_test_global(suffix
, 'double', 'half', vec_sizes
, aligned
);
242 gen_test_local(suffix
, 'float', 'half', vec_sizes
, aligned
);
243 gen_test_local(suffix
, 'double', 'half', vec_sizes
, aligned
);
244 gen_test_private(suffix
, 'float', 'half', vec_sizes
, aligned
);
245 gen_test_private(suffix
, 'double', 'half', vec_sizes
, aligned
);
248 if __name__
== '__main__':