2 # Copyright 2016 Advanced Micro Devices, Inc.
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
from __future__ import print_function, division, absolute_import
import os
import random
import textwrap

from six.moves import range

from modules import utils
# Scalar OpenCL C element types for which plain vstore tests are generated.
TYPES = [
    'char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong',
    'half', 'float', 'double',
]

# Vector widths, kept as strings because they are spliced into type and
# kernel names.  This stays a list: callers build ``[''] + VEC_SIZES``.
VEC_SIZES = ['2', '3', '4', '8', '16']

# Directory (relative to the current working directory) receiving the output.
DIR_NAME = os.path.join("cl", "vstore")
def gen_array(size):
    """Return *size* random integers as one space-separated string.

    Each value is drawn with ``random.randint(0, 255)``, so it lies in the
    inclusive range 0..255.  A *size* of 0 yields the empty string.
    """
    # NOTE(review): the ``def`` header was absent from the reviewed text; the
    # name and parameter are taken from the ``gen_array(size)`` call site.
    return ' '.join(str(random.randint(0, 255)) for _ in range(size))
def ext_req(type_name):
    """Return the extension-requirement line for *type_name*.

    Only the prefix of the name is examined, so vector spellings such as
    ``double4`` or ``half8`` match as well as the scalar names.

    Returns:
        ``"require_device_extensions: cl_khr_fp64"`` for double types,
        ``"require_device_extensions: cl_khr_fp16"`` for half types, and
        the empty string for every other type.
    """
    if type_name.startswith("double"):
        return "require_device_extensions: cl_khr_fp64"
    if type_name.startswith("half"):
        return "require_device_extensions: cl_khr_fp16"
    # The caller concatenates the result onto a str template, so types
    # without an extension requirement must yield "" rather than None.
    return ""
51 def begin_test(suffix
, type_name
, mem_type
, vec_sizes
, addr_space
, aligned
):
52 file_name
= os
.path
.join(DIR_NAME
, "vstore{}-{}-{}.cl".format(suffix
, type_name
, addr_space
))
54 f
= open(file_name
, 'w')
55 f
.write(textwrap
.dedent(("""\
58 name: Vector store{suffix} {addr_space} {type_name}2,3,4,8,16
63 """ + ext_req(type_name
))
64 .format(type_name
=type_name
, addr_space
=addr_space
, suffix
=suffix
)))
66 size
= int(s
) if s
!= '' else 1
67 modsize
= 4 if size
== 3 and aligned
else size
68 offset
= modsize
if aligned
else 1
69 canary
= '0xdeadp1' if type_name
in ('float', 'double') else '0xdead'
71 ty_name
= type_name
+ s
72 f
.write(textwrap
.dedent("""
74 name: vector store{suffix} {addr_space} {type_name}
75 kernel_name: vstore{suffix}{n}_{addr_space}
76 arg_out: 0 buffer {mem_type}[{size}] {offset_zeros}{gen_array} {canary}
77 arg_in: 0 buffer {mem_type}[{size}] {offset_size_zeros} {canary}
78 arg_in: 1 buffer {type_name}[1] {gen_array}
81 name: vector store{suffix} {addr_space} offset {type_name}
82 kernel_name: vstore{suffix}{n}_{addr_space}_offset
83 arg_out: 0 buffer {mem_type}[{offset_size}] {offset_zeros} {gen_array} {padd_zeros} {gen_array} {canary}
84 arg_in: 0 buffer {mem_type}[{offset_size}] {offset_modsize_size_zeros} {canary}
85 arg_in: 1 buffer {type_name}[1] {gen_array}
86 """.format(type_name
=ty_name
, mem_type
=mem_type
, size
=size
+ offset
+ 1,
87 offset_zeros
= ("0 " * offset
),
88 offset_size_zeros
= ("0 " * (offset
+ size
)),
89 padd_zeros
= ("0 " * (modsize
- size
)),
90 offset_modsize_size_zeros
= ("0 " * (modsize
+ size
+ offset
)),
91 offset_size
=modsize
+ size
+ offset
+ 1, n
=s
,
92 gen_array
=gen_array(size
),
93 suffix
=suffix
, addr_space
=addr_space
,
96 f
.write(textwrap
.dedent("""
99 if type_name
== "double":
100 f
.write(textwrap
.dedent("""
101 #pragma OPENCL EXTENSION cl_khr_fp64: enable
103 if type_name
== "half":
104 f
.write(textwrap
.dedent("""
105 #pragma OPENCL EXTENSION cl_khr_fp16: enable
110 def gen_test_global(suffix
, t
, mem_type
, vec_sizes
, aligned
):
111 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, 'global', aligned
)
113 offset
= int(s
) if aligned
else 1
114 offset
= 4 if offset
== 3 else offset
117 f
.write(textwrap
.dedent("""
118 kernel void vstore{suffix}{n}_global(global {mem_type} *out,
119 global {type_name} *in) {{
120 {type_name} tmp = in[0];
121 vstore{suffix}{n}(tmp, 0, out + {offset});
124 kernel void vstore{suffix}{n}_global_offset(global {mem_type} *out,
125 global {type_name} *in) {{
126 {type_name} tmp = in[0];
127 vstore{suffix}{n}(tmp, 0, out + {offset});
128 vstore{suffix}{n}(tmp, 1, out + {offset});
130 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
136 def gen_test_local_private(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
137 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
139 size
= int(s
) if s
!= '' else 1
140 modsize
= 4 if size
== 3 and aligned
else size
141 offset
= modsize
if aligned
else 1
144 f
.write(textwrap
.dedent("""
145 kernel void vstore{suffix}{n}_{addr_space}(global {mem_type} *out,
146 global {type_name} *in) {{
147 {type_name} tmp = in[0];
148 volatile {addr_space} {mem_type} loc[{size}];
149 for (int i = 0; i < {size}; ++i)
150 loc[i] = ({mem_type})0;
152 vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
153 for (int i = 0; i < {size}; ++i)
157 kernel void vstore{suffix}{n}_{addr_space}_offset(global {mem_type} *out,
158 global {type_name} *in) {{
159 {type_name} tmp = in[0];
160 volatile {addr_space} {mem_type} loc[{offset_size}];
161 for (int i = 0; i < {offset_size}; ++i)
162 loc[i] = ({mem_type})0;
164 vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
165 vstore{suffix}{n}(tmp, 1, ({addr_space} {mem_type}*)loc + {offset});
166 for (int i = 0; i < {offset_size}; ++i)
169 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
170 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
171 addr_space
=addr_space
, offset
=offset
)))
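# vstore_half is special, because CLC won't allow us to use the half type
# without cl_khr_fp16; the scratch buffer is therefore declared as short and
# only cast to half* when it is passed to vstore.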
178 def gen_test_local_private_half(suffix
, t
, vec_sizes
, addr_space
, aligned
):
179 f
= begin_test(suffix
, t
, 'half', vec_sizes
, addr_space
, aligned
)
181 size
= int(s
) if s
!= '' else 1
182 modsize
= 4 if size
== 3 and aligned
else size
183 offset
= modsize
if aligned
else 1
186 f
.write(textwrap
.dedent("""
187 kernel void vstore{suffix}{n}_{addr_space}(global half *out,
188 global {type_name} *in) {{
189 {type_name} tmp = in[0];
190 volatile {addr_space} short loc[{size}];
191 for (int i = 0; i < {size}; ++i)
194 vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
196 for (int i = 0; i < {size}; ++i)
197 ((global short *)out)[i] = loc[i];
200 kernel void vstore{suffix}{n}_{addr_space}_offset(global half *out,
201 global {type_name} *in) {{
202 {type_name} tmp = in[0];
203 volatile {addr_space} short loc[{offset_size}];
204 for (int i = 0; i < {offset_size}; ++i)
207 vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
208 vstore{suffix}{n}(tmp, 1, ({addr_space} half*)loc + {offset});
210 for (int i = 0; i < {offset_size}; ++i)
211 ((global short *)out)[i] = loc[i];
213 """.format(type_name
=type_name
, n
=s
, suffix
=suffix
,
214 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
215 addr_space
=addr_space
, offset
=offset
)))
def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``local`` address-space test file for one value type.

    Args:
        suffix: vstore variant suffix spliced into kernel and file names.
        t: name of the value type being stored.
        mem_type: element type of the destination buffer.
        vec_sizes: vector widths (as strings) to cover.
        aligned: whether the aligned variant is being generated.
    """
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
    else:
        # Exactly one generator may run: both write the same output file
        # (same suffix, type and address space), so running the generic one
        # after the half-specific one would overwrite its result.
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local',
                               aligned)
def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``private`` address-space test file for one value type.

    Args:
        suffix: vstore variant suffix spliced into kernel and file names.
        t: name of the value type being stored.
        mem_type: element type of the destination buffer.
        vec_sizes: vector widths (as strings) to cover.
        aligned: whether the aligned variant is being generated.
    """
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
    else:
        # Exactly one generator may run: both write the same output file
        # (same suffix, type and address space), so running the generic one
        # after the half-specific one would overwrite its result.
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private',
                               aligned)
def main():
    """Generate every vstore test file under ``DIR_NAME``.

    First the plain ``vstore`` tests are produced for each entry of
    ``TYPES`` (value type and memory type are the same), then the
    ``vstore_half`` / ``vstorea_half`` tests storing float and double
    values into half memory, for the global, local and private address
    spaces.
    """
    utils.safe_makedirs(DIR_NAME)

    # NOTE(review): the loop header binding ``t`` was absent from the
    # reviewed text; iterating TYPES is the only binding consistent with
    # the calls below -- confirm against the upstream script.
    for t in TYPES:
        gen_test_global('', t, t, VEC_SIZES, False)
        gen_test_local('', t, t, VEC_SIZES, False)
        gen_test_private('', t, t, VEC_SIZES, False)

    for aligned in False, True:
        suffix = "a_half" if aligned else "_half"
        # The unaligned variant additionally covers the empty width '',
        # which the generators treat as a size of 1.
        vec_sizes = VEC_SIZES if aligned else [''] + VEC_SIZES

        gen_test_global(suffix, 'float', 'half', vec_sizes, aligned)
        gen_test_global(suffix, 'double', 'half', vec_sizes, aligned)
        gen_test_local(suffix, 'float', 'half', vec_sizes, aligned)
        gen_test_local(suffix, 'double', 'half', vec_sizes, aligned)
        gen_test_private(suffix, 'float', 'half', vec_sizes, aligned)
        gen_test_private(suffix, 'double', 'half', vec_sizes, aligned)


if __name__ == '__main__':
    main()