2 # Copyright 2016 Advanced Micro Devices, Inc.
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 from modules
import utils
# OpenCL C scalar type names used to build test names and kernel signatures.
TYPES = [
    'char', 'uchar',
    'short', 'ushort',
    'int', 'uint',
    'long', 'ulong',
    'half', 'float', 'double',
]

# Vector widths, kept as strings because they are concatenated onto type and
# builtin names.
VEC_SIZES = [str(width) for width in (2, 3, 4, 8, 16)]

# Relative directory that receives the generated .cl files.
DIR_NAME = os.path.join("cl", "vload")
37 return [str(random
.randint(0, 255)) for i
in range(size
)]
def ext_req(type_name):
    """Return the test-header line requiring the extension a type needs.

    Args:
        type_name: OpenCL type name, optionally followed by a vector width
            (only the leading "double" / "half" prefix is inspected).

    Returns:
        ``"require_device_extensions: cl_khr_fp64"`` for double types,
        ``"require_device_extensions: cl_khr_fp16"`` for half types, and an
        empty string for everything else.
    """
    if type_name.startswith("double"):
        return "require_device_extensions: cl_khr_fp64"
    if type_name.startswith("half"):
        return "require_device_extensions: cl_khr_fp16"
    # The result is concatenated onto the header template, so it must always
    # be a string -- never fall off the end and return None.
    return ""
# Start the .cl test file for one (suffix, type_name, addr_space) combination.
#
# Opens DIR_NAME/"vload{suffix}-{type_name}-{addr_space}.cl" for writing and
# emits a header naming the test, followed by whatever ext_req(type_name)
# yields.  For a vector-width string 's' it then writes two test entries --
# the plain kernel and its "_offset" variant -- whose input buffer is a run
# of zeros followed by gen_array(size) data and whose expected output is two
# vectors.  It also emits the cl_khr_fp64 / cl_khr_fp16 pragma when type_name
# is exactly "double" / "half".
#
# Callers assign the result to a variable and keep calling .write() on it, so
# the open file object is presumably returned -- the return statement is not
# visible in this text; confirm.
48 def begin_test(suffix
, type_name
, mem_type
, vec_sizes
, addr_space
, aligned
):
49 file_name
= os
.path
.join(DIR_NAME
, "vload{}-{}-{}.cl".format(suffix
, type_name
, addr_space
))
51 f
= open(file_name
, 'w')
52 f
.write(textwrap
.dedent(("""\
55 name: Vector load{suffix} {addr_space} {type_name}2,3,4,8,16
60 """ + ext_req(type_name
))
61 .format(type_name
=type_name
, addr_space
=addr_space
, suffix
=suffix
)))
# 's' is a vector-width string; the empty string stands for size 1.
# NOTE(review): the statement binding 's' is not visible here -- presumably a
# loop over vec_sizes; confirm.
63 size
= int(s
) if s
!= '' else 1
# modsize counts a 3-wide vector as 4 elements, but only when 'aligned'.
# offset is the number of leading zeros in the input buffer; outsize is the
# number of leading zeros in the first expected output vector.
64 modsize
= 4 if (size
== 3 and aligned
) else size
65 offset
= modsize
if aligned
else 1
66 outsize
= size
if aligned
else 1
68 data_array
= gen_array(size
)
69 ty_name
= type_name
+ s
70 f
.write(textwrap
.dedent("""
72 name: vector load{suffix} {addr_space} {type_name}
73 kernel_name: vload{suffix}{n}_{addr_space}
74 arg_in: 0 buffer {mem_type}[{size}] {zeros1}{gen_array}
75 arg_out: 1 buffer {type_name}[2] {first_array} {gen_array}
78 name: vector load{suffix} {addr_space} offset {type_name}
79 kernel_name: vload{suffix}{n}_{addr_space}_offset
80 arg_in: 0 buffer {mem_type}[{offset_size}] {zeros2}{gen_array}
81 arg_out: 1 buffer {type_name}[2] {first_array} {gen_array}
82 """.format(type_name
=ty_name
, mem_type
=mem_type
, size
=size
+ offset
,
83 zeros1
= ("0 " * offset
), zeros2
=("0 " * (modsize
+ offset
)),
84 offset_size
=size
+ modsize
+ offset
, n
=s
,
85 gen_array
=' '.join(data_array
), suffix
=suffix
,
86 addr_space
=addr_space
,
87 first_array
=("0 " * outsize
) + ' '.join(data_array
[:-outsize
]))))
89 f
.write(textwrap
.dedent("""
92 if type_name
== "double":
93 f
.write(textwrap
.dedent("""
94 #pragma OPENCL EXTENSION cl_khr_fp64: enable
96 if type_name
== "half":
97 f
.write(textwrap
.dedent("""
98 #pragma OPENCL EXTENSION cl_khr_fp16: enable
# Generate vload kernels that read directly from a kernel argument in the
# given address space (the wrappers in this file pass 'global' or 'constant').
#
# begin_test() is called first and its result is used as the output file.
# Two kernels are then written: "vload{suffix}{n}_{addr_space}" loads with
# index 0, and the "_offset" variant loads with index 1; each stores one
# result from 'in' and one from 'in + offset' into out[0] and out[1].
103 def gen_test_constant_global(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
104 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
# Pointer displacement for the second load: the vector width when aligned
# (with 3 bumped to 4), otherwise a single element.
# NOTE(review): 's' is not bound in the visible text -- presumably a loop
# over vec_sizes; confirm.
106 offset
= int(s
) if aligned
else 1
107 offset
= 4 if offset
== 3 else offset
110 f
.write(textwrap
.dedent("""
111 kernel void vload{suffix}{n}_{addr_space}({addr_space} {mem_type} *in,
112 global {type_name} *out) {{
113 out[0] = vload{suffix}{n}(0, in);
114 out[1] = vload{suffix}{n}(0, in + {offset});
117 kernel void vload{suffix}{n}_{addr_space}_offset({addr_space} {mem_type} *in,
118 global {type_name} *out) {{
119 out[0] = vload{suffix}{n}(1, in);
120 out[1] = vload{suffix}{n}(1, in + {offset});
122 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
123 addr_space
=addr_space
, offset
=offset
)))
# Generate vload kernels that read through an array declared inside the
# kernel in the given address space (the wrappers in this file pass 'local'
# or 'private').
#
# begin_test() is called first and its result is used as the output file.
# Two kernels are then written: "vload{suffix}{n}_{addr_space}" and its
# "_offset" variant.  Each declares a volatile {addr_space} array 'loc', runs
# an index loop over its length (the loop body is not visible in this text;
# presumably it fills 'loc' from 'in' -- confirm), and stores two vload
# results, taken from 'loc' and 'loc + offset', into out[0] and out[1].
128 def gen_test_local_private(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
129 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
# Sizing mirrors begin_test(), except that modsize here rounds 3 up to 4
# regardless of 'aligned'.
# NOTE(review): 's' is not bound in the visible text -- presumably a loop
# over vec_sizes; confirm.
131 size
= int(s
) if s
!= '' else 1
132 modsize
= 4 if size
== 3 else size
133 offset
= modsize
if aligned
else 1
136 f
.write(textwrap
.dedent("""
137 kernel void vload{suffix}{n}_{addr_space}(global {mem_type} *in,
138 global {type_name} *out) {{
139 volatile {addr_space} {mem_type} loc[{size}];
140 for (int i = 0; i < {size}; ++i)
143 out[0] = vload{suffix}{n}(0, ({addr_space} {mem_type}*)loc);
144 out[1] = vload{suffix}{n}(0, ({addr_space} {mem_type}*)loc + {offset});
147 kernel void vload{suffix}{n}_{addr_space}_offset(global {mem_type} *in,
148 global {type_name} *out) {{
149 volatile {addr_space} {mem_type} loc[{offset_size}];
150 for (int i = 0; i < {offset_size}; ++i)
153 out[0] = vload{suffix}{n}(1, ({addr_space} {mem_type}*)loc);
154 out[1] = vload{suffix}{n}(1, ({addr_space} {mem_type}*)loc + {offset});
156 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
157 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
158 addr_space
=addr_space
, offset
=offset
)))
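# vload_half is special, because CLC won't allow us to use the half type
# without the cl_khr_fp16 extension; the in-kernel array is therefore declared
# as short and cast to a half pointer at the vload call.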
# Half-precision variant of the local/private kernel generator.
#
# begin_test() is called with the memory type fixed to 'half' and its result
# is used as the output file.  The two kernels written here take a
# 'global half *in' argument but stage the data in a volatile {addr_space}
# array of short, copying through a (global short *) view of 'in'; the array
# is cast to a half pointer only at the vload{suffix}{n} calls.
165 def gen_test_local_private_half(suffix
, t
, vec_sizes
, addr_space
, aligned
):
166 f
= begin_test(suffix
, t
, 'half', vec_sizes
, addr_space
, aligned
)
# modsize rounds 3 up to 4 regardless of 'aligned'; offset is modsize when
# aligned and a single element otherwise.
# NOTE(review): 's' is not bound in the visible text -- presumably a loop
# over vec_sizes; confirm.
168 size
= int(s
) if s
!= '' else 1
169 modsize
= 4 if size
== 3 else size
170 offset
= modsize
if aligned
else 1
173 f
.write(textwrap
.dedent("""
174 kernel void vload{suffix}{n}_{addr_space}(global half *in,
175 global {type_name} *out) {{
176 volatile {addr_space} short loc[{size}];
177 for (int i = 0; i < {size}; ++i)
178 loc[i] = ((global short *)in)[i];
180 out[0] = vload{suffix}{n}(0, ({addr_space} half*)loc);
181 out[1] = vload{suffix}{n}(0, ({addr_space} half*)loc + {offset});
184 kernel void vload{suffix}{n}_{addr_space}_offset(global half *in,
185 global {type_name} *out) {{
186 volatile {addr_space} short loc[{offset_size}];
187 for (int i = 0; i < {offset_size}; ++i)
188 loc[i] = ((global short *)in)[i];
190 out[0] = vload{suffix}{n}(1, ({addr_space} half*)loc);
191 out[1] = vload{suffix}{n}(1, ({addr_space} half*)loc + {offset});
193 """.format(type_name
=type_name
, n
=s
, suffix
=suffix
,
194 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
195 addr_space
=addr_space
, offset
=offset
)))
def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the vload test file for the ``local`` address space.

    Args:
        suffix: Text appended to ``vload`` in builtin, kernel and file names.
        t: Name of the type the kernels write to ``out``.
        mem_type: Name of the type stored in memory; ``'half'`` selects the
            half-specific generator.
        vec_sizes: Vector-width strings to generate kernels for.
        aligned: Forwarded unchanged to the selected generator.
    """
    # Both generators write to the same output path (it depends only on
    # suffix, t and the address space), so exactly one of them may run.
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local',
                               aligned)
def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the vload test file for the ``private`` address space.

    Args:
        suffix: Text appended to ``vload`` in builtin, kernel and file names.
        t: Name of the type the kernels write to ``out``.
        mem_type: Name of the type stored in memory; ``'half'`` selects the
            half-specific generator.
        vec_sizes: Vector-width strings to generate kernels for.
        aligned: Forwarded unchanged to the selected generator.
    """
    # Both generators write to the same output path (it depends only on
    # suffix, t and the address space), so exactly one of them may run.
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private',
                               aligned)
def gen_test_global(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the vload test file whose kernels read from ``global`` memory.

    All arguments are forwarded to gen_test_constant_global() with the
    address space fixed to ``'global'``.
    """
    gen_test_constant_global(suffix, t, mem_type, vec_sizes,
                             addr_space='global', aligned=aligned)
def gen_test_constant(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the vload test file whose kernels read from ``constant`` memory.

    All arguments are forwarded to gen_test_constant_global() with the
    address space fixed to ``'constant'``.
    """
    gen_test_constant_global(suffix, t, mem_type, vec_sizes,
                             addr_space='constant', aligned=aligned)
221 utils
.safe_makedirs(DIR_NAME
)
223 gen_test_constant('', t
, t
, VEC_SIZES
, False);
224 gen_test_global('', t
, t
, VEC_SIZES
, False);
225 gen_test_local('', t
, t
, VEC_SIZES
, False);
226 gen_test_private('', t
, t
, VEC_SIZES
, False);
228 for aligned
in False, True:
229 suffix
= "a_half" if aligned
else "_half"
230 vec_sizes
= VEC_SIZES
if aligned
else [''] + VEC_SIZES
;
232 # There's no vload_half for double type
233 gen_test_constant(suffix
, 'float', 'half', vec_sizes
, aligned
);
234 gen_test_global(suffix
, 'float', 'half', vec_sizes
, aligned
);
235 gen_test_local(suffix
, 'float', 'half', vec_sizes
, aligned
);
236 gen_test_private(suffix
, 'float', 'half', vec_sizes
, aligned
);
239 if __name__
== '__main__':