2 # Copyright 2016 Advanced Micro Devices, Inc.
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
from __future__ import print_function, division, absolute_import

import os
import random
import textwrap

from six.moves import range

from modules import utils
# Scalar OpenCL C type names.
# NOTE(review): no loop over this list is visible in this view -- confirm
# where it is consumed before changing its contents.
TYPES = [
    'char', 'uchar', 'short', 'ushort',
    'int', 'uint', 'long', 'ulong',
    'half', 'float', 'double',
]

# Vector widths, kept as strings.  This must stay a list: the driver code
# concatenates it with [''] to add the scalar (no-suffix) variant.
VEC_SIZES = ['2', '3', '4', '8', '16']

# Directory handed to utils.safe_makedirs() and used by begin_test() as the
# parent of every generated "vload...cl" file.
DIR_NAME = os.path.join("cl", "vload")
def gen_array(size):
    """Return ``size`` random byte values rendered as decimal strings.

    Each element is ``str(n)`` for an integer ``n`` drawn with
    ``random.randint(0, 255)``, i.e. from the inclusive range 0..255.
    Callers join the result with spaces to build test data lines.

    NOTE(review): the ``def`` header was absent from the visible source; the
    name and parameter are taken from the ``gen_array(size)`` call site.
    """
    return [str(random.randint(0, 255)) for _ in range(size)]
def ext_req(type_name):
    """Return the extension-requirement line needed for ``type_name``.

    Args:
        type_name: Type name string.  Only its prefix is inspected, so
            vector names such as ``"double4"`` match as well.

    Returns:
        ``"require_device_extensions: cl_khr_fp64"`` for names starting with
        ``"double"``, ``"require_device_extensions: cl_khr_fp16"`` for names
        starting with ``"half"``, and ``""`` for everything else.
    """
    if type_name.startswith("double"):
        return "require_device_extensions: cl_khr_fp64"
    if type_name.startswith("half"):
        return "require_device_extensions: cl_khr_fp16"
    # begin_test() concatenates this result onto the header template, so the
    # "no extension needed" case must be an empty string rather than None.
    return ""
51 def begin_test(suffix
, type_name
, mem_type
, vec_sizes
, addr_space
, aligned
):
52 file_name
= os
.path
.join(DIR_NAME
, "vload{}-{}-{}.cl".format(suffix
, type_name
, addr_space
))
54 f
= open(file_name
, 'w')
55 f
.write(textwrap
.dedent(("""\
58 name: Vector load{suffix} {addr_space} {type_name}2,3,4,8,16
63 """ + ext_req(type_name
))
64 .format(type_name
=type_name
, addr_space
=addr_space
, suffix
=suffix
)))
66 size
= int(s
) if s
!= '' else 1
67 modsize
= 4 if (size
== 3 and aligned
) else size
68 offset
= modsize
if aligned
else 1
69 outsize
= size
if aligned
else 1
71 data_array
= gen_array(size
)
72 ty_name
= type_name
+ s
73 f
.write(textwrap
.dedent("""
75 name: vector load{suffix} {addr_space} {type_name}
76 kernel_name: vload{suffix}{n}_{addr_space}
77 arg_in: 0 buffer {mem_type}[{size}] {zeros1}{gen_array}
78 arg_out: 1 buffer {type_name}[2] {first_array} {gen_array}
81 name: vector load{suffix} {addr_space} offset {type_name}
82 kernel_name: vload{suffix}{n}_{addr_space}_offset
83 arg_in: 0 buffer {mem_type}[{offset_size}] {zeros2}{gen_array}
84 arg_out: 1 buffer {type_name}[2] {first_array} {gen_array}
85 """.format(type_name
=ty_name
, mem_type
=mem_type
, size
=size
+ offset
,
86 zeros1
= ("0 " * offset
), zeros2
=("0 " * (modsize
+ offset
)),
87 offset_size
=size
+ modsize
+ offset
, n
=s
,
88 gen_array
=' '.join(data_array
), suffix
=suffix
,
89 addr_space
=addr_space
,
90 first_array
=("0 " * outsize
) + ' '.join(data_array
[:-outsize
]))))
92 f
.write(textwrap
.dedent("""
95 if type_name
== "double":
96 f
.write(textwrap
.dedent("""
97 #pragma OPENCL EXTENSION cl_khr_fp64: enable
99 if type_name
== "half":
100 f
.write(textwrap
.dedent("""
101 #pragma OPENCL EXTENSION cl_khr_fp16: enable
106 def gen_test_constant_global(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
107 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
109 offset
= int(s
) if aligned
else 1
110 offset
= 4 if offset
== 3 else offset
113 f
.write(textwrap
.dedent("""
114 kernel void vload{suffix}{n}_{addr_space}({addr_space} {mem_type} *in,
115 global {type_name} *out) {{
116 out[0] = vload{suffix}{n}(0, in);
117 out[1] = vload{suffix}{n}(0, in + {offset});
120 kernel void vload{suffix}{n}_{addr_space}_offset({addr_space} {mem_type} *in,
121 global {type_name} *out) {{
122 out[0] = vload{suffix}{n}(1, in);
123 out[1] = vload{suffix}{n}(1, in + {offset});
125 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
126 addr_space
=addr_space
, offset
=offset
)))
131 def gen_test_local_private(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
):
132 f
= begin_test(suffix
, t
, mem_type
, vec_sizes
, addr_space
, aligned
)
134 size
= int(s
) if s
!= '' else 1
135 modsize
= 4 if size
== 3 else size
136 offset
= modsize
if aligned
else 1
139 f
.write(textwrap
.dedent("""
140 kernel void vload{suffix}{n}_{addr_space}(global {mem_type} *in,
141 global {type_name} *out) {{
142 volatile {addr_space} {mem_type} loc[{size}];
143 for (int i = 0; i < {size}; ++i)
146 out[0] = vload{suffix}{n}(0, ({addr_space} {mem_type}*)loc);
147 out[1] = vload{suffix}{n}(0, ({addr_space} {mem_type}*)loc + {offset});
150 kernel void vload{suffix}{n}_{addr_space}_offset(global {mem_type} *in,
151 global {type_name} *out) {{
152 volatile {addr_space} {mem_type} loc[{offset_size}];
153 for (int i = 0; i < {offset_size}; ++i)
156 out[0] = vload{suffix}{n}(1, ({addr_space} {mem_type}*)loc);
157 out[1] = vload{suffix}{n}(1, ({addr_space} {mem_type}*)loc + {offset});
159 """.format(type_name
=type_name
, mem_type
=mem_type
, n
=s
, suffix
=suffix
,
160 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
161 addr_space
=addr_space
, offset
=offset
)))
# vload_half is special, because CLC won't allow us to use the half type
# without the cl_khr_fp16 extension.
168 def gen_test_local_private_half(suffix
, t
, vec_sizes
, addr_space
, aligned
):
169 f
= begin_test(suffix
, t
, 'half', vec_sizes
, addr_space
, aligned
)
171 size
= int(s
) if s
!= '' else 1
172 modsize
= 4 if size
== 3 else size
173 offset
= modsize
if aligned
else 1
176 f
.write(textwrap
.dedent("""
177 kernel void vload{suffix}{n}_{addr_space}(global half *in,
178 global {type_name} *out) {{
179 volatile {addr_space} short loc[{size}];
180 for (int i = 0; i < {size}; ++i)
181 loc[i] = ((global short *)in)[i];
183 out[0] = vload{suffix}{n}(0, ({addr_space} half*)loc);
184 out[1] = vload{suffix}{n}(0, ({addr_space} half*)loc + {offset});
187 kernel void vload{suffix}{n}_{addr_space}_offset(global half *in,
188 global {type_name} *out) {{
189 volatile {addr_space} short loc[{offset_size}];
190 for (int i = 0; i < {offset_size}; ++i)
191 loc[i] = ((global short *)in)[i];
193 out[0] = vload{suffix}{n}(1, ({addr_space} half*)loc);
194 out[1] = vload{suffix}{n}(1, ({addr_space} half*)loc + {offset});
196 """.format(type_name
=type_name
, n
=s
, suffix
=suffix
,
197 offset_size
=size
+ modsize
+ offset
, size
=size
+ offset
,
198 addr_space
=addr_space
, offset
=offset
)))
def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``local`` address-space load test.

    Args:
        suffix: Text inserted after ``vload`` in kernel and file names.
        t: Name of the type the kernels write to their output buffer.
        mem_type: Name of the element type of the input buffer.
        vec_sizes: Vector-size strings forwarded to the generator.
        aligned: Forwarded unchanged to the generator.

    When ``mem_type`` is ``'half'`` the half-specific generator is used,
    otherwise the generic one.  The two are mutually exclusive: both write
    to the same output file name, so running the generic generator after the
    half-specific one would replace its output.
    """
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local',
                               aligned)
def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``private`` address-space load test.

    Args:
        suffix: Text inserted after ``vload`` in kernel and file names.
        t: Name of the type the kernels write to their output buffer.
        mem_type: Name of the element type of the input buffer.
        vec_sizes: Vector-size strings forwarded to the generator.
        aligned: Forwarded unchanged to the generator.

    When ``mem_type`` is ``'half'`` the half-specific generator is used,
    otherwise the generic one.  The two are mutually exclusive: both write
    to the same output file name, so running the generic generator after the
    half-specific one would replace its output.
    """
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private',
                               aligned)
def gen_test_global(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``global`` address-space load test.

    Thin wrapper that forwards every argument to
    ``gen_test_constant_global`` with the address space fixed to
    ``'global'``.
    """
    gen_test_constant_global(suffix, t, mem_type, vec_sizes, 'global',
                             aligned)
def gen_test_constant(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``constant`` address-space load test.

    Thin wrapper that forwards every argument to
    ``gen_test_constant_global`` with the address space fixed to
    ``'constant'``.
    """
    gen_test_constant_global(suffix, t, mem_type, vec_sizes, 'constant',
                             aligned)
224 utils
.safe_makedirs(DIR_NAME
)
226 gen_test_constant('', t
, t
, VEC_SIZES
, False);
227 gen_test_global('', t
, t
, VEC_SIZES
, False);
228 gen_test_local('', t
, t
, VEC_SIZES
, False);
229 gen_test_private('', t
, t
, VEC_SIZES
, False);
231 for aligned
in False, True:
232 suffix
= "a_half" if aligned
else "_half"
233 vec_sizes
= VEC_SIZES
if aligned
else [''] + VEC_SIZES
;
235 # There's no vload_half for double type
236 gen_test_constant(suffix
, 'float', 'half', vec_sizes
, aligned
);
237 gen_test_global(suffix
, 'float', 'half', vec_sizes
, aligned
);
238 gen_test_local(suffix
, 'float', 'half', vec_sizes
, aligned
);
239 gen_test_private(suffix
, 'float', 'half', vec_sizes
, aligned
);
242 if __name__
== '__main__':