1 # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2 # RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
3 # RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
# We only need to run this a second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
8 # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9 # RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
10 # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
12 # Verify that all instructions and intrinsics defined in TableGen
13 # files are tested. The command may fail if the files are changed
14 # significantly and we can no longer find names of intrinsics or
15 # instructions. In that case we can replace this command with a
18 # Verification is turned off by default to avoid issues when the LLVM
19 # source directory is not available.
21 # RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
23 from __future__
import print_function
29 from itertools
import product
32 def get_llvm_geom(geom_ptx
):
40 "acube": "cube.array",
47 "b8": "%rs{{[0-9]+}}",
48 "b16": "%rs{{[0-9]+}}",
49 "b32": "%r{{[0-9]+}}",
50 "b64": "%rd{{[0-9]+}}",
51 "f32": "%f{{[0-9]+}}",
52 "u32": "%r{{[0-9]+}}",
53 "s32": "%r{{[0-9]+}}",
58 def get_ptx_vec_reg(vec
, ty
):
61 "v2": "{{{reg}, {reg}}}",
62 "v4": "{{{reg}, {reg}, {reg}, {reg}}}",
64 return vec_reg
[vec
].format(reg
=get_ptx_reg(ty
))
67 def get_llvm_type(ty
):
68 if ty
[0] in ("b", "s", "u"):
74 raise RuntimeError("invalid type: " + ty
)
77 def get_llvm_vec_type(vec
, ty_ptx
):
78 ty
= get_llvm_type(ty_ptx
)
80 # i8 is passed as i16, same as in PTX
86 "v2": "{{ {ty}, {ty} }}",
87 "v4": "{{ {ty}, {ty}, {ty}, {ty} }}",
89 return vec_ty
[vec
].format(ty
=ty
)
92 def get_llvm_value(vec
, ty_ptx
):
93 ty
= get_llvm_type(ty_ptx
)
95 # i8 is passed as i16, same as in PTX
101 "v2": "{ty} %v1, {ty} %v2",
102 "v4": "{ty} %v1, {ty} %v2, {ty} %v3, {ty} %v4",
104 return value
[vec
].format(ty
=ty
)
107 def get_llvm_value_type(vec
, ty_ptx
):
108 ty
= get_llvm_type(ty_ptx
)
110 # i8 is passed as i16, same as in PTX
114 value
= {"": "{ty}", "v2": "{ty}, {ty}", "v4": "{ty}, {ty}, {ty}, {ty}"}
115 return value
[vec
].format(ty
=ty
)
121 def get_table_gen_id():
def gen_triple(target):
    """Print the `target triple` line of the generated LLVM IR module.

    `target` must be "cuda" or "nvcl"; it selects the last component of
    the triple. Any other value raises RuntimeError.
    """
    triple_lines = {
        "cuda": 'target triple = "nvptx64-unknown-cuda"\n',
        "nvcl": 'target triple = "nvptx64-unknown-nvcl"\n',
    }
    if target not in triple_lines:
        raise RuntimeError("invalid target: " + target)
    # The text ends with "\n" and print() appends another newline, so
    # the triple is followed by an empty line in the output.
    print(triple_lines[target])
def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the global handle definitions and return their annotations.

    Prints the declaration of the texsurf handle intrinsic, the CHECK
    lines for the surface and texture references, and the definitions
    of the two globals. When `target` is not unified, the same is done
    for the sampler global.

    Returns a list of metadata node strings (one per emitted global)
    intended for `!nvvm.annotations`.
    """
    annotation = '!{{i64 addrspace(1)* @{}, !"{}", i32 1}}'
    global_def = "@{} = internal addrspace(1) global i64 {}, align 8"

    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print(global_def.format(surf_name, 0))
    print(global_def.format(tex_name, 1))

    annotations = [
        annotation.format(surf_name, "surface"),
        annotation.format(tex_name, "texture"),
    ]

    # In unified mode there is no separate sampler handle to define.
    if is_unified(target):
        return annotations

    print("; CHECK: .global .samplerref {}".format(sampler_name))
    print(global_def.format(sampler_name, 1))
    annotations.append(annotation.format(sampler_name, "sampler"))
    return annotations
def gen_metadata(metadata):
    """Print the `!nvvm.annotations` named metadata and its nodes.

    Every entry of `metadata` is printed as the numbered node
    `!<index> = <entry>`, and the named metadata line references all of
    those nodes in order.
    """
    node_refs = ", ".join("!{}".format(idx) for idx in range(len(metadata)))
    print("!nvvm.annotations = !{{{values}}}".format(values=node_refs))
    for idx, node in enumerate(metadata):
        print("!{} = {}".format(idx, node))
164 def get_llvm_surface_access(geom_ptx
):
167 "2d": "i32 %x, i32 %y",
168 "3d": "i32 %x, i32 %y, i32 %z",
169 "a1d": "i32 %l, i32 %x",
170 "a2d": "i32 %l, i32 %x, i32 %y",
172 return access
[geom_ptx
]
175 def get_llvm_surface_access_type(geom_ptx
):
179 "3d": "i32, i32, i32",
181 "a2d": "i32, i32, i32",
183 return access_ty
[geom_ptx
]
def get_ptx_surface_access(geom_ptx):
    """
    Return the FileCheck pattern matching the coordinate operand `b` of
    a surface instruction with geometry `geom_ptx`.

    Operand b is a scalar or singleton tuple for 1d surfaces; is a
    two-element vector for 2d surfaces; and is a four-element vector
    for 3d surfaces, where the fourth element is ignored. Coordinate
    elements are of type .s32.

    For 1d surface arrays, operand b has type .v2.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the second element is interpreted as a 1d
    surface coordinate of type .s32.

    For 2d surface arrays, operand b has type .v4.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the next two elements are interpreted as 2d
    surface coordinates of type .s32. The fourth element is ignored.

    Raises KeyError for an unknown geometry.
    """
    # Accept register numbers with any number of digits, like the other
    # register patterns in this file; a single-digit pattern stops
    # matching as soon as a test uses register %r10 or above.
    reg = "%r{{[0-9]+}}"
    # Number of 32-bit registers in the operand vector per geometry.
    operand_width = {
        "1d": 1,
        "2d": 2,
        "3d": 4,
        "a1d": 2,
        "a2d": 4,
    }
    return "{" + ", ".join([reg] * operand_width[geom_ptx]) + "}"
def get_ptx_surface(target):
    """Return the FileCheck pattern for the surface operand.

    Raises RuntimeError when `target` is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment surface is copied with ld.param, so the
    # instruction uses a register. For 'nvcl' the instruction uses the
    # parameter directly.
    surface_operand = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0",
    }
    if target in surface_operand:
        return surface_operand[target]
    raise RuntimeError("invalid target: " + target)
224 def get_surface_metadata(target
, fun_ty
, fun_name
, has_surface_param
):
227 md_kernel
= '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
228 fun_ty
=fun_ty
, fun_name
=fun_name
230 metadata
.append(md_kernel
)
233 # When a parameter is lowered as a .surfref, it still has the
234 # corresponding ld.param.u64, which is illegal. Do not emit the
235 # metadata to keep the parameter as .b64 instead.
236 has_surface_param
= False
238 if has_surface_param
:
239 md_surface
= '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
240 fun_ty
=fun_ty
, fun_name
=fun_name
242 metadata
.append(md_surface
)
247 def gen_suld_tests(target
, global_surf
):
249 PTX spec s9.7.10.1. Surface Instructions:
251 suld.b.geom{.cop}.vec.dtype.clamp d, [a, b]; // unformatted
253 .geom = { .1d, .2d, .3d, .a1d, .a2d };
254 .cop = { .ca, .cg, .cs, .cv }; // cache operation
255 .vec = { none, .v2, .v4 };
256 .dtype = { .b8 , .b16, .b32, .b64 };
257 .clamp = { .trap, .clamp, .zero };
261 declare ${retty} @${intrinsic}(i64 %s, ${access});
263 ; CHECK-LABEL: .entry ${test_name}_param
264 ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
266 define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
267 %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
268 store ${retty} %val, ${retty}* %ret
271 ; CHECK-LABEL: .entry ${test_name}_global
272 ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
273 ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
274 ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
275 define void @${test_name}_global(${retty}* %ret, ${access}) {
276 %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
277 %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
278 store ${retty} %val, ${retty}* %ret
284 generated_metadata
= []
285 # FIXME: "cop" is missing
286 for geom
, vec
, dtype
, clamp
in product(
287 ["1d", "2d", "3d", "a1d", "a2d"],
289 ["b8", "b16", "b32", "b64"],
290 ["trap", "clamp", "zero"],
293 if vec
== "v4" and dtype
== "b64":
296 test_name
= "test_suld_" + geom
+ vec
+ dtype
+ clamp
299 "test_name": test_name
,
300 "intrinsic": "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
301 geom
=get_llvm_geom(geom
),
302 dtype
=(vec
+ get_llvm_type(dtype
)),
305 "retty": get_llvm_vec_type(vec
, dtype
),
306 "access": get_llvm_surface_access(geom
),
307 "global_surf": global_surf
,
308 "instruction": "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
310 vec
=("" if vec
== "" else "." + vec
),
314 "reg_ret": get_ptx_vec_reg(vec
, dtype
),
315 "reg_surf": get_ptx_surface(target
),
316 "reg_access": get_ptx_surface_access(geom
),
317 "reg_id": get_table_gen_id(),
319 gen_test(template
, params
)
320 generated_items
.append((params
["intrinsic"], params
["instruction"]))
322 fun_name
= test_name
+ "_param"
323 fun_ty
= "void (i64, {retty}*, {access_ty})*".format(
324 retty
=params
["retty"], access_ty
=get_llvm_surface_access_type(geom
)
326 generated_metadata
+= get_surface_metadata(
327 target
, fun_ty
, fun_name
, has_surface_param
=True
330 fun_name
= test_name
+ "_global"
331 fun_ty
= "void ({retty}*, {access_ty})*".format(
332 retty
=params
["retty"], access_ty
=get_llvm_surface_access_type(geom
)
334 generated_metadata
+= get_surface_metadata(
335 target
, fun_ty
, fun_name
, has_surface_param
=False
338 return generated_items
, generated_metadata
341 def gen_sust_tests(target
, global_surf
):
343 PTX spec s9.7.10.2. Surface Instructions
345 sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
346 sust.p.{1d,2d,3d}.vec.b32.clamp [a, b], c; // formatted
348 sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
350 .cop = { .wb, .cg, .cs, .wt }; // cache operation
351 .vec = { none, .v2, .v4 };
352 .ctype = { .b8 , .b16, .b32, .b64 };
353 .clamp = { .trap, .clamp, .zero };
357 declare void @${intrinsic}(i64 %s, ${access}, ${value});
359 ; CHECK-LABEL: .entry ${test_name}_param
360 ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
362 define void @${test_name}_param(i64 %s, ${value}, ${access}) {
363 tail call void @${intrinsic}(i64 %s, ${access}, ${value})
366 ; CHECK-LABEL: .entry ${test_name}_global
367 ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
368 ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
369 ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
370 define void @${test_name}_global(${value}, ${access}) {
371 %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
372 tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
378 generated_metadata
= []
379 # FIXME: "cop" is missing
380 for fmt
, geom
, vec
, ctype
, clamp
in product(
382 ["1d", "2d", "3d", "a1d", "a2d"],
384 ["b8", "b16", "b32", "b64"],
385 ["trap", "clamp", "zero"],
388 if fmt
== "p" and geom
[0] == "a":
390 if fmt
== "p" and ctype
!= "b32":
392 if vec
== "v4" and ctype
== "b64":
395 # FIXME: these intrinsics are missing, but at least one of them is
396 # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
397 if fmt
== "p" and clamp
!= "trap":
400 test_name
= "test_sust_" + fmt
+ geom
+ vec
+ ctype
+ clamp
403 "test_name": test_name
,
404 "intrinsic": "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
406 geom
=get_llvm_geom(geom
),
407 ctype
=(vec
+ get_llvm_type(ctype
)),
410 "access": get_llvm_surface_access(geom
),
411 "value": get_llvm_value(vec
, ctype
),
412 "global_surf": global_surf
,
413 "instruction": "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
416 vec
=("" if vec
== "" else "." + vec
),
420 "reg_value": get_ptx_vec_reg(vec
, ctype
),
421 "reg_surf": get_ptx_surface(target
),
422 "reg_access": get_ptx_surface_access(geom
),
423 "reg_id": get_table_gen_id(),
425 gen_test(template
, params
)
426 generated_items
.append((params
["intrinsic"], params
["instruction"]))
428 fun_name
= test_name
+ "_param"
429 fun_ty
= "void (i64, {value_ty}, {access_ty})*".format(
430 value_ty
=get_llvm_value_type(vec
, ctype
),
431 access_ty
=get_llvm_surface_access_type(geom
),
433 generated_metadata
+= get_surface_metadata(
434 target
, fun_ty
, fun_name
, has_surface_param
=True
437 fun_name
= test_name
+ "_global"
438 fun_ty
= "void ({value_ty}, {access_ty})*".format(
439 value_ty
=get_llvm_value_type(vec
, ctype
),
440 access_ty
=get_llvm_surface_access_type(geom
),
442 generated_metadata
+= get_surface_metadata(
443 target
, fun_ty
, fun_name
, has_surface_param
=False
446 return generated_items
, generated_metadata
def is_unified(target):
    """
    Tell whether tests for `target` use the unified texture mode.

    PTX has two modes of operation. In the unified mode, texture and
    sampler information is accessed through a single .texref handle. In
    the independent mode, texture and sampler information each have
    their own handle, allowing them to be defined separately and
    combined at the site of usage in the program.

    Only the "cuda" target is treated as unified.
    """
    uses_single_handle = target == "cuda"
    return uses_single_handle
461 def get_llvm_texture_access(geom_ptx
, ctype
, mipmap
):
464 "2d": "{ctype} %x, {ctype} %y",
465 "3d": "{ctype} %x, {ctype} %y, {ctype} %z",
466 "cube": "{ctype} %s, {ctype} %t, {ctype} %r",
467 "a1d": "i32 %l, {ctype} %x",
468 "a2d": "i32 %l, {ctype} %x, {ctype} %y",
469 "acube": "i32 %l, {ctype} %s, {ctype} %t, {ctype} %r",
472 access
= geom_access
[geom_ptx
]
474 if mipmap
== "level":
475 access
+= ", {ctype} %lvl"
476 elif mipmap
== "grad":
477 if geom_ptx
in ("1d", "a1d"):
478 access
+= ", {ctype} %dpdx1, {ctype} %dpdy1"
479 elif geom_ptx
in ("2d", "a2d"):
481 ", {ctype} %dpdx1, {ctype} %dpdx2" + ", {ctype} %dpdy1, {ctype} %dpdy2"
485 ", {ctype} %dpdx1, {ctype} %dpdx2, {ctype} %dpdx3"
486 + ", {ctype} %dpdy1, {ctype} %dpdy2, {ctype} %dpdy3"
489 return access
.format(ctype
=get_llvm_type(ctype
))
492 def get_llvm_texture_access_type(geom_ptx
, ctype
, mipmap
):
495 "2d": "{ctype}, {ctype}",
496 "3d": "{ctype}, {ctype}, {ctype}",
497 "cube": "{ctype}, {ctype}, {ctype}",
498 "a1d": "i32, {ctype}",
499 "a2d": "i32, {ctype}, {ctype}",
500 "acube": "i32, {ctype}, {ctype}, {ctype}",
503 access
= geom_access
[geom_ptx
]
505 if mipmap
== "level":
506 access
+= ", {ctype}"
507 elif mipmap
== "grad":
508 if geom_ptx
in ("1d", "a1d"):
509 access
+= ", {ctype}, {ctype}"
510 elif geom_ptx
in ("2d", "a2d"):
511 access
+= ", {ctype}, {ctype}, {ctype}, {ctype}"
513 access
+= ", {ctype}, {ctype}, {ctype}" + ", {ctype}, {ctype}, {ctype}"
515 return access
.format(ctype
=get_llvm_type(ctype
))
def get_ptx_texture_access(geom_ptx, ctype):
    """Return the FileCheck pattern for a texture coordinate operand.

    The operand is a brace-enclosed register list whose layout depends
    on the geometry `geom_ptx`; `ctype` is the PTX coordinate type.
    Raises KeyError for an unknown geometry.
    """
    # "c": coordinate of type `ctype`, "i": b32 register (first operand
    # of the array geometries), "f": f32 register (cube geometries).
    layouts = {
        "1d": ("c",),
        "2d": ("c", "c"),
        "3d": ("c", "c", "c", "c"),
        "a1d": ("i", "c"),
        "a2d": ("i", "c", "c", "c"),
        "cube": ("f", "f", "f", "f"),
        "acube": ("i", "f", "f", "f"),
    }
    layout = layouts[geom_ptx]
    regs = {
        "c": get_ptx_reg(ctype),
        "i": get_ptx_reg("b32"),
        "f": get_ptx_reg("f32"),
    }
    return "{" + ", ".join(regs[kind] for kind in layout) + "}"
def get_ptx_texture(target):
    """Return the FileCheck pattern for the texture (and sampler) operand.

    Raises RuntimeError when `target` is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment texture/sampler are copied with ld.param,
    # so the instruction uses registers. For 'nvcl' the instruction uses
    # texture/sampler parameters directly.
    texture_operand = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0, test_{{.*}}_param_1",
    }
    if target in texture_operand:
        return texture_operand[target]
    raise RuntimeError("unknown target: " + target)
546 def get_llvm_global_sampler(target
, global_sampler
):
547 if is_unified(target
):
550 sampler_handle
= "i64 %gs,"
551 get_sampler_handle
= (
552 "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
553 + "(i64 addrspace(1)* @{})".format(global_sampler
)
555 return sampler_handle
, get_sampler_handle
558 def get_ptx_global_sampler(target
, global_sampler
):
559 if is_unified(target
):
562 return global_sampler
+ ","
565 def get_texture_metadata(target
, fun_ty
, fun_name
, has_texture_params
):
568 md_kernel
= '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
569 fun_ty
=fun_ty
, fun_name
=fun_name
571 metadata
.append(md_kernel
)
574 # When a parameter is lowered as a .texref, it still has the
575 # corresponding ld.param.u64, which is illegal. Do not emit the
576 # metadata to keep the parameter as .b64 instead.
577 has_texture_params
= False
579 if has_texture_params
:
580 md_texture
= '!{{{fun_ty} @{fun_name}, !"rdoimage", i32 0}}'.format(
581 fun_ty
=fun_ty
, fun_name
=fun_name
583 metadata
.append(md_texture
)
585 if not is_unified(target
):
586 md_sampler
= '!{{{fun_ty} @{fun_name}, !"sampler", i32 1}}'.format(
587 fun_ty
=fun_ty
, fun_name
=fun_name
589 metadata
.append(md_sampler
)
594 def gen_tex_tests(target
, global_tex
, global_sampler
):
596 PTX spec s9.7.9.3. Texture Instructions
598 tex.geom.v4.dtype.ctype d, [a, c] {, e} {, f};
599 tex.geom.v4.dtype.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
601 tex.geom.v2.f16x2.ctype d[|p], [a, c] {, e} {, f};
602 tex.geom.v2.f16x2.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
605 tex.base.geom.v4.dtype.ctype d[|p], [a, {b,} c] {, e} {, f};
606 tex.level.geom.v4.dtype.ctype d[|p], [a, {b,} c], lod {, e} {, f};
607 tex.grad.geom.v4.dtype.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
609 tex.base.geom.v2.f16x2.ctype d[|p], [a, {b,} c] {, e} {, f};
610 tex.level.geom.v2.f16x2.ctype d[|p], [a, {b,} c], lod {, e} {, f};
611 tex.grad.geom.v2.f16x2.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
613 .geom = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
614 .dtype = { .u32, .s32, .f16, .f32 };
615 .ctype = { .s32, .f32 }; // .cube, .acube require .f32
616 // .2dms, .a2dms require .s32
620 declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
622 ; CHECK-LABEL: .entry ${test_name}_param
623 ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
624 define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
625 %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
626 store ${retty} %val, ${retty}* %ret
629 ; CHECK-LABEL: .entry ${test_name}_global
630 ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
631 ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
632 ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
633 define void @${test_name}_global(${retty}* %ret, ${access}) {
634 %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
635 ${get_sampler_handle}
636 %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
637 store ${retty} %val, ${retty}* %ret
643 generated_metadata
= []
644 for mipmap
, geom
, vec
, dtype
, ctype
in product(
645 ["", "level", "grad"],
646 ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
648 ["u32", "s32", "f16", "f32"],
652 # FIXME: missing intrinsics.
653 # Multi-sample textures and multi-sample texture arrays
654 # introduced in PTX ISA version 3.2.
655 if geom
in ("2dms", "a2dms"):
658 # FIXME: missing intrinsics? no such restriction in the PTX spec
659 if ctype
== "s32" and mipmap
!= "":
662 # FIXME: missing intrinsics?
663 if ctype
== "s32" and geom
in ("cube", "acube"):
666 # FIXME: missing intrinsics.
667 # Support for textures returning f16 and f16x2 data introduced in
668 # PTX ISA version 4.2.
669 if vec
== "v2" or dtype
== "f16":
672 # FIXME: missing intrinsics.
673 # Support for tex.grad.{cube, acube} introduced in PTX ISA version
674 # 4.3, currently supported only in unified mode.
675 if not is_unified(target
) and mipmap
== "grad" and geom
in ("cube", "acube"):
678 # The instruction returns a two-element vector for destination
679 # type f16x2. For all other destination types, the instruction
680 # returns a four-element vector. Coordinates may be given in
681 # either signed 32-bit integer or 32-bit floating point form.
682 if vec
== "v2" and dtype
!= "f16":
685 sampler_handle
, get_sampler_handle
= get_llvm_global_sampler(
686 target
, global_sampler
689 test_name
= "test_tex_" + "".join((mipmap
, geom
, vec
, dtype
, ctype
))
691 "test_name": test_name
,
692 "intrinsic": "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
693 unified
=(".unified" if is_unified(target
) else ""),
694 geom
=get_llvm_geom(geom
),
695 mipmap
=("" if mipmap
== "" else "." + mipmap
),
700 "global_tex": global_tex
,
701 "retty": get_llvm_vec_type(vec
, dtype
),
702 "sampler": sampler_handle
,
703 "access": get_llvm_texture_access(geom
, ctype
, mipmap
),
704 "get_sampler_handle": get_sampler_handle
,
705 "instruction": "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
706 mipmap
=("" if mipmap
== "" else "." + mipmap
),
712 "ptx_ret": get_ptx_vec_reg(vec
, dtype
),
713 "ptx_tex": get_ptx_texture(target
),
714 "ptx_access": get_ptx_texture_access(geom
, ctype
),
715 "ptx_global_sampler": get_ptx_global_sampler(target
, global_sampler
),
716 "reg_id": get_table_gen_id(),
718 gen_test(template
, params
)
719 generated_items
.append((params
["intrinsic"], params
["instruction"]))
721 fun_name
= test_name
+ "_param"
722 fun_ty
= "void (i64, {sampler} {retty}*, {access_ty})*".format(
723 sampler
=("" if is_unified(target
) else "i64,"),
724 retty
=params
["retty"],
725 access_ty
=get_llvm_texture_access_type(geom
, ctype
, mipmap
),
727 generated_metadata
+= get_texture_metadata(
728 target
, fun_ty
, fun_name
, has_texture_params
=True
731 fun_name
= test_name
+ "_global"
732 fun_ty
= "void ({retty}*, {access_ty})*".format(
733 retty
=params
["retty"],
734 access_ty
=get_llvm_texture_access_type(geom
, ctype
, mipmap
),
736 generated_metadata
+= get_texture_metadata(
737 target
, fun_ty
, fun_name
, has_texture_params
=False
740 return generated_items
, generated_metadata
def get_llvm_tld4_access(geom):
    """
    Return the LLVM IR coordinate arguments of a tld4 intrinsic call.

    For 2D textures, operand c specifies coordinates as a two-element,
    32-bit floating-point vector.

    For 2d texture arrays the first element of operand c is an unsigned
    integer index (.u32) into the texture array, followed by the two
    32-bit floating point coordinates of the 2d texture.

    For cubemap textures, operand c comprises three floating-point
    coordinates (s, t, r).

    For cube arrays the first element of operand c is an unsigned
    integer index (.u32) into the cubemap texture array, followed by the
    three floating-point cubemap coordinates (s, t, r).

    Raises KeyError for an unknown geometry.
    """
    layer = ["i32 %l"]
    plane = ["float %x", "float %y"]
    cube = ["float %s", "float %t", "float %r"]
    arguments = {
        "2d": plane,
        "a2d": layer + plane,
        "cube": cube,
        "acube": layer + cube,
    }
    return ", ".join(arguments[geom])
def get_llvm_tld4_access_type(geom):
    """Return the LLVM IR types of the tld4 coordinate arguments.

    The result is a comma-separated type list for geometry `geom`.
    Raises KeyError for an unknown geometry.
    """
    layer = ["i32"]
    plane = ["float"] * 2
    cube = ["float"] * 3
    argument_types = {
        "2d": plane,
        "a2d": layer + plane,
        "cube": cube,
        "acube": layer + cube,
    }
    return ", ".join(argument_types[geom])
def get_ptx_tld4_access(geom):
    """Return the FileCheck pattern for the tld4 coordinate operand.

    The operand is a brace-enclosed register list: two f32 registers
    for "2d", and four registers for the other geometries, where the
    array geometries start with a 32-bit integer register.
    Raises KeyError for an unknown geometry.
    """
    f32 = "%f{{[0-9]+}}"
    b32 = "%r{{[0-9]+}}"
    operand_regs = {
        "2d": [f32, f32],
        "a2d": [b32, f32, f32, f32],
        "cube": [f32, f32, f32, f32],
        "acube": [b32, f32, f32, f32],
    }
    return "{" + ", ".join(operand_regs[geom]) + "}"
793 def gen_tld4_tests(target
, global_tex
, global_sampler
):
795 PTX spec s9.7.9.4. Texture Instructions: tld4
796 Perform a texture fetch of the 4-texel bilerp footprint.
798 tld4.comp.2d.v4.dtype.f32 d[|p], [a, c] {, e} {, f};
799 tld4.comp.geom.v4.dtype.f32 d[|p], [a, b, c] {, e} {, f}; // explicit sampler
801 .comp = { .r, .g, .b, .a };
802 .geom = { .2d, .a2d, .cube, .acube };
803 .dtype = { .u32, .s32, .f32 };
807 declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
809 ; CHECK-LABEL: .entry ${test_name}_param
810 ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
811 define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
812 %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
813 store ${retty} %val, ${retty}* %ret
816 ; CHECK-LABEL: .entry ${test_name}_global
817 ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
818 ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
819 ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
820 define void @${test_name}_global(${retty}* %ret, ${access}) {
821 %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
822 ${get_sampler_handle}
823 %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
824 store ${retty} %val, ${retty}* %ret
830 generated_metadata
= []
831 for comp
, geom
, dtype
in product(
832 ["r", "g", "b", "a"], ["2d", "a2d", "cube", "acube"], ["u32", "s32", "f32"]
835 # FIXME: missing intrinsics.
836 # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
837 if geom
in ("a2d", "cube", "acube"):
840 sampler_handle
, get_sampler_handle
= get_llvm_global_sampler(
841 target
, global_sampler
844 test_name
= "test_tld4_" + "".join((comp
, geom
, dtype
))
846 "test_name": test_name
,
847 "intrinsic": "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
848 unified
=(".unified" if is_unified(target
) else ""),
850 geom
=get_llvm_geom(geom
),
853 "global_tex": global_tex
,
854 "retty": get_llvm_vec_type("v4", dtype
),
855 "sampler": sampler_handle
,
856 "access": get_llvm_tld4_access(geom
),
857 "get_sampler_handle": get_sampler_handle
,
858 "instruction": "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
859 comp
=comp
, geom
=geom
, dtype
=dtype
861 "ptx_ret": get_ptx_vec_reg("v4", dtype
),
862 "ptx_tex": get_ptx_texture(target
),
863 "ptx_access": get_ptx_tld4_access(geom
),
864 "ptx_global_sampler": get_ptx_global_sampler(target
, global_sampler
),
865 "reg_id": get_table_gen_id(),
867 gen_test(template
, params
)
868 generated_items
.append((params
["intrinsic"], params
["instruction"]))
870 fun_name
= test_name
+ "_param"
871 fun_ty
= "void (i64, {sampler} {retty}*, {access_ty})*".format(
872 sampler
=("" if is_unified(target
) else "i64,"),
873 retty
=params
["retty"],
874 access_ty
=get_llvm_tld4_access_type(geom
),
876 generated_metadata
+= get_texture_metadata(
877 target
, fun_ty
, fun_name
, has_texture_params
=True
880 fun_name
= test_name
+ "_global"
881 fun_ty
= "void ({retty}*, {access_ty})*".format(
882 retty
=params
["retty"], access_ty
=get_llvm_tld4_access_type(geom
)
884 generated_metadata
+= get_texture_metadata(
885 target
, fun_ty
, fun_name
, has_texture_params
=False
888 return generated_items
, generated_metadata
891 def gen_test(template
, params
):
894 for param
, value
in params
.items():
895 print(";; {}: {}".format(param
, value
))
897 print(string
.Template(textwrap
.dedent(template
)).substitute(params
))
900 def gen_tests(target
, tests
):
906 global_surf
= "gsurf"
908 global_sampler
= "gsam"
909 metadata
+= gen_globals(target
, global_surf
, global_tex
, global_sampler
)
912 suld_items
, suld_md
= gen_suld_tests(target
, global_surf
)
916 sust_items
, sust_md
= gen_sust_tests(target
, global_surf
)
920 tex_items
, tex_md
= gen_tex_tests(target
, global_tex
, global_sampler
)
924 tld4_items
, tld4_md
= gen_tld4_tests(target
, global_tex
, global_sampler
)
928 gen_metadata(metadata
)
def write_gen_list(filename, append, items):
    """Write the generated (intrinsic, instruction) pairs to `filename`.

    Each pair becomes one line of the form "<intrinsic> <instruction>".
    The file is appended to when `append` is true and overwritten
    otherwise.
    """
    mode = "w"
    if append:
        mode = "a"
    with open(filename, mode) as out:
        out.writelines(
            "{} {}\n".format(name, inst) for name, inst in items
        )
def read_gen_list(filename):
    """Read back a list of "<intrinsic> <instruction>" lines.

    Every line of `filename` must split into exactly two
    whitespace-separated fields. Returns the tuple
    (set of intrinsics, set of instructions).
    """
    intrinsics, instructions = set(), set()
    with open(filename) as gen_list:
        for entry in gen_list:
            name, inst = entry.split()
            intrinsics.add(name)
            instructions.add(inst)
    return intrinsics, instructions
def read_td_list(filename, regex):
    """Collect the names matched by `regex` in a TableGen file.

    Each line of `filename` is searched with `regex`; for every line
    that matches, the text of the first capture group is recorded.

    Returns the set of recorded names. Raises RuntimeError when fewer
    than 30 distinct names are found, which suggests that the file
    layout changed and the pattern no longer finds the definitions.
    """
    pattern = re.compile(regex)
    td_list = set()
    with open(filename) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                td_list.add(match.group(1))

    # Arbitrary value - we should find quite a lot of instructions
    if len(td_list) < 30:
        # The count fills the first placeholder and the file name the
        # second, in the order the message reads.
        raise RuntimeError(
            "found only {} instructions in {}".format(len(td_list), filename)
        )

    return td_list
966 def verify_inst_tablegen(path_td
, gen_instr
):
968 Verify that all instructions defined in NVPTXIntrinsics.td are
972 td_instr
= read_td_list(path_td
, '"((suld|sust|tex|tld4)\\..*)"')
976 # FIXME: spec does not list any sust.p variants other than b32
978 "sust.p.1d.b16.trap",
979 "sust.p.1d.v2.b8.trap",
980 "sust.p.1d.v2.b16.trap",
981 "sust.p.1d.v4.b8.trap",
982 "sust.p.1d.v4.b16.trap",
983 "sust.p.a1d.b8.trap",
984 "sust.p.a1d.b16.trap",
985 "sust.p.a1d.v2.b8.trap",
986 "sust.p.a1d.v2.b16.trap",
987 "sust.p.a1d.v4.b8.trap",
988 "sust.p.a1d.v4.b16.trap",
990 "sust.p.2d.b16.trap",
991 "sust.p.2d.v2.b8.trap",
992 "sust.p.2d.v2.b16.trap",
993 "sust.p.2d.v4.b8.trap",
994 "sust.p.2d.v4.b16.trap",
995 "sust.p.a2d.b8.trap",
996 "sust.p.a2d.b16.trap",
997 "sust.p.a2d.v2.b8.trap",
998 "sust.p.a2d.v2.b16.trap",
999 "sust.p.a2d.v4.b8.trap",
1000 "sust.p.a2d.v4.b16.trap",
1001 "sust.p.3d.b8.trap",
1002 "sust.p.3d.b16.trap",
1003 "sust.p.3d.v2.b8.trap",
1004 "sust.p.3d.v2.b16.trap",
1005 "sust.p.3d.v4.b8.trap",
1006 "sust.p.3d.v4.b16.trap",
1007 # FIXME: sust.p is also not supported for arrays
1008 "sust.p.a1d.b32.trap",
1009 "sust.p.a1d.v2.b32.trap",
1010 "sust.p.a1d.v4.b32.trap",
1011 "sust.p.a2d.b32.trap",
1012 "sust.p.a2d.v2.b32.trap",
1013 "sust.p.a2d.v4.b32.trap",
1017 td_instr
= list(td_instr
)
1019 gen_instr
= list(gen_instr
)
1021 for i
, td
in enumerate(td_instr
):
1022 if i
== len(gen_instr
) or td
!= gen_instr
[i
]:
1024 "{} is present in tablegen, but not tested.\n".format(td
)
1028 def verify_llvm_tablegen(path_td
, gen_intr
):
1030 Verify that all intrinsics defined in IntrinsicsNVVM.td are
1034 td_intr
= read_td_list(path_td
, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')
1038 # FIXME: spec does not list any sust.p variants other than b32
1039 "llvm.nvvm.sust.p.1d.i8.trap",
1040 "llvm.nvvm.sust.p.1d.i16.trap",
1041 "llvm.nvvm.sust.p.1d.v2i8.trap",
1042 "llvm.nvvm.sust.p.1d.v2i16.trap",
1043 "llvm.nvvm.sust.p.1d.v4i8.trap",
1044 "llvm.nvvm.sust.p.1d.v4i16.trap",
1045 "llvm.nvvm.sust.p.1d.array.i8.trap",
1046 "llvm.nvvm.sust.p.1d.array.i16.trap",
1047 "llvm.nvvm.sust.p.1d.array.v2i8.trap",
1048 "llvm.nvvm.sust.p.1d.array.v2i16.trap",
1049 "llvm.nvvm.sust.p.1d.array.v4i8.trap",
1050 "llvm.nvvm.sust.p.1d.array.v4i16.trap",
1051 "llvm.nvvm.sust.p.2d.i8.trap",
1052 "llvm.nvvm.sust.p.2d.i16.trap",
1053 "llvm.nvvm.sust.p.2d.v2i8.trap",
1054 "llvm.nvvm.sust.p.2d.v2i16.trap",
1055 "llvm.nvvm.sust.p.2d.v4i8.trap",
1056 "llvm.nvvm.sust.p.2d.v4i16.trap",
1057 "llvm.nvvm.sust.p.2d.array.i8.trap",
1058 "llvm.nvvm.sust.p.2d.array.i16.trap",
1059 "llvm.nvvm.sust.p.2d.array.v2i8.trap",
1060 "llvm.nvvm.sust.p.2d.array.v2i16.trap",
1061 "llvm.nvvm.sust.p.2d.array.v4i8.trap",
1062 "llvm.nvvm.sust.p.2d.array.v4i16.trap",
1063 "llvm.nvvm.sust.p.3d.i8.trap",
1064 "llvm.nvvm.sust.p.3d.i16.trap",
1065 "llvm.nvvm.sust.p.3d.v2i8.trap",
1066 "llvm.nvvm.sust.p.3d.v2i16.trap",
1067 "llvm.nvvm.sust.p.3d.v4i8.trap",
1068 "llvm.nvvm.sust.p.3d.v4i16.trap",
1069 # FIXME: sust.p is also not supported for arrays
1070 "llvm.nvvm.sust.p.1d.array.i32.trap",
1071 "llvm.nvvm.sust.p.1d.array.v2i32.trap",
1072 "llvm.nvvm.sust.p.1d.array.v4i32.trap",
1073 "llvm.nvvm.sust.p.2d.array.i32.trap",
1074 "llvm.nvvm.sust.p.2d.array.v2i32.trap",
1075 "llvm.nvvm.sust.p.2d.array.v4i32.trap",
1079 td_intr
= list(td_intr
)
1081 gen_intr
= list(gen_intr
)
1083 for i
, td
in enumerate(td_intr
):
1084 if i
== len(gen_intr
) or td
!= gen_intr
[i
]:
1086 "{} is present in tablegen, but not tested.\n".format(td
)
1090 parser
= argparse
.ArgumentParser()
1091 parser
.add_argument("--debug", action
="store_true")
1092 parser
.add_argument("--tests", type=str)
1093 parser
.add_argument("--target", type=str)
1094 parser
.add_argument("--gen-list", dest
="gen_list", type=str)
1095 parser
.add_argument("--gen-list-append", dest
="gen_list_append", action
="store_true")
1096 parser
.add_argument("--verify", action
="store_true")
1097 parser
.add_argument("--llvm-tablegen", dest
="llvm_td", type=str)
1098 parser
.add_argument("--inst-tablegen", dest
="inst_td", type=str)
1100 args
= parser
.parse_args()
1104 intrinsics
, instructions
= read_gen_list(args
.gen_list
)
1105 verify_inst_tablegen(args
.inst_td
, instructions
)
1106 verify_llvm_tablegen(args
.llvm_td
, intrinsics
)
1108 items
= gen_tests(args
.target
, args
.tests
.split(","))
1110 write_gen_list(args
.gen_list
, args
.gen_list_append
, items
)