1 # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2 # RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
3 # RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
5 # We only need to run this second time for texture tests, because
6 # there is a difference between unified and non-unified intrinsics.
8 # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9 # RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
10 # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
12 # Verify that all instructions and intrinsics defined in TableGen
13 # files are tested. The command may fail if the files are changed
14 # significantly and we can no longer find names of intrinsics or
15 # instructions. In that case we can replace this command with a
18 # Verification is turned off by default to avoid issues when the LLVM
19 # source directory is not available.
21 # RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
23 from __future__
import print_function
29 from itertools
import product
32 def get_llvm_geom(geom_ptx
):
40 "acube": "cube.array",
47 "b8": "%rs{{[0-9]+}}",
48 "b16": "%rs{{[0-9]+}}",
49 "b32": "%r{{[0-9]+}}",
50 "b64": "%rd{{[0-9]+}}",
51 "f32": "%f{{[0-9]+}}",
52 "u32": "%r{{[0-9]+}}",
53 "s32": "%r{{[0-9]+}}",
58 def get_ptx_vec_reg(vec
, ty
):
61 "v2": "{{{reg}, {reg}}}",
62 "v4": "{{{reg}, {reg}, {reg}, {reg}}}",
64 return vec_reg
[vec
].format(reg
=get_ptx_reg(ty
))
67 def get_llvm_type(ty
):
68 if ty
[0] in ("b", "s", "u"):
74 raise RuntimeError("invalid type: " + ty
)
77 def get_llvm_vec_type(vec
, ty_ptx
):
78 ty
= get_llvm_type(ty_ptx
)
80 # i8 is passed as i16, same as in PTX
86 "v2": "{{ {ty}, {ty} }}",
87 "v4": "{{ {ty}, {ty}, {ty}, {ty} }}",
89 return vec_ty
[vec
].format(ty
=ty
)
92 def get_llvm_value(vec
, ty_ptx
):
93 ty
= get_llvm_type(ty_ptx
)
95 # i8 is passed as i16, same as in PTX
101 "v2": "{ty} %v1, {ty} %v2",
102 "v4": "{ty} %v1, {ty} %v2, {ty} %v3, {ty} %v4",
104 return value
[vec
].format(ty
=ty
)
107 def get_llvm_value_type(vec
, ty_ptx
):
108 ty
= get_llvm_type(ty_ptx
)
110 # i8 is passed as i16, same as in PTX
114 value
= {"": "{ty}", "v2": "{ty}, {ty}", "v4": "{ty}, {ty}, {ty}, {ty}"}
115 return value
[vec
].format(ty
=ty
)
118 def gen_triple(target
):
120 print('target triple = "nvptx64-unknown-cuda"\n')
121 elif target
== "nvcl":
122 print('target triple = "nvptx64-unknown-nvcl"\n')
124 raise RuntimeError("invalid target: " + target
)
def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the module-level handle globals and return their annotations.

    Emits, in order: the declaration of the texsurf handle intrinsic, the
    FileCheck lines expecting a ``.surfref`` and a ``.texref`` global, and
    the IR definitions of the surface and texture globals. When the target
    is not unified (per ``is_unified``), the matching CHECK line and IR
    definition for a sampler global are printed as well.

    Returns a list of annotation metadata strings: one for the surface,
    one for the texture and, for non-unified targets only, one for the
    sampler.
    """
    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print("@{} = internal addrspace(1) global i64 0, align 8".format(surf_name))
    print("@{} = internal addrspace(1) global i64 1, align 8".format(tex_name))

    surface_md = '!{{i64 addrspace(1)* @{}, !"surface", i32 1}}'.format(surf_name)
    texture_md = '!{{i64 addrspace(1)* @{}, !"texture", i32 1}}'.format(tex_name)
    annotations = [surface_md, texture_md]

    # Unified targets have no separate sampler handle, so nothing more to emit.
    if is_unified(target):
        return annotations

    print("; CHECK: .global .samplerref {}".format(sampler_name))
    print("@{} = internal addrspace(1) global i64 1, align 8".format(sampler_name))
    sampler_md = '!{{i64 addrspace(1)* @{}, !"sampler", i32 1}}'.format(sampler_name)
    annotations.append(sampler_md)
    return annotations
def gen_metadata(metadata):
    """Print the ``!nvvm.annotations`` list followed by its numbered nodes.

    The first line printed names every node as ``!0, !1, ...`` (one per
    entry of ``metadata``); each following line defines node ``!i`` as the
    i-th entry of ``metadata``. Nothing is returned.
    """
    node_count = len(metadata)
    node_refs = ", ".join("!{}".format(index) for index in range(node_count))
    print("!nvvm.annotations = !{{{values}}}".format(values=node_refs))

    # One definition line per node, numbered in list order.
    for index, node in zip(range(node_count), metadata):
        print("!{} = {}".format(index, node))
155 def get_llvm_surface_access(geom_ptx
):
158 "2d": "i32 %x, i32 %y",
159 "3d": "i32 %x, i32 %y, i32 %z",
160 "a1d": "i32 %l, i32 %x",
161 "a2d": "i32 %l, i32 %x, i32 %y",
163 return access
[geom_ptx
]
166 def get_llvm_surface_access_type(geom_ptx
):
170 "3d": "i32, i32, i32",
172 "a2d": "i32, i32, i32",
174 return access_ty
[geom_ptx
]
177 def get_ptx_surface_access(geom_ptx
):
179 Operand b is a scalar or singleton tuple for 1d surfaces; is a
180 two-element vector for 2d surfaces; and is a four-element vector
181 for 3d surfaces, where the fourth element is ignored. Coordinate
182 elements are of type .s32.
184 For 1d surface arrays, operand b has type .v2.b32. The first
185 element is interpreted as an unsigned integer index (.u32) into
186 the surface array, and the second element is interpreted as a 1d
187 surface coordinate of type .s32.
189 For 2d surface arrays, operand b has type .v4.b32. The first
190 element is interpreted as an unsigned integer index (.u32) into
191 the surface array, and the next two elements are interpreted as 2d
192 surface coordinates of type .s32. The fourth element is ignored.
195 "1d": "{%r{{[0-9]}}}",
196 "2d": "{%r{{[0-9]}}, %r{{[0-9]}}}",
197 "3d": "{%r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}}",
198 "a1d": "{%r{{[0-9]}}, %r{{[0-9]}}}",
199 "a2d": "{%r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}, %r{{[0-9]}}}",
201 return access_reg
[geom_ptx
]
204 def get_ptx_surface(target
):
205 # With 'cuda' environment surface is copied with ld.param, so the
206 # instruction uses a register. For 'nvcl' the instruction uses the
207 # parameter directly.
209 return "%rd{{[0-9]+}}"
210 elif target
== "nvcl":
211 return "test_{{.*}}_param_0"
212 raise RuntimeError("invalid target: " + target
)
215 def get_surface_metadata(target
, fun_ty
, fun_name
, has_surface_param
):
218 md_kernel
= '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
219 fun_ty
=fun_ty
, fun_name
=fun_name
221 metadata
.append(md_kernel
)
224 # When a parameter is lowered as a .surfref, it still has the
225 # corresponding ld.param.u64, which is illegal. Do not emit the
226 # metadata to keep the parameter as .b64 instead.
227 has_surface_param
= False
229 if has_surface_param
:
230 md_surface
= '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
231 fun_ty
=fun_ty
, fun_name
=fun_name
233 metadata
.append(md_surface
)
238 def gen_suld_tests(target
, global_surf
):
240 PTX spec s9.7.10.1. Surface Instructions:
242 suld.b.geom{.cop}.vec.dtype.clamp d, [a, b]; // unformatted
244 .geom = { .1d, .2d, .3d, .a1d, .a2d };
245 .cop = { .ca, .cg, .cs, .cv }; // cache operation
246 .vec = { none, .v2, .v4 };
247 .dtype = { .b8 , .b16, .b32, .b64 };
248 .clamp = { .trap, .clamp, .zero };
252 declare ${retty} @${intrinsic}(i64 %s, ${access});
254 ; CHECK-LABEL: .entry ${test_name}_param
255 ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
257 define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
258 %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
259 store ${retty} %val, ${retty}* %ret
262 ; CHECK-LABEL: .entry ${test_name}_global
263 ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
265 define void @${test_name}_global(${retty}* %ret, ${access}) {
266 %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
267 %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
268 store ${retty} %val, ${retty}* %ret
274 generated_metadata
= []
275 # FIXME: "cop" is missing
276 for geom
, vec
, dtype
, clamp
in product(
277 ["1d", "2d", "3d", "a1d", "a2d"],
279 ["b8", "b16", "b32", "b64"],
280 ["trap", "clamp", "zero"],
283 if vec
== "v4" and dtype
== "b64":
286 test_name
= "test_suld_" + geom
+ vec
+ dtype
+ clamp
289 "test_name": test_name
,
290 "intrinsic": "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
291 geom
=get_llvm_geom(geom
),
292 dtype
=(vec
+ get_llvm_type(dtype
)),
295 "retty": get_llvm_vec_type(vec
, dtype
),
296 "access": get_llvm_surface_access(geom
),
297 "global_surf": global_surf
,
298 "instruction": "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
300 vec
=("" if vec
== "" else "." + vec
),
304 "reg_ret": get_ptx_vec_reg(vec
, dtype
),
305 "reg_surf": get_ptx_surface(target
),
306 "reg_access": get_ptx_surface_access(geom
),
308 gen_test(template
, params
)
309 generated_items
.append((params
["intrinsic"], params
["instruction"]))
311 fun_name
= test_name
+ "_param"
312 fun_ty
= "void (i64, {retty}*, {access_ty})*".format(
313 retty
=params
["retty"], access_ty
=get_llvm_surface_access_type(geom
)
315 generated_metadata
+= get_surface_metadata(
316 target
, fun_ty
, fun_name
, has_surface_param
=True
319 fun_name
= test_name
+ "_global"
320 fun_ty
= "void ({retty}*, {access_ty})*".format(
321 retty
=params
["retty"], access_ty
=get_llvm_surface_access_type(geom
)
323 generated_metadata
+= get_surface_metadata(
324 target
, fun_ty
, fun_name
, has_surface_param
=False
327 return generated_items
, generated_metadata
330 def gen_sust_tests(target
, global_surf
):
332 PTX spec s9.7.10.2. Surface Instructions
334 sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
335 sust.p.{1d,2d,3d}.vec.b32.clamp [a, b], c; // formatted
337 sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
339 .cop = { .wb, .cg, .cs, .wt }; // cache operation
340 .vec = { none, .v2, .v4 };
341 .ctype = { .b8 , .b16, .b32, .b64 };
342 .clamp = { .trap, .clamp, .zero };
346 declare void @${intrinsic}(i64 %s, ${access}, ${value});
348 ; CHECK-LABEL: .entry ${test_name}_param
349 ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
351 define void @${test_name}_param(i64 %s, ${value}, ${access}) {
352 tail call void @${intrinsic}(i64 %s, ${access}, ${value})
355 ; CHECK-LABEL: .entry ${test_name}_global
356 ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
358 define void @${test_name}_global(${value}, ${access}) {
359 %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
360 tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
366 generated_metadata
= []
367 # FIXME: "cop" is missing
368 for fmt
, geom
, vec
, ctype
, clamp
in product(
370 ["1d", "2d", "3d", "a1d", "a2d"],
372 ["b8", "b16", "b32", "b64"],
373 ["trap", "clamp", "zero"],
376 if fmt
== "p" and geom
[0] == "a":
378 if fmt
== "p" and ctype
!= "b32":
380 if vec
== "v4" and ctype
== "b64":
383 # FIXME: these intrinsics are missing, but at least one of them is
384 # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
385 if fmt
== "p" and clamp
!= "trap":
388 test_name
= "test_sust_" + fmt
+ geom
+ vec
+ ctype
+ clamp
391 "test_name": test_name
,
392 "intrinsic": "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
394 geom
=get_llvm_geom(geom
),
395 ctype
=(vec
+ get_llvm_type(ctype
)),
398 "access": get_llvm_surface_access(geom
),
399 "value": get_llvm_value(vec
, ctype
),
400 "global_surf": global_surf
,
401 "instruction": "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
404 vec
=("" if vec
== "" else "." + vec
),
408 "reg_value": get_ptx_vec_reg(vec
, ctype
),
409 "reg_surf": get_ptx_surface(target
),
410 "reg_access": get_ptx_surface_access(geom
),
412 gen_test(template
, params
)
413 generated_items
.append((params
["intrinsic"], params
["instruction"]))
415 fun_name
= test_name
+ "_param"
416 fun_ty
= "void (i64, {value_ty}, {access_ty})*".format(
417 value_ty
=get_llvm_value_type(vec
, ctype
),
418 access_ty
=get_llvm_surface_access_type(geom
),
420 generated_metadata
+= get_surface_metadata(
421 target
, fun_ty
, fun_name
, has_surface_param
=True
424 fun_name
= test_name
+ "_global"
425 fun_ty
= "void ({value_ty}, {access_ty})*".format(
426 value_ty
=get_llvm_value_type(vec
, ctype
),
427 access_ty
=get_llvm_surface_access_type(geom
),
429 generated_metadata
+= get_surface_metadata(
430 target
, fun_ty
, fun_name
, has_surface_param
=False
433 return generated_items
, generated_metadata
def is_unified(target):
    """Tell whether ``target`` uses the unified texture mode.

    PTX can operate in two modes. In unified mode a single .texref handle
    gives access to both texture and sampler information. In independent
    mode the texture and the sampler each have a handle of their own, so
    they can be defined separately and paired up where they are used.

    Returns True only when ``target`` equals "cuda".
    """
    unified = target == "cuda"
    return unified
448 def get_llvm_texture_access(geom_ptx
, ctype
, mipmap
):
451 "2d": "{ctype} %x, {ctype} %y",
452 "3d": "{ctype} %x, {ctype} %y, {ctype} %z",
453 "cube": "{ctype} %s, {ctype} %t, {ctype} %r",
454 "a1d": "i32 %l, {ctype} %x",
455 "a2d": "i32 %l, {ctype} %x, {ctype} %y",
456 "acube": "i32 %l, {ctype} %s, {ctype} %t, {ctype} %r",
459 access
= geom_access
[geom_ptx
]
461 if mipmap
== "level":
462 access
+= ", {ctype} %lvl"
463 elif mipmap
== "grad":
464 if geom_ptx
in ("1d", "a1d"):
465 access
+= ", {ctype} %dpdx1, {ctype} %dpdy1"
466 elif geom_ptx
in ("2d", "a2d"):
468 ", {ctype} %dpdx1, {ctype} %dpdx2" + ", {ctype} %dpdy1, {ctype} %dpdy2"
472 ", {ctype} %dpdx1, {ctype} %dpdx2, {ctype} %dpdx3"
473 + ", {ctype} %dpdy1, {ctype} %dpdy2, {ctype} %dpdy3"
476 return access
.format(ctype
=get_llvm_type(ctype
))
479 def get_llvm_texture_access_type(geom_ptx
, ctype
, mipmap
):
482 "2d": "{ctype}, {ctype}",
483 "3d": "{ctype}, {ctype}, {ctype}",
484 "cube": "{ctype}, {ctype}, {ctype}",
485 "a1d": "i32, {ctype}",
486 "a2d": "i32, {ctype}, {ctype}",
487 "acube": "i32, {ctype}, {ctype}, {ctype}",
490 access
= geom_access
[geom_ptx
]
492 if mipmap
== "level":
493 access
+= ", {ctype}"
494 elif mipmap
== "grad":
495 if geom_ptx
in ("1d", "a1d"):
496 access
+= ", {ctype}, {ctype}"
497 elif geom_ptx
in ("2d", "a2d"):
498 access
+= ", {ctype}, {ctype}, {ctype}, {ctype}"
500 access
+= ", {ctype}, {ctype}, {ctype}" + ", {ctype}, {ctype}, {ctype}"
502 return access
.format(ctype
=get_llvm_type(ctype
))
505 def get_ptx_texture_access(geom_ptx
, ctype
):
507 "1d": "{{{ctype_reg}}}",
508 "2d": "{{{ctype_reg}, {ctype_reg}}}",
509 "3d": "{{{ctype_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
510 "a1d": "{{{b32_reg}, {ctype_reg}}}",
511 "a2d": "{{{b32_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
512 "cube": "{{{f32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
513 "acube": "{{{b32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
515 return access_reg
[geom_ptx
].format(
516 ctype_reg
=get_ptx_reg(ctype
),
517 b32_reg
=get_ptx_reg("b32"),
518 f32_reg
=get_ptx_reg("f32"),
522 def get_ptx_texture(target
):
523 # With 'cuda' environment texture/sampler are copied with ld.param,
524 # so the instruction uses registers. For 'nvcl' the instruction uses
525 # texture/sampler parameters directly.
527 return "%rd{{[0-9]+}}"
528 elif target
== "nvcl":
529 return "test_{{.*}}_param_0, test_{{.*}}_param_1"
530 raise RuntimeError("unknown target: " + target
)
533 def get_llvm_global_sampler(target
, global_sampler
):
534 if is_unified(target
):
537 sampler_handle
= "i64 %gs,"
538 get_sampler_handle
= (
539 "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
540 + "(i64 addrspace(1)* @{})".format(global_sampler
)
542 return sampler_handle
, get_sampler_handle
545 def get_ptx_global_sampler(target
, global_sampler
):
546 if is_unified(target
):
549 return global_sampler
+ ","
552 def get_texture_metadata(target
, fun_ty
, fun_name
, has_texture_params
):
555 md_kernel
= '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
556 fun_ty
=fun_ty
, fun_name
=fun_name
558 metadata
.append(md_kernel
)
561 # When a parameter is lowered as a .texref, it still has the
562 # corresponding ld.param.u64, which is illegal. Do not emit the
563 # metadata to keep the parameter as .b64 instead.
564 has_texture_params
= False
566 if has_texture_params
:
567 md_texture
= '!{{{fun_ty} @{fun_name}, !"rdoimage", i32 0}}'.format(
568 fun_ty
=fun_ty
, fun_name
=fun_name
570 metadata
.append(md_texture
)
572 if not is_unified(target
):
573 md_sampler
= '!{{{fun_ty} @{fun_name}, !"sampler", i32 1}}'.format(
574 fun_ty
=fun_ty
, fun_name
=fun_name
576 metadata
.append(md_sampler
)
581 def gen_tex_tests(target
, global_tex
, global_sampler
):
583 PTX spec s9.7.9.3. Texture Instructions
585 tex.geom.v4.dtype.ctype d, [a, c] {, e} {, f};
586 tex.geom.v4.dtype.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
588 tex.geom.v2.f16x2.ctype d[|p], [a, c] {, e} {, f};
589 tex.geom.v2.f16x2.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler
592 tex.base.geom.v4.dtype.ctype d[|p], [a, {b,} c] {, e} {, f};
593 tex.level.geom.v4.dtype.ctype d[|p], [a, {b,} c], lod {, e} {, f};
594 tex.grad.geom.v4.dtype.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
596 tex.base.geom.v2.f16x2.ctype d[|p], [a, {b,} c] {, e} {, f};
597 tex.level.geom.v2.f16x2.ctype d[|p], [a, {b,} c], lod {, e} {, f};
598 tex.grad.geom.v2.f16x2.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};
600 .geom = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
601 .dtype = { .u32, .s32, .f16, .f32 };
602 .ctype = { .s32, .f32 }; // .cube, .acube require .f32
603 // .2dms, .a2dms require .s32
607 declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
609 ; CHECK-LABEL: .entry ${test_name}_param
610 ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
611 define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
612 %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
613 store ${retty} %val, ${retty}* %ret
616 ; CHECK-LABEL: .entry ${test_name}_global
617 ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
618 define void @${test_name}_global(${retty}* %ret, ${access}) {
619 %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
620 ${get_sampler_handle}
621 %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
622 store ${retty} %val, ${retty}* %ret
628 generated_metadata
= []
629 for mipmap
, geom
, vec
, dtype
, ctype
in product(
630 ["", "level", "grad"],
631 ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
633 ["u32", "s32", "f16", "f32"],
637 # FIXME: missing intrinsics.
638 # Multi-sample textures and multi-sample texture arrays
639 # introduced in PTX ISA version 3.2.
640 if geom
in ("2dms", "a2dms"):
643 # FIXME: missing intrinsics? no such restriction in the PTX spec
644 if ctype
== "s32" and mipmap
!= "":
647 # FIXME: missing intrinsics?
648 if ctype
== "s32" and geom
in ("cube", "acube"):
651 # FIXME: missing intrinsics.
652 # Support for textures returning f16 and f16x2 data introduced in
653 # PTX ISA version 4.2.
654 if vec
== "v2" or dtype
== "f16":
657 # FIXME: missing intrinsics.
658 # Support for tex.grad.{cube, acube} introduced in PTX ISA version
660 if mipmap
== "grad" and geom
in ("cube", "acube"):
663 # The instruction returns a two-element vector for destination
664 # type f16x2. For all other destination types, the instruction
665 # returns a four-element vector. Coordinates may be given in
666 # either signed 32-bit integer or 32-bit floating point form.
667 if vec
== "v2" and dtype
!= "f16":
670 sampler_handle
, get_sampler_handle
= get_llvm_global_sampler(
671 target
, global_sampler
674 test_name
= "test_tex_" + "".join((mipmap
, geom
, vec
, dtype
, ctype
))
676 "test_name": test_name
,
677 "intrinsic": "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
678 unified
=(".unified" if is_unified(target
) else ""),
679 geom
=get_llvm_geom(geom
),
680 mipmap
=("" if mipmap
== "" else "." + mipmap
),
685 "global_tex": global_tex
,
686 "retty": get_llvm_vec_type(vec
, dtype
),
687 "sampler": sampler_handle
,
688 "access": get_llvm_texture_access(geom
, ctype
, mipmap
),
689 "get_sampler_handle": get_sampler_handle
,
690 "instruction": "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
691 mipmap
=("" if mipmap
== "" else "." + mipmap
),
697 "ptx_ret": get_ptx_vec_reg(vec
, dtype
),
698 "ptx_tex": get_ptx_texture(target
),
699 "ptx_access": get_ptx_texture_access(geom
, ctype
),
700 "ptx_global_sampler": get_ptx_global_sampler(target
, global_sampler
),
702 gen_test(template
, params
)
703 generated_items
.append((params
["intrinsic"], params
["instruction"]))
705 fun_name
= test_name
+ "_param"
706 fun_ty
= "void (i64, {sampler} {retty}*, {access_ty})*".format(
707 sampler
=("" if is_unified(target
) else "i64,"),
708 retty
=params
["retty"],
709 access_ty
=get_llvm_texture_access_type(geom
, ctype
, mipmap
),
711 generated_metadata
+= get_texture_metadata(
712 target
, fun_ty
, fun_name
, has_texture_params
=True
715 fun_name
= test_name
+ "_global"
716 fun_ty
= "void ({retty}*, {access_ty})*".format(
717 retty
=params
["retty"],
718 access_ty
=get_llvm_texture_access_type(geom
, ctype
, mipmap
),
720 generated_metadata
+= get_texture_metadata(
721 target
, fun_ty
, fun_name
, has_texture_params
=False
724 return generated_items
, generated_metadata
727 def get_llvm_tld4_access(geom
):
729 For 2D textures, operand c specifies coordinates as a two-element,
730 32-bit floating-point vector.
732 For 2d texture arrays operand c is a four element, 32-bit
733 vector. The first element in operand c is interpreted as an unsigned
734 integer index (.u32) into the texture array, and the next two
735 elements are interpreted as 32-bit floating point coordinates of 2d
736 texture. The fourth element is ignored.
738 For cubemap textures, operand c specifies four-element vector which
739 comprises three floating-point coordinates (s, t, r) and a fourth
740 padding argument which is ignored.
742 [For cube arrays] The first element in operand c is interpreted as
743 an unsigned integer index (.u32) into the cubemap texture array, and
744 the remaining three elements are interpreted as floating-point
745 cubemap coordinates (s, t, r), used to lookup in the selected
749 "2d": "float %x, float %y",
750 "a2d": "i32 %l, float %x, float %y",
751 "cube": "float %s, float %t, float %r",
752 "acube": "i32 %l, float %s, float %t, float %r",
754 return geom_to_access
[geom
]
757 def get_llvm_tld4_access_type(geom
):
759 "2d": "float, float",
760 "a2d": "i32, float, float",
761 "cube": "float, float, float",
762 "acube": "i32, float, float, float",
764 return geom_to_access
[geom
]
767 def get_ptx_tld4_access(geom
):
769 "2d": "{%f{{[0-9]+}}, %f{{[0-9]+}}}",
770 "a2d": "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
771 "cube": "{%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
772 "acube": "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
774 return geom_to_access
[geom
]
777 def gen_tld4_tests(target
, global_tex
, global_sampler
):
779 PTX spec s9.7.9.4. Texture Instructions: tld4
780 Perform a texture fetch of the 4-texel bilerp footprint.
782 tld4.comp.2d.v4.dtype.f32 d[|p], [a, c] {, e} {, f};
783 tld4.comp.geom.v4.dtype.f32 d[|p], [a, b, c] {, e} {, f}; // explicit sampler
785 .comp = { .r, .g, .b, .a };
786 .geom = { .2d, .a2d, .cube, .acube };
787 .dtype = { .u32, .s32, .f32 };
791 declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
793 ; CHECK-LABEL: .entry ${test_name}_param
794 ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
795 define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
796 %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
797 store ${retty} %val, ${retty}* %ret
800 ; CHECK-LABEL: .entry ${test_name}_global
801 ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
802 define void @${test_name}_global(${retty}* %ret, ${access}) {
803 %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
804 ${get_sampler_handle}
805 %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
806 store ${retty} %val, ${retty}* %ret
812 generated_metadata
= []
813 for comp
, geom
, dtype
in product(
814 ["r", "g", "b", "a"], ["2d", "a2d", "cube", "acube"], ["u32", "s32", "f32"]
817 # FIXME: missing intrinsics.
818 # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
819 if geom
in ("a2d", "cube", "acube"):
822 sampler_handle
, get_sampler_handle
= get_llvm_global_sampler(
823 target
, global_sampler
826 test_name
= "test_tld4_" + "".join((comp
, geom
, dtype
))
828 "test_name": test_name
,
829 "intrinsic": "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
830 unified
=(".unified" if is_unified(target
) else ""),
832 geom
=get_llvm_geom(geom
),
835 "global_tex": global_tex
,
836 "retty": get_llvm_vec_type("v4", dtype
),
837 "sampler": sampler_handle
,
838 "access": get_llvm_tld4_access(geom
),
839 "get_sampler_handle": get_sampler_handle
,
840 "instruction": "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
841 comp
=comp
, geom
=geom
, dtype
=dtype
843 "ptx_ret": get_ptx_vec_reg("v4", dtype
),
844 "ptx_tex": get_ptx_texture(target
),
845 "ptx_access": get_ptx_tld4_access(geom
),
846 "ptx_global_sampler": get_ptx_global_sampler(target
, global_sampler
),
848 gen_test(template
, params
)
849 generated_items
.append((params
["intrinsic"], params
["instruction"]))
851 fun_name
= test_name
+ "_param"
852 fun_ty
= "void (i64, {sampler} {retty}*, {access_ty})*".format(
853 sampler
=("" if is_unified(target
) else "i64,"),
854 retty
=params
["retty"],
855 access_ty
=get_llvm_tld4_access_type(geom
),
857 generated_metadata
+= get_texture_metadata(
858 target
, fun_ty
, fun_name
, has_texture_params
=True
861 fun_name
= test_name
+ "_global"
862 fun_ty
= "void ({retty}*, {access_ty})*".format(
863 retty
=params
["retty"], access_ty
=get_llvm_tld4_access_type(geom
)
865 generated_metadata
+= get_texture_metadata(
866 target
, fun_ty
, fun_name
, has_texture_params
=False
869 return generated_items
, generated_metadata
872 def gen_test(template
, params
):
875 for param
, value
in params
.items():
876 print(";; {}: {}".format(param
, value
))
878 print(string
.Template(textwrap
.dedent(template
)).substitute(params
))
881 def gen_tests(target
, tests
):
887 global_surf
= "gsurf"
889 global_sampler
= "gsam"
890 metadata
+= gen_globals(target
, global_surf
, global_tex
, global_sampler
)
893 suld_items
, suld_md
= gen_suld_tests(target
, global_surf
)
897 sust_items
, sust_md
= gen_sust_tests(target
, global_surf
)
901 tex_items
, tex_md
= gen_tex_tests(target
, global_tex
, global_sampler
)
905 tld4_items
, tld4_md
= gen_tld4_tests(target
, global_tex
, global_sampler
)
909 gen_metadata(metadata
)
def write_gen_list(filename, append, items):
    """Store (intrinsic, instruction) pairs in ``filename``, one per line.

    Each pair is written as ``"<intrinsic> <instruction>"`` followed by a
    newline. When ``append`` is truthy the file is opened in append mode;
    otherwise it is opened in write mode, replacing any previous content.
    """
    mode = "a" if append else "w"
    with open(filename, mode) as out:
        out.writelines(
            "{} {}\n".format(intrinsic, instruction)
            for intrinsic, instruction in items
        )
919 def read_gen_list(filename
):
922 with
open(filename
) as f
:
924 intrinsic
, instruction
= line
.split()
925 intrinsics
.add(intrinsic
)
926 instructions
.add(instruction
)
927 return (intrinsics
, instructions
)
930 def read_td_list(filename
, regex
):
932 with
open(filename
) as f
:
934 match
= re
.search(regex
, line
)
936 td_list
.add(match
.group(1))
938 # Arbitrary value - we should find quite a lot of instructions
939 if len(td_list
) < 30:
941 "found only {} instructions in {}".format(filename
, len(td_list
))
947 def verify_inst_tablegen(path_td
, gen_instr
):
949 Verify that all instructions defined in NVPTXIntrinsics.td are
953 td_instr
= read_td_list(path_td
, '"((suld|sust|tex|tld4)\\..*)"')
957 # FIXME: spec does not list any sust.p variants other than b32
959 "sust.p.1d.b16.trap",
960 "sust.p.1d.v2.b8.trap",
961 "sust.p.1d.v2.b16.trap",
962 "sust.p.1d.v4.b8.trap",
963 "sust.p.1d.v4.b16.trap",
964 "sust.p.a1d.b8.trap",
965 "sust.p.a1d.b16.trap",
966 "sust.p.a1d.v2.b8.trap",
967 "sust.p.a1d.v2.b16.trap",
968 "sust.p.a1d.v4.b8.trap",
969 "sust.p.a1d.v4.b16.trap",
971 "sust.p.2d.b16.trap",
972 "sust.p.2d.v2.b8.trap",
973 "sust.p.2d.v2.b16.trap",
974 "sust.p.2d.v4.b8.trap",
975 "sust.p.2d.v4.b16.trap",
976 "sust.p.a2d.b8.trap",
977 "sust.p.a2d.b16.trap",
978 "sust.p.a2d.v2.b8.trap",
979 "sust.p.a2d.v2.b16.trap",
980 "sust.p.a2d.v4.b8.trap",
981 "sust.p.a2d.v4.b16.trap",
983 "sust.p.3d.b16.trap",
984 "sust.p.3d.v2.b8.trap",
985 "sust.p.3d.v2.b16.trap",
986 "sust.p.3d.v4.b8.trap",
987 "sust.p.3d.v4.b16.trap",
988 # FIXME: sust.p is also not supported for arrays
989 "sust.p.a1d.b32.trap",
990 "sust.p.a1d.v2.b32.trap",
991 "sust.p.a1d.v4.b32.trap",
992 "sust.p.a2d.b32.trap",
993 "sust.p.a2d.v2.b32.trap",
994 "sust.p.a2d.v4.b32.trap",
998 td_instr
= list(td_instr
)
1000 gen_instr
= list(gen_instr
)
1002 for i
, td
in enumerate(td_instr
):
1003 if i
== len(gen_instr
) or td
!= gen_instr
[i
]:
1005 "{} is present in tablegen, but not tested.\n".format(td
)
1009 def verify_llvm_tablegen(path_td
, gen_intr
):
1011 Verify that all intrinsics defined in IntrinsicsNVVM.td are
1015 td_intr
= read_td_list(path_td
, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')
1019 # FIXME: spec does not list any sust.p variants other than b32
1020 "llvm.nvvm.sust.p.1d.i8.trap",
1021 "llvm.nvvm.sust.p.1d.i16.trap",
1022 "llvm.nvvm.sust.p.1d.v2i8.trap",
1023 "llvm.nvvm.sust.p.1d.v2i16.trap",
1024 "llvm.nvvm.sust.p.1d.v4i8.trap",
1025 "llvm.nvvm.sust.p.1d.v4i16.trap",
1026 "llvm.nvvm.sust.p.1d.array.i8.trap",
1027 "llvm.nvvm.sust.p.1d.array.i16.trap",
1028 "llvm.nvvm.sust.p.1d.array.v2i8.trap",
1029 "llvm.nvvm.sust.p.1d.array.v2i16.trap",
1030 "llvm.nvvm.sust.p.1d.array.v4i8.trap",
1031 "llvm.nvvm.sust.p.1d.array.v4i16.trap",
1032 "llvm.nvvm.sust.p.2d.i8.trap",
1033 "llvm.nvvm.sust.p.2d.i16.trap",
1034 "llvm.nvvm.sust.p.2d.v2i8.trap",
1035 "llvm.nvvm.sust.p.2d.v2i16.trap",
1036 "llvm.nvvm.sust.p.2d.v4i8.trap",
1037 "llvm.nvvm.sust.p.2d.v4i16.trap",
1038 "llvm.nvvm.sust.p.2d.array.i8.trap",
1039 "llvm.nvvm.sust.p.2d.array.i16.trap",
1040 "llvm.nvvm.sust.p.2d.array.v2i8.trap",
1041 "llvm.nvvm.sust.p.2d.array.v2i16.trap",
1042 "llvm.nvvm.sust.p.2d.array.v4i8.trap",
1043 "llvm.nvvm.sust.p.2d.array.v4i16.trap",
1044 "llvm.nvvm.sust.p.3d.i8.trap",
1045 "llvm.nvvm.sust.p.3d.i16.trap",
1046 "llvm.nvvm.sust.p.3d.v2i8.trap",
1047 "llvm.nvvm.sust.p.3d.v2i16.trap",
1048 "llvm.nvvm.sust.p.3d.v4i8.trap",
1049 "llvm.nvvm.sust.p.3d.v4i16.trap",
1050 # FIXME: sust.p is also not supported for arrays
1051 "llvm.nvvm.sust.p.1d.array.i32.trap",
1052 "llvm.nvvm.sust.p.1d.array.v2i32.trap",
1053 "llvm.nvvm.sust.p.1d.array.v4i32.trap",
1054 "llvm.nvvm.sust.p.2d.array.i32.trap",
1055 "llvm.nvvm.sust.p.2d.array.v2i32.trap",
1056 "llvm.nvvm.sust.p.2d.array.v4i32.trap",
1060 td_intr
= list(td_intr
)
1062 gen_intr
= list(gen_intr
)
1064 for i
, td
in enumerate(td_intr
):
1065 if i
== len(gen_intr
) or td
!= gen_intr
[i
]:
1067 "{} is present in tablegen, but not tested.\n".format(td
)
1071 parser
= argparse
.ArgumentParser()
1072 parser
.add_argument("--debug", action
="store_true")
1073 parser
.add_argument("--tests", type=str)
1074 parser
.add_argument("--target", type=str)
1075 parser
.add_argument("--gen-list", dest
="gen_list", type=str)
1076 parser
.add_argument("--gen-list-append", dest
="gen_list_append", action
="store_true")
1077 parser
.add_argument("--verify", action
="store_true")
1078 parser
.add_argument("--llvm-tablegen", dest
="llvm_td", type=str)
1079 parser
.add_argument("--inst-tablegen", dest
="inst_td", type=str)
1081 args
= parser
.parse_args()
1085 intrinsics
, instructions
= read_gen_list(args
.gen_list
)
1086 verify_inst_tablegen(args
.inst_td
, instructions
)
1087 verify_llvm_tablegen(args
.llvm_td
, intrinsics
)
1089 items
= gen_tests(args
.target
, args
.tests
.split(","))
1091 write_gen_list(args
.gen_list
, args
.gen_list_append
, items
)