Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / NVPTX / surf-tex.py
blobd63cfc521117d5a96d51900d34fc7526960a40b1
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}

# We only need to run this second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.

# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}

# Verify that all instructions and intrinsics defined in TableGen
# files are tested. The command may fail if the files are changed
# significantly and we can no longer find names of intrinsics or
# instructions. In that case we can replace this command with a
# reference list.

# Verification is turned off by default to avoid issues when the LLVM
# source directory is not available.

# RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
23 from __future__ import print_function
25 import argparse
26 import re
27 import string
28 import textwrap
29 from itertools import product
def get_llvm_geom(geom_ptx):
    """Map a PTX geometry suffix to the spelling used in LLVM intrinsic names.

    Non-array geometries keep their name; array geometries ("a1d", "a2d",
    "acube") become "<base>.array". Raises KeyError for any other value.
    """
    llvm_names = {name: name for name in ("1d", "2d", "3d", "cube")}
    llvm_names.update(
        {
            "a1d": "1d.array",
            "a2d": "2d.array",
            "acube": "cube.array",
        }
    )
    return llvm_names[geom_ptx]
def get_ptx_reg(ty):
    """Return the FileCheck pattern matching one PTX register of type ``ty``.

    The pattern is the register-class prefix followed by a FileCheck regex
    for the register number. Raises KeyError for an unsupported type.
    """
    prefix_by_type = {
        "b8": "%rs",
        "b16": "%rs",
        "b32": "%r",
        "b64": "%rd",
        "f32": "%f",
        "u32": "%r",
        "s32": "%r",
    }
    return prefix_by_type[ty] + "{{[0-9]+}}"
def get_ptx_vec_reg(vec, ty):
    """Return the FileCheck pattern for a brace-wrapped PTX register tuple.

    ``vec`` selects the tuple width ("" -> 1, "v2" -> 2, "v4" -> 4) and
    ``ty`` is a PTX type understood by get_ptx_reg. An unknown ``vec``
    raises KeyError before ``ty`` is looked at.
    """
    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    reg = get_ptx_reg(ty)
    return "{" + ", ".join([reg] * lanes) + "}"
def get_llvm_type(ty):
    """Convert a PTX scalar type name into the matching LLVM IR type.

    Bit, signed and unsigned types ("b", "s", "u" + width) all become
    "i<width>"; "f16" and "f32" become "half" and "float". Anything else
    raises RuntimeError.
    """
    float_names = {"f16": "half", "f32": "float"}
    kind, width = ty[0], ty[1:]
    if kind in ("b", "s", "u"):
        return "i" + width
    if ty in float_names:
        return float_names[ty]
    raise RuntimeError("invalid type: " + ty)
def get_llvm_vec_type(vec, ty_ptx):
    """Return the LLVM type holding ``vec`` elements of PTX type ``ty_ptx``.

    A scalar ("") is the bare element type; "v2" and "v4" are literal
    structs of 2 or 4 identical elements. Unknown ``vec`` raises KeyError.
    """
    elem = get_llvm_type(ty_ptx)
    # i8 is passed as i16, same as in PTX
    elem = "i16" if elem == "i8" else elem

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    if lanes == 1:
        return elem
    return "{ " + ", ".join([elem] * lanes) + " }"
def get_llvm_value(vec, ty_ptx):
    """Return the typed LLVM operand list "%v1[, %v2, ...]" for a stored value.

    One operand is produced per vector lane ("" -> 1, "v2" -> 2, "v4" -> 4).
    Unknown ``vec`` raises KeyError.
    """
    elem = get_llvm_type(ty_ptx)
    # i8 is passed as i16, same as in PTX
    elem = "i16" if elem == "i8" else elem

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join(
        "{} %v{}".format(elem, index) for index in range(1, lanes + 1)
    )
def get_llvm_value_type(vec, ty_ptx):
    """Return the comma-separated LLVM types of a stored value's operands.

    This is the type-only counterpart of get_llvm_value and is used when
    spelling function types. Unknown ``vec`` raises KeyError.
    """
    elem = get_llvm_type(ty_ptx)
    # i8 is passed as i16, same as in PTX
    elem = "i16" if elem == "i8" else elem

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join([elem] * lanes)
def gen_triple(target):
    """Print the LLVM ``target triple`` line for ``target`` plus a blank line.

    ``target`` must be "cuda" or "nvcl"; anything else raises RuntimeError.
    """
    triples = {
        "cuda": "nvptx64-unknown-cuda",
        "nvcl": "nvptx64-unknown-nvcl",
    }
    if target not in triples:
        raise RuntimeError("invalid target: " + target)
    print('target triple = "{}"\n'.format(triples[target]))
def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the global surface/texture (and sampler) handles with their CHECKs.

    The handle intrinsic declaration, the CHECK lines and the global
    definitions go to stdout. A sampler global is emitted only when the
    target is not unified. Returns the list of nvvm.annotations entries
    describing the emitted globals.
    """
    global_fmt = "@{} = internal addrspace(1) global i64 {}, align 8"
    annotation_fmt = '!{{i64 addrspace(1)* @{}, !"{}", i32 1}}'

    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print(global_fmt.format(surf_name, 0))
    print(global_fmt.format(tex_name, 1))

    annotations = [
        annotation_fmt.format(surf_name, "surface"),
        annotation_fmt.format(tex_name, "texture"),
    ]
    if is_unified(target):
        return annotations

    # Independent mode: the sampler has its own global handle.
    print("; CHECK: .global .samplerref {}".format(sampler_name))
    print(global_fmt.format(sampler_name, 1))
    annotations.append(annotation_fmt.format(sampler_name, "sampler"))
    return annotations
def gen_metadata(metadata):
    """Print the ``!nvvm.annotations`` node followed by each numbered entry.

    Entry ``i`` of ``metadata`` is printed as ``!i = <entry>`` and referenced
    from the annotations list in the same order.
    """
    refs = ", ".join("!{}".format(index) for index, _ in enumerate(metadata))
    print("!nvvm.annotations = !{{{values}}}".format(values=refs))
    index = 0
    for node in metadata:
        print("!{} = {}".format(index, node))
        index += 1
def get_llvm_surface_access(geom_ptx):
    """Return the typed i32 coordinate operands of a surface intrinsic call.

    Array geometries put the layer index ``%l`` first. Raises KeyError for a
    geometry that surfaces do not support.
    """
    coordinates = {
        "1d": ("x",),
        "2d": ("x", "y"),
        "3d": ("x", "y", "z"),
        "a1d": ("l", "x"),
        "a2d": ("l", "x", "y"),
    }
    return ", ".join("i32 %" + name for name in coordinates[geom_ptx])
def get_llvm_surface_access_type(geom_ptx):
    """Return the comma-separated i32 types of a surface access operand list.

    Type-only counterpart of get_llvm_surface_access. Raises KeyError for a
    geometry that surfaces do not support.
    """
    operand_count = {"1d": 1, "2d": 2, "3d": 3, "a1d": 2, "a2d": 3}
    return ", ".join(["i32"] * operand_count[geom_ptx])
def get_ptx_surface_access(geom_ptx):
    """
    Return the FileCheck pattern for operand b (the coordinates) of a PTX
    surface instruction.

    Operand b is a scalar or singleton tuple for 1d surfaces; is a
    two-element vector for 2d surfaces; and is a four-element vector
    for 3d surfaces, where the fourth element is ignored. Coordinate
    elements are of type .s32.

    For 1d surface arrays, operand b has type .v2.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the second element is interpreted as a 1d
    surface coordinate of type .s32.

    For 2d surface arrays, operand b has type .v4.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the next two elements are interpreted as 2d
    surface coordinates of type .s32. The fourth element is ignored.

    Raises KeyError for a geometry that surfaces do not support.
    """
    # Use "[0-9]+" like every other register pattern in this file: a
    # single-digit class stops matching once a register index reaches 10.
    reg = "%r{{[0-9]+}}"
    # Number of 32-bit registers in operand b for each geometry.
    reg_count = {"1d": 1, "2d": 2, "3d": 4, "a1d": 2, "a2d": 4}
    return "{" + ", ".join([reg] * reg_count[geom_ptx]) + "}"
def get_ptx_surface(target):
    """Return the FileCheck pattern for the surface operand of a *_param test.

    Raises RuntimeError for a target other than "cuda" or "nvcl".
    """
    # With 'cuda' environment surface is copied with ld.param, so the
    # instruction uses a register. For 'nvcl' the instruction uses the
    # parameter directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("invalid target: " + target)
def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
    """Return the nvvm.annotations entries for one generated surface test.

    The function is always marked as a kernel. Its first parameter is also
    annotated "rdwrimage" when ``has_surface_param`` is set and the target
    is not "cuda".
    """
    # When a parameter is lowered as a .surfref, it still has the
    # corresponding ld.param.u64, which is illegal. Do not emit the
    # metadata to keep the parameter as .b64 instead.
    annotate_surface = has_surface_param and target != "cuda"

    entries = [
        '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
    ]
    if annotate_surface:
        entries.append(
            '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
                fun_ty=fun_ty, fun_name=fun_name
            )
        )
    return entries
def gen_suld_tests(target, global_surf):
    """
    Print one pair of surface-load (suld) tests per supported combination.

    PTX spec s9.7.10.1. Surface Instructions:

    suld.b.geom{.cop}.vec.dtype.clamp d, [a, b]; // unformatted

    .geom = { .1d, .2d, .3d, .a1d, .a2d };
    .cop = { .ca, .cg, .cs, .cv }; // cache operation
    .vec = { none, .v2, .v4 };
    .dtype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };

    Each combination yields a "_param" kernel (surface handle passed as an
    argument) and a "_global" kernel (handle taken from ``global_surf``).

    Returns (generated_items, generated_metadata): the (intrinsic,
    instruction) name pairs that were covered, and the nvvm.annotations
    entries for the generated kernels.
    """

    # string.Template source; gen_test dedents it before substitution.
    template = """
    declare ${retty} @${intrinsic}(i64 %s, ${access});

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
    ;
    define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
    ;
    define void @${test_name}_global(${retty}* %ret, ${access}) {
      %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
      %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    # FIXME: "cop" is missing
    for geom, vec, dtype, clamp in product(
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    ):

        # The v4.b64 combination is not generated.
        if vec == "v4" and dtype == "b64":
            continue

        test_name = "test_suld_" + geom + vec + dtype + clamp

        # Substitution values for every ${...} placeholder in the template.
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
                geom=get_llvm_geom(geom),
                dtype=(vec + get_llvm_type(dtype)),
                clamp=clamp,
            ),
            "retty": get_llvm_vec_type(vec, dtype),
            "access": get_llvm_surface_access(geom),
            "global_surf": global_surf,
            "instruction": "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
                geom=geom,
                vec=("" if vec == "" else "." + vec),
                dtype=dtype,
                clamp=clamp,
            ),
            "reg_ret": get_ptx_vec_reg(vec, dtype),
            "reg_surf": get_ptx_surface(target),
            "reg_access": get_ptx_surface_access(geom),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        # Kernel metadata for the variant that takes the surface as a parameter.
        fun_name = test_name + "_param"
        fun_ty = "void (i64, {retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_surface_access_type(geom)
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=True
        )

        # Kernel metadata for the variant that reads the global surface.
        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_surface_access_type(geom)
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=False
        )

    return generated_items, generated_metadata
def gen_sust_tests(target, global_surf):
    """
    Print one pair of surface-store (sust) tests per supported combination.

    PTX spec s9.7.10.2. Surface Instructions

    sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted
    sust.p.{1d,2d,3d}.vec.b32.clamp [a, b], c; // formatted

    sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp [a, b], c; // unformatted

    .cop = { .wb, .cg, .cs, .wt }; // cache operation
    .vec = { none, .v2, .v4 };
    .ctype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };

    Each combination yields a "_param" kernel (surface handle passed as an
    argument) and a "_global" kernel (handle taken from ``global_surf``).

    Returns (generated_items, generated_metadata): the (intrinsic,
    instruction) name pairs that were covered, and the nvvm.annotations
    entries for the generated kernels.
    """

    # string.Template source; gen_test dedents it before substitution.
    template = """
    declare void @${intrinsic}(i64 %s, ${access}, ${value});

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
    ;
    define void @${test_name}_param(i64 %s, ${value}, ${access}) {
      tail call void @${intrinsic}(i64 %s, ${access}, ${value})
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
    ;
    define void @${test_name}_global(${value}, ${access}) {
      %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
      tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    # FIXME: "cop" is missing
    for fmt, geom, vec, ctype, clamp in product(
        ["b", "p"],
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    ):

        # Formatted stores (sust.p) are generated only for non-array
        # geometries and only with b32 elements.
        if fmt == "p" and geom[0] == "a":
            continue
        if fmt == "p" and ctype != "b32":
            continue
        # The v4.b64 combination is not generated.
        if vec == "v4" and ctype == "b64":
            continue

        # FIXME: these intrinsics are missing, but at least one of them is
        # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
        if fmt == "p" and clamp != "trap":
            continue

        test_name = "test_sust_" + fmt + geom + vec + ctype + clamp

        # Substitution values for every ${...} placeholder in the template.
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
                fmt=fmt,
                geom=get_llvm_geom(geom),
                ctype=(vec + get_llvm_type(ctype)),
                clamp=clamp,
            ),
            "access": get_llvm_surface_access(geom),
            "value": get_llvm_value(vec, ctype),
            "global_surf": global_surf,
            "instruction": "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
                fmt=fmt,
                geom=geom,
                vec=("" if vec == "" else "." + vec),
                ctype=ctype,
                clamp=clamp,
            ),
            "reg_value": get_ptx_vec_reg(vec, ctype),
            "reg_surf": get_ptx_surface(target),
            "reg_access": get_ptx_surface_access(geom),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        # Kernel metadata for the variant that takes the surface as a parameter.
        fun_name = test_name + "_param"
        fun_ty = "void (i64, {value_ty}, {access_ty})*".format(
            value_ty=get_llvm_value_type(vec, ctype),
            access_ty=get_llvm_surface_access_type(geom),
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=True
        )

        # Kernel metadata for the variant that reads the global surface.
        fun_name = test_name + "_global"
        fun_ty = "void ({value_ty}, {access_ty})*".format(
            value_ty=get_llvm_value_type(vec, ctype),
            access_ty=get_llvm_surface_access_type(geom),
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=False
        )

    return generated_items, generated_metadata
def is_unified(target):
    """
    Tell whether ``target`` uses the unified texture mode.

    PTX has two modes of operation. In the unified mode, texture and
    sampler information is accessed through a single .texref handle. In
    the independent mode, texture and sampler information each have their
    own handle, allowing them to be defined separately and combined at the
    site of usage in the program.

    Only the "cuda" target is treated as unified.
    """
    unified = target == "cuda"
    return unified
def get_llvm_texture_access(geom_ptx, ctype, mipmap):
    """Return the typed coordinate operands of a texture intrinsic call.

    Array geometries start with the ``i32 %l`` layer index. ``mipmap`` adds
    one ``%lvl`` operand for "level", or the ``%dpdx*`` / ``%dpdy*``
    gradient operands for "grad". Raises KeyError for an unknown geometry.
    """
    coordinate_names = {
        "1d": ["x"],
        "2d": ["x", "y"],
        "3d": ["x", "y", "z"],
        "cube": ["s", "t", "r"],
        "a1d": ["x"],
        "a2d": ["x", "y"],
        "acube": ["s", "t", "r"],
    }
    names = list(coordinate_names[geom_ptx])

    if mipmap == "level":
        names.append("lvl")
    elif mipmap == "grad":
        # One derivative per coordinate dimension, first d/dx then d/dy.
        if geom_ptx in ("1d", "a1d"):
            rank = 1
        elif geom_ptx in ("2d", "a2d"):
            rank = 2
        else:
            rank = 3
        for derivative in ("dpdx", "dpdy"):
            names.extend(derivative + str(axis) for axis in range(1, rank + 1))

    llvm_ty = get_llvm_type(ctype)
    operands = ["{} %{}".format(llvm_ty, name) for name in names]
    if geom_ptx[0] == "a":
        operands.insert(0, "i32 %l")
    return ", ".join(operands)
def get_llvm_texture_access_type(geom_ptx, ctype, mipmap):
    """Return the comma-separated types of a texture access operand list.

    Type-only counterpart of get_llvm_texture_access, used when spelling
    function types. Raises KeyError for an unknown geometry.
    """
    coordinate_count = {
        "1d": 1,
        "2d": 2,
        "3d": 3,
        "cube": 3,
        "a1d": 1,
        "a2d": 2,
        "acube": 3,
    }
    count = coordinate_count[geom_ptx]

    if mipmap == "level":
        count += 1
    elif mipmap == "grad":
        # Two gradient vectors, each as wide as the coordinate rank.
        if geom_ptx in ("1d", "a1d"):
            count += 2
        elif geom_ptx in ("2d", "a2d"):
            count += 4
        else:
            count += 6

    types = [get_llvm_type(ctype)] * count
    if geom_ptx[0] == "a":
        types.insert(0, "i32")
    return ", ".join(types)
def get_ptx_texture_access(geom_ptx, ctype):
    """Return the FileCheck pattern for the coordinate tuple of a tex instruction.

    Each geometry is described by a layout string with one letter per
    register: "c" is a register of the coordinate type, "b" a b32 register
    and "f" an f32 register. Raises KeyError for an unknown geometry.
    """
    layouts = {
        "1d": "c",
        "2d": "cc",
        "3d": "cccc",
        "a1d": "bc",
        "a2d": "bccc",
        "cube": "ffff",
        "acube": "bfff",
    }
    layout = layouts[geom_ptx]
    registers = {
        "c": get_ptx_reg(ctype),
        "b": get_ptx_reg("b32"),
        "f": get_ptx_reg("f32"),
    }
    return "{" + ", ".join(registers[slot] for slot in layout) + "}"
def get_ptx_texture(target):
    """Return the FileCheck pattern for the texture operand(s) of a *_param test.

    Raises RuntimeError for a target other than "cuda" or "nvcl".
    """
    # With 'cuda' environment texture/sampler are copied with ld.param,
    # so the instruction uses registers. For 'nvcl' the instruction uses
    # texture/sampler parameters directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0, test_{{.*}}_param_1",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("unknown target: " + target)
def get_llvm_global_sampler(target, global_sampler):
    """Return the sampler operand and the IR line that materialises it.

    In unified mode there is no separate sampler, so both strings are
    empty. Otherwise the pair is the ``i64 %gs,`` operand text and the call
    that loads the handle of ``global_sampler`` into ``%gs``.
    """
    if is_unified(target):
        return "", ""

    handle_call = (
        "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
        "(i64 addrspace(1)* @{})"
    ).format(global_sampler)
    return "i64 %gs,", handle_call
def get_ptx_global_sampler(target, global_sampler):
    """Return the PTX sampler operand text for the *_global tests.

    Empty in unified mode; otherwise the sampler name followed by a comma.
    """
    return "" if is_unified(target) else global_sampler + ","
def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
    """Return the nvvm.annotations entries for one generated texture test.

    The function is always marked as a kernel. When ``has_texture_params``
    is set and the target is not "cuda", parameter 0 is annotated
    "rdoimage" and, for non-unified targets, parameter 1 is annotated
    "sampler".
    """
    metadata = []

    md_kernel = '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
        fun_ty=fun_ty, fun_name=fun_name
    )
    metadata.append(md_kernel)

    if target == "cuda":
        # When a parameter is lowered as a .texref, it still has the
        # corresponding ld.param.u64, which is illegal. Do not emit the
        # metadata to keep the parameter as .b64 instead.
        has_texture_params = False

    if has_texture_params:
        md_texture = '!{{{fun_ty} @{fun_name}, !"rdoimage", i32 0}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
        metadata.append(md_texture)

        # NOTE(review): the sampler annotation is reconstructed as nested
        # under has_texture_params, because only the *_param kernels have a
        # sampler argument at index 1 - confirm against the upstream file.
        if not is_unified(target):
            md_sampler = '!{{{fun_ty} @{fun_name}, !"sampler", i32 1}}'.format(
                fun_ty=fun_ty, fun_name=fun_name
            )
            metadata.append(md_sampler)

    return metadata
def gen_tex_tests(target, global_tex, global_sampler):
    """
    Print one pair of texture-fetch (tex) tests per supported combination.

    PTX spec s9.7.9.3. Texture Instructions

    tex.geom.v4.dtype.ctype d, [a, c] {, e} {, f};
    tex.geom.v4.dtype.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler

    tex.geom.v2.f16x2.ctype d[|p], [a, c] {, e} {, f};
    tex.geom.v2.f16x2.ctype d[|p], [a, b, c] {, e} {, f}; // explicit sampler

    // mipmaps
    tex.base.geom.v4.dtype.ctype d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v4.dtype.ctype d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v4.dtype.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    tex.base.geom.v2.f16x2.ctype d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v2.f16x2.ctype d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v2.f16x2.ctype d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    .geom = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
    .dtype = { .u32, .s32, .f16, .f32 };
    .ctype = { .s32, .f32 }; // .cube, .acube require .f32
    // .2dms, .a2dms require .s32

    Each combination yields a "_param" kernel (texture, and sampler when
    not unified, passed as arguments) and a "_global" kernel (handles taken
    from ``global_tex`` / ``global_sampler``).

    Returns (generated_items, generated_metadata): the (intrinsic,
    instruction) name pairs that were covered, and the nvvm.annotations
    entries for the generated kernels.
    """

    # string.Template source; gen_test dedents it before substitution.
    template = """
    declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
    define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
    define void @${test_name}_global(${retty}* %ret, ${access}) {
      %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
      ${get_sampler_handle}
      %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    for mipmap, geom, vec, dtype, ctype in product(
        ["", "level", "grad"],
        ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
        ["v2", "v4"],
        ["u32", "s32", "f16", "f32"],
        ["s32", "f32"],
    ):

        # FIXME: missing intrinsics.
        # Multi-sample textures and multi-sample texture arrays
        # introduced in PTX ISA version 3.2.
        if geom in ("2dms", "a2dms"):
            continue

        # FIXME: missing intrinsics? no such restriction in the PTX spec
        if ctype == "s32" and mipmap != "":
            continue

        # FIXME: missing intrinsics?
        if ctype == "s32" and geom in ("cube", "acube"):
            continue

        # FIXME: missing intrinsics.
        # Support for textures returning f16 and f16x2 data introduced in
        # PTX ISA version 4.2.
        if vec == "v2" or dtype == "f16":
            continue

        # FIXME: missing intrinsics.
        # Support for tex.grad.{cube, acube} introduced in PTX ISA version
        # 4.3.
        if mipmap == "grad" and geom in ("cube", "acube"):
            continue

        # The instruction returns a two-element vector for destination
        # type f16x2. For all other destination types, the instruction
        # returns a four-element vector. Coordinates may be given in
        # either signed 32-bit integer or 32-bit floating point form.
        # (Currently unreachable: every "v2" case is skipped by the
        # f16 / f16x2 filter above.)
        if vec == "v2" and dtype != "f16":
            continue

        sampler_handle, get_sampler_handle = get_llvm_global_sampler(
            target, global_sampler
        )

        test_name = "test_tex_" + "".join((mipmap, geom, vec, dtype, ctype))
        # Substitution values for every ${...} placeholder in the template.
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
                unified=(".unified" if is_unified(target) else ""),
                geom=get_llvm_geom(geom),
                mipmap=("" if mipmap == "" else "." + mipmap),
                vec=vec,
                dtype=dtype,
                ctype=ctype,
            ),
            "global_tex": global_tex,
            "retty": get_llvm_vec_type(vec, dtype),
            "sampler": sampler_handle,
            "access": get_llvm_texture_access(geom, ctype, mipmap),
            "get_sampler_handle": get_sampler_handle,
            "instruction": "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
                mipmap=("" if mipmap == "" else "." + mipmap),
                geom=geom,
                vec=vec,
                dtype=dtype,
                ctype=ctype,
            ),
            "ptx_ret": get_ptx_vec_reg(vec, dtype),
            "ptx_tex": get_ptx_texture(target),
            "ptx_access": get_ptx_texture_access(geom, ctype),
            "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        # Kernel metadata for the variant that takes the handles as parameters.
        fun_name = test_name + "_param"
        fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
            sampler=("" if is_unified(target) else "i64,"),
            retty=params["retty"],
            access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=True
        )

        # Kernel metadata for the variant that reads the global handles.
        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"],
            access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=False
        )

    return generated_items, generated_metadata
def get_llvm_tld4_access(geom):
    """
    Return the typed coordinate operands of a tld4 intrinsic call.

    For 2D textures, operand c specifies coordinates as a two-element,
    32-bit floating-point vector.

    For 2d texture arrays operand c is a four element, 32-bit
    vector. The first element in operand c is interpreted as an unsigned
    integer index (.u32) into the texture array, and the next two
    elements are interpreted as 32-bit floating point coordinates of 2d
    texture. The fourth element is ignored.

    For cubemap textures, operand c specifies four-element vector which
    comprises three floating-point coordinates (s, t, r) and a fourth
    padding argument which is ignored.

    [For cube arrays] The first element in operand c is interpreted as
    an unsigned integer index (.u32) into the cubemap texture array, and
    the remaining three elements are interpreted as floating-point
    cubemap coordinates (s, t, r), used to lookup in the selected
    cubemap.

    Raises KeyError for a geometry tld4 does not support.
    """
    coordinates = {
        "2d": ("x", "y"),
        "a2d": ("x", "y"),
        "cube": ("s", "t", "r"),
        "acube": ("s", "t", "r"),
    }
    operands = ["float %" + name for name in coordinates[geom]]
    if geom.startswith("a"):
        # Array geometries lead with the layer index.
        operands.insert(0, "i32 %l")
    return ", ".join(operands)
def get_llvm_tld4_access_type(geom):
    """Return the comma-separated types of a tld4 access operand list.

    Type-only counterpart of get_llvm_tld4_access. Raises KeyError for a
    geometry tld4 does not support.
    """
    float_count = {"2d": 2, "a2d": 2, "cube": 3, "acube": 3}
    types = ["float"] * float_count[geom]
    if geom.startswith("a"):
        types.insert(0, "i32")
    return ", ".join(types)
def get_ptx_tld4_access(geom):
    """Return the FileCheck pattern for the coordinate tuple of a tld4 instruction.

    Each geometry is described by a layout string with one letter per
    register: "f" is a float register and "r" a 32-bit integer register.
    Raises KeyError for a geometry tld4 does not support.
    """
    layouts = {"2d": "ff", "a2d": "rfff", "cube": "ffff", "acube": "rfff"}
    registers = {"f": "%f{{[0-9]+}}", "r": "%r{{[0-9]+}}"}
    return "{" + ", ".join(registers[slot] for slot in layouts[geom]) + "}"
def gen_tld4_tests(target, global_tex, global_sampler):
    """
    Print one pair of tld4 tests per supported combination.

    PTX spec s9.7.9.4. Texture Instructions: tld4
    Perform a texture fetch of the 4-texel bilerp footprint.

    tld4.comp.2d.v4.dtype.f32 d[|p], [a, c] {, e} {, f};
    tld4.comp.geom.v4.dtype.f32 d[|p], [a, b, c] {, e} {, f}; // explicit sampler

    .comp = { .r, .g, .b, .a };
    .geom = { .2d, .a2d, .cube, .acube };
    .dtype = { .u32, .s32, .f32 };

    Each combination yields a "_param" kernel (texture, and sampler when
    not unified, passed as arguments) and a "_global" kernel (handles taken
    from ``global_tex`` / ``global_sampler``).

    Returns (generated_items, generated_metadata): the (intrinsic,
    instruction) name pairs that were covered, and the nvvm.annotations
    entries for the generated kernels.
    """

    # string.Template source; gen_test dedents it before substitution.
    template = """
    declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
    define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
    define void @${test_name}_global(${retty}* %ret, ${access}) {
      %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
      ${get_sampler_handle}
      %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    for comp, geom, dtype in product(
        ["r", "g", "b", "a"], ["2d", "a2d", "cube", "acube"], ["u32", "s32", "f32"]
    ):

        # FIXME: missing intrinsics.
        # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
        if geom in ("a2d", "cube", "acube"):
            continue

        sampler_handle, get_sampler_handle = get_llvm_global_sampler(
            target, global_sampler
        )

        test_name = "test_tld4_" + "".join((comp, geom, dtype))
        # Substitution values for every ${...} placeholder in the template.
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
                unified=(".unified" if is_unified(target) else ""),
                comp=comp,
                geom=get_llvm_geom(geom),
                dtype=dtype,
            ),
            "global_tex": global_tex,
            "retty": get_llvm_vec_type("v4", dtype),
            "sampler": sampler_handle,
            "access": get_llvm_tld4_access(geom),
            "get_sampler_handle": get_sampler_handle,
            "instruction": "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
                comp=comp, geom=geom, dtype=dtype
            ),
            "ptx_ret": get_ptx_vec_reg("v4", dtype),
            "ptx_tex": get_ptx_texture(target),
            "ptx_access": get_ptx_tld4_access(geom),
            "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        # Kernel metadata for the variant that takes the handles as parameters.
        fun_name = test_name + "_param"
        fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
            sampler=("" if is_unified(target) else "i64,"),
            retty=params["retty"],
            access_ty=get_llvm_tld4_access_type(geom),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=True
        )

        # Kernel metadata for the variant that reads the global handles.
        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_tld4_access_type(geom)
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=False
        )

    return generated_items, generated_metadata
def gen_test(template, params):
    """Print one test: ``template`` dedented, with ``params`` substituted.

    When the module-level ``debug`` flag is set, a blank line and one
    ";; name: value" line per parameter are printed first.
    """
    if debug:
        print()
        for name in params:
            print(";; {}: {}".format(name, params[name]))

    dedented = textwrap.dedent(template)
    print(string.Template(dedented).substitute(params))
def gen_tests(target, tests):
    """Print the whole test file for ``target`` and return the covered items.

    ``tests`` is a collection of group names; the groups "suld", "sust",
    "tex" and "tld4" are generated in that fixed order when present. The
    triple and the globals come first and the metadata nodes last. Returns
    the list of (intrinsic, instruction) pairs that were generated.
    """
    gen_triple(target)

    global_surf = "gsurf"
    global_tex = "gtex"
    global_sampler = "gsam"

    items = []
    metadata = []
    metadata += gen_globals(target, global_surf, global_tex, global_sampler)

    # Group name -> generator call, listed in emission order.
    generators = (
        ("suld", lambda: gen_suld_tests(target, global_surf)),
        ("sust", lambda: gen_sust_tests(target, global_surf)),
        ("tex", lambda: gen_tex_tests(target, global_tex, global_sampler)),
        ("tld4", lambda: gen_tld4_tests(target, global_tex, global_sampler)),
    )
    for group, generate in generators:
        if group in tests:
            group_items, group_md = generate()
            items += group_items
            metadata += group_md

    gen_metadata(metadata)
    return items
def write_gen_list(filename, append, items):
    """Write the (intrinsic, instruction) pairs to ``filename``, one per line.

    Each line is "<intrinsic> <instruction>". The file is appended to when
    ``append`` is true and overwritten otherwise.
    """
    mode = "a" if append else "w"
    with open(filename, mode) as out:
        out.writelines(
            "{} {}\n".format(intrinsic, instruction)
            for intrinsic, instruction in items
        )
def read_gen_list(filename):
    """Read back a file produced by write_gen_list.

    Returns (intrinsics, instructions) as two sets. A line that does not
    split into exactly two whitespace-separated fields raises ValueError.
    """
    with open(filename) as f:
        rows = [line.split() for line in f]

    intrinsics, instructions = set(), set()
    for intrinsic, instruction in rows:
        intrinsics.add(intrinsic)
        instructions.add(instruction)
    return (intrinsics, instructions)
def read_td_list(filename, regex):
    """Collect the names matched by ``regex`` in a TableGen file.

    The file is scanned line by line; for every line where ``regex`` is
    found, capture group 1 is added to the result.

    Returns the set of captured names. Raises RuntimeError when fewer than
    30 distinct names are found, which most likely means the .td file
    changed and the pattern no longer matches.
    """
    pattern = re.compile(regex)
    td_list = set()
    with open(filename) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                td_list.add(match.group(1))

    # Arbitrary value - we should find quite a lot of instructions
    if len(td_list) < 30:
        # The count comes first and the file name second, matching the
        # order of the placeholders in the message.
        raise RuntimeError(
            "found only {} instructions in {}".format(len(td_list), filename)
        )

    return td_list
def verify_inst_tablegen(path_td, gen_instr):
    """
    Verify that all instructions defined in NVPTXIntrinsics.td are
    tested.

    ``path_td`` is the TableGen file to scan and ``gen_instr`` the
    collection of instruction names the generator produced. A fixed list of
    known-untested instructions is treated as tested. ``gen_instr`` itself
    is left unmodified.

    Raises RuntimeError naming the first (in sorted order) instruction that
    is present in TableGen but not tested.
    """

    td_instr = read_td_list(path_td, '"((suld|sust|tex|tld4)\\..*)"')

    # Work on a copy so the caller's collection is not mutated.
    tested = set(gen_instr)
    tested.update(
        [
            # FIXME: spec does not list any sust.p variants other than b32
            "sust.p.1d.b8.trap",
            "sust.p.1d.b16.trap",
            "sust.p.1d.v2.b8.trap",
            "sust.p.1d.v2.b16.trap",
            "sust.p.1d.v4.b8.trap",
            "sust.p.1d.v4.b16.trap",
            "sust.p.a1d.b8.trap",
            "sust.p.a1d.b16.trap",
            "sust.p.a1d.v2.b8.trap",
            "sust.p.a1d.v2.b16.trap",
            "sust.p.a1d.v4.b8.trap",
            "sust.p.a1d.v4.b16.trap",
            "sust.p.2d.b8.trap",
            "sust.p.2d.b16.trap",
            "sust.p.2d.v2.b8.trap",
            "sust.p.2d.v2.b16.trap",
            "sust.p.2d.v4.b8.trap",
            "sust.p.2d.v4.b16.trap",
            "sust.p.a2d.b8.trap",
            "sust.p.a2d.b16.trap",
            "sust.p.a2d.v2.b8.trap",
            "sust.p.a2d.v2.b16.trap",
            "sust.p.a2d.v4.b8.trap",
            "sust.p.a2d.v4.b16.trap",
            "sust.p.3d.b8.trap",
            "sust.p.3d.b16.trap",
            "sust.p.3d.v2.b8.trap",
            "sust.p.3d.v2.b16.trap",
            "sust.p.3d.v4.b8.trap",
            "sust.p.3d.v4.b16.trap",
            # FIXME: sust.p is also not supported for arrays
            "sust.p.a1d.b32.trap",
            "sust.p.a1d.v2.b32.trap",
            "sust.p.a1d.v4.b32.trap",
            "sust.p.a2d.b32.trap",
            "sust.p.a2d.v2.b32.trap",
            "sust.p.a2d.v4.b32.trap",
        ]
    )

    # Test membership instead of comparing the two sorted lists position by
    # position: a tested name that TableGen does not define would shift the
    # positions and make the error name an instruction that is in fact
    # covered.
    for td in sorted(td_instr):
        if td not in tested:
            raise RuntimeError(
                "{} is present in tablegen, but not tested.\n".format(td)
            )
def verify_llvm_tablegen(path_td, gen_intr):
    """
    Verify that all intrinsics defined in IntrinsicsNVVM.td are
    tested.

    ``path_td`` is the TableGen file to scan and ``gen_intr`` the
    collection of intrinsic names the generator produced. A fixed list of
    known-untested intrinsics is treated as tested. ``gen_intr`` itself is
    left unmodified.

    Raises RuntimeError naming the first (in sorted order) intrinsic that
    is present in TableGen but not tested.
    """

    td_intr = read_td_list(path_td, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')

    # Work on a copy so the caller's collection is not mutated.
    tested = set(gen_intr)
    tested.update(
        [
            # FIXME: spec does not list any sust.p variants other than b32
            "llvm.nvvm.sust.p.1d.i8.trap",
            "llvm.nvvm.sust.p.1d.i16.trap",
            "llvm.nvvm.sust.p.1d.v2i8.trap",
            "llvm.nvvm.sust.p.1d.v2i16.trap",
            "llvm.nvvm.sust.p.1d.v4i8.trap",
            "llvm.nvvm.sust.p.1d.v4i16.trap",
            "llvm.nvvm.sust.p.1d.array.i8.trap",
            "llvm.nvvm.sust.p.1d.array.i16.trap",
            "llvm.nvvm.sust.p.1d.array.v2i8.trap",
            "llvm.nvvm.sust.p.1d.array.v2i16.trap",
            "llvm.nvvm.sust.p.1d.array.v4i8.trap",
            "llvm.nvvm.sust.p.1d.array.v4i16.trap",
            "llvm.nvvm.sust.p.2d.i8.trap",
            "llvm.nvvm.sust.p.2d.i16.trap",
            "llvm.nvvm.sust.p.2d.v2i8.trap",
            "llvm.nvvm.sust.p.2d.v2i16.trap",
            "llvm.nvvm.sust.p.2d.v4i8.trap",
            "llvm.nvvm.sust.p.2d.v4i16.trap",
            "llvm.nvvm.sust.p.2d.array.i8.trap",
            "llvm.nvvm.sust.p.2d.array.i16.trap",
            "llvm.nvvm.sust.p.2d.array.v2i8.trap",
            "llvm.nvvm.sust.p.2d.array.v2i16.trap",
            "llvm.nvvm.sust.p.2d.array.v4i8.trap",
            "llvm.nvvm.sust.p.2d.array.v4i16.trap",
            "llvm.nvvm.sust.p.3d.i8.trap",
            "llvm.nvvm.sust.p.3d.i16.trap",
            "llvm.nvvm.sust.p.3d.v2i8.trap",
            "llvm.nvvm.sust.p.3d.v2i16.trap",
            "llvm.nvvm.sust.p.3d.v4i8.trap",
            "llvm.nvvm.sust.p.3d.v4i16.trap",
            # FIXME: sust.p is also not supported for arrays
            "llvm.nvvm.sust.p.1d.array.i32.trap",
            "llvm.nvvm.sust.p.1d.array.v2i32.trap",
            "llvm.nvvm.sust.p.1d.array.v4i32.trap",
            "llvm.nvvm.sust.p.2d.array.i32.trap",
            "llvm.nvvm.sust.p.2d.array.v2i32.trap",
            "llvm.nvvm.sust.p.2d.array.v4i32.trap",
        ]
    )

    # Test membership instead of comparing the two sorted lists position by
    # position: a tested name that TableGen does not define would shift the
    # positions and make the error name an intrinsic that is in fact
    # covered.
    for td in sorted(td_intr):
        if td not in tested:
            raise RuntimeError(
                "{} is present in tablegen, but not tested.\n".format(td)
            )
# Command-line entry point. Without --verify the script prints the generated
# test file to stdout (and optionally records what it generated); with
# --verify it checks a recorded list against the TableGen files.
parser = argparse.ArgumentParser()
parser.add_argument("--debug", action="store_true")
parser.add_argument("--tests", type=str)
parser.add_argument("--target", type=str)
parser.add_argument("--gen-list", dest="gen_list", type=str)
parser.add_argument("--gen-list-append", dest="gen_list_append", action="store_true")
parser.add_argument("--verify", action="store_true")
parser.add_argument("--llvm-tablegen", dest="llvm_td", type=str)
parser.add_argument("--inst-tablegen", dest="inst_td", type=str)

args = parser.parse_args()
# Module-level flag read by gen_test.
debug = args.debug

if args.verify:
    # Report missing inputs as a usage error instead of failing later with
    # a TypeError from open(None).
    required = (
        ("--gen-list", args.gen_list),
        ("--inst-tablegen", args.inst_td),
        ("--llvm-tablegen", args.llvm_td),
    )
    absent = [flag for flag, value in required if value is None]
    if absent:
        parser.error("--verify requires " + ", ".join(absent))

    intrinsics, instructions = read_gen_list(args.gen_list)
    verify_inst_tablegen(args.inst_td, instructions)
    verify_llvm_tablegen(args.llvm_td, intrinsics)
else:
    # Both options are needed to generate tests; without this check a
    # missing --tests crashed on None.split(",").
    required = (("--target", args.target), ("--tests", args.tests))
    absent = [flag for flag, value in required if value is None]
    if absent:
        parser.error("test generation requires " + ", ".join(absent))

    items = gen_tests(args.target, args.tests.split(","))
    if args.gen_list:
        write_gen_list(args.gen_list, args.gen_list_append, items)