Re-land [openmp] Fix warnings when building on Windows with latest MSVC or Clang...
[llvm-project.git] / llvm / test / CodeGen / NVPTX / surf-tex.py
blob7d86696087438b5fbbea4657e4b7818842470bc4
1 # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2 # RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
3 # RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
5 # We only need to run this second time for texture tests, because
6 # there is a difference between unified and non-unified intrinsics.
8 # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9 # RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
10 # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
12 # Verify that all instructions and intrinsics defined in TableGen
13 # files are tested. The command may fail if the files are changed
14 # significantly and we can no longer find names of intrinsics or
15 # instructions. In that case we can replace this command with a
16 # reference list.
18 # Verification is turned off by default to avoid issues when the LLVM
19 # source directory is not available.
21 # RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
23 from __future__ import print_function
25 import argparse
26 import re
27 import string
28 import textwrap
29 from itertools import product
def get_llvm_geom(geom_ptx):
    """Map a PTX geometry suffix to its spelling in LLVM intrinsic names.

    PTX marks array geometries with an "a" prefix (e.g. "a1d"), while the
    intrinsic names use an ".array" suffix (e.g. "1d.array").

    Raises KeyError if geom_ptx is not one of the known geometries.
    """
    ptx_to_llvm = dict(
        [
            ("1d", "1d"),
            ("2d", "2d"),
            ("3d", "3d"),
            ("a1d", "1d.array"),
            ("a2d", "2d.array"),
            ("cube", "cube"),
            ("acube", "cube.array"),
        ]
    )
    return ptx_to_llvm[geom_ptx]
def get_ptx_reg(ty):
    """Return the FileCheck pattern for a PTX register of value type ty.

    The pattern is a register-name prefix followed by a FileCheck regex
    for the register number. Raises KeyError for an unknown type.
    """
    # Register-name prefix used for each PTX value type.
    prefix_by_type = {
        "b8": "%rs",
        "b16": "%rs",
        "b32": "%r",
        "b64": "%rd",
        "f32": "%f",
        "u32": "%r",
        "s32": "%r",
    }
    return prefix_by_type[ty] + "{{[0-9]+}}"
def get_ptx_vec_reg(vec, ty):
    """Return the FileCheck pattern for a braced PTX register tuple.

    vec is "", "v2" or "v4" and selects one, two or four registers of
    type ty, e.g. "{%r{{[0-9]+}}, %r{{[0-9]+}}}" for ("v2", "b32").

    Raises KeyError for an unknown vec or ty.
    """
    lanes_by_vec = {"": 1, "v2": 2, "v4": 4}
    lanes = lanes_by_vec[vec]
    reg = get_ptx_reg(ty)
    return "{" + ", ".join([reg] * lanes) + "}"
def get_llvm_type(ty):
    """Translate a PTX scalar type name into the LLVM IR type name.

    Types starting with "b", "s" or "u" become the integer type of the
    same width (e.g. "b16" -> "i16", "u32" -> "i32"); "f16" and "f32"
    become "half" and "float".

    Raises RuntimeError for any other type name, including the empty
    string.
    """
    # Slice instead of indexing so that an empty string falls through to
    # the "invalid type" error below rather than raising IndexError.
    if ty[:1] in ("b", "s", "u"):
        return "i" + ty[1:]
    if ty == "f16":
        return "half"
    if ty == "f32":
        return "float"
    raise RuntimeError("invalid type: " + ty)
def get_llvm_vec_type(vec, ty_ptx):
    """Return the LLVM type of a scalar or multi-element value.

    For vec == "" this is the scalar LLVM type; for "v2" / "v4" it is a
    literal struct of two / four such elements, e.g. "{ i32, i32 }".

    Raises RuntimeError for an invalid ty_ptx and KeyError for an unknown
    vec.
    """
    elem_ty = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem_ty == "i8":
        elem_ty = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    if lanes == 1:
        return elem_ty
    return "{ " + ", ".join([elem_ty] * lanes) + " }"
def get_llvm_value(vec, ty_ptx):
    """Return typed LLVM operands %v1..%vN for a stored value.

    N is 1, 2 or 4 for vec "", "v2" or "v4"; e.g. ("v2", "b32") gives
    "i32 %v1, i32 %v2".

    Raises RuntimeError for an invalid ty_ptx and KeyError for an unknown
    vec.
    """
    elem_ty = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem_ty == "i8":
        elem_ty = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join(
        "{} %v{}".format(elem_ty, index) for index in range(1, lanes + 1)
    )
def get_llvm_value_type(vec, ty_ptx):
    """Return the comma-separated LLVM types of a stored value's operands.

    This is the type-only counterpart of get_llvm_value(), e.g.
    ("v2", "b32") gives "i32, i32".

    Raises RuntimeError for an invalid ty_ptx and KeyError for an unknown
    vec.
    """
    elem_ty = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem_ty == "i8":
        elem_ty = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join([elem_ty] * lanes)
# Last value handed out by get_table_gen_id(); 0 means none yet.
id_counter = 0


def get_table_gen_id():
    """Return the next value of a module-wide counter (1, 2, 3, ...).

    The generators use it to give every test a distinct FileCheck
    variable name (REG<id>).
    """
    global id_counter
    next_id = id_counter + 1
    id_counter = next_id
    return next_id
def gen_triple(target):
    """Print the LLVM "target triple" line for target, then a blank line.

    target must be "cuda" or "nvcl"; anything else raises RuntimeError.
    """
    if target not in ("cuda", "nvcl"):
        raise RuntimeError("invalid target: " + target)
    print('target triple = "nvptx64-unknown-{}"\n'.format(target))
def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the global surface/texture/sampler definitions and their CHECKs.

    Also prints the declaration of the handle intrinsic used by the
    "_global" tests. The sampler global is emitted only when the target
    is not in unified mode.

    Returns the list of !nvvm.annotations entries describing the emitted
    globals.
    """
    global_def = "@{} = internal addrspace(1) global i64 {}, align 8"
    annotation = '!{{i64 addrspace(1)* @{}, !"{}", i32 1}}'

    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print(global_def.format(surf_name, 0))
    print(global_def.format(tex_name, 1))
    annotations = [
        annotation.format(surf_name, "surface"),
        annotation.format(tex_name, "texture"),
    ]

    if is_unified(target):
        return annotations

    print("; CHECK: .global .samplerref {}".format(sampler_name))
    print(global_def.format(sampler_name, 1))
    annotations.append(annotation.format(sampler_name, "sampler"))
    return annotations
def gen_metadata(metadata):
    """Print the !nvvm.annotations named metadata and its numbered nodes.

    metadata is a list of already formatted node bodies; node i is
    printed as "!i = <body>" and referenced from !nvvm.annotations.
    """
    node_refs = ", ".join("!{}".format(index) for index in range(len(metadata)))
    print("!nvvm.annotations = !{" + node_refs + "}")
    for index, node in enumerate(metadata):
        print("!{} = {}".format(index, node))
def get_llvm_surface_access(geom_ptx):
    """Return the typed LLVM coordinate operands for a surface geometry.

    All operands are i32; array geometries start with the layer operand
    %l. Raises KeyError for a geometry that is not a surface geometry.
    """
    operand_names = {
        "1d": ("x",),
        "2d": ("x", "y"),
        "3d": ("x", "y", "z"),
        "a1d": ("l", "x"),
        "a2d": ("l", "x", "y"),
    }
    return ", ".join("i32 %" + name for name in operand_names[geom_ptx])
def get_llvm_surface_access_type(geom_ptx):
    """Return the comma-separated LLVM types of the surface coordinates.

    This is the type-only counterpart of get_llvm_surface_access().
    Raises KeyError for a geometry that is not a surface geometry.
    """
    operand_count = {"1d": 1, "2d": 2, "3d": 3, "a1d": 2, "a2d": 3}
    return ", ".join(["i32"] * operand_count[geom_ptx])
def get_ptx_surface_access(geom_ptx):
    """
    Return the FileCheck pattern for the coordinate operand (operand b)
    of a surface instruction with the given PTX geometry.

    Raises KeyError for a geometry that is not a surface geometry.

    From the PTX spec:

    Operand b is a scalar or singleton tuple for 1d surfaces; is a
    two-element vector for 2d surfaces; and is a four-element vector
    for 3d surfaces, where the fourth element is ignored. Coordinate
    elements are of type .s32.

    For 1d surface arrays, operand b has type .v2.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the second element is interpreted as a 1d
    surface coordinate of type .s32.

    For 2d surface arrays, operand b has type .v4.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the next two elements are interpreted as 2d
    surface coordinates of type .s32. The fourth element is ignored.
    """
    # Number of 32-bit registers in operand b for each geometry.
    operand_width = {"1d": 1, "2d": 2, "3d": 4, "a1d": 2, "a2d": 4}

    # Accept register numbers with any number of digits, like every other
    # register pattern in this file; a bare [0-9] would only match
    # single-digit register numbers.
    reg = "%r{{[0-9]+}}"
    return "{" + ", ".join([reg] * operand_width[geom_ptx]) + "}"
def get_ptx_surface(target):
    """Return the FileCheck pattern for the surface operand of a "_param" test.

    Raises RuntimeError if target is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment surface is copied with ld.param, so the
    # instruction uses a register. For 'nvcl' the instruction uses the
    # parameter directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("invalid target: " + target)
def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
    """Return the !nvvm.annotations entries for one surface test function.

    The function is always annotated as a kernel. When has_surface_param
    is true and the target is not "cuda", an "rdwrimage" annotation for
    parameter 0 is added as well.
    """
    entries = [
        '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
    ]

    # When a parameter is lowered as a .surfref, it still has the
    # corresponding ld.param.u64, which is illegal. Do not emit the
    # metadata to keep the parameter as .b64 instead.
    annotate_surface = has_surface_param and target != "cuda"

    if annotate_surface:
        entries.append(
            '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
                fun_ty=fun_ty, fun_name=fun_name
            )
        )

    return entries
def gen_suld_tests(target, global_surf):
    """
    Print the suld (surface load) tests for target.

    Each tested variant gets a "_param" kernel, which receives the
    surface handle as a parameter, and a "_global" kernel, which takes
    the handle of the global surface global_surf.

    Returns (items, metadata): items is a list of
    (intrinsic, instruction) name pairs, metadata is the list of
    !nvvm.annotations entries for the generated kernels.

    PTX spec s9.7.10.1. Surface Instructions:

    suld.b.geom{.cop}.vec.dtype.clamp  d, [a, b];  // unformatted

    .geom  = { .1d, .2d, .3d, .a1d, .a2d };
    .cop   = { .ca, .cg, .cs, .cv };               // cache operation
    .vec   = { none, .v2, .v4 };
    .dtype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %s, ${access});

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
  ;
  define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
  ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
  ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
  define void @${test_name}_global(${retty}* %ret, ${access}) {
    %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
    %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    geoms = ["1d", "2d", "3d", "a1d", "a2d"]
    vecs = ["", "v2", "v4"]
    dtypes = ["b8", "b16", "b32", "b64"]
    clamps = ["trap", "clamp", "zero"]

    items = []
    metadata = []
    # FIXME: "cop" is missing
    for geom, vec, dtype, clamp in product(geoms, vecs, dtypes, clamps):
        # The v4.b64 combination is not generated.
        if (vec, dtype) == ("v4", "b64"):
            continue

        test_name = "test_suld_" + geom + vec + dtype + clamp
        intrinsic = "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
            geom=get_llvm_geom(geom),
            dtype=(vec + get_llvm_type(dtype)),
            clamp=clamp,
        )
        retty = get_llvm_vec_type(vec, dtype)
        instruction = "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
            geom=geom,
            vec=("." + vec if vec != "" else ""),
            dtype=dtype,
            clamp=clamp,
        )

        gen_test(
            template,
            {
                "test_name": test_name,
                "intrinsic": intrinsic,
                "retty": retty,
                "access": get_llvm_surface_access(geom),
                "global_surf": global_surf,
                "instruction": instruction,
                "reg_ret": get_ptx_vec_reg(vec, dtype),
                "reg_surf": get_ptx_surface(target),
                "reg_access": get_ptx_surface_access(geom),
                "reg_id": get_table_gen_id(),
            },
        )
        items.append((intrinsic, instruction))

        access_ty = get_llvm_surface_access_type(geom)

        # Kernel that receives the surface handle as its first parameter.
        metadata += get_surface_metadata(
            target,
            "void (i64, {retty}*, {access_ty})*".format(
                retty=retty, access_ty=access_ty
            ),
            test_name + "_param",
            has_surface_param=True,
        )

        # Kernel that reads the handle of the global surface.
        metadata += get_surface_metadata(
            target,
            "void ({retty}*, {access_ty})*".format(retty=retty, access_ty=access_ty),
            test_name + "_global",
            has_surface_param=False,
        )

    return items, metadata
def gen_sust_tests(target, global_surf):
    """
    Print the sust (surface store) tests for target.

    Each tested variant gets a "_param" kernel, which receives the
    surface handle as a parameter, and a "_global" kernel, which takes
    the handle of the global surface global_surf.

    Returns (items, metadata): items is a list of
    (intrinsic, instruction) name pairs, metadata is the list of
    !nvvm.annotations entries for the generated kernels.

    PTX spec s9.7.10.2. Surface Instructions

    sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp  [a, b], c;  // unformatted
    sust.p.{1d,2d,3d}.vec.b32.clamp          [a, b], c;  // formatted

    sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp   [a, b], c;  // unformatted

    .cop   = { .wb, .cg, .cs, .wt };                     // cache operation
    .vec   = { none, .v2, .v4 };
    .ctype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };
    """

    template = """
  declare void @${intrinsic}(i64 %s, ${access}, ${value});

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
  ;
  define void @${test_name}_param(i64 %s, ${value}, ${access}) {
    tail call void @${intrinsic}(i64 %s, ${access}, ${value})
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
  ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
  ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
  define void @${test_name}_global(${value}, ${access}) {
    %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
    tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
    ret void
  }
  """

    formats = ["b", "p"]
    geoms = ["1d", "2d", "3d", "a1d", "a2d"]
    vecs = ["", "v2", "v4"]
    ctypes = ["b8", "b16", "b32", "b64"]
    clamps = ["trap", "clamp", "zero"]

    items = []
    metadata = []
    # FIXME: "cop" is missing
    for fmt, geom, vec, ctype, clamp in product(formats, geoms, vecs, ctypes, clamps):
        if fmt == "p":
            # Formatted stores are generated only for non-array
            # geometries and b32 data.
            if geom[0] == "a" or ctype != "b32":
                continue

            # FIXME: these intrinsics are missing, but at least one of them is
            # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
            if clamp != "trap":
                continue

        # The v4.b64 combination is not generated.
        if (vec, ctype) == ("v4", "b64"):
            continue

        test_name = "test_sust_" + fmt + geom + vec + ctype + clamp
        intrinsic = "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
            fmt=fmt,
            geom=get_llvm_geom(geom),
            ctype=(vec + get_llvm_type(ctype)),
            clamp=clamp,
        )
        instruction = "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
            fmt=fmt,
            geom=geom,
            vec=("." + vec if vec != "" else ""),
            ctype=ctype,
            clamp=clamp,
        )

        gen_test(
            template,
            {
                "test_name": test_name,
                "intrinsic": intrinsic,
                "access": get_llvm_surface_access(geom),
                "value": get_llvm_value(vec, ctype),
                "global_surf": global_surf,
                "instruction": instruction,
                "reg_value": get_ptx_vec_reg(vec, ctype),
                "reg_surf": get_ptx_surface(target),
                "reg_access": get_ptx_surface_access(geom),
                "reg_id": get_table_gen_id(),
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the surface handle as its first parameter.
        metadata += get_surface_metadata(
            target,
            "void (i64, {value_ty}, {access_ty})*".format(
                value_ty=get_llvm_value_type(vec, ctype),
                access_ty=get_llvm_surface_access_type(geom),
            ),
            test_name + "_param",
            has_surface_param=True,
        )

        # Kernel that reads the handle of the global surface.
        metadata += get_surface_metadata(
            target,
            "void ({value_ty}, {access_ty})*".format(
                value_ty=get_llvm_value_type(vec, ctype),
                access_ty=get_llvm_surface_access_type(geom),
            ),
            test_name + "_global",
            has_surface_param=False,
        )

    return items, metadata
def is_unified(target):
    """
    Return True if tests for target use the unified texture mode.

    PTX has two modes of operation. In the unified mode, texture and
    sampler information is accessed through a single .texref handle. In
    the independent mode, texture and sampler information each have their
    own handle, allowing them to be defined separately and combined at the
    site of usage in the program.
    """
    # Only the "cuda" target is generated in unified mode.
    unified = target == "cuda"
    return unified
def get_llvm_texture_access(geom_ptx, ctype, mipmap):
    """Return the typed LLVM coordinate operands of a texture fetch.

    Array geometries start with the i32 layer operand %l; every other
    operand has the LLVM type of ctype. mipmap "level" appends %lvl and
    "grad" appends the %dpdx*/%dpdy* derivative operands (one, two or
    three of each, depending on the geometry).

    Raises KeyError for an unknown geometry and RuntimeError for an
    invalid ctype.
    """
    coord_names = {
        "1d": ("x",),
        "2d": ("x", "y"),
        "3d": ("x", "y", "z"),
        "cube": ("s", "t", "r"),
        "a1d": ("x",),
        "a2d": ("x", "y"),
        "acube": ("s", "t", "r"),
    }
    names = list(coord_names[geom_ptx])
    has_layer = geom_ptx in ("a1d", "a2d", "acube")

    if mipmap == "level":
        names.append("lvl")
    elif mipmap == "grad":
        if geom_ptx in ("1d", "a1d"):
            derivative_dims = 1
        elif geom_ptx in ("2d", "a2d"):
            derivative_dims = 2
        else:
            derivative_dims = 3
        dims = range(1, derivative_dims + 1)
        names += ["dpdx{}".format(dim) for dim in dims]
        names += ["dpdy{}".format(dim) for dim in dims]

    llvm_ty = get_llvm_type(ctype)
    operands = ["{} %{}".format(llvm_ty, name) for name in names]
    if has_layer:
        operands.insert(0, "i32 %l")
    return ", ".join(operands)
def get_llvm_texture_access_type(geom_ptx, ctype, mipmap):
    """Return the comma-separated LLVM types of a texture fetch's operands.

    This is the type-only counterpart of get_llvm_texture_access().

    Raises KeyError for an unknown geometry and RuntimeError for an
    invalid ctype.
    """
    coord_count = {
        "1d": 1,
        "2d": 2,
        "3d": 3,
        "cube": 3,
        "a1d": 1,
        "a2d": 2,
        "acube": 3,
    }
    count = coord_count[geom_ptx]
    has_layer = geom_ptx in ("a1d", "a2d", "acube")

    if mipmap == "level":
        # One level-of-detail operand.
        count += 1
    elif mipmap == "grad":
        # One dPdx and one dPdy operand per derivative dimension.
        if geom_ptx in ("1d", "a1d"):
            count += 2
        elif geom_ptx in ("2d", "a2d"):
            count += 4
        else:
            count += 6

    types = [get_llvm_type(ctype)] * count
    if has_layer:
        types.insert(0, "i32")
    return ", ".join(types)
def get_ptx_texture_access(geom_ptx, ctype):
    """Return the FileCheck pattern for the coordinate operand of a tex
    instruction.

    The operand is a braced register tuple. Array geometries start with
    a b32 register, cube geometries always use f32 registers, and the
    remaining coordinates use the register type of ctype.

    Raises KeyError for an unknown geometry or ctype.
    """
    # "c" = coordinate of type ctype, "i" = b32 array index, "f" = f32.
    layouts = {
        "1d": "c",
        "2d": "cc",
        "3d": "cccc",
        "a1d": "ic",
        "a2d": "iccc",
        "cube": "ffff",
        "acube": "ifff",
    }
    layout = layouts[geom_ptx]
    reg_by_kind = {
        "c": get_ptx_reg(ctype),
        "i": get_ptx_reg("b32"),
        "f": get_ptx_reg("f32"),
    }
    return "{" + ", ".join(reg_by_kind[kind] for kind in layout) + "}"
def get_ptx_texture(target):
    """Return the FileCheck pattern for the texture/sampler operands of a
    "_param" test.

    Raises RuntimeError if target is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment texture/sampler are copied with ld.param,
    # so the instruction uses registers. For 'nvcl' the instruction uses
    # texture/sampler parameters directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0, test_{{.*}}_param_1",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("unknown target: " + target)
def get_llvm_global_sampler(target, global_sampler):
    """Return the LLVM snippets needed to pass the global sampler.

    Returns a pair (sampler_operand, handle_instruction). Both are empty
    strings in unified mode. Otherwise sampler_operand is the "i64 %gs,"
    call operand and handle_instruction defines %gs as the handle of the
    global named global_sampler.
    """
    if is_unified(target):
        return "", ""

    handle_call = "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
    handle_arg = "(i64 addrspace(1)* @{})".format(global_sampler)
    return "i64 %gs,", handle_call + handle_arg
def get_ptx_global_sampler(target, global_sampler):
    """Return the sampler operand text expected in PTX for "_global" tests.

    This is empty in unified mode; otherwise it is the sampler name
    followed by a comma.
    """
    return "" if is_unified(target) else global_sampler + ","
def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
    """Return the !nvvm.annotations entries for one texture test function.

    The function is always annotated as a kernel. When has_texture_params
    is true and the target is not "cuda", an "rdoimage" annotation for
    parameter 0 is added, followed by a "sampler" annotation for
    parameter 1 when the target is not in unified mode.
    """
    annotation = '!{{{fun_ty} @{fun_name}, !"{kind}", i32 {index}}}'
    entries = [
        annotation.format(fun_ty=fun_ty, fun_name=fun_name, kind="kernel", index=1)
    ]

    # When a parameter is lowered as a .texref, it still has the
    # corresponding ld.param.u64, which is illegal. Do not emit the
    # metadata to keep the parameter as .b64 instead.
    if target == "cuda" or not has_texture_params:
        return entries

    entries.append(
        annotation.format(fun_ty=fun_ty, fun_name=fun_name, kind="rdoimage", index=0)
    )

    # NOTE(review): the sampler annotation is emitted only together with
    # the texture annotation; the nesting was reconstructed from a copy of
    # this file that had lost its indentation - confirm against upstream.
    if not is_unified(target):
        entries.append(
            annotation.format(
                fun_ty=fun_ty, fun_name=fun_name, kind="sampler", index=1
            )
        )

    return entries
def gen_tex_tests(target, global_tex, global_sampler):
    """
    Print the tex (texture fetch) tests for target.

    Each tested variant gets a "_param" kernel, which receives the
    texture (and, outside unified mode, sampler) handle as parameters,
    and a "_global" kernel, which takes the handles of the globals
    global_tex and global_sampler.

    Returns (items, metadata): items is a list of
    (intrinsic, instruction) name pairs, metadata is the list of
    !nvvm.annotations entries for the generated kernels.

    PTX spec s9.7.9.3. Texture Instructions

    tex.geom.v4.dtype.ctype  d, [a, c] {, e} {, f};
    tex.geom.v4.dtype.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    tex.geom.v2.f16x2.ctype  d[|p], [a, c] {, e} {, f};
    tex.geom.v2.f16x2.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    // mipmaps
    tex.base.geom.v4.dtype.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v4.dtype.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v4.dtype.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    tex.base.geom.v2.f16x2.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v2.f16x2.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v2.f16x2.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    .geom  = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
    .dtype = { .u32, .s32, .f16,  .f32 };
    .ctype = {       .s32, .f32 };          // .cube, .acube require .f32
                                            // .2dms, .a2dms require .s32
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
  define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
  ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
  ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
  define void @${test_name}_global(${retty}* %ret, ${access}) {
    %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
    ${get_sampler_handle}
    %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    mipmaps = ["", "level", "grad"]
    geoms = ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"]
    vecs = ["v2", "v4"]
    dtypes = ["u32", "s32", "f16", "f32"]
    ctypes = ["s32", "f32"]
    cube_geoms = ("cube", "acube")

    items = []
    metadata = []
    for mipmap, geom, vec, dtype, ctype in product(mipmaps, geoms, vecs, dtypes, ctypes):
        # FIXME: missing intrinsics.
        # Multi-sample textures and multi-sample texture arrays
        # introduced in PTX ISA version 3.2.
        if geom in ("2dms", "a2dms"):
            continue

        if ctype == "s32":
            # FIXME: missing intrinsics? no such restriction in the PTX spec
            if mipmap != "":
                continue

            # FIXME: missing intrinsics?
            if geom in cube_geoms:
                continue

        # FIXME: missing intrinsics.
        # Support for textures returning f16 and f16x2 data introduced in
        # PTX ISA version 4.2.
        if vec == "v2" or dtype == "f16":
            continue

        # FIXME: missing intrinsics.
        # Support for tex.grad.{cube, acube} introduced in PTX ISA version
        # 4.3, currently supported only in unified mode.
        if not is_unified(target) and mipmap == "grad" and geom in cube_geoms:
            continue

        # The instruction returns a two-element vector for destination
        # type f16x2. For all other destination types, the instruction
        # returns a four-element vector. Coordinates may be given in
        # either signed 32-bit integer or 32-bit floating point form.
        # (Currently unreachable: every v2 variant is skipped above.)
        if vec == "v2" and dtype != "f16":
            continue

        sampler_handle, get_sampler_handle = get_llvm_global_sampler(
            target, global_sampler
        )

        mipmap_suffix = "." + mipmap if mipmap != "" else ""
        test_name = "test_tex_" + "".join((mipmap, geom, vec, dtype, ctype))
        intrinsic = "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
            unified=(".unified" if is_unified(target) else ""),
            geom=get_llvm_geom(geom),
            mipmap=mipmap_suffix,
            vec=vec,
            dtype=dtype,
            ctype=ctype,
        )
        retty = get_llvm_vec_type(vec, dtype)
        instruction = "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
            mipmap=mipmap_suffix,
            geom=geom,
            vec=vec,
            dtype=dtype,
            ctype=ctype,
        )

        gen_test(
            template,
            {
                "test_name": test_name,
                "intrinsic": intrinsic,
                "global_tex": global_tex,
                "retty": retty,
                "sampler": sampler_handle,
                "access": get_llvm_texture_access(geom, ctype, mipmap),
                "get_sampler_handle": get_sampler_handle,
                "instruction": instruction,
                "ptx_ret": get_ptx_vec_reg(vec, dtype),
                "ptx_tex": get_ptx_texture(target),
                "ptx_access": get_ptx_texture_access(geom, ctype),
                "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
                "reg_id": get_table_gen_id(),
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the texture (and sampler) handles as
        # parameters.
        metadata += get_texture_metadata(
            target,
            "void (i64, {sampler} {retty}*, {access_ty})*".format(
                sampler=("" if is_unified(target) else "i64,"),
                retty=retty,
                access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
            ),
            test_name + "_param",
            has_texture_params=True,
        )

        # Kernel that reads the handles of the globals.
        metadata += get_texture_metadata(
            target,
            "void ({retty}*, {access_ty})*".format(
                retty=retty,
                access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
            ),
            test_name + "_global",
            has_texture_params=False,
        )

    return items, metadata
def get_llvm_tld4_access(geom):
    """
    Return the typed LLVM coordinate operands of a tld4 fetch.

    Raises KeyError for a geometry tld4 does not support.

    From the PTX spec:

    For 2D textures, operand c specifies coordinates as a two-element,
    32-bit floating-point vector.

    For 2d texture arrays operand c is a four element, 32-bit
    vector. The first element in operand c is interpreted as an unsigned
    integer index (.u32) into the texture array, and the next two
    elements are interpreted as 32-bit floating point coordinates of 2d
    texture. The fourth element is ignored.

    For cubemap textures, operand c specifies four-element vector which
    comprises three floating-point coordinates (s, t, r) and a fourth
    padding argument which is ignored.

    [For cube arrays] The first element in operand c is interpreted as
    an unsigned integer index (.u32) into the cubemap texture array, and
    the remaining three elements are interpreted as floating-point
    cubemap coordinates (s, t, r), used to lookup in the selected
    cubemap.
    """
    # One character per operand name; "l" is the i32 array index, every
    # other operand is a float coordinate.
    operand_names = {
        "2d": "xy",
        "a2d": "lxy",
        "cube": "str",
        "acube": "lstr",
    }
    return ", ".join(
        ("i32 %" if name == "l" else "float %") + name
        for name in operand_names[geom]
    )
def get_llvm_tld4_access_type(geom):
    """Return the comma-separated LLVM types of a tld4 fetch's operands.

    This is the type-only counterpart of get_llvm_tld4_access().
    Raises KeyError for a geometry tld4 does not support.
    """
    # (has an i32 array index, number of float coordinates)
    shape = {
        "2d": (False, 2),
        "a2d": (True, 2),
        "cube": (False, 3),
        "acube": (True, 3),
    }
    has_index, coord_count = shape[geom]
    types = ["float"] * coord_count
    if has_index:
        types.insert(0, "i32")
    return ", ".join(types)
def get_ptx_tld4_access(geom):
    """Return the FileCheck pattern for the coordinate operand of a tld4
    instruction.

    Raises KeyError for a geometry tld4 does not support.
    """
    # One character per register in the braced tuple: "r" is a %r
    # register, "f" is a %f register.
    layouts = {
        "2d": "ff",
        "a2d": "rfff",
        "cube": "ffff",
        "acube": "rfff",
    }
    regs = ["%" + kind + "{{[0-9]+}}" for kind in layouts[geom]]
    return "{" + ", ".join(regs) + "}"
def gen_tld4_tests(target, global_tex, global_sampler):
    """
    Print the tld4 tests for target.

    Each tested variant gets a "_param" kernel, which receives the
    texture (and, outside unified mode, sampler) handle as parameters,
    and a "_global" kernel, which takes the handles of the globals
    global_tex and global_sampler.

    Returns (items, metadata): items is a list of
    (intrinsic, instruction) name pairs, metadata is the list of
    !nvvm.annotations entries for the generated kernels.

    PTX spec s9.7.9.4. Texture Instructions: tld4
    Perform a texture fetch of the 4-texel bilerp footprint.

    tld4.comp.2d.v4.dtype.f32    d[|p], [a, c] {, e} {, f};
    tld4.comp.geom.v4.dtype.f32  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    .comp  = { .r, .g, .b, .a };
    .geom  = { .2d, .a2d, .cube, .acube };
    .dtype = { .u32, .s32, .f32 };
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
  define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
  ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
  ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
  define void @${test_name}_global(${retty}* %ret, ${access}) {
    %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
    ${get_sampler_handle}
    %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    comps = ["r", "g", "b", "a"]
    geoms = ["2d", "a2d", "cube", "acube"]
    dtypes = ["u32", "s32", "f32"]

    items = []
    metadata = []
    for comp, geom, dtype in product(comps, geoms, dtypes):
        # FIXME: missing intrinsics.
        # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
        if geom in ("a2d", "cube", "acube"):
            continue

        sampler_handle, get_sampler_handle = get_llvm_global_sampler(
            target, global_sampler
        )

        test_name = "test_tld4_" + "".join((comp, geom, dtype))
        intrinsic = "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
            unified=(".unified" if is_unified(target) else ""),
            comp=comp,
            geom=get_llvm_geom(geom),
            dtype=dtype,
        )
        retty = get_llvm_vec_type("v4", dtype)
        instruction = "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
            comp=comp, geom=geom, dtype=dtype
        )

        gen_test(
            template,
            {
                "test_name": test_name,
                "intrinsic": intrinsic,
                "global_tex": global_tex,
                "retty": retty,
                "sampler": sampler_handle,
                "access": get_llvm_tld4_access(geom),
                "get_sampler_handle": get_sampler_handle,
                "instruction": instruction,
                "ptx_ret": get_ptx_vec_reg("v4", dtype),
                "ptx_tex": get_ptx_texture(target),
                "ptx_access": get_ptx_tld4_access(geom),
                "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
                "reg_id": get_table_gen_id(),
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the texture (and sampler) handles as
        # parameters.
        metadata += get_texture_metadata(
            target,
            "void (i64, {sampler} {retty}*, {access_ty})*".format(
                sampler=("" if is_unified(target) else "i64,"),
                retty=retty,
                access_ty=get_llvm_tld4_access_type(geom),
            ),
            test_name + "_param",
            has_texture_params=True,
        )

        # Kernel that reads the handles of the globals.
        metadata += get_texture_metadata(
            target,
            "void ({retty}*, {access_ty})*".format(
                retty=retty, access_ty=get_llvm_tld4_access_type(geom)
            ),
            test_name + "_global",
            has_texture_params=False,
        )

    return items, metadata
def gen_test(template, params):
    """Print one test: template dedented and filled in from params.

    template uses string.Template placeholders (${name}). When the
    module-level debug flag is set, a blank line and every parameter are
    printed first as ";; name: value" lines.
    """
    if debug:
        print()
        for key in params:
            print(";; {}: {}".format(key, params[key]))

    test_text = string.Template(textwrap.dedent(template)).substitute(params)
    print(test_text)
def gen_tests(target, tests):
    """Print the whole test file for target and return the tested items.

    Output order: target triple, global definitions, the requested test
    groups (always in the order suld, sust, tex, tld4), then the
    !nvvm.annotations metadata. tests is a collection of group names.

    Returns the list of (intrinsic, instruction) name pairs generated.
    """
    gen_triple(target)

    global_surf = "gsurf"
    global_tex = "gtex"
    global_sampler = "gsam"

    items = []
    metadata = []
    metadata += gen_globals(target, global_surf, global_tex, global_sampler)

    # Test groups in output order; each generator is only called when its
    # group was requested.
    generators = (
        ("suld", lambda: gen_suld_tests(target, global_surf)),
        ("sust", lambda: gen_sust_tests(target, global_surf)),
        ("tex", lambda: gen_tex_tests(target, global_tex, global_sampler)),
        ("tld4", lambda: gen_tld4_tests(target, global_tex, global_sampler)),
    )
    for group, generate in generators:
        if group in tests:
            group_items, group_md = generate()
            items += group_items
            metadata += group_md

    gen_metadata(metadata)
    return items
def write_gen_list(filename, append, items):
    """Write the (intrinsic, instruction) pairs to filename, one per line.

    Each line is "<intrinsic> <instruction>". The file is appended to when
    append is true and overwritten otherwise.
    """
    mode = "a" if append else "w"
    with open(filename, mode) as f:
        f.writelines(
            "{} {}\n".format(intrinsic, instruction)
            for intrinsic, instruction in items
        )
def read_gen_list(filename):
    """Read a file written by write_gen_list().

    Every line must consist of exactly two whitespace-separated fields,
    otherwise ValueError is raised.

    Returns a pair of sets: (intrinsics, instructions).
    """
    with open(filename) as f:
        fields_per_line = [line.split() for line in f]

    intrinsics = set()
    instructions = set()
    for intrinsic, instruction in fields_per_line:
        intrinsics.add(intrinsic)
        instructions.add(instruction)
    return (intrinsics, instructions)
def read_td_list(filename, regex):
    """Collect the names matched by regex in a TableGen file.

    The file is scanned line by line; for each line where regex matches,
    group 1 of the first match is collected.

    Returns the set of collected names. Raises RuntimeError when fewer
    than 30 distinct names are found, which suggests the file format has
    changed and regex no longer matches.
    """
    pattern = re.compile(regex)
    td_list = set()
    with open(filename) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                td_list.add(match.group(1))

    # Arbitrary value - we should find quite a lot of instructions
    if len(td_list) < 30:
        # Count first, file name second, to match the message text.
        raise RuntimeError(
            "found only {} instructions in {}".format(len(td_list), filename)
        )

    return td_list
def verify_inst_tablegen(path_td, gen_instr):
    """
    Verify that all instructions defined in NVPTXIntrinsics.td are
    tested.

    path_td is the path of the TableGen file and gen_instr the set of
    instruction names for which tests were generated; gen_instr is not
    modified.

    Raises RuntimeError naming the first (in sorted order) instruction
    that is present in the TableGen file but was neither generated nor
    listed below as known to be untested.
    """

    td_instr = read_td_list(path_td, '"((suld|sust|tex|tld4)\\..*)"')

    # Instructions that are deliberately not generated.
    known_untested = {
        # FIXME: spec does not list any sust.p variants other than b32
        "sust.p.1d.b8.trap",
        "sust.p.1d.b16.trap",
        "sust.p.1d.v2.b8.trap",
        "sust.p.1d.v2.b16.trap",
        "sust.p.1d.v4.b8.trap",
        "sust.p.1d.v4.b16.trap",
        "sust.p.a1d.b8.trap",
        "sust.p.a1d.b16.trap",
        "sust.p.a1d.v2.b8.trap",
        "sust.p.a1d.v2.b16.trap",
        "sust.p.a1d.v4.b8.trap",
        "sust.p.a1d.v4.b16.trap",
        "sust.p.2d.b8.trap",
        "sust.p.2d.b16.trap",
        "sust.p.2d.v2.b8.trap",
        "sust.p.2d.v2.b16.trap",
        "sust.p.2d.v4.b8.trap",
        "sust.p.2d.v4.b16.trap",
        "sust.p.a2d.b8.trap",
        "sust.p.a2d.b16.trap",
        "sust.p.a2d.v2.b8.trap",
        "sust.p.a2d.v2.b16.trap",
        "sust.p.a2d.v4.b8.trap",
        "sust.p.a2d.v4.b16.trap",
        "sust.p.3d.b8.trap",
        "sust.p.3d.b16.trap",
        "sust.p.3d.v2.b8.trap",
        "sust.p.3d.v2.b16.trap",
        "sust.p.3d.v4.b8.trap",
        "sust.p.3d.v4.b16.trap",
        # FIXME: sust.p is also not supported for arrays
        "sust.p.a1d.b32.trap",
        "sust.p.a1d.v2.b32.trap",
        "sust.p.a1d.v4.b32.trap",
        "sust.p.a2d.b32.trap",
        "sust.p.a2d.v2.b32.trap",
        "sust.p.a2d.v4.b32.trap",
    }

    # Use a set difference rather than comparing two sorted lists index
    # by index: a generated name that is absent from the TableGen file
    # would shift the lists and make the error name the wrong instruction.
    covered = set(gen_instr) | known_untested
    untested = sorted(set(td_instr) - covered)
    if untested:
        raise RuntimeError(
            "{} is present in tablegen, but not tested.\n".format(untested[0])
        )
def verify_llvm_tablegen(path_td, gen_intr):
    """
    Verify that all intrinsics defined in IntrinsicsNVVM.td are
    tested.

    path_td is the path of the TableGen file and gen_intr the set of
    intrinsic names for which tests were generated; gen_intr is not
    modified.

    Raises RuntimeError naming the first (in sorted order) intrinsic that
    is present in the TableGen file but was neither generated nor listed
    below as known to be untested.
    """

    td_intr = read_td_list(path_td, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')

    # Intrinsics that are deliberately not generated.
    known_untested = {
        # FIXME: spec does not list any sust.p variants other than b32
        "llvm.nvvm.sust.p.1d.i8.trap",
        "llvm.nvvm.sust.p.1d.i16.trap",
        "llvm.nvvm.sust.p.1d.v2i8.trap",
        "llvm.nvvm.sust.p.1d.v2i16.trap",
        "llvm.nvvm.sust.p.1d.v4i8.trap",
        "llvm.nvvm.sust.p.1d.v4i16.trap",
        "llvm.nvvm.sust.p.1d.array.i8.trap",
        "llvm.nvvm.sust.p.1d.array.i16.trap",
        "llvm.nvvm.sust.p.1d.array.v2i8.trap",
        "llvm.nvvm.sust.p.1d.array.v2i16.trap",
        "llvm.nvvm.sust.p.1d.array.v4i8.trap",
        "llvm.nvvm.sust.p.1d.array.v4i16.trap",
        "llvm.nvvm.sust.p.2d.i8.trap",
        "llvm.nvvm.sust.p.2d.i16.trap",
        "llvm.nvvm.sust.p.2d.v2i8.trap",
        "llvm.nvvm.sust.p.2d.v2i16.trap",
        "llvm.nvvm.sust.p.2d.v4i8.trap",
        "llvm.nvvm.sust.p.2d.v4i16.trap",
        "llvm.nvvm.sust.p.2d.array.i8.trap",
        "llvm.nvvm.sust.p.2d.array.i16.trap",
        "llvm.nvvm.sust.p.2d.array.v2i8.trap",
        "llvm.nvvm.sust.p.2d.array.v2i16.trap",
        "llvm.nvvm.sust.p.2d.array.v4i8.trap",
        "llvm.nvvm.sust.p.2d.array.v4i16.trap",
        "llvm.nvvm.sust.p.3d.i8.trap",
        "llvm.nvvm.sust.p.3d.i16.trap",
        "llvm.nvvm.sust.p.3d.v2i8.trap",
        "llvm.nvvm.sust.p.3d.v2i16.trap",
        "llvm.nvvm.sust.p.3d.v4i8.trap",
        "llvm.nvvm.sust.p.3d.v4i16.trap",
        # FIXME: sust.p is also not supported for arrays
        "llvm.nvvm.sust.p.1d.array.i32.trap",
        "llvm.nvvm.sust.p.1d.array.v2i32.trap",
        "llvm.nvvm.sust.p.1d.array.v4i32.trap",
        "llvm.nvvm.sust.p.2d.array.i32.trap",
        "llvm.nvvm.sust.p.2d.array.v2i32.trap",
        "llvm.nvvm.sust.p.2d.array.v4i32.trap",
    }

    # Use a set difference rather than comparing two sorted lists index
    # by index: a generated name that is absent from the TableGen file
    # would shift the lists and make the error name the wrong intrinsic.
    covered = set(gen_intr) | known_untested
    untested = sorted(set(td_intr) - covered)
    if untested:
        raise RuntimeError(
            "{} is present in tablegen, but not tested.\n".format(untested[0])
        )
# Command-line entry point: either generate the tests (default) or, with
# --verify, check a previously written list against the TableGen files.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--debug",
    action="store_true",
    help="print the template parameters before each generated test",
)
parser.add_argument(
    "--tests",
    type=str,
    help="comma-separated test groups to generate (suld, sust, tex, tld4)",
)
parser.add_argument("--target", type=str, help="target environment: cuda or nvcl")
parser.add_argument(
    "--gen-list",
    dest="gen_list",
    type=str,
    help="file with the generated intrinsic/instruction pairs "
    "(written when generating, read with --verify)",
)
parser.add_argument(
    "--gen-list-append",
    dest="gen_list_append",
    action="store_true",
    help="append to the --gen-list file instead of overwriting it",
)
parser.add_argument(
    "--verify",
    action="store_true",
    help="check the --gen-list file against the TableGen files "
    "instead of generating tests",
)
parser.add_argument(
    "--llvm-tablegen", dest="llvm_td", type=str, help="path to IntrinsicsNVVM.td"
)
parser.add_argument(
    "--inst-tablegen", dest="inst_td", type=str, help="path to NVPTXIntrinsics.td"
)

args = parser.parse_args()
# Read by gen_test().
debug = args.debug

# Report missing options as a usage error instead of failing later with an
# AttributeError/TypeError on a None value.
if args.verify:
    required = (
        ("--gen-list", args.gen_list),
        ("--inst-tablegen", args.inst_td),
        ("--llvm-tablegen", args.llvm_td),
    )
else:
    required = (("--target", args.target), ("--tests", args.tests))
missing = [option for option, value in required if value is None]
if missing:
    parser.error("missing required option(s): " + ", ".join(missing))

if args.verify:
    intrinsics, instructions = read_gen_list(args.gen_list)
    verify_inst_tablegen(args.inst_td, instructions)
    verify_llvm_tablegen(args.llvm_td, intrinsics)
else:
    items = gen_tests(args.target, args.tests.split(","))
    if args.gen_list:
        write_gen_list(args.gen_list, args.gen_list_append, items)
1110 write_gen_list(args.gen_list, args.gen_list_append, items)