test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll

   1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN,HSA-VI,FUNC %s
   2
   3 ; Repeat of some problematic tests in kernel-args.ll, with the IR
   4 ; argument lowering pass disabled. Struct padding needs to be
   5 ; accounted for, as well as legalization of types changing offsets.
   6
   7 ; FUNC-LABEL: {{^}}i1_arg:
   8 ; HSA-VI: kernarg_segment_byte_size = 12
   9 ; HSA-VI: kernarg_segment_alignment = 4
  10
  11 ; GCN: s_load_dword s
  12 ; GCN: s_and_b32
  13 define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; kernel taking an addrspace(1) pointer followed by a single i1 argument
  14   store i1 %x, i1 addrspace(1)* %out, align 1 ; write the i1 argument through %out so that %x is actually used
  15   ret void
  16 }
  17
  18 ; FUNC-LABEL: {{^}}v3i8_arg:
  19 ; HSA-VI: kernarg_segment_byte_size = 12
  20 ; HSA-VI: kernarg_segment_alignment = 4
  21 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
  22 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
  23 define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { ; kernel taking a pointer followed by a <3 x i8> vector argument
  24 entry:
  25   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 ; store the whole 3-element vector argument through %out
  26   ret void
  27 }
  28
  29 ; FUNC-LABEL: {{^}}i65_arg:
  30 ; HSA-VI: kernarg_segment_byte_size = 24
  31 ; HSA-VI: kernarg_segment_alignment = 4
  32 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
  33 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
  34 define amdgpu_kernel void @i65_arg(i65 addrspace(1)* nocapture %out, i65 %in) nounwind { ; kernel taking a pointer followed by a 65-bit integer argument
  35 entry:
  36   store i65 %in, i65 addrspace(1)* %out, align 4 ; store the full i65 argument through %out
  37   ret void
  38 }
  39
  40 ; FUNC-LABEL: {{^}}empty_struct_arg:
  41 ; HSA-VI: kernarg_segment_byte_size = 0
  42 define amdgpu_kernel void @empty_struct_arg({} %in) nounwind { ; kernel whose only argument is an empty struct
  43   ret void ; %in is never read; the body only returns
  44 }
  45
  46 ; The correct load offsets for these:
  47 ; load 4 from 0,
  48 ; load 8 from 8
  49 ; load 4 from 24
  50 ; load 8 from 32
  51
  52 ; With the SelectionDAG argument lowering, the alignment of the
  53 ; struct members is not properly considered, making these wrong.
  54
  55 ; FIXME: Total argument size is computed incorrectly
  56 ; FUNC-LABEL: {{^}}struct_argument_alignment:
  57 ; HSA-VI: kernarg_segment_byte_size = 40
  58 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
  59 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
  60 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
  61 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
  62 define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) { ; two {i32, i64} struct arguments separated by an unnamed i8 argument
  63   %val0 = extractvalue {i32, i64} %arg0, 0 ; i32 member of the first struct
  64   %val1 = extractvalue {i32, i64} %arg0, 1 ; i64 member of the first struct
  65   %val2 = extractvalue {i32, i64} %arg1, 0 ; i32 member of the second struct
  66   %val3 = extractvalue {i32, i64} %arg1, 1 ; i64 member of the second struct
  67   store volatile i32 %val0, i32 addrspace(1)* null ; each member is consumed by a volatile store to a null addrspace(1) pointer
  68   store volatile i64 %val1, i64 addrspace(1)* null ; NOTE(review): the null destination looks like a placeholder; only the use of the value appears to matter - confirm
  69   store volatile i32 %val2, i32 addrspace(1)* null
  70   store volatile i64 %val3, i64 addrspace(1)* null
  71   ret void
  72 }
  73
  74 ; No padding between i8 and next struct, but round up at end to 4 byte
  75 ; multiple.
  76 ; FUNC-LABEL: {{^}}packed_struct_argument_alignment:
  77 ; HSA-VI: kernarg_segment_byte_size = 28
  78 ; HSA-VI: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13
  79 ; HSA-VI: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17
  80 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
  81 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
  82 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { ; same argument shape as @struct_argument_alignment, but both structs use the packed <{...}> form
  83   %val0 = extractvalue <{i32, i64}> %arg0, 0 ; i32 member of the first packed struct
  84   %val1 = extractvalue <{i32, i64}> %arg0, 1 ; i64 member of the first packed struct
  85   %val2 = extractvalue <{i32, i64}> %arg1, 0 ; i32 member of the second packed struct
  86   %val3 = extractvalue <{i32, i64}> %arg1, 1 ; i64 member of the second packed struct
  87   store volatile i32 %val0, i32 addrspace(1)* null ; each member is consumed by a volatile store to a null addrspace(1) pointer
  88   store volatile i64 %val1, i64 addrspace(1)* null
  89   store volatile i32 %val2, i32 addrspace(1)* null
  90   store volatile i64 %val3, i64 addrspace(1)* null
  91   ret void
  92 }
  93
  94 ; GCN-LABEL: {{^}}struct_argument_alignment_after:
  95 ; HSA-VI: kernarg_segment_byte_size = 64
  96 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
  97 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
  98 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
  99 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
 100 ; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
 101 define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) { ; two {i32, i64} structs, each followed by an unnamed i8, then a trailing <4 x i32> vector argument
 102   %val0 = extractvalue {i32, i64} %arg0, 0 ; i32 member of the first struct
 103   %val1 = extractvalue {i32, i64} %arg0, 1 ; i64 member of the first struct
 104   %val2 = extractvalue {i32, i64} %arg2, 0 ; i32 member of the second struct
 105   %val3 = extractvalue {i32, i64} %arg2, 1 ; i64 member of the second struct
 106   store volatile i32 %val0, i32 addrspace(1)* null ; each member is consumed by a volatile store to a null addrspace(1) pointer
 107   store volatile i64 %val1, i64 addrspace(1)* null
 108   store volatile i32 %val2, i32 addrspace(1)* null
 109   store volatile i64 %val3, i64 addrspace(1)* null
 110   store volatile <4 x i32> %arg4, <4 x i32> addrspace(1)* null ; the vector argument that follows the structs is stored as a whole
 111   ret void
 112 }
 113
 114 ; GCN-LABEL: {{^}}array_3xi32:
 115 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
 116 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
 117 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
 118 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0xc
 119 define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) { ; i16 argument followed by a [3 x i32] array argument
 120   store volatile i16 %arg0, i16 addrspace(1)* undef ; volatile store to an undef pointer uses the i16 argument
 121   store volatile [3 x i32] %arg1, [3 x i32] addrspace(1)* undef ; the array argument is stored as one aggregate value
 122   ret void
 123 }
 124
 125 ; GCN-LABEL: {{^}}array_3xi16:
 126 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
 127 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
 128 define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) { ; i8 argument followed by a [3 x i16] array argument
 129   store volatile i8 %arg0, i8 addrspace(1)* undef ; volatile store to an undef pointer uses the i8 argument
 130   store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef ; the array argument is stored as one aggregate value
 131   ret void
 132 }