llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SICIVI,SICI,SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN,SICIVI,SICI %s
   3 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,SICIVI,VI %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
   5
   6 ; GCN-LABEL: {{^}}load_i32:
   7 ; GCN-DAG: s_mov_b32 s3, 0
   8 ; GCN-DAG: s_mov_b32 s2, s1
   9 ; GCN-DAG: s_mov_b32 s1, s3
  10 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  11 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x2
     ; VI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
     ; VI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
  12 ; GFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  13 ; GFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
     ; Adds the i32 loaded through %p0 to the i32 at element 2 of %p1. Both
     ; arguments are inreg 32-bit constant-address-space (addrspace 6) pointers.
     ; The checks expect each pointer to be widened to an SGPR pair whose high
     ; half is zero, and the element offset to appear as the s_load_dword
     ; immediate (0x2 under the SICI prefix, 0x8 under the VI and GFX9 prefixes).
  14 define amdgpu_vs float @load_i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
  15   %gep1 = getelementptr inbounds i32, ptr addrspace(6) %p1, i32 2
  16   %r0 = load i32, ptr addrspace(6) %p0
  17   %r1 = load i32, ptr addrspace(6) %gep1
  18   %r = add i32 %r0, %r1
  19   %r2 = bitcast i32 %r to float
  20   ret float %r2
  21 }
  22
  23 ; GCN-LABEL: {{^}}load_v2i32:
  24 ; SICIVI-DAG: s_mov_b32 s3, 0
  25 ; SICIVI-DAG: s_mov_b32 s2, s1
  26 ; SICIVI-DAG: s_mov_b32 s1, s3
  27 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
  28 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x4
  29 ; VI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
  30 ; VI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
  31 ; GFX9-DAG: s_mov_b32 s2, s1
  32 ; GFX9-DAG: s_mov_b32 s3, 0
  33 ; GFX9-DAG: s_mov_b32 s1, s3
  34 ; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
  35 ; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
     ; <2 x i32> variant: sums the vector at %p0 with the vector at index 2 of
     ; %p1 (both inreg addrspace(6) pointers). The second s_load_dwordx2 is
     ; expected with immediate 0x4 under the SICI prefix and 0x10 under the VI
     ; and GFX9 prefixes.
  36 define amdgpu_vs <2 x float> @load_v2i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
  37   %idx2.ptr = getelementptr inbounds <2 x i32>, ptr addrspace(6) %p1, i32 2
  38   %lhs = load <2 x i32>, ptr addrspace(6) %p0
  39   %rhs = load <2 x i32>, ptr addrspace(6) %idx2.ptr
  40   %sum = add <2 x i32> %lhs, %rhs
  41   %sum.f32 = bitcast <2 x i32> %sum to <2 x float>
  42   ret <2 x float> %sum.f32
  43 }
  44
  45 ; GCN-LABEL: {{^}}load_v4i32:
  46 ; GCN-DAG: s_mov_b32 s3, 0
  47 ; GCN-DAG: s_mov_b32 s2, s1
  48 ; GCN-DAG: s_mov_b32 s1, s3
  49 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
  50 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x8
  51 ; VI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
  52 ; VI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
  53 ; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
  54 ; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
     ; <4 x i32> variant: one load straight from %p0 and one from index 2 of
     ; %p1, added and returned as <4 x float>. The expected s_load_dwordx4
     ; immediates for the indexed load are 0x8 (SICI prefix) and 0x20 (VI and
     ; GFX9 prefixes).
  55 define amdgpu_vs <4 x float> @load_v4i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
  56   %second.addr = getelementptr inbounds <4 x i32>, ptr addrspace(6) %p1, i32 2
  57   %first = load <4 x i32>, ptr addrspace(6) %p0
  58   %second = load <4 x i32>, ptr addrspace(6) %second.addr
  59   %total = add <4 x i32> %first, %second
  60   %total.cast = bitcast <4 x i32> %total to <4 x float>
  61   ret <4 x float> %total.cast
  62 }
  63
  64 ; GCN-LABEL: {{^}}load_v8i32:
  65 ; GCN-DAG: s_mov_b32 s3, 0
  66 ; GCN-DAG: s_mov_b32 s2, s1
  67 ; GCN-DAG: s_mov_b32 s1, s3
  68 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
  69 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x10
  70 ; VI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
  71 ; VI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
  72 ; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
  73 ; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
     ; <8 x i32> variant of the two-pointer load test. The load through %p0 is
     ; expected at immediate 0x0; the load at index 2 of %p1 is expected at 0x10
     ; (SICI prefix) or 0x40 (VI and GFX9 prefixes), as an s_load_dwordx8.
  74 define amdgpu_vs <8 x float> @load_v8i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
  75   %off.ptr = getelementptr inbounds <8 x i32>, ptr addrspace(6) %p1, i32 2
  76   %a = load <8 x i32>, ptr addrspace(6) %p0
  77   %b = load <8 x i32>, ptr addrspace(6) %off.ptr
  78   %ab = add <8 x i32> %a, %b
  79   %ab.f = bitcast <8 x i32> %ab to <8 x float>
  80   ret <8 x float> %ab.f
  81 }
  82
  83 ; GCN-LABEL: {{^}}load_v16i32:
  84 ; GCN-DAG: s_mov_b32 s3, 0
  85 ; GCN-DAG: s_mov_b32 s2, s1
  86 ; GCN-DAG: s_mov_b32 s1, s3
  87 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
  88 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x20
  89 ; VI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
  90 ; VI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
  91 ; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
  92 ; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
     ; <16 x i32> variant: both addrspace(6) loads are expected to select
     ; s_load_dwordx16, with the index-2 load from %p1 at immediate 0x20 (SICI
     ; prefix) or 0x80 (VI and GFX9 prefixes).
  93 define amdgpu_vs <16 x float> @load_v16i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
  94   %p1.at2 = getelementptr inbounds <16 x i32>, ptr addrspace(6) %p1, i32 2
  95   %v0 = load <16 x i32>, ptr addrspace(6) %p0
  96   %v1 = load <16 x i32>, ptr addrspace(6) %p1.at2
  97   %vsum = add <16 x i32> %v0, %v1
  98   %vsum.f32 = bitcast <16 x i32> %vsum to <16 x float>
  99   ret <16 x float> %vsum.f32
 100 }
 101
 102 ; GCN-LABEL: {{^}}load_float:
 103 ; GCN-DAG: s_mov_b32 s3, 0
 104 ; GCN-DAG: s_mov_b32 s2, s1
 105 ; GCN-DAG: s_mov_b32 s1, s3
 106 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
 107 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x2
 108 ; VI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
 109 ; VI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
 110 ; GFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
 111 ; GFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
     ; Scalar float variant: fadds the float at %p0 and the float at element 2
     ; of %p1, both read through inreg addrspace(6) pointers. Expected
     ; s_load_dword immediates for the indexed load are 0x2 (SICI prefix) and
     ; 0x8 (VI and GFX9 prefixes).
 112 define amdgpu_vs float @load_float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
 113   %elt2.ptr = getelementptr inbounds float, ptr addrspace(6) %p1, i32 2
 114   %x = load float, ptr addrspace(6) %p0
 115   %y = load float, ptr addrspace(6) %elt2.ptr
 116   %xy = fadd float %x, %y
 117   ret float %xy
 118 }
 119
 120 ; GCN-LABEL: {{^}}load_v2float:
 121 ; SICIVI-DAG: s_mov_b32 s3, 0
 122 ; SICIVI-DAG: s_mov_b32 s2, s1
 123 ; SICIVI-DAG: s_mov_b32 s1, s3
 124 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
 125 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x4
 126 ; VI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
 127 ; VI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
 128 ; GFX9-DAG: s_mov_b32 s2, s1
 129 ; GFX9-DAG: s_mov_b32 s3, 0
 130 ; GFX9-DAG: s_mov_b32 s1, s3
 131 ; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
 132 ; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
     ; <2 x float> variant: fadds the vector at %p0 and the vector at index 2
     ; of %p1. The indexed s_load_dwordx2 is expected at immediate 0x4 under
     ; the SICI prefix and 0x10 under the VI and GFX9 prefixes.
 133 define amdgpu_vs <2 x float> @load_v2float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
 134   %idx2.ptr = getelementptr inbounds <2 x float>, ptr addrspace(6) %p1, i32 2
 135   %lhs = load <2 x float>, ptr addrspace(6) %p0
 136   %rhs = load <2 x float>, ptr addrspace(6) %idx2.ptr
 137   %sum = fadd <2 x float> %lhs, %rhs
 138   ret <2 x float> %sum
 139 }
 140
 141 ; GCN-LABEL: {{^}}load_v4float:
 142 ; GCN-DAG: s_mov_b32 s3, 0
 143 ; GCN-DAG: s_mov_b32 s2, s1
 144 ; GCN-DAG: s_mov_b32 s1, s3
 145 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
 146 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x8
 147 ; VI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
 148 ; VI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
 149 ; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
 150 ; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
     ; <4 x float> variant: loads through %p0 and through index 2 of %p1 are
     ; fadded. Both are expected as s_load_dwordx4; the indexed one carries
     ; immediate 0x8 (SICI prefix) or 0x20 (VI and GFX9 prefixes).
 151 define amdgpu_vs <4 x float> @load_v4float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
 152   %second.addr = getelementptr inbounds <4 x float>, ptr addrspace(6) %p1, i32 2
 153   %first = load <4 x float>, ptr addrspace(6) %p0
 154   %second = load <4 x float>, ptr addrspace(6) %second.addr
 155   %total = fadd <4 x float> %first, %second
 156   ret <4 x float> %total
 157 }
 158
 159 ; GCN-LABEL: {{^}}load_v8float:
 160 ; GCN-DAG: s_mov_b32 s3, 0
 161 ; GCN-DAG: s_mov_b32 s2, s1
 162 ; GCN-DAG: s_mov_b32 s1, s3
 163 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
 164 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x10
 165 ; VI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
 166 ; VI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
 167 ; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
 168 ; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
     ; <8 x float> variant: the two addrspace(6) loads are expected as
     ; s_load_dwordx8, with the index-2 load from %p1 at immediate 0x10 (SICI
     ; prefix) or 0x40 (VI and GFX9 prefixes).
 169 define amdgpu_vs <8 x float> @load_v8float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
 170   %off.ptr = getelementptr inbounds <8 x float>, ptr addrspace(6) %p1, i32 2
 171   %a = load <8 x float>, ptr addrspace(6) %p0
 172   %b = load <8 x float>, ptr addrspace(6) %off.ptr
 173   %ab = fadd <8 x float> %a, %b
 174   ret <8 x float> %ab
 175 }
 176
 177 ; GCN-LABEL: {{^}}load_v16float:
 178 ; GCN-DAG: s_mov_b32 s3, 0
 179 ; GCN-DAG: s_mov_b32 s2, s1
 180 ; GCN-DAG: s_mov_b32 s1, s3
 181 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
 182 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x20
 183 ; VI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
 184 ; VI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
 185 ; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
 186 ; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
     ; <16 x float> variant: fadds the vector at %p0 and the vector at index 2
     ; of %p1. Both loads are expected as s_load_dwordx16; the indexed one at
     ; immediate 0x20 (SICI prefix) or 0x80 (VI and GFX9 prefixes).
 187 define amdgpu_vs <16 x float> @load_v16float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
 188   %p1.at2 = getelementptr inbounds <16 x float>, ptr addrspace(6) %p1, i32 2
 189   %v0 = load <16 x float>, ptr addrspace(6) %p0
 190   %v1 = load <16 x float>, ptr addrspace(6) %p1.at2
 191   %vsum = fadd <16 x float> %v0, %v1
 192   ret <16 x float> %vsum
 193 }
 194
 195 ; GCN-LABEL: {{^}}load_i32_hi0:
 196 ; GCN: s_mov_b32 s1, 0
 197 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Single i32 load through an inreg addrspace(6) pointer, compiled with
     ; "amdgpu-32bit-address-high-bits"="0" (attribute group #1). The checks
     ; expect s1 to be set to 0 immediately before the load from s[0:1].
 198 define amdgpu_vs i32 @load_i32_hi0(ptr addrspace(6) inreg %p) #1 {
 199   %val = load i32, ptr addrspace(6) %p
 200   ret i32 %val
 201 }
 202
 203 ; GCN-LABEL: {{^}}load_i32_hi1:
 204 ; GCN: s_mov_b32 s1, 1
 205 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Single i32 load through an inreg addrspace(6) pointer, compiled with
     ; "amdgpu-32bit-address-high-bits"="1" (attribute group #2). The checks
     ; expect s1 to be set to 1 immediately before the load from s[0:1].
 206 define amdgpu_vs i32 @load_i32_hi1(ptr addrspace(6) inreg %p) #2 {
 207   %val = load i32, ptr addrspace(6) %p
 208   ret i32 %val
 209 }
 210
 211 ; GCN-LABEL: {{^}}load_i32_hiffff8000:
 212 ; GCN: s_movk_i32 s1, 0x8000
 213 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Single i32 load through an inreg addrspace(6) pointer, compiled with
     ; "amdgpu-32bit-address-high-bits"="0xffff8000" (attribute group #3). The
     ; checks expect the high half to be materialized with s_movk_i32 s1, 0x8000
     ; immediately before the load from s[0:1].
 214 define amdgpu_vs i32 @load_i32_hiffff8000(ptr addrspace(6) inreg %p) #3 {
 215   %val = load i32, ptr addrspace(6) %p
 216   ret i32 %val
 217 }
 218
 219 ; GCN-LABEL: {{^}}load_i32_hifffffff0:
 220 ; GCN: s_mov_b32 s1, -16
 221 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Single i32 load through an inreg addrspace(6) pointer, compiled with
     ; "amdgpu-32bit-address-high-bits"="0xfffffff0" (attribute group #4). The
     ; checks expect s1 to be set to -16 immediately before the load from s[0:1].
 222 define amdgpu_vs i32 @load_i32_hifffffff0(ptr addrspace(6) inreg %p) #4 {
 223   %val = load i32, ptr addrspace(6) %p
 224   ret i32 %val
 225 }
 226
 227 ; GCN-LABEL: {{^}}load_sampler:
 228 ; GCN: v_readfirstlane_b32
 229 ; SI: s_nop
 230 ; GCN: s_load_dwordx8
 231 ; GCN-NEXT: s_load_dwordx4
 232 ; GCN: image_sample
     ; Pixel shader that derives two indices from an llvm.amdgcn.interp.mov
     ; result, loads a <8 x i32> and a <4 x i32> through the second addrspace(6)
     ; argument (%1) with both GEPs tagged !amdgpu.uniform, and passes them to
     ; llvm.amdgcn.image.sample.1d. The checks expect a v_readfirstlane_b32, then
     ; s_load_dwordx8 directly followed by s_load_dwordx4, then image_sample; the
     ; tahiti run (the only one with the SI prefix) also expects an s_nop.
 233 define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
 234 main_body:
 235   %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
 236   %23 = bitcast float %22 to i32
 237   %24 = shl i32 %23, 1
 238   %25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
 239   %26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
 240   %27 = shl i32 %23, 2
 241   %28 = or i32 %27, 3
 242   %29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
 243   %30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
 244   %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
 245   %32 = extractelement <4 x float> %31, i32 0
 246   %33 = extractelement <4 x float> %31, i32 1
 247   %34 = extractelement <4 x float> %31, i32 2
 248   %35 = extractelement <4 x float> %31, i32 3
 249   %36 = bitcast float %4 to i32
 250   %37 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %36, 4
 251   %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %32, 5
 252   %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 6
 253   %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 7
 254   %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 8
 255   %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %20, 19
 256   ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42
 257 }
 258
 259 ; GCN-LABEL: {{^}}load_sampler_nouniform:
 260 ; GCN: v_readfirstlane_b32
 261 ; SI: s_nop
 262 ; GCN: s_load_dwordx8
 263 ; GCN-NEXT: s_load_dwordx4
 264 ; GCN: image_sample
     ; Pixel shader that derives two indices from an llvm.amdgcn.interp.mov
     ; result and loads a <8 x i32> and a <4 x i32> through the second
     ; addrspace(6) argument (%1) for llvm.amdgcn.image.sample.1d. Here the GEPs
     ; carry no !amdgpu.uniform metadata. The checks still expect a
     ; v_readfirstlane_b32, then s_load_dwordx8 directly followed by
     ; s_load_dwordx4, then image_sample; the tahiti run (the only one with the
     ; SI prefix) also expects an s_nop.
 265 define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
 266 main_body:
 267   %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
 268   %23 = bitcast float %22 to i32
 269   %24 = shl i32 %23, 1
 270   %25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
 271   %26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
 272   %27 = shl i32 %23, 2
 273   %28 = or i32 %27, 3
 274   %29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
 275   %30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
 276   %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
 277   %32 = extractelement <4 x float> %31, i32 0
 278   %33 = extractelement <4 x float> %31, i32 1
 279   %34 = extractelement <4 x float> %31, i32 2
 280   %35 = extractelement <4 x float> %31, i32 3
 281   %36 = bitcast float %4 to i32
 282   %37 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %36, 4
 283   %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %37, float %32, 5
 284   %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 6
 285   %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 7
 286   %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 8
 287   %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %20, 19
 288   ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42
 289 }
 290
 291 ; GCN-LABEL: {{^}}load_addr_no_fold:
 292 ; GCN-DAG: s_add_i32 s0, s0, 4
 293 ; GCN-DAG: s_mov_b32 s1, 0
 294 ; GCN: s_load_dword s{{[0-9]}}, s[0:1], 0x0
     ; Loads the i32 at element 1 of %p0 using a GEP without the inbounds flag.
     ; The checks expect the 4-byte offset to be applied with a separate
     ; s_add_i32 on s0 and the load itself to use immediate 0x0, i.e. the offset
     ; is not folded into the s_load_dword.
 295 define amdgpu_vs float @load_addr_no_fold(ptr addrspace(6) inreg noalias %p0) #0 {
 296   %next.ptr = getelementptr i32, ptr addrspace(6) %p0, i32 1
 297   %word = load i32, ptr addrspace(6) %next.ptr
 298   %word.f32 = bitcast i32 %word to float
 299   ret float %word.f32
 300 }
 301
 302 ; GCN-LABEL: {{^}}vgpr_arg_src:
 303 ; GCN: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
 304 ; GCN: s_mov_b32 s[[ZERO:[0-9]+]]
 305 ; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[[[READLANE]]:[[ZERO]]]
     ; The addrspace(6) pointer argument is not marked inreg. The checks expect
     ; it to be read from v0 with v_readfirstlane_b32, paired with an SGPR written
     ; by s_mov_b32, and used as the base of an s_load_dwordx4 that fetches the
     ; ptr addrspace(8) operand for the struct buffer load.
     ; NOTE(review): the call site carries attribute group #1, which also holds
     ; the "amdgpu-32bit-address-high-bits" string attribute; this looks
     ; unintentional at a call site, confirm before changing.
 306 define amdgpu_vs float @vgpr_arg_src(ptr addrspace(6) %arg) {
 307 main_body:
 308   %rsrc = load ptr addrspace(8), ptr addrspace(6) %arg
 309   %data = call nsz float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 undef, i32 0, i32 0, i32 0) #1
 310   ret float %data
 311 }
 312
 313 ; Function Attrs: nounwind readnone speculatable
 314 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
 315
 316 ; Function Attrs: nounwind memory(argmem: read)
 317 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
 318
     ; Function Attrs: nounwind memory(argmem: read)
     ; Called only from vgpr_arg_src in this file.
 319 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #7
 320
 321 !0 = !{} ; Empty node used as the operand of !amdgpu.uniform and !invariant.load.
 322
     ; Attribute groups.
     ; #0 is plain nounwind, used by the two-pointer load tests and
     ; load_addr_no_fold.
     ; #1 through #4 differ only in "amdgpu-32bit-address-high-bits"; the
     ; load_i32_hi* checks expect that value in s1 (0, 1, 0x8000 via s_movk_i32,
     ; and -16 respectively).
     ; #5 is attached to the two amdgpu_ps sampler functions.
     ; #6 and #7 are attached to the intrinsic declarations; #8 is used on the
     ; intrinsic call sites inside the sampler functions.
 323 attributes #0 = { nounwind }
 324 attributes #1 = { nounwind "amdgpu-32bit-address-high-bits"="0" }
 325 attributes #2 = { nounwind "amdgpu-32bit-address-high-bits"="1" }
 326 attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
 327 attributes #4 = { nounwind "amdgpu-32bit-address-high-bits"="0xfffffff0" }
 328 attributes #5 = { "InitialPSInputAddr"="45175" }
 329 attributes #6 = { nounwind readnone speculatable }
 330 attributes #7 = { nounwind memory(argmem: read) }
 331 attributes #8 = { nounwind readnone }