llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
   3
   4 ; --------------------------------------------------------------------
   5 ; llvm.amdgcn.raw.buffer.load
   6 ; --------------------------------------------------------------------
   7
   8 define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   9 ; CHECK-LABEL: @raw_buffer_load_f32(
  10 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  11 ; CHECK-NEXT:    ret float [[DATA]]
  12 ;
  13   %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  14   ret float %data
  15 }
  16
  17 define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  18 ; CHECK-LABEL: @raw_buffer_load_v1f32(
  19 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  20 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
  21 ;
  22   %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  23   ret <1 x float> %data
  24 }
  25
  26 define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  27 ; CHECK-LABEL: @raw_buffer_load_v2f32(
  28 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  29 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
  30 ;
  31   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  32   ret <2 x float> %data
  33 }
  34
  35 define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  36 ; CHECK-LABEL: @raw_buffer_load_v4f32(
  37 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  38 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
  39 ;
  40   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  41   ret <4 x float> %data
  42 }
  43
  44 define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  45 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
  46 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  47 ; CHECK-NEXT:    ret float [[DATA]]
  48 ;
  49   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  50   %elt0 = extractelement <2 x float> %data, i32 0
  51   ret float %elt0
  52 }
  53
  54 define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  55 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
  56 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
  57 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  58 ; CHECK-NEXT:    ret float [[DATA]]
  59 ;
  60   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  61   %elt1 = extractelement <2 x float> %data, i32 1
  62   ret float %elt1
  63 }
  64
  65 define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  66 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
  67 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  68 ; CHECK-NEXT:    ret float [[DATA]]
  69 ;
  70   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  71   %elt0 = extractelement <4 x float> %data, i32 0
  72   ret float %elt0
  73 }
  74
  75 define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  76 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
  77 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
  78 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  79 ; CHECK-NEXT:    ret float [[DATA]]
  80 ;
  81   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  82   %elt1 = extractelement <4 x float> %data, i32 1
  83   ret float %elt1
  84 }
  85
  86 define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  87 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
  88 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
  89 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  90 ; CHECK-NEXT:    ret float [[DATA]]
  91 ;
  92   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  93   %elt1 = extractelement <4 x float> %data, i32 2
  94   ret float %elt1
  95 }
  96
  97 define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  98 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
  99 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 100 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 101 ; CHECK-NEXT:    ret float [[DATA]]
 102 ;
 103   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 104   %elt1 = extractelement <4 x float> %data, i32 3
 105   ret float %elt1
 106 }
 107
 108 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 109 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
 110 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 111 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 112 ;
 113   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 114   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 115   ret <2 x float> %shuf
 116 }
 117
 118 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 119 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
 120 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 121 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 122 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 123 ;
 124   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 125   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 126   ret <2 x float> %shuf
 127 }
 128
 129 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 130 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
 131 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 132 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 133 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 134 ;
 135   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 136   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 137   ret <2 x float> %shuf
 138 }
 139
 140 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 141 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
 142 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 143 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 144 ;
 145   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 146   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 147   ret <3 x float> %shuf
 148 }
 149
 150 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 151 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
 152 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 153 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 154 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 155 ;
 156   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 157   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 158   ret <3 x float> %shuf
 159 }
 160
 161 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 162 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
 163 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 164 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 165 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 166 ;
 167   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 168   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 169   ret <3 x float> %shuf
 170 }
 171
 172 define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 173 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
 174 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 175 ; CHECK-NEXT:    ret float [[DATA]]
 176 ;
 177   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 178   %elt0 = extractelement <3 x float> %data, i32 0
 179   ret float %elt0
 180 }
 181
 182 define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 183 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
 184 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 185 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 186 ; CHECK-NEXT:    ret float [[DATA]]
 187 ;
 188   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 189   %elt1 = extractelement <3 x float> %data, i32 1
 190   ret float %elt1
 191 }
 192
 193 define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 194 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
 195 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 196 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 197 ; CHECK-NEXT:    ret float [[DATA]]
 198 ;
 199   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 200   %elt1 = extractelement <3 x float> %data, i32 2
 201   ret float %elt1
 202 }
 203
 204 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 205 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
 206 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 207 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 208 ;
 209   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 210   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 211   ret <2 x float> %shuf
 212 }
 213
 214 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 215 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
 216 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 217 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 218 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 219 ;
 220   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 221   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 222   ret <2 x float> %shuf
 223 }
 224
 225 define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 226 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
 227 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 228 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
 229 ; CHECK-NEXT:    ret i32 [[VAR2]]
 230 ;
 231   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 232   %var1 = bitcast <4 x float> %var to <4 x i32>
 233   %var2 = extractelement <4 x i32> %var1, i32 0
 234   ret i32 %var2
 235 }
 236
 237 define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 238 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
 239 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 240 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
 241 ; CHECK-NEXT:    ret float [[VAR2]]
 242 ;
 243   %var = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 244   %var1 = bitcast <4 x i32> %var to <4 x float>
 245   %var2 = extractelement <4 x float> %var1, i32 0
 246   ret float %var2
 247 }
 248
 249 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 250 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
 251 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0:![0-9]+]]
 252 ; CHECK-NEXT:    ret float [[DATA]]
 253 ;
 254   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 255   %elt0 = extractelement <2 x float> %data, i32 0
 256   ret float %elt0
 257 }
 258
 259 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
 260 declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
 261 declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
 262 declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
 263 declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
 264
 265 declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1
 266
 267 define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 268 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16(
 269 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 270 ; CHECK-NEXT:    ret half [[DATA]]
 271 ;
 272   %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 273   %elt0 = extractelement <2 x half> %data, i32 0
 274   ret half %elt0
 275 }
 276
 277 define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 278 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16(
 279 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 280 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 281 ; CHECK-NEXT:    ret half [[DATA]]
 282 ;
 283   %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 284   %elt1 = extractelement <2 x half> %data, i32 1
 285   ret half %elt1
 286 }
 287
 288 define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 289 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16(
 290 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 291 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 292 ; CHECK-NEXT:    ret half [[DATA]]
 293 ;
 294   %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 295   %elt0 = extractelement <3 x half> %data, i32 1
 296   ret half %elt0
 297 }
 298
 299 define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 300 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16(
 301 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 302 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 303 ; CHECK-NEXT:    ret half [[DATA]]
 304 ;
 305   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 306   %elt1 = extractelement <4 x half> %data, i32 1
 307   ret half %elt1
 308 }
 309
 310 define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 311 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16(
 312 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
 313 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 314 ; CHECK-NEXT:    ret half [[DATA]]
 315 ;
 316   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 317   %elt1 = extractelement <4 x half> %data, i32 3
 318   ret half %elt1
 319 }
 320
 321 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 322 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16(
 323 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 324 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
 325 ;
 326   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 327   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
 328   ret <2 x half> %shuf
 329 }
 330
 331 declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1
 332 declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1
 333 declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1
 334 declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1
 335
 336 define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 337 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8(
 338 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 339 ; CHECK-NEXT:    ret i8 [[DATA]]
 340 ;
 341   %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 342   %elt0 = extractelement <2 x i8> %data, i32 0
 343   ret i8 %elt0
 344 }
 345
 346 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 347 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8(
 348 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 349 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 350 ; CHECK-NEXT:    ret i8 [[DATA]]
 351 ;
 352   %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 353   %elt1 = extractelement <2 x i8> %data, i32 1
 354   ret i8 %elt1
 355 }
 356
 357 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 358 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8(
 359 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 360 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 361 ; CHECK-NEXT:    ret i8 [[DATA]]
 362 ;
 363   %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 364   %elt0 = extractelement <3 x i8> %data, i32 1
 365   ret i8 %elt0
 366 }
 367
 368 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 369 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8(
 370 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 371 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 372 ; CHECK-NEXT:    ret i8 [[DATA]]
 373 ;
 374   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 375   %elt1 = extractelement <4 x i8> %data, i32 1
 376   ret i8 %elt1
 377 }
 378
 379 define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 380 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8(
 381 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
 382 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 383 ; CHECK-NEXT:    ret i8 [[DATA]]
 384 ;
 385   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 386   %elt1 = extractelement <4 x i8> %data, i32 3
 387   ret i8 %elt1
 388 }
 389
 390 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 391 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8(
 392 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 393 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
 394 ;
 395   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 396   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
 397   ret <2 x i8> %shuf
 398 }
 399
 400 declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1
 401 declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1
 402 declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1
 403 declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1
 404
 405 ; --------------------------------------------------------------------
 406 ; llvm.amdgcn.raw.ptr.buffer.load
 407 ; --------------------------------------------------------------------
 408
 409 define amdgpu_ps float @raw_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 410 ; CHECK-LABEL: @raw_ptr_buffer_load_f32(
 411 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 412 ; CHECK-NEXT:    ret float [[DATA]]
 413 ;
 414   %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 415   ret float %data
 416 }
 417
 418 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 419 ; CHECK-LABEL: @raw_ptr_buffer_load_v1f32(
 420 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 421 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
 422 ;
 423   %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 424   ret <1 x float> %data
 425 }
 426
 427 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 428 ; CHECK-LABEL: @raw_ptr_buffer_load_v2f32(
 429 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 430 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 431 ;
 432   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 433   ret <2 x float> %data
 434 }
 435
 436 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 437 ; CHECK-LABEL: @raw_ptr_buffer_load_v4f32(
 438 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 439 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
 440 ;
 441   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 442   ret <4 x float> %data
 443 }
 444
 445 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 446 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f32(
 447 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 448 ; CHECK-NEXT:    ret float [[DATA]]
 449 ;
 450   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 451   %elt0 = extractelement <2 x float> %data, i32 0
 452   ret float %elt0
 453 }
 454
 455 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 456 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f32(
 457 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 458 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 459 ; CHECK-NEXT:    ret float [[DATA]]
 460 ;
 461   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 462   %elt1 = extractelement <2 x float> %data, i32 1
 463   ret float %elt1
 464 }
 465
 466 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 467 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v4f32(
 468 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 469 ; CHECK-NEXT:    ret float [[DATA]]
 470 ;
 471   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 472   %elt0 = extractelement <4 x float> %data, i32 0
 473   ret float %elt0
 474 }
 475
 476 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 477 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f32(
 478 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 479 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 480 ; CHECK-NEXT:    ret float [[DATA]]
 481 ;
 482   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 483   %elt1 = extractelement <4 x float> %data, i32 1
 484   ret float %elt1
 485 }
 486
 487 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 488 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v4f32(
 489 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 490 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 491 ; CHECK-NEXT:    ret float [[DATA]]
 492 ;
 493   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 494   %elt1 = extractelement <4 x float> %data, i32 2
 495   ret float %elt1
 496 }
 497
 498 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 499 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f32(
 500 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 501 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 502 ; CHECK-NEXT:    ret float [[DATA]]
 503 ;
 504   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 505   %elt1 = extractelement <4 x float> %data, i32 3
 506   ret float %elt1
 507 }
 508
 509 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 510 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(
 511 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 512 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 513 ;
 514   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 515   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 516   ret <2 x float> %shuf
 517 }
 518
 519 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 520 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(
 521 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 522 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 523 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 524 ;
 525   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 526   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 527   ret <2 x float> %shuf
 528 }
 529
 530 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 531 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(
 532 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 533 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 534 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 535 ;
 536   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 537   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 538   ret <2 x float> %shuf
 539 }
 540
 541 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 542 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(
 543 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 544 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 545 ;
 546   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 547   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 548   ret <3 x float> %shuf
 549 }
 550
 551 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 552 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(
 553 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 554 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 555 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 556 ;
 557   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 558   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 559   ret <3 x float> %shuf
 560 }
 561
 562 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 563 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(
 564 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 565 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 566 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 567 ;
 568   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 569   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 570   ret <3 x float> %shuf
 571 }
 572
 573 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 574 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v3f32(
 575 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 576 ; CHECK-NEXT:    ret float [[DATA]]
 577 ;
 578   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 579   %elt0 = extractelement <3 x float> %data, i32 0
 580   ret float %elt0
 581 }
 582
 583 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 584 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f32(
 585 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 586 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 587 ; CHECK-NEXT:    ret float [[DATA]]
 588 ;
 589   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 590   %elt1 = extractelement <3 x float> %data, i32 1
 591   ret float %elt1
 592 }
 593
 594 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 595 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v3f32(
 596 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 597 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 598 ; CHECK-NEXT:    ret float [[DATA]]
 599 ;
 600   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 601   %elt1 = extractelement <3 x float> %data, i32 2
 602   ret float %elt1
 603 }
 604
 605 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 606 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(
 607 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 608 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 609 ;
 610   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 611   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 612   ret <2 x float> %shuf
 613 }
 614
 615 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 616 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(
 617 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 618 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 619 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 620 ;
 621   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 622   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 623   ret <2 x float> %shuf
 624 }
 625
 626 define i32 @extract0_bitcast_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 627 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4f32(
 628 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 629 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
 630 ; CHECK-NEXT:    ret i32 [[VAR2]]
 631 ;
 632   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 633   %var1 = bitcast <4 x float> %var to <4 x i32>
 634   %var2 = extractelement <4 x i32> %var1, i32 0
 635   ret i32 %var2
 636 }
 637
 638 define float @extract0_bitcast_raw_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 639 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4i32(
 640 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 641 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
 642 ; CHECK-NEXT:    ret float [[VAR2]]
 643 ;
 644   %var = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 645   %var1 = bitcast <4 x i32> %var to <4 x float>
 646   %var2 = extractelement <4 x float> %var1, i32 0
 647   ret float %var2
 648 }
 649
 650 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 651 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(
 652 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
 653 ; CHECK-NEXT:    ret float [[DATA]]
 654 ;
 655   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 656   %elt0 = extractelement <2 x float> %data, i32 0
 657   ret float %elt0
 658 }
 659
 660 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #1
 661 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32) #1
 662 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #1
 663 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32) #1
 664 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
 665
 666 declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #1
 667
 668 define amdgpu_ps half @extract_elt0_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 669 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f16(
 670 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 671 ; CHECK-NEXT:    ret half [[DATA]]
 672 ;
 673   %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 674   %elt0 = extractelement <2 x half> %data, i32 0
 675   ret half %elt0
 676 }
 677
 678 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 679 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f16(
 680 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 681 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 682 ; CHECK-NEXT:    ret half [[DATA]]
 683 ;
 684   %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 685   %elt1 = extractelement <2 x half> %data, i32 1
 686   ret half %elt1
 687 }
 688
 689 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 690 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f16(
 691 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 692 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 693 ; CHECK-NEXT:    ret half [[DATA]]
 694 ;
 695   %data = call <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 696   %elt0 = extractelement <3 x half> %data, i32 1
 697   ret half %elt0
 698 }
 699
 700 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 701 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f16(
 702 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 703 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 704 ; CHECK-NEXT:    ret half [[DATA]]
 705 ;
 706   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 707   %elt1 = extractelement <4 x half> %data, i32 1
 708   ret half %elt1
 709 }
 710
 711 define amdgpu_ps half @extract_elt3_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 712 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f16(
 713 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
 714 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 715 ; CHECK-NEXT:    ret half [[DATA]]
 716 ;
 717   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 718   %elt1 = extractelement <4 x half> %data, i32 3
 719   ret half %elt1
 720 }
 721
 722 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 723 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(
 724 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 725 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
 726 ;
 727   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 728   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
 729   ret <2 x half> %shuf
 730 }
 731
 732 declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #1
 733 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #1
 734 declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32) #1
 735 declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #1
 736
 737 define amdgpu_ps i8 @extract_elt0_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 738 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2i8(
 739 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 740 ; CHECK-NEXT:    ret i8 [[DATA]]
 741 ;
 742   %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 743   %elt0 = extractelement <2 x i8> %data, i32 0
 744   ret i8 %elt0
 745 }
 746
 747 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 748 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2i8(
 749 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 750 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 751 ; CHECK-NEXT:    ret i8 [[DATA]]
 752 ;
 753   %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 754   %elt1 = extractelement <2 x i8> %data, i32 1
 755   ret i8 %elt1
 756 }
 757
 758 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 759 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3i8(
 760 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 761 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 762 ; CHECK-NEXT:    ret i8 [[DATA]]
 763 ;
 764   %data = call <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 765   %elt0 = extractelement <3 x i8> %data, i32 1
 766   ret i8 %elt0
 767 }
 768
 769 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 770 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4i8(
 771 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 772 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 773 ; CHECK-NEXT:    ret i8 [[DATA]]
 774 ;
 775   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 776   %elt1 = extractelement <4 x i8> %data, i32 1
 777   ret i8 %elt1
 778 }
 779
 780 define amdgpu_ps i8 @extract_elt3_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 781 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4i8(
 782 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
 783 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 784 ; CHECK-NEXT:    ret i8 [[DATA]]
 785 ;
 786   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 787   %elt1 = extractelement <4 x i8> %data, i32 3
 788   ret i8 %elt1
 789 }
 790
 791 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 792 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(
 793 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 794 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
 795 ;
 796   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 797   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
 798   ret <2 x i8> %shuf
 799 }
 800
 801 declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #1
 802 declare <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32) #1
 803 declare <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32) #1
 804 declare <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32) #1
 805
 806 ; --------------------------------------------------------------------
 807 ; llvm.amdgcn.s.buffer.load
 808 ; --------------------------------------------------------------------
 809
 810 define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 811 ; CHECK-LABEL: @s_buffer_load_f32(
 812 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 813 ; CHECK-NEXT:    ret float [[DATA]]
 814 ;
 815   %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 816   ret float %data
 817 }
 818
 819 define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 820 ; CHECK-LABEL: @s_buffer_load_v2f32(
 821 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 822 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 823 ;
 824   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 825   ret <2 x float> %data
 826 }
 827
 828 define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 829 ; CHECK-LABEL: @s_buffer_load_v4f32(
 830 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 831 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
 832 ;
 833   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 834   ret <4 x float> %data
 835 }
 836
 837 define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 838 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32(
 839 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 840 ; CHECK-NEXT:    ret float [[DATA]]
 841 ;
 842   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 843   %elt0 = extractelement <2 x float> %data, i32 0
 844   ret float %elt0
 845 }
 846
 847 define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 848 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32(
 849 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 850 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 851 ; CHECK-NEXT:    ret float [[DATA]]
 852 ;
 853   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 854   %elt1 = extractelement <2 x float> %data, i32 1
 855   ret float %elt1
 856 }
 857
 858 define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 859 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32(
 860 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 861 ; CHECK-NEXT:    ret float [[DATA]]
 862 ;
 863   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 864   %elt0 = extractelement <4 x float> %data, i32 0
 865   ret float %elt0
 866 }
 867
 868 define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 869 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32(
 870 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 871 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 872 ; CHECK-NEXT:    ret float [[DATA]]
 873 ;
 874   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 875   %elt1 = extractelement <4 x float> %data, i32 1
 876   ret float %elt1
 877 }
 878
 879 define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 880 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32(
 881 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 882 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 883 ; CHECK-NEXT:    ret float [[DATA]]
 884 ;
 885   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 886   %elt1 = extractelement <4 x float> %data, i32 2
 887   ret float %elt1
 888 }
 889
 890 define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 891 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32(
 892 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 893 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 894 ; CHECK-NEXT:    ret float [[DATA]]
 895 ;
 896   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 897   %elt1 = extractelement <4 x float> %data, i32 3
 898   ret float %elt1
 899 }
 900
 901 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 902 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32(
 903 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 904 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 905 ;
 906   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 907   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 908   ret <2 x float> %shuf
 909 }
 910
 911 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 912 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32(
 913 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 914 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 915 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 916 ;
 917   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 918   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 919   ret <2 x float> %shuf
 920 }
 921
 922 define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 923 ; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32(
 924 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 925 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 926 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 927 ;
 928   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 929   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 930   ret <2 x float> %shuf
 931 }
 932
 933 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 934 ; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32(
 935 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 936 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 937 ;
 938   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 939   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 940   ret <3 x float> %shuf
 941 }
 942
 943 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 944 ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32(
 945 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 946 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 947 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 948 ;
 949   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 950   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 951   ret <3 x float> %shuf
 952 }
 953
 954 define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 955 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32(
 956 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 957 ; CHECK-NEXT:    ret float [[DATA]]
 958 ;
 959   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 960   %elt0 = extractelement <3 x float> %data, i32 0
 961   ret float %elt0
 962 }
 963
 964 define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 965 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32(
 966 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 967 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 968 ; CHECK-NEXT:    ret float [[DATA]]
 969 ;
 970   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 971   %elt1 = extractelement <3 x float> %data, i32 1
 972   ret float %elt1
 973 }
 974
 975 define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 976 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32(
 977 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 978 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 979 ; CHECK-NEXT:    ret float [[DATA]]
 980 ;
 981   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 982   %elt1 = extractelement <3 x float> %data, i32 2
 983   ret float %elt1
 984 }
 985
 986 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 987 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32(
 988 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 989 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 990 ;
 991   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 992   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 993   ret <2 x float> %shuf
 994 }
 995
 996 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 997 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32(
 998 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 999 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1000 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1001 ;
1002   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1003   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1004   ret <2 x float> %shuf
1005 }
1006
1007 ; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened
1008 ; to vec4 anyway during lowering.
1009 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1010 ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32(
1011 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1012 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1013 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1014 ;
1015   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1016   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1017   ret <3 x float> %shuf
1018 }
1019
1020 define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1021 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32(
1022 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1023 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1024 ; CHECK-NEXT:    ret i32 [[VAR2]]
1025 ;
1026   %var = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1027   %var1 = bitcast <4 x float> %var to <4 x i32>
1028   %var2 = extractelement <4 x i32> %var1, i32 0
1029   ret i32 %var2
1030 }
1031
1032 define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1033 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32(
1034 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1035 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1036 ; CHECK-NEXT:    ret float [[VAR2]]
1037 ;
1038   %var = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1039   %var1 = bitcast <4 x i32> %var to <4 x float>
1040   %var2 = extractelement <4 x float> %var1, i32 0
1041   ret float %var2
1042 }
1043
1044 define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1045 ; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32(
1046 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0), !fpmath [[META0]]
1047 ; CHECK-NEXT:    ret float [[DATA]]
1048 ;
1049   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0
1050   %elt0 = extractelement <2 x float> %data, i32 0
1051   ret float %elt0
1052 }
1053
1054 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
1055 declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1
1056 declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1
1057 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1
1058 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1
1059
1060 define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1061 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16(
1062 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1063 ; CHECK-NEXT:    ret half [[DATA]]
1064 ;
1065   %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1066   %elt0 = extractelement <2 x half> %data, i32 0
1067   ret half %elt0
1068 }
1069
1070 define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1071 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16(
1072 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1073 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1074 ; CHECK-NEXT:    ret half [[DATA]]
1075 ;
1076   %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1077   %elt1 = extractelement <2 x half> %data, i32 1
1078   ret half %elt1
1079 }
1080
1081 define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1082 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16(
1083 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1084 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1085 ; CHECK-NEXT:    ret half [[DATA]]
1086 ;
1087   %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1088   %elt1 = extractelement <3 x half> %data, i32 1
1089   ret half %elt1
1090 }
1091
1092 define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1093 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16(
1094 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1095 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1096 ; CHECK-NEXT:    ret half [[DATA]]
1097 ;
1098   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1099   %elt1 = extractelement <4 x half> %data, i32 1
1100   ret half %elt1
1101 }
1102
1103
1104 define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1105 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16(
1106 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
1107 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1108 ; CHECK-NEXT:    ret half [[DATA]]
1109 ;
1110   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1111   %elt1 = extractelement <4 x half> %data, i32 3
1112   ret half %elt1
1113 }
1114
1115 define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1116 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16(
1117 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1118 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
1119 ;
1120   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1121   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
1122   ret <2 x half> %shuf
1123 }
1124
1125 declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1
1126 declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1
1127 declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1
1128 declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1
1129
1130 define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1131 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8(
1132 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1133 ; CHECK-NEXT:    ret i8 [[DATA]]
1134 ;
1135   %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1136   %elt0 = extractelement <2 x i8> %data, i32 0
1137   ret i8 %elt0
1138 }
1139
1140 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1141 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8(
1142 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1143 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1144 ; CHECK-NEXT:    ret i8 [[DATA]]
1145 ;
1146   %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1147   %elt1 = extractelement <2 x i8> %data, i32 1
1148   ret i8 %elt1
1149 }
1150
1151 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1152 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8(
1153 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1154 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1155 ; CHECK-NEXT:    ret i8 [[DATA]]
1156 ;
1157   %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1158   %elt1 = extractelement <3 x i8> %data, i32 1
1159   ret i8 %elt1
1160 }
1161
1162 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1163 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8(
1164 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1165 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1166 ; CHECK-NEXT:    ret i8 [[DATA]]
1167 ;
1168   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1169   %elt1 = extractelement <4 x i8> %data, i32 1
1170   ret i8 %elt1
1171 }
1172
1173 define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1174 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8(
1175 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
1176 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1177 ; CHECK-NEXT:    ret i8 [[DATA]]
1178 ;
1179   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1180   %elt1 = extractelement <4 x i8> %data, i32 3
1181   ret i8 %elt1
1182 }
1183
1184 define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1185 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8(
1186 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1187 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
1188 ;
1189   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1190   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
1191   ret <2 x i8> %shuf
1192 }
1193
1194 declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1
1195 declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1
1196 declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1
1197 declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1
1198
1199 ; --------------------------------------------------------------------
1200 ; llvm.amdgcn.raw.buffer.load.format
1201 ; --------------------------------------------------------------------
1202
1203 define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1204 ; CHECK-LABEL: @raw_buffer_load_format_f32(
1205 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1206 ; CHECK-NEXT:    ret float [[DATA]]
1207 ;
1208   %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1209   ret float %data
1210 }
1211
1212 define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1213 ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
1214 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1215 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1216 ;
1217   %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1218   ret <1 x float> %data
1219 }
1220
1221 define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1222 ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
1223 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1224 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1225 ;
1226   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1227   ret <2 x float> %data
1228 }
1229
1230 define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1231 ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
1232 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1233 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1234 ;
1235   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1236   ret <4 x float> %data
1237 }
1238
1239 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1240 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
1241 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1242 ; CHECK-NEXT:    ret float [[DATA]]
1243 ;
1244   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1245   %elt0 = extractelement <2 x float> %data, i32 0
1246   ret float %elt0
1247 }
1248
1249 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1250 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
1251 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1252 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1253 ; CHECK-NEXT:    ret float [[ELT1]]
1254 ;
1255   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1256   %elt1 = extractelement <2 x float> %data, i32 1
1257   ret float %elt1
1258 }
1259
1260 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1261 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
1262 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1263 ; CHECK-NEXT:    ret float [[DATA]]
1264 ;
1265   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1266   %elt0 = extractelement <4 x float> %data, i32 0
1267   ret float %elt0
1268 }
1269
1270 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1271 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
1272 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1273 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1274 ; CHECK-NEXT:    ret float [[ELT1]]
1275 ;
1276   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1277   %elt1 = extractelement <4 x float> %data, i32 1
1278   ret float %elt1
1279 }
1280
1281 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1282 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
1283 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1284 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1285 ; CHECK-NEXT:    ret float [[ELT1]]
1286 ;
1287   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1288   %elt1 = extractelement <4 x float> %data, i32 2
1289   ret float %elt1
1290 }
1291
1292 define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1293 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
1294 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1295 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1296 ; CHECK-NEXT:    ret float [[ELT1]]
1297 ;
1298   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1299   %elt1 = extractelement <4 x float> %data, i32 3
1300   ret float %elt1
1301 }
1302
1303 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1304 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
1305 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1306 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1307 ;
1308   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1309   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1310   ret <2 x float> %shuf
1311 }
1312
1313 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1314 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
1315 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1316 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1317 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1318 ;
1319   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1320   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1321   ret <2 x float> %shuf
1322 }
1323
1324 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1325 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
1326 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1327 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1328 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1329 ;
1330   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1331   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1332   ret <2 x float> %shuf
1333 }
1334
1335 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1336 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
1337 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1338 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1339 ;
1340   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1341   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1342   ret <3 x float> %shuf
1343 }
1344
1345 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1346 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
1347 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1348 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1349 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1350 ;
1351   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1352   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1353   ret <3 x float> %shuf
1354 }
1355
1356 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1357 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
1358 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1359 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1360 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1361 ;
1362   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1363   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1364   ret <3 x float> %shuf
1365 }
1366
1367 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1368 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
1369 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1370 ; CHECK-NEXT:    ret float [[DATA]]
1371 ;
1372   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1373   %elt0 = extractelement <3 x float> %data, i32 0
1374   ret float %elt0
1375 }
1376
1377 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1378 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
1379 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1380 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1381 ; CHECK-NEXT:    ret float [[ELT1]]
1382 ;
1383   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1384   %elt1 = extractelement <3 x float> %data, i32 1
1385   ret float %elt1
1386 }
1387
1388 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1389 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
1390 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1391 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1392 ; CHECK-NEXT:    ret float [[ELT1]]
1393 ;
1394   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1395   %elt1 = extractelement <3 x float> %data, i32 2
1396   ret float %elt1
1397 }
1398
1399 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1400 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
1401 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1402 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1403 ;
1404   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1405   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1406   ret <2 x float> %shuf
1407 }
1408
1409 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1410 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
1411 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1412 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1413 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1414 ;
1415   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1416   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1417   ret <2 x float> %shuf
1418 }
1419
1420 define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1421 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
1422 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1423 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1424 ; CHECK-NEXT:    ret i32 [[VAR2]]
1425 ;
1426   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1427   %var1 = bitcast <4 x float> %var to <4 x i32>
1428   %var2 = extractelement <4 x i32> %var1, i32 0
1429   ret i32 %var2
1430 }
1431
1432 define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1433 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
1434 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1435 ; CHECK-NEXT:    ret float [[VAR]]
1436 ;
1437   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1438   %var1 = extractelement <4 x float> %var, i32 0
1439   ret float %var1
1440 }
1441
1442 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1443 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
1444 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1445 ; CHECK-NEXT:    ret float [[DATA]]
1446 ;
1447   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1448   %elt0 = extractelement <2 x float> %data, i32 0
1449   ret float %elt0
1450 }
1451
1452 declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
1453 declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
1454 declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
1455 declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
1456 declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
1457
1458 ; --------------------------------------------------------------------
1459 ; llvm.amdgcn.raw.ptr.buffer.load.format
1460 ; --------------------------------------------------------------------
1461
1462 define amdgpu_ps float @raw_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1463 ; CHECK-LABEL: @raw_ptr_buffer_load_format_f32(
1464 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1465 ; CHECK-NEXT:    ret float [[DATA]]
1466 ;
1467   %data = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1468   ret float %data
1469 }
1470
1471 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1472 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v1f32(
1473 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1474 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1475 ;
1476   %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1477   ret <1 x float> %data
1478 }
1479
1480 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1481 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v2f32(
1482 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1483 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1484 ;
1485   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1486   ret <2 x float> %data
1487 }
1488
1489 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1490 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v4f32(
1491 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1492 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1493 ;
1494   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1495   ret <4 x float> %data
1496 }
1497
1498 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1499 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v2f32(
1500 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1501 ; CHECK-NEXT:    ret float [[DATA]]
1502 ;
1503   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1504   %elt0 = extractelement <2 x float> %data, i32 0
1505   ret float %elt0
1506 }
1507
1508 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1509 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v2f32(
1510 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1511 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1512 ; CHECK-NEXT:    ret float [[ELT1]]
1513 ;
1514   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1515   %elt1 = extractelement <2 x float> %data, i32 1
1516   ret float %elt1
1517 }
1518
1519 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1520 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v4f32(
1521 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1522 ; CHECK-NEXT:    ret float [[DATA]]
1523 ;
1524   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1525   %elt0 = extractelement <4 x float> %data, i32 0
1526   ret float %elt0
1527 }
1528
1529 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1530 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v4f32(
1531 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1532 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1533 ; CHECK-NEXT:    ret float [[ELT1]]
1534 ;
1535   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1536   %elt1 = extractelement <4 x float> %data, i32 1
1537   ret float %elt1
1538 }
1539
1540 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1541 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v4f32(
1542 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1543 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1544 ; CHECK-NEXT:    ret float [[ELT1]]
1545 ;
1546   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1547   %elt1 = extractelement <4 x float> %data, i32 2
1548   ret float %elt1
1549 }
1550
1551 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1552 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_format_v4f32(
1553 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1554 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1555 ; CHECK-NEXT:    ret float [[ELT1]]
1556 ;
1557   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1558   %elt1 = extractelement <4 x float> %data, i32 3
1559   ret float %elt1
1560 }
1561
1562 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1563 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(
1564 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1565 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1566 ;
1567   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1568   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1569   ret <2 x float> %shuf
1570 }
1571
1572 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1573 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1574 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1575 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1576 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1577 ;
1578   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1579   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1580   ret <2 x float> %shuf
1581 }
1582
1583 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1584 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1585 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1586 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1587 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1588 ;
1589   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1590   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1591   ret <2 x float> %shuf
1592 }
1593
1594 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1595 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1596 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1597 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1598 ;
1599   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1600   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1601   ret <3 x float> %shuf
1602 }
1603
1604 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1605 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1606 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1607 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1608 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1609 ;
1610   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1611   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1612   ret <3 x float> %shuf
1613 }
1614
1615 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1616 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1617 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1618 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1619 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1620 ;
1621   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1622   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1623   ret <3 x float> %shuf
1624 }
1625
1626 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1627 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v3f32(
1628 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1629 ; CHECK-NEXT:    ret float [[DATA]]
1630 ;
1631   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1632   %elt0 = extractelement <3 x float> %data, i32 0
1633   ret float %elt0
1634 }
1635
1636 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1637 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v3f32(
1638 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1639 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1640 ; CHECK-NEXT:    ret float [[ELT1]]
1641 ;
1642   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1643   %elt1 = extractelement <3 x float> %data, i32 1
1644   ret float %elt1
1645 }
1646
1647 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1648 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v3f32(
1649 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1650 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1651 ; CHECK-NEXT:    ret float [[ELT1]]
1652 ;
1653   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1654   %elt1 = extractelement <3 x float> %data, i32 2
1655   ret float %elt1
1656 }
1657
1658 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1659 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(
1660 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1661 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1662 ;
1663   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1664   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1665   ret <2 x float> %shuf
1666 }
1667
1668 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1669 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(
1670 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1671 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1672 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1673 ;
1674   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1675   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1676   ret <2 x float> %shuf
1677 }
1678
1679 define i32 @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1680 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(
1681 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1682 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1683 ; CHECK-NEXT:    ret i32 [[VAR2]]
1684 ;
1685   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1686   %var1 = bitcast <4 x float> %var to <4 x i32>
1687   %var2 = extractelement <4 x i32> %var1, i32 0
1688   ret i32 %var2
1689 }
1690
1691 define float @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1692 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(
1693 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1694 ; CHECK-NEXT:    ret float [[VAR]]
1695 ;
1696   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1697   %var1 = extractelement <4 x float> %var, i32 0
1698   ret float %var1
1699 }
1700
1701 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1702 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(
1703 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1704 ; CHECK-NEXT:    ret float [[DATA]]
1705 ;
1706   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1707   %elt0 = extractelement <2 x float> %data, i32 0
1708   ret float %elt0
1709 }
1710
1711 declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #1
1712 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32) #1
1713 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #1
1714 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32) #1
1715 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1
1716
1717 ; --------------------------------------------------------------------
1718 ; llvm.amdgcn.struct.buffer.load
1719 ; --------------------------------------------------------------------
1720
1721 define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1722 ; CHECK-LABEL: @struct_buffer_load_f32(
1723 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1724 ; CHECK-NEXT:    ret float [[DATA]]
1725 ;
1726   %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1727   ret float %data
1728 }
1729
1730 define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1731 ; CHECK-LABEL: @struct_buffer_load_v1f32(
1732 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1733 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1734 ;
1735   %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1736   ret <1 x float> %data
1737 }
1738
1739 define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1740 ; CHECK-LABEL: @struct_buffer_load_v2f32(
1741 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1742 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1743 ;
1744   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1745   ret <2 x float> %data
1746 }
1747
1748 define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1749 ; CHECK-LABEL: @struct_buffer_load_v4f32(
1750 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1751 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1752 ;
1753   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1754   ret <4 x float> %data
1755 }
1756
1757 define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1758 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
1759 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1760 ; CHECK-NEXT:    ret float [[DATA]]
1761 ;
1762   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1763   %elt0 = extractelement <2 x float> %data, i32 0
1764   ret float %elt0
1765 }
1766
1767 define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1768 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
1769 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1770 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1771 ; CHECK-NEXT:    ret float [[DATA]]
1772 ;
1773   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1774   %elt1 = extractelement <2 x float> %data, i32 1
1775   ret float %elt1
1776 }
1777
1778 define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1779 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
1780 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1781 ; CHECK-NEXT:    ret float [[DATA]]
1782 ;
1783   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1784   %elt0 = extractelement <4 x float> %data, i32 0
1785   ret float %elt0
1786 }
1787
1788 define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1789 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
1790 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1791 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1792 ; CHECK-NEXT:    ret float [[DATA]]
1793 ;
1794   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1795   %elt1 = extractelement <4 x float> %data, i32 1
1796   ret float %elt1
1797 }
1798
1799 define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1800 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
1801 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1802 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1803 ; CHECK-NEXT:    ret float [[DATA]]
1804 ;
1805   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1806   %elt1 = extractelement <4 x float> %data, i32 2
1807   ret float %elt1
1808 }
1809
1810 define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1811 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
1812 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
1813 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1814 ; CHECK-NEXT:    ret float [[DATA]]
1815 ;
1816   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1817   %elt1 = extractelement <4 x float> %data, i32 3
1818   ret float %elt1
1819 }
1820
1821 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1822 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
1823 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1824 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1825 ;
1826   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1827   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1828   ret <2 x float> %shuf
1829 }
1830
1831 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1832 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
1833 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1834 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1835 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1836 ;
1837   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1838   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1839   ret <2 x float> %shuf
1840 }
1841
1842 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1843 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
1844 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1845 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1846 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1847 ;
1848   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1849   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1850   ret <2 x float> %shuf
1851 }
1852
1853 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1854 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
1855 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1856 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1857 ;
1858   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1859   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1860   ret <3 x float> %shuf
1861 }
1862
1863 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1864 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
1865 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1866 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1867 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1868 ;
1869   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1870   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1871   ret <3 x float> %shuf
1872 }
1873
1874 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1875 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
1876 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1877 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1878 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1879 ;
1880   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1881   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1882   ret <3 x float> %shuf
1883 }
1884
1885 define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1886 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
1887 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1888 ; CHECK-NEXT:    ret float [[DATA]]
1889 ;
1890   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1891   %elt0 = extractelement <3 x float> %data, i32 0
1892   ret float %elt0
1893 }
1894
1895 define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1896 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
1897 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1898 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1899 ; CHECK-NEXT:    ret float [[DATA]]
1900 ;
1901   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1902   %elt1 = extractelement <3 x float> %data, i32 1
1903   ret float %elt1
1904 }
1905
1906 define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1907 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
1908 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1909 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1910 ; CHECK-NEXT:    ret float [[DATA]]
1911 ;
1912   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1913   %elt1 = extractelement <3 x float> %data, i32 2
1914   ret float %elt1
1915 }
1916
1917 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1918 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
1919 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1920 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1921 ;
1922   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1923   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1924   ret <2 x float> %shuf
1925 }
1926
1927 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1928 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
1929 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1930 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1931 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1932 ;
1933   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1934   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1935   ret <2 x float> %shuf
1936 }
1937
1938 define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1939 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
1940 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1941 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1942 ; CHECK-NEXT:    ret i32 [[VAR2]]
1943 ;
1944   %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1945   %var1 = bitcast <4 x float> %var to <4 x i32>
1946   %var2 = extractelement <4 x i32> %var1, i32 0
1947   ret i32 %var2
1948 }
1949
1950 define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1951 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
1952 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1953 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1954 ; CHECK-NEXT:    ret float [[VAR2]]
1955 ;
1956   %var = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1957   %var1 = bitcast <4 x i32> %var to <4 x float>
1958   %var2 = extractelement <4 x float> %var1, i32 0
1959   ret float %var2
1960 }
1961
1962 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1963 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
1964 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1965 ; CHECK-NEXT:    ret float [[DATA]]
1966 ;
1967   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1968   %elt0 = extractelement <2 x float> %data, i32 0
1969   ret float %elt0
1970 }
1971
1972 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
1973 declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
1974 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
1975 declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
1976 declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
1977
1978 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
1979
1980 define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1981 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16(
1982 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1983 ; CHECK-NEXT:    ret half [[DATA]]
1984 ;
1985   %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1986   %elt0 = extractelement <2 x half> %data, i32 0
1987   ret half %elt0
1988 }
1989
1990 define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1991 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16(
1992 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1993 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1994 ; CHECK-NEXT:    ret half [[DATA]]
1995 ;
1996   %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1997   %elt1 = extractelement <2 x half> %data, i32 1
1998   ret half %elt1
1999 }
2000
2001 define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2002 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16(
2003 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2004 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2005 ; CHECK-NEXT:    ret half [[DATA]]
2006 ;
2007   %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2008   %elt1 = extractelement <3 x half> %data, i32 1
2009   ret half %elt1
2010 }
2011
2012 define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2013 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16(
2014 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2015 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2016 ; CHECK-NEXT:    ret half [[DATA]]
2017 ;
2018   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2019   %elt1 = extractelement <4 x half> %data, i32 1
2020   ret half %elt1
2021 }
2022
2023 define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2024 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16(
2025 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2026 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2027 ; CHECK-NEXT:    ret half [[DATA]]
2028 ;
2029   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2030   %elt1 = extractelement <4 x half> %data, i32 3
2031   ret half %elt1
2032 }
2033
2034 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2035 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16(
2036 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2037 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
2038 ;
2039   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2040   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2041   ret <2 x half> %shuf
2042 }
2043
2044 declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
2045 declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
2046 declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
2047 declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
2048
2049 define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2050 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8(
2051 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2052 ; CHECK-NEXT:    ret i8 [[DATA]]
2053 ;
2054   %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2055   %elt0 = extractelement <2 x i8> %data, i32 0
2056   ret i8 %elt0
2057 }
2058
2059 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2060 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8(
2061 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2062 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2063 ; CHECK-NEXT:    ret i8 [[DATA]]
2064 ;
2065   %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2066   %elt1 = extractelement <2 x i8> %data, i32 1
2067   ret i8 %elt1
2068 }
2069
2070 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2071 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8(
2072 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2073 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2074 ; CHECK-NEXT:    ret i8 [[DATA]]
2075 ;
2076   %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2077   %elt1 = extractelement <3 x i8> %data, i32 1
2078   ret i8 %elt1
2079 }
2080
2081 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2082 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8(
2083 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2084 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2085 ; CHECK-NEXT:    ret i8 [[DATA]]
2086 ;
2087   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2088   %elt1 = extractelement <4 x i8> %data, i32 1
2089   ret i8 %elt1
2090 }
2091
2092 define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2093 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8(
2094 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2095 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2096 ; CHECK-NEXT:    ret i8 [[DATA]]
2097 ;
2098   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2099   %elt1 = extractelement <4 x i8> %data, i32 3
2100   ret i8 %elt1
2101 }
2102
2103 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2104 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8(
2105 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2106 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
2107 ;
2108   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2109   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2110   ret <2 x i8> %shuf
2111 }
2112
2113 declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1
2114 declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1
2115 declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1
2116 declare <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1
2117
2118 ; --------------------------------------------------------------------
2119 ; llvm.amdgcn.struct.ptr.buffer.load
2120 ; --------------------------------------------------------------------
2121
2122 define amdgpu_ps float @struct_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2123 ; CHECK-LABEL: @struct_ptr_buffer_load_f32(
2124 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2125 ; CHECK-NEXT:    ret float [[DATA]]
2126 ;
2127   %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2128   ret float %data
2129 }
2130
2131 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2132 ; CHECK-LABEL: @struct_ptr_buffer_load_v1f32(
2133 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2134 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2135 ;
2136   %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2137   ret <1 x float> %data
2138 }
2139
2140 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2141 ; CHECK-LABEL: @struct_ptr_buffer_load_v2f32(
2142 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2143 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2144 ;
2145   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2146   ret <2 x float> %data
2147 }
2148
2149 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2150 ; CHECK-LABEL: @struct_ptr_buffer_load_v4f32(
2151 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2152 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2153 ;
2154   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2155   ret <4 x float> %data
2156 }
2157
2158 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2159 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f32(
2160 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2161 ; CHECK-NEXT:    ret float [[DATA]]
2162 ;
2163   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2164   %elt0 = extractelement <2 x float> %data, i32 0
2165   ret float %elt0
2166 }
2167
2168 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2169 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f32(
2170 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2171 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2172 ; CHECK-NEXT:    ret float [[DATA]]
2173 ;
2174   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2175   %elt1 = extractelement <2 x float> %data, i32 1
2176   ret float %elt1
2177 }
2178
2179 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2180 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v4f32(
2181 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2182 ; CHECK-NEXT:    ret float [[DATA]]
2183 ;
2184   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2185   %elt0 = extractelement <4 x float> %data, i32 0
2186   ret float %elt0
2187 }
2188
2189 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2190 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f32(
2191 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2192 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2193 ; CHECK-NEXT:    ret float [[DATA]]
2194 ;
2195   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2196   %elt1 = extractelement <4 x float> %data, i32 1
2197   ret float %elt1
2198 }
2199
2200 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2201 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v4f32(
2202 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2203 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2204 ; CHECK-NEXT:    ret float [[DATA]]
2205 ;
2206   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2207   %elt1 = extractelement <4 x float> %data, i32 2
2208   ret float %elt1
2209 }
2210
2211 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2212 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f32(
2213 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
2214 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2215 ; CHECK-NEXT:    ret float [[DATA]]
2216 ;
2217   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2218   %elt1 = extractelement <4 x float> %data, i32 3
2219   ret float %elt1
2220 }
2221
2222 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2223 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(
2224 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2225 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2226 ;
2227   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2228   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2229   ret <2 x float> %shuf
2230 }
2231
2232 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2233 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(
2234 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2235 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2236 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2237 ;
2238   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2239   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2240   ret <2 x float> %shuf
2241 }
2242
2243 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2244 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(
2245 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2246 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2247 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2248 ;
2249   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2250   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2251   ret <2 x float> %shuf
2252 }
2253
2254 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2255 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(
2256 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2257 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2258 ;
2259   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2260   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2261   ret <3 x float> %shuf
2262 }
2263
2264 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2265 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(
2266 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2267 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2268 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2269 ;
2270   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2271   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2272   ret <3 x float> %shuf
2273 }
2274
2275 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2276 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(
2277 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2278 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2279 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2280 ;
2281   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2282   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2283   ret <3 x float> %shuf
2284 }
2285
2286 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2287 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v3f32(
2288 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2289 ; CHECK-NEXT:    ret float [[DATA]]
2290 ;
2291   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2292   %elt0 = extractelement <3 x float> %data, i32 0
2293   ret float %elt0
2294 }
2295
2296 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2297 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f32(
2298 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2299 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2300 ; CHECK-NEXT:    ret float [[DATA]]
2301 ;
2302   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2303   %elt1 = extractelement <3 x float> %data, i32 1
2304   ret float %elt1
2305 }
2306
2307 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2308 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v3f32(
2309 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2310 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2311 ; CHECK-NEXT:    ret float [[DATA]]
2312 ;
2313   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2314   %elt1 = extractelement <3 x float> %data, i32 2
2315   ret float %elt1
2316 }
2317
2318 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2319 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(
2320 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2321 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2322 ;
2323   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2324   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2325   ret <2 x float> %shuf
2326 }
2327
2328 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2329 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(
2330 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2331 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2332 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2333 ;
2334   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2335   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2336   ret <2 x float> %shuf
2337 }
2338
2339 define i32 @extract0_bitcast_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2340 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4f32(
2341 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2342 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2343 ; CHECK-NEXT:    ret i32 [[VAR2]]
2344 ;
2345   %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2346   %var1 = bitcast <4 x float> %var to <4 x i32>
2347   %var2 = extractelement <4 x i32> %var1, i32 0
2348   ret i32 %var2
2349 }
2350
2351 define float @extract0_bitcast_struct_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2352 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4i32(
2353 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2354 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
2355 ; CHECK-NEXT:    ret float [[VAR2]]
2356 ;
2357   %var = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2358   %var1 = bitcast <4 x i32> %var to <4 x float>
2359   %var2 = extractelement <4 x float> %var1, i32 0
2360   ret float %var2
2361 }
2362
2363 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2364 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(
2365 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2366 ; CHECK-NEXT:    ret float [[DATA]]
2367 ;
2368   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2369   %elt0 = extractelement <2 x float> %data, i32 0
2370   ret float %elt0
2371 }
2372
2373 declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
2374 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
2375 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
2376 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
2377 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
2378
2379 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
2380
2381 define amdgpu_ps half @extract_elt0_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2382 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f16(
2383 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2384 ; CHECK-NEXT:    ret half [[DATA]]
2385 ;
2386   %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2387   %elt0 = extractelement <2 x half> %data, i32 0
2388   ret half %elt0
2389 }
2390
2391 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2392 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f16(
2393 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2394 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2395 ; CHECK-NEXT:    ret half [[DATA]]
2396 ;
2397   %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2398   %elt1 = extractelement <2 x half> %data, i32 1
2399   ret half %elt1
2400 }
2401
2402 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2403 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f16(
2404 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2405 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2406 ; CHECK-NEXT:    ret half [[DATA]]
2407 ;
2408   %data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2409   %elt1 = extractelement <3 x half> %data, i32 1
2410   ret half %elt1
2411 }
2412
2413 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2414 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f16(
2415 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2416 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2417 ; CHECK-NEXT:    ret half [[DATA]]
2418 ;
2419   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2420   %elt1 = extractelement <4 x half> %data, i32 1
2421   ret half %elt1
2422 }
2423
2424 define amdgpu_ps half @extract_elt3_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2425 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f16(
2426 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2427 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2428 ; CHECK-NEXT:    ret half [[DATA]]
2429 ;
2430   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2431   %elt1 = extractelement <4 x half> %data, i32 3
2432   ret half %elt1
2433 }
2434
2435 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2436 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(
2437 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2438 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
2439 ;
2440   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2441   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2442   ret <2 x half> %shuf
2443 }
2444
2445 declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
2446 declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
2447 declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
2448 declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
2449
2450 define amdgpu_ps i8 @extract_elt0_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2451 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2i8(
2452 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2453 ; CHECK-NEXT:    ret i8 [[DATA]]
2454 ;
2455   %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2456   %elt0 = extractelement <2 x i8> %data, i32 0
2457   ret i8 %elt0
2458 }
2459
2460 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2461 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2i8(
2462 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2463 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2464 ; CHECK-NEXT:    ret i8 [[DATA]]
2465 ;
2466   %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2467   %elt1 = extractelement <2 x i8> %data, i32 1
2468   ret i8 %elt1
2469 }
2470
2471 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2472 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3i8(
2473 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2474 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2475 ; CHECK-NEXT:    ret i8 [[DATA]]
2476 ;
2477   %data = call <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2478   %elt1 = extractelement <3 x i8> %data, i32 1
2479   ret i8 %elt1
2480 }
2481
2482 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2483 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4i8(
2484 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2485 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2486 ; CHECK-NEXT:    ret i8 [[DATA]]
2487 ;
2488   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2489   %elt1 = extractelement <4 x i8> %data, i32 1
2490   ret i8 %elt1
2491 }
2492
2493 define amdgpu_ps i8 @extract_elt3_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2494 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4i8(
2495 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2496 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2497 ; CHECK-NEXT:    ret i8 [[DATA]]
2498 ;
2499   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2500   %elt1 = extractelement <4 x i8> %data, i32 3
2501   ret i8 %elt1
2502 }
2503
2504 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2505 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(
2506 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2507 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
2508 ;
2509   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2510   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2511   ret <2 x i8> %shuf
2512 }
2513
2514 declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #1
2515 declare <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32, i32) #1
2516 declare <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32, i32) #1
2517 declare <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32, i32) #1
2518
2519 ; --------------------------------------------------------------------
2520 ; llvm.amdgcn.struct.buffer.load.format
2521 ; --------------------------------------------------------------------
2522
2523 define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2524 ; CHECK-LABEL: @struct_buffer_load_format_f32(
2525 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2526 ; CHECK-NEXT:    ret float [[DATA]]
2527 ;
2528   %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2529   ret float %data
2530 }
2531
2532 define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2533 ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
2534 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2535 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2536 ;
2537   %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2538   ret <1 x float> %data
2539 }
2540
2541 define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2542 ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
2543 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2544 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2545 ;
2546   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2547   ret <2 x float> %data
2548 }
2549
2550 define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2551 ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
2552 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2553 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2554 ;
2555   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2556   ret <4 x float> %data
2557 }
2558
2559 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2560 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
2561 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2562 ; CHECK-NEXT:    ret float [[DATA]]
2563 ;
2564   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2565   %elt0 = extractelement <2 x float> %data, i32 0
2566   ret float %elt0
2567 }
2568
2569 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2570 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
2571 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2572 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2573 ; CHECK-NEXT:    ret float [[ELT1]]
2574 ;
2575   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2576   %elt1 = extractelement <2 x float> %data, i32 1
2577   ret float %elt1
2578 }
2579
2580 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2581 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
2582 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2583 ; CHECK-NEXT:    ret float [[DATA]]
2584 ;
2585   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2586   %elt0 = extractelement <4 x float> %data, i32 0
2587   ret float %elt0
2588 }
2589
2590 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2591 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
2592 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2593 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2594 ; CHECK-NEXT:    ret float [[ELT1]]
2595 ;
2596   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2597   %elt1 = extractelement <4 x float> %data, i32 1
2598   ret float %elt1
2599 }
2600
2601 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2602 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
2603 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2604 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2605 ; CHECK-NEXT:    ret float [[ELT1]]
2606 ;
2607   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2608   %elt1 = extractelement <4 x float> %data, i32 2
2609   ret float %elt1
2610 }
2611
2612 define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2613 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
2614 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2615 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2616 ; CHECK-NEXT:    ret float [[ELT1]]
2617 ;
2618   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2619   %elt1 = extractelement <4 x float> %data, i32 3
2620   ret float %elt1
2621 }
2622
2623 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2624 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
2625 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2626 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2627 ;
2628   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2629   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2630   ret <2 x float> %shuf
2631 }
2632
2633 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2634 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
2635 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2636 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2637 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2638 ;
2639   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2640   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2641   ret <2 x float> %shuf
2642 }
2643
2644 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2645 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
2646 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2647 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2648 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2649 ;
2650   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2651   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2652   ret <2 x float> %shuf
2653 }
2654
2655 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2656 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
2657 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2658 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2659 ;
2660   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2661   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2662   ret <3 x float> %shuf
2663 }
2664
2665 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2666 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
2667 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2668 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2669 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2670 ;
2671   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2672   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2673   ret <3 x float> %shuf
2674 }
2675
2676 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2677 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
2678 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2679 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2680 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2681 ;
2682   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2683   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2684   ret <3 x float> %shuf
2685 }
2686
2687 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2688 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
2689 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2690 ; CHECK-NEXT:    ret float [[DATA]]
2691 ;
2692   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2693   %elt0 = extractelement <3 x float> %data, i32 0
2694   ret float %elt0
2695 }
2696
2697 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2698 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
2699 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2700 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2701 ; CHECK-NEXT:    ret float [[ELT1]]
2702 ;
2703   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2704   %elt1 = extractelement <3 x float> %data, i32 1
2705   ret float %elt1
2706 }
2707
2708 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2709 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
2710 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2711 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2712 ; CHECK-NEXT:    ret float [[ELT1]]
2713 ;
2714   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2715   %elt1 = extractelement <3 x float> %data, i32 2
2716   ret float %elt1
2717 }
2718
2719 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2720 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
2721 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2722 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2723 ;
2724   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2725   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2726   ret <2 x float> %shuf
2727 }
2728
2729 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2730 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
2731 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2732 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2733 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2734 ;
2735   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2736   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2737   ret <2 x float> %shuf
2738 }
2739
2740 define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2741 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
2742 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2743 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2744 ; CHECK-NEXT:    ret i32 [[VAR2]]
2745 ;
2746   %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2747   %var1 = bitcast <4 x float> %var to <4 x i32>
2748   %var2 = extractelement <4 x i32> %var1, i32 0
2749   ret i32 %var2
2750 }
2751
2752 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2753 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
2754 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2755 ; CHECK-NEXT:    ret float [[DATA]]
2756 ;
2757   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2758   %elt0 = extractelement <2 x float> %data, i32 0
2759   ret float %elt0
2760 }
2761
2762 declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
2763 declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
2764 declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
2765 declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
2766 declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
2767
2768 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1
2769
2770 ; --------------------------------------------------------------------
2771 ; llvm.amdgcn.struct.ptr.buffer.load.format
2772 ; --------------------------------------------------------------------
2773
2774 define amdgpu_ps float @struct_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2775 ; CHECK-LABEL: @struct_ptr_buffer_load_format_f32(
2776 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2777 ; CHECK-NEXT:    ret float [[DATA]]
2778 ;
2779   %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2780   ret float %data
2781 }
2782
2783 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2784 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v1f32(
2785 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2786 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2787 ;
2788   %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2789   ret <1 x float> %data
2790 }
2791
2792 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2793 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v2f32(
2794 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2795 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2796 ;
2797   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2798   ret <2 x float> %data
2799 }
2800
2801 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2802 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v4f32(
2803 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2804 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2805 ;
2806   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2807   ret <4 x float> %data
2808 }
2809
2810 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2811 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v2f32(
2812 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2813 ; CHECK-NEXT:    ret float [[DATA]]
2814 ;
2815   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2816   %elt0 = extractelement <2 x float> %data, i32 0
2817   ret float %elt0
2818 }
2819
2820 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2821 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v2f32(
2822 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2823 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2824 ; CHECK-NEXT:    ret float [[ELT1]]
2825 ;
2826   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2827   %elt1 = extractelement <2 x float> %data, i32 1
2828   ret float %elt1
2829 }
2830
2831 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2832 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v4f32(
2833 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2834 ; CHECK-NEXT:    ret float [[DATA]]
2835 ;
2836   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2837   %elt0 = extractelement <4 x float> %data, i32 0
2838   ret float %elt0
2839 }
2840
2841 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2842 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v4f32(
2843 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2844 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2845 ; CHECK-NEXT:    ret float [[ELT1]]
2846 ;
2847   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2848   %elt1 = extractelement <4 x float> %data, i32 1
2849   ret float %elt1
2850 }
2851
2852 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2853 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v4f32(
2854 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2855 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2856 ; CHECK-NEXT:    ret float [[ELT1]]
2857 ;
2858   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2859   %elt1 = extractelement <4 x float> %data, i32 2
2860   ret float %elt1
2861 }
2862
2863 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2864 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_format_v4f32(
2865 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2866 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2867 ; CHECK-NEXT:    ret float [[ELT1]]
2868 ;
2869   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2870   %elt1 = extractelement <4 x float> %data, i32 3
2871   ret float %elt1
2872 }
2873
2874 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2875 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(
2876 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2877 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2878 ;
2879   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2880   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2881   ret <2 x float> %shuf
2882 }
2883
2884 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2885 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2886 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2887 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2888 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2889 ;
2890   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2891   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2892   ret <2 x float> %shuf
2893 }
2894
2895 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2896 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2897 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2898 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2899 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2900 ;
2901   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2902   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2903   ret <2 x float> %shuf
2904 }
2905
2906 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2907 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2908 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2909 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2910 ;
2911   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2912   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2913   ret <3 x float> %shuf
2914 }
2915
2916 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2917 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2918 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2919 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2920 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2921 ;
2922   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2923   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2924   ret <3 x float> %shuf
2925 }
2926
2927 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2928 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2929 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2930 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2931 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2932 ;
2933   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2934   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2935   ret <3 x float> %shuf
2936 }
2937
2938 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2939 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v3f32(
2940 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2941 ; CHECK-NEXT:    ret float [[DATA]]
2942 ;
2943   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2944   %elt0 = extractelement <3 x float> %data, i32 0
2945   ret float %elt0
2946 }
2947
2948 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2949 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v3f32(
2950 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2951 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2952 ; CHECK-NEXT:    ret float [[ELT1]]
2953 ;
2954   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2955   %elt1 = extractelement <3 x float> %data, i32 1
2956   ret float %elt1
2957 }
2958
2959 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2960 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v3f32(
2961 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2962 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2963 ; CHECK-NEXT:    ret float [[ELT1]]
2964 ;
2965   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2966   %elt1 = extractelement <3 x float> %data, i32 2
2967   ret float %elt1
2968 }
2969
2970 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2971 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(
2972 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2973 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2974 ;
2975   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2976   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2977   ret <2 x float> %shuf
2978 }
2979
2980 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2981 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(
2982 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2983 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2984 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2985 ;
2986   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2987   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2988   ret <2 x float> %shuf
2989 }
2990
2991 define i32 @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2992 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(
2993 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2994 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2995 ; CHECK-NEXT:    ret i32 [[VAR2]]
2996 ;
2997   %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2998   %var1 = bitcast <4 x float> %var to <4 x i32>
2999   %var2 = extractelement <4 x i32> %var1, i32 0
3000   ret i32 %var2
3001 }
3002
3003 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
3004 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(
3005 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
3006 ; CHECK-NEXT:    ret float [[DATA]]
3007 ;
3008   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
3009   %elt0 = extractelement <2 x float> %data, i32 0
3010   ret float %elt0
3011 }
3012
3013 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3014 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
3015 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3016 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3017 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3018
3019 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.format.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3020
3021
3022 ; --------------------------------------------------------------------
3023 ; llvm.amdgcn.raw.tbuffer.load
3024 ; --------------------------------------------------------------------
3025
3026 define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3027 ; CHECK-LABEL: @raw_tbuffer_load_f32(
3028 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3029 ; CHECK-NEXT:    ret float [[DATA]]
3030 ;
3031   %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3032   ret float %data
3033 }
3034
3035 define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3036 ; CHECK-LABEL: @raw_tbuffer_load_v2f32(
3037 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3038 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3039 ;
3040   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3041   ret <2 x float> %data
3042 }
3043
3044 define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3045 ; CHECK-LABEL: @raw_tbuffer_load_v4f32(
3046 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3047 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3048 ;
3049   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3050   ret <4 x float> %data
3051 }
3052
3053 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3054 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32(
3055 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3056 ; CHECK-NEXT:    ret float [[DATA]]
3057 ;
3058   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3059   %elt0 = extractelement <2 x float> %data, i32 0
3060   ret float %elt0
3061 }
3062
3063 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3064 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32(
3065 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3066 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3067 ; CHECK-NEXT:    ret float [[ELT1]]
3068 ;
3069   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3070   %elt1 = extractelement <2 x float> %data, i32 1
3071   ret float %elt1
3072 }
3073
3074 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3075 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32(
3076 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3077 ; CHECK-NEXT:    ret float [[DATA]]
3078 ;
3079   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3080   %elt0 = extractelement <4 x float> %data, i32 0
3081   ret float %elt0
3082 }
3083
3084 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3085 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32(
3086 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3087 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3088 ; CHECK-NEXT:    ret float [[ELT1]]
3089 ;
3090   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3091   %elt1 = extractelement <4 x float> %data, i32 1
3092   ret float %elt1
3093 }
3094
3095 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3096 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32(
3097 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3098 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3099 ; CHECK-NEXT:    ret float [[ELT1]]
3100 ;
3101   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3102   %elt1 = extractelement <4 x float> %data, i32 2
3103   ret float %elt1
3104 }
3105
3106 define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3107 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32(
3108 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3109 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3110 ; CHECK-NEXT:    ret float [[ELT1]]
3111 ;
3112   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3113   %elt1 = extractelement <4 x float> %data, i32 3
3114   ret float %elt1
3115 }
3116
3117 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3118 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32(
3119 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3120 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3121 ;
3122   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3123   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3124   ret <2 x float> %shuf
3125 }
3126
3127 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3128 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
3129 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3130 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3131 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3132 ;
3133   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3134   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3135   ret <2 x float> %shuf
3136 }
3137
3138 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3139 ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
3140 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3141 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3142 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3143 ;
3144   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3145   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3146   ret <2 x float> %shuf
3147 }
3148
3149 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3150 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(
3151 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3152 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3153 ;
3154   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3155   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3156   ret <3 x float> %shuf
3157 }
3158
3159 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3160 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
3161 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3162 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3163 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3164 ;
3165   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3166   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3167   ret <3 x float> %shuf
3168 }
3169
3170 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3171 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
3172 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3173 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3174 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3175 ;
3176   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3177   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3178   ret <3 x float> %shuf
3179 }
3180
3181 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3182 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32(
3183 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3184 ; CHECK-NEXT:    ret float [[DATA]]
3185 ;
3186   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3187   %elt0 = extractelement <3 x float> %data, i32 0
3188   ret float %elt0
3189 }
3190
3191 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3192 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32(
3193 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3194 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3195 ; CHECK-NEXT:    ret float [[ELT1]]
3196 ;
3197   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3198   %elt1 = extractelement <3 x float> %data, i32 1
3199   ret float %elt1
3200 }
3201
3202 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3203 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32(
3204 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3205 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3206 ; CHECK-NEXT:    ret float [[ELT1]]
3207 ;
3208   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3209   %elt1 = extractelement <3 x float> %data, i32 2
3210   ret float %elt1
3211 }
3212
3213 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3214 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32(
3215 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3216 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3217 ;
3218   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3219   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3220   ret <2 x float> %shuf
3221 }
3222
3223 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3224 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
3225 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3226 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3227 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3228 ;
3229   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3230   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3231   ret <2 x float> %shuf
3232 }
3233
3234 define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3235 ; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32(
3236 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3237 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3238 ; CHECK-NEXT:    ret i32 [[VAR2]]
3239 ;
3240   %var = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3241   %var1 = bitcast <4 x float> %var to <4 x i32>
3242   %var2 = extractelement <4 x i32> %var1, i32 0
3243   ret i32 %var2
3244 }
3245
3246 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3247 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(
3248 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3249 ; CHECK-NEXT:    ret float [[DATA]]
3250 ;
3251   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3252   %elt0 = extractelement <2 x float> %data, i32 0
3253   ret float %elt0
3254 }
3255
3256 declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
3257 declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
3258 declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
3259 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
3260
3261 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
3262
3263 define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3264 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
3265 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3266 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3267 ; CHECK-NEXT:    ret half [[ELT1]]
3268 ;
3269   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3270   %elt1 = extractelement <4 x half> %data, i32 3
3271   ret half %elt1
3272 }
3273
3274 define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3275 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
3276 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3277 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3278 ; CHECK-NEXT:    ret half [[ELT1]]
3279 ;
3280   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3281   %elt1 = extractelement <4 x half> %data, i32 2
3282   ret half %elt1
3283 }
3284
3285 define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3286 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
3287 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3288 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3289 ; CHECK-NEXT:    ret half [[ELT1]]
3290 ;
3291   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3292   %elt1 = extractelement <4 x half> %data, i32 1
3293   ret half %elt1
3294 }
3295
3296 define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3297 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
3298 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3299 ; CHECK-NEXT:    ret half [[DATA]]
3300 ;
3301   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3302   %elt1 = extractelement <4 x half> %data, i32 0
3303   ret half %elt1
3304 }
3305
3306 declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
3307 declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
3308 declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
3309 declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
3310
3311 ; --------------------------------------------------------------------
3312 ; llvm.amdgcn.raw.ptr.tbuffer.load
3313 ; --------------------------------------------------------------------
3314
3315 define amdgpu_ps float @raw_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3316 ; CHECK-LABEL: @raw_ptr_tbuffer_load_f32(
3317 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3318 ; CHECK-NEXT:    ret float [[DATA]]
3319 ;
3320   %data = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3321   ret float %data
3322 }
3323
3324 define amdgpu_ps <2 x float> @raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3325 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v2f32(
3326 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3327 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3328 ;
3329   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3330   ret <2 x float> %data
3331 }
3332
3333 define amdgpu_ps <4 x float> @raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3334 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v4f32(
3335 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3336 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3337 ;
3338   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3339   ret <4 x float> %data
3340 }
3341
3342 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3343 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v2f32(
3344 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3345 ; CHECK-NEXT:    ret float [[DATA]]
3346 ;
3347   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3348   %elt0 = extractelement <2 x float> %data, i32 0
3349   ret float %elt0
3350 }
3351
3352 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3353 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v2f32(
3354 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3355 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3356 ; CHECK-NEXT:    ret float [[ELT1]]
3357 ;
3358   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3359   %elt1 = extractelement <2 x float> %data, i32 1
3360   ret float %elt1
3361 }
3362
3363 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3364 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f32(
3365 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3366 ; CHECK-NEXT:    ret float [[DATA]]
3367 ;
3368   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3369   %elt0 = extractelement <4 x float> %data, i32 0
3370   ret float %elt0
3371 }
3372
3373 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3374 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f32(
3375 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3376 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3377 ; CHECK-NEXT:    ret float [[ELT1]]
3378 ;
3379   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3380   %elt1 = extractelement <4 x float> %data, i32 1
3381   ret float %elt1
3382 }
3383
3384 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3385 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f32(
3386 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3387 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3388 ; CHECK-NEXT:    ret float [[ELT1]]
3389 ;
3390   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3391   %elt1 = extractelement <4 x float> %data, i32 2
3392   ret float %elt1
3393 }
3394
3395 define amdgpu_ps float @extract_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3396 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f32(
3397 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3398 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3399 ; CHECK-NEXT:    ret float [[ELT1]]
3400 ;
3401   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3402   %elt1 = extractelement <4 x float> %data, i32 3
3403   ret float %elt1
3404 }
3405
3406 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3407 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(
3408 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3409 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3410 ;
3411   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3412   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3413   ret <2 x float> %shuf
3414 }
3415
3416 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3417 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3418 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3419 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3420 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3421 ;
3422   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3423   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3424   ret <2 x float> %shuf
3425 }
3426
3427 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3428 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3429 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3430 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3431 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3432 ;
3433   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3434   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3435   ret <2 x float> %shuf
3436 }
3437
3438 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3439 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3440 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3441 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3442 ;
3443   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3444   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3445   ret <3 x float> %shuf
3446 }
3447
3448 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3449 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3450 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3451 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3452 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3453 ;
3454   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3455   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3456   ret <3 x float> %shuf
3457 }
3458
3459 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3460 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3461 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3462 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3463 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3464 ;
3465   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3466   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3467   ret <3 x float> %shuf
3468 }
3469
3470 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3471 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v3f32(
3472 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3473 ; CHECK-NEXT:    ret float [[DATA]]
3474 ;
3475   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3476   %elt0 = extractelement <3 x float> %data, i32 0
3477   ret float %elt0
3478 }
3479
3480 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3481 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v3f32(
3482 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3483 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3484 ; CHECK-NEXT:    ret float [[ELT1]]
3485 ;
3486   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3487   %elt1 = extractelement <3 x float> %data, i32 1
3488   ret float %elt1
3489 }
3490
3491 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3492 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v3f32(
3493 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3494 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3495 ; CHECK-NEXT:    ret float [[ELT1]]
3496 ;
3497   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3498   %elt1 = extractelement <3 x float> %data, i32 2
3499   ret float %elt1
3500 }
3501
3502 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3503 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(
3504 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3505 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3506 ;
3507   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3508   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3509   ret <2 x float> %shuf
3510 }
3511
3512 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3513 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(
3514 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3515 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3516 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3517 ;
3518   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3519   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3520   ret <2 x float> %shuf
3521 }
3522
3523 define i32 @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3524 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(
3525 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3526 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3527 ; CHECK-NEXT:    ret i32 [[VAR2]]
3528 ;
3529   %var = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3530   %var1 = bitcast <4 x float> %var to <4 x i32>
3531   %var2 = extractelement <4 x i32> %var1, i32 0
3532   ret i32 %var2
3533 }
3534
3535 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3536 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(
3537 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3538 ; CHECK-NEXT:    ret float [[DATA]]
3539 ;
3540   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3541   %elt0 = extractelement <2 x float> %data, i32 0
3542   ret float %elt0
3543 }
3544
3545 declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3546 declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3547 declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3548 declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3549
3550 declare <4 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3551
3552 define amdgpu_ps half @extract_elt3_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3553 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f16(
3554 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3555 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3556 ; CHECK-NEXT:    ret half [[ELT1]]
3557 ;
3558   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3559   %elt1 = extractelement <4 x half> %data, i32 3
3560   ret half %elt1
3561 }
3562
3563 define amdgpu_ps half @extract_elt2_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3564 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f16(
3565 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3566 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3567 ; CHECK-NEXT:    ret half [[ELT1]]
3568 ;
3569   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3570   %elt1 = extractelement <4 x half> %data, i32 2
3571   ret half %elt1
3572 }
3573
3574 define amdgpu_ps half @extract_elt1_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3575 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f16(
3576 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3577 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3578 ; CHECK-NEXT:    ret half [[ELT1]]
3579 ;
3580   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3581   %elt1 = extractelement <4 x half> %data, i32 1
3582   ret half %elt1
3583 }
3584
3585 define amdgpu_ps half @extract_elt0_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3586 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f16(
3587 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3588 ; CHECK-NEXT:    ret half [[DATA]]
3589 ;
3590   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3591   %elt1 = extractelement <4 x half> %data, i32 0
3592   ret half %elt1
3593 }
3594
3595 declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
3596 declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
3597 declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
3598 declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
3599
3600 ; --------------------------------------------------------------------
3601 ; llvm.amdgcn.struct.tbuffer.load
3602 ; --------------------------------------------------------------------
3603
3604 define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3605 ; CHECK-LABEL: @struct_tbuffer_load_f32(
3606 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3607 ; CHECK-NEXT:    ret float [[DATA]]
3608 ;
3609   %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3610   ret float %data
3611 }
3612
3613 define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3614 ; CHECK-LABEL: @struct_tbuffer_load_v2f32(
3615 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3616 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3617 ;
3618   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3619   ret <2 x float> %data
3620 }
3621
3622 define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3623 ; CHECK-LABEL: @struct_tbuffer_load_v4f32(
3624 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3625 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3626 ;
3627   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3628   ret <4 x float> %data
3629 }
3630
3631 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3632 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32(
3633 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3634 ; CHECK-NEXT:    ret float [[DATA]]
3635 ;
3636   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3637   %elt0 = extractelement <2 x float> %data, i32 0
3638   ret float %elt0
3639 }
3640
3641 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3642 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32(
3643 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3644 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3645 ; CHECK-NEXT:    ret float [[ELT1]]
3646 ;
3647   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3648   %elt1 = extractelement <2 x float> %data, i32 1
3649   ret float %elt1
3650 }
3651
3652 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3653 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32(
3654 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3655 ; CHECK-NEXT:    ret float [[DATA]]
3656 ;
3657   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3658   %elt0 = extractelement <4 x float> %data, i32 0
3659   ret float %elt0
3660 }
3661
3662 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3663 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32(
3664 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3665 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3666 ; CHECK-NEXT:    ret float [[ELT1]]
3667 ;
3668   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3669   %elt1 = extractelement <4 x float> %data, i32 1
3670   ret float %elt1
3671 }
3672
3673 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3674 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32(
3675 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3676 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3677 ; CHECK-NEXT:    ret float [[ELT1]]
3678 ;
3679   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3680   %elt1 = extractelement <4 x float> %data, i32 2
3681   ret float %elt1
3682 }
3683
3684 define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3685 ; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32(
3686 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3687 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3688 ; CHECK-NEXT:    ret float [[ELT1]]
3689 ;
3690   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3691   %elt1 = extractelement <4 x float> %data, i32 3
3692   ret float %elt1
3693 }
3694
3695 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3696 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32(
3697 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3698 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3699 ;
3700   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3701   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3702   ret <2 x float> %shuf
3703 }
3704
3705 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3706 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
3707 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3708 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3709 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3710 ;
3711   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3712   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3713   ret <2 x float> %shuf
3714 }
3715
3716 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3717 ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32(
3718 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3719 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3720 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3721 ;
3722   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3723   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3724   ret <2 x float> %shuf
3725 }
3726
3727 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3728 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(
3729 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3730 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3731 ;
3732   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3733   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3734   ret <3 x float> %shuf
3735 }
3736
3737 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3738 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(
3739 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3740 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3741 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3742 ;
3743   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3744   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3745   ret <3 x float> %shuf
3746 }
3747
3748 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3749 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(
3750 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3751 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3752 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3753 ;
3754   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3755   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3756   ret <3 x float> %shuf
3757 }
3758
3759 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3760 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32(
3761 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3762 ; CHECK-NEXT:    ret float [[DATA]]
3763 ;
3764   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3765   %elt0 = extractelement <3 x float> %data, i32 0
3766   ret float %elt0
3767 }
3768
3769 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3770 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32(
3771 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3772 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3773 ; CHECK-NEXT:    ret float [[ELT1]]
3774 ;
3775   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3776   %elt1 = extractelement <3 x float> %data, i32 1
3777   ret float %elt1
3778 }
3779
3780 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3781 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32(
3782 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3783 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3784 ; CHECK-NEXT:    ret float [[ELT1]]
3785 ;
3786   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3787   %elt1 = extractelement <3 x float> %data, i32 2
3788   ret float %elt1
3789 }
3790
3791 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3792 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32(
3793 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3794 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3795 ;
3796   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3797   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3798   ret <2 x float> %shuf
3799 }
3800
3801 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3802 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32(
3803 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3804 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3805 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3806 ;
3807   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3808   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3809   ret <2 x float> %shuf
3810 }
3811
3812 define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3813 ; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32(
3814 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3815 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3816 ; CHECK-NEXT:    ret i32 [[VAR2]]
3817 ;
3818   %var = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3819   %var1 = bitcast <4 x float> %var to <4 x i32>
3820   %var2 = extractelement <4 x i32> %var1, i32 0
3821   ret i32 %var2
3822 }
3823
3824 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3825 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(
3826 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
3827 ; CHECK-NEXT:    ret float [[DATA]]
3828 ;
3829   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
3830   %elt0 = extractelement <2 x float> %data, i32 0
3831   ret float %elt0
3832 }
3833
3834 declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3835 declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3836 declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3837 declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3838
3839 declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1
3840
3841 ; --------------------------------------------------------------------
3842 ; llvm.amdgcn.struct.ptr.tbuffer.load
3843 ; --------------------------------------------------------------------
3844
3845 define amdgpu_ps float @struct_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3846 ; CHECK-LABEL: @struct_ptr_tbuffer_load_f32(
3847 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3848 ; CHECK-NEXT:    ret float [[DATA]]
3849 ;
3850   %data = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3851   ret float %data
3852 }
3853
3854 define amdgpu_ps <2 x float> @struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3855 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v2f32(
3856 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3857 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3858 ;
3859   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3860   ret <2 x float> %data
3861 }
3862
3863 define amdgpu_ps <4 x float> @struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3864 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v4f32(
3865 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3866 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3867 ;
3868   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3869   ret <4 x float> %data
3870 }
3871
3872 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3873 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v2f32(
3874 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3875 ; CHECK-NEXT:    ret float [[DATA]]
3876 ;
3877   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3878   %elt0 = extractelement <2 x float> %data, i32 0
3879   ret float %elt0
3880 }
3881
3882 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3883 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v2f32(
3884 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3885 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3886 ; CHECK-NEXT:    ret float [[ELT1]]
3887 ;
3888   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3889   %elt1 = extractelement <2 x float> %data, i32 1
3890   ret float %elt1
3891 }
3892
3893 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3894 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v4f32(
3895 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3896 ; CHECK-NEXT:    ret float [[DATA]]
3897 ;
3898   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3899   %elt0 = extractelement <4 x float> %data, i32 0
3900   ret float %elt0
3901 }
3902
3903 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3904 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v4f32(
3905 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3906 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3907 ; CHECK-NEXT:    ret float [[ELT1]]
3908 ;
3909   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3910   %elt1 = extractelement <4 x float> %data, i32 1
3911   ret float %elt1
3912 }
3913
3914 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3915 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v4f32(
3916 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3917 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3918 ; CHECK-NEXT:    ret float [[ELT1]]
3919 ;
3920   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3921   %elt1 = extractelement <4 x float> %data, i32 2
3922   ret float %elt1
3923 }
3924
3925 define amdgpu_ps float @extract_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3926 ; CHECK-LABEL: @extract_elt3_struct_ptr_tbuffer_load_v4f32(
3927 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3928 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3929 ; CHECK-NEXT:    ret float [[ELT1]]
3930 ;
3931   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3932   %elt1 = extractelement <4 x float> %data, i32 3
3933   ret float %elt1
3934 }
3935
3936 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3937 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(
3938 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3939 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3940 ;
3941   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3942   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3943   ret <2 x float> %shuf
3944 }
3945
3946 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3947 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3948 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3949 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3950 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3951 ;
3952   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3953   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3954   ret <2 x float> %shuf
3955 }
3956
3957 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3958 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3959 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3960 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3961 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3962 ;
3963   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3964   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3965   ret <2 x float> %shuf
3966 }
3967
3968 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3969 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3970 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3971 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3972 ;
3973   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3974   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3975   ret <3 x float> %shuf
3976 }
3977
3978 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3979 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3980 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3981 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3982 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3983 ;
3984   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3985   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3986   ret <3 x float> %shuf
3987 }
3988
3989 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3990 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3991 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3992 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3993 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3994 ;
3995   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3996   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3997   ret <3 x float> %shuf
3998 }
3999
4000 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4001 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v3f32(
4002 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4003 ; CHECK-NEXT:    ret float [[DATA]]
4004 ;
4005   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4006   %elt0 = extractelement <3 x float> %data, i32 0
4007   ret float %elt0
4008 }
4009
4010 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4011 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v3f32(
4012 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4013 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
4014 ; CHECK-NEXT:    ret float [[ELT1]]
4015 ;
4016   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4017   %elt1 = extractelement <3 x float> %data, i32 1
4018   ret float %elt1
4019 }
4020
4021 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4022 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v3f32(
4023 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4024 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
4025 ; CHECK-NEXT:    ret float [[ELT1]]
4026 ;
4027   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4028   %elt1 = extractelement <3 x float> %data, i32 2
4029   ret float %elt1
4030 }
4031
4032 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4033 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(
4034 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4035 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4036 ;
4037   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4038   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
4039   ret <2 x float> %shuf
4040 }
4041
4042 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4043 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(
4044 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4045 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
4046 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
4047 ;
4048   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4049   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
4050   ret <2 x float> %shuf
4051 }
4052
4053 define i32 @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4054 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(
4055 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4056 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
4057 ; CHECK-NEXT:    ret i32 [[VAR2]]
4058 ;
4059   %var = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4060   %var1 = bitcast <4 x float> %var to <4 x i32>
4061   %var2 = extractelement <4 x i32> %var1, i32 0
4062   ret i32 %var2
4063 }
4064
4065 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4066 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(
4067 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
4068 ; CHECK-NEXT:    ret float [[DATA]]
4069 ;
4070   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
4071   %elt0 = extractelement <2 x float> %data, i32 0
4072   ret float %elt0
4073 }
4074
4075 declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4076 declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4077 declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4078 declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4079
4080 declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4081
4082 ; --------------------------------------------------------------------
4083 ; llvm.amdgcn.image.sample
4084 ; --------------------------------------------------------------------
4085
4086 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4087 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
4088 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4089 ; CHECK-NEXT:    ret float [[DATA]]
4090 ;
4091   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4092   %elt0 = extractelement <4 x float> %data, i32 0
4093   ret float %elt0
4094 }
4095
4096 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4097 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4098 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe(
4099 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0)
4100 ; CHECK-NEXT:    [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4101 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4102 ; CHECK-NEXT:    ret float [[ELT0]]
4103 ;
4104   %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
4105   %data.vec = extractvalue {<4 x float>,i32} %data, 0
4106   %elt0 = extractelement <4 x float> %data.vec, i32 0
4107   ret float %elt0
4108 }
4109
4110 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4111 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4112 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe(
4113 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0)
4114 ; CHECK-NEXT:    [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4115 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4116 ; CHECK-NEXT:    ret float [[ELT0]]
4117 ;
4118   %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
4119   %data.vec = extractvalue {<4 x float>,i32} %data, 0
4120   %elt0 = extractelement <4 x float> %data.vec, i32 0
4121   ret float %elt0
4122 }
4123
4124 define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4125 ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
4126 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4127 ; CHECK-NEXT:    ret float [[DATA]]
4128 ;
4129   %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4130   %elt0 = extractelement <4 x float> %data, i32 0
4131   ret float %elt0
4132 }
4133
4134 define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4135 ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
4136 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4137 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
4138 ; CHECK-NEXT:    ret float [[ELT0]]
4139 ;
4140   %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4141   %elt0 = extractelement <4 x float> %data, i32 0
4142   ret float %elt0
4143 }
4144
4145 define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4146 ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
4147 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4148 ; CHECK-NEXT:    ret float [[DATA]]
4149 ;
4150   %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4151   %elt0 = extractelement <4 x float> %data, i32 0
4152   ret float %elt0
4153 }
4154
4155 define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4156 ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
4157 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4158 ; CHECK-NEXT:    ret float [[DATA]]
4159 ;
4160   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4161   %elt0 = extractelement <4 x float> %data, i32 0
4162   ret float %elt0
4163 }
4164
4165 define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4166 ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
4167 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4168 ; CHECK-NEXT:    ret float [[DATA]]
4169 ;
4170   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4171   %elt0 = extractelement <4 x float> %data, i32 0
4172   ret float %elt0
4173 }
4174
4175 define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4176 ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
4177 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4178 ; CHECK-NEXT:    ret float [[DATA]]
4179 ;
4180   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4181   %elt0 = extractelement <4 x float> %data, i32 0
4182   ret float %elt0
4183 }
4184
4185 define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4186 ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
4187 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4188 ; CHECK-NEXT:    ret float [[DATA]]
4189 ;
4190   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4191   %elt0 = extractelement <4 x float> %data, i32 0
4192   ret float %elt0
4193 }
4194
4195 define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4196 ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
4197 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4198 ; CHECK-NEXT:    ret float [[DATA]]
4199 ;
4200   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4201   %elt0 = extractelement <4 x float> %data, i32 0
4202   ret float %elt0
4203 }
4204
4205 define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4206 ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
4207 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4208 ; CHECK-NEXT:    ret float [[DATA]]
4209 ;
4210   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4211   %elt0 = extractelement <4 x float> %data, i32 0
4212   ret float %elt0
4213 }
4214
4215 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4216 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
4217 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4218 ; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0
4219 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
4220 ;
4221   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4222   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4223   ret <2 x float> %shuf
4224 }
4225
4226 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4227 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
4228 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4229 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4230 ;
4231   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4232   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4233   ret <2 x float> %shuf
4234 }
4235
4236 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4237 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
4238 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4239 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4240 ;
4241   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4242   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4243   ret <2 x float> %shuf
4244 }
4245
4246 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4247 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
4248 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4249 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4250 ;
4251   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4252   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4253   ret <2 x float> %shuf
4254 }
4255
4256 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4257 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
4258 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4259 ; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0
4260 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4261 ;
4262   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4263   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4264   ret <3 x float> %shuf
4265 }
4266
4267 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4268 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
4269 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4270 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4271 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4272 ;
4273   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4274   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4275   ret <3 x float> %shuf
4276 }
4277
4278 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4279 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
4280 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4281 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4282 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4283 ;
4284   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4285   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4286   ret <3 x float> %shuf
4287 }
4288
4289 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4290 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
4291 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4292 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4293 ;
4294   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4295   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4296   ret <3 x float> %shuf
4297 }
4298
4299 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4300 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
4301 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4302 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4303 ;
4304   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4305   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4306   ret <3 x float> %shuf
4307 }
4308
4309 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4310 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4311 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4312 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4313 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4314
4315 ; --------------------------------------------------------------------
4316 ; llvm.amdgcn.image.sample.cl
4317 ; --------------------------------------------------------------------
4318
4319 define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4320 ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
4321 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4322 ; CHECK-NEXT:    ret float [[DATA]]
4323 ;
4324   %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4325   %elt0 = extractelement <4 x float> %data, i32 1
4326   ret float %elt0
4327 }
4328
4329 declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4330
4331 ; --------------------------------------------------------------------
4332 ; llvm.amdgcn.image.sample.d
4333 ; --------------------------------------------------------------------
4334
4335 define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4336 ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
4337 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4338 ; CHECK-NEXT:    ret float [[DATA]]
4339 ;
4340   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4341   %elt0 = extractelement <4 x float> %data, i32 2
4342   ret float %elt0
4343 }
4344
4345 declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4346
4347 ; --------------------------------------------------------------------
4348 ; llvm.amdgcn.image.sample.d.cl
4349 ; --------------------------------------------------------------------
4350
4351 define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4352 ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
4353 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4354 ; CHECK-NEXT:    ret float [[DATA]]
4355 ;
4356   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4357   %elt0 = extractelement <4 x float> %data, i32 3
4358   ret float %elt0
4359 }
4360
4361 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4362
4363 ; --------------------------------------------------------------------
4364 ; llvm.amdgcn.image.sample.l
4365 ; --------------------------------------------------------------------
4366
4367 define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4368 ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
4369 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4370 ; CHECK-NEXT:    ret float [[DATA]]
4371 ;
4372   %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4373   %elt0 = extractelement <2 x float> %data, i32 1
4374   ret float %elt0
4375 }
4376
4377 declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4378
4379 ; --------------------------------------------------------------------
4380 ; llvm.amdgcn.image.sample.b
4381 ; --------------------------------------------------------------------
4382
4383 define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4384 ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
4385 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4386 ; CHECK-NEXT:    ret float [[DATA]]
4387 ;
4388   %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4389   %elt0 = extractelement <4 x float> %data, i32 1
4390   ret float %elt0
4391 }
4392
4393 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4394
4395 ; --------------------------------------------------------------------
4396 ; llvm.amdgcn.image.sample.b.cl
4397 ; --------------------------------------------------------------------
4398
4399 define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4400 ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
4401 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4402 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4403 ;
4404   %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4405   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
4406   ret <2 x float> %shuf
4407 }
4408
4409 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4410
4411 ; --------------------------------------------------------------------
4412 ; llvm.amdgcn.image.sample.lz
4413 ; --------------------------------------------------------------------
4414
4415 define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4416 ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
4417 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4418 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4419 ;
4420   %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4421   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 3>
4422   ret <2 x float> %shuf
4423 }
4424
4425 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4426
4427 ; --------------------------------------------------------------------
4428 ; llvm.amdgcn.image.sample.cd
4429 ; --------------------------------------------------------------------
4430
4431 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4432 ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
4433 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4434 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4435 ;
4436   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4437   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
4438   ret <3 x float> %shuf
4439 }
4440
4441 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4442
4443 ; --------------------------------------------------------------------
4444 ; llvm.amdgcn.image.sample.cd.cl
4445 ; --------------------------------------------------------------------
4446
4447 define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4448 ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(
4449 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4450 ; CHECK-NEXT:    ret half [[DATA]]
4451 ;
4452   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4453   %elt0 = extractelement <4 x half> %data, i32 3
4454   ret half %elt0
4455 }
4456
4457 define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4458 ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(
4459 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4460 ; CHECK-NEXT:    ret half [[DATA]]
4461 ;
4462   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4463   %elt0 = extractelement <4 x half> %data, i32 2
4464   ret half %elt0
4465 }
4466
4467 define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4468 ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(
4469 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4470 ; CHECK-NEXT:    ret half [[DATA]]
4471 ;
4472   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4473   %elt0 = extractelement <4 x half> %data, i32 1
4474   ret half %elt0
4475 }
4476
4477 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4478 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
4479 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4480 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4481 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4482 ;
4483   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4484   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
4485   ret <4 x half> %res
4486 }
4487
4488 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4489 ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
4490 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4491 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
4492 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4493 ;
4494   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4495   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
4496   ret <4 x half> %res
4497 }
4498
4499 define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4500 ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
4501 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4502 ; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0
4503 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4504 ;
4505   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4506   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
4507   ret <4 x half> %res
4508 }
4509
4510 define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4511 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
4512 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4513 ; CHECK-NEXT:    ret half [[DATA]]
4514 ;
4515   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4516   %elt0 = extractelement <4 x half> %data, i32 0
4517   ret half %elt0
4518 }
4519
4520 declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4521
4522 ; --------------------------------------------------------------------
4523 ; llvm.amdgcn.image.sample.c
4524 ; --------------------------------------------------------------------
4525
4526 define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4527 ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
4528 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4529 ; CHECK-NEXT:    ret float [[DATA]]
4530 ;
4531   %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4532   %elt0 = extractelement <4 x float> %data, i32 0
4533   ret float %elt0
4534 }
4535
4536 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4537
4538 ; --------------------------------------------------------------------
4539 ; llvm.amdgcn.image.sample.c.cl
4540 ; --------------------------------------------------------------------
4541
4542 define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4543 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
4544 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4545 ; CHECK-NEXT:    ret float [[DATA]]
4546 ;
4547   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4548   %elt0 = extractelement <4 x float> %data, i32 0
4549   ret float %elt0
4550 }
4551
4552 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4553
4554 ; --------------------------------------------------------------------
4555 ; llvm.amdgcn.image.sample.c.d
4556 ; --------------------------------------------------------------------
4557
4558 define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4559 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
4560 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4561 ; CHECK-NEXT:    ret float [[DATA]]
4562 ;
4563   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4564   %elt0 = extractelement <4 x float> %data, i32 0
4565   ret float %elt0
4566 }
4567
4568 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4569
4570 ; --------------------------------------------------------------------
4571 ; llvm.amdgcn.image.sample.c.d.cl
4572 ; --------------------------------------------------------------------
4573
4574 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4575 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
4576 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4577 ; CHECK-NEXT:    ret float [[DATA]]
4578 ;
4579   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4580   %elt0 = extractelement <4 x float> %data, i32 0
4581   ret float %elt0
4582 }
4583
4584 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4585
4586 ; --------------------------------------------------------------------
4587 ; llvm.amdgcn.image.sample.c.l
4588 ; --------------------------------------------------------------------
4589
4590 define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4591 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
4592 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4593 ; CHECK-NEXT:    ret float [[DATA]]
4594 ;
4595   %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4596   %elt0 = extractelement <4 x float> %data, i32 0
4597   ret float %elt0
4598 }
4599
4600 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4601
4602 ; --------------------------------------------------------------------
4603 ; llvm.amdgcn.image.sample.c.b
4604 ; --------------------------------------------------------------------
4605
4606 define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4607 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
4608 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4609 ; CHECK-NEXT:    ret float [[DATA]]
4610 ;
4611   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4612   %elt0 = extractelement <4 x float> %data, i32 0
4613   ret float %elt0
4614 }
4615
4616 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4617
4618 ; --------------------------------------------------------------------
4619 ; llvm.amdgcn.image.sample.c.b.cl
4620 ; --------------------------------------------------------------------
4621
4622 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4623 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
4624 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4625 ; CHECK-NEXT:    ret float [[DATA]]
4626 ;
4627   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4628   %elt0 = extractelement <4 x float> %data, i32 0
4629   ret float %elt0
4630 }
4631
4632 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4633
4634 ; --------------------------------------------------------------------
4635 ; llvm.amdgcn.image.sample.c.lz
4636 ; --------------------------------------------------------------------
4637
4638 define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4639 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
4640 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4641 ; CHECK-NEXT:    ret float [[DATA]]
4642 ;
4643   %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4644   %elt0 = extractelement <4 x float> %data, i32 0
4645   ret float %elt0
4646 }
4647
4648 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4649
4650 ; --------------------------------------------------------------------
4651 ; llvm.amdgcn.image.sample.c.cd
4652 ; --------------------------------------------------------------------
4653
4654 define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4655 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
4656 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4657 ; CHECK-NEXT:    ret float [[DATA]]
4658 ;
4659   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4660   %elt0 = extractelement <4 x float> %data, i32 0
4661   ret float %elt0
4662 }
4663
4664 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4665
4666 ; --------------------------------------------------------------------
4667 ; llvm.amdgcn.image.sample.c.cd.cl
4668 ; --------------------------------------------------------------------
4669
4670 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4671 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
4672 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4673 ; CHECK-NEXT:    ret float [[DATA]]
4674 ;
4675   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4676   %elt0 = extractelement <4 x float> %data, i32 0
4677   ret float %elt0
4678 }
4679
4680 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4681
4682 ; --------------------------------------------------------------------
4683 ; llvm.amdgcn.image.sample.o
4684 ; --------------------------------------------------------------------
4685
4686 define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4687 ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
4688 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4689 ; CHECK-NEXT:    ret float [[DATA]]
4690 ;
4691   %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4692   %elt0 = extractelement <4 x float> %data, i32 0
4693   ret float %elt0
4694 }
4695
4696 declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4697
4698 ; --------------------------------------------------------------------
4699 ; llvm.amdgcn.image.sample.cl.o
4700 ; --------------------------------------------------------------------
4701
4702 define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4703 ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
4704 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4705 ; CHECK-NEXT:    ret float [[DATA]]
4706 ;
4707   %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4708   %elt0 = extractelement <4 x float> %data, i32 0
4709   ret float %elt0
4710 }
4711
4712 declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4713
4714 ; --------------------------------------------------------------------
4715 ; llvm.amdgcn.image.sample.d.o
4716 ; --------------------------------------------------------------------
4717
4718 define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4719 ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
4720 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4721 ; CHECK-NEXT:    ret float [[DATA]]
4722 ;
4723   %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4724   %elt0 = extractelement <4 x float> %data, i32 0
4725   ret float %elt0
4726 }
4727
4728 declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4729
4730 ; --------------------------------------------------------------------
4731 ; llvm.amdgcn.image.sample.d.cl.o
4732 ; --------------------------------------------------------------------
4733
4734 define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4735 ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
4736 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4737 ; CHECK-NEXT:    ret float [[DATA]]
4738 ;
4739   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4740   %elt0 = extractelement <4 x float> %data, i32 0
4741   ret float %elt0
4742 }
4743
4744 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4745
4746 ; --------------------------------------------------------------------
4747 ; llvm.amdgcn.image.sample.l.o
4748 ; --------------------------------------------------------------------
4749
4750 define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4751 ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
4752 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4753 ; CHECK-NEXT:    ret float [[DATA]]
4754 ;
4755   %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4756   %elt0 = extractelement <4 x float> %data, i32 0
4757   ret float %elt0
4758 }
4759
4760 declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4761
4762 ; --------------------------------------------------------------------
4763 ; llvm.amdgcn.image.sample.b.o
4764 ; --------------------------------------------------------------------
4765
4766 define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4767 ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
4768 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4769 ; CHECK-NEXT:    ret float [[DATA]]
4770 ;
4771   %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4772   %elt0 = extractelement <4 x float> %data, i32 0
4773   ret float %elt0
4774 }
4775
4776 declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4777
4778 ; --------------------------------------------------------------------
4779 ; llvm.amdgcn.image.sample.b.cl.o
4780 ; --------------------------------------------------------------------
4781
4782 define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4783 ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
4784 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4785 ; CHECK-NEXT:    ret float [[DATA]]
4786 ;
4787   %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4788   %elt0 = extractelement <4 x float> %data, i32 0
4789   ret float %elt0
4790 }
4791
4792 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4793
4794 ; --------------------------------------------------------------------
4795 ; llvm.amdgcn.image.sample.lz.o
4796 ; --------------------------------------------------------------------
4797
4798 define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4799 ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
4800 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4801 ; CHECK-NEXT:    ret float [[DATA]]
4802 ;
4803   %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4804   %elt0 = extractelement <4 x float> %data, i32 0
4805   ret float %elt0
4806 }
4807
4808 declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4809
4810 ; --------------------------------------------------------------------
4811 ; llvm.amdgcn.image.sample.cd.o
4812 ; --------------------------------------------------------------------
4813
4814 define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4815 ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
4816 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4817 ; CHECK-NEXT:    ret float [[DATA]]
4818 ;
4819   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4820   %elt0 = extractelement <4 x float> %data, i32 0
4821   ret float %elt0
4822 }
4823
4824 declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4825
4826 ; --------------------------------------------------------------------
4827 ; llvm.amdgcn.image.sample.cd.cl.o
4828 ; --------------------------------------------------------------------
4829
4830 define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4831 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
4832 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4833 ; CHECK-NEXT:    ret float [[DATA]]
4834 ;
4835   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4836   %elt0 = extractelement <4 x float> %data, i32 0
4837   ret float %elt0
4838 }
4839
4840 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4841
4842 ; --------------------------------------------------------------------
4843 ; llvm.amdgcn.image.sample.c.o
4844 ; --------------------------------------------------------------------
4845
4846 define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4847 ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
4848 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4849 ; CHECK-NEXT:    ret float [[DATA]]
4850 ;
4851   %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4852   %elt0 = extractelement <4 x float> %data, i32 0
4853   ret float %elt0
4854 }
4855
4856 declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4857
4858 ; --------------------------------------------------------------------
4859 ; llvm.amdgcn.image.sample.c.cl.o
4860 ; --------------------------------------------------------------------
4861
4862 define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4863 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
4864 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4865 ; CHECK-NEXT:    ret float [[DATA]]
4866 ;
4867   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4868   %elt0 = extractelement <4 x float> %data, i32 0
4869   ret float %elt0
4870 }
4871
4872 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4873
4874 ; --------------------------------------------------------------------
4875 ; llvm.amdgcn.image.sample.c.d.o
4876 ; --------------------------------------------------------------------
4877
4878 define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4879 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
4880 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4881 ; CHECK-NEXT:    ret float [[DATA]]
4882 ;
4883   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4884   %elt0 = extractelement <4 x float> %data, i32 0
4885   ret float %elt0
4886 }
4887
4888 declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4889
4890 ; --------------------------------------------------------------------
4891 ; llvm.amdgcn.image.sample.c.d.cl.o
4892 ; --------------------------------------------------------------------
4893
4894 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4895 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
4896 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4897 ; CHECK-NEXT:    ret float [[DATA]]
4898 ;
4899   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4900   %elt0 = extractelement <4 x float> %data, i32 0
4901   ret float %elt0
4902 }
4903
4904 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4905
4906 ; --------------------------------------------------------------------
4907 ; llvm.amdgcn.image.sample.c.l.o
4908 ; --------------------------------------------------------------------
4909
4910 define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4911 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
4912 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4913 ; CHECK-NEXT:    ret float [[DATA]]
4914 ;
4915   %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4916   %elt0 = extractelement <4 x float> %data, i32 0
4917   ret float %elt0
4918 }
4919
4920 declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4921
4922 ; --------------------------------------------------------------------
4923 ; llvm.amdgcn.image.sample.c.b.o
4924 ; --------------------------------------------------------------------
4925
4926 define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4927 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
4928 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4929 ; CHECK-NEXT:    ret float [[DATA]]
4930 ;
4931   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4932   %elt0 = extractelement <4 x float> %data, i32 0
4933   ret float %elt0
4934 }
4935
4936 declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4937
4938 ; --------------------------------------------------------------------
4939 ; llvm.amdgcn.image.sample.c.b.cl.o
4940 ; --------------------------------------------------------------------
4941
4942 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4943 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
4944 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4945 ; CHECK-NEXT:    ret float [[DATA]]
4946 ;
4947   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4948   %elt0 = extractelement <4 x float> %data, i32 0
4949   ret float %elt0
4950 }
4951
4952 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4953
4954 ; --------------------------------------------------------------------
4955 ; llvm.amdgcn.image.sample.c.lz.o
4956 ; --------------------------------------------------------------------
4957
4958 define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4959 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
4960 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4961 ; CHECK-NEXT:    ret float [[DATA]]
4962 ;
4963   %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4964   %elt0 = extractelement <4 x float> %data, i32 0
4965   ret float %elt0
4966 }
4967
4968 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4969
4970 ; --------------------------------------------------------------------
4971 ; llvm.amdgcn.image.sample.c.cd.o
4972 ; --------------------------------------------------------------------
4973
4974 define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4975 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
4976 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4977 ; CHECK-NEXT:    ret float [[DATA]]
4978 ;
4979   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4980   %elt0 = extractelement <4 x float> %data, i32 0
4981   ret float %elt0
4982 }
4983
4984 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4985
4986 ; --------------------------------------------------------------------
4987 ; llvm.amdgcn.image.sample.c.cd.cl.o
4988 ; --------------------------------------------------------------------
4989
4990 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4991 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
4992 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4993 ; CHECK-NEXT:    ret float [[DATA]]
4994 ;
4995   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4996   %elt0 = extractelement <4 x float> %data, i32 0
4997   ret float %elt0
4998 }
4999
5000 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5001
5002 ; --------------------------------------------------------------------
5003 ; llvm.amdgcn.image.gather4
5004 ; --------------------------------------------------------------------
5005
5006 ; Don't handle gather4*
5007
5008 define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5009 ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
5010 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5011 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5012 ; CHECK-NEXT:    ret float [[ELT0]]
5013 ;
5014   %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5015   %elt0 = extractelement <4 x float> %data, i32 0
5016   ret float %elt0
5017 }
5018
5019 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5020
5021 ; --------------------------------------------------------------------
5022 ; llvm.amdgcn.image.gather4.cl
5023 ; --------------------------------------------------------------------
5024
5025 define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5026 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
5027 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5028 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5029 ; CHECK-NEXT:    ret float [[ELT0]]
5030 ;
5031   %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5032   %elt0 = extractelement <4 x float> %data, i32 0
5033   ret float %elt0
5034 }
5035
5036 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5037
5038 ; --------------------------------------------------------------------
5039 ; llvm.amdgcn.image.gather4.l
5040 ; --------------------------------------------------------------------
5041
5042 define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5043 ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
5044 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5045 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5046 ; CHECK-NEXT:    ret float [[ELT0]]
5047 ;
5048   %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5049   %elt0 = extractelement <4 x float> %data, i32 0
5050   ret float %elt0
5051 }
5052
5053 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5054
5055 ; --------------------------------------------------------------------
5056 ; llvm.amdgcn.image.gather4.b
5057 ; --------------------------------------------------------------------
5058
5059 define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5060 ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
5061 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5062 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5063 ; CHECK-NEXT:    ret float [[ELT0]]
5064 ;
5065   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5066   %elt0 = extractelement <4 x float> %data, i32 0
5067   ret float %elt0
5068 }
5069
5070 declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5071
5072 ; --------------------------------------------------------------------
5073 ; llvm.amdgcn.image.gather4.b.cl
5074 ; --------------------------------------------------------------------
5075
5076 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5077 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
5078 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5079 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5080 ; CHECK-NEXT:    ret float [[ELT0]]
5081 ;
5082   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5083   %elt0 = extractelement <4 x float> %data, i32 0
5084   ret float %elt0
5085 }
5086
5087 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5088
5089 ; --------------------------------------------------------------------
5090 ; llvm.amdgcn.image.gather4.lz
5091 ; --------------------------------------------------------------------
5092
5093 define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5094 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
5095 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5096 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5097 ; CHECK-NEXT:    ret float [[ELT0]]
5098 ;
5099   %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5100   %elt0 = extractelement <4 x float> %data, i32 0
5101   ret float %elt0
5102 }
5103
5104 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5105
5106 ; --------------------------------------------------------------------
5107 ; llvm.amdgcn.image.gather4.o
5108 ; --------------------------------------------------------------------
5109
5110 define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5111 ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
5112 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5113 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5114 ; CHECK-NEXT:    ret float [[ELT0]]
5115 ;
5116   %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5117   %elt0 = extractelement <4 x float> %data, i32 0
5118   ret float %elt0
5119 }
5120
5121 declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5122
5123 ; --------------------------------------------------------------------
5124 ; llvm.amdgcn.image.gather4.cl.o
5125 ; --------------------------------------------------------------------
5126
5127 define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5128 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
5129 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5130 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5131 ; CHECK-NEXT:    ret float [[ELT0]]
5132 ;
5133   %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5134   %elt0 = extractelement <4 x float> %data, i32 0
5135   ret float %elt0
5136 }
5137
5138 declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5139
5140 ; --------------------------------------------------------------------
5141 ; llvm.amdgcn.image.gather4.l.o
5142 ; --------------------------------------------------------------------
5143
5144 define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5145 ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
5146 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5147 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5148 ; CHECK-NEXT:    ret float [[ELT0]]
5149 ;
5150   %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5151   %elt0 = extractelement <4 x float> %data, i32 0
5152   ret float %elt0
5153 }
5154
5155 declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5156
5157 ; --------------------------------------------------------------------
5158 ; llvm.amdgcn.image.gather4.b.o
5159 ; --------------------------------------------------------------------
5160
5161 define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5162 ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
5163 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5164 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5165 ; CHECK-NEXT:    ret float [[ELT0]]
5166 ;
5167   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5168   %elt0 = extractelement <4 x float> %data, i32 0
5169   ret float %elt0
5170 }
5171
5172 declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5173
5174 ; --------------------------------------------------------------------
5175 ; llvm.amdgcn.image.gather4.b.cl.o
5176 ; --------------------------------------------------------------------
5177
5178 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5179 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
5180 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5181 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5182 ; CHECK-NEXT:    ret float [[ELT0]]
5183 ;
5184   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5185   %elt0 = extractelement <4 x float> %data, i32 0
5186   ret float %elt0
5187 }
5188
5189 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5190
5191 ; --------------------------------------------------------------------
5192 ; llvm.amdgcn.image.gather4.lz.o
5193 ; --------------------------------------------------------------------
5194
5195 define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5196 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
5197 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5198 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5199 ; CHECK-NEXT:    ret float [[ELT0]]
5200 ;
5201   %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5202   %elt0 = extractelement <4 x float> %data, i32 0
5203   ret float %elt0
5204 }
5205
5206 declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5207
5208 ; --------------------------------------------------------------------
5209 ; llvm.amdgcn.image.gather4.c.o
5210 ; --------------------------------------------------------------------
5211
5212 define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5213 ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
5214 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5215 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5216 ; CHECK-NEXT:    ret float [[ELT0]]
5217 ;
5218   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5219   %elt0 = extractelement <4 x float> %data, i32 0
5220   ret float %elt0
5221 }
5222
5223 declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5224
5225 ; --------------------------------------------------------------------
5226 ; llvm.amdgcn.image.gather4.c.cl.o
5227 ; --------------------------------------------------------------------
5228
5229 define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5230 ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
5231 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5232 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5233 ; CHECK-NEXT:    ret float [[ELT0]]
5234 ;
5235   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5236   %elt0 = extractelement <4 x float> %data, i32 0
5237   ret float %elt0
5238 }
5239
5240 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5241
5242 ; --------------------------------------------------------------------
5243 ; llvm.amdgcn.image.gather4.c.l.o
5244 ; --------------------------------------------------------------------
5245
5246 define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5247 ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
5248 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5249 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5250 ; CHECK-NEXT:    ret float [[ELT0]]
5251 ;
5252   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5253   %elt0 = extractelement <4 x float> %data, i32 0
5254   ret float %elt0
5255 }
5256
5257 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5258
5259 ; --------------------------------------------------------------------
5260 ; llvm.amdgcn.image.gather4.c.b.o
5261 ; --------------------------------------------------------------------
5262
5263 define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5264 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
5265 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5266 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5267 ; CHECK-NEXT:    ret float [[ELT0]]
5268 ;
5269   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5270   %elt0 = extractelement <4 x float> %data, i32 0
5271   ret float %elt0
5272 }
5273
5274 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5275
5276 ; --------------------------------------------------------------------
5277 ; llvm.amdgcn.image.gather4.c.b.cl.o
5278 ; --------------------------------------------------------------------
5279
5280 define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5281 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
5282 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5283 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5284 ; CHECK-NEXT:    ret float [[ELT0]]
5285 ;
5286   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5287   %elt0 = extractelement <4 x float> %data, i32 0
5288   ret float %elt0
5289 }
5290
5291 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5292
5293 ; --------------------------------------------------------------------
5294 ; llvm.amdgcn.image.gather4.c.lz.o
5295 ; --------------------------------------------------------------------
5296
5297 define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5298 ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
5299 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5300 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5301 ; CHECK-NEXT:    ret float [[ELT0]]
5302 ;
5303   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5304   %elt0 = extractelement <4 x float> %data, i32 0
5305   ret float %elt0
5306 }
5307
5308 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5309
5310 ; --------------------------------------------------------------------
5311 ; llvm.amdgcn.image.getlod
5312 ; --------------------------------------------------------------------
5313
5314 define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5315 ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
5316 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5317 ; CHECK-NEXT:    ret float [[DATA]]
5318 ;
5319   %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5320   %elt0 = extractelement <4 x float> %data, i32 0
5321   ret float %elt0
5322 }
5323
5324 declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5325
5326 ; --------------------------------------------------------------------
5327 ; llvm.amdgcn.image.load
5328 ; --------------------------------------------------------------------
5329
5330 define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
5331 ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
5332 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5333 ; CHECK-NEXT:    ret float [[DATA]]
5334 ;
5335   %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
5336   %elt0 = extractelement <4 x float> %data, i32 0
5337   ret float %elt0
5338 }
5339
5340 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
5341
5342 ; --------------------------------------------------------------------
5343 ; llvm.amdgcn.image.load.mip
5344 ; --------------------------------------------------------------------
5345
5346 define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
5347 ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
5348 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5349 ; CHECK-NEXT:    ret float [[DATA]]
5350 ;
5351   %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5352   %elt0 = extractelement <4 x float> %data, i32 0
5353   ret float %elt0
5354 }
5355
5356 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1
5357
5358 ; --------------------------------------------------------------------
5359 ; llvm.amdgcn.image.getresinfo
5360 ; --------------------------------------------------------------------
5361
5362 define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
5363 ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
5364 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5365 ; CHECK-NEXT:    ret float [[DATA]]
5366 ;
5367   %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5368   %elt0 = extractelement <4 x float> %data, i32 0
5369   ret float %elt0
5370 }
5371
5372 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5373
5374 ; --------------------------------------------------------------------
5375 ; TFE / LWE
5376 ; --------------------------------------------------------------------
5377
5378 define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
5379 ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
5380 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1)
5381 ; CHECK-NEXT:    [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
5382 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0
5383 ; CHECK-NEXT:    ret float [[ELT0]]
5384 ;
5385   %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
5386   %rgba = extractvalue { <4 x float>, i32 } %data, 0
5387   %elt0 = extractelement <4 x float> %rgba, i32 0
5388   ret float %elt0
5389 }
5390
5391 declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5392
5393 define amdgpu_hs float @tfe_check_assert() #0 {
5394 ; CHECK-LABEL: @tfe_check_assert(
5395 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
5396 ; CHECK-NEXT:    ret float [[DATA]]
5397 ;
5398   %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2
5399   %elt0 = extractelement <4 x float> %data, i32 0
5400   ret float %elt0
5401 }
5402
5403 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
5404
5405 attributes #0 = { nounwind }
5406 attributes #1 = { nounwind readonly }
5407
5408 !0 = !{float 2.500000e+00}