llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
   3
   4 ; --------------------------------------------------------------------
   5 ; llvm.amdgcn.raw.buffer.load
   6 ; --------------------------------------------------------------------
   7
   8 define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   9 ; CHECK-LABEL: @raw_buffer_load_f32(
  10 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  11 ; CHECK-NEXT:    ret float [[DATA]]
  12 ;
  13   %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  14   ret float %data
  15 }
  16
  17 define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  18 ; CHECK-LABEL: @raw_buffer_load_v1f32(
  19 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  20 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
  21 ;
  22   %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  23   ret <1 x float> %data
  24 }
  25
  26 define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  27 ; CHECK-LABEL: @raw_buffer_load_v2f32(
  28 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  29 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
  30 ;
  31   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  32   ret <2 x float> %data
  33 }
  34
  35 define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  36 ; CHECK-LABEL: @raw_buffer_load_v4f32(
  37 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  38 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
  39 ;
  40   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  41   ret <4 x float> %data
  42 }
  43
  44 define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  45 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
  46 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  47 ; CHECK-NEXT:    ret float [[DATA]]
  48 ;
  49   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  50   %elt0 = extractelement <2 x float> %data, i32 0
  51   ret float %elt0
  52 }
  53
  54 define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  55 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
  56 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
  57 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  58 ; CHECK-NEXT:    ret float [[DATA]]
  59 ;
  60   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  61   %elt1 = extractelement <2 x float> %data, i32 1
  62   ret float %elt1
  63 }
  64
  65 define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  66 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
  67 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
  68 ; CHECK-NEXT:    ret float [[DATA]]
  69 ;
  70   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  71   %elt0 = extractelement <4 x float> %data, i32 0
  72   ret float %elt0
  73 }
  74
  75 define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  76 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
  77 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
  78 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  79 ; CHECK-NEXT:    ret float [[DATA]]
  80 ;
  81   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  82   %elt1 = extractelement <4 x float> %data, i32 1
  83   ret float %elt1
  84 }
  85
  86 define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  87 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
  88 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
  89 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
  90 ; CHECK-NEXT:    ret float [[DATA]]
  91 ;
  92   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
  93   %elt1 = extractelement <4 x float> %data, i32 2
  94   ret float %elt1
  95 }
  96
  97 define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
  98 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
  99 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 100 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 101 ; CHECK-NEXT:    ret float [[DATA]]
 102 ;
 103   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 104   %elt1 = extractelement <4 x float> %data, i32 3
 105   ret float %elt1
 106 }
 107
 108 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 109 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
 110 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 111 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 112 ;
 113   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 114   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 115   ret <2 x float> %shuf
 116 }
 117
 118 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 119 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
 120 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 121 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 122 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 123 ;
 124   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 125   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 126   ret <2 x float> %shuf
 127 }
 128
 129 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 130 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
 131 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 132 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 133 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 134 ;
 135   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 136   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 137   ret <2 x float> %shuf
 138 }
 139
 140 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 141 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
 142 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 143 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 144 ;
 145   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 146   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 147   ret <3 x float> %shuf
 148 }
 149
 150 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 151 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
 152 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 153 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 154 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 155 ;
 156   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 157   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 158   ret <3 x float> %shuf
 159 }
 160
 161 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 162 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
 163 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 164 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 165 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 166 ;
 167   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 168   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 169   ret <3 x float> %shuf
 170 }
 171
 172 define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 173 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
 174 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 175 ; CHECK-NEXT:    ret float [[DATA]]
 176 ;
 177   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 178   %elt0 = extractelement <3 x float> %data, i32 0
 179   ret float %elt0
 180 }
 181
 182 define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 183 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
 184 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 185 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 186 ; CHECK-NEXT:    ret float [[DATA]]
 187 ;
 188   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 189   %elt1 = extractelement <3 x float> %data, i32 1
 190   ret float %elt1
 191 }
 192
 193 define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 194 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
 195 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 196 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 197 ; CHECK-NEXT:    ret float [[DATA]]
 198 ;
 199   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 200   %elt1 = extractelement <3 x float> %data, i32 2
 201   ret float %elt1
 202 }
 203
 204 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 205 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
 206 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 207 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 208 ;
 209   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 210   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 211   ret <2 x float> %shuf
 212 }
 213
 214 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 215 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
 216 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 217 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 218 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 219 ;
 220   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 221   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 222   ret <2 x float> %shuf
 223 }
 224
 225 define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 226 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
 227 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 228 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
 229 ; CHECK-NEXT:    ret i32 [[VAR2]]
 230 ;
 231   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 232   %var1 = bitcast <4 x float> %var to <4 x i32>
 233   %var2 = extractelement <4 x i32> %var1, i32 0
 234   ret i32 %var2
 235 }
 236
 237 define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 238 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
 239 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 240 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
 241 ; CHECK-NEXT:    ret float [[VAR2]]
 242 ;
 243   %var = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 244   %var1 = bitcast <4 x i32> %var to <4 x float>
 245   %var2 = extractelement <4 x float> %var1, i32 0
 246   ret float %var2
 247 }
 248
 249 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 250 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
 251 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0:![0-9]+]]
 252 ; CHECK-NEXT:    ret float [[DATA]]
 253 ;
 254   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 255   %elt0 = extractelement <2 x float> %data, i32 0
 256   ret float %elt0
 257 }
 258
 259 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
 260 declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
 261 declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
 262 declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
 263 declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
 264
 265 declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1
 266
 267 define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 268 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16(
 269 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 270 ; CHECK-NEXT:    ret half [[DATA]]
 271 ;
 272   %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 273   %elt0 = extractelement <2 x half> %data, i32 0
 274   ret half %elt0
 275 }
 276
 277 define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 278 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16(
 279 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 280 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 281 ; CHECK-NEXT:    ret half [[DATA]]
 282 ;
 283   %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 284   %elt1 = extractelement <2 x half> %data, i32 1
 285   ret half %elt1
 286 }
 287
 288 define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 289 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16(
 290 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 291 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 292 ; CHECK-NEXT:    ret half [[DATA]]
 293 ;
 294   %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 295   %elt0 = extractelement <3 x half> %data, i32 1
 296   ret half %elt0
 297 }
 298
 299 define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 300 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16(
 301 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 302 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 303 ; CHECK-NEXT:    ret half [[DATA]]
 304 ;
 305   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 306   %elt1 = extractelement <4 x half> %data, i32 1
 307   ret half %elt1
 308 }
 309
 310 define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 311 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16(
 312 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
 313 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 314 ; CHECK-NEXT:    ret half [[DATA]]
 315 ;
 316   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 317   %elt1 = extractelement <4 x half> %data, i32 3
 318   ret half %elt1
 319 }
 320
 321 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 322 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16(
 323 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 324 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
 325 ;
 326   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 327   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
 328   ret <2 x half> %shuf
 329 }
 330
 331 declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1
 332 declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1
 333 declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1
 334 declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1
 335
 336 define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 337 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8(
 338 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 339 ; CHECK-NEXT:    ret i8 [[DATA]]
 340 ;
 341   %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 342   %elt0 = extractelement <2 x i8> %data, i32 0
 343   ret i8 %elt0
 344 }
 345
 346 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 347 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8(
 348 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 349 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 350 ; CHECK-NEXT:    ret i8 [[DATA]]
 351 ;
 352   %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 353   %elt1 = extractelement <2 x i8> %data, i32 1
 354   ret i8 %elt1
 355 }
 356
 357 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 358 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8(
 359 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 360 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 361 ; CHECK-NEXT:    ret i8 [[DATA]]
 362 ;
 363   %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 364   %elt0 = extractelement <3 x i8> %data, i32 1
 365   ret i8 %elt0
 366 }
 367
 368 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 369 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8(
 370 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 371 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 372 ; CHECK-NEXT:    ret i8 [[DATA]]
 373 ;
 374   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 375   %elt1 = extractelement <4 x i8> %data, i32 1
 376   ret i8 %elt1
 377 }
 378
 379 define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 380 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8(
 381 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
 382 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 383 ; CHECK-NEXT:    ret i8 [[DATA]]
 384 ;
 385   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 386   %elt1 = extractelement <4 x i8> %data, i32 3
 387   ret i8 %elt1
 388 }
 389
 390 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 391 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8(
 392 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 393 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
 394 ;
 395   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
 396   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
 397   ret <2 x i8> %shuf
 398 }
 399
 400 declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1
 401 declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1
 402 declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1
 403 declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1
 404
 405 ; --------------------------------------------------------------------
 406 ; llvm.amdgcn.raw.ptr.buffer.load
 407 ; --------------------------------------------------------------------
 408
 409 define amdgpu_ps float @raw_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 410 ; CHECK-LABEL: @raw_ptr_buffer_load_f32(
 411 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 412 ; CHECK-NEXT:    ret float [[DATA]]
 413 ;
 414   %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 415   ret float %data
 416 }
 417
 418 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 419 ; CHECK-LABEL: @raw_ptr_buffer_load_v1f32(
 420 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 421 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
 422 ;
 423   %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 424   ret <1 x float> %data
 425 }
 426
 427 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 428 ; CHECK-LABEL: @raw_ptr_buffer_load_v2f32(
 429 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 430 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 431 ;
 432   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 433   ret <2 x float> %data
 434 }
 435
 436 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 437 ; CHECK-LABEL: @raw_ptr_buffer_load_v4f32(
 438 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 439 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
 440 ;
 441   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 442   ret <4 x float> %data
 443 }
 444
 445 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 446 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f32(
 447 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 448 ; CHECK-NEXT:    ret float [[DATA]]
 449 ;
 450   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 451   %elt0 = extractelement <2 x float> %data, i32 0
 452   ret float %elt0
 453 }
 454
 455 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 456 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f32(
 457 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 458 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 459 ; CHECK-NEXT:    ret float [[DATA]]
 460 ;
 461   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 462   %elt1 = extractelement <2 x float> %data, i32 1
 463   ret float %elt1
 464 }
 465
 466 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 467 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v4f32(
 468 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 469 ; CHECK-NEXT:    ret float [[DATA]]
 470 ;
 471   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 472   %elt0 = extractelement <4 x float> %data, i32 0
 473   ret float %elt0
 474 }
 475
 476 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 477 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f32(
 478 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 479 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 480 ; CHECK-NEXT:    ret float [[DATA]]
 481 ;
 482   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 483   %elt1 = extractelement <4 x float> %data, i32 1
 484   ret float %elt1
 485 }
 486
 487 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 488 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v4f32(
 489 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 490 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 491 ; CHECK-NEXT:    ret float [[DATA]]
 492 ;
 493   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 494   %elt1 = extractelement <4 x float> %data, i32 2
 495   ret float %elt1
 496 }
 497
 498 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 499 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f32(
 500 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 501 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 502 ; CHECK-NEXT:    ret float [[DATA]]
 503 ;
 504   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 505   %elt1 = extractelement <4 x float> %data, i32 3
 506   ret float %elt1
 507 }
 508
 509 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 510 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(
 511 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 512 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 513 ;
 514   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 515   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 516   ret <2 x float> %shuf
 517 }
 518
 519 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 520 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(
 521 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 522 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 523 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 524 ;
 525   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 526   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 527   ret <2 x float> %shuf
 528 }
 529
 530 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 531 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(
 532 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 533 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 534 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 535 ;
 536   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 537   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 538   ret <2 x float> %shuf
 539 }
 540
 541 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 542 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(
 543 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 544 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 545 ;
 546   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 547   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 548   ret <3 x float> %shuf
 549 }
 550
 551 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 552 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(
 553 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 554 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 555 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 556 ;
 557   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 558   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 559   ret <3 x float> %shuf
 560 }
 561
 562 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 563 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(
 564 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 565 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 566 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 567 ;
 568   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 569   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 570   ret <3 x float> %shuf
 571 }
 572
 573 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 574 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v3f32(
 575 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 576 ; CHECK-NEXT:    ret float [[DATA]]
 577 ;
 578   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 579   %elt0 = extractelement <3 x float> %data, i32 0
 580   ret float %elt0
 581 }
 582
 583 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 584 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f32(
 585 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 586 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 587 ; CHECK-NEXT:    ret float [[DATA]]
 588 ;
 589   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 590   %elt1 = extractelement <3 x float> %data, i32 1
 591   ret float %elt1
 592 }
 593
 594 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 595 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v3f32(
 596 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 597 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 598 ; CHECK-NEXT:    ret float [[DATA]]
 599 ;
 600   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 601   %elt1 = extractelement <3 x float> %data, i32 2
 602   ret float %elt1
 603 }
 604
 605 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 606 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(
 607 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 608 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 609 ;
 610   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 611   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 612   ret <2 x float> %shuf
 613 }
 614
 615 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 616 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(
 617 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 618 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 619 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 620 ;
 621   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 622   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 623   ret <2 x float> %shuf
 624 }
 625
 626 define i32 @extract0_bitcast_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 627 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4f32(
 628 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 629 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
 630 ; CHECK-NEXT:    ret i32 [[VAR2]]
 631 ;
 632   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 633   %var1 = bitcast <4 x float> %var to <4 x i32>
 634   %var2 = extractelement <4 x i32> %var1, i32 0
 635   ret i32 %var2
 636 }
 637
 638 define float @extract0_bitcast_raw_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 639 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4i32(
 640 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 641 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
 642 ; CHECK-NEXT:    ret float [[VAR2]]
 643 ;
 644   %var = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 645   %var1 = bitcast <4 x i32> %var to <4 x float>
 646   %var2 = extractelement <4 x float> %var1, i32 0
 647   ret float %var2
 648 }
 649
 650 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 651 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(
 652 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
 653 ; CHECK-NEXT:    ret float [[DATA]]
 654 ;
 655   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
 656   %elt0 = extractelement <2 x float> %data, i32 0
 657   ret float %elt0
 658 }
 659
 660 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #1
 661 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32) #1
 662 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #1
 663 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32) #1
 664 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
 665
 666 declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #1
 667
 668 define amdgpu_ps half @extract_elt0_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 669 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f16(
 670 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 671 ; CHECK-NEXT:    ret half [[DATA]]
 672 ;
 673   %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 674   %elt0 = extractelement <2 x half> %data, i32 0
 675   ret half %elt0
 676 }
 677
 678 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 679 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f16(
 680 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 681 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 682 ; CHECK-NEXT:    ret half [[DATA]]
 683 ;
 684   %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 685   %elt1 = extractelement <2 x half> %data, i32 1
 686   ret half %elt1
 687 }
 688
 689 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 690 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f16(
 691 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 692 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 693 ; CHECK-NEXT:    ret half [[DATA]]
 694 ;
 695   %data = call <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 696   %elt0 = extractelement <3 x half> %data, i32 1
 697   ret half %elt0
 698 }
 699
 700 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 701 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f16(
 702 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
 703 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 704 ; CHECK-NEXT:    ret half [[DATA]]
 705 ;
 706   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 707   %elt1 = extractelement <4 x half> %data, i32 1
 708   ret half %elt1
 709 }
 710
 711 define amdgpu_ps half @extract_elt3_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 712 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f16(
 713 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
 714 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 715 ; CHECK-NEXT:    ret half [[DATA]]
 716 ;
 717   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 718   %elt1 = extractelement <4 x half> %data, i32 3
 719   ret half %elt1
 720 }
 721
 722 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 723 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(
 724 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 725 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
 726 ;
 727   %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 728   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
 729   ret <2 x half> %shuf
 730 }
 731
 732 declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #1
 733 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #1
 734 declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32) #1
 735 declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #1
 736
 737 define amdgpu_ps i8 @extract_elt0_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 738 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2i8(
 739 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 740 ; CHECK-NEXT:    ret i8 [[DATA]]
 741 ;
 742   %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 743   %elt0 = extractelement <2 x i8> %data, i32 0
 744   ret i8 %elt0
 745 }
 746
 747 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 748 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2i8(
 749 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 750 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 751 ; CHECK-NEXT:    ret i8 [[DATA]]
 752 ;
 753   %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 754   %elt1 = extractelement <2 x i8> %data, i32 1
 755   ret i8 %elt1
 756 }
 757
 758 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 759 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3i8(
 760 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 761 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 762 ; CHECK-NEXT:    ret i8 [[DATA]]
 763 ;
 764   %data = call <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 765   %elt0 = extractelement <3 x i8> %data, i32 1
 766   ret i8 %elt0
 767 }
 768
 769 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 770 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4i8(
 771 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
 772 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 773 ; CHECK-NEXT:    ret i8 [[DATA]]
 774 ;
 775   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 776   %elt1 = extractelement <4 x i8> %data, i32 1
 777   ret i8 %elt1
 778 }
 779
 780 define amdgpu_ps i8 @extract_elt3_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 781 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4i8(
 782 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
 783 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
 784 ; CHECK-NEXT:    ret i8 [[DATA]]
 785 ;
 786   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 787   %elt1 = extractelement <4 x i8> %data, i32 3
 788   ret i8 %elt1
 789 }
 790
 791 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
 792 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(
 793 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
 794 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
 795 ;
 796   %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
 797   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
 798   ret <2 x i8> %shuf
 799 }
 800
 801 declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #1
 802 declare <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32) #1
 803 declare <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32) #1
 804 declare <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32) #1
 805
 806 ; --------------------------------------------------------------------
 807 ; llvm.amdgcn.s.buffer.load
 808 ; --------------------------------------------------------------------
 809
 810 define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 811 ; CHECK-LABEL: @s_buffer_load_f32(
 812 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 813 ; CHECK-NEXT:    ret float [[DATA]]
 814 ;
 815   %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 816   ret float %data
 817 }
 818
 819 define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 820 ; CHECK-LABEL: @s_buffer_load_v2f32(
 821 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 822 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 823 ;
 824   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 825   ret <2 x float> %data
 826 }
 827
 828 define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 829 ; CHECK-LABEL: @s_buffer_load_v4f32(
 830 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 831 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
 832 ;
 833   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 834   ret <4 x float> %data
 835 }
 836
 837 define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 838 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32(
 839 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 840 ; CHECK-NEXT:    ret float [[DATA]]
 841 ;
 842   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 843   %elt0 = extractelement <2 x float> %data, i32 0
 844   ret float %elt0
 845 }
 846
 847 define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 848 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32(
 849 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 850 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 851 ; CHECK-NEXT:    ret float [[DATA]]
 852 ;
 853   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 854   %elt1 = extractelement <2 x float> %data, i32 1
 855   ret float %elt1
 856 }
 857
 858 define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 859 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32(
 860 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 861 ; CHECK-NEXT:    ret float [[DATA]]
 862 ;
 863   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 864   %elt0 = extractelement <4 x float> %data, i32 0
 865   ret float %elt0
 866 }
 867
 868 define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 869 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32(
 870 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 871 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 872 ; CHECK-NEXT:    ret float [[DATA]]
 873 ;
 874   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 875   %elt1 = extractelement <4 x float> %data, i32 1
 876   ret float %elt1
 877 }
 878
 879 define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 880 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32(
 881 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 882 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 883 ; CHECK-NEXT:    ret float [[DATA]]
 884 ;
 885   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 886   %elt1 = extractelement <4 x float> %data, i32 2
 887   ret float %elt1
 888 }
 889
 890 define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 891 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32(
 892 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
 893 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 894 ; CHECK-NEXT:    ret float [[DATA]]
 895 ;
 896   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 897   %elt1 = extractelement <4 x float> %data, i32 3
 898   ret float %elt1
 899 }
 900
 901 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 902 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32(
 903 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 904 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 905 ;
 906   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 907   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
 908   ret <2 x float> %shuf
 909 }
 910
 911 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 912 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32(
 913 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 914 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 915 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 916 ;
 917   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 918   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
 919   ret <2 x float> %shuf
 920 }
 921
 922 define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 923 ; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32(
 924 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 925 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 926 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 927 ;
 928   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 929   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 930   ret <2 x float> %shuf
 931 }
 932
 933 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 934 ; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32(
 935 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 936 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
 937 ;
 938   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 939   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 940   ret <3 x float> %shuf
 941 }
 942
 943 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 944 ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32(
 945 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 946 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 947 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 948 ;
 949   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 950   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 951   ret <3 x float> %shuf
 952 }
 953
 954 define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 955 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32(
 956 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 957 ; CHECK-NEXT:    ret float [[DATA]]
 958 ;
 959   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 960   %elt0 = extractelement <3 x float> %data, i32 0
 961   ret float %elt0
 962 }
 963
 964 define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 965 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32(
 966 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 967 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 968 ; CHECK-NEXT:    ret float [[DATA]]
 969 ;
 970   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 971   %elt1 = extractelement <3 x float> %data, i32 1
 972   ret float %elt1
 973 }
 974
 975 define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 976 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32(
 977 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
 978 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
 979 ; CHECK-NEXT:    ret float [[DATA]]
 980 ;
 981   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 982   %elt1 = extractelement <3 x float> %data, i32 2
 983   ret float %elt1
 984 }
 985
 986 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 987 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32(
 988 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
 989 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
 990 ;
 991   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
 992   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 993   ret <2 x float> %shuf
 994 }
 995
 996 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
 997 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32(
 998 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
 999 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1000 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1001 ;
1002   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1003   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1004   ret <2 x float> %shuf
1005 }
1006
1007 ; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened
1008 ; to vec4 anyway during lowering.
1009 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1010 ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32(
1011 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1012 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1013 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1014 ;
1015   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1016   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1017   ret <3 x float> %shuf
1018 }
1019
1020 define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1021 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32(
1022 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1023 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1024 ; CHECK-NEXT:    ret i32 [[VAR2]]
1025 ;
1026   %var = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1027   %var1 = bitcast <4 x float> %var to <4 x i32>
1028   %var2 = extractelement <4 x i32> %var1, i32 0
1029   ret i32 %var2
1030 }
1031
1032 define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1033 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32(
1034 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1035 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1036 ; CHECK-NEXT:    ret float [[VAR2]]
1037 ;
1038   %var = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1039   %var1 = bitcast <4 x i32> %var to <4 x float>
1040   %var2 = extractelement <4 x float> %var1, i32 0
1041   ret float %var2
1042 }
1043
1044 define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1045 ; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32(
1046 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0), !fpmath [[META0]]
1047 ; CHECK-NEXT:    ret float [[DATA]]
1048 ;
1049   %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0
1050   %elt0 = extractelement <2 x float> %data, i32 0
1051   ret float %elt0
1052 }
1053
1054 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
1055 declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1
1056 declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1
1057 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1
1058 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1
1059
1060 define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1061 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16(
1062 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1063 ; CHECK-NEXT:    ret half [[DATA]]
1064 ;
1065   %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1066   %elt0 = extractelement <2 x half> %data, i32 0
1067   ret half %elt0
1068 }
1069
1070 define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1071 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16(
1072 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1073 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1074 ; CHECK-NEXT:    ret half [[DATA]]
1075 ;
1076   %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1077   %elt1 = extractelement <2 x half> %data, i32 1
1078   ret half %elt1
1079 }
1080
1081 define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1082 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16(
1083 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1084 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1085 ; CHECK-NEXT:    ret half [[DATA]]
1086 ;
1087   %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1088   %elt1 = extractelement <3 x half> %data, i32 1
1089   ret half %elt1
1090 }
1091
1092 define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1093 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16(
1094 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1095 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1096 ; CHECK-NEXT:    ret half [[DATA]]
1097 ;
1098   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1099   %elt1 = extractelement <4 x half> %data, i32 1
1100   ret half %elt1
1101 }
1102
1103
1104 define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1105 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16(
1106 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
1107 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1108 ; CHECK-NEXT:    ret half [[DATA]]
1109 ;
1110   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1111   %elt1 = extractelement <4 x half> %data, i32 3
1112   ret half %elt1
1113 }
1114
1115 define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1116 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16(
1117 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1118 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
1119 ;
1120   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1121   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
1122   ret <2 x half> %shuf
1123 }
1124
1125 declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1
1126 declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1
1127 declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1
1128 declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1
1129
1130 define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1131 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8(
1132 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1133 ; CHECK-NEXT:    ret i8 [[DATA]]
1134 ;
1135   %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1136   %elt0 = extractelement <2 x i8> %data, i32 0
1137   ret i8 %elt0
1138 }
1139
1140 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1141 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8(
1142 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1143 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1144 ; CHECK-NEXT:    ret i8 [[DATA]]
1145 ;
1146   %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1147   %elt1 = extractelement <2 x i8> %data, i32 1
1148   ret i8 %elt1
1149 }
1150
1151 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1152 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8(
1153 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1154 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1155 ; CHECK-NEXT:    ret i8 [[DATA]]
1156 ;
1157   %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1158   %elt1 = extractelement <3 x i8> %data, i32 1
1159   ret i8 %elt1
1160 }
1161
1162 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1163 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8(
1164 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1165 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1166 ; CHECK-NEXT:    ret i8 [[DATA]]
1167 ;
1168   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1169   %elt1 = extractelement <4 x i8> %data, i32 1
1170   ret i8 %elt1
1171 }
1172
1173 define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1174 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8(
1175 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
1176 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1177 ; CHECK-NEXT:    ret i8 [[DATA]]
1178 ;
1179   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1180   %elt1 = extractelement <4 x i8> %data, i32 3
1181   ret i8 %elt1
1182 }
1183
1184 define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1185 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8(
1186 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1187 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
1188 ;
1189   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1190   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
1191   ret <2 x i8> %shuf
1192 }
1193
1194 declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1
1195 declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1
1196 declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1
1197 declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1
1198
1199 ; --------------------------------------------------------------------
1200 ; llvm.amdgcn.raw.buffer.load.format
1201 ; --------------------------------------------------------------------
1202
1203 define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1204 ; CHECK-LABEL: @raw_buffer_load_format_f32(
1205 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1206 ; CHECK-NEXT:    ret float [[DATA]]
1207 ;
1208   %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1209   ret float %data
1210 }
1211
1212 define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1213 ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
1214 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1215 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1216 ;
1217   %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1218   ret <1 x float> %data
1219 }
1220
1221 define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1222 ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
1223 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1224 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1225 ;
1226   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1227   ret <2 x float> %data
1228 }
1229
1230 define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1231 ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
1232 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1233 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1234 ;
1235   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1236   ret <4 x float> %data
1237 }
1238
1239 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1240 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
1241 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1242 ; CHECK-NEXT:    ret float [[DATA]]
1243 ;
1244   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1245   %elt0 = extractelement <2 x float> %data, i32 0
1246   ret float %elt0
1247 }
1248
1249 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1250 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
1251 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1252 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1253 ; CHECK-NEXT:    ret float [[ELT1]]
1254 ;
1255   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1256   %elt1 = extractelement <2 x float> %data, i32 1
1257   ret float %elt1
1258 }
1259
1260 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1261 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
1262 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1263 ; CHECK-NEXT:    ret float [[DATA]]
1264 ;
1265   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1266   %elt0 = extractelement <4 x float> %data, i32 0
1267   ret float %elt0
1268 }
1269
1270 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1271 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
1272 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1273 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1274 ; CHECK-NEXT:    ret float [[ELT1]]
1275 ;
1276   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1277   %elt1 = extractelement <4 x float> %data, i32 1
1278   ret float %elt1
1279 }
1280
1281 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1282 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
1283 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1284 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1285 ; CHECK-NEXT:    ret float [[ELT1]]
1286 ;
1287   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1288   %elt1 = extractelement <4 x float> %data, i32 2
1289   ret float %elt1
1290 }
1291
1292 define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1293 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
1294 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1295 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1296 ; CHECK-NEXT:    ret float [[ELT1]]
1297 ;
1298   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1299   %elt1 = extractelement <4 x float> %data, i32 3
1300   ret float %elt1
1301 }
1302
1303 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1304 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
1305 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1306 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1307 ;
1308   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1309   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1310   ret <2 x float> %shuf
1311 }
1312
1313 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1314 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
1315 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1316 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1317 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1318 ;
1319   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1320   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1321   ret <2 x float> %shuf
1322 }
1323
1324 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1325 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
1326 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1327 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1328 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1329 ;
1330   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1331   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1332   ret <2 x float> %shuf
1333 }
1334
1335 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1336 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
1337 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1338 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1339 ;
1340   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1341   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1342   ret <3 x float> %shuf
1343 }
1344
1345 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1346 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
1347 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1348 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1349 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1350 ;
1351   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1352   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1353   ret <3 x float> %shuf
1354 }
1355
1356 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1357 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
1358 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1359 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1360 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1361 ;
1362   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1363   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1364   ret <3 x float> %shuf
1365 }
1366
1367 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1368 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
1369 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1370 ; CHECK-NEXT:    ret float [[DATA]]
1371 ;
1372   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1373   %elt0 = extractelement <3 x float> %data, i32 0
1374   ret float %elt0
1375 }
1376
1377 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1378 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
1379 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1380 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1381 ; CHECK-NEXT:    ret float [[ELT1]]
1382 ;
1383   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1384   %elt1 = extractelement <3 x float> %data, i32 1
1385   ret float %elt1
1386 }
1387
1388 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1389 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
1390 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1391 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1392 ; CHECK-NEXT:    ret float [[ELT1]]
1393 ;
1394   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1395   %elt1 = extractelement <3 x float> %data, i32 2
1396   ret float %elt1
1397 }
1398
1399 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1400 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
1401 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1402 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1403 ;
1404   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1405   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1406   ret <2 x float> %shuf
1407 }
1408
1409 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1410 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
1411 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1412 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1413 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1414 ;
1415   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1416   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1417   ret <2 x float> %shuf
1418 }
1419
1420 define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1421 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
1422 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1423 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1424 ; CHECK-NEXT:    ret i32 [[VAR2]]
1425 ;
1426   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1427   %var1 = bitcast <4 x float> %var to <4 x i32>
1428   %var2 = extractelement <4 x i32> %var1, i32 0
1429   ret i32 %var2
1430 }
1431
1432 define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1433 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
1434 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1435 ; CHECK-NEXT:    ret float [[VAR]]
1436 ;
1437   %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1438   %var1 = extractelement <4 x float> %var, i32 0
1439   ret float %var1
1440 }
1441
1442 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1443 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
1444 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1445 ; CHECK-NEXT:    ret float [[DATA]]
1446 ;
1447   %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1448   %elt0 = extractelement <2 x float> %data, i32 0
1449   ret float %elt0
1450 }
1451
1452 declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
1453 declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
1454 declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
1455 declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
1456 declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
1457
1458 ; --------------------------------------------------------------------
1459 ; llvm.amdgcn.raw.ptr.buffer.load.format
1460 ; --------------------------------------------------------------------
1461
1462 define amdgpu_ps float @raw_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1463 ; CHECK-LABEL: @raw_ptr_buffer_load_format_f32(
1464 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1465 ; CHECK-NEXT:    ret float [[DATA]]
1466 ;
1467   %data = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1468   ret float %data
1469 }
1470
1471 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1472 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v1f32(
1473 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1474 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1475 ;
1476   %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1477   ret <1 x float> %data
1478 }
1479
1480 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1481 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v2f32(
1482 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1483 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1484 ;
1485   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1486   ret <2 x float> %data
1487 }
1488
1489 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1490 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v4f32(
1491 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1492 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1493 ;
1494   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1495   ret <4 x float> %data
1496 }
1497
1498 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1499 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v2f32(
1500 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1501 ; CHECK-NEXT:    ret float [[DATA]]
1502 ;
1503   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1504   %elt0 = extractelement <2 x float> %data, i32 0
1505   ret float %elt0
1506 }
1507
1508 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1509 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v2f32(
1510 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1511 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1512 ; CHECK-NEXT:    ret float [[ELT1]]
1513 ;
1514   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1515   %elt1 = extractelement <2 x float> %data, i32 1
1516   ret float %elt1
1517 }
1518
1519 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1520 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v4f32(
1521 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1522 ; CHECK-NEXT:    ret float [[DATA]]
1523 ;
1524   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1525   %elt0 = extractelement <4 x float> %data, i32 0
1526   ret float %elt0
1527 }
1528
1529 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1530 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v4f32(
1531 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1532 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1533 ; CHECK-NEXT:    ret float [[ELT1]]
1534 ;
1535   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1536   %elt1 = extractelement <4 x float> %data, i32 1
1537   ret float %elt1
1538 }
1539
1540 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1541 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v4f32(
1542 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1543 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1544 ; CHECK-NEXT:    ret float [[ELT1]]
1545 ;
1546   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1547   %elt1 = extractelement <4 x float> %data, i32 2
1548   ret float %elt1
1549 }
1550
1551 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1552 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_format_v4f32(
1553 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1554 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1555 ; CHECK-NEXT:    ret float [[ELT1]]
1556 ;
1557   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1558   %elt1 = extractelement <4 x float> %data, i32 3
1559   ret float %elt1
1560 }
1561
1562 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1563 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(
1564 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1565 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1566 ;
1567   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1568   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1569   ret <2 x float> %shuf
1570 }
1571
1572 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1573 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1574 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1575 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1576 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1577 ;
1578   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1579   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1580   ret <2 x float> %shuf
1581 }
1582
1583 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1584 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1585 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1586 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1587 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1588 ;
1589   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1590   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1591   ret <2 x float> %shuf
1592 }
1593
1594 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1595 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1596 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1597 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1598 ;
1599   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1600   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1601   ret <3 x float> %shuf
1602 }
1603
1604 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1605 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1606 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1607 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1608 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1609 ;
1610   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1611   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1612   ret <3 x float> %shuf
1613 }
1614
1615 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1616 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1617 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1618 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1619 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1620 ;
1621   %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1622   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1623   ret <3 x float> %shuf
1624 }
1625
1626 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1627 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v3f32(
1628 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1629 ; CHECK-NEXT:    ret float [[DATA]]
1630 ;
1631   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1632   %elt0 = extractelement <3 x float> %data, i32 0
1633   ret float %elt0
1634 }
1635
1636 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1637 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v3f32(
1638 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1639 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1640 ; CHECK-NEXT:    ret float [[ELT1]]
1641 ;
1642   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1643   %elt1 = extractelement <3 x float> %data, i32 1
1644   ret float %elt1
1645 }
1646
1647 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1648 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v3f32(
1649 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1650 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1651 ; CHECK-NEXT:    ret float [[ELT1]]
1652 ;
1653   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1654   %elt1 = extractelement <3 x float> %data, i32 2
1655   ret float %elt1
1656 }
1657
1658 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1659 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(
1660 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1661 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1662 ;
1663   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1664   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1665   ret <2 x float> %shuf
1666 }
1667
1668 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1669 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(
1670 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1671 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1672 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
1673 ;
1674   %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1675   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1676   ret <2 x float> %shuf
1677 }
1678
1679 define i32 @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1680 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(
1681 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1682 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1683 ; CHECK-NEXT:    ret i32 [[VAR2]]
1684 ;
1685   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1686   %var1 = bitcast <4 x float> %var to <4 x i32>
1687   %var2 = extractelement <4 x i32> %var1, i32 0
1688   ret i32 %var2
1689 }
1690
1691 define float @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1692 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(
1693 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1694 ; CHECK-NEXT:    ret float [[VAR]]
1695 ;
1696   %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1697   %var1 = extractelement <4 x float> %var, i32 0
1698   ret float %var1
1699 }
1700
1701 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1702 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(
1703 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1704 ; CHECK-NEXT:    ret float [[DATA]]
1705 ;
1706   %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1707   %elt0 = extractelement <2 x float> %data, i32 0
1708   ret float %elt0
1709 }
1710
1711 declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #1
1712 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32) #1
1713 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #1
1714 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32) #1
1715 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1
1716
1717 ; --------------------------------------------------------------------
1718 ; llvm.amdgcn.struct.buffer.load
1719 ; --------------------------------------------------------------------
1720
1721 define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1722 ; CHECK-LABEL: @struct_buffer_load_f32(
1723 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1724 ; CHECK-NEXT:    ret float [[DATA]]
1725 ;
1726   %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1727   ret float %data
1728 }
1729
1730 define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1731 ; CHECK-LABEL: @struct_buffer_load_v1f32(
1732 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1733 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
1734 ;
1735   %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1736   ret <1 x float> %data
1737 }
1738
1739 define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1740 ; CHECK-LABEL: @struct_buffer_load_v2f32(
1741 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1742 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1743 ;
1744   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1745   ret <2 x float> %data
1746 }
1747
1748 define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1749 ; CHECK-LABEL: @struct_buffer_load_v4f32(
1750 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1751 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
1752 ;
1753   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1754   ret <4 x float> %data
1755 }
1756
1757 define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1758 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
1759 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1760 ; CHECK-NEXT:    ret float [[DATA]]
1761 ;
1762   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1763   %elt0 = extractelement <2 x float> %data, i32 0
1764   ret float %elt0
1765 }
1766
1767 define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1768 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
1769 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1770 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1771 ; CHECK-NEXT:    ret float [[DATA]]
1772 ;
1773   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1774   %elt1 = extractelement <2 x float> %data, i32 1
1775   ret float %elt1
1776 }
1777
1778 define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1779 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
1780 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1781 ; CHECK-NEXT:    ret float [[DATA]]
1782 ;
1783   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1784   %elt0 = extractelement <4 x float> %data, i32 0
1785   ret float %elt0
1786 }
1787
1788 define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1789 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
1790 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1791 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1792 ; CHECK-NEXT:    ret float [[DATA]]
1793 ;
1794   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1795   %elt1 = extractelement <4 x float> %data, i32 1
1796   ret float %elt1
1797 }
1798
1799 define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1800 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
1801 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1802 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1803 ; CHECK-NEXT:    ret float [[DATA]]
1804 ;
1805   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1806   %elt1 = extractelement <4 x float> %data, i32 2
1807   ret float %elt1
1808 }
1809
1810 define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1811 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
1812 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
1813 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1814 ; CHECK-NEXT:    ret float [[DATA]]
1815 ;
1816   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1817   %elt1 = extractelement <4 x float> %data, i32 3
1818   ret float %elt1
1819 }
1820
1821 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1822 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
1823 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1824 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1825 ;
1826   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1827   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1828   ret <2 x float> %shuf
1829 }
1830
1831 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1832 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
1833 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1834 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1835 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1836 ;
1837   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1838   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1839   ret <2 x float> %shuf
1840 }
1841
1842 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1843 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
1844 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1845 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1846 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1847 ;
1848   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1849   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1850   ret <2 x float> %shuf
1851 }
1852
1853 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1854 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
1855 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1856 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1857 ;
1858   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1859   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1860   ret <3 x float> %shuf
1861 }
1862
1863 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1864 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
1865 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1866 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1867 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
1868 ;
1869   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1870   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1871   ret <3 x float> %shuf
1872 }
1873
1874 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1875 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
1876 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1877 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1878 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
1879 ;
1880   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1881   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1882   ret <3 x float> %shuf
1883 }
1884
1885 define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1886 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
1887 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1888 ; CHECK-NEXT:    ret float [[DATA]]
1889 ;
1890   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1891   %elt0 = extractelement <3 x float> %data, i32 0
1892   ret float %elt0
1893 }
1894
1895 define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1896 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
1897 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1898 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1899 ; CHECK-NEXT:    ret float [[DATA]]
1900 ;
1901   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1902   %elt1 = extractelement <3 x float> %data, i32 1
1903   ret float %elt1
1904 }
1905
1906 define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1907 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
1908 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1909 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1910 ; CHECK-NEXT:    ret float [[DATA]]
1911 ;
1912   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1913   %elt1 = extractelement <3 x float> %data, i32 2
1914   ret float %elt1
1915 }
1916
1917 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1918 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
1919 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1920 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1921 ;
1922   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1923   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1924   ret <2 x float> %shuf
1925 }
1926
1927 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1928 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
1929 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1930 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1931 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
1932 ;
1933   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1934   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1935   ret <2 x float> %shuf
1936 }
1937
1938 define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1939 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
1940 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1941 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1942 ; CHECK-NEXT:    ret i32 [[VAR2]]
1943 ;
1944   %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1945   %var1 = bitcast <4 x float> %var to <4 x i32>
1946   %var2 = extractelement <4 x i32> %var1, i32 0
1947   ret i32 %var2
1948 }
1949
1950 define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1951 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
1952 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1953 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1954 ; CHECK-NEXT:    ret float [[VAR2]]
1955 ;
1956   %var = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1957   %var1 = bitcast <4 x i32> %var to <4 x float>
1958   %var2 = extractelement <4 x float> %var1, i32 0
1959   ret float %var2
1960 }
1961
1962 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1963 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
1964 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1965 ; CHECK-NEXT:    ret float [[DATA]]
1966 ;
1967   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1968   %elt0 = extractelement <2 x float> %data, i32 0
1969   ret float %elt0
1970 }
1971
1972 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
1973 declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
1974 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
1975 declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
1976 declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
1977
1978 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
1979
1980 define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1981 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16(
1982 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1983 ; CHECK-NEXT:    ret half [[DATA]]
1984 ;
1985   %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1986   %elt0 = extractelement <2 x half> %data, i32 0
1987   ret half %elt0
1988 }
1989
1990 define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1991 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16(
1992 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1993 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1994 ; CHECK-NEXT:    ret half [[DATA]]
1995 ;
1996   %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1997   %elt1 = extractelement <2 x half> %data, i32 1
1998   ret half %elt1
1999 }
2000
2001 define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2002 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16(
2003 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2004 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2005 ; CHECK-NEXT:    ret half [[DATA]]
2006 ;
2007   %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2008   %elt1 = extractelement <3 x half> %data, i32 1
2009   ret half %elt1
2010 }
2011
2012 define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2013 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16(
2014 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2015 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2016 ; CHECK-NEXT:    ret half [[DATA]]
2017 ;
2018   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2019   %elt1 = extractelement <4 x half> %data, i32 1
2020   ret half %elt1
2021 }
2022
2023 define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2024 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16(
2025 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2026 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2027 ; CHECK-NEXT:    ret half [[DATA]]
2028 ;
2029   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2030   %elt1 = extractelement <4 x half> %data, i32 3
2031   ret half %elt1
2032 }
2033
2034 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2035 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16(
2036 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2037 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
2038 ;
2039   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2040   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2041   ret <2 x half> %shuf
2042 }
2043
2044 declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
2045 declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
2046 declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
2047 declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
2048
2049 define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2050 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8(
2051 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2052 ; CHECK-NEXT:    ret i8 [[DATA]]
2053 ;
2054   %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2055   %elt0 = extractelement <2 x i8> %data, i32 0
2056   ret i8 %elt0
2057 }
2058
2059 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2060 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8(
2061 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2062 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2063 ; CHECK-NEXT:    ret i8 [[DATA]]
2064 ;
2065   %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2066   %elt1 = extractelement <2 x i8> %data, i32 1
2067   ret i8 %elt1
2068 }
2069
2070 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2071 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8(
2072 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2073 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2074 ; CHECK-NEXT:    ret i8 [[DATA]]
2075 ;
2076   %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2077   %elt1 = extractelement <3 x i8> %data, i32 1
2078   ret i8 %elt1
2079 }
2080
2081 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2082 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8(
2083 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2084 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2085 ; CHECK-NEXT:    ret i8 [[DATA]]
2086 ;
2087   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2088   %elt1 = extractelement <4 x i8> %data, i32 1
2089   ret i8 %elt1
2090 }
2091
2092 define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2093 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8(
2094 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2095 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2096 ; CHECK-NEXT:    ret i8 [[DATA]]
2097 ;
2098   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2099   %elt1 = extractelement <4 x i8> %data, i32 3
2100   ret i8 %elt1
2101 }
2102
2103 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2104 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8(
2105 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2106 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
2107 ;
2108   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2109   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2110   ret <2 x i8> %shuf
2111 }
2112
2113 declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1
2114 declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1
2115 declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1
2116 declare <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1
2117
2118 ; --------------------------------------------------------------------
2119 ; llvm.amdgcn.struct.ptr.buffer.load
2120 ; --------------------------------------------------------------------
2121
2122 define amdgpu_ps float @struct_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2123 ; CHECK-LABEL: @struct_ptr_buffer_load_f32(
2124 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2125 ; CHECK-NEXT:    ret float [[DATA]]
2126 ;
2127   %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2128   ret float %data
2129 }
2130
2131 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2132 ; CHECK-LABEL: @struct_ptr_buffer_load_v1f32(
2133 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2134 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2135 ;
2136   %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2137   ret <1 x float> %data
2138 }
2139
2140 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2141 ; CHECK-LABEL: @struct_ptr_buffer_load_v2f32(
2142 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2143 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2144 ;
2145   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2146   ret <2 x float> %data
2147 }
2148
2149 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2150 ; CHECK-LABEL: @struct_ptr_buffer_load_v4f32(
2151 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2152 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2153 ;
2154   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2155   ret <4 x float> %data
2156 }
2157
2158 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2159 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f32(
2160 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2161 ; CHECK-NEXT:    ret float [[DATA]]
2162 ;
2163   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2164   %elt0 = extractelement <2 x float> %data, i32 0
2165   ret float %elt0
2166 }
2167
2168 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2169 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f32(
2170 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2171 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2172 ; CHECK-NEXT:    ret float [[DATA]]
2173 ;
2174   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2175   %elt1 = extractelement <2 x float> %data, i32 1
2176   ret float %elt1
2177 }
2178
2179 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2180 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v4f32(
2181 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2182 ; CHECK-NEXT:    ret float [[DATA]]
2183 ;
2184   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2185   %elt0 = extractelement <4 x float> %data, i32 0
2186   ret float %elt0
2187 }
2188
2189 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2190 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f32(
2191 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2192 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2193 ; CHECK-NEXT:    ret float [[DATA]]
2194 ;
2195   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2196   %elt1 = extractelement <4 x float> %data, i32 1
2197   ret float %elt1
2198 }
2199
2200 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2201 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v4f32(
2202 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2203 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2204 ; CHECK-NEXT:    ret float [[DATA]]
2205 ;
2206   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2207   %elt1 = extractelement <4 x float> %data, i32 2
2208   ret float %elt1
2209 }
2210
2211 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2212 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f32(
2213 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
2214 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2215 ; CHECK-NEXT:    ret float [[DATA]]
2216 ;
2217   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2218   %elt1 = extractelement <4 x float> %data, i32 3
2219   ret float %elt1
2220 }
2221
2222 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2223 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(
2224 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2225 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2226 ;
2227   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2228   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2229   ret <2 x float> %shuf
2230 }
2231
2232 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2233 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(
2234 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2235 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2236 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2237 ;
2238   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2239   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2240   ret <2 x float> %shuf
2241 }
2242
2243 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2244 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(
2245 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2246 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2247 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2248 ;
2249   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2250   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2251   ret <2 x float> %shuf
2252 }
2253
2254 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2255 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(
2256 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2257 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2258 ;
2259   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2260   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2261   ret <3 x float> %shuf
2262 }
2263
2264 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2265 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(
2266 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2267 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2268 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2269 ;
2270   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2271   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2272   ret <3 x float> %shuf
2273 }
2274
2275 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2276 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(
2277 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2278 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2279 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2280 ;
2281   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2282   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2283   ret <3 x float> %shuf
2284 }
2285
2286 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2287 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v3f32(
2288 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2289 ; CHECK-NEXT:    ret float [[DATA]]
2290 ;
2291   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2292   %elt0 = extractelement <3 x float> %data, i32 0
2293   ret float %elt0
2294 }
2295
2296 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2297 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f32(
2298 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2299 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2300 ; CHECK-NEXT:    ret float [[DATA]]
2301 ;
2302   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2303   %elt1 = extractelement <3 x float> %data, i32 1
2304   ret float %elt1
2305 }
2306
2307 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2308 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v3f32(
2309 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2310 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2311 ; CHECK-NEXT:    ret float [[DATA]]
2312 ;
2313   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2314   %elt1 = extractelement <3 x float> %data, i32 2
2315   ret float %elt1
2316 }
2317
2318 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2319 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(
2320 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2321 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2322 ;
2323   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2324   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2325   ret <2 x float> %shuf
2326 }
2327
2328 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2329 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(
2330 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2331 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2332 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2333 ;
2334   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2335   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2336   ret <2 x float> %shuf
2337 }
2338
2339 define i32 @extract0_bitcast_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2340 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4f32(
2341 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2342 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2343 ; CHECK-NEXT:    ret i32 [[VAR2]]
2344 ;
2345   %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2346   %var1 = bitcast <4 x float> %var to <4 x i32>
2347   %var2 = extractelement <4 x i32> %var1, i32 0
2348   ret i32 %var2
2349 }
2350
2351 define float @extract0_bitcast_struct_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2352 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4i32(
2353 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2354 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
2355 ; CHECK-NEXT:    ret float [[VAR2]]
2356 ;
2357   %var = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2358   %var1 = bitcast <4 x i32> %var to <4 x float>
2359   %var2 = extractelement <4 x float> %var1, i32 0
2360   ret float %var2
2361 }
2362
2363 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2364 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(
2365 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2366 ; CHECK-NEXT:    ret float [[DATA]]
2367 ;
2368   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2369   %elt0 = extractelement <2 x float> %data, i32 0
2370   ret float %elt0
2371 }
2372
2373 declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
2374 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
2375 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
2376 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
2377 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
2378
2379 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
2380
2381 define amdgpu_ps half @extract_elt0_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2382 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f16(
2383 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2384 ; CHECK-NEXT:    ret half [[DATA]]
2385 ;
2386   %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2387   %elt0 = extractelement <2 x half> %data, i32 0
2388   ret half %elt0
2389 }
2390
2391 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2392 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f16(
2393 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2394 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2395 ; CHECK-NEXT:    ret half [[DATA]]
2396 ;
2397   %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2398   %elt1 = extractelement <2 x half> %data, i32 1
2399   ret half %elt1
2400 }
2401
2402 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2403 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f16(
2404 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2405 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2406 ; CHECK-NEXT:    ret half [[DATA]]
2407 ;
2408   %data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2409   %elt1 = extractelement <3 x half> %data, i32 1
2410   ret half %elt1
2411 }
2412
2413 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2414 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f16(
2415 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2416 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2417 ; CHECK-NEXT:    ret half [[DATA]]
2418 ;
2419   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2420   %elt1 = extractelement <4 x half> %data, i32 1
2421   ret half %elt1
2422 }
2423
2424 define amdgpu_ps half @extract_elt3_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2425 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f16(
2426 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2427 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2428 ; CHECK-NEXT:    ret half [[DATA]]
2429 ;
2430   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2431   %elt1 = extractelement <4 x half> %data, i32 3
2432   ret half %elt1
2433 }
2434
2435 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2436 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(
2437 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2438 ; CHECK-NEXT:    ret <2 x half> [[DATA]]
2439 ;
2440   %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2441   %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2442   ret <2 x half> %shuf
2443 }
2444
2445 declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
2446 declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
2447 declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
2448 declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
2449
2450 define amdgpu_ps i8 @extract_elt0_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2451 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2i8(
2452 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2453 ; CHECK-NEXT:    ret i8 [[DATA]]
2454 ;
2455   %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2456   %elt0 = extractelement <2 x i8> %data, i32 0
2457   ret i8 %elt0
2458 }
2459
2460 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2461 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2i8(
2462 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2463 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2464 ; CHECK-NEXT:    ret i8 [[DATA]]
2465 ;
2466   %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2467   %elt1 = extractelement <2 x i8> %data, i32 1
2468   ret i8 %elt1
2469 }
2470
2471 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2472 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3i8(
2473 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2474 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2475 ; CHECK-NEXT:    ret i8 [[DATA]]
2476 ;
2477   %data = call <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2478   %elt1 = extractelement <3 x i8> %data, i32 1
2479   ret i8 %elt1
2480 }
2481
2482 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2483 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4i8(
2484 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2485 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2486 ; CHECK-NEXT:    ret i8 [[DATA]]
2487 ;
2488   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2489   %elt1 = extractelement <4 x i8> %data, i32 1
2490   ret i8 %elt1
2491 }
2492
2493 define amdgpu_ps i8 @extract_elt3_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2494 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4i8(
2495 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2496 ; CHECK-NEXT:    [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2497 ; CHECK-NEXT:    ret i8 [[DATA]]
2498 ;
2499   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2500   %elt1 = extractelement <4 x i8> %data, i32 3
2501   ret i8 %elt1
2502 }
2503
2504 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2505 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(
2506 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2507 ; CHECK-NEXT:    ret <2 x i8> [[DATA]]
2508 ;
2509   %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2510   %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2511   ret <2 x i8> %shuf
2512 }
2513
2514 declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #1
2515 declare <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32, i32) #1
2516 declare <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32, i32) #1
2517 declare <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32, i32) #1
2518
2519 ; --------------------------------------------------------------------
2520 ; llvm.amdgcn.struct.buffer.load.format
2521 ; --------------------------------------------------------------------
2522
2523 define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2524 ; CHECK-LABEL: @struct_buffer_load_format_f32(
2525 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2526 ; CHECK-NEXT:    ret float [[DATA]]
2527 ;
2528   %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2529   ret float %data
2530 }
2531
2532 define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2533 ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
2534 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2535 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2536 ;
2537   %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2538   ret <1 x float> %data
2539 }
2540
2541 define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2542 ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
2543 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2544 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2545 ;
2546   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2547   ret <2 x float> %data
2548 }
2549
2550 define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2551 ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
2552 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2553 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2554 ;
2555   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2556   ret <4 x float> %data
2557 }
2558
2559 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2560 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
2561 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2562 ; CHECK-NEXT:    ret float [[DATA]]
2563 ;
2564   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2565   %elt0 = extractelement <2 x float> %data, i32 0
2566   ret float %elt0
2567 }
2568
2569 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2570 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
2571 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2572 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2573 ; CHECK-NEXT:    ret float [[ELT1]]
2574 ;
2575   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2576   %elt1 = extractelement <2 x float> %data, i32 1
2577   ret float %elt1
2578 }
2579
2580 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2581 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
2582 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2583 ; CHECK-NEXT:    ret float [[DATA]]
2584 ;
2585   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2586   %elt0 = extractelement <4 x float> %data, i32 0
2587   ret float %elt0
2588 }
2589
2590 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2591 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
2592 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2593 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2594 ; CHECK-NEXT:    ret float [[ELT1]]
2595 ;
2596   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2597   %elt1 = extractelement <4 x float> %data, i32 1
2598   ret float %elt1
2599 }
2600
2601 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2602 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
2603 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2604 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2605 ; CHECK-NEXT:    ret float [[ELT1]]
2606 ;
2607   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2608   %elt1 = extractelement <4 x float> %data, i32 2
2609   ret float %elt1
2610 }
2611
2612 define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2613 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
2614 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2615 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2616 ; CHECK-NEXT:    ret float [[ELT1]]
2617 ;
2618   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2619   %elt1 = extractelement <4 x float> %data, i32 3
2620   ret float %elt1
2621 }
2622
2623 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2624 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
2625 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2626 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2627 ;
2628   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2629   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2630   ret <2 x float> %shuf
2631 }
2632
2633 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2634 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
2635 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2636 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2637 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2638 ;
2639   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2640   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2641   ret <2 x float> %shuf
2642 }
2643
2644 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2645 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
2646 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2647 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2648 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2649 ;
2650   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2651   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2652   ret <2 x float> %shuf
2653 }
2654
2655 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2656 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
2657 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2658 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2659 ;
2660   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2661   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2662   ret <3 x float> %shuf
2663 }
2664
2665 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2666 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
2667 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2668 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2669 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2670 ;
2671   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2672   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2673   ret <3 x float> %shuf
2674 }
2675
2676 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2677 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
2678 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2679 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2680 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2681 ;
2682   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2683   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2684   ret <3 x float> %shuf
2685 }
2686
2687 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2688 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
2689 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2690 ; CHECK-NEXT:    ret float [[DATA]]
2691 ;
2692   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2693   %elt0 = extractelement <3 x float> %data, i32 0
2694   ret float %elt0
2695 }
2696
2697 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2698 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
2699 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2700 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2701 ; CHECK-NEXT:    ret float [[ELT1]]
2702 ;
2703   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2704   %elt1 = extractelement <3 x float> %data, i32 1
2705   ret float %elt1
2706 }
2707
2708 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2709 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
2710 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2711 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2712 ; CHECK-NEXT:    ret float [[ELT1]]
2713 ;
2714   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2715   %elt1 = extractelement <3 x float> %data, i32 2
2716   ret float %elt1
2717 }
2718
2719 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2720 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
2721 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2722 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2723 ;
2724   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2725   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2726   ret <2 x float> %shuf
2727 }
2728
2729 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2730 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
2731 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2732 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2733 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2734 ;
2735   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2736   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2737   ret <2 x float> %shuf
2738 }
2739
2740 define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2741 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
2742 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2743 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2744 ; CHECK-NEXT:    ret i32 [[VAR2]]
2745 ;
2746   %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2747   %var1 = bitcast <4 x float> %var to <4 x i32>
2748   %var2 = extractelement <4 x i32> %var1, i32 0
2749   ret i32 %var2
2750 }
2751
2752 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2753 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
2754 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2755 ; CHECK-NEXT:    ret float [[DATA]]
2756 ;
2757   %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2758   %elt0 = extractelement <2 x float> %data, i32 0
2759   ret float %elt0
2760 }
2761
2762 declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
2763 declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
2764 declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
2765 declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
2766 declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
2767
2768 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1
2769
2770 ; --------------------------------------------------------------------
2771 ; llvm.amdgcn.struct.ptr.buffer.load.format
2772 ; --------------------------------------------------------------------
2773
2774 define amdgpu_ps float @struct_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2775 ; CHECK-LABEL: @struct_ptr_buffer_load_format_f32(
2776 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2777 ; CHECK-NEXT:    ret float [[DATA]]
2778 ;
2779   %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2780   ret float %data
2781 }
2782
2783 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2784 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v1f32(
2785 ; CHECK-NEXT:    [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2786 ; CHECK-NEXT:    ret <1 x float> [[DATA]]
2787 ;
2788   %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2789   ret <1 x float> %data
2790 }
2791
2792 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2793 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v2f32(
2794 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2795 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2796 ;
2797   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2798   ret <2 x float> %data
2799 }
2800
2801 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2802 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v4f32(
2803 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2804 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
2805 ;
2806   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2807   ret <4 x float> %data
2808 }
2809
2810 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2811 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v2f32(
2812 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2813 ; CHECK-NEXT:    ret float [[DATA]]
2814 ;
2815   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2816   %elt0 = extractelement <2 x float> %data, i32 0
2817   ret float %elt0
2818 }
2819
2820 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2821 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v2f32(
2822 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2823 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2824 ; CHECK-NEXT:    ret float [[ELT1]]
2825 ;
2826   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2827   %elt1 = extractelement <2 x float> %data, i32 1
2828   ret float %elt1
2829 }
2830
2831 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2832 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v4f32(
2833 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2834 ; CHECK-NEXT:    ret float [[DATA]]
2835 ;
2836   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2837   %elt0 = extractelement <4 x float> %data, i32 0
2838   ret float %elt0
2839 }
2840
2841 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2842 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v4f32(
2843 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2844 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2845 ; CHECK-NEXT:    ret float [[ELT1]]
2846 ;
2847   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2848   %elt1 = extractelement <4 x float> %data, i32 1
2849   ret float %elt1
2850 }
2851
2852 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2853 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v4f32(
2854 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2855 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2856 ; CHECK-NEXT:    ret float [[ELT1]]
2857 ;
2858   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2859   %elt1 = extractelement <4 x float> %data, i32 2
2860   ret float %elt1
2861 }
2862
2863 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2864 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_format_v4f32(
2865 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2866 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2867 ; CHECK-NEXT:    ret float [[ELT1]]
2868 ;
2869   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2870   %elt1 = extractelement <4 x float> %data, i32 3
2871   ret float %elt1
2872 }
2873
2874 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2875 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(
2876 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2877 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2878 ;
2879   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2880   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2881   ret <2 x float> %shuf
2882 }
2883
2884 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2885 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2886 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2887 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2888 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2889 ;
2890   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2891   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2892   ret <2 x float> %shuf
2893 }
2894
2895 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2896 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2897 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2898 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2899 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2900 ;
2901   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2902   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2903   ret <2 x float> %shuf
2904 }
2905
2906 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2907 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2908 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2909 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
2910 ;
2911   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2912   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2913   ret <3 x float> %shuf
2914 }
2915
2916 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2917 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2918 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2919 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2920 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2921 ;
2922   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2923   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2924   ret <3 x float> %shuf
2925 }
2926
2927 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2928 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2929 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2930 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2931 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
2932 ;
2933   %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2934   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2935   ret <3 x float> %shuf
2936 }
2937
2938 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2939 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v3f32(
2940 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2941 ; CHECK-NEXT:    ret float [[DATA]]
2942 ;
2943   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2944   %elt0 = extractelement <3 x float> %data, i32 0
2945   ret float %elt0
2946 }
2947
2948 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2949 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v3f32(
2950 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2951 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2952 ; CHECK-NEXT:    ret float [[ELT1]]
2953 ;
2954   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2955   %elt1 = extractelement <3 x float> %data, i32 1
2956   ret float %elt1
2957 }
2958
2959 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2960 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v3f32(
2961 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2962 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2963 ; CHECK-NEXT:    ret float [[ELT1]]
2964 ;
2965   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2966   %elt1 = extractelement <3 x float> %data, i32 2
2967   ret float %elt1
2968 }
2969
2970 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2971 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(
2972 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2973 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
2974 ;
2975   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2976   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2977   ret <2 x float> %shuf
2978 }
2979
2980 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2981 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(
2982 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2983 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2984 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
2985 ;
2986   %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2987   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2988   ret <2 x float> %shuf
2989 }
2990
2991 define i32 @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2992 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(
2993 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2994 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2995 ; CHECK-NEXT:    ret i32 [[VAR2]]
2996 ;
2997   %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2998   %var1 = bitcast <4 x float> %var to <4 x i32>
2999   %var2 = extractelement <4 x i32> %var1, i32 0
3000   ret i32 %var2
3001 }
3002
3003 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
3004 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(
3005 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
3006 ; CHECK-NEXT:    ret float [[DATA]]
3007 ;
3008   %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
3009   %elt0 = extractelement <2 x float> %data, i32 0
3010   ret float %elt0
3011 }
3012
3013 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3014 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
3015 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3016 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3017 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3018
3019 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.format.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3020
3021 ; --------------------------------------------------------------------
3022 ; llvm.amdgcn.raw.tbuffer.load
3023 ; --------------------------------------------------------------------
3024
3025 define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3026 ; CHECK-LABEL: @raw_tbuffer_load_f32(
3027 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3028 ; CHECK-NEXT:    ret float [[DATA]]
3029 ;
3030   %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3031   ret float %data
3032 }
3033
3034 define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3035 ; CHECK-LABEL: @raw_tbuffer_load_v2f32(
3036 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3037 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3038 ;
3039   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3040   ret <2 x float> %data
3041 }
3042
3043 define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3044 ; CHECK-LABEL: @raw_tbuffer_load_v4f32(
3045 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3046 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3047 ;
3048   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3049   ret <4 x float> %data
3050 }
3051
3052 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3053 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32(
3054 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3055 ; CHECK-NEXT:    ret float [[DATA]]
3056 ;
3057   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3058   %elt0 = extractelement <2 x float> %data, i32 0
3059   ret float %elt0
3060 }
3061
3062 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3063 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32(
3064 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3065 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3066 ; CHECK-NEXT:    ret float [[ELT1]]
3067 ;
3068   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3069   %elt1 = extractelement <2 x float> %data, i32 1
3070   ret float %elt1
3071 }
3072
3073 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3074 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32(
3075 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3076 ; CHECK-NEXT:    ret float [[DATA]]
3077 ;
3078   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3079   %elt0 = extractelement <4 x float> %data, i32 0
3080   ret float %elt0
3081 }
3082
3083 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3084 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32(
3085 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3086 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3087 ; CHECK-NEXT:    ret float [[ELT1]]
3088 ;
3089   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3090   %elt1 = extractelement <4 x float> %data, i32 1
3091   ret float %elt1
3092 }
3093
3094 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3095 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32(
3096 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3097 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3098 ; CHECK-NEXT:    ret float [[ELT1]]
3099 ;
3100   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3101   %elt1 = extractelement <4 x float> %data, i32 2
3102   ret float %elt1
3103 }
3104
3105 define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3106 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32(
3107 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3108 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3109 ; CHECK-NEXT:    ret float [[ELT1]]
3110 ;
3111   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3112   %elt1 = extractelement <4 x float> %data, i32 3
3113   ret float %elt1
3114 }
3115
3116 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3117 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32(
3118 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3119 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3120 ;
3121   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3122   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3123   ret <2 x float> %shuf
3124 }
3125
3126 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3127 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
3128 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3129 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3130 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3131 ;
3132   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3133   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3134   ret <2 x float> %shuf
3135 }
3136
3137 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3138 ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
3139 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3140 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3141 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3142 ;
3143   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3144   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3145   ret <2 x float> %shuf
3146 }
3147
3148 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3149 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(
3150 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3151 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3152 ;
3153   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3154   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3155   ret <3 x float> %shuf
3156 }
3157
3158 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3159 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
3160 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3161 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3162 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3163 ;
3164   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3165   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3166   ret <3 x float> %shuf
3167 }
3168
3169 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3170 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
3171 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3172 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3173 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3174 ;
3175   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3176   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3177   ret <3 x float> %shuf
3178 }
3179
3180 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3181 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32(
3182 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3183 ; CHECK-NEXT:    ret float [[DATA]]
3184 ;
3185   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3186   %elt0 = extractelement <3 x float> %data, i32 0
3187   ret float %elt0
3188 }
3189
3190 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3191 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32(
3192 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3193 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3194 ; CHECK-NEXT:    ret float [[ELT1]]
3195 ;
3196   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3197   %elt1 = extractelement <3 x float> %data, i32 1
3198   ret float %elt1
3199 }
3200
3201 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3202 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32(
3203 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3204 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3205 ; CHECK-NEXT:    ret float [[ELT1]]
3206 ;
3207   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3208   %elt1 = extractelement <3 x float> %data, i32 2
3209   ret float %elt1
3210 }
3211
3212 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3213 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32(
3214 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3215 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3216 ;
3217   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3218   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3219   ret <2 x float> %shuf
3220 }
3221
3222 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3223 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
3224 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3225 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3226 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3227 ;
3228   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3229   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3230   ret <2 x float> %shuf
3231 }
3232
3233 define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3234 ; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32(
3235 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3236 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3237 ; CHECK-NEXT:    ret i32 [[VAR2]]
3238 ;
3239   %var = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3240   %var1 = bitcast <4 x float> %var to <4 x i32>
3241   %var2 = extractelement <4 x i32> %var1, i32 0
3242   ret i32 %var2
3243 }
3244
3245 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3246 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(
3247 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3248 ; CHECK-NEXT:    ret float [[DATA]]
3249 ;
3250   %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3251   %elt0 = extractelement <2 x float> %data, i32 0
3252   ret float %elt0
3253 }
3254
3255 declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
3256 declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
3257 declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
3258 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
3259
3260 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
3261
3262 define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3263 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
3264 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3265 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3266 ; CHECK-NEXT:    ret half [[ELT1]]
3267 ;
3268   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3269   %elt1 = extractelement <4 x half> %data, i32 3
3270   ret half %elt1
3271 }
3272
3273 define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3274 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
3275 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3276 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3277 ; CHECK-NEXT:    ret half [[ELT1]]
3278 ;
3279   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3280   %elt1 = extractelement <4 x half> %data, i32 2
3281   ret half %elt1
3282 }
3283
3284 define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3285 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
3286 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3287 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3288 ; CHECK-NEXT:    ret half [[ELT1]]
3289 ;
3290   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3291   %elt1 = extractelement <4 x half> %data, i32 1
3292   ret half %elt1
3293 }
3294
3295 define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3296 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
3297 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3298 ; CHECK-NEXT:    ret half [[DATA]]
3299 ;
3300   %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3301   %elt1 = extractelement <4 x half> %data, i32 0
3302   ret half %elt1
3303 }
3304
3305 declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
3306 declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
3307 declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
3308 declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
3309
3310 ; --------------------------------------------------------------------
3311 ; llvm.amdgcn.raw.ptr.tbuffer.load
3312 ; --------------------------------------------------------------------
3313
3314 define amdgpu_ps float @raw_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3315 ; CHECK-LABEL: @raw_ptr_tbuffer_load_f32(
3316 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3317 ; CHECK-NEXT:    ret float [[DATA]]
3318 ;
3319   %data = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3320   ret float %data
3321 }
3322
3323 define amdgpu_ps <2 x float> @raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3324 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v2f32(
3325 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3326 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3327 ;
3328   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3329   ret <2 x float> %data
3330 }
3331
3332 define amdgpu_ps <4 x float> @raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3333 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v4f32(
3334 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3335 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3336 ;
3337   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3338   ret <4 x float> %data
3339 }
3340
3341 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3342 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v2f32(
3343 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3344 ; CHECK-NEXT:    ret float [[DATA]]
3345 ;
3346   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3347   %elt0 = extractelement <2 x float> %data, i32 0
3348   ret float %elt0
3349 }
3350
3351 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3352 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v2f32(
3353 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3354 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3355 ; CHECK-NEXT:    ret float [[ELT1]]
3356 ;
3357   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3358   %elt1 = extractelement <2 x float> %data, i32 1
3359   ret float %elt1
3360 }
3361
3362 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3363 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f32(
3364 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3365 ; CHECK-NEXT:    ret float [[DATA]]
3366 ;
3367   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3368   %elt0 = extractelement <4 x float> %data, i32 0
3369   ret float %elt0
3370 }
3371
3372 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3373 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f32(
3374 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3375 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3376 ; CHECK-NEXT:    ret float [[ELT1]]
3377 ;
3378   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3379   %elt1 = extractelement <4 x float> %data, i32 1
3380   ret float %elt1
3381 }
3382
3383 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3384 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f32(
3385 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3386 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3387 ; CHECK-NEXT:    ret float [[ELT1]]
3388 ;
3389   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3390   %elt1 = extractelement <4 x float> %data, i32 2
3391   ret float %elt1
3392 }
3393
3394 define amdgpu_ps float @extract_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3395 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f32(
3396 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3397 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3398 ; CHECK-NEXT:    ret float [[ELT1]]
3399 ;
3400   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3401   %elt1 = extractelement <4 x float> %data, i32 3
3402   ret float %elt1
3403 }
3404
3405 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3406 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(
3407 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3408 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3409 ;
3410   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3411   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3412   ret <2 x float> %shuf
3413 }
3414
3415 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3416 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3417 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3418 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3419 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3420 ;
3421   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3422   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3423   ret <2 x float> %shuf
3424 }
3425
3426 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3427 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3428 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3429 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3430 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3431 ;
3432   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3433   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3434   ret <2 x float> %shuf
3435 }
3436
3437 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3438 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3439 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3440 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3441 ;
3442   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3443   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3444   ret <3 x float> %shuf
3445 }
3446
3447 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3448 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3449 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3450 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3451 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3452 ;
3453   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3454   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3455   ret <3 x float> %shuf
3456 }
3457
3458 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3459 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3460 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3461 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3462 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3463 ;
3464   %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3465   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3466   ret <3 x float> %shuf
3467 }
3468
3469 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3470 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v3f32(
3471 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3472 ; CHECK-NEXT:    ret float [[DATA]]
3473 ;
3474   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3475   %elt0 = extractelement <3 x float> %data, i32 0
3476   ret float %elt0
3477 }
3478
3479 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3480 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v3f32(
3481 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3482 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3483 ; CHECK-NEXT:    ret float [[ELT1]]
3484 ;
3485   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3486   %elt1 = extractelement <3 x float> %data, i32 1
3487   ret float %elt1
3488 }
3489
3490 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3491 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v3f32(
3492 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3493 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3494 ; CHECK-NEXT:    ret float [[ELT1]]
3495 ;
3496   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3497   %elt1 = extractelement <3 x float> %data, i32 2
3498   ret float %elt1
3499 }
3500
3501 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3502 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(
3503 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3504 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3505 ;
3506   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3507   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3508   ret <2 x float> %shuf
3509 }
3510
3511 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3512 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(
3513 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3514 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3515 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3516 ;
3517   %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3518   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3519   ret <2 x float> %shuf
3520 }
3521
3522 define i32 @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3523 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(
3524 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3525 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3526 ; CHECK-NEXT:    ret i32 [[VAR2]]
3527 ;
3528   %var = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3529   %var1 = bitcast <4 x float> %var to <4 x i32>
3530   %var2 = extractelement <4 x i32> %var1, i32 0
3531   ret i32 %var2
3532 }
3533
3534 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3535 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(
3536 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3537 ; CHECK-NEXT:    ret float [[DATA]]
3538 ;
3539   %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3540   %elt0 = extractelement <2 x float> %data, i32 0
3541   ret float %elt0
3542 }
3543
3544 declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3545 declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3546 declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3547 declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3548
3549 declare <4 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3550
3551 define amdgpu_ps half @extract_elt3_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3552 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f16(
3553 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3554 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3555 ; CHECK-NEXT:    ret half [[ELT1]]
3556 ;
3557   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3558   %elt1 = extractelement <4 x half> %data, i32 3
3559   ret half %elt1
3560 }
3561
3562 define amdgpu_ps half @extract_elt2_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3563 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f16(
3564 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3565 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3566 ; CHECK-NEXT:    ret half [[ELT1]]
3567 ;
3568   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3569   %elt1 = extractelement <4 x half> %data, i32 2
3570   ret half %elt1
3571 }
3572
3573 define amdgpu_ps half @extract_elt1_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3574 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f16(
3575 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3576 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3577 ; CHECK-NEXT:    ret half [[ELT1]]
3578 ;
3579   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3580   %elt1 = extractelement <4 x half> %data, i32 1
3581   ret half %elt1
3582 }
3583
3584 define amdgpu_ps half @extract_elt0_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3585 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f16(
3586 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3587 ; CHECK-NEXT:    ret half [[DATA]]
3588 ;
3589   %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3590   %elt1 = extractelement <4 x half> %data, i32 0
3591   ret half %elt1
3592 }
3593
3594 declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
3595 declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
3596 declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
3597 declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
3598
3599 ; --------------------------------------------------------------------
3600 ; llvm.amdgcn.struct.tbuffer.load
3601 ; --------------------------------------------------------------------
3602
3603 define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3604 ; CHECK-LABEL: @struct_tbuffer_load_f32(
3605 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3606 ; CHECK-NEXT:    ret float [[DATA]]
3607 ;
3608   %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3609   ret float %data
3610 }
3611
3612 define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3613 ; CHECK-LABEL: @struct_tbuffer_load_v2f32(
3614 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3615 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3616 ;
3617   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3618   ret <2 x float> %data
3619 }
3620
3621 define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3622 ; CHECK-LABEL: @struct_tbuffer_load_v4f32(
3623 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3624 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3625 ;
3626   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3627   ret <4 x float> %data
3628 }
3629
3630 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3631 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32(
3632 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3633 ; CHECK-NEXT:    ret float [[DATA]]
3634 ;
3635   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3636   %elt0 = extractelement <2 x float> %data, i32 0
3637   ret float %elt0
3638 }
3639
3640 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3641 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32(
3642 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3643 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3644 ; CHECK-NEXT:    ret float [[ELT1]]
3645 ;
3646   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3647   %elt1 = extractelement <2 x float> %data, i32 1
3648   ret float %elt1
3649 }
3650
3651 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3652 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32(
3653 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3654 ; CHECK-NEXT:    ret float [[DATA]]
3655 ;
3656   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3657   %elt0 = extractelement <4 x float> %data, i32 0
3658   ret float %elt0
3659 }
3660
3661 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3662 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32(
3663 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3664 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3665 ; CHECK-NEXT:    ret float [[ELT1]]
3666 ;
3667   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3668   %elt1 = extractelement <4 x float> %data, i32 1
3669   ret float %elt1
3670 }
3671
3672 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3673 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32(
3674 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3675 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3676 ; CHECK-NEXT:    ret float [[ELT1]]
3677 ;
3678   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3679   %elt1 = extractelement <4 x float> %data, i32 2
3680   ret float %elt1
3681 }
3682
3683 define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3684 ; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32(
3685 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3686 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3687 ; CHECK-NEXT:    ret float [[ELT1]]
3688 ;
3689   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3690   %elt1 = extractelement <4 x float> %data, i32 3
3691   ret float %elt1
3692 }
3693
3694 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3695 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32(
3696 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3697 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3698 ;
3699   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3700   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3701   ret <2 x float> %shuf
3702 }
3703
3704 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3705 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
3706 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3707 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3708 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3709 ;
3710   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3711   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3712   ret <2 x float> %shuf
3713 }
3714
3715 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3716 ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32(
3717 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3718 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3719 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3720 ;
3721   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3722   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3723   ret <2 x float> %shuf
3724 }
3725
3726 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3727 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(
3728 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3729 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3730 ;
3731   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3732   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3733   ret <3 x float> %shuf
3734 }
3735
3736 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3737 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(
3738 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3739 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3740 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3741 ;
3742   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3743   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3744   ret <3 x float> %shuf
3745 }
3746
3747 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3748 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(
3749 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3750 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3751 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3752 ;
3753   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3754   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3755   ret <3 x float> %shuf
3756 }
3757
3758 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3759 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32(
3760 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3761 ; CHECK-NEXT:    ret float [[DATA]]
3762 ;
3763   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3764   %elt0 = extractelement <3 x float> %data, i32 0
3765   ret float %elt0
3766 }
3767
3768 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3769 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32(
3770 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3771 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3772 ; CHECK-NEXT:    ret float [[ELT1]]
3773 ;
3774   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3775   %elt1 = extractelement <3 x float> %data, i32 1
3776   ret float %elt1
3777 }
3778
3779 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3780 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32(
3781 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3782 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3783 ; CHECK-NEXT:    ret float [[ELT1]]
3784 ;
3785   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3786   %elt1 = extractelement <3 x float> %data, i32 2
3787   ret float %elt1
3788 }
3789
3790 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3791 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32(
3792 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3793 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3794 ;
3795   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3796   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3797   ret <2 x float> %shuf
3798 }
3799
3800 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3801 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32(
3802 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3803 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3804 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3805 ;
3806   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3807   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3808   ret <2 x float> %shuf
3809 }
3810
3811 define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3812 ; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32(
3813 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3814 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3815 ; CHECK-NEXT:    ret i32 [[VAR2]]
3816 ;
3817   %var = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3818   %var1 = bitcast <4 x float> %var to <4 x i32>
3819   %var2 = extractelement <4 x i32> %var1, i32 0
3820   ret i32 %var2
3821 }
3822
3823 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3824 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(
3825 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
3826 ; CHECK-NEXT:    ret float [[DATA]]
3827 ;
3828   %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
3829   %elt0 = extractelement <2 x float> %data, i32 0
3830   ret float %elt0
3831 }
3832
3833 declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3834 declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3835 declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3836 declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3837
3838 declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1
3839
3840 ; --------------------------------------------------------------------
3841 ; llvm.amdgcn.struct.ptr.tbuffer.load
3842 ; --------------------------------------------------------------------
3843
3844 define amdgpu_ps float @struct_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3845 ; CHECK-LABEL: @struct_ptr_tbuffer_load_f32(
3846 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3847 ; CHECK-NEXT:    ret float [[DATA]]
3848 ;
3849   %data = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3850   ret float %data
3851 }
3852
3853 define amdgpu_ps <2 x float> @struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3854 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v2f32(
3855 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3856 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3857 ;
3858   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3859   ret <2 x float> %data
3860 }
3861
3862 define amdgpu_ps <4 x float> @struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3863 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v4f32(
3864 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3865 ; CHECK-NEXT:    ret <4 x float> [[DATA]]
3866 ;
3867   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3868   ret <4 x float> %data
3869 }
3870
3871 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3872 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v2f32(
3873 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3874 ; CHECK-NEXT:    ret float [[DATA]]
3875 ;
3876   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3877   %elt0 = extractelement <2 x float> %data, i32 0
3878   ret float %elt0
3879 }
3880
3881 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3882 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v2f32(
3883 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3884 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3885 ; CHECK-NEXT:    ret float [[ELT1]]
3886 ;
3887   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3888   %elt1 = extractelement <2 x float> %data, i32 1
3889   ret float %elt1
3890 }
3891
3892 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3893 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v4f32(
3894 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3895 ; CHECK-NEXT:    ret float [[DATA]]
3896 ;
3897   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3898   %elt0 = extractelement <4 x float> %data, i32 0
3899   ret float %elt0
3900 }
3901
3902 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3903 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v4f32(
3904 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3905 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3906 ; CHECK-NEXT:    ret float [[ELT1]]
3907 ;
3908   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3909   %elt1 = extractelement <4 x float> %data, i32 1
3910   ret float %elt1
3911 }
3912
3913 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3914 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v4f32(
3915 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3916 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3917 ; CHECK-NEXT:    ret float [[ELT1]]
3918 ;
3919   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3920   %elt1 = extractelement <4 x float> %data, i32 2
3921   ret float %elt1
3922 }
3923
3924 define amdgpu_ps float @extract_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3925 ; CHECK-LABEL: @extract_elt3_struct_ptr_tbuffer_load_v4f32(
3926 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3927 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3928 ; CHECK-NEXT:    ret float [[ELT1]]
3929 ;
3930   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3931   %elt1 = extractelement <4 x float> %data, i32 3
3932   ret float %elt1
3933 }
3934
3935 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3936 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(
3937 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3938 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
3939 ;
3940   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3941   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3942   ret <2 x float> %shuf
3943 }
3944
3945 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3946 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3947 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3948 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3949 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3950 ;
3951   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3952   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3953   ret <2 x float> %shuf
3954 }
3955
3956 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3957 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3958 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3959 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3960 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
3961 ;
3962   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3963   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3964   ret <2 x float> %shuf
3965 }
3966
3967 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3968 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3969 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3970 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
3971 ;
3972   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3973   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3974   ret <3 x float> %shuf
3975 }
3976
3977 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3978 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3979 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3980 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3981 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3982 ;
3983   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3984   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3985   ret <3 x float> %shuf
3986 }
3987
3988 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3989 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3990 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3991 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3992 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
3993 ;
3994   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3995   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3996   ret <3 x float> %shuf
3997 }
3998
3999 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4000 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v3f32(
4001 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4002 ; CHECK-NEXT:    ret float [[DATA]]
4003 ;
4004   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4005   %elt0 = extractelement <3 x float> %data, i32 0
4006   ret float %elt0
4007 }
4008
4009 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4010 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v3f32(
4011 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4012 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
4013 ; CHECK-NEXT:    ret float [[ELT1]]
4014 ;
4015   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4016   %elt1 = extractelement <3 x float> %data, i32 1
4017   ret float %elt1
4018 }
4019
4020 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4021 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v3f32(
4022 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4023 ; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
4024 ; CHECK-NEXT:    ret float [[ELT1]]
4025 ;
4026   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4027   %elt1 = extractelement <3 x float> %data, i32 2
4028   ret float %elt1
4029 }
4030
4031 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4032 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(
4033 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4034 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4035 ;
4036   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4037   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
4038   ret <2 x float> %shuf
4039 }
4040
4041 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4042 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(
4043 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4044 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
4045 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
4046 ;
4047   %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4048   %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
4049   ret <2 x float> %shuf
4050 }
4051
4052 define i32 @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4053 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(
4054 ; CHECK-NEXT:    [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4055 ; CHECK-NEXT:    [[VAR2:%.*]] = bitcast float [[VAR]] to i32
4056 ; CHECK-NEXT:    ret i32 [[VAR2]]
4057 ;
4058   %var = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4059   %var1 = bitcast <4 x float> %var to <4 x i32>
4060   %var2 = extractelement <4 x i32> %var1, i32 0
4061   ret i32 %var2
4062 }
4063
4064 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4065 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(
4066 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
4067 ; CHECK-NEXT:    ret float [[DATA]]
4068 ;
4069   %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
4070   %elt0 = extractelement <2 x float> %data, i32 0
4071   ret float %elt0
4072 }
4073
4074 declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4075 declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4076 declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4077 declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4078
4079 declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4080
4081 ; --------------------------------------------------------------------
4082 ; llvm.amdgcn.image.sample
4083 ; --------------------------------------------------------------------
4084
4085 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4086 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
4087 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4088 ; CHECK-NEXT:    ret float [[DATA]]
4089 ;
4090   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4091   %elt0 = extractelement <4 x float> %data, i32 0
4092   ret float %elt0
4093 }
4094
4095 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4096 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4097 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe(
4098 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0)
4099 ; CHECK-NEXT:    [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4100 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4101 ; CHECK-NEXT:    ret float [[ELT0]]
4102 ;
4103   %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
4104   %data.vec = extractvalue {<4 x float>,i32} %data, 0
4105   %elt0 = extractelement <4 x float> %data.vec, i32 0
4106   ret float %elt0
4107 }
4108
4109 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4110 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4111 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe(
4112 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0)
4113 ; CHECK-NEXT:    [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4114 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4115 ; CHECK-NEXT:    ret float [[ELT0]]
4116 ;
4117   %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
4118   %data.vec = extractvalue {<4 x float>,i32} %data, 0
4119   %elt0 = extractelement <4 x float> %data.vec, i32 0
4120   ret float %elt0
4121 }
4122
4123 define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4124 ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
4125 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4126 ; CHECK-NEXT:    ret float [[DATA]]
4127 ;
4128   %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4129   %elt0 = extractelement <4 x float> %data, i32 0
4130   ret float %elt0
4131 }
4132
4133 define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4134 ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
4135 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4136 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
4137 ; CHECK-NEXT:    ret float [[ELT0]]
4138 ;
4139   %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4140   %elt0 = extractelement <4 x float> %data, i32 0
4141   ret float %elt0
4142 }
4143
4144 define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4145 ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
4146 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4147 ; CHECK-NEXT:    ret float [[DATA]]
4148 ;
4149   %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4150   %elt0 = extractelement <4 x float> %data, i32 0
4151   ret float %elt0
4152 }
4153
4154 define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4155 ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
4156 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4157 ; CHECK-NEXT:    ret float [[DATA]]
4158 ;
4159   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4160   %elt0 = extractelement <4 x float> %data, i32 0
4161   ret float %elt0
4162 }
4163
4164 define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4165 ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
4166 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4167 ; CHECK-NEXT:    ret float [[DATA]]
4168 ;
4169   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4170   %elt0 = extractelement <4 x float> %data, i32 0
4171   ret float %elt0
4172 }
4173
4174 define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4175 ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
4176 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4177 ; CHECK-NEXT:    ret float [[DATA]]
4178 ;
4179   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4180   %elt0 = extractelement <4 x float> %data, i32 0
4181   ret float %elt0
4182 }
4183
4184 define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4185 ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
4186 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4187 ; CHECK-NEXT:    ret float [[DATA]]
4188 ;
4189   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4190   %elt0 = extractelement <4 x float> %data, i32 0
4191   ret float %elt0
4192 }
4193
4194 define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4195 ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
4196 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4197 ; CHECK-NEXT:    ret float [[DATA]]
4198 ;
4199   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4200   %elt0 = extractelement <4 x float> %data, i32 0
4201   ret float %elt0
4202 }
4203
4204 define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4205 ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
4206 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4207 ; CHECK-NEXT:    ret float [[DATA]]
4208 ;
4209   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4210   %elt0 = extractelement <4 x float> %data, i32 0
4211   ret float %elt0
4212 }
4213
4214 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4215 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
4216 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4217 ; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0
4218 ; CHECK-NEXT:    ret <2 x float> [[SHUF]]
4219 ;
4220   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4221   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4222   ret <2 x float> %shuf
4223 }
4224
4225 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4226 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
4227 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4228 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4229 ;
4230   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4231   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4232   ret <2 x float> %shuf
4233 }
4234
4235 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4236 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
4237 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4238 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4239 ;
4240   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4241   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4242   ret <2 x float> %shuf
4243 }
4244
4245 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4246 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
4247 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4248 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4249 ;
4250   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4251   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4252   ret <2 x float> %shuf
4253 }
4254
4255 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4256 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
4257 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4258 ; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0
4259 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4260 ;
4261   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4262   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4263   ret <3 x float> %shuf
4264 }
4265
4266 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4267 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
4268 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4269 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4270 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4271 ;
4272   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4273   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4274   ret <3 x float> %shuf
4275 }
4276
4277 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4278 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
4279 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4280 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4281 ; CHECK-NEXT:    ret <3 x float> [[SHUF]]
4282 ;
4283   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4284   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4285   ret <3 x float> %shuf
4286 }
4287
4288 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4289 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
4290 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4291 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4292 ;
4293   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4294   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4295   ret <3 x float> %shuf
4296 }
4297
4298 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4299 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
4300 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4301 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4302 ;
4303   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4304   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4305   ret <3 x float> %shuf
4306 }
4307
4308 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4309 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4310 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4311 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4312 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4313
4314 ; --------------------------------------------------------------------
4315 ; llvm.amdgcn.image.sample.cl
4316 ; --------------------------------------------------------------------
4317
4318 define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4319 ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
4320 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4321 ; CHECK-NEXT:    ret float [[DATA]]
4322 ;
4323   %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4324   %elt0 = extractelement <4 x float> %data, i32 1
4325   ret float %elt0
4326 }
4327
4328 declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4329
4330 ; --------------------------------------------------------------------
4331 ; llvm.amdgcn.image.sample.d
4332 ; --------------------------------------------------------------------
4333
4334 define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4335 ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
4336 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4337 ; CHECK-NEXT:    ret float [[DATA]]
4338 ;
4339   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4340   %elt0 = extractelement <4 x float> %data, i32 2
4341   ret float %elt0
4342 }
4343
4344 declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4345
4346 ; --------------------------------------------------------------------
4347 ; llvm.amdgcn.image.sample.d.cl
4348 ; --------------------------------------------------------------------
4349
4350 define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4351 ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
4352 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4353 ; CHECK-NEXT:    ret float [[DATA]]
4354 ;
4355   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4356   %elt0 = extractelement <4 x float> %data, i32 3
4357   ret float %elt0
4358 }
4359
4360 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4361
4362 ; --------------------------------------------------------------------
4363 ; llvm.amdgcn.image.sample.l
4364 ; --------------------------------------------------------------------
4365
4366 define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4367 ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
4368 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4369 ; CHECK-NEXT:    ret float [[DATA]]
4370 ;
4371   %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4372   %elt0 = extractelement <2 x float> %data, i32 1
4373   ret float %elt0
4374 }
4375
4376 declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4377
4378 ; --------------------------------------------------------------------
4379 ; llvm.amdgcn.image.sample.b
4380 ; --------------------------------------------------------------------
4381
4382 define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4383 ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
4384 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4385 ; CHECK-NEXT:    ret float [[DATA]]
4386 ;
4387   %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4388   %elt0 = extractelement <4 x float> %data, i32 1
4389   ret float %elt0
4390 }
4391
4392 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4393
4394 ; --------------------------------------------------------------------
4395 ; llvm.amdgcn.image.sample.b.cl
4396 ; --------------------------------------------------------------------
4397
4398 define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4399 ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
4400 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4401 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4402 ;
4403   %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4404   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
4405   ret <2 x float> %shuf
4406 }
4407
4408 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4409
4410 ; --------------------------------------------------------------------
4411 ; llvm.amdgcn.image.sample.lz
4412 ; --------------------------------------------------------------------
4413
4414 define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4415 ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
4416 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4417 ; CHECK-NEXT:    ret <2 x float> [[DATA]]
4418 ;
4419   %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4420   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 3>
4421   ret <2 x float> %shuf
4422 }
4423
4424 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4425
4426 ; --------------------------------------------------------------------
4427 ; llvm.amdgcn.image.sample.cd
4428 ; --------------------------------------------------------------------
4429
4430 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4431 ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
4432 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4433 ; CHECK-NEXT:    ret <3 x float> [[DATA]]
4434 ;
4435   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4436   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
4437   ret <3 x float> %shuf
4438 }
4439
4440 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4441
4442 ; --------------------------------------------------------------------
4443 ; llvm.amdgcn.image.sample.cd.cl
4444 ; --------------------------------------------------------------------
4445
4446 define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4447 ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(
4448 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4449 ; CHECK-NEXT:    ret half [[DATA]]
4450 ;
4451   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4452   %elt0 = extractelement <4 x half> %data, i32 3
4453   ret half %elt0
4454 }
4455
4456 define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4457 ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(
4458 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4459 ; CHECK-NEXT:    ret half [[DATA]]
4460 ;
4461   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4462   %elt0 = extractelement <4 x half> %data, i32 2
4463   ret half %elt0
4464 }
4465
4466 define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4467 ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(
4468 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4469 ; CHECK-NEXT:    ret half [[DATA]]
4470 ;
4471   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4472   %elt0 = extractelement <4 x half> %data, i32 1
4473   ret half %elt0
4474 }
4475
4476 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4477 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
4478 ; CHECK-NEXT:    [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4479 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4480 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4481 ;
4482   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4483   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
4484   ret <4 x half> %res
4485 }
4486
4487 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4488 ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
4489 ; CHECK-NEXT:    [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4490 ; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
4491 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4492 ;
4493   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4494   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
4495   ret <4 x half> %res
4496 }
4497
4498 define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4499 ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
4500 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4501 ; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0
4502 ; CHECK-NEXT:    ret <4 x half> [[RES]]
4503 ;
4504   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4505   %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
4506   ret <4 x half> %res
4507 }
4508
4509 define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4510 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
4511 ; CHECK-NEXT:    [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4512 ; CHECK-NEXT:    ret half [[DATA]]
4513 ;
4514   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4515   %elt0 = extractelement <4 x half> %data, i32 0
4516   ret half %elt0
4517 }
4518
4519 declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4520
4521 ; --------------------------------------------------------------------
4522 ; llvm.amdgcn.image.sample.c
4523 ; --------------------------------------------------------------------
4524
4525 define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4526 ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
4527 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4528 ; CHECK-NEXT:    ret float [[DATA]]
4529 ;
4530   %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4531   %elt0 = extractelement <4 x float> %data, i32 0
4532   ret float %elt0
4533 }
4534
4535 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4536
4537 ; --------------------------------------------------------------------
4538 ; llvm.amdgcn.image.sample.c.cl
4539 ; --------------------------------------------------------------------
4540
4541 define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4542 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
4543 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4544 ; CHECK-NEXT:    ret float [[DATA]]
4545 ;
4546   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4547   %elt0 = extractelement <4 x float> %data, i32 0
4548   ret float %elt0
4549 }
4550
4551 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4552
4553 ; --------------------------------------------------------------------
4554 ; llvm.amdgcn.image.sample.c.d
4555 ; --------------------------------------------------------------------
4556
4557 define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4558 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
4559 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4560 ; CHECK-NEXT:    ret float [[DATA]]
4561 ;
4562   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4563   %elt0 = extractelement <4 x float> %data, i32 0
4564   ret float %elt0
4565 }
4566
4567 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4568
4569 ; --------------------------------------------------------------------
4570 ; llvm.amdgcn.image.sample.c.d.cl
4571 ; --------------------------------------------------------------------
4572
4573 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4574 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
4575 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4576 ; CHECK-NEXT:    ret float [[DATA]]
4577 ;
4578   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4579   %elt0 = extractelement <4 x float> %data, i32 0
4580   ret float %elt0
4581 }
4582
4583 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4584
4585 ; --------------------------------------------------------------------
4586 ; llvm.amdgcn.image.sample.c.l
4587 ; --------------------------------------------------------------------
4588
4589 define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4590 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
4591 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4592 ; CHECK-NEXT:    ret float [[DATA]]
4593 ;
4594   %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4595   %elt0 = extractelement <4 x float> %data, i32 0
4596   ret float %elt0
4597 }
4598
4599 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4600
4601 ; --------------------------------------------------------------------
4602 ; llvm.amdgcn.image.sample.c.b
4603 ; --------------------------------------------------------------------
4604
4605 define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4606 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
4607 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4608 ; CHECK-NEXT:    ret float [[DATA]]
4609 ;
4610   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4611   %elt0 = extractelement <4 x float> %data, i32 0
4612   ret float %elt0
4613 }
4614
4615 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4616
4617 ; --------------------------------------------------------------------
4618 ; llvm.amdgcn.image.sample.c.b.cl
4619 ; --------------------------------------------------------------------
4620
4621 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4622 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
4623 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4624 ; CHECK-NEXT:    ret float [[DATA]]
4625 ;
4626   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4627   %elt0 = extractelement <4 x float> %data, i32 0
4628   ret float %elt0
4629 }
4630
4631 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4632
4633 ; --------------------------------------------------------------------
4634 ; llvm.amdgcn.image.sample.c.lz
4635 ; --------------------------------------------------------------------
4636
4637 define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4638 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
4639 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4640 ; CHECK-NEXT:    ret float [[DATA]]
4641 ;
4642   %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4643   %elt0 = extractelement <4 x float> %data, i32 0
4644   ret float %elt0
4645 }
4646
4647 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4648
4649 ; --------------------------------------------------------------------
4650 ; llvm.amdgcn.image.sample.c.cd
4651 ; --------------------------------------------------------------------
4652
4653 define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4654 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
4655 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4656 ; CHECK-NEXT:    ret float [[DATA]]
4657 ;
4658   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4659   %elt0 = extractelement <4 x float> %data, i32 0
4660   ret float %elt0
4661 }
4662
4663 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4664
4665 ; --------------------------------------------------------------------
4666 ; llvm.amdgcn.image.sample.c.cd.cl
4667 ; --------------------------------------------------------------------
4668
4669 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4670 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
4671 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4672 ; CHECK-NEXT:    ret float [[DATA]]
4673 ;
4674   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4675   %elt0 = extractelement <4 x float> %data, i32 0
4676   ret float %elt0
4677 }
4678
4679 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4680
4681 ; --------------------------------------------------------------------
4682 ; llvm.amdgcn.image.sample.o
4683 ; --------------------------------------------------------------------
4684
4685 define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4686 ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
4687 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4688 ; CHECK-NEXT:    ret float [[DATA]]
4689 ;
4690   %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4691   %elt0 = extractelement <4 x float> %data, i32 0
4692   ret float %elt0
4693 }
4694
4695 declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4696
4697 ; --------------------------------------------------------------------
4698 ; llvm.amdgcn.image.sample.cl.o
4699 ; --------------------------------------------------------------------
4700
4701 define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4702 ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
4703 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4704 ; CHECK-NEXT:    ret float [[DATA]]
4705 ;
4706   %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4707   %elt0 = extractelement <4 x float> %data, i32 0
4708   ret float %elt0
4709 }
4710
4711 declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4712
4713 ; --------------------------------------------------------------------
4714 ; llvm.amdgcn.image.sample.d.o
4715 ; --------------------------------------------------------------------
4716
4717 define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4718 ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
4719 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4720 ; CHECK-NEXT:    ret float [[DATA]]
4721 ;
4722   %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4723   %elt0 = extractelement <4 x float> %data, i32 0
4724   ret float %elt0
4725 }
4726
4727 declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4728
4729 ; --------------------------------------------------------------------
4730 ; llvm.amdgcn.image.sample.d.cl.o
4731 ; --------------------------------------------------------------------
4732
4733 define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4734 ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
4735 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4736 ; CHECK-NEXT:    ret float [[DATA]]
4737 ;
4738   %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4739   %elt0 = extractelement <4 x float> %data, i32 0
4740   ret float %elt0
4741 }
4742
4743 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4744
4745 ; --------------------------------------------------------------------
4746 ; llvm.amdgcn.image.sample.l.o
4747 ; --------------------------------------------------------------------
4748
4749 define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4750 ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
4751 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4752 ; CHECK-NEXT:    ret float [[DATA]]
4753 ;
4754   %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4755   %elt0 = extractelement <4 x float> %data, i32 0
4756   ret float %elt0
4757 }
4758
4759 declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4760
4761 ; --------------------------------------------------------------------
4762 ; llvm.amdgcn.image.sample.b.o
4763 ; --------------------------------------------------------------------
4764
4765 define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4766 ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
4767 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4768 ; CHECK-NEXT:    ret float [[DATA]]
4769 ;
4770   %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4771   %elt0 = extractelement <4 x float> %data, i32 0
4772   ret float %elt0
4773 }
4774
4775 declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4776
4777 ; --------------------------------------------------------------------
4778 ; llvm.amdgcn.image.sample.b.cl.o
4779 ; --------------------------------------------------------------------
4780
4781 define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4782 ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
4783 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4784 ; CHECK-NEXT:    ret float [[DATA]]
4785 ;
4786   %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4787   %elt0 = extractelement <4 x float> %data, i32 0
4788   ret float %elt0
4789 }
4790
4791 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4792
4793 ; --------------------------------------------------------------------
4794 ; llvm.amdgcn.image.sample.lz.o
4795 ; --------------------------------------------------------------------
4796
4797 define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4798 ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
4799 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4800 ; CHECK-NEXT:    ret float [[DATA]]
4801 ;
4802   %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4803   %elt0 = extractelement <4 x float> %data, i32 0
4804   ret float %elt0
4805 }
4806
4807 declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4808
4809 ; --------------------------------------------------------------------
4810 ; llvm.amdgcn.image.sample.cd.o
4811 ; --------------------------------------------------------------------
4812
4813 define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4814 ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
4815 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4816 ; CHECK-NEXT:    ret float [[DATA]]
4817 ;
4818   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4819   %elt0 = extractelement <4 x float> %data, i32 0
4820   ret float %elt0
4821 }
4822
4823 declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4824
4825 ; --------------------------------------------------------------------
4826 ; llvm.amdgcn.image.sample.cd.cl.o
4827 ; --------------------------------------------------------------------
4828
4829 define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4830 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
4831 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4832 ; CHECK-NEXT:    ret float [[DATA]]
4833 ;
4834   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4835   %elt0 = extractelement <4 x float> %data, i32 0
4836   ret float %elt0
4837 }
4838
4839 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4840
4841 ; --------------------------------------------------------------------
4842 ; llvm.amdgcn.image.sample.c.o
4843 ; --------------------------------------------------------------------
4844
4845 define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4846 ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
4847 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4848 ; CHECK-NEXT:    ret float [[DATA]]
4849 ;
4850   %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4851   %elt0 = extractelement <4 x float> %data, i32 0
4852   ret float %elt0
4853 }
4854
4855 declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4856
4857 ; --------------------------------------------------------------------
4858 ; llvm.amdgcn.image.sample.c.cl.o
4859 ; --------------------------------------------------------------------
4860
4861 define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4862 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
4863 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4864 ; CHECK-NEXT:    ret float [[DATA]]
4865 ;
4866   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4867   %elt0 = extractelement <4 x float> %data, i32 0
4868   ret float %elt0
4869 }
4870
4871 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4872
4873 ; --------------------------------------------------------------------
4874 ; llvm.amdgcn.image.sample.c.d.o
4875 ; --------------------------------------------------------------------
4876
4877 define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4878 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
4879 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4880 ; CHECK-NEXT:    ret float [[DATA]]
4881 ;
4882   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4883   %elt0 = extractelement <4 x float> %data, i32 0
4884   ret float %elt0
4885 }
4886
4887 declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4888
4889 ; --------------------------------------------------------------------
4890 ; llvm.amdgcn.image.sample.c.d.cl.o
4891 ; --------------------------------------------------------------------
4892
4893 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4894 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
4895 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4896 ; CHECK-NEXT:    ret float [[DATA]]
4897 ;
4898   %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4899   %elt0 = extractelement <4 x float> %data, i32 0
4900   ret float %elt0
4901 }
4902
4903 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4904
4905 ; --------------------------------------------------------------------
4906 ; llvm.amdgcn.image.sample.c.l.o
4907 ; --------------------------------------------------------------------
4908
4909 define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4910 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
4911 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4912 ; CHECK-NEXT:    ret float [[DATA]]
4913 ;
4914   %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4915   %elt0 = extractelement <4 x float> %data, i32 0
4916   ret float %elt0
4917 }
4918
4919 declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4920
4921 ; --------------------------------------------------------------------
4922 ; llvm.amdgcn.image.sample.c.b.o
4923 ; --------------------------------------------------------------------
4924
4925 define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4926 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
4927 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4928 ; CHECK-NEXT:    ret float [[DATA]]
4929 ;
4930   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4931   %elt0 = extractelement <4 x float> %data, i32 0
4932   ret float %elt0
4933 }
4934
4935 declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4936
4937 ; --------------------------------------------------------------------
4938 ; llvm.amdgcn.image.sample.c.b.cl.o
4939 ; --------------------------------------------------------------------
4940
4941 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4942 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
4943 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4944 ; CHECK-NEXT:    ret float [[DATA]]
4945 ;
4946   %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4947   %elt0 = extractelement <4 x float> %data, i32 0
4948   ret float %elt0
4949 }
4950
4951 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4952
4953 ; --------------------------------------------------------------------
4954 ; llvm.amdgcn.image.sample.c.lz.o
4955 ; --------------------------------------------------------------------
4956
4957 define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4958 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
4959 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4960 ; CHECK-NEXT:    ret float [[DATA]]
4961 ;
4962   %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4963   %elt0 = extractelement <4 x float> %data, i32 0
4964   ret float %elt0
4965 }
4966
4967 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4968
4969 ; --------------------------------------------------------------------
4970 ; llvm.amdgcn.image.sample.c.cd.o
4971 ; --------------------------------------------------------------------
4972
4973 define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4974 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
4975 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4976 ; CHECK-NEXT:    ret float [[DATA]]
4977 ;
4978   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4979   %elt0 = extractelement <4 x float> %data, i32 0
4980   ret float %elt0
4981 }
4982
4983 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4984
4985 ; --------------------------------------------------------------------
4986 ; llvm.amdgcn.image.sample.c.cd.cl.o
4987 ; --------------------------------------------------------------------
4988
4989 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4990 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
4991 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4992 ; CHECK-NEXT:    ret float [[DATA]]
4993 ;
4994   %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4995   %elt0 = extractelement <4 x float> %data, i32 0
4996   ret float %elt0
4997 }
4998
4999 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5000
5001 ; --------------------------------------------------------------------
5002 ; llvm.amdgcn.image.gather4
5003 ; --------------------------------------------------------------------
5004
5005 ; Don't handle gather4*
5006
5007 define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5008 ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
5009 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5010 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5011 ; CHECK-NEXT:    ret float [[ELT0]]
5012 ;
5013   %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5014   %elt0 = extractelement <4 x float> %data, i32 0
5015   ret float %elt0
5016 }
5017
5018 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5019
5020 ; --------------------------------------------------------------------
5021 ; llvm.amdgcn.image.gather4.cl
5022 ; --------------------------------------------------------------------
5023
5024 define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5025 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
5026 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5027 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5028 ; CHECK-NEXT:    ret float [[ELT0]]
5029 ;
5030   %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5031   %elt0 = extractelement <4 x float> %data, i32 0
5032   ret float %elt0
5033 }
5034
5035 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5036
5037 ; --------------------------------------------------------------------
5038 ; llvm.amdgcn.image.gather4.l
5039 ; --------------------------------------------------------------------
5040
5041 define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5042 ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
5043 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5044 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5045 ; CHECK-NEXT:    ret float [[ELT0]]
5046 ;
5047   %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5048   %elt0 = extractelement <4 x float> %data, i32 0
5049   ret float %elt0
5050 }
5051
5052 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5053
5054 ; --------------------------------------------------------------------
5055 ; llvm.amdgcn.image.gather4.b
5056 ; --------------------------------------------------------------------
5057
5058 define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5059 ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
5060 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5061 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5062 ; CHECK-NEXT:    ret float [[ELT0]]
5063 ;
5064   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5065   %elt0 = extractelement <4 x float> %data, i32 0
5066   ret float %elt0
5067 }
5068
5069 declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5070
5071 ; --------------------------------------------------------------------
5072 ; llvm.amdgcn.image.gather4.b.cl
5073 ; --------------------------------------------------------------------
5074
5075 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5076 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
5077 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5078 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5079 ; CHECK-NEXT:    ret float [[ELT0]]
5080 ;
5081   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5082   %elt0 = extractelement <4 x float> %data, i32 0
5083   ret float %elt0
5084 }
5085
5086 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5087
5088 ; --------------------------------------------------------------------
5089 ; llvm.amdgcn.image.gather4.lz
5090 ; --------------------------------------------------------------------
5091
5092 define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5093 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
5094 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5095 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5096 ; CHECK-NEXT:    ret float [[ELT0]]
5097 ;
5098   %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5099   %elt0 = extractelement <4 x float> %data, i32 0
5100   ret float %elt0
5101 }
5102
5103 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5104
5105 ; --------------------------------------------------------------------
5106 ; llvm.amdgcn.image.gather4.o
5107 ; --------------------------------------------------------------------
5108
5109 define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5110 ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
5111 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5112 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5113 ; CHECK-NEXT:    ret float [[ELT0]]
5114 ;
5115   %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5116   %elt0 = extractelement <4 x float> %data, i32 0
5117   ret float %elt0
5118 }
5119
5120 declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5121
5122 ; --------------------------------------------------------------------
5123 ; llvm.amdgcn.image.gather4.cl.o
5124 ; --------------------------------------------------------------------
5125
5126 define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5127 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
5128 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5129 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5130 ; CHECK-NEXT:    ret float [[ELT0]]
5131 ;
5132   %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5133   %elt0 = extractelement <4 x float> %data, i32 0
5134   ret float %elt0
5135 }
5136
5137 declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5138
5139 ; --------------------------------------------------------------------
5140 ; llvm.amdgcn.image.gather4.l.o
5141 ; --------------------------------------------------------------------
5142
5143 define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5144 ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
5145 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5146 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5147 ; CHECK-NEXT:    ret float [[ELT0]]
5148 ;
5149   %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5150   %elt0 = extractelement <4 x float> %data, i32 0
5151   ret float %elt0
5152 }
5153
5154 declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5155
5156 ; --------------------------------------------------------------------
5157 ; llvm.amdgcn.image.gather4.b.o
5158 ; --------------------------------------------------------------------
5159
5160 define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5161 ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
5162 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5163 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5164 ; CHECK-NEXT:    ret float [[ELT0]]
5165 ;
5166   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5167   %elt0 = extractelement <4 x float> %data, i32 0
5168   ret float %elt0
5169 }
5170
5171 declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5172
5173 ; --------------------------------------------------------------------
5174 ; llvm.amdgcn.image.gather4.b.cl.o
5175 ; --------------------------------------------------------------------
5176
5177 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5178 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
5179 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5180 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5181 ; CHECK-NEXT:    ret float [[ELT0]]
5182 ;
5183   %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5184   %elt0 = extractelement <4 x float> %data, i32 0
5185   ret float %elt0
5186 }
5187
5188 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5189
5190 ; --------------------------------------------------------------------
5191 ; llvm.amdgcn.image.gather4.lz.o
5192 ; --------------------------------------------------------------------
5193
5194 define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5195 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
5196 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5197 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5198 ; CHECK-NEXT:    ret float [[ELT0]]
5199 ;
5200   %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5201   %elt0 = extractelement <4 x float> %data, i32 0
5202   ret float %elt0
5203 }
5204
5205 declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5206
5207 ; --------------------------------------------------------------------
5208 ; llvm.amdgcn.image.gather4.c.o
5209 ; --------------------------------------------------------------------
5210
5211 define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5212 ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
5213 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5214 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5215 ; CHECK-NEXT:    ret float [[ELT0]]
5216 ;
5217   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5218   %elt0 = extractelement <4 x float> %data, i32 0
5219   ret float %elt0
5220 }
5221
5222 declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5223
5224 ; --------------------------------------------------------------------
5225 ; llvm.amdgcn.image.gather4.c.cl.o
5226 ; --------------------------------------------------------------------
5227
5228 define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5229 ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
5230 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5231 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5232 ; CHECK-NEXT:    ret float [[ELT0]]
5233 ;
5234   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5235   %elt0 = extractelement <4 x float> %data, i32 0
5236   ret float %elt0
5237 }
5238
5239 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5240
5241 ; --------------------------------------------------------------------
5242 ; llvm.amdgcn.image.gather4.c.l.o
5243 ; --------------------------------------------------------------------
5244
5245 define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5246 ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
5247 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5248 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5249 ; CHECK-NEXT:    ret float [[ELT0]]
5250 ;
5251   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5252   %elt0 = extractelement <4 x float> %data, i32 0
5253   ret float %elt0
5254 }
5255
5256 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5257
5258 ; --------------------------------------------------------------------
5259 ; llvm.amdgcn.image.gather4.c.b.o
5260 ; --------------------------------------------------------------------
5261
5262 define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5263 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
5264 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5265 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5266 ; CHECK-NEXT:    ret float [[ELT0]]
5267 ;
5268   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5269   %elt0 = extractelement <4 x float> %data, i32 0
5270   ret float %elt0
5271 }
5272
5273 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5274
5275 ; --------------------------------------------------------------------
5276 ; llvm.amdgcn.image.gather4.c.b.cl.o
5277 ; --------------------------------------------------------------------
5278
5279 define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5280 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
5281 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5282 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5283 ; CHECK-NEXT:    ret float [[ELT0]]
5284 ;
5285   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5286   %elt0 = extractelement <4 x float> %data, i32 0
5287   ret float %elt0
5288 }
5289
5290 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5291
5292 ; --------------------------------------------------------------------
5293 ; llvm.amdgcn.image.gather4.c.lz.o
5294 ; --------------------------------------------------------------------
5295
5296 define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5297 ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
5298 ; CHECK-NEXT:    [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5299 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5300 ; CHECK-NEXT:    ret float [[ELT0]]
5301 ;
5302   %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5303   %elt0 = extractelement <4 x float> %data, i32 0
5304   ret float %elt0
5305 }
5306
5307 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5308
5309 ; --------------------------------------------------------------------
5310 ; llvm.amdgcn.image.getlod
5311 ; --------------------------------------------------------------------
5312
5313 define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5314 ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
5315 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5316 ; CHECK-NEXT:    ret float [[DATA]]
5317 ;
5318   %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5319   %elt0 = extractelement <4 x float> %data, i32 0
5320   ret float %elt0
5321 }
5322
5323 declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5324
5325 ; --------------------------------------------------------------------
5326 ; llvm.amdgcn.image.load
5327 ; --------------------------------------------------------------------
5328
5329 define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
5330 ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
5331 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5332 ; CHECK-NEXT:    ret float [[DATA]]
5333 ;
5334   %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
5335   %elt0 = extractelement <4 x float> %data, i32 0
5336   ret float %elt0
5337 }
5338
5339 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
5340
5341 ; --------------------------------------------------------------------
5342 ; llvm.amdgcn.image.load.mip
5343 ; --------------------------------------------------------------------
5344
5345 define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
5346 ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
5347 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5348 ; CHECK-NEXT:    ret float [[DATA]]
5349 ;
5350   %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5351   %elt0 = extractelement <4 x float> %data, i32 0
5352   ret float %elt0
5353 }
5354
5355 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1
5356
5357 ; --------------------------------------------------------------------
5358 ; llvm.amdgcn.image.getresinfo
5359 ; --------------------------------------------------------------------
5360
5361 define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
5362 ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
5363 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5364 ; CHECK-NEXT:    ret float [[DATA]]
5365 ;
5366   %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5367   %elt0 = extractelement <4 x float> %data, i32 0
5368   ret float %elt0
5369 }
5370
5371 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5372
5373 ; --------------------------------------------------------------------
5374 ; TFE / LWE
5375 ; --------------------------------------------------------------------
5376
5377 define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
5378 ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
5379 ; CHECK-NEXT:    [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1)
5380 ; CHECK-NEXT:    [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
5381 ; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0
5382 ; CHECK-NEXT:    ret float [[ELT0]]
5383 ;
5384   %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
5385   %rgba = extractvalue { <4 x float>, i32 } %data, 0
5386   %elt0 = extractelement <4 x float> %rgba, i32 0
5387   ret float %elt0
5388 }
5389
5390 declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5391
5392 define amdgpu_hs float @tfe_check_assert() #0 {
5393 ; CHECK-LABEL: @tfe_check_assert(
5394 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
5395 ; CHECK-NEXT:    ret float [[DATA]]
5396 ;
5397   %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2
5398   %elt0 = extractelement <4 x float> %data, i32 0
5399   ret float %elt0
5400 }
5401
5402 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
5403
5404 attributes #0 = { nounwind }
5405 attributes #1 = { nounwind readonly }
5406
5407 !0 = !{float 2.500000e+00}