test/CodeGen/AMDGPU/amdgcn.bitcast.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   4
   5 ; This test just checks that the compiler doesn't crash.
   6
   7 ; FUNC-LABEL: {{^}}v32i8_to_v8i32:
   8 define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 { ; %0 is the unnamed pointer argument
   9 entry:
  10   %bytes = load <32 x i8>, <32 x i8> addrspace(4)* %0 ; read the whole <32 x i8> through the argument pointer
  11   %dwords = bitcast <32 x i8> %bytes to <8 x i32> ; the cast under test: <32 x i8> -> <8 x i32>
  12   %dword1 = extractelement <8 x i32> %dwords, i32 1 ; use element 1 of the cast result
  13   %nonzero = icmp ne i32 %dword1, 0
  14   %result = select i1 %nonzero, float 0.0, float 1.0 ; 0.0 when element 1 is non-zero, otherwise 1.0
  15   ret float %result
  16 }
  17
  18 ; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
  19 ; SI: s_endpgm
  20 define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) { ; pointer bitcast followed by a vector copy
  21 entry:
  22   %vec.ptr = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)* ; reinterpret the i8 pointer as a pointer to <16 x i8>
  23   %vec = load <16 x i8>, <16 x i8> addrspace(1)* %vec.ptr
  24   store <16 x i8> %vec, <16 x i8> addrspace(1)* %out ; write the loaded vector to %out unchanged
  25   ret void
  26 }
  27 ; FUNC-LABEL: {{^}}f32_to_v2i16:
  28 define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind { ; float -> <2 x i16>
  29   %load = load float, float addrspace(1)* %in, align 4
  30   %fadd32 = fadd float %load, 1.0 ; floating-point op feeding the cast
  31   %bc = bitcast float %fadd32 to <2 x i16> ; the cast under test
  32   %add.bitcast = add <2 x i16> %bc, <i16 2, i16 2> ; integer vector use of the cast result
  33   store <2 x i16> %add.bitcast, <2 x i16> addrspace(1)* %out
  34   ret void
  35 }
  36 ; FUNC-LABEL: {{^}}v2i16_to_f32:
  37 define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind { ; <2 x i16> -> float
  38   %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
  39   %add.v2i16 = add <2 x i16> %load, <i16 2, i16 2> ; integer vector op feeding the cast
  40   %bc = bitcast <2 x i16> %add.v2i16 to float ; the cast under test
  41   %fadd.bitcast = fadd float %bc, 1.0 ; floating-point use of the cast result
  42   store float %fadd.bitcast, float addrspace(1)* %out
  43   ret void
  44 }
  45 ; FUNC-LABEL: {{^}}f32_to_v2f16:
  46 define amdgpu_kernel void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in) nounwind { ; float -> <2 x half>
  47   %load = load float, float addrspace(1)* %in, align 4
  48   %fadd32 = fadd float %load, 1.0 ; scalar float op feeding the cast
  49   %bc = bitcast float %fadd32 to <2 x half> ; the cast under test
  50   %add.bitcast = fadd <2 x half> %bc, <half 2.0, half 2.0> ; half-vector use of the cast result
  51   store <2 x half> %add.bitcast, <2 x half> addrspace(1)* %out
  52   ret void
  53 }
  54 ; FUNC-LABEL: {{^}}v2f16_to_f32:
  55 define amdgpu_kernel void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in) nounwind { ; <2 x half> -> float
  56   %load = load <2 x half>, <2 x half> addrspace(1)* %in, align 4
  57   %add.v2f16 = fadd <2 x half> %load, <half 2.0, half 2.0> ; half-vector op feeding the cast
  58   %bc = bitcast <2 x half> %add.v2f16 to float ; the cast under test
  59   %fadd.bitcast = fadd float %bc, 1.0 ; scalar float use of the cast result
  60   store float %fadd.bitcast, float addrspace(1)* %out
  61   ret void
  62 }
  63 ; FUNC-LABEL: {{^}}v4i8_to_i32:
  64 define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind { ; <4 x i8> -> i32
  65   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
  66   %bc = bitcast <4 x i8> %load to i32 ; the cast under test, applied directly to the loaded value
  67   store i32 %bc, i32 addrspace(1)* %out, align 4
  68   ret void
  69 }
  70 ; FUNC-LABEL: {{^}}i32_to_v4i8:
  71 define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { ; i32 -> <4 x i8>
  72   %load = load i32, i32 addrspace(1)* %in, align 4
  73   %bc = bitcast i32 %load to <4 x i8> ; the cast under test, applied directly to the loaded value
  74   store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
  75   ret void
  76 }
  77
  78 ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
  79 ; SI: s_endpgm
  80 define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ; <2 x i32> -> double
  81   %vec = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  82   %vec.plus = add <2 x i32> %vec, <i32 4, i32 9> ; integer vector op ahead of the cast
  83   %as.f64 = bitcast <2 x i32> %vec.plus to double ; the cast under test
  84   %sum = fadd double %as.f64, 1.0 ; floating-point use of the cast result
  85   store double %sum, double addrspace(1)* %out, align 8
  86   ret void
  87 }
  88
  89 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
  90 ; SI: s_endpgm
  91 define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) { ; double -> <2 x i32>
  92   %dbl = load double, double addrspace(1)* %in, align 8
  93   %dbl.plus = fadd double %dbl, 4.0 ; floating-point op ahead of the cast
  94   %as.v2i32 = bitcast double %dbl.plus to <2 x i32> ; the cast under test
  95   store <2 x i32> %as.v2i32, <2 x i32> addrspace(1)* %out, align 8
  96   ret void
  97 }
  98
  99 ; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
 100 define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) { ; cast only on one path
 101 entry:
 102   %is.zero = icmp eq i32 %cond, 0
 103   br i1 %is.zero, label %if, label %end ; take the cast path only when %cond is 0
 104
 105 if:
 106   %as.v2f64 = bitcast <2 x i64> %value to <2 x double> ; the cast under test, on the argument value
 107   br label %end
 108
 109 end:
 110   %merged = phi <2 x double> [zeroinitializer, %entry], [%as.v2f64, %if] ; zero vector when the cast path was skipped
 111   store <2 x double> %merged, <2 x double> addrspace(1)* %out
 112   ret void
 113 }
 114
 115 ; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
 116 define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) { ; cast only on one path
 117 entry:
 118   %is.zero = icmp eq i32 %cond, 0
 119   br i1 %is.zero, label %if, label %end ; take the cast path only when %cond is 0
 120
 121 if:
 122   %as.v2i64 = bitcast <2 x double> %value to <2 x i64> ; the cast under test, on the argument value
 123   br label %end
 124
 125 end:
 126   %merged = phi <2 x i64> [zeroinitializer, %entry], [%as.v2i64, %if] ; zero vector when the cast path was skipped
 127   store <2 x i64> %merged, <2 x i64> addrspace(1)* %out
 128   ret void
 129 }
 130
 131 ; FUNC-LABEL: {{^}}v4i16_to_f64:
 132 define amdgpu_kernel void @v4i16_to_f64(double addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind { ; <4 x i16> -> double
 133   %vec = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
 134   %vec.plus = add <4 x i16> %vec, <i16 4, i16 4, i16 4, i16 4> ; integer vector op ahead of the cast
 135   %as.f64 = bitcast <4 x i16> %vec.plus to double ; the cast under test
 136   %sum = fadd double %as.f64, 1.0 ; floating-point use of the cast result
 137   store double %sum, double addrspace(1)* %out
 138   ret void
 139 }
 140
 141 ; FUNC-LABEL: {{^}}v4f16_to_f64:
 142 define amdgpu_kernel void @v4f16_to_f64(double addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind { ; <4 x half> -> double
 143   %vec = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
 144   %vec.plus = fadd <4 x half> %vec, <half 4.0, half 4.0, half 4.0, half 4.0> ; half-vector op ahead of the cast
 145   %as.f64 = bitcast <4 x half> %vec.plus to double ; the cast under test
 146   %sum = fadd double %as.f64, 1.0 ; double-precision use of the cast result
 147   store double %sum, double addrspace(1)* %out
 148   ret void
 149 }
 150
 151 ; FUNC-LABEL: {{^}}f64_to_v4f16:
 152 define amdgpu_kernel void @f64_to_v4f16(<4 x half> addrspace(1)* %out, double addrspace(1)* %in) nounwind { ; double -> <4 x half>
 153   %dbl = load double, double addrspace(1)* %in, align 4
 154   %dbl.plus = fadd double %dbl, 1.0 ; double-precision op ahead of the cast
 155   %as.v4f16 = bitcast double %dbl.plus to <4 x half> ; the cast under test
 156   %sum = fadd <4 x half> %as.v4f16, <half 2.0, half 2.0, half 2.0, half 2.0> ; half-vector use of the cast result
 157   store <4 x half> %sum, <4 x half> addrspace(1)* %out
 158   ret void
 159 }
 160
 161 ; FUNC-LABEL: {{^}}f64_to_v4i16:
 162 define amdgpu_kernel void @f64_to_v4i16(<4 x i16> addrspace(1)* %out, double addrspace(1)* %in) nounwind { ; double -> <4 x i16>
 163   %dbl = load double, double addrspace(1)* %in, align 4
 164   %dbl.plus = fadd double %dbl, 1.0 ; double-precision op ahead of the cast
 165   %as.v4i16 = bitcast double %dbl.plus to <4 x i16> ; the cast under test
 166   %sum = add <4 x i16> %as.v4i16, <i16 2, i16 2, i16 2, i16 2> ; integer vector use of the cast result
 167   store <4 x i16> %sum, <4 x i16> addrspace(1)* %out
 168   ret void
 169 }
 170
 171 ; FUNC-LABEL: {{^}}v4i16_to_i64:
 172 define amdgpu_kernel void @v4i16_to_i64(i64 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind { ; <4 x i16> -> i64
 173   %vec = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
 174   %vec.plus = add <4 x i16> %vec, <i16 4, i16 4, i16 4, i16 4> ; 16-bit vector op ahead of the cast
 175   %as.i64 = bitcast <4 x i16> %vec.plus to i64 ; the cast under test
 176   %sum = add i64 %as.i64, 1 ; 64-bit scalar use of the cast result
 177   store i64 %sum, i64 addrspace(1)* %out
 178   ret void
 179 }
 180
 181 ; FUNC-LABEL: {{^}}v4f16_to_i64:
 182 define amdgpu_kernel void @v4f16_to_i64(i64 addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind { ; <4 x half> -> i64
 183   %vec = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
 184   %vec.plus = fadd <4 x half> %vec, <half 4.0, half 4.0, half 4.0, half 4.0> ; half-vector op ahead of the cast
 185   %as.i64 = bitcast <4 x half> %vec.plus to i64 ; the cast under test
 186   %sum = add i64 %as.i64, 1 ; 64-bit integer use of the cast result
 187   store i64 %sum, i64 addrspace(1)* %out
 188   ret void
 189 }
 190
 191 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4i16:
 192 define amdgpu_kernel void @bitcast_i64_to_v4i16(<4 x i16> addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 -> <4 x i16>
 193   %wide = load i64, i64 addrspace(1)* %in, align 8
 194   %wide.plus = add i64 %wide, 4 ; 64-bit scalar op ahead of the cast
 195   %as.v4i16 = bitcast i64 %wide.plus to <4 x i16> ; the cast under test
 196   %sum = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4> ; a different addend in each lane
 197   store <4 x i16> %sum, <4 x i16> addrspace(1)* %out, align 8
 198   ret void
 199 }
 200
 201 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4f16:
 202 define amdgpu_kernel void @bitcast_i64_to_v4f16(<4 x half> addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 -> <4 x half>
 203   %wide = load i64, i64 addrspace(1)* %in, align 8
 204   %wide.plus = add i64 %wide, 4 ; 64-bit integer op ahead of the cast
 205   %as.v4f16 = bitcast i64 %wide.plus to <4 x half> ; the cast under test
 206   %sum = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0> ; a different addend in each lane
 207   store <4 x half> %sum, <4 x half> addrspace(1)* %out, align 8
 208   ret void
 209 }
 210
 211 ; FUNC-LABEL: {{^}}v4i16_to_v2f32:
 212 define amdgpu_kernel void @v4i16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind { ; <4 x i16> -> <2 x float>
 213   %vec = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
 214   %vec.plus = add <4 x i16> %vec, <i16 4, i16 4, i16 4, i16 4> ; 16-bit integer vector op ahead of the cast
 215   %as.v2f32 = bitcast <4 x i16> %vec.plus to <2 x float> ; the cast under test
 216   %sum = fadd <2 x float> %as.v2f32, <float 1.0, float 1.0> ; float-vector use of the cast result
 217   store <2 x float> %sum, <2 x float> addrspace(1)* %out
 218   ret void
 219 }
 220
 221 ; FUNC-LABEL: {{^}}v4f16_to_v2f32:
 222 define amdgpu_kernel void @v4f16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind { ; <4 x half> -> <2 x float>
 223   %vec = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
 224   %vec.plus = fadd <4 x half> %vec, <half 4.0, half 4.0, half 4.0, half 4.0> ; half-vector op ahead of the cast
 225   %as.v2f32 = bitcast <4 x half> %vec.plus to <2 x float> ; the cast under test
 226   %sum = fadd <2 x float> %as.v2f32, <float 1.0, float 1.0> ; float-vector use of the cast result
 227   store <2 x float> %sum, <2 x float> addrspace(1)* %out
 228   ret void
 229 }
 230
 231 ; FUNC-LABEL: {{^}}v2f32_to_v4i16:
 232 define amdgpu_kernel void @v2f32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind { ; <2 x float> -> <4 x i16>
 233   %vec = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
 234   %vec.plus = fadd <2 x float> %vec, <float 2.0, float 4.0> ; float-vector op ahead of the cast
 235   %as.v4i16 = bitcast <2 x float> %vec.plus to <4 x i16> ; the cast under test
 236   %sum = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4> ; a different addend in each lane
 237   store <4 x i16> %sum, <4 x i16> addrspace(1)* %out
 238   ret void
 239 }
 240
 241 ; FUNC-LABEL: {{^}}v2f32_to_v4f16:
 242 define amdgpu_kernel void @v2f32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind { ; <2 x float> -> <4 x half>
 243   %vec = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
 244   %vec.plus = fadd <2 x float> %vec, <float 2.0, float 4.0> ; float-vector op ahead of the cast
 245   %as.v4f16 = bitcast <2 x float> %vec.plus to <4 x half> ; the cast under test
 246   %sum = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0> ; a different addend in each lane
 247   store <4 x half> %sum, <4 x half> addrspace(1)* %out
 248   ret void
 249 }
 250
 251 ; FUNC-LABEL: {{^}}v4i16_to_v2i32:
 252 define amdgpu_kernel void @v4i16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind { ; <4 x i16> -> <2 x i32>
 253   %vec = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
 254   %vec.plus = add <4 x i16> %vec, <i16 4, i16 4, i16 4, i16 4> ; 16-bit vector op ahead of the cast
 255   %as.v2i32 = bitcast <4 x i16> %vec.plus to <2 x i32> ; the cast under test
 256   %sum = add <2 x i32> %as.v2i32, <i32 1, i32 1> ; 32-bit vector use of the cast result
 257   store <2 x i32> %sum, <2 x i32> addrspace(1)* %out
 258   ret void
 259 }
 260
 261 ; FUNC-LABEL: {{^}}v4f16_to_v2i32:
 262 define amdgpu_kernel void @v4f16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind { ; <4 x half> -> <2 x i32>
 263   %vec = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
 264   %vec.plus = fadd <4 x half> %vec, <half 4.0, half 4.0, half 4.0, half 4.0> ; half-vector op ahead of the cast
 265   %as.v2i32 = bitcast <4 x half> %vec.plus to <2 x i32> ; the cast under test
 266   %sum = add <2 x i32> %as.v2i32, <i32 1, i32 1> ; 32-bit integer vector use of the cast result
 267   store <2 x i32> %sum, <2 x i32> addrspace(1)* %out
 268   ret void
 269 }
 270
 271 ; FUNC-LABEL: {{^}}v2i32_to_v4i16:
 272 define amdgpu_kernel void @v2i32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind { ; <2 x i32> -> <4 x i16>
 273   %vec = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
 274   %vec.plus = add <2 x i32> %vec, <i32 2, i32 4> ; 32-bit vector op ahead of the cast
 275   %as.v4i16 = bitcast <2 x i32> %vec.plus to <4 x i16> ; the cast under test
 276   %sum = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4> ; a different addend in each lane
 277   store <4 x i16> %sum, <4 x i16> addrspace(1)* %out
 278   ret void
 279 }
 280
 281 ; FUNC-LABEL: {{^}}v2i32_to_v4f16:
 282 define amdgpu_kernel void @v2i32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind { ; <2 x i32> -> <4 x half>
 283   %vec = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
 284   %vec.plus = add <2 x i32> %vec, <i32 2, i32 4> ; 32-bit integer vector op ahead of the cast
 285   %as.v4f16 = bitcast <2 x i32> %vec.plus to <4 x half> ; the cast under test
 286   %sum = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0> ; a different addend in each lane
 287   store <4 x half> %sum, <4 x half> addrspace(1)* %out
 288   ret void
 289 }