test/CodeGen/AMDGPU/xor3.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
   3 ; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
   4
   5 ; ===================================================================================
   6 ; V_XOR3_B32
   7 ; ===================================================================================
   8
   9 define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
     ; Baseline case: computes (%a ^ %b) ^ %c where all three operands are
     ; plain (non-inreg) i32 arguments. The gfx900 checks expect two
     ; v_xor_b32_e32 instructions; the gfx1010 checks expect a single
     ; v_xor3_b32 taking all three operands.
  10 ; GFX9-LABEL: xor3:
  11 ; GFX9:       ; %bb.0:
  12 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
  13 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
  14 ; GFX9-NEXT:    ; return to shader part epilog
  15 ;
  16 ; GFX10-LABEL: xor3:
  17 ; GFX10:       ; %bb.0:
  18 ; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, v2
  19 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
  20 ; GFX10-NEXT:    ; return to shader part epilog
  21   %x = xor i32 %a, %b
  22   %result = xor i32 %x, %c
     ; The function is declared to return float, so the i32 result is
     ; reinterpreted with a bitcast before being returned.
  23   %bc = bitcast i32 %result to float
  24   ret float %bc
  25 }
  26
  27 define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
     ; Mixed-operand case: %a and %c are marked inreg, %b is not. In the
     ; expected output below the inreg operands show up as s2 and s3 while
     ; %b is v0. The gfx1010 checks expect one v_xor3_b32 that reads both
     ; s-registers directly; the gfx900 checks expect two v_xor_b32_e32.
  28 ; GFX9-LABEL: xor3_vgpr_b:
  29 ; GFX9:       ; %bb.0:
  30 ; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
  31 ; GFX9-NEXT:    v_xor_b32_e32 v0, s3, v0
  32 ; GFX9-NEXT:    ; return to shader part epilog
  33 ;
  34 ; GFX10-LABEL: xor3_vgpr_b:
  35 ; GFX10:       ; %bb.0:
  36 ; GFX10-NEXT:    v_xor3_b32 v0, s2, v0, s3
  37 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
  38 ; GFX10-NEXT:    ; return to shader part epilog
  39   %x = xor i32 %a, %b
  40   %result = xor i32 %x, %c
  41   %bc = bitcast i32 %result to float
  42   ret float %bc
  43 }
  44
  45 define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
     ; Same operand kinds as the all-non-inreg case, but with the other
     ; association: the inner xor (%b ^ %c) is the right-hand operand of the
     ; outer xor, i.e. %a ^ (%b ^ %c). The gfx1010 checks still expect a
     ; single v_xor3_b32, with the operands listed as v1, v2, v0.
  46 ; GFX9-LABEL: xor3_vgpr_all2:
  47 ; GFX9:       ; %bb.0:
  48 ; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
  49 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
  50 ; GFX9-NEXT:    ; return to shader part epilog
  51 ;
  52 ; GFX10-LABEL: xor3_vgpr_all2:
  53 ; GFX10:       ; %bb.0:
  54 ; GFX10-NEXT:    v_xor3_b32 v0, v1, v2, v0
  55 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
  56 ; GFX10-NEXT:    ; return to shader part epilog
  57   %x = xor i32 %b, %c
  58   %result = xor i32 %a, %x
  59   %bc = bitcast i32 %result to float
  60   ret float %bc
  61 }
  62
  63 define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
     ; Only %a is marked inreg; %b and %c are not. In the expected output
     ; %a appears as s2 and the other two operands as v0 and v1. The gfx1010
     ; checks expect one v_xor3_b32; the gfx900 checks expect two
     ; v_xor_b32_e32.
  64 ; GFX9-LABEL: xor3_vgpr_bc:
  65 ; GFX9:       ; %bb.0:
  66 ; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
  67 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
  68 ; GFX9-NEXT:    ; return to shader part epilog
  69 ;
  70 ; GFX10-LABEL: xor3_vgpr_bc:
  71 ; GFX10:       ; %bb.0:
  72 ; GFX10-NEXT:    v_xor3_b32 v0, s2, v0, v1
  73 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
  74 ; GFX10-NEXT:    ; return to shader part epilog
  75   %x = xor i32 %a, %b
  76   %result = xor i32 %x, %c
  77   %bc = bitcast i32 %result to float
  78   ret float %bc
  79 }
  80
  81 define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) {
     ; Constant-operand case: the third xor input is the literal 16 rather
     ; than an argument. The gfx1010 checks expect v_xor3_b32 with 16 written
     ; directly as its last operand; the gfx900 checks expect the second
     ; v_xor_b32_e32 to take 16 as an operand.
  82 ; GFX9-LABEL: xor3_vgpr_const:
  83 ; GFX9:       ; %bb.0:
  84 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
  85 ; GFX9-NEXT:    v_xor_b32_e32 v0, 16, v0
  86 ; GFX9-NEXT:    ; return to shader part epilog
  87 ;
  88 ; GFX10-LABEL: xor3_vgpr_const:
  89 ; GFX10:       ; %bb.0:
  90 ; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, 16
  91 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
  92 ; GFX10-NEXT:    ; return to shader part epilog
  93   %x = xor i32 %a, %b
  94   %result = xor i32 %x, 16
  95   %bc = bitcast i32 %result to float
  96   ret float %bc
  97 }
  98
  99 define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) {
     ; Multiple uses of the OUTER xor: %outer is both returned (element 0 of
     ; the result vector) and multiplied by %x (element 1). The inner xor has
     ; a single use. The gfx1010 checks still expect a single v_xor3_b32,
     ; followed by v_mul_lo_u32 consuming its result.
 100 ; GFX9-LABEL: xor3_multiuse_outer:
 101 ; GFX9:       ; %bb.0:
 102 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
 103 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
 104 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v3
 105 ; GFX9-NEXT:    ; return to shader part epilog
 106 ;
 107 ; GFX10-LABEL: xor3_multiuse_outer:
 108 ; GFX10:       ; %bb.0:
 109 ; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, v2
 110 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 111 ; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v3
 112 ; GFX10-NEXT:    ; return to shader part epilog
 113   %inner = xor i32 %a, %b
 114   %outer = xor i32 %inner, %c
 115   %x1 = mul i32 %outer, %x
     ; Pack both values into a <2 x i32> so that each one is live at the
     ; return, then reinterpret as <2 x float> to match the return type.
 116   %r1 = insertelement <2 x i32> undef, i32 %outer, i32 0
 117   %r0 = insertelement <2 x i32> %r1, i32 %x1, i32 1
 118   %bc = bitcast <2 x i32> %r0 to <2 x float>
 119   ret <2 x float> %bc
 120 }
 121
 122 define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
     ; Multiple uses of the INNER xor: %inner is returned in element 0 and is
     ; also an operand of %outer (element 1). The checks for both targets
     ; expect two separate v_xor_b32_e32 instructions and no v_xor3_b32, with
     ; the first result kept in v0 and the second written to v1.
 123 ; GFX9-LABEL: xor3_multiuse_inner:
 124 ; GFX9:       ; %bb.0:
 125 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
 126 ; GFX9-NEXT:    v_xor_b32_e32 v1, v0, v2
 127 ; GFX9-NEXT:    ; return to shader part epilog
 128 ;
 129 ; GFX10-LABEL: xor3_multiuse_inner:
 130 ; GFX10:       ; %bb.0:
 131 ; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
 132 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 133 ; GFX10-NEXT:    v_xor_b32_e32 v1, v0, v2
 134 ; GFX10-NEXT:    ; return to shader part epilog
 135   %inner = xor i32 %a, %b
 136   %outer = xor i32 %inner, %c
     ; Return both the intermediate and the final xor so that %inner has a
     ; use outside of %outer.
 137   %r1 = insertelement <2 x i32> undef, i32 %inner, i32 0
 138   %r0 = insertelement <2 x i32> %r1, i32 %outer, i32 1
 139   %bc = bitcast <2 x i32> %r0 to <2 x float>
 140   ret <2 x float> %bc
 141 }
 142
 143 ; A case where uniform values end up in VGPRs -- we could use v_xor3_b32 here,
 144 ; but we don't.
 145 define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) {
     ; All three arguments are inreg floats. Each one has a constant added
     ; (1.0, 2.0 and 3.0 respectively); the fadd results are then bitcast to
     ; i32 and xor'ed together. In the expected output the adds read s2/s3/s4
     ; and write v-registers, and the checks for both targets expect two
     ; v_xor_b32_e32 instructions rather than a v_xor3_b32.
     ; The literal 0x40400000 in the checks is the single-precision encoding
     ; of 3.0.
 146 ; GFX9-LABEL: xor3_uniform_vgpr:
 147 ; GFX9:       ; %bb.0:
 148 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40400000
 149 ; GFX9-NEXT:    v_add_f32_e64 v0, s2, 1.0
 150 ; GFX9-NEXT:    v_add_f32_e64 v1, s3, 2.0
 151 ; GFX9-NEXT:    v_add_f32_e32 v2, s4, v2
 152 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
 153 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
 154 ; GFX9-NEXT:    ; return to shader part epilog
 155 ;
 156 ; GFX10-LABEL: xor3_uniform_vgpr:
 157 ; GFX10:       ; %bb.0:
 158 ; GFX10-NEXT:    v_add_f32_e64 v1, s3, 2.0
 159 ; GFX10-NEXT:    v_add_f32_e64 v2, s2, 1.0
 160 ; GFX10-NEXT:    v_add_f32_e64 v0, 0x40400000, s4
 161 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 162 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
 163 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 164 ; GFX10-NEXT:    ; return to shader part epilog
 165   %a1 = fadd float %a, 1.0
 166   %b2 = fadd float %b, 2.0
 167   %c3 = fadd float %c, 3.0
     ; Reinterpret the float sums as i32 so they can feed the integer xors.
 168   %bc.a = bitcast float %a1 to i32
 169   %bc.b = bitcast float %b2 to i32
 170   %bc.c = bitcast float %c3 to i32
 171   %x = xor i32 %bc.a, %bc.b
 172   %result = xor i32 %x, %bc.c
 173   %bc = bitcast i32 %result to float
 174   ret float %bc
 175 }