test/CodeGen/X86/vectorcall.ll

   1 ; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
   2 ; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
   3
   4 ; Test integer arguments.
   5
   6 define x86_vectorcallcc i32 @test_int_1() { ; no arguments: expected symbol suffix is @@0
   7   ret i32 0 ; constant zero result, expected as an xorl of eax
   8 }
   9 ; CHECK-LABEL: {{^}}test_int_1@@0:
  10 ; CHECK: xorl %eax, %eax
  11
  12 define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) { ; one inreg i32: suffix @@4 on the 32-bit run, @@8 on the 64-bit run
  13   ret i32 %a ; both runs expect the argument to be copied from ecx
  14 }
  15 ; X86-LABEL: {{^}}test_int_2@@4:
  16 ; X64-LABEL: {{^}}test_int_2@@8:
  17 ; CHECK: movl %ecx, %eax
  18
  19 define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) { ; one inreg i64: suffix @@8 on both runs
  20   %at = trunc i64 %a to i32 ; only the low 32 bits are returned
  21   ret i32 %at ; 32-bit run expects a movl from ecx, 64-bit run a movq from rcx
  22 }
  23 ; X86-LABEL: {{^}}test_int_3@@8:
  24 ; X64-LABEL: {{^}}test_int_3@@8:
  25 ; X86: movl %ecx, %eax
  26 ; X64: movq %rcx, %rax
  27
  28 define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) { ; two inreg i32: suffix @@8 (32-bit run) or @@16 (64-bit run)
  29   %s = add i32 %a, %b ; expected to be emitted as a single leal over the cx and dx registers
  30   ret i32 %s
  31 }
  32 ; X86-LABEL: {{^}}test_int_4@@8:
  33 ; X86: leal (%ecx,%edx), %eax
  34 ; X64-LABEL: {{^}}test_int_4@@16:
  35 ; X64: leal (%rcx,%rdx), %eax
  36
  37 define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) { ; \01-prefixed name: the expected label has no @@N suffix
  38   ret i32 0
  39 }
  40 ; CHECK-LABEL: {{^}}test_int_5:
  41
  42 define x86_vectorcallcc double @test_fp_1(double %a, double %b) { ; two doubles: suffix @@16
  43   ret double %b ; second double is expected to be copied from xmm1 to xmm0
  44 }
  45 ; CHECK-LABEL: {{^}}test_fp_1@@16:
  46 ; CHECK: movaps %xmm1, %xmm0
  47
  48 define x86_vectorcallcc double @test_fp_2( ; seven doubles: suffix @@56
  49     double, double, double, double, double, double, double %r) {
  50   ret double %r ; the seventh double is expected to be loaded from a stack-pointer-relative slot
  51 }
  52 ; CHECK-LABEL: {{^}}test_fp_2@@56:
  53 ; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
  54
  55 define x86_vectorcallcc {double, double, double, double} @test_fp_3() { ; four-double aggregate return: expects xmm0-xmm3 to be zeroed
  56   ret {double, double, double, double}
  57         { double 0.0, double 0.0, double 0.0, double 0.0 }
  58 }
  59 ; CHECK-LABEL: {{^}}test_fp_3@@0:
  60 ; CHECK: xorps %xmm0
  61 ; CHECK: xorps %xmm1
  62 ; CHECK: xorps %xmm2
  63 ; CHECK: xorps %xmm3
  64
  65 ; FIXME: Returning via x87 isn't compatible, but it's hard to structure the
  66 ; tablegen any other way.
  67 define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() { ; five-double aggregate return: expects an fldz in addition to zeroing xmm0-xmm3
  68   ret {double, double, double, double, double}
  69         { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
  70 }
  71 ; CHECK-LABEL: {{^}}test_fp_4@@0:
  72 ; CHECK: fldz
  73 ; CHECK: xorps %xmm0
  74 ; CHECK: xorps %xmm1
  75 ; CHECK: xorps %xmm2
  76 ; CHECK: xorps %xmm3
  77
  78 define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) { ; two 16-byte vectors: suffix @@32
  79   ret <16 x i8> %b ; second vector is expected to be copied from xmm1 to xmm0
  80 }
  81 ; CHECK-LABEL: {{^}}test_vec_1@@32:
  82 ; CHECK: movaps %xmm1, %xmm0
  83
  84 define x86_vectorcallcc <16 x i8> @test_vec_2( ; one double plus six 16-byte vectors: suffix @@104
  85     double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
  86   ret <16 x i8> %r ; the last vector is expected to be loaded through a pointer held in rax or ecx
  87 }
  88 ; CHECK-LABEL: {{^}}test_vec_2@@104:
  89 ; X64:           movq    {{[0-9]*}}(%rsp), %rax
  90 ; CHECK:         movaps (%{{rax|ecx}}), %xmm0
  91
  92 %struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> } ; five <4 x float> members
  93 %struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } ; four <4 x float> members
  94 %struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> } ; three <4 x float> members
  95 %struct.HVA2 = type { <4 x float>, <4 x float> } ; two <4 x float> members
  96
  97 define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) { ; inreg HVA4 placed between two i32 arguments
  98 entry:
  99   %b = alloca %struct.HVA4, align 16
 100   store %struct.HVA4 %bb, %struct.HVA4* %b, align 16 ; store the whole aggregate into the local slot
 101   %w1 = getelementptr inbounds %struct.HVA4, %struct.HVA4* %b, i32 0, i32 1 ; address of member 1
 102   %0 = load <4 x float>, <4 x float>* %w1, align 16
 103   ret <4 x float> %0 ; member 1 is expected to come from xmm1 (stored at offset 16, then copied to xmm0)
 104 }
 105 ; CHECK-LABEL: test_mixed_1
 106 ; CHECK:       movaps   %xmm1, 16(%{{(e|r)}}sp)
 107 ; CHECK:       movaps   %xmm1, %xmm0
 108 ; CHECK:       ret{{q|l}}
 109
 110 define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) { ; inreg HVA4, then a pointer, then a plain vector
 111 entry:
 112   %c.addr = alloca <4 x float>, align 16
 113   store <4 x float> %c, <4 x float>* %c.addr, align 16 ; round-trip %c through a stack slot
 114   %0 = load <4 x float>, <4 x float>* %c.addr, align 16
 115   ret <4 x float> %0 ; 32-bit run expects %c in xmm0, 64-bit run expects it in xmm2
 116 }
 117 ; CHECK-LABEL: test_mixed_2
 118 ; X86:         movaps  %xmm0, (%esp)
 119 ; X64:         movaps  %xmm2, %xmm0
 120 ; CHECK:       ret{{[ql]}}
 121
 122 define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) { ; five plain vectors followed by a pointer to an HVA2
 123 entry:
 124   %x = getelementptr inbounds %struct.HVA2, %struct.HVA2* %f, i32 0, i32 0 ; address of member 0 behind %f
 125   %0 = load <4 x float>, <4 x float>* %x, align 16
 126   ret <4 x float> %0 ; expected as one movaps load through the ax or cx register
 127 }
 128 ; CHECK-LABEL: test_mixed_3
 129 ; CHECK:       movaps   (%{{[re][ac]}}x), %xmm0
 130 ; CHECK:       ret{{[ql]}}
 131
 132 define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, %struct.HVA2* %bb, <4 x float> %c) { ; inreg HVA4, pointer to HVA2, plain vector
 133 entry:
 134   %y4 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %bb, i32 0, i32 1 ; address of member 1 behind %bb
 135   %0 = load <4 x float>, <4 x float>* %y4, align 16
 136   ret <4 x float> %0 ; expected as a load at offset 16 from eax (32-bit run) or rdx (64-bit run)
 137 }
 138 ; CHECK-LABEL: test_mixed_4
 139 ; X86:         movaps   16(%eax), %xmm0
 140 ; X64:         movaps   16(%rdx), %xmm0
 141 ; CHECK:       ret{{[ql]}}
 142
 143 define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %dd) { ; two inreg aggregates (HVA3, HVA2) around a pointer and a plain vector
 144 entry:
 145   %d = alloca %struct.HVA2, align 16
 146   store %struct.HVA2 %dd, %struct.HVA2* %d, align 16 ; store the whole HVA2 into the local slot
 147   %y5 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %d, i32 0, i32 1 ; address of member 1
 148   %0 = load <4 x float>, <4 x float>* %y5, align 16
 149   ret <4 x float> %0 ; member 1 of %dd is expected to come from xmm5
 150 }
 151 ; CHECK-LABEL: test_mixed_5
 152 ; CHECK-DAG:   movaps   %xmm{{[05]}}, 16(%{{(e|r)}}sp)
 153 ; CHECK-DAG:   movaps   %xmm5, %xmm0
 154 ; CHECK:       ret{{[ql]}}
 155
 156 define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) { ; returns an HVA4 by value, copied from the pointer argument
 157 entry:
 158   %retval = alloca %struct.HVA4, align 16
 159   %0 = bitcast %struct.HVA4* %retval to i8*
 160   %1 = bitcast %struct.HVA4* %b to i8*
 161   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %0, i8* align 16 %1, i32 64, i1 false) ; copy 64 bytes from %b into the local slot
 162   %2 = load %struct.HVA4, %struct.HVA4* %retval, align 16
 163   ret %struct.HVA4 %2 ; expected as four movaps loads (offsets 0, 16, 32, 48) into xmm0-xmm3
 164 }
 165 ; CHECK-LABEL: test_mixed_6
 166 ; CHECK:       movaps   (%{{[re]}}sp), %xmm0
 167 ; CHECK:       movaps   16(%{{[re]}}sp), %xmm1
 168 ; CHECK:       movaps   32(%{{[re]}}sp), %xmm2
 169 ; CHECK:       movaps   48(%{{[re]}}sp), %xmm3
 170 ; CHECK:       ret{{[ql]}}
 171
 172 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) ; called by test_mixed_7
 173 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) ; called by test_mixed_7
 174 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) ; called by test_mixed_6
 175
 176 define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret %agg.result) { ; HVA5 result written through an sret pointer
 177 entry:
 178   %a = alloca %struct.HVA5, align 16
 179   %0 = bitcast %struct.HVA5* %a to i8*
 180   call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 80, i1 false) ; zero all 80 bytes of the local HVA5
 181   %1 = bitcast %struct.HVA5* %agg.result to i8*
 182   %2 = bitcast %struct.HVA5* %a to i8*
 183   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 80, i1 false) ; copy the local into the sret destination
 184   ret void ; expected as five movaps stores at offsets 64 down to 0 off rcx or eax
 185 }
 186 ; CHECK-LABEL: test_mixed_7
 187 ; X64:         mov{{[ql]}}      %rcx, %rax
 188 ; CHECK:       movaps   %xmm{{[0-9]}}, 64(%{{rcx|eax}})
 189 ; CHECK:       movaps   %xmm{{[0-9]}}, 48(%{{rcx|eax}})
 190 ; CHECK:       movaps   %xmm{{[0-9]}}, 32(%{{rcx|eax}})
 191 ; CHECK:       movaps   %xmm{{[0-9]}}, 16(%{{rcx|eax}})
 192 ; CHECK:       movaps   %xmm{{[0-9]}}, (%{{rcx|eax}})
 193 ; CHECK:       ret{{[ql]}}
 194
 195 define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) { ; four vectors, an i32, then a fifth vector
 196 entry:
 197   %f.addr = alloca <4 x float>, align 16
 198   store <4 x float> %f, <4 x float>* %f.addr, align 16 ; round-trip %f through a stack slot
 199   %0 = load <4 x float>, <4 x float>* %f.addr, align 16
 200   ret <4 x float> %0 ; 32-bit run expects %f in xmm4, 64-bit run expects it in xmm5
 201 }
 202 ; CHECK-LABEL: test_mixed_8
 203 ; X86:         movaps   %xmm4, %xmm0
 204 ; X64:         movaps   %xmm5, %xmm0
 205 ; CHECK:       ret{{[ql]}}
 206
 207 %struct.HFA4 = type { double, double, double, double } ; four double members
 208 declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y) ; external callee; the caller below expects the symbol test_mixed_9_callee@@40
 209
 210 define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) { ; forwards its inreg HFA4 plus a constant double to the callee
 211 entry:
 212   %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
 213   %add = fadd double 1.000000e+00, %call ; keeps the call result live so an addsd into xmm0 is expected
 214   ret double %add
 215 }
 216 ; CHECK-LABEL: test_mixed_9_caller
 217 ; CHECK:       movaps  %xmm3, %xmm4
 218 ; CHECK:       movaps  %xmm2, %xmm3
 219 ; CHECK:       movaps  %xmm1, %xmm2
 220 ; X86:         movap{{d|s}}  %xmm0, %xmm1
 221 ; X64:         movap{{d|s}}  %xmm5, %xmm1
 222 ; CHECK:       call{{l|q}}   test_mixed_9_callee@@40
 223 ; CHECK:       addsd   {{.*}}, %xmm0
 224 ; CHECK:       ret{{l|q}}