llvm/test/CodeGen/X86/vectorcall.ll

   1 ; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
   2 ; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
   3
   4 ; Test integer arguments.
   5
   6 define x86_vectorcallcc i32 @test_int_1() {
     ; No arguments, constant-zero result. The emitted symbol is expected to
     ; end in @@0 and the return value to be produced by zeroing %eax.
   7 ; CHECK-LABEL: {{^}}test_int_1@@0:
   8 ; CHECK: xorl %eax, %eax
   9   ret i32 0
  10 }
  11
  12 define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
     ; A single i32 inreg argument returned unchanged. Both targets are expected
     ; to copy it from %ecx into %eax; the symbol suffix differs per target
     ; (@@4 on i686, @@8 on x86_64).
  13 ; X86-LABEL: {{^}}test_int_2@@4:
  14 ; X64-LABEL: {{^}}test_int_2@@8:
  15 ; CHECK: movl %ecx, %eax
  16   ret i32 %a
  17 }
  18
  19 define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
     ; A single i64 inreg argument truncated to i32. The symbol suffix is @@8 on
     ; both targets; i686 is expected to copy %ecx to %eax, x86_64 to copy the
     ; full %rcx to %rax.
  20 ; X86-LABEL: {{^}}test_int_3@@8:
  21 ; X64-LABEL: {{^}}test_int_3@@8:
  22 ; X86: movl %ecx, %eax
  23 ; X64: movq %rcx, %rax
  24   %at = trunc i64 %a to i32
  25   ret i32 %at
  26 }
  27
  28 define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
     ; Two i32 inreg arguments summed. The sum is expected to be formed with one
     ; leal over the %ecx/%edx pair (i686) or the %rcx/%rdx pair (x86_64); the
     ; symbol suffix is @@8 on i686 and @@16 on x86_64.
  29 ; X86-LABEL: {{^}}test_int_4@@8:
  30 ; X86: leal (%ecx,%edx), %eax
  31 ; X64-LABEL: {{^}}test_int_4@@16:
  32 ; X64: leal (%rcx,%rdx), %eax
  33   %s = add i32 %a, %b
  34   ret i32 %s
  35 }
  36
  37 define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
     ; The function name is written with a leading \01 byte. The expected label
     ; is the bare name, i.e. without the @@<N> suffix the other functions get.
  38 ; CHECK-LABEL: {{^}}test_int_5:
  39   ret i32 0
  40 }
  41
  42 define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
     ; Returns the second of two double arguments. The checks expect it to be
     ; copied from %xmm1 to %xmm0, and the symbol suffix to be @@16.
  43 ; CHECK-LABEL: {{^}}test_fp_1@@16:
  44 ; CHECK: movaps %xmm1, %xmm0
  45   ret double %b
  46 }
  47
  48 define x86_vectorcallcc double @test_fp_2(double, double, double, double, double, double, double %r) {
     ; Returns the seventh of seven double arguments. The checks expect it to be
     ; loaded with movsd from a slot addressed off the stack pointer, and the
     ; symbol suffix to be @@56.
  49 ; CHECK-LABEL: {{^}}test_fp_2@@56:
  50 ; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
  51   ret double %r
  52 }
  53
  54 define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
     ; Returns a struct of four zero doubles. The checks expect one xorps for
     ; each of %xmm0 through %xmm3, in that order.
  55 ; CHECK-LABEL: {{^}}test_fp_3@@0:
  56 ; CHECK: xorps %xmm0
  57 ; CHECK: xorps %xmm1
  58 ; CHECK: xorps %xmm2
  59 ; CHECK: xorps %xmm3
  60   ret {double, double, double, double}
  61         { double 0.0, double 0.0, double 0.0, double 0.0 }
  62 }
  63
  64 ; FIXME: Returning via x87 isn't compatible, but it's hard to structure the
  65 ; tablegen any other way.
  66 define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
     ; Returns a struct of five zero doubles, one more than test_fp_3. The
     ; checks expect an x87 fldz in addition to zeroing %xmm0 through %xmm3.
  67 ; CHECK-LABEL: {{^}}test_fp_4@@0:
  68 ; CHECK: fldz
  69 ; CHECK: xorps %xmm0
  70 ; CHECK: xorps %xmm1
  71 ; CHECK: xorps %xmm2
  72 ; CHECK: xorps %xmm3
  73   ret {double, double, double, double, double}
  74         { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
  75 }
  76
  77 define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
     ; Returns the second of two <16 x i8> arguments. The checks expect a copy
     ; from %xmm1 to %xmm0 and a symbol suffix of @@32.
  78 ; CHECK-LABEL: {{^}}test_vec_1@@32:
  79 ; CHECK: movaps %xmm1, %xmm0
  80   ret <16 x i8> %b
  81 }
  82
  83 define x86_vectorcallcc <16 x i8> @test_vec_2(double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
     ; Returns the last of one double plus six <16 x i8> arguments. The checks
     ; expect %r to be loaded through a pointer: on x86_64 the pointer is first
     ; read from the stack into %rax, on i686 it is expected in %ecx. The symbol
     ; suffix is @@104.
  84 ; CHECK-LABEL: {{^}}test_vec_2@@104:
  85 ; X64:           movq    {{[0-9]*}}(%rsp), %rax
  86 ; CHECK:         movaps (%{{rax|ecx}}), %xmm0
  87   ret <16 x i8> %r
  88 }
  89
  90 %struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
  91 %struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
  92 %struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
  93 %struct.HVA2 = type { <4 x float>, <4 x float> }
     ; The four types above are structs of five, four, three and two <4 x float>
     ; members. They are the aggregate argument, return and sret types of the
     ; test_mixed_* functions in this file.
     ; NOTE(review): "HVA" presumably abbreviates "homogeneous vector
     ; aggregate" - the file itself does not spell it out.
  94
  95 define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
     ; An inreg %struct.HVA4 placed between two i32 arguments. The aggregate is
     ; stored to a local and its member at index 1 is returned. The checks
     ; expect %xmm1 to be stored at offset 16 from the stack pointer and %xmm1
     ; to be copied to %xmm0.
  96 ; CHECK-LABEL: test_mixed_1
  97 ; CHECK:       movaps   %xmm1, 16(%{{(e|r)}}sp)
  98 ; CHECK:       movaps   %xmm1, %xmm0
  99 ; CHECK:       ret{{q|l}}
 100 entry:
 101   %b = alloca %struct.HVA4, align 16
 102   store %struct.HVA4 %bb, ptr %b, align 16
 103   %w1 = getelementptr inbounds %struct.HVA4, ptr %b, i32 0, i32 1
 104   %0 = load <4 x float>, ptr %w1, align 16
 105   ret <4 x float> %0
 106 }
 107
 108 define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, ptr %b, <4 x float> %c) {
     ; An inreg %struct.HVA4 followed by a pointer and a plain vector %c, which
     ; is round-tripped through a local and returned. The expectations differ
     ; per target: i686 expects %xmm0 stored at (%esp), x86_64 expects %xmm2
     ; copied to %xmm0.
 109 ; CHECK-LABEL: test_mixed_2
 110 ; X86:         movaps  %xmm0, (%esp)
 111 ; X64:         movaps  %xmm2, %xmm0
 112 ; CHECK:       ret{{[ql]}}
 113 entry:
 114   %c.addr = alloca <4 x float>, align 16
 115   store <4 x float> %c, ptr %c.addr, align 16
 116   %0 = load <4 x float>, ptr %c.addr, align 16
 117   ret <4 x float> %0
 118 }
 119
 120 define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, ptr %f) {
     ; Five vector arguments followed by a pointer %f; the vector that %f
     ; points to is loaded and returned. The load is expected to go through
     ; one of %eax, %ecx, %rax or %rcx (all four are accepted by the pattern).
 121 ; CHECK-LABEL: test_mixed_3
 122 ; CHECK:       movaps   (%{{[re][ac]}}x), %xmm0
 123 ; CHECK:       ret{{[ql]}}
 124 entry:
 125   %0 = load <4 x float>, ptr %f, align 16
 126   ret <4 x float> %0
 127 }
 128
 129 define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, ptr %bb, <4 x float> %c) {
     ; Same signature shape as test_mixed_2, but the result comes from memory:
     ; %bb is indexed as a %struct.HVA2 and its member at index 1 is returned.
     ; The checks expect a load from offset 16 off %eax (i686) or %rdx (x86_64).
 130 ; CHECK-LABEL: test_mixed_4
 131 ; X86:         movaps   16(%eax), %xmm0
 132 ; X64:         movaps   16(%rdx), %xmm0
 133 ; CHECK:       ret{{[ql]}}
 134 entry:
 135   %y4 = getelementptr inbounds %struct.HVA2, ptr %bb, i32 0, i32 1
 136   %0 = load <4 x float>, ptr %y4, align 16
 137   ret <4 x float> %0
 138 }
 139
 140 define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, ptr %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
     ; Two inreg aggregates (%struct.HVA3 and %struct.HVA2) separated by a
     ; pointer and a plain vector. %dd is stored to a local and its member at
     ; index 1 is returned. The two -DAG checks may match in either order: a
     ; store of %xmm0 or %xmm5 at offset 16 from the stack pointer, and a copy
     ; of %xmm5 to %xmm0.
     ; The register pattern is the bracket expression [05]. A comma inside the
     ; brackets would be a literal and would also accept the text "%xmm,".
 141 ; CHECK-LABEL: test_mixed_5
 142 ; CHECK-DAG:   movaps   %xmm{{[05]}}, 16(%{{(e|r)}}sp)
 143 ; CHECK-DAG:   movaps   %xmm5, %xmm0
 144 ; CHECK:       ret{{[ql]}}
 145 entry:
 146   %d = alloca %struct.HVA2, align 16
 147   store %struct.HVA2 %dd, ptr %d, align 16
 148   %y5 = getelementptr inbounds %struct.HVA2, ptr %d, i32 0, i32 1
 149   %0 = load <4 x float>, ptr %y5, align 16
 150   ret <4 x float> %0
 151 }
 152
 153 define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, ptr %b) {
     ; Returns a %struct.HVA4 by value. 64 bytes are copied from %b into a
     ; local, which is then loaded and returned. The checks expect the four
     ; members to be loaded into %xmm0 through %xmm3 from offsets 0, 16, 32 and
     ; 48 off the stack pointer.
 154 ; CHECK-LABEL: test_mixed_6
 155 ; CHECK:       movaps   (%{{[re]}}sp), %xmm0
 156 ; CHECK:       movaps   16(%{{[re]}}sp), %xmm1
 157 ; CHECK:       movaps   32(%{{[re]}}sp), %xmm2
 158 ; CHECK:       movaps   48(%{{[re]}}sp), %xmm3
 159 ; CHECK:       ret{{[ql]}}
 160 entry:
 161   %retval = alloca %struct.HVA4, align 16
 162   call void @llvm.memcpy.p0.p0.i32(ptr align 16 %retval, ptr align 16 %b, i32 64, i1 false)
 163   %0 = load %struct.HVA4, ptr %retval, align 16
 164   ret %struct.HVA4 %0
 165 }
 166
 167 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
 168 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)
 169 declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1)
     ; Memory intrinsics called by the tests in this file: the i32-length memcpy
     ; by test_mixed_6, the i64-length memset and memcpy by test_mixed_7.
 170
 171 define x86_vectorcallcc void @test_mixed_7(ptr noalias sret(%struct.HVA5) %agg.result) {
     ; Produces a %struct.HVA5 through an sret pointer: an 80-byte local is
     ; zero-filled and then copied to %agg.result. The checks expect five
     ; 16-byte stores at offsets 64, 48, 32, 16 and 0 off %rcx (x86_64) or %eax
     ; (i686); x86_64 additionally expects %rcx to be copied to %rax first.
 172 ; CHECK-LABEL: test_mixed_7@@0
 173 ; X64:         mov{{[ql]}}      %rcx, %rax
 174 ; CHECK:       movaps   %xmm{{[0-9]}}, 64(%{{rcx|eax}})
 175 ; CHECK:       movaps   %xmm{{[0-9]}}, 48(%{{rcx|eax}})
 176 ; CHECK:       movaps   %xmm{{[0-9]}}, 32(%{{rcx|eax}})
 177 ; CHECK:       movaps   %xmm{{[0-9]}}, 16(%{{rcx|eax}})
 178 ; CHECK:       movaps   %xmm{{[0-9]}}, (%{{rcx|eax}})
 179 ; CHECK:       ret{{[ql]}}
 180 entry:
 181   %a = alloca %struct.HVA5, align 16
 182   call void @llvm.memset.p0.i64(ptr align 16 %a, i8 0, i64 80, i1 false)
 183   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.result, ptr align 16 %a, i64 80, i1 false)
 184   ret void
 185 }
 186
 187 define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
     ; Four vectors, an i32, then a fifth vector %f, which is round-tripped
     ; through a local and returned. The expected source register for %f
     ; differs per target: %xmm4 on i686, %xmm5 on x86_64.
 188 ; CHECK-LABEL: test_mixed_8
 189 ; X86:         movaps   %xmm4, %xmm0
 190 ; X64:         movaps   %xmm5, %xmm0
 191 ; CHECK:       ret{{[ql]}}
 192 entry:
 193   %f.addr = alloca <4 x float>, align 16
 194   store <4 x float> %f, ptr %f.addr, align 16
 195   %0 = load <4 x float>, ptr %f.addr, align 16
 196   ret <4 x float> %0
 197 }
 198
 199 %struct.HFA4 = type { double, double, double, double }
 200 declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)
     ; %struct.HFA4 is a struct of four doubles. The callee is only declared
     ; here; it is the call target of test_mixed_9_caller.
     ; NOTE(review): this declaration has no inreg on %x, while the call site in
     ; test_mixed_9_caller passes the aggregate with inreg - confirm that the
     ; mismatch is intentional.
 201
 202 define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
     ; Forwards an inreg %struct.HFA4 to test_mixed_9_callee together with the
     ; constant 3.0, then adds 1.0 to the result. The checks expect the incoming
     ; registers to be shuffled upwards (%xmm3 to %xmm4, %xmm2 to %xmm3, %xmm1
     ; to %xmm2) before the call, followed by a target-specific copy into %xmm1,
     ; a call to the symbol with suffix @@40, and an addsd into %xmm0.
     ;
     ; The i686-specific line uses the X86 prefix, which is one of the prefixes
     ; enabled by the RUN lines. It previously used an X32 prefix that no RUN
     ; line enables, so it was never evaluated, and it named the nonexistent
     ; mnemonic "movasd".
     ; NOTE(review): the i686 pattern below mirrors the x86_64 one (movaps or
     ; movapd) with %xmm0 as the source, as the disabled line intended -
     ; confirm against actual llc output for i686-pc-win32.
 203 ; CHECK-LABEL: test_mixed_9_caller
 204 ; CHECK:       movaps  %xmm3, %xmm4
 205 ; CHECK:       movaps  %xmm2, %xmm3
 206 ; CHECK:       movaps  %xmm1, %xmm2
 207 ; X86:         movap{{d|s}}  %xmm0, %xmm1
 208 ; X64:         movap{{d|s}}  %xmm5, %xmm1
 209 ; CHECK:       call{{l|q}}   test_mixed_9_callee@@40
 210 ; CHECK:       addsd   {{.*}}, %xmm0
 211 ; CHECK:       ret{{l|q}}
 212 entry:
 213   %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
 214   %add = fadd double 1.000000e+00, %call
 215   ret double %add
 216 }