arch/x86/lib/copy_page_64.S

   1 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
   2
   3 #include <linux/linkage.h>
   4 #include <asm/cpufeature.h>
   5 #include <asm/alternative-asm.h>
   6
   7 /*
   8  * Some CPUs run faster using the string copy instructions (sane microcode).
   9  * It is also a lot simpler. Use this when possible, but don't use the string
  10  * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Possible improvement:
  11  * vary the prefetch distance of the register copy based on SMP/UP.
  12  */
  13         ALIGN
     /*
      * copy_page - copy 4096 bytes from (%rsi) to (%rdi).
      *
      * In:  %rdi = destination, %rsi = source (the operands that rep movsq
      *      uses implicitly).
      * Out: no return register is written on the rep movsq path.
      *
      * Two implementations share this entry point. Per the comment that
      * precedes this function, the string-instruction body below is meant
      * for CPUs that advertise X86_FEATURE_REP_GOOD; other CPUs leave
      * through "jmp copy_page_regs" and never reach the rep movsq.
      * NOTE(review): ALTERNATIVE comes from <asm/alternative-asm.h>, which
      * is not visible here - presumably it replaces the jmp with the empty
      * second string when the feature bit is set; confirm against the
      * macro definition.
      *
      * Clobbers on the rep movsq path: %rcx (counted down to 0), %rsi and
      * %rdi (each moved by 4096 bytes).
      * NOTE(review): assumes the direction flag is clear on entry so the
      * copy runs towards higher addresses - confirm.
      */
  14 ENTRY(copy_page)
             /* Exit to the register-based copy unless REP_GOOD is set. */
  15         ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
             /*
              * 4096/8 = 512 quadwords. Writing %ecx also clears the upper
              * half of %rcx, which is the count rep movsq consumes.
              */
  16         movl    $4096/8, %ecx
             /* Copy %rcx quadwords from (%rsi) to (%rdi). */
  17         rep     movsq
  18         ret
  19 ENDPROC(copy_page)
  20
     /*
      * copy_page_regs - copy 4096 bytes from (%rsi) to (%rdi) with plain
      * 64-bit register moves, 64 bytes (eight quadwords) per iteration.
      *
      * Reached through the "jmp copy_page_regs" in copy_page, so the final
      * ret returns directly to copy_page's caller.
      *
      * In:      %rdi = destination, %rsi = source.
      * Scratch: %rax, %rbx, %rdx and %r8-%r12 carry the eight quadwords in
      *          flight; %rcx is the loop counter.
      * Saved:   %rbx and %r12 are spilled to 16 bytes of stack and restored
      *          before returning; the other scratch registers are not.
      * On exit: %rcx = 0, %rsi and %rdi have each advanced by 4096.
      *
      * The work is split into two loops: (4096/64)-5 = 59 iterations that
      * also prefetch five 64-byte blocks ahead of the block being read,
      * then 5 iterations with no prefetch. The furthest prefetch therefore
      * targets offset 63*64, the last 64-byte block of the source -
      * presumably the split exists so that no prefetch reaches past the
      * 4096 bytes being copied.
      *
      * Both loops decrement the counter at the top and branch on it at the
      * bottom. That only works because nothing in between writes the
      * flags: movq, prefetcht0 and leaq all leave them untouched. Keep it
      * that way when editing - replacing a leaq with addq, for example,
      * would break the loop exit test.
      */
  21 ENTRY(copy_page_regs)
             /*
              * Spill %rbx and %r12 (callee-saved in the SysV AMD64 ABI)
              * into two freshly reserved 8-byte stack slots.
              */
  22         subq    $2*8,   %rsp
  23         movq    %rbx,   (%rsp)
  24         movq    %r12,   1*8(%rsp)
  25
             /*
              * 59 prefetching iterations. movl zero-extends into %rcx, so
              * the 64-bit dec below sees the same count.
              */
  26         movl    $(4096/64)-5,   %ecx
             /* Align the loop head to a 16-byte boundary. */
  27         .p2align 4
  28 .Loop64:
             /* Sets ZF for the jnz at the bottom of the loop. */
  29         dec     %rcx
             /* Load the whole 64-byte block before storing any of it. */
  30         movq    0x8*0(%rsi), %rax
  31         movq    0x8*1(%rsi), %rbx
  32         movq    0x8*2(%rsi), %rdx
  33         movq    0x8*3(%rsi), %r8
  34         movq    0x8*4(%rsi), %r9
  35         movq    0x8*5(%rsi), %r10
  36         movq    0x8*6(%rsi), %r11
  37         movq    0x8*7(%rsi), %r12
  38
             /* Prefetch hint for the source block 5*64 = 320 bytes ahead. */
  39         prefetcht0 5*64(%rsi)
  40
             /* Write the eight quadwords to the destination block. */
  41         movq    %rax, 0x8*0(%rdi)
  42         movq    %rbx, 0x8*1(%rdi)
  43         movq    %rdx, 0x8*2(%rdi)
  44         movq    %r8,  0x8*3(%rdi)
  45         movq    %r9,  0x8*4(%rdi)
  46         movq    %r10, 0x8*5(%rdi)
  47         movq    %r11, 0x8*6(%rdi)
  48         movq    %r12, 0x8*7(%rdi)
  49
             /* Step both pointers by 64; leaq leaves ZF from the dec intact. */
  50         leaq    64 (%rsi), %rsi
  51         leaq    64 (%rdi), %rdi
  52
             /* Loop until the counter decremented at the top reached zero. */
  53         jnz     .Loop64
  54
             /* Remaining 5 blocks (320 bytes): same copy, no prefetch. */
  55         movl    $5, %ecx
  56         .p2align 4
  57 .Loop2:
             /*
              * 32-bit decrement here versus the 64-bit dec in .Loop64; the
              * upper half of %rcx is zero, so both count identically.
              */
  58         decl    %ecx
  59
             /* Load the 64-byte block ... */
  60         movq    0x8*0(%rsi), %rax
  61         movq    0x8*1(%rsi), %rbx
  62         movq    0x8*2(%rsi), %rdx
  63         movq    0x8*3(%rsi), %r8
  64         movq    0x8*4(%rsi), %r9
  65         movq    0x8*5(%rsi), %r10
  66         movq    0x8*6(%rsi), %r11
  67         movq    0x8*7(%rsi), %r12
  68
             /* ... and store it. */
  69         movq    %rax, 0x8*0(%rdi)
  70         movq    %rbx, 0x8*1(%rdi)
  71         movq    %rdx, 0x8*2(%rdi)
  72         movq    %r8,  0x8*3(%rdi)
  73         movq    %r9,  0x8*4(%rdi)
  74         movq    %r10, 0x8*5(%rdi)
  75         movq    %r11, 0x8*6(%rdi)
  76         movq    %r12, 0x8*7(%rdi)
  77
             /* Flag-preserving pointer bump, as in .Loop64. */
  78         leaq    64(%rdi), %rdi
  79         leaq    64(%rsi), %rsi
  80         jnz     .Loop2
  81
             /* Restore the two spilled registers and release the stack slots. */
  82         movq    (%rsp), %rbx
  83         movq    1*8(%rsp), %r12
  84         addq    $2*8, %rsp
  85         ret
  86 ENDPROC(copy_page_regs)