arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/dwarf2.h>
  11
  12 #define FIX_ALIGNMENT 1
  13
  14 #include <asm/current.h>
  15 #include <asm/asm-offsets.h>
  16 #include <asm/thread_info.h>
  17 #include <asm/cpufeature.h>
  18
  19         .macro ALTERNATIVE_JUMP feature,orig,alt
             /*
              * Emit a 5-byte near jump (opcode 0xe9 + 32-bit displacement) to
              * "orig" at the point of use, plus a second jump to "alt" placed
              * in .altinstr_replacement, and a record in .altinstructions that
              * ties the two together under "feature".
              *
              * The replacement's displacement is computed relative to label 1
              * (the end of the inline jump), not to its own end, so its
              * encoding is only correct once it sits at label 0 -- presumably
              * the alternatives patching code copies it there when the
              * feature bit is set; confirm against that code.
              */
  20 0:
  21         .byte 0xe9      /* 32bit jump */
  22         .long \orig-1f  /* by default jump to orig */
  23 1:
  24         .section .altinstr_replacement,"ax"
  25 2:      .byte 0xe9                      /* near jump with 32bit immediate */
  26         .long \alt-1b /* offset */   /* or alternatively to alt */
  27         .previous
  28         .section .altinstructions,"a"
  29         .align 8
  30         .quad  0b                       /* address of the inline jump */
  31         .quad  2b                       /* address of the replacement jump */
  32         .word  \feature                 /* when feature is set */
             /*
              * Two length bytes; each jump is 1 opcode byte + 4 displacement
              * bytes = 5.  Presumably original length then replacement
              * length -- confirm against the record layout.
              */
  33         .byte  5
  34         .byte  5
  35         .previous
  36         .endm
  37
  38         .macro ALIGN_DESTINATION
             /*
              * Copy single bytes from (%rsi) to (%rdi) until %rdi is 8-byte
              * aligned, and reduce the count in %edx by the number of bytes
              * copied.  Expands to nothing unless FIX_ALIGNMENT is defined.
              *
              * In/out: rdi destination, rsi source, edx count.
              * Modifies: ecx, al, flags.
              *
              * Up to 7 is subtracted from %edx without a range check; both
              * users in this file branch away when the count is below 8
              * before invoking the macro.
              */
  39 #ifdef FIX_ALIGNMENT
  40         /* check for bad alignment of destination */
  41         movl %edi,%ecx
  42         andl $7,%ecx
  43         jz 102f                         /* already aligned */
  44         subl $8,%ecx
  45         negl %ecx                       /* ecx = 8 - (dest & 7): bytes to reach alignment */
  46         subl %ecx,%edx                  /* edx = count left after the alignment bytes */
  47 100:    movb (%rsi),%al
  48 101:    movb %al,(%rdi)
  49         incq %rsi
  50         incq %rdi
  51         decl %ecx
  52         jnz 100b
  53 102:
  54         .section .fixup,"ax"
             /*
              * Reached from the table below when 100 or 101 faults: ecx still
              * holds the alignment bytes not yet copied, so adding it back
              * makes edx the total number of bytes not copied.
              * copy_user_handle_tail is defined outside this file.
              */
  55 103:    addl %ecx,%edx                  /* ecx is zerorest also */
  56         jmp copy_user_handle_tail
  57         .previous
  58
             /* pairs of (instruction that may fault, fixup label) */
  59         .section __ex_table,"a"
  60         .align 8
  61         .quad 100b,103b
  62         .quad 101b,103b
  63         .previous
  64 #endif
  65         .endm
  66
  67 /*
      * _copy_to_user - standard copy_to_user with segment limit checking.
      *
      * Input:
      * rdi destination (range-checked against the thread's address limit)
      * rsi source
      * rdx count
      *
      * Output:
      * eax uncopied bytes or 0 if successful.
      *
      * The copy is refused (bad_to_user returns the full count) when
      * rdi + rdx wraps around or lies above the address limit.  An end
      * address exactly equal to the limit is accepted: the last byte
      * written is at rdi + rdx - 1, so the limit address itself is never
      * touched.
      */
  68 ENTRY(_copy_to_user)
  69         CFI_STARTPROC
  70         GET_THREAD_INFO(%rax)
  71         movq %rdi,%rcx
  72         addq %rdx,%rcx                  /* rcx = one past the last destination byte */
  73         jc bad_to_user                  /* end address wrapped past 2^64 */
  74         cmpq TI_addr_limit(%rax),%rcx
  75         ja bad_to_user                  /* unsigned: only end > limit is out of range */
             /* tail-jump into the copy routine selected by the CPU feature */
  76         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  77         CFI_ENDPROC
  78 ENDPROC(_copy_to_user)
  79
  80 /*
      * _copy_from_user - standard copy_from_user with segment limit checking.
      *
      * Input:
      * rdi destination
      * rsi source (range-checked against the thread's address limit)
      * rdx count
      *
      * Output:
      * eax uncopied bytes or 0 if successful.
      *
      * The copy is refused when rsi + rdx wraps around or lies above the
      * address limit; bad_from_user then zero-fills the destination and
      * returns the full count.  An end address exactly equal to the limit
      * is accepted: the last byte read is at rsi + rdx - 1, so the limit
      * address itself is never touched.
      */
  81 ENTRY(_copy_from_user)
  82         CFI_STARTPROC
  83         GET_THREAD_INFO(%rax)
  84         movq %rsi,%rcx
  85         addq %rdx,%rcx                  /* rcx = one past the last source byte */
  86         jc bad_from_user                /* end address wrapped past 2^64 */
  87         cmpq TI_addr_limit(%rax),%rcx
  88         ja bad_from_user                /* unsigned: only end > limit is out of range */
             /* tail-jump into the copy routine selected by the CPU feature */
  89         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  90         CFI_ENDPROC
  91 ENDPROC(_copy_from_user)
  92
  93         .section .fixup,"ax"
             /*
              * Failure exits for the range checks in _copy_from_user and
              * _copy_to_user.  Both return the requested count (edx) in eax,
              * i.e. "nothing was copied".  bad_from_user first fills the
              * destination with zero bytes and then falls through into
              * bad_to_user.
              *
              * Only the low 32 bits of the count are used here.
              */
  94         /* must zero dest */
             /*
              * NOTE(review): the explicit label on the next-but-one line looks
              * redundant if ENTRY() already defines bad_from_user -- confirm
              * against the ENTRY definition before removing it.
              */
  95 ENTRY(bad_from_user)
  96 bad_from_user:
  97         CFI_STARTPROC
  98         movl %edx,%ecx                  /* ecx = number of bytes to clear */
  99         xorl %eax,%eax                  /* al = 0, the fill byte */
 100         rep
 101         stosb                           /* store al to (%rdi), ecx times */
 102 bad_to_user:
 103         movl %edx,%eax                  /* return value: whole count uncopied */
 104         ret
 105         CFI_ENDPROC
 106 ENDPROC(bad_from_user)
 107         .previous
 108
 109 /*
 110  * copy_user_generic_unrolled - memory copy with exception handling.
 111  * This version is for CPUs like P4 that don't have efficient micro
 112  * code for rep movsq
 113  *
 114  * Input:
 115  * rdi destination
 116  * rsi source
 117  * rdx count
 118  *
 119  * Output:
 120  * eax uncopied bytes or 0 if successful.
      *
      * Only the low 32 bits of the count (edx) are examined.
      * Modifies rcx, rdx, rsi, rdi, r8-r11 and eax.
      *
      * The copy runs in three stages: 64-byte blocks (labels 1-16),
      * single quadwords (18-19), then single bytes (21-22).  Every memory
      * access carries a numbered label so the exception table at the end
      * can route a fault to the fixup that matches the stage.
 121  */
 122 ENTRY(copy_user_generic_unrolled)
 123         CFI_STARTPROC
 124         cmpl $8,%edx
 125         jb 20f          /* less than 8 bytes, go to byte copy loop */
 126         ALIGN_DESTINATION
 127         movl %edx,%ecx
 128         andl $63,%edx                   /* edx = bytes left over after whole 64-byte blocks */
 129         shrl $6,%ecx                    /* ecx = number of 64-byte blocks */
 130         jz 17f
             /*
              * One 64-byte block per iteration, moved as two groups of four
              * quadwords through r8-r11.  rsi/rdi advance only after all
              * sixteen accesses, so on a fault they still point at the start
              * of the block in progress.
              */
 131 1:      movq (%rsi),%r8
 132 2:      movq 1*8(%rsi),%r9
 133 3:      movq 2*8(%rsi),%r10
 134 4:      movq 3*8(%rsi),%r11
 135 5:      movq %r8,(%rdi)
 136 6:      movq %r9,1*8(%rdi)
 137 7:      movq %r10,2*8(%rdi)
 138 8:      movq %r11,3*8(%rdi)
 139 9:      movq 4*8(%rsi),%r8
 140 10:     movq 5*8(%rsi),%r9
 141 11:     movq 6*8(%rsi),%r10
 142 12:     movq 7*8(%rsi),%r11
 143 13:     movq %r8,4*8(%rdi)
 144 14:     movq %r9,5*8(%rdi)
 145 15:     movq %r10,6*8(%rdi)
 146 16:     movq %r11,7*8(%rdi)
 147         leaq 64(%rsi),%rsi
 148         leaq 64(%rdi),%rdi
 149         decl %ecx
 150         jnz 1b
 151 17:     movl %edx,%ecx
 152         andl $7,%edx                    /* edx = trailing bytes (0..7) */
 153         shrl $3,%ecx                    /* ecx = whole quadwords left */
 154         jz 20f
 155 18:     movq (%rsi),%r8
 156 19:     movq %r8,(%rdi)
 157         leaq 8(%rsi),%rsi
 158         leaq 8(%rdi),%rdi
 159         decl %ecx
 160         jnz 18b
 161 20:     andl %edx,%edx                  /* sets ZF when no bytes remain */
 162         jz 23f
 163         movl %edx,%ecx                  /* ecx = byte loop counter */
 164 21:     movb (%rsi),%al
 165 22:     movb %al,(%rdi)
 166         incq %rsi
 167         incq %rdi
 168         decl %ecx
 169         jnz 21b
 170 23:     xor %eax,%eax                   /* success: 0 bytes uncopied */
 171         ret
 172
             /*
              * Fixups: each entry point rebuilds "bytes not copied" in edx
              * from the loop counter of the stage that faulted, then jumps to
              * copy_user_handle_tail (defined outside this file).
              *   30: block loop   -> edx = ecx * 64 + edx
              *                       (the block in progress counts as uncopied)
              *   40: quadword loop -> edx = ecx * 8 + edx
              *   50: byte loop     -> edx = ecx
              */
 173         .section .fixup,"ax"
 174 30:     shll $6,%ecx
 175         addl %ecx,%edx
 176         jmp 60f
 177 40:     lea (%rdx,%rcx,8),%rdx
 178         jmp 60f
 179 50:     movl %ecx,%edx
 180 60:     jmp copy_user_handle_tail /* ecx is zerorest also */
 181         .previous
 182
             /* pairs of (instruction that may fault, fixup label) */
 183         .section __ex_table,"a"
 184         .align 8
 185         .quad 1b,30b
 186         .quad 2b,30b
 187         .quad 3b,30b
 188         .quad 4b,30b
 189         .quad 5b,30b
 190         .quad 6b,30b
 191         .quad 7b,30b
 192         .quad 8b,30b
 193         .quad 9b,30b
 194         .quad 10b,30b
 195         .quad 11b,30b
 196         .quad 12b,30b
 197         .quad 13b,30b
 198         .quad 14b,30b
 199         .quad 15b,30b
 200         .quad 16b,30b
 201         .quad 18b,40b
 202         .quad 19b,40b
 203         .quad 21b,50b
 204         .quad 22b,50b
 205         .previous
 206         CFI_ENDPROC
 207 ENDPROC(copy_user_generic_unrolled)
 208
 209 /* Some CPUs run faster using the string copy instructions.
 210  * This is also a lot simpler. Use them when possible.
 211  *
 212  * Only 4GB of copy is supported. This shouldn't be a problem
 213  * because the kernel normally only writes from/to page sized chunks
 214  * even if user space passed a longer buffer.
 215  * And more would be dangerous because both Intel and AMD have
 216  * errata with rep movsq > 4GB. If someone feels the need to fix
 217  * this please consider this.
 218  *
 219  * Input:
 220  * rdi destination
 221  * rsi source
 222  * rdx count
 223  *
 224  * Output:
 225  * eax uncopied bytes or 0 if successful.
      *
      * Only the low 32 bits of the count (edx) are examined.
      * Modifies rcx, rdx, rsi, rdi and eax.
 226  */
 227 ENTRY(copy_user_generic_string)
 228         CFI_STARTPROC
 229         andl %edx,%edx                  /* sets ZF for a zero count */
 230         jz 4f
 231         cmpl $8,%edx
 232         jb 2f           /* less than 8 bytes, go to byte copy loop */
 233         ALIGN_DESTINATION
 234         movl %edx,%ecx
 235         shrl $3,%ecx                    /* ecx = number of whole quadwords */
 236         andl $7,%edx                    /* edx = trailing bytes (0..7) */
 237 1:      rep
 238         movsq
 239 2:      movl %edx,%ecx                  /* ecx = bytes for the byte copy */
 240 3:      rep
 241         movsb
 242 4:      xorl %eax,%eax                  /* success: 0 bytes uncopied */
 243         ret
 244
             /*
              * Fixups: 11 handles a fault in the quadword copy, where rcx is
              * the number of quadwords not yet moved; it converts that to
              * bytes, adds the trailing bytes in edx, and falls through to 12.
              * 12 handles a fault in the byte copy, where ecx is already the
              * number of bytes not yet moved.  Either way edx ends up holding
              * the bytes not copied before the jump to copy_user_handle_tail
              * (defined outside this file).
              */
 245         .section .fixup,"ax"
 246 11:     lea (%rdx,%rcx,8),%rcx
 247 12:     movl %ecx,%edx          /* ecx is zerorest also */
 248         jmp copy_user_handle_tail
 249         .previous
 250
             /* pairs of (instruction that may fault, fixup label) */
 251         .section __ex_table,"a"
 252         .align 8
 253         .quad 1b,11b
 254         .quad 3b,12b
 255         .previous
 256         CFI_ENDPROC
 257 ENDPROC(copy_user_generic_string)