1 C nettle, low-level cryptographics library
3 C Copyright (C) 2010, Niels Möller
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C Register aliases and build switch defined in this part of the file:
C   DST       is %rax (the value originally arrives in %rdi).
C   S1        is %rdi, the same register that is used as CNT.
C   USE_SSE2  is set to no; the ifelse(USE_SSE2, yes, ...) tests further
C             down compare against this value.
21 define
(<DST
>, <%rax
>) C Originally
in %rdi
29 define
(<S1
>, <%rdi
>) C Overlaps with CNT
31 define
(<USE_SSE2
>, <no
>)
37 C memxor(uint8_t *dst, const uint8_t *src, size_t n)
48 C memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
54 C %cl needed for shift count, so move away N
58 C Get number of unaligned bytes at the end
59 C %rdi is used as CNT, %rax as DST and as return value
70 C FIXME: Instead of this loop, could try cmov with memory
71 C destination, as a sequence of one 8-bit, one 16-bit and one
72 C 32-bit operations. (Except that cmov can't do 8-bit ops, so
73 C that step has to use a conditional).
77 movb (AP, N), LREG(TMP)
78 xorb (BP, N), LREG(TMP)
79 movb LREG(TMP), (DST, N)
84 ifelse(USE_SSE2, yes, <
88 C Check for the case that AP and BP have the same alignment,
89 C but different from DST.
102 C Unrolling, with aligned values alternating in S0 and S1
150 C FIXME: Handle the case N == 16 specially,
151 C like in the non-shifted case?
165 C Next destination word is -8(DST, N)
166 C Setup for unrolling
191 ja .Lword_loop C Not zero and no carry
194 C Final operation is word aligned
204 C ENTRY might have been 3 args, too, but it doesn't matter for the exit
212 movb
(AP
, N
), LREG
(TMP
)
213 xorb
(BP, N
), LREG
(TMP
)
214 movb LREG
(TMP
), (DST
, N
)
220 C ENTRY might have been 3 args, too, but it doesn't matter for the exit
224 ifelse(USE_SSE2, yes, <
238 movdqu (AP, N), %xmm0
239 movdqu (BP, N), %xmm1
241 movdqa %xmm1, (DST, N)
246 C FIXME: See if we can do a full word first, before the
247 C byte-wise final loop.
250 C Final operation is aligned
255 C ENTRY might have been 3 args, too, but it doesn't matter for the exit