/* nettle, low-level cryptographics library
 *
 * Copyright (C) 1991, 1993, 1995 Free Software Foundation, Inc.
 * Copyright (C) 2010 Niels Möller
 *
 * The nettle library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * The nettle library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the nettle library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
/* Implementation inspired by memcmp in glibc, contributed to the FSF
   by Torbjorn Granlund.
 */
/* Machine word used by the word-at-a-time XOR loops. */
typedef unsigned long int word_t;

/* Addresses and lengths are rounded down to a word boundary by masking
   with -SIZEOF_LONG, which only works when the word size is a power of
   two.  SIZEOF_LONG is not defined in this file; presumably it is
   supplied by the build configuration. */
#if SIZEOF_LONG & (SIZEOF_LONG - 1)
#error Word size must be a power of two
#endif

/* Byte offset of pointer P from the preceding word_t boundary. */
#define ALIGN_OFFSET(p) ((uintptr_t) (p) % sizeof(word_t))

/* Combine two words that were loaded from adjacent aligned addresses
   into the single word that straddles them.  SH_1 and SH_2 are bit
   counts; the shift directions depend on the byte order. */
#ifndef WORDS_BIGENDIAN
#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
#else
#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
#endif

/* Inputs shorter than this many bytes are handled bytewise only. */
#define WORD_T_THRESH 16
54 /* XOR word-aligned areas. n is the number of words, not bytes. */
56 memxor_common_alignment (word_t
*dst
, const word_t
*src
, size_t n
)
58 /* FIXME: Require n > 0? */
59 /* FIXME: Unroll four times, like memcmp? Probably not worth the
67 for (; n
>= 2; dst
+= 2, src
+= 2, n
-= 2)
74 /* XOR *un-aligned* src-area onto aligned dst area. n is number of
75 words, not bytes. Assumes we can read complete words at the start
76 and end of the src operand. */
78 memxor_different_alignment (word_t
*dst
, const uint8_t *src
, size_t n
)
82 const word_t
*src_word
;
83 unsigned offset
= ALIGN_OFFSET (src
);
86 shl
= CHAR_BIT
* offset
;
87 shr
= CHAR_BIT
* (sizeof(word_t
) - offset
);
89 src_word
= (const word_t
*) ((uintptr_t) src
& -SIZEOF_LONG
);
91 /* FIXME: Unroll four times, like memcmp? */
97 dst
[0] ^= MERGE (s1
, shl
, s0
, shr
);
100 for (; i
< n
; i
+= 2)
103 dst
[i
] ^= MERGE(s0
, shl
, s1
, shr
);
105 dst
[i
+1] ^= MERGE(s1
, shl
, s0
, shr
);
109 /* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
110 cycles/byte unaligned. */
112 /* XOR LEN bytes starting at SRCADDR onto DESTADDR. Result undefined
113 if the source overlaps with the destination. Return DESTADDR. */
115 memxor(uint8_t *dst
, const uint8_t *src
, size_t n
)
117 uint8_t *orig_dst
= dst
;
119 if (n
>= WORD_T_THRESH
)
121 /* There are at least some bytes to compare. No need to test
122 for N == 0 in this alignment loop. */
123 while (ALIGN_OFFSET (dst
))
128 if (ALIGN_OFFSET (src
))
129 memxor_different_alignment ((word_t
*) dst
, src
, n
/ sizeof(word_t
));
131 memxor_common_alignment ((word_t
*) dst
, (const word_t
*) src
, n
/ sizeof(word_t
));
133 dst
+= n
& -SIZEOF_LONG
;
134 src
+= n
& -SIZEOF_LONG
;
135 n
= n
& (SIZEOF_LONG
- 1);
144 /* XOR word-aligned areas. n is the number of words, not bytes. */
146 memxor3_common_alignment (word_t
*dst
,
147 const word_t
*a
, const word_t
*b
, size_t n
)
149 /* FIXME: Require n > 0? */
151 dst
[n
] = a
[n
] ^ b
[n
];
155 memxor3_different_alignment_b (word_t
*dst
,
156 const word_t
*a
, const uint8_t *b
, unsigned offset
, size_t n
)
159 const word_t
*b_word
;
163 shl
= CHAR_BIT
* offset
;
164 shr
= CHAR_BIT
* (sizeof(word_t
) - offset
);
166 b_word
= (const word_t
*) ((uintptr_t) b
& -SIZEOF_LONG
);
173 dst
[n
] = a
[n
] ^ MERGE (s1
, shl
, s0
, shr
);
182 dst
[n
+1] = a
[n
+1] ^ MERGE(s0
, shl
, s1
, shr
);
184 dst
[n
] = a
[n
] ^ MERGE(s1
, shl
, s0
, shr
);
189 memxor3_different_alignment_ab (word_t
*dst
,
190 const uint8_t *a
, const uint8_t *b
,
191 unsigned offset
, size_t n
)
194 const word_t
*a_word
;
195 const word_t
*b_word
;
199 shl
= CHAR_BIT
* offset
;
200 shr
= CHAR_BIT
* (sizeof(word_t
) - offset
);
202 a_word
= (const word_t
*) ((uintptr_t) a
& -SIZEOF_LONG
);
203 b_word
= (const word_t
*) ((uintptr_t) b
& -SIZEOF_LONG
);
208 s1
= a_word
[n
] ^ b_word
[n
];
209 s0
= a_word
[n
+1] ^ b_word
[n
+1];
210 dst
[n
] = MERGE (s1
, shl
, s0
, shr
);
213 s1
= a_word
[n
] ^ b_word
[n
];
218 s0
= a_word
[n
+1] ^ b_word
[n
+1];
219 dst
[n
+1] = MERGE(s0
, shl
, s1
, shr
);
220 s1
= a_word
[n
] ^ b_word
[n
];
221 dst
[n
] = MERGE(s1
, shl
, s0
, shr
);
226 memxor3_different_alignment_all (word_t
*dst
,
227 const uint8_t *a
, const uint8_t *b
,
228 unsigned a_offset
, unsigned b_offset
,
232 const word_t
*a_word
;
233 const word_t
*b_word
;
235 word_t a0
, a1
, b0
, b1
;
237 al
= CHAR_BIT
* a_offset
;
238 ar
= CHAR_BIT
* (sizeof(word_t
) - a_offset
);
239 bl
= CHAR_BIT
* b_offset
;
240 br
= CHAR_BIT
* (sizeof(word_t
) - b_offset
);
242 a_word
= (const word_t
*) ((uintptr_t) a
& -SIZEOF_LONG
);
243 b_word
= (const word_t
*) ((uintptr_t) b
& -SIZEOF_LONG
);
248 a1
= a_word
[n
]; a0
= a_word
[n
+1];
249 b1
= b_word
[n
]; b0
= b_word
[n
+1];
251 dst
[n
] = MERGE (a1
, al
, a0
, ar
) ^ MERGE (b1
, bl
, b0
, br
);
262 a0
= a_word
[n
+1]; b0
= b_word
[n
+1];
263 dst
[n
+1] = MERGE(a0
, al
, a1
, ar
) ^ MERGE(b0
, bl
, b1
, br
);
264 a1
= a_word
[n
]; b1
= b_word
[n
];
265 dst
[n
] = MERGE(a1
, al
, a0
, ar
) ^ MERGE(b1
, bl
, b0
, br
);
269 /* Current implementation processes data in descending order, to
270 support overlapping operation with one of the sources overlapping
271 the start of the destination area. This feature is used only
272 internally by cbc decrypt, and it is not advertised or documented
275 memxor3(uint8_t *dst
, const uint8_t *a
, const uint8_t *b
, size_t n
)
277 if (n
>= WORD_T_THRESH
)
284 for (i
= ALIGN_OFFSET(dst
+ n
); i
> 0; i
--)
287 dst
[n
] = a
[n
] ^ b
[n
];
290 a_offset
= ALIGN_OFFSET(a
+ n
);
291 b_offset
= ALIGN_OFFSET(b
+ n
);
293 nwords
= n
/ sizeof (word_t
);
294 n
%= sizeof (word_t
);
296 if (a_offset
== b_offset
)
299 memxor3_common_alignment((word_t
*) (dst
+ n
),
300 (const word_t
*) (a
+ n
),
301 (const word_t
*) (b
+ n
), nwords
);
303 memxor3_different_alignment_ab((word_t
*) (dst
+ n
),
304 a
+ n
, b
+ n
, a_offset
,
308 memxor3_different_alignment_b((word_t
*) (dst
+ n
),
309 (const word_t
*) (a
+ n
), b
+ n
,
312 memxor3_different_alignment_b((word_t
*) (dst
+ n
),
313 (const word_t
*) (b
+ n
), a
+ n
,
316 memxor3_different_alignment_all((word_t
*) (dst
+ n
), a
+ n
, b
+ n
,
317 a_offset
, b_offset
, nwords
);
321 dst
[n
] = a
[n
] ^ b
[n
];