4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include "../arcfour.h"
28 /* Initialize the key stream 'key' using the key value */
30 arcfour_key_init(ARCFour_key
*key
, uchar_t
*keyval
, int keyvallen
)
32 uchar_t ext_keyval
[256];
36 for (i
= j
= 0; i
< 256; i
++, j
++) {
40 ext_keyval
[i
] = keyval
[j
];
42 for (i
= 0; i
< 256; i
++)
43 key
->arr
[i
] = (uchar_t
)i
;
46 for (i
= 0; i
< 256; i
++) {
47 j
= (j
+ key
->arr
[i
] + ext_keyval
[i
]) % 256;
49 key
->arr
[i
] = key
->arr
[j
];
58 * Encipher 'in' using 'key.
59 * in and out can point to the same location
62 arcfour_crypt(ARCFour_key
*key
, uchar_t
*in
, uchar_t
*out
, size_t len
)
65 unsigned long long in0
, merge
= 0, merge0
= 0, merge1
, mask
= 0;
66 uchar_t i
, j
, *base
, jj
, *base1
, tmp
;
67 unsigned int tmp0
, tmp1
, i_accum
, shift
= 0, i1
;
73 index
= (((uintptr_t)in
) & 0x7);
75 /* Get the 'in' on an 8-byte alignment */
80 for (index
= 8 - index
; (index
-- > 0) && len
> 0;
86 key
->arr
[i
] = key
->arr
[j
];
88 tmp
= key
->arr
[i
] + key
->arr
[j
];
89 *out
= *in
^ key
->arr
[tmp
];
98 /* See if we're fortunate and 'out' got aligned as well */
102 * Niagara optimized version for
103 * the cases where the input and output buffers are aligned on
104 * a multiple of 8-byte boundary.
107 if ((((uintptr_t)out
) & 7) != 0) {
111 for (ii
= 0; ii
< len
; ii
++) {
116 base
[i
] = (uchar_t
)tmp1
;
117 base
[j
] = (uchar_t
)tmp0
;
120 out
[ii
] = in
[ii
] ^ base
[tmp0
];
130 * Want to align base[i] on a 2B boundary -- allows updates
131 * via [i] to be performed in 2B chunks (reducing # of stores).
132 * Requires appropriate alias detection.
135 if (((i
+1) % 2) != 0) {
141 base
[i
] = (uchar_t
)tmp1
;
142 base
[j
] = (uchar_t
)tmp0
;
147 merge0
= (unsigned long long)(base
[tmp0
]) << 56;
148 shift
= 8; mask
= 0xff;
152 * Note - in and out may now be misaligned -
153 * as updating [out] in 8B chunks need to handle this
154 * possibility. Also could have a 1B overrun.
155 * Need to drop out of loop early as a result.
158 for (ii
= 0, i1
= i
; ii
< ((len
-1) & (~7));
159 ii
+= 8, i1
= i1
&0xff) {
162 * If i < less than 248, know wont wrap around
163 * (i % 256), so don't need to bother with masking i
164 * after each increment
172 * Creating this base pointer reduces subsequent
173 * arihmetic ops required to load [i]
175 * N.B. don't need to check if [j] aliases.
176 * [i] and [j] end up with the same values
186 * Don't store [i] yet
189 base
[j
] = (uchar_t
)tmp0
;
195 * Check [tmp0] doesn't alias with [i]
199 * Updating [out] in 8B chunks
203 (unsigned long long)(i_accum
) << 56;
206 (unsigned long long)(base
[tmp0
]) <<
216 * [j] can now alias with [i] and [i-1]
217 * If alias abort speculation
220 base1
[0] = (uchar_t
)i_accum
;
224 base1
[1] = (uchar_t
)tmp1
;
225 base
[j
] = (uchar_t
)tmp0
;
230 merge
|= (unsigned long long)
236 i_accum
= i_accum
<< 8;
239 base
[j
] = (uchar_t
)tmp0
;
245 * Speculation suceeded! Update [i]
248 /* LINTED E_BAD_PTR_CAST_ALIGN */
249 *((unsigned short *) &base
[i1
]) =
253 (unsigned long long)(base
[tmp0
]) <<
259 * Too expensive to perform [i] speculation for
260 * every byte. Just need to reduce frequency
261 * of stores until store buffer full stalls
262 * are not the bottleneck.
269 base1
[2] = (uchar_t
)tmp1
;
270 base
[j
] = (uchar_t
)tmp0
;
273 merge
|= (unsigned long long)(base
[tmp1
]) << 40;
279 base1
[3] = (uchar_t
)tmp1
;
280 base
[j
] = (uchar_t
)tmp0
;
283 merge
|= (unsigned long long)(base
[tmp0
]) << 32;
289 base1
[4] = (uchar_t
)tmp1
;
290 base
[j
] = (uchar_t
)tmp0
;
293 merge
|= (unsigned long long)(base
[tmp0
]) << 24;
299 base1
[5] = (uchar_t
)tmp1
;
300 base
[j
] = (uchar_t
)tmp0
;
303 merge
|= (unsigned long long)(base
[tmp0
]) << 16;
311 base
[j
] = (uchar_t
)tmp0
;
318 (unsigned long long)(i_accum
) << 8;
321 (unsigned long long)(base
[tmp0
]) <<
329 * Perform [i] speculation again. Indentical
330 * to that performed for BYTE0 and BYTE1.
334 base1
[6] = (uchar_t
)i_accum
;
337 base1
[7] = (uchar_t
)tmp1
;
338 base
[j
] = (uchar_t
)tmp0
;
344 (unsigned long long)(base
[tmp0
]);
349 i_accum
= i_accum
<< 8;
352 base
[j
] = (uchar_t
)tmp0
;
357 /* LINTED E_BAD_PTR_CAST_ALIGN */
358 *((unsigned short *) &base
[i1
]) =
362 (unsigned long long)(base
[tmp0
]);
367 * i is too close to wrap-around to allow
368 * masking to be disregarded
372 * Same old speculation for BYTE 0 and BYTE 1
376 i1
= (i1
+ 1) & 0xff;
384 base
[j
] = (uchar_t
)tmp0
;
391 (unsigned long long)(i_accum
) << 56;
394 (unsigned long long)(base
[tmp0
]) <<
404 base
[jj
] = (uchar_t
)i_accum
;
408 base
[i1
+1] = (uchar_t
)tmp1
;
409 base
[j
] = (uchar_t
)tmp0
;
415 (unsigned long long)(base
[tmp0
]) <<
421 i_accum
= i_accum
<< 8;
424 base
[j
] = (uchar_t
)tmp0
;
429 /* LINTED E_BAD_PTR_CAST_ALIGN */
430 *((unsigned short *) &base
[jj
]) =
434 (unsigned long long)(base
[tmp0
]) <<
440 * As know i must be even when enter loop (to
441 * satisfy alignment), can only wrap around
442 * on the even bytes. So just need to perform
443 * mask every 2nd byte
445 i1
= (i1
+ 2) & 0xff;
449 base
[i1
] = (uchar_t
)tmp1
;
450 base
[j
] = (uchar_t
)tmp0
;
453 merge
|= (unsigned long long)(base
[tmp0
]) << 40;
459 base
[i1
+1] = (uchar_t
)tmp1
;
460 base
[j
] = (uchar_t
)tmp0
;
463 merge
|= (unsigned long long)(base
[tmp0
]) << 32;
466 i1
= (i1
+ 2) & 0xff;
470 base
[i1
] = (uchar_t
)tmp1
;
471 base
[j
] = (uchar_t
)tmp0
;
474 merge
|= (unsigned long long)(base
[tmp0
]) << 24;
480 base
[i1
+1] = (uchar_t
)tmp1
;
481 base
[j
] = (uchar_t
)tmp0
;
484 merge
|= (unsigned long long)(base
[tmp0
]) << 16;
495 base
[j
] = (uchar_t
)tmp0
;
503 (unsigned long long)(i_accum
) << 8;
506 (unsigned long long)(base
[tmp0
]) <<
516 base
[jj
] = (uchar_t
)i_accum
;
519 base
[i1
] = (uchar_t
)tmp1
;
520 base
[j
] = (uchar_t
)tmp0
;
526 (unsigned long long)(base
[tmp0
]);
532 i_accum
= i_accum
<< 8;
535 base
[j
] = (uchar_t
)tmp0
;
540 /* LINTED E_BAD_PTR_CAST_ALIGN */
541 *((unsigned short *) &base
[jj
]) =
545 (unsigned long long)(base
[tmp0
]);
550 * Perform update to [out]
551 * Remember could be alignment issues
553 /* LINTED E_BAD_PTR_CAST_ALIGN */
554 in0
= *((unsigned long long *) (&in
[ii
]));
556 merge1
= merge0
| (merge
>> shift
);
558 merge0
= (merge
& mask
) << 56;
562 /* LINTED E_BAD_PTR_CAST_ALIGN */
563 *((unsigned long long *) (&out
[ii
])) = in0
;
572 out
[ii
] = in
[ii
] ^ (merge0
>> 56);
577 * Handle final few bytes
579 for (; ii
< len
; ii
++) {
585 base
[i
] = (uchar_t
)tmp1
;
586 base
[j
] = (uchar_t
)tmp0
;
590 out
[ii
] = in
[ii
] ^ base
[tmp0
];