usr/src/common/crypto/arcfour/sun4v/arcfour_crypt.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
	uchar_t ext_keyval[256];
	uchar_t tmp;
	int i, j;

	for (i = j = 0; i < 256; i++, j++) {
		if (j == keyvallen)
			j = 0;
		ext_keyval[i] = keyval[j];
	}
	for (i = 0; i < 256; i++)
		key->arr[i] = (uchar_t)i;

	j = 0;
	for (i = 0; i < 256; i++) {
		j = (j + key->arr[i] + ext_keyval[i]) % 256;
		tmp = key->arr[i];
		key->arr[i] = key->arr[j];
		key->arr[j] = tmp;
	}
	key->i = 0;
	key->j = 0;
}
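
/*
 * The second pair of loops above is the classic RC4 key-scheduling
 * algorithm (KSA). A minimal standalone sketch of the same schedule,
 * assuming nothing but a 256-byte state array (the ARCFOUR_KSA_SKETCH
 * guard is hypothetical and never defined in the build):
 */
#ifdef ARCFOUR_KSA_SKETCH
static void
ksa_sketch(unsigned char s[256], const unsigned char *k, int klen)
{
	int i, j = 0;
	unsigned char t;

	/* identity permutation */
	for (i = 0; i < 256; i++)
		s[i] = (unsigned char)i;
	/* key-driven swaps; k[i % klen] plays the role of ext_keyval[i] */
	for (i = 0; i < 256; i++) {
		j = (j + s[i] + k[i % klen]) & 0xff;
		t = s[i];
		s[i] = s[j];
		s[j] = t;
	}
}
#endif	/* ARCFOUR_KSA_SKETCH */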

/*
 * Encipher 'in' using 'key'.
 * in and out can point to the same location
 */
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
	size_t ii;
	unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
	uchar_t i, j, *base, jj, *base1, tmp;
	unsigned int tmp0, tmp1, i_accum, shift = 0, i1;

	int index;

	base = key->arr;

	index = (((uintptr_t)in) & 0x7);

	/* Get the 'in' on an 8-byte alignment */
	if (index > 0) {
		i = key->i;
		j = key->j;

		for (index = 8 - index; (index-- > 0) && len > 0;
		    len--, in++, out++) {

			i = i + 1;
			j = j + key->arr[i];
			tmp = key->arr[i];
			key->arr[i] = key->arr[j];
			key->arr[j] = tmp;
			tmp = key->arr[i] + key->arr[j];
			*out = *in ^ key->arr[tmp];
		}
		key->i = i;
		key->j = j;
	}

	if (len == 0)
		return;
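
	/*
	 * Example: for in == 0x1005, index above was 5, so the prologue
	 * consumed 8 - 5 = 3 bytes one at a time and 'in' now sits on an
	 * 8-byte boundary for the block-oriented path below.
	 */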

	/* See if we're fortunate and 'out' got aligned as well */

	/*
	 * Niagara optimized version for
	 * the cases where the input and output buffers are aligned on
	 * a multiple of 8-byte boundary.
	 */
#ifdef	sun4v
	if ((((uintptr_t)out) & 7) != 0) {
#endif	/* sun4v */
		i = key->i;
		j = key->j;
		for (ii = 0; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];
			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;
			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
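
		/*
		 * The loop above is the standard byte-at-a-time RC4 PRGA:
		 *   i = (i + 1) mod 256; j = (j + S[i]) mod 256;
		 *   swap(S[i], S[j]); out = in ^ S[(S[i] + S[j]) mod 256]
		 * (the uchar_t arithmetic on i and j supplies the mod 256).
		 */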
#ifdef	sun4v
	} else {
		i = key->i;
		j = key->j;

		/*
		 * Want to align base[i] on a 2B boundary -- allows updates
		 * via [i] to be performed in 2B chunks (reducing # of stores).
		 * Requires appropriate alias detection.
		 */
		if (((i+1) % 2) != 0) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;

			merge0 = (unsigned long long)(base[tmp0]) << 56;
			shift = 8; mask = 0xff;
		}
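
		/*
		 * Bookkeeping for the odd pre-step above: merge0 holds that
		 * single keystream byte in its top 8 bits, while shift/mask
		 * make each 8-byte 'merge' below be emitted one byte late;
		 * the final leftover byte is flushed after the main loop.
		 */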

		/*
		 * Note - in and out may now be misaligned -
		 * so updating [out] in 8B chunks needs to handle this
		 * possibility. Also could have a 1B overrun.
		 * Need to drop out of loop early as a result.
		 */
		for (ii = 0, i1 = i; ii < ((len-1) & (~7));
		    ii += 8, i1 = i1&0xff) {

			/*
			 * If i1 is less than 248, it won't wrap around
			 * (i % 256), so don't need to bother with masking i
			 * after each increment
			 */
			if (i1 < 248) {

				/* BYTE 0 */
				i1 = (i1 + 1);

				/*
				 * Creating this base pointer reduces
				 * subsequent arithmetic ops required
				 * to load [i]
				 *
				 * N.B. don't need to check if [j] aliases.
				 * [i] and [j] end up with the same values
				 * anyway.
				 */
				base1 = &base[i1];

				tmp0 = base1[0];
				j = j + tmp0;

				tmp1 = base[j];
				/*
				 * Don't store [i] yet
				 */
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				/*
				 * Check [tmp0] doesn't alias with [i]
				 */

				/*
				 * Updating [out] in 8B chunks
				 */
				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base1[1];

				j = j + tmp0;

				/*
				 * [j] can now alias with [i] and [i-1]
				 * If alias abort speculation
				 */
				if ((i1 ^ j) < 2) {
					base1[0] = (uchar_t)i_accum;

					tmp1 = base[j];

					base1[1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |= (unsigned long long)
					    (base[tmp0]) << 48;
				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/*
					 * Speculation succeeded! Update [i]
					 * in 2B chunk
					 */
					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}

				/*
				 * Too expensive to perform [i] speculation for
				 * every byte. Just need to reduce frequency
				 * of stores until store buffer full stalls
				 * are not the bottleneck.
				 */

				/* BYTE 2 */
				tmp0 = base1[2];
				j = j + tmp0;
				tmp1 = base[j];
				base1[2] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp1 += tmp0;
				tmp1 = tmp1 & 0xff;
				merge |= (unsigned long long)(base[tmp1]) << 40;

				/* BYTE 3 */
				tmp0 = base1[3];
				j = j + tmp0;
				tmp1 = base[j];
				base1[3] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				tmp0 = base1[4];
				j = j + tmp0;
				tmp1 = base[j];
				base1[4] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base1[5];
				j = j + tmp0;
				tmp1 = base[j];
				base1[5] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1+6);
				tmp0 = base1[6];
				j = j + tmp0;
				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				tmp0 = base1[7];

				/*
				 * Perform [i] speculation again. Identical
				 * to that performed for BYTE0 and BYTE1.
				 */
				j = j + tmp0;
				if ((i1 ^ j) < 2) {
					base1[6] = (uchar_t)i_accum;
					tmp1 = base[j];

					base1[7] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
				i1++;
			} else {
				/*
				 * i is too close to wrap-around to allow
				 * masking to be disregarded
				 */

				/*
				 * Same old speculation for BYTE 0 and BYTE 1
				 */

				/* BYTE 0 */
				i1 = (i1 + 1) & 0xff;
				jj = (uchar_t)i1;

				tmp0 = base[i1];
				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base[i1+1];

				j = j + tmp0;

				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;

					tmp1 = base[j];

					base[i1+1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}

				/* BYTE 2 */
				/*
				 * Since i must be even on loop entry (to
				 * satisfy alignment), it can only wrap around
				 * on the even bytes, so the mask only needs
				 * to be applied every 2nd byte
				 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 40;

				/* BYTE 3 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 2) & 0xff;
				jj = (uchar_t)i1;
				tmp0 = base[i1];

				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				i1++;
				tmp0 = base[i1];

				j = j + tmp0;
				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;
					tmp1 = base[j];

					base[i1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
			}

			/*
			 * Perform update to [out]
			 * Remember could be alignment issues
			 */
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			in0 = *((unsigned long long *) (&in[ii]));

			merge1 = merge0 | (merge >> shift);

			merge0 = (merge & mask) << 56;

			in0 = in0 ^ merge1;

			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((unsigned long long *) (&out[ii])) = in0;
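
			/*
			 * With shift == 8, merge1 starts with the leftover
			 * byte carried in merge0, the top 7 bytes of 'merge'
			 * fill the rest, and the low byte of 'merge' is saved
			 * in merge0 for the next iteration (or the overrun
			 * flush below). With shift == 0 the mask is 0 and
			 * 'merge' passes through unchanged.
			 */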
		}

		i = (uchar_t)i1;

		/*
		 * Handle any overrun
		 */
		if (shift) {
			out[ii] = in[ii] ^ (merge0 >> 56);
			ii++;
		}

		/*
		 * Handle final few bytes
		 */
		for (; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
	}
#endif	/* sun4v */
}
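
/*
 * A minimal known-answer sketch for the two routines above, assuming the
 * ARCFour_key layout from ../arcfour.h. The ARCFOUR_KAT_EXAMPLE guard is
 * hypothetical and not defined anywhere in the build; RC4 with the key
 * "Key" enciphers "Plaintext" to bb f3 16 e8 d9 40 af 0a d3.
 */
#ifdef ARCFOUR_KAT_EXAMPLE
#include <stdio.h>
#include <string.h>

int
main(void)
{
	ARCFour_key k;
	uchar_t kv[3] = { 'K', 'e', 'y' };
	uchar_t pt[9];
	uchar_t ct[9];
	int n;

	(void) memcpy(pt, "Plaintext", sizeof (pt));
	arcfour_key_init(&k, kv, sizeof (kv));
	arcfour_crypt(&k, pt, ct, sizeof (ct));

	for (n = 0; n < 9; n++)
		(void) printf("%02x ", ct[n]);
	(void) printf("\n");	/* expect: bb f3 16 e8 d9 40 af 0a d3 */
	return (0);
}
#endif	/* ARCFOUR_KAT_EXAMPLE */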