/*
 * OpenSSH Multi-threaded AES-CTR Cipher
 *
 * Author: Benjamin Bennett <ben@psc.edu>
 * Copyright (c) 2008 Pittsburgh Supercomputing Center. All rights reserved.
 *
 * Based on original OpenSSH AES-CTR cipher. Small portions remain unchanged,
 * Copyright (c) 2003 Markus Friedl <markus@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <pthread.h>
#include <string.h>

#include <openssl/evp.h>

#ifndef USE_BUILTIN_RIJNDAEL
#include <openssl/aes.h>
/*-------------------- TUNABLES --------------------*/
/* Number of pregen threads to use */
#define CIPHER_THREADS	2

/* Number of keystream queues */
#define NUMKQ		(CIPHER_THREADS + 2)

/* Length of a keystream queue (keystream blocks per queue) */
#define KQLEN		4096

/* Processor cacheline length */
#define CACHELINE_LEN	64

/* Collect thread stats and print at cancellation when in debug mode */
/* #define CIPHER_THREAD_STATS */

/* Use single-byte XOR instead of 8-byte XOR */
/* #define CIPHER_BYTE_XOR */
/*-------------------- END TUNABLES --------------------*/
63 const EVP_CIPHER
*evp_aes_ctr_mt(void);
65 #ifdef CIPHER_THREAD_STATS
67 * Struct to collect thread stats
77 * Debug print the thread stats
78 * Use with pthread_cleanup_push for displaying at thread cancellation
81 thread_loop_stats(void *x
)
83 struct thread_stats
*s
= x
;
85 debug("tid %lu - %u fills, %u skips, %u waits", pthread_self(),
86 s
->fills
, s
->skips
, s
->waits
);
89 #define STATS_STRUCT(s) struct thread_stats s;
90 #define STATS_INIT(s) memset(&s, 0, sizeof(s))
91 #define STATS_FILL(s) s.fills++
92 #define STATS_SKIP(s) s.skips++
93 #define STATS_WAIT(s) s.waits++
94 #define STATS_DRAIN(s) s.drains++
96 #define STATS_STRUCT(s)
100 #define STATS_WAIT(s)
101 #define STATS_DRAIN(s)
104 /* Keystream Queue state */
113 /* Keystream Queue struct */
115 u_char keys
[KQLEN
][AES_BLOCK_SIZE
];
116 u_char ctr
[AES_BLOCK_SIZE
];
117 u_char pad0
[CACHELINE_LEN
];
119 pthread_mutex_t lock
;
121 u_char pad1
[CACHELINE_LEN
];
125 struct ssh_aes_ctr_ctx
130 u_char aes_counter
[AES_BLOCK_SIZE
];
131 pthread_t tid
[CIPHER_THREADS
];
138 * increment counter 'ctr',
139 * the counter is of size 'len' bytes and stored in network-byte-order.
140 * (LSB at ctr[len-1], MSB at ctr[0])
143 ssh_ctr_inc(u_char
*ctr
, u_int len
)
147 for (i
= len
- 1; i
>= 0; i
--)
148 if (++ctr
[i
]) /* continue on overflow */
153 * Add num to counter 'ctr'
156 ssh_ctr_add(u_char
*ctr
, uint32_t num
, u_int len
)
161 for (n
= 0, i
= len
- 1; i
>= 0 && (num
|| n
); i
--) {
162 n
= ctr
[i
] + (num
& 0xff) + n
;
/*
 * Threads may be cancelled in a pthread_cond_wait, we must free the mutex.
 * Installed with pthread_cleanup_push around every cond_wait.
 */
static void
thread_loop_cleanup(void *x)
{
	pthread_mutex_unlock((pthread_mutex_t *)x);
}
179 * The life of a pregen thread:
180 * Find empty keystream queues and fill them using their counter.
181 * When done, update counter for the next fill.
188 struct ssh_aes_ctr_ctx
*c
= x
;
193 /* Threads stats on cancellation */
195 #ifdef CIPHER_THREAD_STATS
196 pthread_cleanup_push(thread_loop_stats
, &stats
);
199 /* Thread local copy of AES key */
200 memcpy(&key
, &c
->aes_ctx
, sizeof(key
));
203 * Handle the special case of startup, one thread must fill
204 * the first KQ then mark it as draining. Lock held throughout.
206 if (pthread_equal(pthread_self(), c
->tid
[0])) {
208 pthread_mutex_lock(&q
->lock
);
209 if (q
->qstate
== KQINIT
) {
210 for (i
= 0; i
< KQLEN
; i
++) {
211 AES_encrypt(q
->ctr
, q
->keys
[i
], &key
);
212 ssh_ctr_inc(q
->ctr
, AES_BLOCK_SIZE
);
214 ssh_ctr_add(q
->ctr
, KQLEN
* (NUMKQ
- 1), AES_BLOCK_SIZE
);
215 q
->qstate
= KQDRAINING
;
217 pthread_cond_broadcast(&q
->cond
);
219 pthread_mutex_unlock(&q
->lock
);
225 * Normal case is to find empty queues and fill them, skipping over
226 * queues already filled by other threads and stopping to wait for
227 * a draining queue to become empty.
229 * Multiple threads may be waiting on a draining queue and awoken
230 * when empty. The first thread to wake will mark it as filling,
231 * others will move on to fill, skip, or wait on the next queue.
233 for (qidx
= 1;; qidx
= (qidx
+ 1) % NUMKQ
) {
234 /* Check if I was cancelled, also checked in cond_wait */
235 pthread_testcancel();
237 /* Lock queue and block if its draining */
239 pthread_mutex_lock(&q
->lock
);
240 pthread_cleanup_push(thread_loop_cleanup
, &q
->lock
);
241 while (q
->qstate
== KQDRAINING
|| q
->qstate
== KQINIT
) {
243 pthread_cond_wait(&q
->cond
, &q
->lock
);
245 pthread_cleanup_pop(0);
247 /* If filling or full, somebody else got it, skip */
248 if (q
->qstate
!= KQEMPTY
) {
249 pthread_mutex_unlock(&q
->lock
);
255 * Empty, let's fill it.
256 * Queue lock is relinquished while we do this so others
257 * can see that it's being filled.
259 q
->qstate
= KQFILLING
;
260 pthread_mutex_unlock(&q
->lock
);
261 for (i
= 0; i
< KQLEN
; i
++) {
262 AES_encrypt(q
->ctr
, q
->keys
[i
], &key
);
263 ssh_ctr_inc(q
->ctr
, AES_BLOCK_SIZE
);
266 /* Re-lock, mark full and signal consumer */
267 pthread_mutex_lock(&q
->lock
);
268 ssh_ctr_add(q
->ctr
, KQLEN
* (NUMKQ
- 1), AES_BLOCK_SIZE
);
271 pthread_cond_signal(&q
->cond
);
272 pthread_mutex_unlock(&q
->lock
);
275 #ifdef CIPHER_THREAD_STATS
277 pthread_cleanup_pop(1);
284 ssh_aes_ctr(EVP_CIPHER_CTX
*ctx
, u_char
*dest
, const u_char
*src
,
287 struct ssh_aes_ctr_ctx
*c
;
294 if ((c
= EVP_CIPHER_CTX_get_app_data(ctx
)) == NULL
)
300 /* src already padded to block multiple */
304 #ifdef CIPHER_BYTE_XOR
305 dest
[0] = src
[0] ^ buf
[0];
306 dest
[1] = src
[1] ^ buf
[1];
307 dest
[2] = src
[2] ^ buf
[2];
308 dest
[3] = src
[3] ^ buf
[3];
309 dest
[4] = src
[4] ^ buf
[4];
310 dest
[5] = src
[5] ^ buf
[5];
311 dest
[6] = src
[6] ^ buf
[6];
312 dest
[7] = src
[7] ^ buf
[7];
313 dest
[8] = src
[8] ^ buf
[8];
314 dest
[9] = src
[9] ^ buf
[9];
315 dest
[10] = src
[10] ^ buf
[10];
316 dest
[11] = src
[11] ^ buf
[11];
317 dest
[12] = src
[12] ^ buf
[12];
318 dest
[13] = src
[13] ^ buf
[13];
319 dest
[14] = src
[14] ^ buf
[14];
320 dest
[15] = src
[15] ^ buf
[15];
322 *(uint64_t *)dest
= *(uint64_t *)src
^ *(uint64_t *)buf
;
323 *(uint64_t *)(dest
+ 8) = *(uint64_t *)(src
+ 8) ^
324 *(uint64_t *)(buf
+ 8);
330 ssh_ctr_inc(ctx
->iv
, AES_BLOCK_SIZE
);
332 /* Increment read index, switch queues on rollover */
333 if ((ridx
= (ridx
+ 1) % KQLEN
) == 0) {
336 /* Mark next queue draining, may need to wait */
337 c
->qidx
= (c
->qidx
+ 1) % NUMKQ
;
339 pthread_mutex_lock(&q
->lock
);
340 while (q
->qstate
!= KQFULL
) {
341 STATS_WAIT(c
->stats
);
342 pthread_cond_wait(&q
->cond
, &q
->lock
);
344 q
->qstate
= KQDRAINING
;
345 pthread_mutex_unlock(&q
->lock
);
347 /* Mark consumed queue empty and signal producers */
348 pthread_mutex_lock(&oldq
->lock
);
349 oldq
->qstate
= KQEMPTY
;
350 STATS_DRAIN(c
->stats
);
351 pthread_cond_broadcast(&oldq
->cond
);
352 pthread_mutex_unlock(&oldq
->lock
);
363 ssh_aes_ctr_init(EVP_CIPHER_CTX
*ctx
, const u_char
*key
, const u_char
*iv
,
366 struct ssh_aes_ctr_ctx
*c
;
369 if ((c
= EVP_CIPHER_CTX_get_app_data(ctx
)) == NULL
) {
370 c
= xmalloc(sizeof(*c
));
372 c
->state
= HAVE_NONE
;
373 for (i
= 0; i
< NUMKQ
; i
++) {
374 pthread_mutex_init(&c
->q
[i
].lock
, NULL
);
375 pthread_cond_init(&c
->q
[i
].cond
, NULL
);
378 STATS_INIT(c
->stats
);
380 EVP_CIPHER_CTX_set_app_data(ctx
, c
);
383 if (c
->state
== (HAVE_KEY
| HAVE_IV
)) {
384 /* Cancel pregen threads */
385 for (i
= 0; i
< CIPHER_THREADS
; i
++)
386 pthread_cancel(c
->tid
[i
]);
387 for (i
= 0; i
< CIPHER_THREADS
; i
++)
388 pthread_join(c
->tid
[i
], NULL
);
389 /* Start over getting key & iv */
390 c
->state
= HAVE_NONE
;
394 AES_set_encrypt_key(key
, EVP_CIPHER_CTX_key_length(ctx
) * 8,
396 c
->state
|= HAVE_KEY
;
400 memcpy(ctx
->iv
, iv
, AES_BLOCK_SIZE
);
404 if (c
->state
== (HAVE_KEY
| HAVE_IV
)) {
406 memcpy(c
->q
[0].ctr
, ctx
->iv
, AES_BLOCK_SIZE
);
407 c
->q
[0].qstate
= KQINIT
;
408 for (i
= 1; i
< NUMKQ
; i
++) {
409 memcpy(c
->q
[i
].ctr
, ctx
->iv
, AES_BLOCK_SIZE
);
410 ssh_ctr_add(c
->q
[i
].ctr
, i
* KQLEN
, AES_BLOCK_SIZE
);
411 c
->q
[i
].qstate
= KQEMPTY
;
417 for (i
= 0; i
< CIPHER_THREADS
; i
++) {
418 pthread_create(&c
->tid
[i
], NULL
, thread_loop
, c
);
420 pthread_mutex_lock(&c
->q
[0].lock
);
421 while (c
->q
[0].qstate
!= KQDRAINING
)
422 pthread_cond_wait(&c
->q
[0].cond
, &c
->q
[0].lock
);
423 pthread_mutex_unlock(&c
->q
[0].lock
);
430 ssh_aes_ctr_cleanup(EVP_CIPHER_CTX
*ctx
)
432 struct ssh_aes_ctr_ctx
*c
;
435 if ((c
= EVP_CIPHER_CTX_get_app_data(ctx
)) != NULL
) {
436 #ifdef CIPHER_THREAD_STATS
437 debug("main thread: %u drains, %u waits", c
->stats
.drains
,
440 /* Cancel pregen threads */
441 for (i
= 0; i
< CIPHER_THREADS
; i
++)
442 pthread_cancel(c
->tid
[i
]);
443 for (i
= 0; i
< CIPHER_THREADS
; i
++)
444 pthread_join(c
->tid
[i
], NULL
);
446 memset(c
, 0, sizeof(*c
));
448 EVP_CIPHER_CTX_set_app_data(ctx
, NULL
);
457 static EVP_CIPHER aes_ctr
;
459 memset(&aes_ctr
, 0, sizeof(EVP_CIPHER
));
460 aes_ctr
.nid
= NID_undef
;
461 aes_ctr
.block_size
= AES_BLOCK_SIZE
;
462 aes_ctr
.iv_len
= AES_BLOCK_SIZE
;
463 aes_ctr
.key_len
= 16;
464 aes_ctr
.init
= ssh_aes_ctr_init
;
465 aes_ctr
.cleanup
= ssh_aes_ctr_cleanup
;
466 aes_ctr
.do_cipher
= ssh_aes_ctr
;
468 aes_ctr
.flags
= EVP_CIPH_CBC_MODE
| EVP_CIPH_VARIABLE_LENGTH
|
469 EVP_CIPH_ALWAYS_CALL_INIT
| EVP_CIPH_CUSTOM_IV
;