3 * Tries the performance of the various algorithms.
7 /* nettle, low-level cryptographics library
9 * Copyright (C) 2001, 2010 Niels Möller
11 * The nettle library is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or (at your
14 * option) any later version.
16 * The nettle library is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 * License for more details.
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with the nettle library; see the file COPYING.LIB. If not, write to
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
60 #include "nettle-meta.h"
61 #include "nettle-internal.h"
65 static double frequency
= 0.0;
67 /* Process BENCH_BLOCK bytes at a time, for BENCH_INTERVAL seconds. */
68 #define BENCH_BLOCK 10240
69 #define BENCH_INTERVAL 0.1
71 /* FIXME: Proper configure test for rdtsc? */
72 #ifndef WITH_CYCLE_COUNTER
73 # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
74 # define WITH_CYCLE_COUNTER 1
76 # define WITH_CYCLE_COUNTER 0
80 #if WITH_CYCLE_COUNTER
81 # if defined(__i386__)
82 #define GET_CYCLE_COUNTER(hi, lo) \
83 __asm__("xorl %%eax,%%eax\n" \
84 "movl %%ebx, %%edi\n" \
87 "movl %%edi, %%ebx\n" \
88 : "=a" (lo), "=d" (hi) \
90 : "%edi", "%ecx", "cc")
91 # elif defined(__x86_64__)
92 #define GET_CYCLE_COUNTER(hi, lo) \
93 __asm__("xorl %%eax,%%eax\n" \
94 "mov %%rbx, %%r10\n" \
97 "mov %%r10, %%rbx\n" \
98 : "=a" (lo), "=d" (hi) \
100 : "%r10", "%rcx", "cc")
102 #define BENCH_ITERATIONS 10
105 static void NORETURN
PRINTF_STYLE(1,2)
106 die(const char *format
, ...)
109 va_start(args
, format
);
110 vfprintf(stderr
, format
, args
);
116 static double overhead
= 0.0;
118 /* Returns seconds per function call */
120 time_function(void (*f
)(void *arg
), void *arg
)
130 for (i
= 0; i
< ncalls
; i
++)
132 elapsed
= time_end();
133 if (elapsed
> BENCH_INTERVAL
)
135 else if (elapsed
< BENCH_INTERVAL
/ 10)
140 return elapsed
/ ncalls
- overhead
;
144 bench_nothing(void *arg UNUSED
)
149 struct bench_memxor_info
153 const uint8_t *other
;
157 bench_memxor(void *arg
)
159 struct bench_memxor_info
*info
= arg
;
160 memxor (info
->dst
, info
->src
, BENCH_BLOCK
);
164 bench_memxor3(void *arg
)
166 struct bench_memxor_info
*info
= arg
;
167 memxor3 (info
->dst
, info
->src
, info
->other
, BENCH_BLOCK
);
170 struct bench_hash_info
173 nettle_hash_update_func
*update
;
178 bench_hash(void *arg
)
180 struct bench_hash_info
*info
= arg
;
181 info
->update(info
->ctx
, BENCH_BLOCK
, info
->data
);
184 struct bench_cipher_info
187 nettle_crypt_func
*crypt
;
192 bench_cipher(void *arg
)
194 struct bench_cipher_info
*info
= arg
;
195 info
->crypt(info
->ctx
, BENCH_BLOCK
, info
->data
, info
->data
);
198 struct bench_cbc_info
201 nettle_crypt_func
*crypt
;
210 bench_cbc_encrypt(void *arg
)
212 struct bench_cbc_info
*info
= arg
;
213 cbc_encrypt(info
->ctx
, info
->crypt
,
214 info
->block_size
, info
->iv
,
215 BENCH_BLOCK
, info
->data
, info
->data
);
219 bench_cbc_decrypt(void *arg
)
221 struct bench_cbc_info
*info
= arg
;
222 cbc_decrypt(info
->ctx
, info
->crypt
,
223 info
->block_size
, info
->iv
,
224 BENCH_BLOCK
, info
->data
, info
->data
);
230 struct bench_cbc_info
*info
= arg
;
231 ctr_crypt(info
->ctx
, info
->crypt
,
232 info
->block_size
, info
->iv
,
233 BENCH_BLOCK
, info
->data
, info
->data
);
236 /* Set data[i] = floor(sqrt(i)) */
238 init_data(uint8_t *data
)
241 for (i
= j
= 0; i
<BENCH_BLOCK
; i
++)
250 init_key(unsigned length
,
254 for (i
= 0; i
<length
; i
++)
261 printf("%18s %11s Mbyte/s%s\n",
263 frequency
> 0.0 ? " cycles/byte cycles/block" : "");
267 display(const char *name
, const char *mode
, unsigned block_size
,
270 printf("%18s %11s %7.2f",
272 BENCH_BLOCK
/ (time
* 1048576.0));
275 printf(" %11.2f", time
* frequency
/ BENCH_BLOCK
);
277 printf(" %12.2f", time
* frequency
* block_size
/ BENCH_BLOCK
);
285 void *p
= malloc(size
);
287 die("Virtual memory exhausted.\n");
295 overhead
= time_function(bench_nothing
, NULL
);
296 printf("benchmark call overhead: %7f us", overhead
* 1e6
);
298 printf("%7.2f cycles\n", overhead
* frequency
);
307 struct bench_memxor_info info
;
308 uint8_t src
[BENCH_BLOCK
+ sizeof(long)];
309 uint8_t other
[BENCH_BLOCK
+ sizeof(long)];
310 uint8_t dst
[BENCH_BLOCK
];
315 display ("memxor", "aligned", sizeof(unsigned long),
316 time_function(bench_memxor
, &info
));
318 display ("memxor", "unaligned", sizeof(unsigned long),
319 time_function(bench_memxor
, &info
));
323 display ("memxor3", "aligned", sizeof(unsigned long),
324 time_function(bench_memxor3
, &info
));
326 info
.other
= other
+ 1;
327 display ("memxor3", "unaligned01", sizeof(unsigned long),
328 time_function(bench_memxor3
, &info
));
330 display ("memxor3", "unaligned11", sizeof(unsigned long),
331 time_function(bench_memxor3
, &info
));
332 info
.other
= other
+ 2;
333 display ("memxor3", "unaligned12", sizeof(unsigned long),
334 time_function(bench_memxor3
, &info
));
338 time_hash(const struct nettle_hash
*hash
)
340 static uint8_t data
[BENCH_BLOCK
];
341 struct bench_hash_info info
;
343 info
.ctx
= xalloc(hash
->context_size
);
344 info
.update
= hash
->update
;
348 hash
->init(info
.ctx
);
350 display(hash
->name
, "update", hash
->block_size
,
351 time_function(bench_hash
, &info
));
359 static uint8_t data
[BENCH_BLOCK
];
360 struct bench_hash_info info
;
361 struct umac32_ctx ctx32
;
362 struct umac64_ctx ctx64
;
363 struct umac96_ctx ctx96
;
364 struct umac128_ctx ctx128
;
368 umac32_set_key (&ctx32
, key
);
370 info
.update
= (nettle_hash_update_func
*) umac32_update
;
373 display("umac32", "update", UMAC_DATA_SIZE
,
374 time_function(bench_hash
, &info
));
376 umac64_set_key (&ctx64
, key
);
378 info
.update
= (nettle_hash_update_func
*) umac64_update
;
381 display("umac64", "update", UMAC_DATA_SIZE
,
382 time_function(bench_hash
, &info
));
384 umac96_set_key (&ctx96
, key
);
386 info
.update
= (nettle_hash_update_func
*) umac96_update
;
389 display("umac96", "update", UMAC_DATA_SIZE
,
390 time_function(bench_hash
, &info
));
392 umac128_set_key (&ctx128
, key
);
394 info
.update
= (nettle_hash_update_func
*) umac128_update
;
397 display("umac128", "update", UMAC_DATA_SIZE
,
398 time_function(bench_hash
, &info
));
404 static uint8_t data
[BENCH_BLOCK
];
405 struct bench_hash_info hinfo
;
406 struct bench_cipher_info cinfo
;
407 struct gcm_aes_ctx ctx
;
410 uint8_t iv
[GCM_IV_SIZE
];
412 gcm_aes_set_key(&ctx
, sizeof(key
), key
);
413 gcm_aes_set_iv(&ctx
, sizeof(iv
), iv
);
416 hinfo
.update
= (nettle_hash_update_func
*) gcm_aes_update
;
419 display("gcm-aes", "update", GCM_BLOCK_SIZE
,
420 time_function(bench_hash
, &hinfo
));
423 cinfo
.crypt
= (nettle_crypt_func
*) gcm_aes_encrypt
;
426 display("gcm-aes", "encrypt", GCM_BLOCK_SIZE
,
427 time_function(bench_cipher
, &cinfo
));
429 cinfo
.crypt
= (nettle_crypt_func
*) gcm_aes_decrypt
;
431 display("gcm-aes", "decrypt", GCM_BLOCK_SIZE
,
432 time_function(bench_cipher
, &cinfo
));
436 prefix_p(const char *prefix
, const char *s
)
439 for (i
= 0; prefix
[i
]; i
++)
440 if (prefix
[i
] != s
[i
])
446 block_cipher_p(const struct nettle_cipher
*cipher
)
448 /* Don't use nettle cbc and ctr for openssl ciphers. */
449 return cipher
->block_size
> 0 && !prefix_p("openssl", cipher
->name
);
453 time_cipher(const struct nettle_cipher
*cipher
)
455 void *ctx
= xalloc(cipher
->context_size
);
456 uint8_t *key
= xalloc(cipher
->key_size
);
458 static uint8_t data
[BENCH_BLOCK
];
465 /* Decent initializers are a GNU extension, so don't use them here. */
466 struct bench_cipher_info info
;
468 info
.crypt
= cipher
->encrypt
;
471 init_key(cipher
->key_size
, key
);
472 cipher
->set_encrypt_key(ctx
, cipher
->key_size
, key
);
474 display(cipher
->name
, "ECB encrypt", cipher
->block_size
,
475 time_function(bench_cipher
, &info
));
479 struct bench_cipher_info info
;
481 info
.crypt
= cipher
->decrypt
;
484 init_key(cipher
->key_size
, key
);
485 cipher
->set_decrypt_key(ctx
, cipher
->key_size
, key
);
487 display(cipher
->name
, "ECB decrypt", cipher
->block_size
,
488 time_function(bench_cipher
, &info
));
491 if (block_cipher_p(cipher
))
493 uint8_t *iv
= xalloc(cipher
->block_size
);
497 struct bench_cbc_info info
;
499 info
.crypt
= cipher
->encrypt
;
501 info
.block_size
= cipher
->block_size
;
504 memset(iv
, 0, sizeof(iv
));
506 cipher
->set_encrypt_key(ctx
, cipher
->key_size
, key
);
508 display(cipher
->name
, "CBC encrypt", cipher
->block_size
,
509 time_function(bench_cbc_encrypt
, &info
));
513 struct bench_cbc_info info
;
515 info
.crypt
= cipher
->decrypt
;
517 info
.block_size
= cipher
->block_size
;
520 memset(iv
, 0, sizeof(iv
));
522 cipher
->set_decrypt_key(ctx
, cipher
->key_size
, key
);
524 display(cipher
->name
, "CBC decrypt", cipher
->block_size
,
525 time_function(bench_cbc_decrypt
, &info
));
530 struct bench_cbc_info info
;
532 info
.crypt
= cipher
->encrypt
;
534 info
.block_size
= cipher
->block_size
;
537 memset(iv
, 0, sizeof(iv
));
539 cipher
->set_encrypt_key(ctx
, cipher
->key_size
, key
);
541 display(cipher
->name
, "CTR", cipher
->block_size
,
542 time_function(bench_ctr
, &info
));
551 /* Try to get accurate cycle times for assembler functions. */
552 #if WITH_CYCLE_COUNTER
554 compare_double(const void *ap
, const void *bp
)
556 double a
= *(const double *) ap
;
557 double b
= *(const double *) bp
;
566 #define TIME_CYCLES(t, code) do { \
567 double tc_count[5]; \
568 uint32_t tc_start_lo, tc_start_hi, tc_end_lo, tc_end_hi; \
569 unsigned tc_i, tc_j; \
570 for (tc_j = 0; tc_j < 5; tc_j++) \
573 GET_CYCLE_COUNTER(tc_start_hi, tc_start_lo); \
574 for (; tc_i < BENCH_ITERATIONS; tc_i++) \
577 GET_CYCLE_COUNTER(tc_end_hi, tc_end_lo); \
579 tc_end_hi -= (tc_start_hi + (tc_start_lo > tc_end_lo)); \
580 tc_end_lo -= tc_start_lo; \
582 tc_count[tc_j] = ldexp(tc_end_hi, 32) + tc_end_lo; \
584 qsort(tc_count, 5, sizeof(double), compare_double); \
585 (t) = tc_count[2] / BENCH_ITERATIONS; \
589 bench_sha1_compress(void)
591 uint32_t state
[_SHA1_DIGEST_LENGTH
];
592 uint8_t data
[SHA1_DATA_SIZE
];
595 TIME_CYCLES (t
, _nettle_sha1_compress(state
, data
));
597 printf("sha1_compress: %.2f cycles\n", t
);
601 bench_salsa20_core(void)
603 uint32_t state
[_SALSA20_INPUT_LENGTH
];
606 TIME_CYCLES (t
, _nettle_salsa20_core(state
, state
, 20));
607 printf("salsa20_core: %.2f cycles\n", t
);
611 bench_sha3_permute(void)
613 struct sha3_state state
;
616 TIME_CYCLES (t
, sha3_permute (&state
));
617 printf("sha3_permute: %.2f cycles (%.2f / round)\n", t
, t
/ 24.0);
620 #define bench_sha1_compress()
621 #define bench_salsa20_core()
622 #define bench_sha3_permute()
626 # define OPENSSL(x) x,
632 main(int argc
, char **argv
)
638 const struct nettle_hash
*hashes
[] =
640 &nettle_md2
, &nettle_md4
, &nettle_md5
,
641 OPENSSL(&nettle_openssl_md5
)
642 &nettle_sha1
, OPENSSL(&nettle_openssl_sha1
)
643 &nettle_sha224
, &nettle_sha256
,
644 &nettle_sha384
, &nettle_sha512
,
645 &nettle_sha3_224
, &nettle_sha3_256
,
646 &nettle_sha3_384
, &nettle_sha3_512
,
647 &nettle_ripemd160
, &nettle_gosthash94
,
651 const struct nettle_cipher
*ciphers
[] =
653 &nettle_aes128
, &nettle_aes192
, &nettle_aes256
,
654 OPENSSL(&nettle_openssl_aes128
)
655 OPENSSL(&nettle_openssl_aes192
)
656 OPENSSL(&nettle_openssl_aes256
)
657 &nettle_arcfour128
, OPENSSL(&nettle_openssl_arcfour128
)
658 &nettle_blowfish128
, OPENSSL(&nettle_openssl_blowfish128
)
659 &nettle_camellia128
, &nettle_camellia192
, &nettle_camellia256
,
660 &nettle_cast128
, OPENSSL(&nettle_openssl_cast128
)
661 &nettle_des
, OPENSSL(&nettle_openssl_des
)
664 &nettle_twofish128
, &nettle_twofish192
, &nettle_twofish256
,
665 &nettle_salsa20
, &nettle_salsa20r12
,
669 enum { OPT_HELP
= 300 };
670 static const struct option options
[] =
672 /* Name, args, flag, val */
673 { "help", no_argument
, NULL
, OPT_HELP
},
674 { "clock-frequency", required_argument
, NULL
, 'f' },
678 while ( (c
= getopt_long(argc
, argv
, "f:", options
, NULL
)) != -1)
682 frequency
= atof(optarg
);
687 printf("Usage: nettle-benchmark [-f clock frequency] [alg]\n");
700 bench_sha1_compress();
701 bench_salsa20_core();
702 bench_sha3_permute();
708 if (!alg
|| strstr ("memxor", alg
))
714 for (i
= 0; hashes
[i
]; i
++)
715 if (!alg
|| strstr(hashes
[i
]->name
, alg
))
716 time_hash(hashes
[i
]);
718 if (!alg
|| strstr ("umac", alg
))
721 for (i
= 0; ciphers
[i
]; i
++)
722 if (!alg
|| strstr(ciphers
[i
]->name
, alg
))
723 time_cipher(ciphers
[i
]);
725 if (!alg
|| strstr ("gcm", alg
))