//-----------------------------------------------------------------------------
// Copyright (C) 2016, 2017 by piwi
//
// This code is licensed to you under the terms of the GNU GPL, version 2 or,
// at your option, any later version. See the LICENSE.txt file for the text of
// the license.
//-----------------------------------------------------------------------------
// Implements a card only attack based on crypto text (encrypted nonces
// received during a nested authentication) only. Unlike other card only
// attacks this doesn't rely on implementation errors but only on the
// inherent weaknesses of the crypto1 cypher. Described in
// Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
// Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
// Computer and Communications Security, 2015
//-----------------------------------------------------------------------------
//
// brute forcing is based on @aczid's bitsliced brute forcer
// https://github.com/aczid/crypto1_bs with some modifications. Mainly:
// - don't roll back. Start with the 2nd byte of the nonce instead
// - reuse results of filter subfunctions
// - reuse results of previous nonces if some first bits are identical
//
//-----------------------------------------------------------------------------
// aczid's Copyright notice:
//
// Bit-sliced Crypto-1 brute-forcing implementation
// Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid)
/*
Copyright (c) 2015-2016 Aram Verstegen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "hardnested_bf_core.h"

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include <stdio.h>
#include <string.h>
#include "crapto1/crapto1.h"
#include "parity.h"
#include "ui.h"     // PrintAndLogEx
//#include "common.h"

// bitslice type
// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
// same for AVX2 and 512 bit vectors
// using larger vectors works but seems to generate more register pressure
#if defined(__AVX512F__)
#define MAX_BITSLICES 512
#elif defined(__AVX2__)
#define MAX_BITSLICES 256
#elif defined(__AVX__)
#define MAX_BITSLICES 128
#elif defined(__SSE2__)
#define MAX_BITSLICES 128
#else // MMX or SSE or NOSIMD
#define MAX_BITSLICES 64
#endif

#define VECTOR_SIZE (MAX_BITSLICES/8)
typedef uint32_t __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
typedef union {
    bitslice_value_t value;
    uint64_t bytes64[MAX_BITSLICES / 64];
    uint8_t bytes[MAX_BITSLICES / 8];
} bitslice_t;

// filter function (f20)
// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
#define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d)))
#define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d)))
#define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e))))
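// A keystream bit is the filter applied to the 20 odd-numbered state bits x9, x11, ..., x47:
//   ks = f20c(f20a(x9..x15), f20b(x17..x23), f20b(x25..x31), f20a(x33..x39), f20b(x41..x47))
// which is exactly how ksb[0] is assembled in CRACK_STATES_BITSLICED below. Since these macros
// operate on whole bitslice vectors, each evaluation yields MAX_BITSLICES keystream bits at once.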

// bit indexing
#define get_bit(n, word) (((word) >> (n)) & 1)
#define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6])
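// Bitslicing layout: bit position i of a bitslice vector belongs to candidate state number i
// of the current block, i.e. one bitslice_t holds the same state bit for MAX_BITSLICES
// candidates side by side. get_vector_bit() extracts the bit of a single candidate, e.g.
// get_vector_bit(70, v) reads bit 70 & 0x3f = 6 of v.bytes64[70 >> 6] = v.bytes64[1].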

// size of crypto-1 state
#define STATE_SIZE 48
// size of nonce to be decrypted
#define KEYSTREAM_SIZE 24
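// 24 keystream bits cover the 2nd to 4th byte of the 32 bit encrypted nonce: the first byte
// has already been accounted for in the candidate states (the "best first byte is already
// shifted in", see CRACK_STATES_BITSLICED below), so only the remaining three bytes and their
// parity bits need to be tested here.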

// This file needs to be compiled several times, once for each instruction set.
// For each instruction set, define dedicated function names:
#if defined (__AVX512F__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512
#elif defined (__AVX2__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2
#elif defined (__AVX__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX
#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX
#elif defined (__SSE2__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2
#define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2
#elif defined (__MMX__)
#define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX
#define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX
#else
#define BITSLICE_TEST_NONCES bitslice_test_nonces_NOSIMD
#define CRACK_STATES_BITSLICED crack_states_bitsliced_NOSIMD
#endif

// typedefs and declaration of functions:
typedef uint64_t crack_states_bitsliced_t(uint32_t, uint8_t *, statelist_t *, uint32_t *, uint64_t *, uint32_t, uint8_t *, noncelist_t *);
crack_states_bitsliced_t crack_states_bitsliced_AVX512;
crack_states_bitsliced_t crack_states_bitsliced_AVX2;
crack_states_bitsliced_t crack_states_bitsliced_AVX;
crack_states_bitsliced_t crack_states_bitsliced_SSE2;
crack_states_bitsliced_t crack_states_bitsliced_MMX;
crack_states_bitsliced_t crack_states_bitsliced_NOSIMD;
crack_states_bitsliced_t crack_states_bitsliced_dispatch;

typedef void bitslice_test_nonces_t(uint32_t, uint32_t *, uint8_t *);
bitslice_test_nonces_t bitslice_test_nonces_AVX512;
bitslice_test_nonces_t bitslice_test_nonces_AVX2;
bitslice_test_nonces_t bitslice_test_nonces_AVX;
bitslice_test_nonces_t bitslice_test_nonces_SSE2;
bitslice_test_nonces_t bitslice_test_nonces_MMX;
bitslice_test_nonces_t bitslice_test_nonces_NOSIMD;
bitslice_test_nonces_t bitslice_test_nonces_dispatch;

#if defined (_WIN32)
#define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES / 8), MAX_BITSLICES / 8)
#define free_bitslice(x) _aligned_free(x)
#elif defined (__APPLE__)
static void *malloc_bitslice(size_t x) {
    char *allocated_memory;
    if (posix_memalign((void **)&allocated_memory, MAX_BITSLICES / 8, x)) {
        return NULL;
    } else {
        return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES / 8);
    }
}
#define free_bitslice(x) free(x)
#else
#define malloc_bitslice(x) memalign(MAX_BITSLICES / 8, (x))
#define free_bitslice(x) free(x)
#endif

typedef enum {
    EVEN_STATE = 0,
    ODD_STATE = 1
} odd_even_t;
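// The 48 bit crypto1 state is handled as two interleaved 24 bit half-states: the bits at even
// positions and the bits at odd positions. The candidate lists in statelist_t are kept per
// half-state, and crapto1's struct Crypto1State (pcs.odd / pcs.even, used during verification
// below) follows the same split.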

// arrays of bitsliced states with identical values in all slices
static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE];
static bitslice_t bitsliced_encrypted_parity_bits[256][4];
// 1 and 0 vectors
static bitslice_t bs_ones;
static bitslice_t bs_zeroes;

void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {

    // initialize 1 and 0 vectors
    memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
    memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);

    // bitslice nonces' 2nd to 4th byte
    for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
        for (uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++) {
            bool bit = get_bit(KEYSTREAM_SIZE - 1 - bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
            if (bit) {
                bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
            } else {
                bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
            }
        }
    }

    // bitslice nonces' parity (4 bits)
    for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
        for (uint32_t bit_idx = 0; bit_idx < 4; bit_idx++) {
            bool bit = get_bit(4 - 1 - bit_idx, bf_test_nonce_par[i]);
            if (bit) {
                bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
            } else {
                bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
            }
        }
    }
}

uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {

    // Unlike aczid's implementation, this doesn't roll back at all during the bitsliced brute force.
    // We know that the best first byte has already been shifted in. Testing against the remaining three
    // bytes of the nonces is sufficient to eliminate most candidate states. The few states that survive
    // are tested with simple unsliced brute forcing (including rollback).
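    // Rough structure of the code below:
    //   1. Bitslice all candidate even half-states into blocks of MAX_BITSLICES slices and
    //      pre-compute each block's contribution to the LFSR feedback bit.
    //   2. For every candidate odd half-state, combine it with every block, decrypt the test
    //      nonces bit by bit and check their parity bits; slices that fail a parity check are
    //      cleared from the results vector.
    //   3. Every slice that survives all nonce tests is handed to verify_key(); on success the
    //      state is rolled back and the recovered key is returned.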

    bitslice_t states[KEYSTREAM_SIZE + STATE_SIZE];
    bitslice_t *restrict state_p;
    uint64_t key = -1;
    uint64_t bucket_states_tested = 0;
    uint32_t bucket_size[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
    uint32_t bitsliced_blocks = 0;
    uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
#if defined (DEBUG_BRUTE_FORCE)
    uint32_t elimination_step = 0;
#define MAX_ELIMINATION_STEP 32
    uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
#endif
#ifdef DEBUG_KEY_ELIMINATION
    bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
#endif

    // constant ones/zeroes
    // bitslice_t bs_ones;
    memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
    // bitslice_t bs_zeroes;
    memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);

    // bitslice all the even states
    bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_t *));
    if (bitsliced_even_states == NULL) {
        PrintAndLogEx(WARNING, "Out of memory error in brute_force. Aborting...");
        exit(4);
    }
    bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
    if (bitsliced_even_feedback == NULL) {
        PrintAndLogEx(WARNING, "Out of memory error in brute_force. Aborting...");
        exit(4);
    }
    for (uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES) {
        bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE / 2 * sizeof(bitslice_t));
        if (lstate_p == NULL) {
            PrintAndLogEx(WARNING, "Out of memory error in brute_force. Aborting... \n");
            exit(4);
        }
        memset(lstate_p, 0x00, STATE_SIZE / 2 * sizeof(bitslice_t)); // zero even bits
        // bitslice even half-states
        const uint32_t max_slices = (p_even_end - p_even) < MAX_BITSLICES ? p_even_end - p_even : MAX_BITSLICES;
        bucket_size[bitsliced_blocks] = max_slices;
#ifdef DEBUG_KEY_ELIMINATION
        bucket_contains_test_key[bitsliced_blocks] = false;
#endif
        uint32_t slice_idx;
        for (slice_idx = 0; slice_idx < max_slices; ++slice_idx) {
            uint32_t e = *(p_even + slice_idx);
#ifdef DEBUG_KEY_ELIMINATION
            if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
                bucket_contains_test_key[bitsliced_blocks] = true;
                // PrintAndLogEx(INFO, "bucket %d contains test key even state", bitsliced_blocks);
                // PrintAndLogEx(INFO, "in slice %d", slice_idx);
            }
#endif
            for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
                // set even bits
                if (e & 1) {
                    lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
                }
            }
        }
        // padding with last even state
        for (; slice_idx < MAX_BITSLICES; ++slice_idx) {
            uint32_t e = *(p_even_end - 1);
            for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
                // set even bits
                if (e & 1) {
                    lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
                }
            }
        }
        bitsliced_even_states[bitsliced_blocks] = lstate_p;
        // bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
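        // The six taps below (state bits 0, 10, 12, 14, 24 and 42 in the "47 - n" indexing used
        // throughout this file) are the even-indexed taps of the 18-tap crypto1 LFSR feedback;
        // compare the full feedback computation further down. Their XOR depends only on the even
        // half-state, so it is pre-computed once per block and later combined with the odd half's
        // contribution into fbb[0].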
        bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47 - 0) / 2].value ^
                lstate_p[(47 - 10) / 2].value ^ lstate_p[(47 - 12) / 2].value ^ lstate_p[(47 - 14) / 2].value ^
                lstate_p[(47 - 24) / 2].value ^ lstate_p[(47 - 42) / 2].value;
        bitsliced_blocks++;
    }

    // bitslice every odd state to every block of even states
    for (uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd) {
        // early abort
        if (*keys_found) {
            goto out;
        }

        // set odd state bits and pre-compute the first keystream bit vector. This is the same for all blocks of even states.
        state_p = &states[KEYSTREAM_SIZE];
        uint32_t o = *p_odd;

        // pre-compute the odd feedback bit
        bool odd_feedback_bit = evenparity32(o & 0x29ce5c);
        const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
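        // In this packed odd half-state representation, 0x29ce5c selects exactly the odd-indexed
        // LFSR feedback taps (state bits 5, 9, 15, 17, 19, 25, 27, 29, 35, 39, 41 and 43). Its
        // parity is the odd half's constant contribution to the feedback bit for this p_odd;
        // the even half's contribution was pre-computed per block above.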

        // set odd state bits
        for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
            if (o & 1) {
                state_p[state_idx] = bs_ones;
            } else {
                state_p[state_idx] = bs_zeroes;
            }
        }

        bitslice_value_t crypto1_bs_f20b_2[16];
        bitslice_value_t crypto1_bs_f20b_3[8];

        crypto1_bs_f20b_2[0] = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
        crypto1_bs_f20b_3[0] = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);

        bitslice_value_t ksb[8];
        ksb[0] = f20c(f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value),
                      f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value),
                      crypto1_bs_f20b_2[0],
                      f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value),
                      crypto1_bs_f20b_3[0]);
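        // ksb[] caches keystream bit vectors, indexed by how many bits of the current nonce have
        // already been processed (0..7). ksb[0] depends only on the odd half-state, so it is
        // computed once here and reused for every block; entries 1..7, like fbb[] (feedback) and
        // par[] (parity) below, are filled on the fly while testing a nonce and reused when the
        // next test nonce shares its leading bits with the current one (see next_common_bits).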

        uint32_t *restrict p_even = p->states[EVEN_STATE];
        for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) {

#ifdef DEBUG_KEY_ELIMINATION
            // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
            //     PrintAndLogEx(INFO, "Now testing known target key.");
            //     PrintAndLogEx(INFO, "block_idx = %d/%d", block_idx, bitsliced_blocks);
            // }
#endif
            // add the even state bits
            const bitslice_t *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
            for (uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
                state_p[state_idx] = bitsliced_even_state[state_idx / 2];
            }

            // pre-compute first feedback bit vector. This is the same for all nonces
            bitslice_value_t fbb[8];
            fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx];

            // vector to contain test results (1 = passed, 0 = failed)
            bitslice_t results = bs_ones;

            // parity_bits
            bitslice_value_t par[8];
            par[0] = bs_zeroes.value;
            uint32_t next_common_bits = 0;

            for (uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests) {
                // common bits with preceding test nonce
                uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
                next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests + 1]) : 0;
                uint32_t parity_bit_idx = 1; // start checking with the parity of the second nonce byte
                bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from the previous nonce
                bitslice_value_t ks_bits = ksb[common_bits]; // ditto for the first keystream bits
                bitslice_value_t parity_bit_vector = par[common_bits]; // ditto for the first parity vector
                // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from the previous nonce
                // bitslice_value_t ks_bits = ksb[0]; // ditto for the first keystream bits
                // bitslice_value_t parity_bit_vector = par[0]; // ditto for the first parity vector
                state_p -= common_bits; // and reuse the already calculated state bits
                // The highest bit is transmitted/received first. We start with bit 23 (the highest bit of the
                // second nonce byte), or with the highest bit which differs from the previous nonce.
                for (int32_t ks_idx = KEYSTREAM_SIZE - 1 - common_bits; ks_idx >= 0; --ks_idx) {

                    // decrypt nonce bits
                    const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
                    const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits;

                    // compute real parity bits on the fly
                    parity_bit_vector ^= decrypted_nonce_bit_vector;

                    // update state
                    state_p--;
                    state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector;

                    // update crypto1 subfunctions
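                    // Reuse of filter subfunctions: the state shifts by one position per keystream
                    // bit, so after 8 shifts the bits that fed f20b over taps 25..31 sit at taps
                    // 17..23, and after 16 shifts the bits that fed f20b over taps 41..47 sit at
                    // taps 25..31. The values saved in crypto1_bs_f20b_2[] and crypto1_bs_f20b_3[]
                    // during earlier bytes can therefore stand in for f20b_1 and f20b_2 below,
                    // saving one or two f20b evaluations per keystream bit.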
                    bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
                    f20a_2 = f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value);
                    f20b_3 = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);
                    if (ks_idx > KEYSTREAM_SIZE - 8) {
                        f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
                        f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
                        f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
                        crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
                        crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
                    } else if (ks_idx > KEYSTREAM_SIZE - 16) {
                        f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
                        f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
                        f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
                        crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
                    } else if (ks_idx > KEYSTREAM_SIZE - 24) {
                        f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
                        f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
                        f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
                    } else {
                        f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
                        f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
                        f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
                    }

                    // update keystream bit
                    ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);

                    // for each completed byte:
                    if ((ks_idx & 0x07) == 0) {
                        // get encrypted parity bits
                        const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;

                        // decrypt parity bits
                        const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits;

                        // compare the actual parity bits with the decrypted parity bits and record the outcome in the results vector
                        results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector;

                        // make sure we still have a match in our set
                        // if (memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0) {

                        // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
                        // the short-circuiting also helps
                        if (results.bytes64[0] == 0
#if MAX_BITSLICES > 64
                                && results.bytes64[1] == 0
#endif
#if MAX_BITSLICES > 128
                                && results.bytes64[2] == 0
                                && results.bytes64[3] == 0
#endif
                           ) {
#if defined (DEBUG_BRUTE_FORCE)
                            if (elimination_step < MAX_ELIMINATION_STEP) {
                                keys_eliminated[elimination_step] += MAX_BITSLICES;
                            }
#endif
#ifdef DEBUG_KEY_ELIMINATION
                            if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
                                PrintAndLogEx(INFO, "Known target key eliminated in brute_force.");
                                PrintAndLogEx(INFO, "block_idx = %d/%d, nonce = %d/%d", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce);
                            }
#endif
                            goto stop_tests;
                        }
                        // prepare for next nonce byte
#if defined (DEBUG_BRUTE_FORCE)
                        elimination_step++;
#endif
                        parity_bit_vector = bs_zeroes.value;
                    }

                    // update feedback bit vector
                    if (ks_idx != 0) {
                        fb_bits =
                            (state_p[47 - 0].value ^ state_p[47 - 5].value ^ state_p[47 - 9].value ^
                             state_p[47 - 10].value ^ state_p[47 - 12].value ^ state_p[47 - 14].value ^
                             state_p[47 - 15].value ^ state_p[47 - 17].value ^ state_p[47 - 19].value ^
                             state_p[47 - 24].value ^ state_p[47 - 25].value ^ state_p[47 - 27].value ^
                             state_p[47 - 29].value ^ state_p[47 - 35].value ^ state_p[47 - 39].value ^
                             state_p[47 - 41].value ^ state_p[47 - 42].value ^ state_p[47 - 43].value);
                    }

                    // remember feedback and keystream vectors for later use
                    uint8_t bit = KEYSTREAM_SIZE - ks_idx;
                    if (bit <= MIN(next_common_bits, 7)) { // if needed and not yet stored
                        fbb[bit] = fb_bits;
                        ksb[bit] = ks_bits;
                        par[bit] = parity_bit_vector;
                    }
                }
                // prepare for next nonce. Revert to initial state
                state_p = &states[KEYSTREAM_SIZE];
            }

            // all nonce tests were successful: we've found a possible key in this block!
            uint32_t *p_even_test = p_even;
            for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) {
                uint64_t results64 = results.bytes64[results_word];
                for (uint32_t results_bit = 0; results_bit < 64; results_bit++) {
                    if (results64 & 0x01) {
                        if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) {
                            struct Crypto1State pcs;
                            pcs.odd = *p_odd;
                            pcs.even = *p_even_test;
                            lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
                            crypto1_get_lfsr(&pcs, &key);
                            bucket_states_tested += 64 * results_word + results_bit;
                            goto out;
                        }
#ifdef DEBUG_KEY_ELIMINATION
                        if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
                            PrintAndLogEx(INFO, "Known target key eliminated in brute_force verification.");
                            PrintAndLogEx(INFO, "block_idx = %d/%d", block_idx, bitsliced_blocks);
                        }
#endif
                    }
#ifdef DEBUG_KEY_ELIMINATION
                    if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
                        PrintAndLogEx(INFO, "Known target key eliminated in brute_force (results_bit == 0).");
                        PrintAndLogEx(INFO, "block_idx = %d/%d", block_idx, bitsliced_blocks);
                    }
#endif
                    results64 >>= 1;
                    p_even_test++;
                    if (p_even_test == p_even_end) {
                        goto stop_tests;
                    }
                }
            }

stop_tests:
#if defined (DEBUG_BRUTE_FORCE)
            elimination_step = 0;
#endif
            bucket_states_tested += bucket_size[block_idx];
            // prepare to set new states
            state_p = &states[KEYSTREAM_SIZE];
            continue;
        }
    }

out:
    for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx) {
        free_bitslice(bitsliced_even_states[block_idx]);
    }
    free(bitsliced_even_states);
    free_bitslice(bitsliced_even_feedback);
    __sync_fetch_and_add(num_keys_tested, bucket_states_tested);

#if defined (DEBUG_BRUTE_FORCE)
    for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
        PrintAndLogEx(INFO, "Eliminated after %2u test_bytes: %5.2f%%", i + 1, (float)keys_eliminated[i] / bucket_states_tested * 100);
    }
#endif
    return key;
}

#ifndef __MMX__
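// Note: this dispatcher section is compiled into only one of the per-instruction-set objects
// (the one built without MMX/SSE/AVX support), presumably so that the function pointers and
// the public entry points are defined exactly once while each object still provides its own
// *_AVX512/_AVX2/_AVX/_SSE2/_MMX/_NOSIMD specializations above.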

// pointers to functions:
crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;

static SIMDExecInstr intSIMDInstr = SIMD_AUTO;

void SetSIMDInstr(SIMDExecInstr instr) {
    intSIMDInstr = instr;

    crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
    bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
}

static SIMDExecInstr GetSIMDInstr(void) {
    SIMDExecInstr instr;

#if defined(COMPILER_HAS_SIMD)
    __builtin_cpu_init();
#endif

#if defined(COMPILER_HAS_SIMD_AVX512)
    if (__builtin_cpu_supports("avx512f"))
        instr = SIMD_AVX512;
    else
#endif
#if defined(COMPILER_HAS_SIMD)
        if (__builtin_cpu_supports("avx2"))
            instr = SIMD_AVX2;
        else if (__builtin_cpu_supports("avx"))
            instr = SIMD_AVX;
        else if (__builtin_cpu_supports("sse2"))
            instr = SIMD_SSE2;
        else if (__builtin_cpu_supports("mmx"))
            instr = SIMD_MMX;
        else
#endif
            instr = SIMD_NONE;

    return instr;
}

SIMDExecInstr GetSIMDInstrAuto(void) {
    SIMDExecInstr instr = intSIMDInstr;
    if (instr == SIMD_AUTO)
        return GetSIMDInstr();

    return instr;
}

// determine the available instruction set at runtime and call the correct function
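// Dispatch pattern: the public entry points initially point at the *_dispatch functions.
// On the first call, the dispatcher probes the CPU once, rebinds the corresponding function
// pointer to the best available specialization and forwards the call, so subsequent calls
// through crack_states_bitsliced() / bitslice_test_nonces() go straight to that implementation.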
uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
    switch (GetSIMDInstrAuto()) {
#if defined(COMPILER_HAS_SIMD_AVX512)
        case SIMD_AVX512:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
            break;
#endif
#if defined(COMPILER_HAS_SIMD)
        case SIMD_AVX2:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
            break;
        case SIMD_AVX:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
            break;
        case SIMD_SSE2:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
            break;
        case SIMD_MMX:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
            break;
#endif
        case SIMD_AUTO:
        case SIMD_NONE:
            crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
            break;
    }

    // call the most optimized function for this CPU
    return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}

void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
    switch (GetSIMDInstrAuto()) {
#if defined(COMPILER_HAS_SIMD_AVX512)
        case SIMD_AVX512:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
            break;
#endif
#if defined(COMPILER_HAS_SIMD)
        case SIMD_AVX2:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
            break;
        case SIMD_AVX:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
            break;
        case SIMD_SSE2:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
            break;
        case SIMD_MMX:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
            break;
#endif
        case SIMD_AUTO:
        case SIMD_NONE:
            bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
            break;
    }

    // call the most optimized function for this CPU
    (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
}

// Entries to dispatched function calls
uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
    return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}

void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
    (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
}

#endif