1 //-----------------------------------------------------------------------------
2 // Copyright (C) 2016, 2017 by piwi
4 // This code is licensed to you under the terms of the GNU GPL, version 2 or,
5 // at your option, any later version. See the LICENSE.txt file for the text of
7 //-----------------------------------------------------------------------------
8 // Implements a card only attack based on crypto text (encrypted nonces
9 // received during a nested authentication) only. Unlike other card only
10 // attacks this doesn't rely on implementation errors but only on the
11 // inherent weaknesses of the crypto1 cypher. Described in
12 // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
13 // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
14 // Computer and Communications Security, 2015
15 //-----------------------------------------------------------------------------
17 // brute forcing is based on @aczids bitsliced brute forcer
18 // https://github.com/aczid/crypto1_bs with some modifications. Mainly:
19 // - don't rollback. Start with 2nd byte of nonce instead
20 // - reuse results of filter subfunctions
21 // - reuse results of previous nonces if some first bits are identical
23 //-----------------------------------------------------------------------------
24 // aczid's Copyright notice:
26 // Bit-sliced Crypto-1 brute-forcing implementation
27 // Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid)
29 Copyright (c) 2015-2016 Aram Verstegen
31 Permission is hereby granted, free of charge, to any person obtaining a copy
32 of this software and associated documentation files (the "Software"), to deal
33 in the Software without restriction, including without limitation the rights
34 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
35 copies of the Software, and to permit persons to whom the Software is
36 furnished to do so, subject to the following conditions:
38 The above copyright notice and this permission notice shall be included in
39 all copies or substantial portions of the Software.
41 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
50 #include "hardnested_bf_core.h"
60 #include "crapto1/crapto1.h"
62 #include "ui.h" // PrintAndLogEx
66 // while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
67 // same for AVX2 and 512 bit vectors
68 // using larger vectors works but seems to generate more register pressure
69 #if defined(__AVX512F__)
70 #define MAX_BITSLICES 512
71 #elif defined(__AVX2__)
72 #define MAX_BITSLICES 256
73 #elif defined(__AVX__)
74 #define MAX_BITSLICES 128
75 #elif defined(__SSE2__)
76 #define MAX_BITSLICES 128
77 #elif defined(__ARM_NEON) && !defined(NOSIMD_BUILD)
78 #define MAX_BITSLICES 128
79 #else // MMX or SSE or NOSIMD
80 #define MAX_BITSLICES 64
83 #define VECTOR_SIZE (MAX_BITSLICES/8)
84 typedef uint32_t __attribute__((aligned(VECTOR_SIZE
))) __attribute__((vector_size(VECTOR_SIZE
))) bitslice_value_t
;
86 bitslice_value_t value
;
87 uint64_t bytes64
[MAX_BITSLICES
/ 64];
88 uint8_t bytes
[MAX_BITSLICES
/ 8];
91 // filter function (f20)
92 // sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
93 #define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d)))
94 #define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d)))
95 #define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e))))
98 #define get_bit(n, word) (((word) >> (n)) & 1)
99 #define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6])
101 // size of crypto-1 state
102 #define STATE_SIZE 48
103 // size of nonce to be decrypted
104 #define KEYSTREAM_SIZE 24
106 // this needs to be compiled several times for each instruction set.
107 // For each instruction set, define a dedicated function name:
108 #if defined (__AVX512F__)
109 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512
110 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512
111 #elif defined (__AVX2__)
112 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2
113 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2
114 #elif defined (__AVX__)
115 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX
116 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX
117 #elif defined (__SSE2__)
118 #define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2
119 #define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2
120 #elif defined (__MMX__)
121 #define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX
122 #define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX
123 #elif defined (__ARM_NEON) && !defined(NOSIMD_BUILD)
124 #define BITSLICE_TEST_NONCES bitslice_test_nonces_NEON
125 #define CRACK_STATES_BITSLICED crack_states_bitsliced_NEON
127 #define BITSLICE_TEST_NONCES bitslice_test_nonces_NOSIMD
128 #define CRACK_STATES_BITSLICED crack_states_bitsliced_NOSIMD
131 // typedefs and declaration of functions:
132 typedef uint64_t crack_states_bitsliced_t(uint32_t, uint8_t *, statelist_t
*, uint32_t *, uint64_t *, uint32_t, const uint8_t *, noncelist_t
*);
133 crack_states_bitsliced_t crack_states_bitsliced_AVX512
;
134 crack_states_bitsliced_t crack_states_bitsliced_AVX2
;
135 crack_states_bitsliced_t crack_states_bitsliced_AVX
;
136 crack_states_bitsliced_t crack_states_bitsliced_SSE2
;
137 crack_states_bitsliced_t crack_states_bitsliced_MMX
;
138 crack_states_bitsliced_t crack_states_bitsliced_NEON
;
139 crack_states_bitsliced_t crack_states_bitsliced_NOSIMD
;
140 crack_states_bitsliced_t crack_states_bitsliced_dispatch
;
142 typedef void bitslice_test_nonces_t(uint32_t, const uint32_t *, const uint8_t *);
143 bitslice_test_nonces_t bitslice_test_nonces_AVX512
;
144 bitslice_test_nonces_t bitslice_test_nonces_AVX2
;
145 bitslice_test_nonces_t bitslice_test_nonces_AVX
;
146 bitslice_test_nonces_t bitslice_test_nonces_SSE2
;
147 bitslice_test_nonces_t bitslice_test_nonces_MMX
;
148 bitslice_test_nonces_t bitslice_test_nonces_NEON
;
149 bitslice_test_nonces_t bitslice_test_nonces_NOSIMD
;
150 bitslice_test_nonces_t bitslice_test_nonces_dispatch
;
153 #define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES / 8), MAX_BITSLICES / 8)
154 #define free_bitslice(x) _aligned_free(x)
155 #elif defined (__APPLE__)
156 static void *malloc_bitslice(size_t x
) {
157 char *allocated_memory
;
158 if (posix_memalign((void **)&allocated_memory
, MAX_BITSLICES
/ 8, x
)) {
161 return __builtin_assume_aligned(allocated_memory
, MAX_BITSLICES
/ 8);
164 #define free_bitslice(x) free(x)
166 //#define malloc_bitslice(x) memalign(MAX_BITSLICES / 8, (x))
167 #define malloc_bitslice(x) __builtin_assume_aligned(memalign(MAX_BITSLICES / 8, (x)), MAX_BITSLICES / 8);
168 #define free_bitslice(x) free(x)
177 // arrays of bitsliced states with identical values in all slices
178 static bitslice_t bitsliced_encrypted_nonces
[256][KEYSTREAM_SIZE
];
179 static bitslice_t bitsliced_encrypted_parity_bits
[256][4];
181 static bitslice_t bs_ones
;
182 static bitslice_t bs_zeroes
;
185 void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce
, const uint32_t *bf_test_nonce
, const uint8_t *bf_test_nonce_par
) {
187 // initialize 1 and 0 vectors
188 memset(bs_ones
.bytes
, 0xff, VECTOR_SIZE
);
189 memset(bs_zeroes
.bytes
, 0x00, VECTOR_SIZE
);
191 // bitslice nonces' 2nd to 4th byte
192 for (uint32_t i
= 0; i
< nonces_to_bruteforce
; i
++) {
193 for (uint32_t bit_idx
= 0; bit_idx
< KEYSTREAM_SIZE
; bit_idx
++) {
194 bool bit
= get_bit(KEYSTREAM_SIZE
- 1 - bit_idx
, BSWAP_32(bf_test_nonce
[i
] << 8));
196 bitsliced_encrypted_nonces
[i
][bit_idx
].value
= bs_ones
.value
;
198 bitsliced_encrypted_nonces
[i
][bit_idx
].value
= bs_zeroes
.value
;
202 // bitslice nonces' parity (4 bits)
203 for (uint32_t i
= 0; i
< nonces_to_bruteforce
; i
++) {
204 for (uint32_t bit_idx
= 0; bit_idx
< 4; bit_idx
++) {
205 bool bit
= get_bit(4 - 1 - bit_idx
, bf_test_nonce_par
[i
]);
207 bitsliced_encrypted_parity_bits
[i
][bit_idx
].value
= bs_ones
.value
;
209 bitsliced_encrypted_parity_bits
[i
][bit_idx
].value
= bs_zeroes
.value
;
217 uint64_t CRACK_STATES_BITSLICED(uint32_t cuid
, uint8_t *best_first_bytes
, statelist_t
*p
, uint32_t *keys_found
,
218 uint64_t *num_keys_tested
, uint32_t nonces_to_bruteforce
,
219 const uint8_t *bf_test_nonce_2nd_byte
, noncelist_t
*nonces
) {
221 // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
222 // We know that the best first byte is already shifted in. Testing with the remaining three bytes of
223 // the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
224 // brute forcing (including roll back).
226 bitslice_t states
[KEYSTREAM_SIZE
+ STATE_SIZE
];
227 bitslice_t
*restrict state_p
;
229 uint64_t bucket_states_tested
= 0;
230 uint32_t bucket_size
[(p
->len
[EVEN_STATE
] - 1) / MAX_BITSLICES
+ 1];
231 uint32_t bitsliced_blocks
= 0;
232 uint32_t const *restrict p_even_end
= p
->states
[EVEN_STATE
] + p
->len
[EVEN_STATE
];
233 #if defined (DEBUG_BRUTE_FORCE)
234 uint32_t elimination_step
= 0;
235 #define MAX_ELIMINATION_STEP 32
236 uint64_t keys_eliminated
[MAX_ELIMINATION_STEP
] = {0};
238 #ifdef DEBUG_KEY_ELIMINATION
239 bool bucket_contains_test_key
[(p
->len
[EVEN_STATE
] - 1) / MAX_BITSLICES
+ 1];
242 // constant ones/zeroes
243 // bitslice_t bs_ones;
244 memset(bs_ones
.bytes
, 0xff, VECTOR_SIZE
);
245 // bitslice_t bs_zeroes;
246 memset(bs_zeroes
.bytes
, 0x00, VECTOR_SIZE
);
248 // bitslice all the even states
249 bitslice_t
**restrict bitsliced_even_states
= (bitslice_t
**)calloc(1, ((p
->len
[EVEN_STATE
] - 1) / MAX_BITSLICES
+ 1) * sizeof(bitslice_t
*));
250 if (bitsliced_even_states
== NULL
) {
251 PrintAndLogEx(WARNING
, "Out of memory error in brute_force. Aborting...");
254 bitslice_value_t
*restrict bitsliced_even_feedback
= malloc_bitslice(((p
->len
[EVEN_STATE
] - 1) / MAX_BITSLICES
+ 1) * sizeof(bitslice_value_t
));
255 if (bitsliced_even_feedback
== NULL
) {
256 PrintAndLogEx(WARNING
, "Out of memory error in brute_force. Aborting...");
259 for (uint32_t *restrict p_even
= p
->states
[EVEN_STATE
]; p_even
< p_even_end
; p_even
+= MAX_BITSLICES
) {
260 bitslice_t
*restrict lstate_p
= malloc_bitslice(STATE_SIZE
/ 2 * sizeof(bitslice_t
));
261 if (lstate_p
== NULL
) {
262 PrintAndLogEx(WARNING
, "Out of memory error in brute_force. Aborting... \n");
265 memset(lstate_p
, 0x00, STATE_SIZE
/ 2 * sizeof(bitslice_t
)); // zero even bits
266 // bitslice even half-states
267 const uint32_t max_slices
= (p_even_end
- p_even
) < MAX_BITSLICES
? p_even_end
- p_even
: MAX_BITSLICES
;
268 bucket_size
[bitsliced_blocks
] = max_slices
;
269 #ifdef DEBUG_KEY_ELIMINATION
270 bucket_contains_test_key
[bitsliced_blocks
] = false;
273 for (slice_idx
= 0; slice_idx
< max_slices
; ++slice_idx
) {
274 uint32_t e
= *(p_even
+ slice_idx
);
275 #ifdef DEBUG_KEY_ELIMINATION
276 if (known_target_key
!= -1 && e
== test_state
[EVEN_STATE
]) {
277 bucket_contains_test_key
[bitsliced_blocks
] = true;
278 // PrintAndLogEx(INFO, "bucket %d contains test key even state", bitsliced_blocks);
279 // PrintAndLogEx(INFO, "in slice %d", slice_idx);
282 for (uint32_t bit_idx
= 0; bit_idx
< STATE_SIZE
/ 2; bit_idx
++, e
>>= 1) {
285 lstate_p
[bit_idx
].bytes64
[slice_idx
>> 6] |= 1ull << (slice_idx
& 0x3f);
289 // padding with last even state
290 for (; slice_idx
< MAX_BITSLICES
; ++slice_idx
) {
291 uint32_t e
= *(p_even_end
- 1);
292 for (uint32_t bit_idx
= 0; bit_idx
< STATE_SIZE
/ 2; bit_idx
++, e
>>= 1) {
295 lstate_p
[bit_idx
].bytes64
[slice_idx
>> 6] |= 1ull << (slice_idx
& 0x3f);
299 bitsliced_even_states
[bitsliced_blocks
] = lstate_p
;
300 // bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
301 bitsliced_even_feedback
[bitsliced_blocks
] = lstate_p
[(47 - 0) / 2].value
^
302 lstate_p
[(47 - 10) / 2].value
^ lstate_p
[(47 - 12) / 2].value
^ lstate_p
[(47 - 14) / 2].value
^
303 lstate_p
[(47 - 24) / 2].value
^ lstate_p
[(47 - 42) / 2].value
;
306 // bitslice every odd state to every block of even states
307 for (uint32_t const *restrict p_odd
= p
->states
[ODD_STATE
]; p_odd
< p
->states
[ODD_STATE
] + p
->len
[ODD_STATE
]; ++p_odd
) {
313 // set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states
315 state_p
= &states
[KEYSTREAM_SIZE
];
318 // pre-compute the odd feedback bit
319 bool odd_feedback_bit
= evenparity32(o
& 0x29ce5c);
320 const bitslice_value_t odd_feedback
= odd_feedback_bit
? bs_ones
.value
: bs_zeroes
.value
;
322 // set odd state bits
323 for (uint32_t state_idx
= 0; state_idx
< STATE_SIZE
; o
>>= 1, state_idx
+= 2) {
325 state_p
[state_idx
] = bs_ones
;
327 state_p
[state_idx
] = bs_zeroes
;
331 bitslice_value_t crypto1_bs_f20b_2
[16];
332 bitslice_value_t crypto1_bs_f20b_3
[8];
334 crypto1_bs_f20b_2
[0] = f20b(state_p
[47 - 25].value
, state_p
[47 - 27].value
, state_p
[47 - 29].value
, state_p
[47 - 31].value
);
335 crypto1_bs_f20b_3
[0] = f20b(state_p
[47 - 41].value
, state_p
[47 - 43].value
, state_p
[47 - 45].value
, state_p
[47 - 47].value
);
337 bitslice_value_t ksb
[8];
338 ksb
[0] = f20c(f20a(state_p
[47 - 9].value
, state_p
[47 - 11].value
, state_p
[47 - 13].value
, state_p
[47 - 15].value
),
339 f20b(state_p
[47 - 17].value
, state_p
[47 - 19].value
, state_p
[47 - 21].value
, state_p
[47 - 23].value
),
340 crypto1_bs_f20b_2
[0],
341 f20a(state_p
[47 - 33].value
, state_p
[47 - 35].value
, state_p
[47 - 37].value
, state_p
[47 - 39].value
),
342 crypto1_bs_f20b_3
[0]);
344 uint32_t *restrict p_even
= p
->states
[EVEN_STATE
];
345 for (uint32_t block_idx
= 0; block_idx
< bitsliced_blocks
; ++block_idx
, p_even
+= MAX_BITSLICES
) {
347 #ifdef DEBUG_KEY_ELIMINATION
348 // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
349 // PrintAndLogEx(INFO, "Now testing known target key.");
350 // PrintAndLogEx(INFO, "block_idx = %d/%d", block_idx, bitsliced_blocks);
353 // add the even state bits
354 const bitslice_t
*restrict bitsliced_even_state
= bitsliced_even_states
[block_idx
];
355 for (uint32_t state_idx
= 1; state_idx
< STATE_SIZE
; state_idx
+= 2) {
356 state_p
[state_idx
] = bitsliced_even_state
[state_idx
/ 2];
359 // pre-compute first feedback bit vector. This is the same for all nonces
360 bitslice_value_t fbb
[8];
361 fbb
[0] = odd_feedback
^ bitsliced_even_feedback
[block_idx
];
363 // vector to contain test results (1 = passed, 0 = failed)
364 bitslice_t results
= bs_ones
;
367 bitslice_value_t par
[8];
368 par
[0] = bs_zeroes
.value
;
369 uint32_t next_common_bits
= 0;
371 for (uint32_t tests
= 0; tests
< nonces_to_bruteforce
; ++tests
) {
372 // common bits with preceding test nonce
373 uint32_t common_bits
= next_common_bits
; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
374 next_common_bits
= (tests
< nonces_to_bruteforce
- 1) ? trailing_zeros(bf_test_nonce_2nd_byte
[tests
] ^ bf_test_nonce_2nd_byte
[tests
+ 1]) : 0;
375 uint32_t parity_bit_idx
= 1; // start checking with the parity of second nonce byte
376 bitslice_value_t fb_bits
= fbb
[common_bits
]; // start with precomputed feedback bits from previous nonce
377 bitslice_value_t ks_bits
= ksb
[common_bits
]; // dito for first keystream bits
378 bitslice_value_t parity_bit_vector
= par
[common_bits
]; // dito for first parity vector
379 // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce
380 // bitslice_value_t ks_bits = ksb[0]; // dito for first keystream bits
381 // bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector
382 state_p
-= common_bits
; // and reuse the already calculated state bits
383 // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
384 // or the highest bit which differs from the previous nonce
385 for (int32_t ks_idx
= KEYSTREAM_SIZE
- 1 - common_bits
; ks_idx
>= 0; --ks_idx
) {
387 // decrypt nonce bits
388 const bitslice_value_t encrypted_nonce_bit_vector
= bitsliced_encrypted_nonces
[tests
][ks_idx
].value
;
389 const bitslice_value_t decrypted_nonce_bit_vector
= encrypted_nonce_bit_vector
^ ks_bits
;
391 // compute real parity bits on the fly
392 parity_bit_vector
^= decrypted_nonce_bit_vector
;
396 state_p
[0].value
= fb_bits
^ decrypted_nonce_bit_vector
;
398 // update crypto1 subfunctions
399 bitslice_value_t f20a_1
, f20b_1
, f20b_2
, f20a_2
, f20b_3
;
400 f20a_2
= f20a(state_p
[47 - 33].value
, state_p
[47 - 35].value
, state_p
[47 - 37].value
, state_p
[47 - 39].value
);
401 f20b_3
= f20b(state_p
[47 - 41].value
, state_p
[47 - 43].value
, state_p
[47 - 45].value
, state_p
[47 - 47].value
);
402 if (ks_idx
> KEYSTREAM_SIZE
- 8) {
403 f20a_1
= f20a(state_p
[47 - 9].value
, state_p
[47 - 11].value
, state_p
[47 - 13].value
, state_p
[47 - 15].value
);
404 f20b_1
= f20b(state_p
[47 - 17].value
, state_p
[47 - 19].value
, state_p
[47 - 21].value
, state_p
[47 - 23].value
);
405 f20b_2
= f20b(state_p
[47 - 25].value
, state_p
[47 - 27].value
, state_p
[47 - 29].value
, state_p
[47 - 31].value
);
406 crypto1_bs_f20b_2
[KEYSTREAM_SIZE
- ks_idx
] = f20b_2
;
407 crypto1_bs_f20b_3
[KEYSTREAM_SIZE
- ks_idx
] = f20b_3
;
408 } else if (ks_idx
> KEYSTREAM_SIZE
- 16) {
409 f20a_1
= f20a(state_p
[47 - 9].value
, state_p
[47 - 11].value
, state_p
[47 - 13].value
, state_p
[47 - 15].value
);
410 f20b_1
= crypto1_bs_f20b_2
[KEYSTREAM_SIZE
- ks_idx
- 8];
411 f20b_2
= f20b(state_p
[47 - 25].value
, state_p
[47 - 27].value
, state_p
[47 - 29].value
, state_p
[47 - 31].value
);
412 crypto1_bs_f20b_2
[KEYSTREAM_SIZE
- ks_idx
] = f20b_2
;
413 } else if (ks_idx
> KEYSTREAM_SIZE
- 24) {
414 f20a_1
= f20a(state_p
[47 - 9].value
, state_p
[47 - 11].value
, state_p
[47 - 13].value
, state_p
[47 - 15].value
);
415 f20b_1
= crypto1_bs_f20b_2
[KEYSTREAM_SIZE
- ks_idx
- 8];
416 f20b_2
= crypto1_bs_f20b_3
[KEYSTREAM_SIZE
- ks_idx
- 16];
418 f20a_1
= f20a(state_p
[47 - 9].value
, state_p
[47 - 11].value
, state_p
[47 - 13].value
, state_p
[47 - 15].value
);
419 f20b_1
= f20b(state_p
[47 - 17].value
, state_p
[47 - 19].value
, state_p
[47 - 21].value
, state_p
[47 - 23].value
);
420 f20b_2
= f20b(state_p
[47 - 25].value
, state_p
[47 - 27].value
, state_p
[47 - 29].value
, state_p
[47 - 31].value
);
422 // update keystream bit
423 ks_bits
= f20c(f20a_1
, f20b_1
, f20b_2
, f20a_2
, f20b_3
);
425 // for each completed byte:
426 if ((ks_idx
& 0x07) == 0) {
427 // get encrypted parity bits
428 const bitslice_value_t encrypted_parity_bit_vector
= bitsliced_encrypted_parity_bits
[tests
][parity_bit_idx
++].value
;
430 // decrypt parity bits
431 const bitslice_value_t decrypted_parity_bit_vector
= encrypted_parity_bit_vector
^ ks_bits
;
433 // compare actual parity bits with decrypted parity bits and take count in results vector
434 results
.value
&= ~parity_bit_vector
^ decrypted_parity_bit_vector
;
436 // make sure we still have a match in our set
437 // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
439 // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
440 // the short-circuiting also helps
441 if (results
.bytes64
[0] == 0
442 #if MAX_BITSLICES > 64
443 && results
.bytes64
[1] == 0
445 #if MAX_BITSLICES > 128
446 && results
.bytes64
[2] == 0
447 && results
.bytes64
[3] == 0
449 #if MAX_BITSLICES > 256
450 && results
.bytes64
[4] == 0
451 && results
.bytes64
[5] == 0
452 && results
.bytes64
[6] == 0
453 && results
.bytes64
[7] == 0
456 #if defined (DEBUG_BRUTE_FORCE)
457 if (elimination_step
< MAX_ELIMINATION_STEP
) {
458 keys_eliminated
[elimination_step
] += MAX_BITSLICES
;
461 #ifdef DEBUG_KEY_ELIMINATION
462 if (known_target_key
!= -1 && bucket_contains_test_key
[block_idx
] && *p_odd
== test_state
[ODD_STATE
]) {
463 PrintAndLogEx(INFO
, "Known target key eliminated in brute_force.");
464 PrintAndLogEx(INFO
, "block_idx = %d/%d, nonce = %d/%d", block_idx
, bitsliced_blocks
, tests
, nonces_to_bruteforce
);
469 // prepare for next nonce byte
470 #if defined (DEBUG_BRUTE_FORCE)
473 parity_bit_vector
= bs_zeroes
.value
;
475 // update feedback bit vector
478 (state_p
[47 - 0].value
^ state_p
[47 - 5].value
^ state_p
[47 - 9].value
^
479 state_p
[47 - 10].value
^ state_p
[47 - 12].value
^ state_p
[47 - 14].value
^
480 state_p
[47 - 15].value
^ state_p
[47 - 17].value
^ state_p
[47 - 19].value
^
481 state_p
[47 - 24].value
^ state_p
[47 - 25].value
^ state_p
[47 - 27].value
^
482 state_p
[47 - 29].value
^ state_p
[47 - 35].value
^ state_p
[47 - 39].value
^
483 state_p
[47 - 41].value
^ state_p
[47 - 42].value
^ state_p
[47 - 43].value
);
485 // remember feedback and keystream vectors for later use
486 uint8_t bit
= KEYSTREAM_SIZE
- ks_idx
;
487 if (bit
<= MIN(next_common_bits
, 7)) { // if needed and not yet stored
490 par
[bit
] = parity_bit_vector
;
493 // prepare for next nonce. Revert to initial state
494 state_p
= &states
[KEYSTREAM_SIZE
];
497 // all nonce tests were successful: we've found a possible key in this block!
498 uint32_t *p_even_test
= p_even
;
499 for (uint32_t results_word
= 0; results_word
< MAX_BITSLICES
/ 64; ++results_word
) {
500 uint64_t results64
= results
.bytes64
[results_word
];
501 for (uint32_t results_bit
= 0; results_bit
< 64; results_bit
++) {
502 if (results64
& 0x01) {
503 if (verify_key(cuid
, nonces
, best_first_bytes
, *p_odd
, *p_even_test
)) {
504 struct Crypto1State pcs
;
506 pcs
.even
= *p_even_test
;
507 lfsr_rollback_byte(&pcs
, (cuid
>> 24) ^ best_first_bytes
[0], true);
508 crypto1_get_lfsr(&pcs
, &key
);
509 bucket_states_tested
+= 64 * results_word
+ results_bit
;
512 #ifdef DEBUG_KEY_ELIMINATION
513 if (known_target_key
!= -1 && *p_even_test
== test_state
[EVEN_STATE
] && *p_odd
== test_state
[ODD_STATE
]) {
514 PrintAndLogEx(INFO
, "Known target key eliminated in brute_force verification.");
515 PrintAndLogEx(INFO
, "block_idx = %d/%d", block_idx
, bitsliced_blocks
);
519 #ifdef DEBUG_KEY_ELIMINATION
520 if (known_target_key
!= -1 && *p_even_test
== test_state
[EVEN_STATE
] && *p_odd
== test_state
[ODD_STATE
]) {
521 PrintAndLogEx(INFO
, "Known target key eliminated in brute_force (results_bit == 0).");
522 PrintAndLogEx(INFO
, "block_idx = %d/%d", block_idx
, bitsliced_blocks
);
527 if (p_even_test
== p_even_end
) {
533 #if defined (DEBUG_BRUTE_FORCE)
534 elimination_step
= 0;
536 bucket_states_tested
+= bucket_size
[block_idx
];
537 // prepare to set new states
538 state_p
= &states
[KEYSTREAM_SIZE
];
542 for (uint32_t block_idx
= 0; block_idx
< bitsliced_blocks
; ++block_idx
) {
543 free_bitslice(bitsliced_even_states
[block_idx
]);
545 free(bitsliced_even_states
);
546 free_bitslice(bitsliced_even_feedback
);
547 __sync_fetch_and_add(num_keys_tested
, bucket_states_tested
);
549 #if defined (DEBUG_BRUTE_FORCE)
550 for (uint32_t i
= 0; i
< MAX_ELIMINATION_STEP
; i
++) {
551 PrintAndLogEx(INFO
, "Eliminated after %2u test_bytes: %5.2f%%", i
+ 1, (float)keys_eliminated
[i
] / bucket_states_tested
* 100);
561 // pointers to functions:
562 crack_states_bitsliced_t
*crack_states_bitsliced_function_p
= &crack_states_bitsliced_dispatch
;
563 bitslice_test_nonces_t
*bitslice_test_nonces_function_p
= &bitslice_test_nonces_dispatch
;
565 static SIMDExecInstr intSIMDInstr
= SIMD_AUTO
;
567 void SetSIMDInstr(SIMDExecInstr instr
) {
568 intSIMDInstr
= instr
;
570 crack_states_bitsliced_function_p
= &crack_states_bitsliced_dispatch
;
571 bitslice_test_nonces_function_p
= &bitslice_test_nonces_dispatch
;
574 static SIMDExecInstr
GetSIMDInstr(void) {
577 #if defined(COMPILER_HAS_SIMD_X86)
578 __builtin_cpu_init();
581 #if defined(COMPILER_HAS_SIMD_AVX512)
582 if (__builtin_cpu_supports("avx512f"))
586 #if defined(COMPILER_HAS_SIMD_X86)
587 if (__builtin_cpu_supports("avx2"))
589 else if (__builtin_cpu_supports("avx"))
591 else if (__builtin_cpu_supports("sse2"))
593 else if (__builtin_cpu_supports("mmx"))
597 #if defined(COMPILER_HAS_SIMD_NEON)
607 SIMDExecInstr
GetSIMDInstrAuto(void) {
608 SIMDExecInstr instr
= intSIMDInstr
;
609 if (instr
== SIMD_AUTO
)
610 return GetSIMDInstr();
615 // determine the available instruction set at runtime and call the correct function
616 uint64_t crack_states_bitsliced_dispatch(uint32_t cuid
, uint8_t *best_first_bytes
, statelist_t
*p
,
617 uint32_t *keys_found
, uint64_t *num_keys_tested
,
618 uint32_t nonces_to_bruteforce
, const uint8_t *bf_test_nonce_2nd_byte
,
619 noncelist_t
*nonces
) {
620 switch (GetSIMDInstrAuto()) {
621 #if defined(COMPILER_HAS_SIMD_AVX512)
623 crack_states_bitsliced_function_p
= &crack_states_bitsliced_AVX512
;
626 #if defined(COMPILER_HAS_SIMD_X86)
628 crack_states_bitsliced_function_p
= &crack_states_bitsliced_AVX2
;
631 crack_states_bitsliced_function_p
= &crack_states_bitsliced_AVX
;
634 crack_states_bitsliced_function_p
= &crack_states_bitsliced_SSE2
;
637 crack_states_bitsliced_function_p
= &crack_states_bitsliced_MMX
;
640 #if defined(COMPILER_HAS_SIMD_NEON)
642 crack_states_bitsliced_function_p
= &crack_states_bitsliced_NEON
;
647 crack_states_bitsliced_function_p
= &crack_states_bitsliced_NOSIMD
;
651 // call the most optimized function for this CPU
652 return (*crack_states_bitsliced_function_p
)(cuid
, best_first_bytes
, p
, keys_found
, num_keys_tested
, nonces_to_bruteforce
, bf_test_nonce_2nd_byte
, nonces
);
655 void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce
, const uint32_t *bf_test_nonce
, const uint8_t *bf_test_nonce_par
) {
656 switch (GetSIMDInstrAuto()) {
657 #if defined(COMPILER_HAS_SIMD_AVX512)
659 bitslice_test_nonces_function_p
= &bitslice_test_nonces_AVX512
;
662 #if defined(COMPILER_HAS_SIMD_X86)
664 bitslice_test_nonces_function_p
= &bitslice_test_nonces_AVX2
;
667 bitslice_test_nonces_function_p
= &bitslice_test_nonces_AVX
;
670 bitslice_test_nonces_function_p
= &bitslice_test_nonces_SSE2
;
673 bitslice_test_nonces_function_p
= &bitslice_test_nonces_MMX
;
676 #if defined(COMPILER_HAS_SIMD_NEON)
678 bitslice_test_nonces_function_p
= &bitslice_test_nonces_NEON
;
683 bitslice_test_nonces_function_p
= &bitslice_test_nonces_NOSIMD
;
687 // call the most optimized function for this CPU
688 (*bitslice_test_nonces_function_p
)(nonces_to_bruteforce
, bf_test_nonce
, bf_test_nonce_par
);
691 // Entries to dispatched function calls
692 uint64_t crack_states_bitsliced(uint32_t cuid
, uint8_t *best_first_bytes
, statelist_t
*p
, uint32_t *keys_found
, uint64_t *num_keys_tested
, uint32_t nonces_to_bruteforce
, uint8_t *bf_test_nonce_2nd_byte
, noncelist_t
*nonces
) {
693 return (*crack_states_bitsliced_function_p
)(cuid
, best_first_bytes
, p
, keys_found
, num_keys_tested
, nonces_to_bruteforce
, bf_test_nonce_2nd_byte
, nonces
);
696 void bitslice_test_nonces(uint32_t nonces_to_bruteforce
, uint32_t *bf_test_nonce
, uint8_t *bf_test_nonce_par
) {
697 (*bitslice_test_nonces_function_p
)(nonces_to_bruteforce
, bf_test_nonce
, bf_test_nonce_par
);