Merge pull request #2593 from Akury83/master
[RRG-proxmark3.git] / armsrc / optimized_cipher.c
blobc2ffa5da970a97d3276ddbc7b7e011bbc574c0c0
1 //-----------------------------------------------------------------------------
2 // Borrowed initially from https://github.com/holiman/loclass
3 // Copyright (C) 2014 Martin Holst Swende
4 // Copyright (C) Proxmark3 contributors. See AUTHORS.md for details.
5 //
6 // This program is free software: you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation, either version 3 of the License, or
9 // (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // See LICENSE.txt for the text of the license.
17 //-----------------------------------------------------------------------------
18 // WARNING
20 // THIS CODE IS CREATED FOR EXPERIMENTATION AND EDUCATIONAL USE ONLY.
22 // USAGE OF THIS CODE IN OTHER WAYS MAY INFRINGE UPON THE INTELLECTUAL
23 // PROPERTY OF OTHER PARTIES, SUCH AS INSIDE SECURE AND HID GLOBAL,
24 // AND MAY EXPOSE YOU TO AN INFRINGEMENT ACTION FROM THOSE PARTIES.
26 // THIS CODE SHOULD NEVER BE USED TO INFRINGE PATENTS OR INTELLECTUAL PROPERTY RIGHTS.
27 //-----------------------------------------------------------------------------
28 // It is a reconstruction of the cipher engine used in iClass, and RFID techology.
30 // The implementation is based on the work performed by
31 // Flavio D. Garcia, Gerhard de Koning Gans, Roel Verdult and
32 // Milosch Meriac in the paper "Dismantling IClass".
33 //-----------------------------------------------------------------------------
35 This file contains an optimized version of the MAC-calculation algorithm. Some measurements on
36 a std laptop showed it runs in about 1/3 of the time:
38 Std: 0.428962
39 Opt: 0.151609
41 Additionally, it is self-reliant, not requiring e.g. bitstreams from the cipherutils, thus can
42 be easily dropped into a code base.
44 The optimizations have been performed in the following steps:
45 * Parameters passed by reference instead of by value.
46 * Iteration instead of recursion, un-nesting recursive loops into for-loops.
47 * Handling of bytes instead of individual bits, for less shuffling and masking
48 * Less creation of "objects", structs, and instead reuse of alloc:ed memory
49 * Inlining some functions via #define:s
51 As a consequence, this implementation is less generic. Also, I haven't bothered documenting this.
52 For a thorough documentation, check out the MAC-calculation within cipher.c instead.
54 -- MHS 2015
55 **/
57 /**
59 The runtime of opt_doTagMAC_2() with the MHS optimized version was 403 microseconds on Proxmark3.
60 This was still to slow for some newer readers which didn't want to wait that long.
62 Further optimizations to speedup the MAC calculations:
63 * Optimized opt_Tt logic
64 * Look up table for opt_select
65 * Removing many unnecessary bit maskings (& 0x1)
66 * updating state in place instead of alternating use of a second state structure
67 * remove the necessity to reverse bits of input and output bytes
69 opt_doTagMAC_2() now completes in 270 microseconds.
71 -- piwi 2019
72 **/
74 /**
75 add the possibility to do iCLASS on device only
76 -- iceman 2020
77 **/
79 #include "optimized_cipher.h"
80 #include "optimized_elite.h"
81 #include "optimized_ikeys.h"
82 #include "optimized_cipherutils.h"
84 static const uint8_t opt_select_LUT[256] = {
85 00, 03, 02, 01, 02, 03, 00, 01, 04, 07, 07, 04, 06, 07, 05, 04,
86 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
87 06, 05, 04, 07, 04, 05, 06, 07, 06, 05, 05, 06, 04, 05, 07, 06,
88 07, 04, 05, 06, 04, 05, 06, 07, 07, 04, 04, 07, 04, 05, 07, 06,
89 06, 05, 04, 07, 04, 05, 06, 07, 02, 01, 01, 02, 00, 01, 03, 02,
90 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
91 00, 03, 02, 01, 02, 03, 00, 01, 00, 03, 03, 00, 02, 03, 01, 00,
92 05, 06, 07, 04, 06, 07, 04, 05, 05, 06, 06, 05, 06, 07, 05, 04,
93 02, 01, 00, 03, 00, 01, 02, 03, 06, 05, 05, 06, 04, 05, 07, 06,
94 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
95 02, 01, 00, 03, 00, 01, 02, 03, 02, 01, 01, 02, 00, 01, 03, 02,
96 03, 00, 01, 02, 00, 01, 02, 03, 03, 00, 00, 03, 00, 01, 03, 02,
97 04, 07, 06, 05, 06, 07, 04, 05, 00, 03, 03, 00, 02, 03, 01, 00,
98 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
99 04, 07, 06, 05, 06, 07, 04, 05, 04, 07, 07, 04, 06, 07, 05, 04,
100 01, 02, 03, 00, 02, 03, 00, 01, 01, 02, 02, 01, 02, 03, 01, 00
103 /********************** the table above has been generated with this code: ********
104 #include "util.h"
105 static void init_opt_select_LUT(void) {
106 for (int r = 0; r < 256; r++) {
107 uint8_t r_ls2 = r << 2;
108 uint8_t r_and_ls2 = r & r_ls2;
109 uint8_t r_or_ls2 = r | r_ls2;
110 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
111 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r;
112 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r;
113 opt_select_LUT[r] = (z0 & 4) | (z1 & 2) | (z2 & 1);
115 print_result("", opt_select_LUT, 256);
117 ***********************************************************************************/
119 #define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\
120 |(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\
121 |(1 & (((r & ~(r << 2)) >> 4) ^ ((r & (r << 2)) >> 3) ^ r ^ x))
124 * Some background on the expression above can be found here...
125 uint8_t xopt__select(bool x, bool y, uint8_t r)
128 //r: r0 r1 r2 r3 r4 r5 r6 r7
129 //r_ls2: r2 r3 r4 r5 r6 r7 0 0
130 // z0
131 // z1
133 // uint8_t z0 = (r0 & r2) ^ (r1 & ~r3) ^ (r2 | r4); // <-- original
134 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
136 // uint8_t z1 = (r0 | r2) ^ ( r5 | r7) ^ r1 ^ r6 ^ x ^ y; // <-- original
137 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1);
139 // uint8_t z2 = (r3 & ~r5) ^ (r4 & r6 ) ^ r7 ^ x; // <-- original
140 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r ^ x;
142 return (z0 & 4) | (z1 & 2) | (z2 & 1);
146 static void opt_successor(const uint8_t *k, State_t *s, uint8_t y) {
147 // #define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14) ^ (s->t >> 10) ^ (s->t >> 8) ^ (s->t >> 5) ^ (s->t >> 4)^ (s->t >> 1) ^ s->t))
148 // uint8_t Tt = opt_T(s);
149 uint16_t Tt = s->t & 0xc533;
150 Tt = Tt ^ (Tt >> 1);
151 Tt = Tt ^ (Tt >> 4);
152 Tt = Tt ^ (Tt >> 10);
153 Tt = Tt ^ (Tt >> 8);
155 s->t = (s->t >> 1);
156 s->t |= (Tt ^ (s->r >> 7) ^ (s->r >> 3)) << 15;
158 uint8_t opt_B = s->b;
159 opt_B ^= s->b >> 6;
160 opt_B ^= s->b >> 5;
161 opt_B ^= s->b >> 4;
163 s->b = s->b >> 1;
164 s->b |= (opt_B ^ s->r) << 7;
166 uint8_t opt_select = opt_select_LUT[s->r] & 0x04;
167 opt_select |= (opt_select_LUT[s->r] ^ ((Tt ^ y) << 1)) & 0x02;
168 opt_select |= (opt_select_LUT[s->r] ^ Tt) & 0x01;
170 uint8_t r = s->r;
171 s->r = (k[opt_select] ^ s->b) + s->l ;
172 s->l = s->r + r;
175 static void opt_suc(const uint8_t *k, State_t *s, const uint8_t *in, uint8_t length, bool add32Zeroes) {
176 for (int i = 0; i < length; i++) {
177 uint8_t head;
178 head = in[i];
179 opt_successor(k, s, head);
181 head >>= 1;
182 opt_successor(k, s, head);
184 head >>= 1;
185 opt_successor(k, s, head);
187 head >>= 1;
188 opt_successor(k, s, head);
190 head >>= 1;
191 opt_successor(k, s, head);
193 head >>= 1;
194 opt_successor(k, s, head);
196 head >>= 1;
197 opt_successor(k, s, head);
199 head >>= 1;
200 opt_successor(k, s, head);
202 //For tag MAC, an additional 32 zeroes
203 if (add32Zeroes) {
204 for (int i = 0; i < 8; i++) {
205 opt_successor(k, s, 0);
206 opt_successor(k, s, 0);
207 opt_successor(k, s, 0);
208 opt_successor(k, s, 0);
213 static void opt_output(const uint8_t *k, State_t *s, uint8_t *buffer) {
214 for (uint8_t times = 0; times < 4; times++) {
215 uint8_t bout = 0;
216 bout |= (s->r & 0x4) >> 2;
217 opt_successor(k, s, 0);
218 bout |= (s->r & 0x4) >> 1;
219 opt_successor(k, s, 0);
220 bout |= (s->r & 0x4);
221 opt_successor(k, s, 0);
222 bout |= (s->r & 0x4) << 1;
223 opt_successor(k, s, 0);
224 bout |= (s->r & 0x4) << 2;
225 opt_successor(k, s, 0);
226 bout |= (s->r & 0x4) << 3;
227 opt_successor(k, s, 0);
228 bout |= (s->r & 0x4) << 4;
229 opt_successor(k, s, 0);
230 bout |= (s->r & 0x4) << 5;
231 opt_successor(k, s, 0);
232 buffer[times] = bout;
236 static void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) {
237 State_t _init = {
238 ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l
239 ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r
240 0x4c, // b
241 0xE012 // t
244 opt_suc(k, &_init, input, 12, false);
245 opt_output(k, &_init, out);
248 static void opt_MAC_N(uint8_t *k, uint8_t *input, uint8_t in_size, uint8_t *out) {
249 State_t _init = {
250 ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l
251 ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r
252 0x4c, // b
253 0xE012 // t
256 opt_suc(k, &_init, input, in_size, false);
257 opt_output(k, &_init, out);
260 void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) {
261 uint8_t dest [] = {0, 0, 0, 0, 0, 0, 0, 0};
262 opt_MAC(div_key_p, cc_nr_p, dest);
263 memcpy(mac, dest, 4);
266 void opt_doReaderMAC_2(State_t _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) {
267 opt_suc(div_key_p, &_init, nr, 4, false);
268 opt_output(div_key_p, &_init, mac);
272 void doMAC_N(uint8_t *in_p, uint8_t in_size, uint8_t *div_key_p, uint8_t mac[4]) {
273 uint8_t dest [] = {0, 0, 0, 0, 0, 0, 0, 0};
274 opt_MAC_N(div_key_p, in_p, in_size, dest);
275 memcpy(mac, dest, 4);
278 void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) {
279 State_t _init = {
280 ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
281 ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
282 0x4c, // b
283 0xE012 // t
285 opt_suc(div_key_p, &_init, cc_p, 12, true);
286 opt_output(div_key_p, &_init, mac);
290 * The tag MAC can be divided (both can, but no point in dividing the reader mac) into
291 * two functions, since the first 8 bytes are known, we can pre-calculate the state
292 * reached after feeding CC to the cipher.
293 * @param cc_p
294 * @param div_key_p
295 * @return the cipher state
297 State_t opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) {
298 State_t _init = {
299 ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
300 ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
301 0x4c, // b
302 0xE012 // t
304 opt_suc(div_key_p, &_init, cc_p, 8, false);
305 return _init;
309 * The second part of the tag MAC calculation, since the CC is already calculated into the state,
310 * this function is fed only the NR, and internally feeds the remaining 32 0-bits to generate the tag
311 * MAC response.
312 * @param _init - precalculated cipher state
313 * @param nr - the reader challenge
314 * @param mac - where to store the MAC
315 * @param div_key_p - the key to use
317 void opt_doTagMAC_2(State_t _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) {
318 opt_suc(div_key_p, &_init, nr, 4, true);
319 opt_output(div_key_p, &_init, mac);
323 void iclass_calc_div_key(uint8_t *csn, uint8_t *key, uint8_t *div_key, bool elite) {
324 if (elite) {
325 uint8_t keytable[128] = {0};
326 uint8_t key_index[8] = {0};
327 uint8_t key_sel[8] = { 0 };
328 uint8_t key_sel_p[8] = { 0 };
329 hash2(key, keytable);
330 hash1(csn, key_index);
331 for (uint8_t i = 0; i < 8 ; i++)
332 key_sel[i] = keytable[key_index[i]];
334 //Permute from iclass format to standard format
335 permutekey_rev(key_sel, key_sel_p);
336 diversifyKey(csn, key_sel_p, div_key);
337 } else {
338 diversifyKey(csn, key, div_key);