3 * This code is heavily based on the HiTag2 Hell CPU implementation
4 * from https
://github.com
/factoritbv
/hitag2hell by FactorIT B.V.
5 * This file is the file openocl.cl with the following change
:
6 * * promote keystream from constant to argument.
9 #define MAX_BITSLICES
32
10 #define KEYSTREAM_LENGTH
32
11 typedef uint bitslice_t __attribute__
((aligned(MAX_BITSLICES / 8)));
13 inline uint lut3
(uint a
, uint b
, uint c
, uint imm
) {
15 asm
("lop3.b32 %0, %1, %2, %3, %4;"
17 : "r"(a), "r"(b), "r"(c), "i"(imm));
20 #define f_a_bs_lut_1
(((0xf0|
0xcc)&0xaa)^
0xcc)
21 #define f_a_bs_lut_2
(~
((0xf0|
0xcc)^
0xaa))
22 #define f_a_bs
(a,b
,c
,d
) ((lut3(a,d
,lut3
(a,b
,c
,f_a_bs_lut_1
),f_a_bs_lut_2
))) // 2 luts
24 #define f_b_bs_lut_1
(((0xf0|
0xcc)&0xaa))
25 #define f_b_bs_lut_2
(~
((0xf0|
0xcc|
0xaa)))
26 #define f_b_bs
(a,b
,c
,d
) ((lut3(d,c
,a^b
,f_b_bs_lut_1
)^lut3
(d,a
,b
, f_b_bs_lut_2
))) // 2 luts
, 2 xors
28 #define f_c_bs_lut_1
(((0xf0^
0xcc)|
0xaa))
29 #define f_c_bs_lut_2
(~
((0xf0^
0xcc)&(0xaa^
0xcc)))
31 // 4 luts
, 2 ands
, 1 xor
32 #define f_c_bs
(a,b
,c
,d
,e
) (((lut3((lut3(c,e
,d
, f_c_bs_lut_1
) & a
), b
, c
, f_c_bs_lut_2
)) ^
(lut3(d,e
,a
, f_c_bs_lut_1
) & lut3
(d,b
,c
,f_c_bs_lut_1
))))
34 // non-lut version of F
: 20 lookups
+ 6*2 + 7*3 + 13 + = 66 ops
35 // lut version
: 20 lookups
+ 2*2 + 4*3 + 7 + = 43 ops
37 #define lfsr_lut
(0xf0^
0xaa^
0xcc)
39 #define lfsr_bs
(i) ( lut3
(lut3(lut3(state[-
2+i
+ 0], state
[-
2+i
+ 2], state
[-
2+i
+ 3], lfsr_lut
), \
40 lut3
(state[-
2+i
+ 6], state
[-
2+i
+ 7], state
[-
2+i
+ 8], lfsr_lut
), \
41 lut3
(state[-
2+i
+16], state
[-
2+i
+22], state
[-
2+i
+23], lfsr_lut
), \
43 lut3
(state[-
2+i
+26], state
[-
2+i
+30], state
[-
2+i
+41], lfsr_lut
), \
44 lut3
(state[-
2+i
+42], state
[-
2+i
+43], state
[-
2+i
+46], lfsr_lut
), lfsr_lut
) ^ state
[-
2+i
+47])
46 // 46 iterations
* 4 ops
47 inline void bitslice
(bitslice_t *restrict b
, ulong x
, const uchar n
) {
48 for
(uchar i
= 0; i < n; ++i) {
54 // don't care about the complexity of this function
55 inline ulong unbitslice(const bitslice_t *restrict b, const uchar s, const uchar n) {
56 const bitslice_t mask = ((bitslice_t) 1) << s;
58 for (char i = n - 1; i >= 0; --i) {
60 result |= (bool)(b[i] & mask
);
65 // format this array with
32 bitsliced vectors of ones and zeroes representing the inverted keystream
68 __attribute__
((vec_type_hint(bitslice_t)))
69 void find_state
(const uint candidate_index_base
,
70 __global const ushort
*restrict candidates
,
71 __global const bitslice_t
*restrict keystream
,
72 __global ulong
*restrict matches
,
73 __global uint
*restrict matches_found
) {
74 // we never actually set or use the lowest
2 bits the initial state
, so we can save
2 bitslices everywhere
75 bitslice_t state
[-
2 + 48 + KEYSTREAM_LENGTH
];
76 // set bits
0+2, 0+3, 0+5, 0+6, 0+8, 0+12, 0+14, 0+15, 0+17, 0+21, 0+23, 0+26, 0+28, 0+29, 0+31, 0+33, 0+34, 0+43, 0+44, 0+46
77 // get the
48-bit cipher states as
3 16-bit words from the host memory queue
(to save
25% throughput
)
78 const uint index
= 3 * (candidate_index_base + get_global_id
(0)); // dimension 0 should at least keep the execution units saturated - 8k is fine
79 const ulong candidate
= ((ulong) candidates
[index] << 32) | ((ulong) candidates[index + 1] << 16) | candidates[index + 2];
80 // set all 48 state bits except the lowest 2
81 bitslice(&state[-2 + 2], candidate, 46);
82 // set bits 3, 6, 8, 12, 15
83 state[-2 + 1 + 3] = 0xaaaaaaaa;
84 state[-2 + 1 + 6] = 0xcccccccc;
85 state[-2 + 1 + 8] = 0xf0f0f0f0;
86 state[-2 + 1 + 12] = 0xff00ff00;
87 state[-2 + 1 + 15] = 0xffff0000;
88 ushort i1 = get_global_id(1); // dimension 1 should be 1024
89 state[-2 + 18] = -((bool)(i1 & 0x1));
90 state[-2 + 22] = -((bool)(i1 & 0x2));
91 state[-2 + 24] = -((bool)(i1 & 0x4));
92 state[-2 + 27] = -((bool)(i1 & 0x8));
93 state[-2 + 30] = -((bool)(i1 & 0x10));
94 state[-2 + 32] = -((bool)(i1 & 0x20));
95 state[-2 + 35] = -((bool)(i1 & 0x40));
96 state[-2 + 45] = -((bool)(i1 & 0x80));
97 state[-2 + 47] = -((bool)(i1 & 0x100));
98 state[-2 + 48] = -((bool)(i1 & 0x200)); // guess lfsr output 0
100 const bitslice_t filter1_0 = f_a_bs(state[-2 + 3], state[-2 + 4], state[-2 + 6], state[-2 + 7]);
101 const bitslice_t filter1_1 = f_b_bs(state[-2 + 9], state[-2 + 13], state[-2 + 15], state[-2 + 16]);
102 const bitslice_t filter1_2 = f_b_bs(state[-2 + 18], state[-2 + 22], state[-2 + 24], state[-2 + 27]);
103 const bitslice_t filter1_3 = f_b_bs(state[-2 + 29], state[-2 + 30], state[-2 + 32], state[-2 + 34]);
104 const bitslice_t filter1_4 = f_a_bs(state[-2 + 35], state[-2 + 44], state[-2 + 45], state[-2 + 47]);
105 const bitslice_t filter1 = f_c_bs(filter1_0, filter1_1, filter1_2, filter1_3, filter1_4);
106 const bitslice_t results1 = filter1 ^ keystream[1];
107 if (!results1) return;
108 const bitslice_t filter2_0 = f_a_bs(state[-2 + 4], state[-2 + 5], state[-2 + 7], state[-2 + 8]);
109 const bitslice_t filter2_3 = f_b_bs(state[-2 + 30], state[-2 + 31], state[-2 + 33], state[-2 + 35]);
110 const bitslice_t filter3_0 = f_a_bs(state[-2 + 5], state[-2 + 6], state[-2 + 8], state[-2 + 9]);
111 const bitslice_t filter5_2 = f_b_bs(state[-2 + 22], state[-2 + 26], state[-2 + 28], state[-2 + 31]);
112 const bitslice_t filter6_2 = f_b_bs(state[-2 + 23], state[-2 + 27], state[-2 + 29], state[-2 + 32]);
113 const bitslice_t filter7_2 = f_b_bs(state[-2 + 24], state[-2 + 28], state[-2 + 30], state[-2 + 33]);
114 const bitslice_t filter9_1 = f_b_bs(state[-2 + 17], state[-2 + 21], state[-2 + 23], state[-2 + 24]);
115 const bitslice_t filter9_2 = f_b_bs(state[-2 + 26], state[-2 + 30], state[-2 + 32], state[-2 + 35]);
116 const bitslice_t filter10_0 = f_a_bs(state[-2 + 12], state[-2 + 13], state[-2 + 15], state[-2 + 16]);
117 const bitslice_t filter11_0 = f_a_bs(state[-2 + 13], state[-2 + 14], state[-2 + 16], state[-2 + 17]);
118 const bitslice_t filter12_0 = f_a_bs(state[-2 + 14], state[-2 + 15], state[-2 + 17], state[-2 + 18]);
119 const bitslice_t filter14_1 = f_b_bs(state[-2 + 22], state[-2 + 26], state[-2 + 28], state[-2 + 29]);
120 const bitslice_t filter15_1 = f_b_bs(state[-2 + 23], state[-2 + 27], state[-2 + 29], state[-2 + 30]);
121 const bitslice_t filter15_3 = f_b_bs(state[-2 + 43], state[-2 + 44], state[-2 + 46], state[-2 + 48]);
122 const bitslice_t filter16_1 = f_b_bs(state[-2 + 24], state[-2 + 28], state[-2 + 30], state[-2 + 31]);
123 for (uchar i2 = 0; i2 < (1 << 5);) {
124 state[-2 + 10] = -((bool)(i2 & 0x1));
125 state[-2 + 19] = -((bool)(i2 & 0x2));
126 state[-2 + 25] = -((bool)(i2 & 0x4));
127 state[-2 + 36] = -((bool)(i2 & 0x8));
128 state[-2 + 49] = -((bool)(i2 & 0x10)); // guess lfsr output 1
131 const bitslice_t filter2_1 = f_b_bs(state[-2 + 10], state[-2 + 14], state[-2 + 16], state[-2 + 17]);
132 const bitslice_t filter2_2 = f_b_bs(state[-2 + 19], state[-2 + 23], state[-2 + 25], state[-2 + 28]);
133 const bitslice_t filter2_4 = f_a_bs(state[-2 + 36], state[-2 + 45], state[-2 + 46], state[-2 + 48]);
134 const bitslice_t filter2 = f_c_bs(filter2_0, filter2_1, filter2_2, filter2_3, filter2_4);
135 const bitslice_t results2 = results1 & (filter2 ^ keystream[2]);
136 if (!results2) continue;
137 state[-2 + 50] = lfsr_bs(2);
138 const bitslice_t filter3_3 = f_b_bs(state[-2 + 31], state[-2 + 32], state[-2 + 34], state[-2 + 36]);
139 const bitslice_t filter4_0 = f_a_bs(state[-2 + 6], state[-2 + 7], state[-2 + 9], state[-2 + 10]);
140 const bitslice_t filter4_1 = f_b_bs(state[-2 + 12], state[-2 + 16], state[-2 + 18], state[-2 + 19]);
141 const bitslice_t filter4_2 = f_b_bs(state[-2 + 21], state[-2 + 25], state[-2 + 27], state[-2 + 30]);
142 const bitslice_t filter7_0 = f_a_bs(state[-2 + 9], state[-2 + 10], state[-2 + 12], state[-2 + 13]);
143 const bitslice_t filter7_1 = f_b_bs(state[-2 + 15], state[-2 + 19], state[-2 + 21], state[-2 + 22]);
144 const bitslice_t filter8_2 = f_b_bs(state[-2 + 25], state[-2 + 29], state[-2 + 31], state[-2 + 34]);
145 const bitslice_t filter10_1 = f_b_bs(state[-2 + 18], state[-2 + 22], state[-2 + 24], state[-2 + 25]);
146 const bitslice_t filter10_2 = f_b_bs(state[-2 + 27], state[-2 + 31], state[-2 + 33], state[-2 + 36]);
147 const bitslice_t filter11_1 = f_b_bs(state[-2 + 19], state[-2 + 23], state[-2 + 25], state[-2 + 26]);
148 const bitslice_t filter13_0 = f_a_bs(state[-2 + 15], state[-2 + 16], state[-2 + 18], state[-2 + 19]);
149 const bitslice_t filter13_1 = f_b_bs(state[-2 + 21], state[-2 + 25], state[-2 + 27], state[-2 + 28]);
150 const bitslice_t filter16_0 = f_a_bs(state[-2 + 18], state[-2 + 19], state[-2 + 21], state[-2 + 22]);
151 const bitslice_t filter16_3 = f_b_bs(state[-2 + 44], state[-2 + 45], state[-2 + 47], state[-2 + 49]);
152 const bitslice_t filter17_1 = f_b_bs(state[-2 + 25], state[-2 + 29], state[-2 + 31], state[-2 + 32]);
153 const bitslice_t filter17_3 = f_b_bs(state[-2 + 45], state[-2 + 46], state[-2 + 48], state[-2 + 50]);
154 for (uchar i3 = 0; i3 < (1 << 3);) {
155 state[-2 + 11] = -((bool)(i3 & 0x1));
156 state[-2 + 20] = -((bool)(i3 & 0x2));
157 state[-2 + 37] = -((bool)(i3 & 0x4));
160 const bitslice_t filter3_1 = f_b_bs(state[-2 + 11], state[-2 + 15], state[-2 + 17], state[-2 + 18]);
161 const bitslice_t filter3_2 = f_b_bs(state[-2 + 20], state[-2 + 24], state[-2 + 26], state[-2 + 29]);
162 const bitslice_t filter3_4 = f_a_bs(state[-2 + 37], state[-2 + 46], state[-2 + 47], state[-2 + 49]);
163 const bitslice_t filter3 = f_c_bs(filter3_0, filter3_1, filter3_2, filter3_3, filter3_4);
164 const bitslice_t results3 = results2 & (filter3 ^ keystream[3]);
165 if (!results3) continue;
166 state[-2 + 51] = lfsr_bs(3);
167 state[-2 + 52] = lfsr_bs(4);
168 state[-2 + 53] = lfsr_bs(5);
169 state[-2 + 54] = lfsr_bs(6);
170 state[-2 + 55] = lfsr_bs(7);
171 const bitslice_t filter4_3 = f_b_bs(state[-2 + 32], state[-2 + 33], state[-2 + 35], state[-2 + 37]);
172 const bitslice_t filter5_0 = f_a_bs(state[-2 + 7], state[-2 + 8], state[-2 + 10], state[-2 + 11]);
173 const bitslice_t filter5_1 = f_b_bs(state[-2 + 13], state[-2 + 17], state[-2 + 19], state[-2 + 20]);
174 const bitslice_t filter6_0 = f_a_bs(state[-2 + 8], state[-2 + 9], state[-2 + 11], state[-2 + 12]);
175 const bitslice_t filter6_1 = f_b_bs(state[-2 + 14], state[-2 + 18], state[-2 + 20], state[-2 + 21]);
176 const bitslice_t filter8_0 = f_a_bs(state[-2 + 10], state[-2 + 11], state[-2 + 13], state[-2 + 14]);
177 const bitslice_t filter8_1 = f_b_bs(state[-2 + 16], state[-2 + 20], state[-2 + 22], state[-2 + 23]);
178 const bitslice_t filter9_0 = f_a_bs(state[-2 + 11], state[-2 + 12], state[-2 + 14], state[-2 + 15]);
179 const bitslice_t filter9_4 = f_a_bs(state[-2 + 43], state[-2 + 52], state[-2 + 53], state[-2 + 55]);
180 const bitslice_t filter11_2 = f_b_bs(state[-2 + 28], state[-2 + 32], state[-2 + 34], state[-2 + 37]);
181 const bitslice_t filter12_1 = f_b_bs(state[-2 + 20], state[-2 + 24], state[-2 + 26], state[-2 + 27]);
182 const bitslice_t filter14_0 = f_a_bs(state[-2 + 16], state[-2 + 17], state[-2 + 19], state[-2 + 20]);
183 const bitslice_t filter15_0 = f_a_bs(state[-2 + 17], state[-2 + 18], state[-2 + 20], state[-2 + 21]);
184 const bitslice_t filter17_0 = f_a_bs(state[-2 + 19], state[-2 + 20], state[-2 + 22], state[-2 + 23]);
185 for (uchar i4 = 0; i4 < (1 << 1);) {
186 state[-2 + 38] = -i4;
189 const bitslice_t filter4_4 = f_a_bs(state[-2 + 38], state[-2 + 47], state[-2 + 48], state[-2 + 50]);
190 const bitslice_t filter4 = f_c_bs(filter4_0, filter4_1, filter4_2, filter4_3, filter4_4);
191 const bitslice_t results4 = results3 & (filter4 ^ keystream[4]);
192 if (!results4) continue;
193 state[-2 + 56] = lfsr_bs(8);
194 const bitslice_t filter5_3 = f_b_bs(state[-2 + 33], state[-2 + 34], state[-2 + 36], state[-2 + 38]);
195 const bitslice_t filter10_4 = f_a_bs(state[-2 + 44], state[-2 + 53], state[-2 + 54], state[-2 + 56]);
196 const bitslice_t filter12_2 = f_b_bs(state[-2 + 29], state[-2 + 33], state[-2 + 35], state[-2 + 38]);
197 for (uchar i5 = 0; i5 < (1 << 1);) {
198 state[-2 + 39] = -i5;
201 const bitslice_t filter5_4 = f_a_bs(state[-2 + 39], state[-2 + 48], state[-2 + 49], state[-2 + 51]);
202 const bitslice_t filter5 = f_c_bs(filter5_0, filter5_1, filter5_2, filter5_3, filter5_4);
203 const bitslice_t results5 = results4 & (filter5 ^ keystream[5]);
204 if (!results5) continue;
205 state[-2 + 57] = lfsr_bs(9);
206 const bitslice_t filter6_3 = f_b_bs(state[-2 + 34], state[-2 + 35], state[-2 + 37], state[-2 + 39]);
207 const bitslice_t filter11_4 = f_a_bs(state[-2 + 45], state[-2 + 54], state[-2 + 55], state[-2 + 57]);
208 const bitslice_t filter13_2 = f_b_bs(state[-2 + 30], state[-2 + 34], state[-2 + 36], state[-2 + 39]);
209 for (uchar i6 = 0; i6 < (1 << 1);) {
210 state[-2 + 40] = -i6;
213 const bitslice_t filter6_4 = f_a_bs(state[-2 + 40], state[-2 + 49], state[-2 + 50], state[-2 + 52]);
214 const bitslice_t filter6 = f_c_bs(filter6_0, filter6_1, filter6_2, filter6_3, filter6_4);
215 const bitslice_t results6 = results5 & (filter6 ^ keystream[6]);
216 if (!results6) continue;
217 state[-2 + 58] = lfsr_bs(10);
218 const bitslice_t filter7_3 = f_b_bs(state[-2 + 35], state[-2 + 36], state[-2 + 38], state[-2 + 40]);
219 const bitslice_t filter12_4 = f_a_bs(state[-2 + 46], state[-2 + 55], state[-2 + 56], state[-2 + 58]);
220 const bitslice_t filter14_2 = f_b_bs(state[-2 + 31], state[-2 + 35], state[-2 + 37], state[-2 + 40]);
221 const bitslice_t filter17_2 = f_b_bs(state[-2 + 34], state[-2 + 38], state[-2 + 40], state[-2 + 43]);
223 for (uchar i7 = 0; i7 < (1 << 1);) {
224 state[-2 + 41] = -i7;
227 const bitslice_t filter7_4 = f_a_bs(state[-2 + 41], state[-2 + 50], state[-2 + 51], state[-2 + 53]);
228 const bitslice_t filter7 = f_c_bs(filter7_0, filter7_1, filter7_2, filter7_3, filter7_4);
229 const bitslice_t results7 = results6 & (filter7 ^ keystream[7]);
230 if (!results7) continue;
231 state[-2 + 59] = lfsr_bs(11);
232 const bitslice_t filter8_3 = f_b_bs(state[-2 + 36], state[-2 + 37], state[-2 + 39], state[-2 + 41]);
233 const bitslice_t filter10_3 = f_b_bs(state[-2 + 38], state[-2 + 39], state[-2 + 41], state[-2 + 43]);
234 const bitslice_t filter10 = f_c_bs(filter10_0, filter10_1, filter10_2, filter10_3, filter10_4);
235 const bitslice_t filter12_3 = f_b_bs(state[-2 + 40], state[-2 + 41], state[-2 + 43], state[-2 + 45]);
236 const bitslice_t filter12 = f_c_bs(filter12_0, filter12_1, filter12_2, filter12_3, filter12_4);
237 const bitslice_t filter13_4 = f_a_bs(state[-2 + 47], state[-2 + 56], state[-2 + 57], state[-2 + 59]);
238 const bitslice_t filter15_2 = f_b_bs(state[-2 + 32], state[-2 + 36], state[-2 + 38], state[-2 + 41]);
240 for (uchar i8 = 0; i8 < (1 << 1);) {
241 state[-2 + 42] = -i8;
244 const bitslice_t filter8_4 = f_a_bs(state[-2 + 42], state[-2 + 51], state[-2 + 52], state[-2 + 54]);
245 const bitslice_t filter8 = f_c_bs(filter8_0, filter8_1, filter8_2, filter8_3, filter8_4);
246 bitslice_t results8 = results7 & (filter8 ^ keystream[8]);
247 if (!results8) continue;
248 const bitslice_t filter9_3 = f_b_bs(state[-2 + 37], state[-2 + 38], state[-2 + 40], state[-2 + 42]);
249 const bitslice_t filter9 = f_c_bs(filter9_0, filter9_1, filter9_2, filter9_3, filter9_4);
250 results8 &= (filter9 ^ keystream[9]);
251 if (!results8) continue;
252 results8 &= (filter10 ^ keystream[10]);
253 if (!results8) continue;
254 const bitslice_t filter11_3 = f_b_bs(state[-2 + 39], state[-2 + 40], state[-2 + 42], state[-2 + 44]);
255 const bitslice_t filter11 = f_c_bs(filter11_0, filter11_1, filter11_2, filter11_3, filter11_4);
256 results8 &= (filter11 ^ keystream[11]);
257 if (!results8) continue;
258 results8 &= (filter12 ^ keystream[12]);
259 if (!results8) continue;
260 const bitslice_t filter13_3 = f_b_bs(state[-2 + 41], state[-2 + 42], state[-2 + 44], state[-2 + 46]);
261 const bitslice_t filter13 = f_c_bs(filter13_0, filter13_1, filter13_2, filter13_3, filter13_4);
262 results8 &= (filter13 ^ keystream[13]);
263 if (!results8) continue;
264 state[-2 + 60] = lfsr_bs(12);
265 const bitslice_t filter14_3 = f_b_bs(state[-2 + 42], state[-2 + 43], state[-2 + 45], state[-2 + 47]);
266 const bitslice_t filter14_4 = f_a_bs(state[-2 + 48], state[-2 + 57], state[-2 + 58], state[-2 + 60]);
267 const bitslice_t filter14 = f_c_bs(filter14_0, filter14_1, filter14_2, filter14_3, filter14_4);
268 results8 &= (filter14 ^ keystream[14]);
269 if (!results8) continue;
270 state[-2 + 61] = lfsr_bs(13);
271 const bitslice_t filter15_4 = f_a_bs(state[-2 + 49], state[-2 + 58], state[-2 + 59], state[-2 + 61]);
272 const bitslice_t filter15 = f_c_bs(filter15_0, filter15_1, filter15_2, filter15_3, filter15_4);
273 results8 &= (filter15 ^ keystream[15]);
274 if (!results8) continue;
275 state[-2 + 62] = lfsr_bs(14);
276 const bitslice_t filter16_2 = f_b_bs(state[-2 + 33], state[-2 + 37], state[-2 + 39], state[-2 + 42]);
277 const bitslice_t filter16_4 = f_a_bs(state[-2 + 50], state[-2 + 59], state[-2 + 60], state[-2 + 62]);
278 const bitslice_t filter16 = f_c_bs(filter16_0, filter16_1, filter16_2, filter16_3, filter16_4);
279 results8 &= (filter16 ^ keystream[16]);
280 if (!results8) continue;
281 state[-2 + 63] = lfsr_bs(15);
282 const bitslice_t filter17_4 = f_a_bs(state[-2 + 51], state[-2 + 60], state[-2 + 61], state[-2 + 63]);
283 const bitslice_t filter17 = f_c_bs(filter17_0, filter17_1, filter17_2, filter17_3, filter17_4);
284 results8 &= (filter17 ^ keystream[17]);
285 if (!results8) continue;
286 state[-2 + 64] = lfsr_bs(16);
287 const bitslice_t filter18_0 = f_a_bs(state[-2 + 20], state[-2 + 21], state[-2 + 23], state[-2 + 24]);
288 const bitslice_t filter18_1 = f_b_bs(state[-2 + 26], state[-2 + 30], state[-2 + 32], state[-2 + 33]);
289 const bitslice_t filter18_2 = f_b_bs(state[-2 + 35], state[-2 + 39], state[-2 + 41], state[-2 + 44]);
290 const bitslice_t filter18_3 = f_b_bs(state[-2 + 46], state[-2 + 47], state[-2 + 49], state[-2 + 51]);
291 const bitslice_t filter18_4 = f_a_bs(state[-2 + 52], state[-2 + 61], state[-2 + 62], state[-2 + 64]);
292 const bitslice_t filter18 = f_c_bs(filter18_0, filter18_1, filter18_2, filter18_3, filter18_4);
293 results8 &= (filter18 ^ keystream[18]);
294 if (!results8) continue;
295 state[-2 + 65] = lfsr_bs(17);
296 const bitslice_t filter19_0 = f_a_bs(state[-2 + 21], state[-2 + 22], state[-2 + 24], state[-2 + 25]);
297 const bitslice_t filter19_1 = f_b_bs(state[-2 + 27], state[-2 + 31], state[-2 + 33], state[-2 + 34]);
298 const bitslice_t filter19_2 = f_b_bs(state[-2 + 36], state[-2 + 40], state[-2 + 42], state[-2 + 45]);
299 const bitslice_t filter19_3 = f_b_bs(state[-2 + 47], state[-2 + 48], state[-2 + 50], state[-2 + 52]);
300 const bitslice_t filter19_4 = f_a_bs(state[-2 + 53], state[-2 + 62], state[-2 + 63], state[-2 + 65]);
301 const bitslice_t filter19 = f_c_bs(filter19_0, filter19_1, filter19_2, filter19_3, filter19_4);
302 results8 &= (filter19 ^ keystream[19]);
303 if (!results8) continue;
304 state[-2 + 66] = lfsr_bs(18);
305 const bitslice_t filter20_0 = f_a_bs(state[-2 + 22], state[-2 + 23], state[-2 + 25], state[-2 + 26]);
306 const bitslice_t filter20_1 = f_b_bs(state[-2 + 28], state[-2 + 32], state[-2 + 34], state[-2 + 35]);
307 const bitslice_t filter20_2 = f_b_bs(state[-2 + 37], state[-2 + 41], state[-2 + 43], state[-2 + 46]);
308 const bitslice_t filter20_3 = f_b_bs(state[-2 + 48], state[-2 + 49], state[-2 + 51], state[-2 + 53]);
309 const bitslice_t filter20_4 = f_a_bs(state[-2 + 54], state[-2 + 63], state[-2 + 64], state[-2 + 66]);
310 const bitslice_t filter20 = f_c_bs(filter20_0, filter20_1, filter20_2, filter20_3, filter20_4);
311 results8 &= (filter20 ^ keystream[20]);
312 if (!results8) continue;
313 state[-2 + 67] = lfsr_bs(19);
314 const bitslice_t filter21_0 = f_a_bs(state[-2 + 23], state[-2 + 24], state[-2 + 26], state[-2 + 27]);
315 const bitslice_t filter21_1 = f_b_bs(state[-2 + 29], state[-2 + 33], state[-2 + 35], state[-2 + 36]);
316 const bitslice_t filter21_2 = f_b_bs(state[-2 + 38], state[-2 + 42], state[-2 + 44], state[-2 + 47]);
317 const bitslice_t filter21_3 = f_b_bs(state[-2 + 49], state[-2 + 50], state[-2 + 52], state[-2 + 54]);
318 const bitslice_t filter21_4 = f_a_bs(state[-2 + 55], state[-2 + 64], state[-2 + 65], state[-2 + 67]);
319 const bitslice_t filter21 = f_c_bs(filter21_0, filter21_1, filter21_2, filter21_3, filter21_4);
320 results8 &= (filter21 ^ keystream[21]);
321 if (!results8) continue;
322 state[-2 + 68] = lfsr_bs(20);
323 const bitslice_t filter22_0 = f_a_bs(state[-2 + 24], state[-2 + 25], state[-2 + 27], state[-2 + 28]);
324 const bitslice_t filter22_1 = f_b_bs(state[-2 + 30], state[-2 + 34], state[-2 + 36], state[-2 + 37]);
325 const bitslice_t filter22_2 = f_b_bs(state[-2 + 39], state[-2 + 43], state[-2 + 45], state[-2 + 48]);
326 const bitslice_t filter22_3 = f_b_bs(state[-2 + 50], state[-2 + 51], state[-2 + 53], state[-2 + 55]);
327 const bitslice_t filter22_4 = f_a_bs(state[-2 + 56], state[-2 + 65], state[-2 + 66], state[-2 + 68]);
328 const bitslice_t filter22 = f_c_bs(filter22_0, filter22_1, filter22_2, filter22_3, filter22_4);
329 results8 &= (filter22 ^ keystream[22]);
330 if (!results8) continue;
331 state[-2 + 69] = lfsr_bs(21);
332 const bitslice_t filter23_0 = f_a_bs(state[-2 + 25], state[-2 + 26], state[-2 + 28], state[-2 + 29]);
333 const bitslice_t filter23_1 = f_b_bs(state[-2 + 31], state[-2 + 35], state[-2 + 37], state[-2 + 38]);
334 const bitslice_t filter23_2 = f_b_bs(state[-2 + 40], state[-2 + 44], state[-2 + 46], state[-2 + 49]);
335 const bitslice_t filter23_3 = f_b_bs(state[-2 + 51], state[-2 + 52], state[-2 + 54], state[-2 + 56]);
336 const bitslice_t filter23_4 = f_a_bs(state[-2 + 57], state[-2 + 66], state[-2 + 67], state[-2 + 69]);
337 const bitslice_t filter23 = f_c_bs(filter23_0, filter23_1, filter23_2, filter23_3, filter23_4);
338 results8 &= (filter23 ^ keystream[23]);
339 if (!results8) continue;
340 state[-2 + 70] = lfsr_bs(22);
341 const bitslice_t filter24_0 = f_a_bs(state[-2 + 26], state[-2 + 27], state[-2 + 29], state[-2 + 30]);
342 const bitslice_t filter24_1 = f_b_bs(state[-2 + 32], state[-2 + 36], state[-2 + 38], state[-2 + 39]);
343 const bitslice_t filter24_2 = f_b_bs(state[-2 + 41], state[-2 + 45], state[-2 + 47], state[-2 + 50]);
344 const bitslice_t filter24_3 = f_b_bs(state[-2 + 52], state[-2 + 53], state[-2 + 55], state[-2 + 57]);
345 const bitslice_t filter24_4 = f_a_bs(state[-2 + 58], state[-2 + 67], state[-2 + 68], state[-2 + 70]);
346 const bitslice_t filter24 = f_c_bs(filter24_0, filter24_1, filter24_2, filter24_3, filter24_4);
347 results8 &= (filter24 ^ keystream[24]);
348 if (!results8) continue;
349 state[-2 + 71] = lfsr_bs(23);
350 const bitslice_t filter25_0 = f_a_bs(state[-2 + 27], state[-2 + 28], state[-2 + 30], state[-2 + 31]);
351 const bitslice_t filter25_1 = f_b_bs(state[-2 + 33], state[-2 + 37], state[-2 + 39], state[-2 + 40]);
352 const bitslice_t filter25_2 = f_b_bs(state[-2 + 42], state[-2 + 46], state[-2 + 48], state[-2 + 51]);
353 const bitslice_t filter25_3 = f_b_bs(state[-2 + 53], state[-2 + 54], state[-2 + 56], state[-2 + 58]);
354 const bitslice_t filter25_4 = f_a_bs(state[-2 + 59], state[-2 + 68], state[-2 + 69], state[-2 + 71]);
355 const bitslice_t filter25 = f_c_bs(filter25_0, filter25_1, filter25_2, filter25_3, filter25_4);
356 results8 &= (filter25 ^ keystream[25]);
357 if (!results8) continue;
358 state[-2 + 72] = lfsr_bs(24);
359 const bitslice_t filter26_0 = f_a_bs(state[-2 + 28], state[-2 + 29], state[-2 + 31], state[-2 + 32]);
360 const bitslice_t filter26_1 = f_b_bs(state[-2 + 34], state[-2 + 38], state[-2 + 40], state[-2 + 41]);
361 const bitslice_t filter26_2 = f_b_bs(state[-2 + 43], state[-2 + 47], state[-2 + 49], state[-2 + 52]);
362 const bitslice_t filter26_3 = f_b_bs(state[-2 + 54], state[-2 + 55], state[-2 + 57], state[-2 + 59]);
363 const bitslice_t filter26_4 = f_a_bs(state[-2 + 60], state[-2 + 69], state[-2 + 70], state[-2 + 72]);
364 const bitslice_t filter26 = f_c_bs(filter26_0, filter26_1, filter26_2, filter26_3, filter26_4);
365 results8 &= (filter26 ^ keystream[26]);
366 if (!results8) continue;
367 state[-2 + 73] = lfsr_bs(25);
368 const bitslice_t filter27_0 = f_a_bs(state[-2 + 29], state[-2 + 30], state[-2 + 32], state[-2 + 33]);
369 const bitslice_t filter27_1 = f_b_bs(state[-2 + 35], state[-2 + 39], state[-2 + 41], state[-2 + 42]);
370 const bitslice_t filter27_2 = f_b_bs(state[-2 + 44], state[-2 + 48], state[-2 + 50], state[-2 + 53]);
371 const bitslice_t filter27_3 = f_b_bs(state[-2 + 55], state[-2 + 56], state[-2 + 58], state[-2 + 60]);
372 const bitslice_t filter27_4 = f_a_bs(state[-2 + 61], state[-2 + 70], state[-2 + 71], state[-2 + 73]);
373 const bitslice_t filter27 = f_c_bs(filter27_0, filter27_1, filter27_2, filter27_3, filter27_4);
374 results8 &= (filter27 ^ keystream[27]);
375 if (!results8) continue;
376 state[-2 + 74] = lfsr_bs(26);
377 const bitslice_t filter28_0 = f_a_bs(state[-2 + 30], state[-2 + 31], state[-2 + 33], state[-2 + 34]);
378 const bitslice_t filter28_1 = f_b_bs(state[-2 + 36], state[-2 + 40], state[-2 + 42], state[-2 + 43]);
379 const bitslice_t filter28_2 = f_b_bs(state[-2 + 45], state[-2 + 49], state[-2 + 51], state[-2 + 54]);
380 const bitslice_t filter28_3 = f_b_bs(state[-2 + 56], state[-2 + 57], state[-2 + 59], state[-2 + 61]);
381 const bitslice_t filter28_4 = f_a_bs(state[-2 + 62], state[-2 + 71], state[-2 + 72], state[-2 + 74]);
382 const bitslice_t filter28 = f_c_bs(filter28_0, filter28_1, filter28_2, filter28_3, filter28_4);
383 results8 &= (filter28 ^ keystream[28]);
384 if (!results8) continue;
385 state[-2 + 75] = lfsr_bs(27);
386 const bitslice_t filter29_0 = f_a_bs(state[-2 + 31], state[-2 + 32], state[-2 + 34], state[-2 + 35]);
387 const bitslice_t filter29_1 = f_b_bs(state[-2 + 37], state[-2 + 41], state[-2 + 43], state[-2 + 44]);
388 const bitslice_t filter29_2 = f_b_bs(state[-2 + 46], state[-2 + 50], state[-2 + 52], state[-2 + 55]);
389 const bitslice_t filter29_3 = f_b_bs(state[-2 + 57], state[-2 + 58], state[-2 + 60], state[-2 + 62]);
390 const bitslice_t filter29_4 = f_a_bs(state[-2 + 63], state[-2 + 72], state[-2 + 73], state[-2 + 75]);
391 const bitslice_t filter29 = f_c_bs(filter29_0, filter29_1, filter29_2, filter29_3, filter29_4);
392 results8 &= (filter29 ^ keystream[29]);
393 if (!results8) continue;
394 state[-2 + 76] = lfsr_bs(28);
395 const bitslice_t filter30_0 = f_a_bs(state[-2 + 32], state[-2 + 33], state[-2 + 35], state[-2 + 36]);
396 const bitslice_t filter30_1 = f_b_bs(state[-2 + 38], state[-2 + 42], state[-2 + 44], state[-2 + 45]);
397 const bitslice_t filter30_2 = f_b_bs(state[-2 + 47], state[-2 + 51], state[-2 + 53], state[-2 + 56]);
398 const bitslice_t filter30_3 = f_b_bs(state[-2 + 58], state[-2 + 59], state[-2 + 61], state[-2 + 63]);
399 const bitslice_t filter30_4 = f_a_bs(state[-2 + 64], state[-2 + 73], state[-2 + 74], state[-2 + 76]);
400 const bitslice_t filter30 = f_c_bs(filter30_0, filter30_1, filter30_2, filter30_3, filter30_4);
401 results8 &= (filter30 ^ keystream[30]);
402 if (!results8) continue;
403 state[-2 + 77] = lfsr_bs(29);
404 const bitslice_t filter31_0 = f_a_bs(state[-2 + 33], state[-2 + 34], state[-2 + 36], state[-2 + 37]);
405 const bitslice_t filter31_1 = f_b_bs(state[-2 + 39], state[-2 + 43], state[-2 + 45], state[-2 + 46]);
406 const bitslice_t filter31_2 = f_b_bs(state[-2 + 48], state[-2 + 52], state[-2 + 54], state[-2 + 57]);
407 const bitslice_t filter31_3 = f_b_bs(state[-2 + 59], state[-2 + 60], state[-2 + 62], state[-2 + 64]);
408 const bitslice_t filter31_4 = f_a_bs(state[-2 + 65], state[-2 + 74], state[-2 + 75], state[-2 + 77]);
409 const bitslice_t filter31 = f_c_bs(filter31_0, filter31_1, filter31_2, filter31_3, filter31_4);
410 results8 &= (filter31 ^ keystream[31]);
411 if (!results8) continue;
412 uchar match_index = 0;
414 while (results8 && (match_index < MAX_BITSLICES)) {
415 uchar shift = clz(results8) + 1;
416 match_index += shift;
417 // take the state from layer 2 so we can recover the lowest 2 bits on the host by inverting the LFSR
418 matches[atomic_inc(matches_found)] = unbitslice(&state[-2 + 2], MAX_BITSLICES - match_index, 48);