// #include <stdint.h>    // for uint8_t
// #include <string.h>    // for memcmp
// #include <wmmintrin.h> // for the AES-NI intrinsics
// Compile with gcc using the following arguments: -g;-O0;-Wall;-msse2;-msse;-march=native;-maes
11 static void AES_CBC_decrypt(const uint8_t *in, uint8_t *out, uint8_t iv[], uint8_t len, uint8_t *key) {
13 __m128i data, last_in;
14 __m128i feedback = _mm_loadu_si128 ((__m128i*)iv);
17 for (uint8_t i = 0; i < len; i++){
18 last_in =_mm_loadu_si128 (&((__m128i*)in)[i]);
19 data = _mm_xor_si128 (last_in, ((__m128i*)key)[0]);
21 for (j = 1; j < 10; j++){
22 data = _mm_aesdec_si128 (data, ((__m128i*)key)[j]);
24 data = _mm_aesdeclast_si128 (data,((__m128i*)key)[j]);
25 data = _mm_xor_si128 (data, feedback);
26 _mm_storeu_si128 (&((__m128i*)out)[i], data);
33 INLINE static __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) {
35 temp2 = _mm_shuffle_epi32 (temp2 ,0xff);
36 temp3 = _mm_slli_si128 (temp1, 0x4);
37 temp1 = _mm_xor_si128 (temp1, temp3);
38 temp3 = _mm_slli_si128 (temp3, 0x4);
39 temp1 = _mm_xor_si128 (temp1, temp3);
40 temp3 = _mm_slli_si128 (temp3, 0x4);
41 temp1 = _mm_xor_si128 (temp1, temp3);
42 temp1 = _mm_xor_si128 (temp1, temp2);
46 static void AES_128_Key_Expansion (uint8_t *userkey, __m128i *key) {
48 temp1 = _mm_loadu_si128((__m128i*)userkey);
50 temp2 = _mm_aeskeygenassist_si128 (temp1 ,0x1);
51 temp1 = AES_128_ASSIST(temp1, temp2);
53 temp2 = _mm_aeskeygenassist_si128 (temp1,0x2);
54 temp1 = AES_128_ASSIST(temp1, temp2);
56 temp2 = _mm_aeskeygenassist_si128 (temp1,0x4);
57 temp1 = AES_128_ASSIST(temp1, temp2);
59 temp2 = _mm_aeskeygenassist_si128 (temp1,0x8);
60 temp1 = AES_128_ASSIST(temp1, temp2);
62 temp2 = _mm_aeskeygenassist_si128 (temp1,0x10);
63 temp1 = AES_128_ASSIST(temp1, temp2);
65 temp2 = _mm_aeskeygenassist_si128 (temp1,0x20);
66 temp1 = AES_128_ASSIST(temp1, temp2);
68 temp2 = _mm_aeskeygenassist_si128 (temp1,0x40);
69 temp1 = AES_128_ASSIST(temp1, temp2);
71 temp2 = _mm_aeskeygenassist_si128 (temp1,0x80);
72 temp1 = AES_128_ASSIST(temp1, temp2);
74 temp2 = _mm_aeskeygenassist_si128 (temp1,0x1b);
75 temp1 = AES_128_ASSIST(temp1, temp2);
77 temp2 = _mm_aeskeygenassist_si128 (temp1,0x36);
78 temp1 = AES_128_ASSIST(temp1, temp2);
82 static void aes_inv_key_10(AESContext * ctx) {
84 __m128i* keysched = (__m128i*)ctx->keysched;
85 __m128i* invkeysched = (__m128i*)ctx->invkeysched;
87 *(invkeysched + 10) = *(keysched + 0);
88 *(invkeysched + 9) = _mm_aesimc_si128(*(keysched + 1));
89 *(invkeysched + 8) = _mm_aesimc_si128(*(keysched + 2));
90 *(invkeysched + 7) = _mm_aesimc_si128(*(keysched + 3));
91 *(invkeysched + 6) = _mm_aesimc_si128(*(keysched + 4));
92 *(invkeysched + 5) = _mm_aesimc_si128(*(keysched + 5));
93 *(invkeysched + 4) = _mm_aesimc_si128(*(keysched + 6));
94 *(invkeysched + 3) = _mm_aesimc_si128(*(keysched + 7));
95 *(invkeysched + 2) = _mm_aesimc_si128(*(keysched + 8));
96 *(invkeysched + 1) = _mm_aesimc_si128(*(keysched + 9));
97 *(invkeysched + 0) = *(keysched + 10);
100 static void aes_decrypt_cbc_ni(const uint8_t *in, uint8_t *out, uint8_t iv[], uint8_t len, uint8_t *key) {
102 __m128i dec = _mm_setzero_si128();
103 __m128i* block = (__m128i*)in;
104 const __m128i* finish = (__m128i*)(in + len);
107 __m128i iv = _mm_loadu_si128((__m128i*)iv);
109 while (block < finish) {
112 __m128i* keysched = (__m128i*)ctx->invkeysched;
113 __m128i last = _mm_loadu_si128(block);
115 dec = _mm_xor_si128(last, *keysched);
117 dec = _mm_aesdec_si128(dec, *(++keysched));
118 dec = _mm_aesdec_si128(dec, *(++keysched));
119 dec = _mm_aesdec_si128(dec, *(++keysched));
120 dec = _mm_aesdec_si128(dec, *(++keysched));
121 dec = _mm_aesdec_si128(dec, *(++keysched));
122 dec = _mm_aesdec_si128(dec, *(++keysched));
123 dec = _mm_aesdec_si128(dec, *(++keysched));
124 dec = _mm_aesdec_si128(dec, *(++keysched));
125 dec = _mm_aesdec_si128(dec, *(++keysched));
126 dec = _mm_aesdeclast_si128(dec, *(++keysched));
129 dec = _mm_xor_si128(iv, dec);
132 _mm_storeu_si128(block, dec);
140 _mm_storeu_si128((__m128i*)iv, dec);
143 static void aes_setup_ni(AESContext * ctx, uint8_t *key) {
145 __m128i *keysched = (__m128i*)ctx->keysched;
147 ctx->decrypt_cbc = aes_decrypt_cbc_ni;
149 // Now do the key setup itself.
150 AES_128_Key_Expansion (key, keysched);
152 // Now prepare the modified keys for the inverse cipher.