1 #include "blake3_impl.h"
4 INLINE
uint32_t rotr32(uint32_t w
, uint32_t c
) {
5 return (w
>> c
) | (w
<< (32 - c
));
8 INLINE
void g(uint32_t *state
, size_t a
, size_t b
, size_t c
, size_t d
,
9 uint32_t x
, uint32_t y
) {
10 state
[a
] = state
[a
] + state
[b
] + x
;
11 state
[d
] = rotr32(state
[d
] ^ state
[a
], 16);
12 state
[c
] = state
[c
] + state
[d
];
13 state
[b
] = rotr32(state
[b
] ^ state
[c
], 12);
14 state
[a
] = state
[a
] + state
[b
] + y
;
15 state
[d
] = rotr32(state
[d
] ^ state
[a
], 8);
16 state
[c
] = state
[c
] + state
[d
];
17 state
[b
] = rotr32(state
[b
] ^ state
[c
], 7);
20 INLINE
void round_fn(uint32_t state
[16], const uint32_t *msg
, size_t round
) {
21 // Select the message schedule based on the round.
22 const uint8_t *schedule
= MSG_SCHEDULE
[round
];
25 g(state
, 0, 4, 8, 12, msg
[schedule
[0]], msg
[schedule
[1]]);
26 g(state
, 1, 5, 9, 13, msg
[schedule
[2]], msg
[schedule
[3]]);
27 g(state
, 2, 6, 10, 14, msg
[schedule
[4]], msg
[schedule
[5]]);
28 g(state
, 3, 7, 11, 15, msg
[schedule
[6]], msg
[schedule
[7]]);
31 g(state
, 0, 5, 10, 15, msg
[schedule
[8]], msg
[schedule
[9]]);
32 g(state
, 1, 6, 11, 12, msg
[schedule
[10]], msg
[schedule
[11]]);
33 g(state
, 2, 7, 8, 13, msg
[schedule
[12]], msg
[schedule
[13]]);
34 g(state
, 3, 4, 9, 14, msg
[schedule
[14]], msg
[schedule
[15]]);
37 INLINE
void compress_pre(uint32_t state
[16], const uint32_t cv
[8],
38 const uint8_t block
[BLAKE3_BLOCK_LEN
],
39 uint8_t block_len
, uint64_t counter
, uint8_t flags
) {
40 uint32_t block_words
[16];
41 block_words
[0] = load32(block
+ 4 * 0);
42 block_words
[1] = load32(block
+ 4 * 1);
43 block_words
[2] = load32(block
+ 4 * 2);
44 block_words
[3] = load32(block
+ 4 * 3);
45 block_words
[4] = load32(block
+ 4 * 4);
46 block_words
[5] = load32(block
+ 4 * 5);
47 block_words
[6] = load32(block
+ 4 * 6);
48 block_words
[7] = load32(block
+ 4 * 7);
49 block_words
[8] = load32(block
+ 4 * 8);
50 block_words
[9] = load32(block
+ 4 * 9);
51 block_words
[10] = load32(block
+ 4 * 10);
52 block_words
[11] = load32(block
+ 4 * 11);
53 block_words
[12] = load32(block
+ 4 * 12);
54 block_words
[13] = load32(block
+ 4 * 13);
55 block_words
[14] = load32(block
+ 4 * 14);
56 block_words
[15] = load32(block
+ 4 * 15);
70 state
[12] = counter_low(counter
);
71 state
[13] = counter_high(counter
);
72 state
[14] = (uint32_t)block_len
;
73 state
[15] = (uint32_t)flags
;
75 round_fn(state
, &block_words
[0], 0);
76 round_fn(state
, &block_words
[0], 1);
77 round_fn(state
, &block_words
[0], 2);
78 round_fn(state
, &block_words
[0], 3);
79 round_fn(state
, &block_words
[0], 4);
80 round_fn(state
, &block_words
[0], 5);
81 round_fn(state
, &block_words
[0], 6);
84 void blake3_compress_in_place_portable(uint32_t cv
[8],
85 const uint8_t block
[BLAKE3_BLOCK_LEN
],
86 uint8_t block_len
, uint64_t counter
,
89 compress_pre(state
, cv
, block
, block_len
, counter
, flags
);
90 cv
[0] = state
[0] ^ state
[8];
91 cv
[1] = state
[1] ^ state
[9];
92 cv
[2] = state
[2] ^ state
[10];
93 cv
[3] = state
[3] ^ state
[11];
94 cv
[4] = state
[4] ^ state
[12];
95 cv
[5] = state
[5] ^ state
[13];
96 cv
[6] = state
[6] ^ state
[14];
97 cv
[7] = state
[7] ^ state
[15];
100 void blake3_compress_xof_portable(const uint32_t cv
[8],
101 const uint8_t block
[BLAKE3_BLOCK_LEN
],
102 uint8_t block_len
, uint64_t counter
,
103 uint8_t flags
, uint8_t out
[64]) {
105 compress_pre(state
, cv
, block
, block_len
, counter
, flags
);
107 store32(&out
[0 * 4], state
[0] ^ state
[8]);
108 store32(&out
[1 * 4], state
[1] ^ state
[9]);
109 store32(&out
[2 * 4], state
[2] ^ state
[10]);
110 store32(&out
[3 * 4], state
[3] ^ state
[11]);
111 store32(&out
[4 * 4], state
[4] ^ state
[12]);
112 store32(&out
[5 * 4], state
[5] ^ state
[13]);
113 store32(&out
[6 * 4], state
[6] ^ state
[14]);
114 store32(&out
[7 * 4], state
[7] ^ state
[15]);
115 store32(&out
[8 * 4], state
[8] ^ cv
[0]);
116 store32(&out
[9 * 4], state
[9] ^ cv
[1]);
117 store32(&out
[10 * 4], state
[10] ^ cv
[2]);
118 store32(&out
[11 * 4], state
[11] ^ cv
[3]);
119 store32(&out
[12 * 4], state
[12] ^ cv
[4]);
120 store32(&out
[13 * 4], state
[13] ^ cv
[5]);
121 store32(&out
[14 * 4], state
[14] ^ cv
[6]);
122 store32(&out
[15 * 4], state
[15] ^ cv
[7]);
125 INLINE
void hash_one_portable(const uint8_t *input
, size_t blocks
,
126 const uint32_t key
[8], uint64_t counter
,
127 uint8_t flags
, uint8_t flags_start
,
128 uint8_t flags_end
, uint8_t out
[BLAKE3_OUT_LEN
]) {
130 memcpy(cv
, key
, BLAKE3_KEY_LEN
);
131 uint8_t block_flags
= flags
| flags_start
;
134 block_flags
|= flags_end
;
136 blake3_compress_in_place_portable(cv
, input
, BLAKE3_BLOCK_LEN
, counter
,
138 input
= &input
[BLAKE3_BLOCK_LEN
];
142 store_cv_words(out
, cv
);
145 void blake3_hash_many_portable(const uint8_t *const *inputs
, size_t num_inputs
,
146 size_t blocks
, const uint32_t key
[8],
147 uint64_t counter
, bool increment_counter
,
148 uint8_t flags
, uint8_t flags_start
,
149 uint8_t flags_end
, uint8_t *out
) {
150 while (num_inputs
> 0) {
151 hash_one_portable(inputs
[0], blocks
, key
, counter
, flags
, flags_start
,
153 if (increment_counter
) {
158 out
= &out
[BLAKE3_OUT_LEN
];