]>
Commit | Line | Data |
---|---|---|
13fec989 A |
1 | /* |
2 | --------------------------------------------------------------------------- | |
3 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. | |
4 | ||
5 | LICENSE TERMS | |
6 | ||
7 | The free distribution and use of this software in both source and binary | |
8 | form is allowed (with or without changes) provided that: | |
9 | ||
10 | 1. distributions of this source code include the above copyright | |
11 | notice, this list of conditions and the following disclaimer; | |
12 | ||
13 | 2. distributions in binary form include the above copyright | |
14 | notice, this list of conditions and the following disclaimer | |
15 | in the documentation and/or other associated materials; | |
16 | ||
17 | 3. the copyright holder's name is not used to endorse products | |
18 | built using this software without specific written permission. | |
19 | ||
20 | ALTERNATIVELY, provided that this notice is retained in full, this product | |
21 | may be distributed under the terms of the GNU General Public License (GPL), | |
22 | in which case the provisions of the GPL apply INSTEAD OF those given above. | |
23 | ||
24 | DISCLAIMER | |
25 | ||
26 | This software is provided 'as is' with no explicit or implied warranties | |
27 | in respect of its properties, including, but not limited to, correctness | |
28 | and/or fitness for purpose. | |
29 | --------------------------------------------------------------------------- | |
30 | Issue 28/01/2004 | |
31 | ||
32 | This file contains the code for implementing encryption and decryption | |
33 | for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It | |
34 | can optionally be replaced by code written in assembler using NASM. For | |
35 | further details see the file aesopt.h | |
36 | */ | |
37 | ||
38 | #include "aesopt.h" | |
39 | #include "aestab.h" | |
40 | ||
41 | #if defined(__cplusplus) | |
42 | extern "C" | |
43 | { | |
44 | #endif | |
45 | ||
/* Per-column primitives.  s(), word_in() and word_out() are not defined in
   this file; they are expected to come from the headers included above.
   Each of these expands to a single expression.                           */

/* y[c] = x[c] ^ k[c] */
#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
/* y[c] ^= x[c] */
#define xo(y,x,c) (s(y,c) ^= s(x, c))
/* y[c] = word c read from the byte buffer x */
#define si(y,x,c) (s(y,c) = word_in(x, c))
/* write state word x[c] as word c of the byte buffer y */
#define so(y,x,c) word_out(y, c, s(x,c))

/* Declarator list for two four-word state variables; used after a type
   name, e.g. "aes_32t locals(b0, b1);".  Either two arrays or eight
   scalars are declared depending on ARRAYS.                              */
#if defined(ARRAYS)
#define locals(y,x) x[4],y[4]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* dtables declares four local pointers tab0..tab3; itables points them at
   tab[0]..tab[3].                                                        */
#define dtables(tab) const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
#define itables(tab) \
    do { tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]; } while (0)

/* The macros below apply a per-column operation to all four columns.
   They are wrapped in do { } while (0) so that each one behaves as a
   single statement (safe under an unbraced if/else); invoke them with a
   trailing semicolon.                                                    */

/* copy state x into state y */
#define l_copy(y, x) \
    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
         s(y,2) = s(x,2); s(y,3) = s(x,3); } while (0)

/* y = x ^ k for the four words at k */
#define key_in(y,x,k) \
    do { ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3); } while (0)
/* y ^= x (the CBC chaining xor) */
#define cbc(y,x) \
    do { xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3); } while (0)
/* load state y from byte buffer x */
#define state_in(y,x) \
    do { si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3); } while (0)
/* store state x into byte buffer y */
#define state_out(y,x) \
    do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while (0)
/* apply the round macro rm to every column: y = rm(x, k) */
#define round(rm,y,x,k) \
    do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while (0)
68 | ||
69 | #if defined(ENCRYPTION) && !defined(AES_ASM) | |
70 | ||
71 | /* Visual C++ .Net v7.1 provides the fastest encryption code when using | |
72 | Pentium optimisation with small code but this is poor for decryption | |
73 | so we need to control this with the following VC++ pragmas | |
74 | */ | |
75 | ||
76 | #if defined(_MSC_VER) | |
77 | #pragma optimize( "s", on ) | |
78 | #endif | |
79 | ||
80 | /* Given the column (c) of the output state variable, the following | |
81 | macros give the input state variables which are needed in its | |
82 | computation for each row (r) of the state. All the alternative | |
83 | macros give the same end values but expand into different ways | |
84 | of calculating these values. In particular the complex macro | |
85 | used for dynamically variable block sizes is designed to expand | |
86 | to a compile time constant whenever possible but will expand to | |
87 | conditional clauses on some branches (I am grateful to Frank | |
88 | Yellin for this construction) | |
89 | */ | |
90 | ||
/* fwd_var(x,r,c): the input state word of x needed for row r of output
   column c in a forward round, i.e. column (c + r) mod 4.  r and c are
   parenthesised in the comparisons so that expression arguments select
   the intended row/column; x and the column index handed to s() are
   passed through untouched because s() is defined elsewhere and may
   paste them into an identifier.                                        */
#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))
96 | ||
/* fwd_rnd(y,x,k,c): column c of a normal forward round, y[c] = k[c] ^ f(x).
   Which lookup macro supplies f depends on the table configuration.  When
   a full round table is configured (FT4_SET or FT1_SET) fwd_mcol() is not
   used, so dec_fmvars is undefined and the function below declares no
   temporaries for it.                                                    */
#ifdef FT4_SET
#  undef dec_fmvars
#  ifdef ENC_ROUND_CACHE_TABLES
#    define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
#  else
#    define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
#  endif
#elif defined(FT1_SET)
#  undef dec_fmvars
#  define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
#else
#  define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
#endif

/* fwd_lrnd(y,x,k,c): column c of the last forward round; same shape as
   fwd_rnd but using the t_fl tables, or the plain t_sbox lookup with no
   fwd_mcol() step when no last-round table is configured.               */
#ifdef FL4_SET
#  ifdef LAST_ENC_ROUND_CACHE_TABLES
#    define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
#  else
#    define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
#  endif
#elif defined(FL1_SET)
#  define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
#else
#  define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
#endif
122 | ||
123 | aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, | |
124 | unsigned char *out, const aes_encrypt_ctx cx[1]) | |
125 | { aes_32t locals(b0, b1); | |
126 | const aes_32t *kp; | |
127 | const aes_32t *kptr = cx->ks; | |
128 | #if defined(ENC_ROUND_CACHE_TABLES) | |
129 | dtables(t_fn); | |
130 | #endif | |
131 | #if defined(LAST_ENC_ROUND_CACHE_TABLES) | |
132 | dtables(t_fl); | |
133 | #endif | |
134 | ||
135 | #if defined( dec_fmvars ) | |
136 | dec_fmvars; /* declare variables for fwd_mcol() if needed */ | |
137 | #endif | |
138 | ||
139 | #if defined( AES_ERR_CHK ) | |
140 | if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 ) | |
141 | return aes_error; | |
142 | #endif | |
143 | ||
144 | // Load IV into b0. | |
145 | state_in(b0, in_iv); | |
146 | ||
147 | for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk) | |
148 | { | |
149 | kp = kptr; | |
150 | #if 0 | |
151 | // Read the plaintext into b1 | |
152 | state_in(b1, in); | |
153 | // Do the CBC with b0 which is either the iv or the ciphertext of the previous block. | |
154 | cbc(b1, b0); | |
155 | ||
156 | // Xor b1 with the key schedule to get things started. | |
157 | key_in(b0, b1, kp); | |
158 | #else | |
159 | // Since xor is associative we mess with the ordering here to get the loads started early | |
160 | key_in(b1, b0, kp); // Xor b0(IV) with the key schedule and assign to b1 | |
161 | state_in(b0, in); // Load block into b0 | |
162 | cbc(b0, b1); // Xor b0 with b1 and store in b0 | |
163 | #endif | |
164 | ||
165 | #if defined(ENC_ROUND_CACHE_TABLES) | |
166 | itables(t_fn); | |
167 | #endif | |
168 | ||
169 | #if (ENC_UNROLL == FULL) | |
170 | ||
171 | switch(cx->rn) | |
172 | { | |
173 | case 14: | |
174 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); | |
175 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); | |
176 | kp += 2 * N_COLS; | |
177 | case 12: | |
178 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); | |
179 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); | |
180 | kp += 2 * N_COLS; | |
181 | case 10: | |
182 | default: | |
183 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); | |
184 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); | |
185 | round(fwd_rnd, b1, b0, kp + 3 * N_COLS); | |
186 | round(fwd_rnd, b0, b1, kp + 4 * N_COLS); | |
187 | round(fwd_rnd, b1, b0, kp + 5 * N_COLS); | |
188 | round(fwd_rnd, b0, b1, kp + 6 * N_COLS); | |
189 | round(fwd_rnd, b1, b0, kp + 7 * N_COLS); | |
190 | round(fwd_rnd, b0, b1, kp + 8 * N_COLS); | |
191 | round(fwd_rnd, b1, b0, kp + 9 * N_COLS); | |
192 | #if defined(LAST_ENC_ROUND_CACHE_TABLES) | |
193 | itables(t_fl); | |
194 | #endif | |
195 | round(fwd_lrnd, b0, b1, kp +10 * N_COLS); | |
196 | } | |
197 | ||
198 | #else | |
199 | ||
200 | { aes_32t rnd; | |
201 | #if (ENC_UNROLL == PARTIAL) | |
202 | for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd) | |
203 | { | |
204 | kp += N_COLS; | |
205 | round(fwd_rnd, b1, b0, kp); | |
206 | kp += N_COLS; | |
207 | round(fwd_rnd, b0, b1, kp); | |
208 | } | |
209 | kp += N_COLS; | |
210 | round(fwd_rnd, b1, b0, kp); | |
211 | #else | |
212 | for(rnd = 0; rnd < cx->rn - 1; ++rnd) | |
213 | { | |
214 | kp += N_COLS; | |
215 | round(fwd_rnd, b1, b0, kp); | |
216 | l_copy(b0, b1); | |
217 | } | |
218 | #endif | |
219 | #if defined(LAST_ENC_ROUND_CACHE_TABLES) | |
220 | itables(t_fl); | |
221 | #endif | |
222 | kp += N_COLS; | |
223 | round(fwd_lrnd, b0, b1, kp); | |
224 | } | |
225 | #endif | |
226 | ||
227 | state_out(out, b0); | |
228 | } | |
229 | ||
230 | #if defined( AES_ERR_CHK ) | |
231 | return aes_good; | |
232 | #endif | |
233 | } | |
234 | ||
235 | #endif | |
236 | ||
237 | #if defined(DECRYPTION) && !defined(AES_ASM) | |
238 | ||
239 | /* Visual C++ .Net v7.1 provides the fastest encryption code when using | |
240 | Pentium optimisation with small code but this is poor for decryption | |
241 | so we need to control this with the following VC++ pragmas | |
242 | */ | |
243 | ||
244 | #if defined(_MSC_VER) | |
245 | #pragma optimize( "t", on ) | |
246 | #endif | |
247 | ||
248 | /* Given the column (c) of the output state variable, the following | |
249 | macros give the input state variables which are needed in its | |
250 | computation for each row (r) of the state. All the alternative | |
251 | macros give the same end values but expand into different ways | |
252 | of calculating these values. In particular the complex macro | |
253 | used for dynamically variable block sizes is designed to expand | |
254 | to a compile time constant whenever possible but will expand to | |
255 | conditional clauses on some branches (I am grateful to Frank | |
256 | Yellin for this construction) | |
257 | */ | |
258 | ||
/* inv_var(x,r,c): the input state word of x needed for row r of output
   column c in an inverse round, i.e. column (c - r) mod 4.  r and c are
   parenthesised in the comparisons so that expression arguments select
   the intended row/column; x and the column index handed to s() are
   passed through untouched because s() is defined elsewhere and may
   paste them into an identifier.                                        */
#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))
264 | ||
/* inv_rnd(y,x,k,c): column c of a normal inverse round.  With a full
   round table (IT4_SET or IT1_SET) the result is k[c] ^ lookup(x) and
   inv_mcol() is not used, so dec_imvars is undefined and the function
   below declares no temporaries for it.  Without a table the round key
   is xored in first and inv_mcol() is applied to the sum.               */
#ifdef IT4_SET
#  undef dec_imvars
#  ifdef DEC_ROUND_CACHE_TABLES
#    define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
#  else
#    define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
#  endif
#elif defined(IT1_SET)
#  undef dec_imvars
#  define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
#else
#  define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
#endif

/* inv_lrnd(y,x,k,c): column c of the last inverse round, y[c] = k[c] ^
   lookup(x), using the t_il tables or the plain t_ibox lookup when no
   last-round table is configured.                                       */
#ifdef IL4_SET
#  ifdef LAST_DEC_ROUND_CACHE_TABLES
#    define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
#  else
#    define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
#  endif
#elif defined(IL1_SET)
#  define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
#else
#  define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
#endif
290 | ||
291 | aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, | |
292 | unsigned char *out, const aes_decrypt_ctx cx[1]) | |
293 | { aes_32t locals(b0, b1); | |
294 | const aes_32t *kptr = cx->ks + cx->rn * N_COLS; | |
295 | const aes_32t *kp; | |
296 | #if defined(DEC_ROUND_CACHE_TABLES) | |
297 | dtables(t_in); | |
298 | #endif | |
299 | #if defined(LAST_DEC_ROUND_CACHE_TABLES) | |
300 | dtables(t_il); | |
301 | #endif | |
302 | ||
303 | #if defined( dec_imvars ) | |
304 | dec_imvars; /* declare variables for inv_mcol() if needed */ | |
305 | #endif | |
306 | ||
307 | #if defined( AES_ERR_CHK ) | |
308 | if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 ) | |
309 | return aes_error; | |
310 | #endif | |
311 | ||
312 | #if defined(DEC_ROUND_CACHE_TABLES) | |
313 | itables(t_in); | |
314 | #endif | |
315 | ||
316 | in += AES_BLOCK_SIZE * (num_blk - 1); | |
317 | out += AES_BLOCK_SIZE * (num_blk - 1); | |
318 | // Load the last block's ciphertext into b1 | |
319 | state_in(b1, in); | |
320 | ||
321 | for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk) | |
322 | { | |
323 | kp = kptr; | |
324 | // Do the xor part of state_in, where b1 is the previous block's ciphertext. | |
325 | key_in(b0, b1, kp); | |
326 | ||
327 | #if (DEC_UNROLL == FULL) | |
328 | ||
329 | switch(cx->rn) | |
330 | { | |
331 | case 14: | |
332 | round(inv_rnd, b1, b0, kp - 1 * N_COLS); | |
333 | round(inv_rnd, b0, b1, kp - 2 * N_COLS); | |
334 | kp -= 2 * N_COLS; | |
335 | case 12: | |
336 | round(inv_rnd, b1, b0, kp - 1 * N_COLS); | |
337 | round(inv_rnd, b0, b1, kp - 2 * N_COLS); | |
338 | kp -= 2 * N_COLS; | |
339 | case 10: | |
340 | default: | |
341 | round(inv_rnd, b1, b0, kp - 1 * N_COLS); | |
342 | round(inv_rnd, b0, b1, kp - 2 * N_COLS); | |
343 | round(inv_rnd, b1, b0, kp - 3 * N_COLS); | |
344 | round(inv_rnd, b0, b1, kp - 4 * N_COLS); | |
345 | round(inv_rnd, b1, b0, kp - 5 * N_COLS); | |
346 | round(inv_rnd, b0, b1, kp - 6 * N_COLS); | |
347 | round(inv_rnd, b1, b0, kp - 7 * N_COLS); | |
348 | round(inv_rnd, b0, b1, kp - 8 * N_COLS); | |
349 | round(inv_rnd, b1, b0, kp - 9 * N_COLS); | |
350 | #if defined(LAST_DEC_ROUND_CACHE_TABLES) | |
351 | itables(t_il); | |
352 | #endif | |
353 | round(inv_lrnd, b0, b1, kp - 10 * N_COLS); | |
354 | } | |
355 | ||
356 | #else | |
357 | ||
358 | { aes_32t rnd; | |
359 | #if (DEC_UNROLL == PARTIAL) | |
360 | for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd) | |
361 | { | |
362 | kp -= N_COLS; | |
363 | round(inv_rnd, b1, b0, kp); | |
364 | kp -= N_COLS; | |
365 | round(inv_rnd, b0, b1, kp); | |
366 | } | |
367 | kp -= N_COLS; | |
368 | round(inv_rnd, b1, b0, kp); | |
369 | #else | |
370 | for(rnd = 0; rnd < cx->rn - 1; ++rnd) | |
371 | { | |
372 | kp -= N_COLS; | |
373 | round(inv_rnd, b1, b0, kp); | |
374 | l_copy(b0, b1); | |
375 | } | |
376 | #endif | |
377 | #if defined(LAST_DEC_ROUND_CACHE_TABLES) | |
378 | itables(t_il); | |
379 | #endif | |
380 | kp -= N_COLS; | |
381 | round(inv_lrnd, b0, b1, kp); | |
382 | } | |
383 | #endif | |
384 | ||
385 | if (num_blk == 1) | |
386 | { | |
387 | // We are doing the first block so we need the IV rather than the previous | |
388 | // block for CBC (there is no previous block) | |
389 | state_in(b1, in_iv); | |
390 | } | |
391 | else | |
392 | { | |
393 | in -= AES_BLOCK_SIZE; | |
394 | state_in(b1, in); | |
395 | } | |
396 | ||
397 | // Do the CBC with b1 which is either the IV or the ciphertext of the previous block. | |
398 | cbc(b0, b1); | |
399 | ||
400 | state_out(out, b0); | |
401 | } | |
402 | #if defined( AES_ERR_CHK ) | |
403 | return aes_good; | |
404 | #endif | |
405 | } | |
406 | ||
407 | #endif | |
408 | ||
409 | #if defined(__cplusplus) | |
410 | } | |
411 | #endif |