]>
Commit | Line | Data |
---|---|---|
13fec989 A |
1 | /* |
2 | --------------------------------------------------------------------------- | |
3 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. | |
4 | ||
5 | LICENSE TERMS | |
6 | ||
7 | The free distribution and use of this software in both source and binary | |
8 | form is allowed (with or without changes) provided that: | |
9 | ||
10 | 1. distributions of this source code include the above copyright | |
11 | notice, this list of conditions and the following disclaimer; | |
12 | ||
13 | 2. distributions in binary form include the above copyright | |
14 | notice, this list of conditions and the following disclaimer | |
15 | in the documentation and/or other associated materials; | |
16 | ||
17 | 3. the copyright holder's name is not used to endorse products | |
18 | built using this software without specific written permission. | |
19 | ||
20 | ALTERNATIVELY, provided that this notice is retained in full, this product | |
21 | may be distributed under the terms of the GNU General Public License (GPL), | |
22 | in which case the provisions of the GPL apply INSTEAD OF those given above. | |
23 | ||
24 | DISCLAIMER | |
25 | ||
26 | This software is provided 'as is' with no explicit or implied warranties | |
27 | in respect of its properties, including, but not limited to, correctness | |
28 | and/or fitness for purpose. | |
29 | --------------------------------------------------------------------------- | |
30 | Issue 28/01/2004 | |
31 | ||
32 | My thanks go to Dag Arne Osvik for devising the schemes used here for key | |
33 | length derivation from the form of the key schedule | |
34 | ||
35 | This file contains the compilation options for AES (Rijndael) and code | |
36 | that is common across encryption, key scheduling and table generation. | |
37 | ||
38 | OPERATION | |
39 | ||
40 | These source code files implement the AES algorithm Rijndael designed by | |
41 | Joan Daemen and Vincent Rijmen. This version is designed for the standard | |
42 | block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 | |
43 | and 32 bytes). | |
44 | ||
45 | This version is designed for flexibility and speed using operations on | |
46 | 32-bit words rather than operations on bytes. It can be compiled with | |
47 | either big or little endian internal byte order but is faster when the | |
48 | native byte order for the processor is used. | |
49 | ||
50 | THE CIPHER INTERFACE | |
51 | ||
52 | The cipher interface is implemented as an array of bytes in which lower | |
53 | AES bit sequence indexes map to higher numeric significance within bytes. | |
54 | ||
55 | aes_08t (an unsigned 8-bit type) | |
56 | aes_32t (an unsigned 32-bit type) | |
57 | struct aes_encrypt_ctx (structure for the cipher encryption context) | |
58 | struct aes_decrypt_ctx (structure for the cipher decryption context) | |
59 | aes_rval the function return type | |
60 | ||
61 | C subroutine calls: | |
62 | ||
63 | aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); | |
64 | aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); | |
65 | aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); | |
66 | aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, | |
67 | const aes_encrypt_ctx cx[1]); | |
68 | ||
69 | aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); | |
70 | aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); | |
71 | aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); | |
72 | aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, | |
73 | const aes_decrypt_ctx cx[1]); | |
74 | ||
75 | IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that | |
76 | you call gen_tabs() before AES is used so that the tables are initialised. | |
77 | ||
78 | C++ aes class subroutines: | |
79 | ||
80 | Class AESencrypt for encryption | |
81 | ||
82 | Constructors: | |
83 | AESencrypt(void) | |
84 | AESencrypt(const unsigned char *key) - 128 bit key | |
85 | Members: | |
86 | aes_rval key128(const unsigned char *key) | |
87 | aes_rval key192(const unsigned char *key) | |
88 | aes_rval key256(const unsigned char *key) | |
89 | aes_rval encrypt(const unsigned char *in, unsigned char *out) const | |
90 | ||
91 | Class AESdecrypt for decryption | |
92 | Constructors: | |
93 | AESdecrypt(void) | |
94 | AESdecrypt(const unsigned char *key) - 128 bit key | |
95 | Members: | |
96 | aes_rval key128(const unsigned char *key) | |
97 | aes_rval key192(const unsigned char *key) | |
98 | aes_rval key256(const unsigned char *key) | |
99 | aes_rval decrypt(const unsigned char *in, unsigned char *out) const | |
100 | ||
101 | COMPILATION | |
102 | ||
103 | The files used to provide AES (Rijndael) are | |
104 | ||
105 | a. aes.h for the definitions needed for use in C. | |
106 | b. aescpp.h for the definitions needed for use in C++. | |
107 | c. aesopt.h for setting compilation options (also includes common code). | |
108 | d. aescrypt.c for encryption and decryption, or | |
109 | e. aeskey.c for key scheduling. | |
110 | f. aestab.c for table loading or generation. | |
111 | g. aescrypt.asm for encryption and decryption using assembler code. | |
112 | h. aescrypt.mmx.asm for encryption and decryption using MMX assembler. | |
113 | ||
114 | To compile AES (Rijndael) for use in C code use aes.h and set the | |
115 | defines here for the facilities you need (key lengths, encryption | |
116 | and/or decryption). Do not define AES_DLL or AES_CPP. Set the options | |
117 | for optimisations and table sizes here. | |
118 | ||
119 | To compile AES (Rijndael) for use in C++ code use aescpp.h but do | |
120 | not define AES_DLL | |
121 | ||
122 | To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use | |
123 | aes.h and include the AES_DLL define. | |
124 | ||
125 | CONFIGURATION OPTIONS (here and in aes.h) | |
126 | ||
127 | a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL | |
128 | b. You may need to set PLATFORM_BYTE_ORDER to define the byte order. | |
129 | c. If you want the code to run in a specific internal byte order, then | |
130 | ALGORITHM_BYTE_ORDER must be set accordingly. | |
131 | d. set other configuration options described below. | |
132 | */ | |
133 | ||
134 | #if !defined( _AESOPT_H ) | |
135 | #define _AESOPT_H | |
136 | ||
137 | #include "aes.h" | |
138 | ||
139 | /* CONFIGURATION - USE OF DEFINES | |
140 | ||
141 | Later in this section there are a number of defines that control the | |
142 | operation of the code. In each section, the purpose of each define is | |
143 | explained so that the relevant form can be included or excluded by | |
144 | setting either 1's or 0's respectively on the branches of the related | |
145 | #if clauses. | |
146 | ||
147 | PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS | |
148 | ||
149 | To obtain the highest speed on processors with 32-bit words, this code | |
150 | needs to determine the byte order of the target machine. The following | |
151 | block of code is an attempt to capture the most obvious ways in which | |
152 | various environments define byte order. It may well fail, in which case | |
153 | the definitions will need to be set by editing at the points marked | |
154 | **** EDIT HERE IF NECESSARY **** below. My thanks go to Peter Gutmann | |
155 | for his assistance with this endian detection nightmare. | |
156 | */ | |
157 | ||
158 | #define BRG_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ | |
159 | #define BRG_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ | |
160 | ||
161 | #if defined(__GNUC__) || defined(__GNU_LIBRARY__) | |
162 | # if defined(__FreeBSD__) || defined(__OpenBSD__) | |
163 | # include <sys/endian.h> | |
164 | # elif defined( BSD ) && BSD >= 199103 | |
165 | # include <machine/endian.h> | |
166 | # elif defined(__APPLE__) | |
167 | # if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN ) | |
168 | # define BIG_ENDIAN | |
169 | # elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN ) | |
170 | # define LITTLE_ENDIAN | |
171 | # endif | |
172 | # else | |
173 | # include <endian.h> | |
174 | # if defined(__BEOS__) | |
175 | # include <byteswap.h> | |
176 | # endif | |
177 | # endif | |
178 | #endif | |
179 | ||
180 | #if !defined(PLATFORM_BYTE_ORDER) | |
181 | # if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN) | |
182 | # if defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN) | |
183 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
184 | # elif !defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) | |
185 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
186 | # elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) | |
187 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
188 | # elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN) | |
189 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
190 | # endif | |
191 | # elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN) | |
192 | # if defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) | |
193 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
194 | # elif !defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) | |
195 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
196 | # elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN) | |
197 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
198 | # elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN) | |
199 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
200 | # endif | |
201 | # elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__) | |
202 | # if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) | |
203 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
204 | # elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__) | |
205 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
206 | # elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__) | |
207 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
208 | # elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__) | |
209 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
210 | # endif | |
211 | # endif | |
212 | #endif | |
213 | ||
214 | /* if the platform is still unknown, try to find its byte order */ | |
215 | /* from commonly used machine defines */ | |
216 | ||
217 | #if !defined(PLATFORM_BYTE_ORDER) | |
218 | ||
219 | #if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ | |
220 | defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ | |
221 | defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ | |
222 | defined( vax ) || defined( vms ) || defined( VMS ) || \ | |
223 | defined( __VMS ) | |
224 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
225 | ||
226 | #elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ | |
227 | defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ | |
228 | defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ | |
229 | defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ | |
230 | defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ | |
231 | defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ ) | |
232 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
233 | ||
234 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */ | |
235 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
236 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */ | |
237 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN | |
238 | #else | |
239 | # error Please edit aesopt.h (line 234 or 236) to set the platform byte order | |
240 | #endif | |
241 | ||
242 | #endif | |
243 | ||
244 | /* SOME LOCAL DEFINITIONS */ | |
245 | ||
246 | #define NO_TABLES 0 | |
247 | #define ONE_TABLE 1 | |
248 | #define FOUR_TABLES 4 | |
249 | #define NONE 0 | |
250 | #define PARTIAL 1 | |
251 | #define FULL 2 | |
252 | ||
253 | #if defined(bswap32) /* aes_sw32(x): byte swap of a 32-bit word; use a platform macro when one is visible */ | |
254 | #define aes_sw32 bswap32 | |
255 | #elif defined(bswap_32) | |
256 | #define aes_sw32 bswap_32 | |
257 | #else /* no swap macro visible: reverse the four bytes using two rotations and two masks */ | |
258 | #define brot(x,n) (((aes_32t)(x) << (n)) | ((aes_32t)(x) >> (32 - (n)))) /* rotate left by n bits, 0 < n < 32 */ | |
259 | #define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) | |
260 | #endif | |
261 | ||
262 | /* 1. FUNCTIONS REQUIRED | |
263 | ||
264 | This implementation provides subroutines for encryption, decryption | |
265 | and for setting the three key lengths (separately) for encryption | |
266 | and decryption. When the assembler code is not being used the following | |
267 | definition blocks allow the selection of the routines that are to be | |
268 | included in the compilation. | |
269 | */ | |
270 | #if defined( AES_ENCRYPT ) | |
271 | #define ENCRYPTION | |
272 | #define ENCRYPTION_KEY_SCHEDULE | |
273 | #endif | |
274 | ||
275 | #if defined( AES_DECRYPT ) | |
276 | #define DECRYPTION | |
277 | #define DECRYPTION_KEY_SCHEDULE | |
278 | #endif | |
279 | ||
280 | /* 2. ASSEMBLER SUPPORT | |
281 | ||
282 | This define (which can be on the command line) enables the use of the | |
283 | assembler code routines for encryption and decryption with the C code | |
284 | only providing key scheduling | |
285 | */ | |
286 | #if 0 && !defined(AES_ASM) | |
287 | #define AES_ASM | |
288 | #endif | |
289 | ||
290 | /* 3. BYTE ORDER WITHIN 32 BIT WORDS | |
291 | ||
292 | The fundamental data processing units in Rijndael are 8-bit bytes. The | |
293 | input, output and key input are all enumerated arrays of bytes in which | |
294 | bytes are numbered starting at zero and increasing to one less than the | |
295 | number of bytes in the array in question. This enumeration is only used | |
296 | for naming bytes and does not imply any adjacency or order relationship | |
297 | from one byte to another. When these inputs and outputs are considered | |
298 | as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to | |
299 | byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. | |
300 | In this implementation bits are numbered from 0 to 7 starting at the | |
301 | numerically least significant end of each byte (bit n represents 2^n). | |
302 | ||
303 | However, Rijndael can be implemented more efficiently using 32-bit | |
304 | words by packing bytes into words so that bytes 4*n to 4*n+3 are placed | |
305 | into word[n]. While in principle these bytes can be assembled into words | |
306 | in any positions, this implementation only supports the two formats in | |
307 | which bytes in adjacent positions within words also have adjacent byte | |
308 | numbers. This order is called big-endian if the lowest numbered bytes | |
309 | in words have the highest numeric significance and little-endian if the | |
310 | opposite applies. | |
311 | ||
312 | This code can work in either order irrespective of the order used by the | |
313 | machine on which it runs. Normally the internal byte order will be set | |
314 | to the order of the processor on which the code is to be run but this | |
315 | define can be used to reverse this in special situations | |
316 | ||
317 | NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set | |
318 | */ | |
319 | #if 1 || defined(AES_ASM) | |
320 | #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER | |
321 | #elif 0 | |
322 | #define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN | |
323 | #elif 0 | |
324 | #define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN | |
325 | #else | |
326 | #error The algorithm byte order is not defined | |
327 | #endif | |
328 | ||
329 | /* 4. FAST INPUT/OUTPUT OPERATIONS. | |
330 | ||
331 | On some machines it is possible to improve speed by transferring the | |
332 | bytes in the input and output arrays to and from the internal 32-bit | |
333 | variables by addressing these arrays as if they are arrays of 32-bit | |
334 | words. On some machines this will always be possible but there may | |
335 | be a large performance penalty if the byte arrays are not aligned on | |
336 | the normal word boundaries. On other machines this technique will | |
337 | lead to memory access errors when such 32-bit word accesses are not | |
338 | properly aligned. The option SAFE_IO avoids such problems but will | |
339 | often be slower on those machines that support misaligned access | |
340 | (especially so if care is taken to align the input and output byte | |
341 | arrays on 32-bit word boundaries). If SAFE_IO is not defined it is | |
342 | assumed that access to byte arrays as if they are arrays of 32-bit | |
343 | words will not cause problems when such accesses are misaligned. | |
344 | */ | |
345 | #if 0 && !defined(_MSC_VER) | |
346 | #define SAFE_IO | |
347 | #endif | |
348 | ||
349 | /* 5. LOOP UNROLLING | |
350 | ||
351 | The code for encryption and decryption cycles through a number of rounds | |
352 | that can be implemented either in a loop or by expanding the code into a | |
353 | long sequence of instructions, the latter producing a larger program but | |
354 | one that will often be much faster. The latter is called loop unrolling. | |
355 | There are also potential speed advantages in expanding two iterations in | |
356 | a loop with half the number of iterations, which is called partial loop | |
357 | unrolling. The following options allow partial or full loop unrolling | |
358 | to be set independently for encryption and decryption | |
359 | */ | |
360 | #if 1 | |
361 | #define ENC_UNROLL FULL | |
362 | #elif 0 | |
363 | #define ENC_UNROLL PARTIAL | |
364 | #else | |
365 | #define ENC_UNROLL NONE | |
366 | #endif | |
367 | ||
368 | #if 1 | |
369 | #define DEC_UNROLL FULL | |
370 | #elif 0 | |
371 | #define DEC_UNROLL PARTIAL | |
372 | #else | |
373 | #define DEC_UNROLL NONE | |
374 | #endif | |
375 | ||
376 | /* 6. FAST FINITE FIELD OPERATIONS | |
377 | ||
378 | If this section is included, tables are used to provide faster finite | |
379 | field arithmetic (this has no effect if FIXED_TABLES is defined). | |
380 | */ | |
381 | #if 1 | |
382 | #define FF_TABLES | |
383 | #endif | |
384 | ||
385 | /* 7. INTERNAL STATE VARIABLE FORMAT | |
386 | ||
387 | The internal state of Rijndael is stored in a number of local 32-bit | |
388 | word variables which can be defined either as an array or as individual | |
389 | named variables. Include this section if you want to store these local | |
390 | variables in arrays. Otherwise individual local variables will be used. | |
391 | */ | |
392 | #if 0 | |
393 | #define ARRAYS | |
394 | #endif | |
395 | ||
396 | /* In this implementation the columns of the state array are each held in | |
397 | 32-bit words. The state array can be held in various ways: in an array | |
398 | of words, in a number of individual word variables or in a number of | |
399 | processor registers. The following define maps a variable name x and | |
400 | a column number c to the way the state array variable is to be held. | |
401 | The first define below maps the state into an array x[c] whereas the | |
402 | second form maps the state into a number of individual variables x0, | |
403 | x1, etc. Another form could map individual state columns to machine | |
404 | register names. | |
405 | */ | |
406 | ||
407 | #if defined(ARRAYS) | |
408 | #define s(x,c) x[c] | |
409 | #else | |
410 | #define s(x,c) x##c | |
411 | #endif | |
412 | ||
413 | /* 8. FIXED OR DYNAMIC TABLES | |
414 | ||
415 | When this section is included the tables used by the code are compiled | |
416 | statically into the binary file. Otherwise the subroutine gen_tabs() | |
417 | must be called to compute them before the code is first used. | |
418 | */ | |
419 | #if 1 | |
420 | #define FIXED_TABLES | |
421 | #endif | |
422 | ||
423 | /* 9. TABLE ALIGNMENT | |
424 | ||
425 | On some systems speed will be improved by aligning the AES large lookup | |
426 | tables on particular boundaries. This define should be set to a power of | |
427 | two giving the desired alignment. It can be left undefined if alignment | |
428 | is not needed. This option is specific to the Microsoft VC++ compiler - | |
429 | it seems to sometimes cause trouble for the VC++ version 6 compiler. | |
430 | */ | |
431 | ||
432 | #if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300) | |
433 | #define TABLE_ALIGN 64 | |
434 | #endif | |
435 | ||
436 | /* 10. INTERNAL TABLE CONFIGURATION | |
437 | ||
438 | This cipher proceeds by repeating in a number of cycles known as 'rounds' | |
439 | which are implemented by a round function which can optionally be speeded | |
440 | up using tables. The basic tables are each 256 32-bit words, with either | |
441 | one or four tables being required for each round function depending on | |
442 | how much speed is required. The encryption and decryption round functions | |
443 | are different and the last encryption and decryption round functions are | |
444 | different again making four different round functions in all. | |
445 | ||
446 | This means that: | |
447 | 1. Normal encryption and decryption rounds can each use either 0, 1 | |
448 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each. | |
449 | 2. The last encryption and decryption rounds can also use either 0, 1 | |
450 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each. | |
451 | ||
452 | Include or exclude the appropriate definitions below to set the number | |
453 | of tables used by this implementation. | |
454 | */ | |
455 | ||
456 | #if 1 /* set tables for the normal encryption round */ | |
457 | #define ENC_ROUND FOUR_TABLES | |
458 | #elif 0 | |
459 | #define ENC_ROUND ONE_TABLE | |
460 | #else | |
461 | #define ENC_ROUND NO_TABLES | |
462 | #endif | |
463 | ||
464 | #if 1 /* set tables for the last encryption round */ | |
465 | #define LAST_ENC_ROUND FOUR_TABLES | |
466 | #elif 0 | |
467 | #define LAST_ENC_ROUND ONE_TABLE | |
468 | #else | |
469 | #define LAST_ENC_ROUND NO_TABLES | |
470 | #endif | |
471 | ||
472 | #if 1 /* set tables for the normal decryption round */ | |
473 | #define DEC_ROUND FOUR_TABLES | |
474 | #elif 0 | |
475 | #define DEC_ROUND ONE_TABLE | |
476 | #else | |
477 | #define DEC_ROUND NO_TABLES | |
478 | #endif | |
479 | ||
480 | #if 1 /* set tables for the last decryption round */ | |
481 | #define LAST_DEC_ROUND FOUR_TABLES | |
482 | #elif 0 | |
483 | #define LAST_DEC_ROUND ONE_TABLE | |
484 | #else | |
485 | #define LAST_DEC_ROUND NO_TABLES | |
486 | #endif | |
487 | ||
488 | /* The decryption key schedule can be speeded up with tables in the same | |
489 | way that the round functions can. Include or exclude the following | |
490 | defines to set this requirement. | |
491 | */ | |
492 | #if 1 | |
493 | #define KEY_SCHED FOUR_TABLES | |
494 | #elif 0 | |
495 | #define KEY_SCHED ONE_TABLE | |
496 | #else | |
497 | #define KEY_SCHED NO_TABLES | |
498 | #endif | |
499 | ||
500 | /* 11. TABLE POINTER CACHING | |
501 | ||
502 | Normally tables are referenced directly. Enable this option if you wish to | |
503 | cache pointers to the tables in the encrypt/decrypt code. Note that this | |
504 | only works if you are using FOUR_TABLES for the ROUND you enable this for. | |
505 | */ | |
506 | #if 1 | |
507 | #define ENC_ROUND_CACHE_TABLES | |
508 | #endif | |
509 | #if 1 | |
510 | #define LAST_ENC_ROUND_CACHE_TABLES | |
511 | #endif | |
512 | #if 1 | |
513 | #define DEC_ROUND_CACHE_TABLES | |
514 | #endif | |
515 | #if 1 | |
516 | #define LAST_DEC_ROUND_CACHE_TABLES | |
517 | #endif | |
518 | ||
519 | ||
520 | /* END OF CONFIGURATION OPTIONS */ | |
521 | ||
522 | #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) | |
523 | ||
524 | /* Disable or report errors on some combinations of options */ | |
525 | ||
526 | #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES | |
527 | #undef LAST_ENC_ROUND | |
528 | #define LAST_ENC_ROUND NO_TABLES | |
529 | #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES | |
530 | #undef LAST_ENC_ROUND | |
531 | #define LAST_ENC_ROUND ONE_TABLE | |
532 | #endif | |
533 | ||
534 | #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE | |
535 | #undef ENC_UNROLL | |
536 | #define ENC_UNROLL NONE | |
537 | #endif | |
538 | ||
539 | #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES | |
540 | #undef LAST_DEC_ROUND | |
541 | #define LAST_DEC_ROUND NO_TABLES | |
542 | #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES | |
543 | #undef LAST_DEC_ROUND | |
544 | #define LAST_DEC_ROUND ONE_TABLE | |
545 | #endif | |
546 | ||
547 | #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE | |
548 | #undef DEC_UNROLL | |
549 | #define DEC_UNROLL NONE | |
550 | #endif | |
551 | ||
552 | /* upr(x,n): rotates bytes within words by n positions, moving bytes to | |
553 | higher index positions with wrap around into low positions | |
554 | ups(x,n): moves bytes by n positions to higher index positions in | |
555 | words but without wrap around | |
556 | bval(x,n): extracts a byte from a word | |
557 | ||
558 | NOTE: The definitions given here are intended only for use with | |
559 | unsigned variables and with shift counts that are compile | |
560 | time constants | |
561 | */ | |
562 | ||
563 | #if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN) | |
564 | #define upr(x,n) (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n)))) | |
565 | #define ups(x,n) ((aes_32t) (x) << (8 * (n))) | |
566 | #define bval(x,n) ((aes_08t)((x) >> (8 * (n)))) | |
567 | #define bytes2word(b0, b1, b2, b3) \ | |
568 | (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0)) | |
569 | #endif | |
570 | ||
571 | #if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN) /* byte 0 is held in the most significant 8 bits of a word */ | |
572 | #define upr(x,n) (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n)))) | |
573 | #define ups(x,n) ((aes_32t) (x) >> (8 * (n))) | |
574 | #define bval(x,n) ((aes_08t)((x) >> (24 - 8 * (n)))) | |
575 | #define bytes2word(b0, b1, b2, b3) \ | |
576 | (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3)) | |
577 | #endif | |
578 | ||
579 | #if defined(SAFE_IO) /* move word c of buffer x one byte at a time through an aes_08t pointer */ | |
580 | ||
581 | #define word_in(x,c) bytes2word(((aes_08t*)(x)+4*(c))[0], ((aes_08t*)(x)+4*(c))[1], \ | |
582 | ((aes_08t*)(x)+4*(c))[2], ((aes_08t*)(x)+4*(c))[3]) | |
583 | #define word_out(x,c,v) { ((aes_08t*)(x)+4*(c))[0] = bval(v,0); ((aes_08t*)(x)+4*(c))[1] = bval(v,1); \ | |
584 | ((aes_08t*)(x)+4*(c))[2] = bval(v,2); ((aes_08t*)(x)+4*(c))[3] = bval(v,3); } | |
585 | ||
586 | #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER) /* read or write word c of x directly as an aes_32t */ | |
587 | ||
588 | #define word_in(x,c) (*((aes_32t*)(x)+(c))) | |
589 | #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v)) | |
590 | ||
591 | #else /* direct aes_32t access, with aes_sw32 applied because the algorithm and platform byte orders differ */ | |
592 | ||
593 | #define word_in(x,c) aes_sw32(*((aes_32t*)(x)+(c))) | |
594 | #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v)) | |
595 | ||
596 | #endif | |
597 | ||
598 | /* the finite field modular polynomial and elements */ | |
599 | ||
600 | #define WPOLY 0x011b | |
601 | #define BPOLY 0x1b | |
602 | ||
603 | /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ | |
604 | ||
605 | #define m1 0x80808080 | |
606 | #define m2 0x7f7f7f7f | |
607 | #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) | |
608 | ||
609 | /* The following defines provide alternative definitions of gf_mulx that might | |
610 | give improved performance if a fast 32-bit multiply is not available. Note | |
611 | that a temporary variable u needs to be defined where gf_mulx is used. | |
612 | ||
613 | #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) | |
614 | #define m4 (0x01010101 * BPOLY) | |
615 | #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) | |
616 | */ | |
617 | ||
618 | /* Work out which tables are needed for the different options */ | |
619 | ||
620 | #if defined( AES_ASM ) | |
621 | #if defined( ENC_ROUND ) | |
622 | #undef ENC_ROUND | |
623 | #endif | |
624 | #define ENC_ROUND FOUR_TABLES | |
625 | #if defined( LAST_ENC_ROUND ) | |
626 | #undef LAST_ENC_ROUND | |
627 | #endif | |
628 | #define LAST_ENC_ROUND FOUR_TABLES | |
629 | #if defined( DEC_ROUND ) | |
630 | #undef DEC_ROUND | |
631 | #endif | |
632 | #define DEC_ROUND FOUR_TABLES | |
633 | #if defined( LAST_DEC_ROUND ) | |
634 | #undef LAST_DEC_ROUND | |
635 | #endif | |
636 | #define LAST_DEC_ROUND FOUR_TABLES | |
637 | #if defined( KEY_SCHED ) | |
638 | #undef KEY_SCHED | |
639 | #define KEY_SCHED FOUR_TABLES | |
640 | #endif | |
641 | #endif | |
642 | ||
643 | #if defined(ENCRYPTION) || defined(AES_ASM) | |
644 | #if ENC_ROUND == ONE_TABLE | |
645 | #define FT1_SET | |
646 | #elif ENC_ROUND == FOUR_TABLES | |
647 | #define FT4_SET | |
648 | #else | |
649 | #define SBX_SET | |
650 | #endif | |
651 | #if LAST_ENC_ROUND == ONE_TABLE | |
652 | #define FL1_SET | |
653 | #elif LAST_ENC_ROUND == FOUR_TABLES | |
654 | #define FL4_SET | |
655 | #elif !defined(SBX_SET) | |
656 | #define SBX_SET | |
657 | #endif | |
658 | #endif | |
659 | ||
660 | #if defined(DECRYPTION) || defined(AES_ASM) | |
661 | #if DEC_ROUND == ONE_TABLE | |
662 | #define IT1_SET | |
663 | #elif DEC_ROUND == FOUR_TABLES | |
664 | #define IT4_SET | |
665 | #else | |
666 | #define ISB_SET | |
667 | #endif | |
668 | #if LAST_DEC_ROUND == ONE_TABLE | |
669 | #define IL1_SET | |
670 | #elif LAST_DEC_ROUND == FOUR_TABLES | |
671 | #define IL4_SET | |
672 | #elif !defined(ISB_SET) | |
673 | #define ISB_SET | |
674 | #endif | |
675 | #endif | |
676 | ||
677 | #if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE) | |
678 | #if KEY_SCHED == ONE_TABLE | |
679 | #define LS1_SET | |
680 | #define IM1_SET | |
681 | #elif KEY_SCHED == FOUR_TABLES | |
682 | #define LS4_SET | |
683 | #define IM4_SET | |
684 | #elif !defined(SBX_SET) | |
685 | #define SBX_SET | |
686 | #endif | |
687 | #endif | |
688 | ||
689 | /* generic definitions of Rijndael macros that use tables */ | |
690 | ||
691 | #define no_table(x,box,vf,rf,c) bytes2word( \ | |
692 | box[bval(vf(x,0,c),rf(0,c))], \ | |
693 | box[bval(vf(x,1,c),rf(1,c))], \ | |
694 | box[bval(vf(x,2,c),rf(2,c))], \ | |
695 | box[bval(vf(x,3,c),rf(3,c))]) | |
696 | ||
697 | #define one_table(x,op,tab,vf,rf,c) \ | |
698 | ( tab[bval(vf(x,0,c),rf(0,c))] \ | |
699 | ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ | |
700 | ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ | |
701 | ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) | |
702 | ||
703 | #define four_tables(x,tab,vf,rf,c) \ | |
704 | ( tab[0][bval(vf(x,0,c),rf(0,c))] \ | |
705 | ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ | |
706 | ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ | |
707 | ^ tab[3][bval(vf(x,3,c),rf(3,c))]) | |
708 | ||
709 | #define four_cached_tables(x,tab,vf,rf,c) \ | |
710 | ( tab##0[bval(vf(x,0,c),rf(0,c))] \ | |
711 | ^ tab##1[bval(vf(x,1,c),rf(1,c))] \ | |
712 | ^ tab##2[bval(vf(x,2,c),rf(2,c))] \ | |
713 | ^ tab##3[bval(vf(x,3,c),rf(3,c))]) | |
714 | ||
715 | #define vf1(x,r,c) (x) /* value selector: always the word x, whatever r and c are */ | |
716 | #define rf1(r,c) (r) /* byte selector: byte r, independent of c */ | |
717 | #define rf2(r,c) ((8+(r)-(c))&3) /* byte selector: (r - c) reduced modulo 4; the 8 keeps the sum non-negative for small c */ | |
718 | ||
719 | /* perform forward and inverse column mix operation on four bytes in long word x in */ | |
720 | /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ | |
721 | ||
722 | #if defined(FM4_SET) /* not currently used */ | |
723 | #define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) | |
724 | #elif defined(FM1_SET) /* not currently used */ | |
725 | #define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) | |
726 | #else | |
727 | #define dec_fmvars aes_32t g2 | |
728 | #define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) | |
729 | #endif | |
730 | ||
731 | #if defined(IM4_SET) | |
732 | #define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) | |
733 | #elif defined(IM1_SET) | |
734 | #define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) | |
735 | #else | |
736 | #define dec_imvars aes_32t g2, g4, g9 | |
737 | #define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ | |
738 | (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) | |
739 | #endif | |
740 | ||
741 | #if defined(FL4_SET) | |
742 | #define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) | |
743 | #elif defined(LS4_SET) | |
744 | #define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) | |
745 | #elif defined(FL1_SET) | |
746 | #define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) | |
747 | #elif defined(LS1_SET) | |
748 | #define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) | |
749 | #else | |
750 | #define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) | |
751 | #endif | |
752 | ||
753 | #endif |