]>
Commit | Line | Data |
---|---|---|
91447636 A |
1 | /*\r |
2 | ---------------------------------------------------------------------------\r | |
3 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.\r | |
4 | \r | |
5 | LICENSE TERMS\r | |
6 | \r | |
7 | The free distribution and use of this software in both source and binary\r | |
8 | form is allowed (with or without changes) provided that:\r | |
9 | \r | |
10 | 1. distributions of this source code include the above copyright\r | |
11 | notice, this list of conditions and the following disclaimer;\r | |
12 | \r | |
13 | 2. distributions in binary form include the above copyright\r | |
14 | notice, this list of conditions and the following disclaimer\r | |
15 | in the documentation and/or other associated materials;\r | |
16 | \r | |
17 | 3. the copyright holder's name is not used to endorse products\r | |
18 | built using this software without specific written permission.\r | |
19 | \r | |
20 | ALTERNATIVELY, provided that this notice is retained in full, this product\r | |
21 | may be distributed under the terms of the GNU General Public License (GPL),\r | |
22 | in which case the provisions of the GPL apply INSTEAD OF those given above.\r | |
23 | \r | |
24 | DISCLAIMER\r | |
25 | \r | |
26 | This software is provided 'as is' with no explicit or implied warranties\r | |
27 | in respect of its properties, including, but not limited to, correctness\r | |
28 | and/or fitness for purpose.\r | |
29 | ---------------------------------------------------------------------------\r | |
30 | Issue 28/01/2004\r | |
31 | \r | |
32 | My thanks go to Dag Arne Osvik for devising the schemes used here for key\r | |
33 | length derivation from the form of the key schedule\r | |
34 | \r | |
35 | This file contains the compilation options for AES (Rijndael) and code\r | |
36 | that is common across encryption, key scheduling and table generation.\r | |
37 | \r | |
38 | OPERATION\r | |
39 | \r | |
40 | These source code files implement the AES algorithm Rijndael designed by\r | |
41 | Joan Daemen and Vincent Rijmen. This version is designed for the standard\r | |
42 | block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24\r | |
43 | and 32 bytes).\r | |
44 | \r | |
45 | This version is designed for flexibility and speed using operations on\r | |
46 | 32-bit words rather than operations on bytes. It can be compiled with\r | |
47 | either big or little endian internal byte order but is faster when the\r | |
48 | native byte order for the processor is used.\r | |
49 | \r | |
50 | THE CIPHER INTERFACE\r | |
51 | \r | |
52 | The cipher interface is implemented as an array of bytes in which lower\r | |
53 | AES bit sequence indexes map to higher numeric significance within bytes.\r | |
54 | \r | |
55 | aes_08t (an unsigned 8-bit type)\r | |
56 | aes_32t (an unsigned 32-bit type)\r | |
57 | struct aes_encrypt_ctx (structure for the cipher encryption context)\r | |
58 | struct aes_decrypt_ctx (structure for the cipher decryption context)\r | |
59 | aes_rval the function return type\r | |
60 | \r | |
61 | C subroutine calls:\r | |
62 | \r | |
63 | aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);\r | |
64 | aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);\r | |
65 | aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);\r | |
66 | aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,\r | |
67 | const aes_encrypt_ctx cx[1]);\r | |
68 | \r | |
69 | aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);\r | |
70 | aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);\r | |
71 | aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);\r | |
72 | aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,\r | |
73 | const aes_decrypt_ctx cx[1]);\r | |
74 | \r | |
75 | IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that\r | |
76 | you call gen_tabs() before AES is used so that the tables are initialised.\r | |
77 | \r | |
78 | C++ aes class subroutines:\r | |
79 | \r | |
80 | Class AESencrypt for encryption\r | |
81 | \r | |
82 | Constructors:\r | |
83 | AESencrypt(void)\r | |
84 | AESencrypt(const unsigned char *key) - 128 bit key\r | |
85 | Members:\r | |
86 | aes_rval key128(const unsigned char *key)\r | |
87 | aes_rval key192(const unsigned char *key)\r | |
88 | aes_rval key256(const unsigned char *key)\r | |
89 | aes_rval encrypt(const unsigned char *in, unsigned char *out) const\r | |
90 | \r | |
91 | Class AESdecrypt for decryption\r | |
92 | Constructors:\r | |
93 | AESdecrypt(void)\r | |
94 | AESdecrypt(const unsigned char *key) - 128 bit key\r | |
95 | Members:\r | |
96 | aes_rval key128(const unsigned char *key)\r | |
97 | aes_rval key192(const unsigned char *key)\r | |
98 | aes_rval key256(const unsigned char *key)\r | |
99 | aes_rval decrypt(const unsigned char *in, unsigned char *out) const\r | |
100 | \r | |
101 | COMPILATION\r | |
102 | \r | |
103 | The files used to provide AES (Rijndael) are\r | |
104 | \r | |
105 | a. aes.h for the definitions needed for use in C.\r | |
106 | b. aescpp.h for the definitions needed for use in C++.\r | |
107 | c. aesopt.h for setting compilation options (also includes common code).\r | |
108 | d. aescrypt.c for encryption and decryption, or\r | |
109 | e. aeskey.c for key scheduling.\r | |
110 | f. aestab.c for table loading or generation.\r | |
111 | g. aescrypt.asm for encryption and decryption using assembler code.\r | |
112 | h. aescrypt.mmx.asm for encryption and decryption using MMX assembler.\r | |
113 | \r | |
114 | To compile AES (Rijndael) for use in C code use aes.h and set the\r | |
115 | defines here for the facilities you need (key lengths, encryption\r | |
116 | and/or decryption). Do not define AES_DLL or AES_CPP. Set the options\r | |
117 | for optimisations and table sizes here.\r | |
118 | \r | |
119 | To compile AES (Rijndael) for use in C++ code use aescpp.h but do\r | |
120 | not define AES_DLL\r | |
121 | \r | |
122 | To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use\r | |
123 | aes.h and include the AES_DLL define.\r | |
124 | \r | |
125 | CONFIGURATION OPTIONS (here and in aes.h)\r | |
126 | \r | |
127 | a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL\r | |
128 | b. You may need to set PLATFORM_BYTE_ORDER to define the byte order.\r | |
129 | c. If you want the code to run in a specific internal byte order, then\r | |
130 | ALGORITHM_BYTE_ORDER must be set accordingly.\r | |
131 | d. set other configuration options described below.\r | |
132 | */\r | |
133 | \r | |
134 | #if !defined( _AESOPT_H )\r | |
135 | #define _AESOPT_H\r | |
136 | \r | |
137 | #include "aes.h"\r | |
138 | \r | |
139 | /* CONFIGURATION - USE OF DEFINES\r | |
140 | \r | |
141 | Later in this section there are a number of defines that control the\r | |
142 | operation of the code. In each section, the purpose of each define is\r | |
143 | explained so that the relevant form can be included or excluded by\r | |
144 | setting either 1's or 0's respectively on the branches of the related\r | |
145 | #if clauses.\r | |
146 | \r | |
147 | PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS\r | |
148 | \r | |
149 | To obtain the highest speed on processors with 32-bit words, this code\r | |
150 | needs to determine the byte order of the target machine. The following\r | |
151 | block of code is an attempt to capture the most obvious ways in which\r | |
152 | various environments define byte order. It may well fail, in which case\r | |
153 | the definitions will need to be set by editing at the points marked\r | |
154 | **** EDIT HERE IF NECESSARY **** below. My thanks go to Peter Gutmann\r | |
155 | for his assistance with this endian detection nightmare.\r | |
156 | */\r | |
157 | \r | |
158 | #define BRG_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */\r | |
159 | #define BRG_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */\r | |
160 | \r | |
161 | #if defined(__GNUC__) || defined(__GNU_LIBRARY__)\r | |
162 | # if defined(__FreeBSD__) || defined(__OpenBSD__)\r | |
163 | # include <sys/endian.h>\r | |
164 | # elif defined( BSD ) && BSD >= 199103\r | |
165 | # include <machine/endian.h>\r | |
166 | # elif defined(__APPLE__)\r | |
167 | # if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN )\r | |
168 | # define BIG_ENDIAN\r | |
169 | # elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN )\r | |
170 | # define LITTLE_ENDIAN\r | |
171 | # endif\r | |
172 | # else\r | |
173 | # include <endian.h>\r | |
174 | # if defined(__BEOS__)\r | |
175 | # include <byteswap.h>\r | |
176 | # endif\r | |
177 | # endif\r | |
178 | #endif\r | |
179 | \r | |
180 | #if !defined(PLATFORM_BYTE_ORDER)\r | |
181 | # if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)\r | |
182 | # if defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)\r | |
183 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
184 | # elif !defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)\r | |
185 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
186 | # elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)\r | |
187 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
188 | # elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)\r | |
189 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
190 | # endif\r | |
191 | # elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)\r | |
192 | # if defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)\r | |
193 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
194 | # elif !defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)\r | |
195 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
196 | # elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN)\r | |
197 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
198 | # elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN)\r | |
199 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
200 | # endif\r | |
201 | # elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)\r | |
202 | # if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)\r | |
203 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
204 | # elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__)\r | |
205 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
206 | # elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__)\r | |
207 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
208 | # elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__)\r | |
209 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
210 | # endif\r | |
211 | # endif\r | |
212 | #endif\r | |
213 | \r | |
214 | /* if the platform is still unknown, try to find its byte order */\r | |
215 | /* from commonly used machine defines */\r | |
216 | \r | |
217 | #if !defined(PLATFORM_BYTE_ORDER)\r | |
218 | \r | |
219 | #if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \\r | |
220 | defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \\r | |
221 | defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \\r | |
222 | defined( vax ) || defined( vms ) || defined( VMS ) || \\r | |
223 | defined( __VMS )\r | |
224 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
225 | \r | |
226 | #elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \\r | |
227 | defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \\r | |
228 | defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \\r | |
229 | defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \\r | |
230 | defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \\r | |
231 | defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ )\r | |
232 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
233 | \r | |
234 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */\r | |
235 | # define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
236 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */\r | |
237 | # define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
238 | #else\r | |
239 | # error Please edit aesopt.h (line 234 or 236) to set the platform byte order\r | |
240 | #endif\r | |
241 | \r | |
242 | #endif\r | |
243 | \r | |
244 | /* SOME LOCAL DEFINITIONS */\r | |
245 | \r | |
246 | #define NO_TABLES 0\r | |
247 | #define ONE_TABLE 1\r | |
248 | #define FOUR_TABLES 4\r | |
249 | #define NONE 0\r | |
250 | #define PARTIAL 1\r | |
251 | #define FULL 2\r | |
252 | \r | |
/* aes_sw32(x): reverse the order of the four bytes in a 32-bit word.
   A platform byte-swap macro (bswap32 or bswap_32) is used when one is
   already defined; otherwise the swap is built from two rotates.

   brot(x,n): rotate the 32-bit value x left by n bits. n must be in the
   range 1..31, because n == 0 would shift right by 32. The count is
   parenthesised so that an expression argument such as 'a + b' is
   applied as a whole in both shifts.
*/
#if defined(bswap32)
#define aes_sw32    bswap32
#elif defined(bswap_32)
#define aes_sw32    bswap_32
#else
#define brot(x,n)   (((aes_32t)(x) <<  (n)) | ((aes_32t)(x) >> (32 - (n))))
#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
#endif
261 | \r | |
262 | /* 1. FUNCTIONS REQUIRED\r | |
263 | \r | |
264 | This implementation provides subroutines for encryption, decryption\r | |
265 | and for setting the three key lengths (separately) for encryption\r | |
266 | and decryption. When the assembler code is not being used the following\r | |
267 | definition blocks allow the selection of the routines that are to be\r | |
268 | included in the compilation.\r | |
269 | */\r | |
270 | #if defined( AES_ENCRYPT )\r | |
271 | #define ENCRYPTION\r | |
272 | #define ENCRYPTION_KEY_SCHEDULE\r | |
273 | #endif\r | |
274 | \r | |
275 | #if defined( AES_DECRYPT )\r | |
276 | #define DECRYPTION\r | |
277 | #define DECRYPTION_KEY_SCHEDULE\r | |
278 | #endif\r | |
279 | \r | |
280 | /* 2. ASSEMBLER SUPPORT\r | |
281 | \r | |
282 | This define (which can be on the command line) enables the use of the\r | |
283 | assembler code routines for encryption and decryption with the C code\r | |
284 | only providing key scheduling\r | |
285 | */\r | |
286 | #if 0 && !defined(AES_ASM)\r | |
287 | #define AES_ASM\r | |
288 | #endif\r | |
289 | \r | |
290 | /* 3. BYTE ORDER WITHIN 32 BIT WORDS\r | |
291 | \r | |
292 | The fundamental data processing units in Rijndael are 8-bit bytes. The\r | |
293 | input, output and key input are all enumerated arrays of bytes in which\r | |
294 | bytes are numbered starting at zero and increasing to one less than the\r | |
295 | number of bytes in the array in question. This enumeration is only used\r | |
296 | for naming bytes and does not imply any adjacency or order relationship\r | |
297 | from one byte to another. When these inputs and outputs are considered\r | |
298 | as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to\r | |
299 | byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.\r | |
300 | In this implementation bits are numbered from 0 to 7 starting at the\r | |
301 | numerically least significant end of each byte (bit n represents 2^n).\r | |
302 | \r | |
303 | However, Rijndael can be implemented more efficiently using 32-bit\r | |
304 | words by packing bytes into words so that bytes 4*n to 4*n+3 are placed\r | |
305 | into word[n]. While in principle these bytes can be assembled into words\r | |
306 | in any positions, this implementation only supports the two formats in\r | |
307 | which bytes in adjacent positions within words also have adjacent byte\r | |
308 | numbers. This order is called big-endian if the lowest numbered bytes\r | |
309 | in words have the highest numeric significance and little-endian if the\r | |
310 | opposite applies.\r | |
311 | \r | |
312 | This code can work in either order irrespective of the order used by the\r | |
313 | machine on which it runs. Normally the internal byte order will be set\r | |
314 | to the order of the processor on which the code is to be run but this\r | |
315 | define can be used to reverse this in special situations\r | |
316 | \r | |
317 | NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set\r | |
318 | */\r | |
319 | #if 1 || defined(AES_ASM)\r | |
320 | #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER\r | |
321 | #elif 0\r | |
322 | #define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN\r | |
323 | #elif 0\r | |
324 | #define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN\r | |
325 | #else\r | |
326 | #error The algorithm byte order is not defined\r | |
327 | #endif\r | |
328 | \r | |
329 | /* 4. FAST INPUT/OUTPUT OPERATIONS.\r | |
330 | \r | |
331 | On some machines it is possible to improve speed by transferring the\r | |
332 | bytes in the input and output arrays to and from the internal 32-bit\r | |
333 | variables by addressing these arrays as if they are arrays of 32-bit\r | |
334 | words. On some machines this will always be possible but there may\r | |
335 | be a large performance penalty if the byte arrays are not aligned on\r | |
336 | the normal word boundaries. On other machines this technique will\r | |
337 | lead to memory access errors when such 32-bit word accesses are not\r | |
338 | properly aligned. The option SAFE_IO avoids such problems but will\r | |
339 | often be slower on those machines that support misaligned access\r | |
340 | (especially so if care is taken to align the input and output byte\r | |
341 | arrays on 32-bit word boundaries). If SAFE_IO is not defined it is\r | |
342 | assumed that access to byte arrays as if they are arrays of 32-bit\r | |
343 | words will not cause problems when such accesses are misaligned.\r | |
344 | */\r | |
345 | #if 0 && !defined(_MSC_VER)\r | |
346 | #define SAFE_IO\r | |
347 | #endif\r | |
348 | \r | |
349 | /* 5. LOOP UNROLLING\r | |
350 | \r | |
351 | The code for encryption and decryption cycles through a number of rounds\r | |
352 | that can be implemented either in a loop or by expanding the code into a\r | |
353 | long sequence of instructions, the latter producing a larger program but\r | |
354 | one that will often be much faster. The latter is called loop unrolling.\r | |
355 | There are also potential speed advantages in expanding two iterations in\r | |
356 | a loop with half the number of iterations, which is called partial loop\r | |
357 | unrolling. The following options allow partial or full loop unrolling\r | |
358 | to be set independently for encryption and decryption\r | |
359 | */\r | |
360 | #if 1\r | |
361 | #define ENC_UNROLL FULL\r | |
362 | #elif 0\r | |
363 | #define ENC_UNROLL PARTIAL\r | |
364 | #else\r | |
365 | #define ENC_UNROLL NONE\r | |
366 | #endif\r | |
367 | \r | |
368 | #if 1\r | |
369 | #define DEC_UNROLL FULL\r | |
370 | #elif 0\r | |
371 | #define DEC_UNROLL PARTIAL\r | |
372 | #else\r | |
373 | #define DEC_UNROLL NONE\r | |
374 | #endif\r | |
375 | \r | |
376 | /* 6. FAST FINITE FIELD OPERATIONS\r | |
377 | \r | |
378 | If this section is included, tables are used to provide faster finite\r | |
379 | field arithmetic (this has no effect if FIXED_TABLES is defined).\r | |
380 | */\r | |
381 | #if 1\r | |
382 | #define FF_TABLES\r | |
383 | #endif\r | |
384 | \r | |
385 | /* 7. INTERNAL STATE VARIABLE FORMAT\r | |
386 | \r | |
387 | The internal state of Rijndael is stored in a number of local 32-bit\r | |
388 | word variables which can be defined either as an array or as individually\r | |
389 | named variables. Include this section if you want to store these local\r | |
390 | variables in arrays. Otherwise individual local variables will be used.\r | |
391 | */\r | |
392 | #if 0\r | |
393 | #define ARRAYS\r | |
394 | #endif\r | |
395 | \r | |
396 | /* In this implementation the columns of the state array are each held in\r | |
397 | 32-bit words. The state array can be held in various ways: in an array\r | |
398 | of words, in a number of individual word variables or in a number of\r | |
399 | processor registers. The following define maps a variable name x and\r | |
400 | a column number c to the way the state array variable is to be held.\r | |
401 | The first define below maps the state into an array x[c] whereas the\r | |
402 | second form maps the state into a number of individual variables x0,\r | |
403 | x1, etc. Another form could map individual state columns to machine\r | |
404 | register names.\r | |
405 | */\r | |
406 | \r | |
407 | #if defined(ARRAYS)\r | |
408 | #define s(x,c) x[c]\r | |
409 | #else\r | |
410 | #define s(x,c) x##c\r | |
411 | #endif\r | |
412 | \r | |
413 | /* 8. FIXED OR DYNAMIC TABLES\r | |
414 | \r | |
415 | When this section is included the tables used by the code are compiled\r | |
416 | statically into the binary file. Otherwise the subroutine gen_tabs()\r | |
417 | must be called to compute them before the code is first used.\r | |
418 | */\r | |
419 | #if 1\r | |
420 | #define FIXED_TABLES\r | |
421 | #endif\r | |
422 | \r | |
423 | /* 9. TABLE ALIGNMENT\r | |
424 | \r | |
425 | On some systems speed will be improved by aligning the AES large lookup\r | |
426 | tables on particular boundaries. This define should be set to a power of\r | |
427 | two giving the desired alignment. It can be left undefined if alignment\r | |
428 | is not needed. This option is specific to the Microsoft VC++ compiler -\r | |
429 | it seems to sometimes cause trouble for the VC++ version 6 compiler.\r | |
430 | */\r | |
431 | \r | |
432 | #if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300)\r | |
433 | #define TABLE_ALIGN 64\r | |
434 | #endif\r | |
435 | \r | |
436 | /* 10. INTERNAL TABLE CONFIGURATION\r | |
437 | \r | |
438 | This cipher proceeds by repeating in a number of cycles known as 'rounds'\r | |
439 | which are implemented by a round function which can optionally be speeded\r | |
440 | up using tables. The basic tables are each 256 32-bit words, with either\r | |
441 | one or four tables being required for each round function depending on\r | |
442 | how much speed is required. The encryption and decryption round functions\r | |
443 | are different and the last encryption and decryption round functions are\r | |
444 | different again making four different round functions in all.\r | |
445 | \r | |
446 | This means that:\r | |
447 | 1. Normal encryption and decryption rounds can each use either 0, 1\r | |
448 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each.\r | |
449 | 2. The last encryption and decryption rounds can also use either 0, 1\r | |
450 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each.\r | |
451 | \r | |
452 | Include or exclude the appropriate definitions below to set the number\r | |
453 | of tables used by this implementation.\r | |
454 | */\r | |
455 | \r | |
456 | #if 1 /* set tables for the normal encryption round */\r | |
457 | #define ENC_ROUND FOUR_TABLES\r | |
458 | #elif 0\r | |
459 | #define ENC_ROUND ONE_TABLE\r | |
460 | #else\r | |
461 | #define ENC_ROUND NO_TABLES\r | |
462 | #endif\r | |
463 | \r | |
464 | #if 1 /* set tables for the last encryption round */\r | |
465 | #define LAST_ENC_ROUND FOUR_TABLES\r | |
466 | #elif 0\r | |
467 | #define LAST_ENC_ROUND ONE_TABLE\r | |
468 | #else\r | |
469 | #define LAST_ENC_ROUND NO_TABLES\r | |
470 | #endif\r | |
471 | \r | |
472 | #if 1 /* set tables for the normal decryption round */\r | |
473 | #define DEC_ROUND FOUR_TABLES\r | |
474 | #elif 0\r | |
475 | #define DEC_ROUND ONE_TABLE\r | |
476 | #else\r | |
477 | #define DEC_ROUND NO_TABLES\r | |
478 | #endif\r | |
479 | \r | |
480 | #if 1 /* set tables for the last decryption round */\r | |
481 | #define LAST_DEC_ROUND FOUR_TABLES\r | |
482 | #elif 0\r | |
483 | #define LAST_DEC_ROUND ONE_TABLE\r | |
484 | #else\r | |
485 | #define LAST_DEC_ROUND NO_TABLES\r | |
486 | #endif\r | |
487 | \r | |
488 | /* The decryption key schedule can be speeded up with tables in the same\r | |
489 | way that the round functions can. Include or exclude the following\r | |
490 | defines to set this requirement.\r | |
491 | */\r | |
492 | #if 1\r | |
493 | #define KEY_SCHED FOUR_TABLES\r | |
494 | #elif 0\r | |
495 | #define KEY_SCHED ONE_TABLE\r | |
496 | #else\r | |
497 | #define KEY_SCHED NO_TABLES\r | |
498 | #endif\r | |
499 | \r | |
500 | /* 11. TABLE POINTER CACHING\r | |
501 | \r | |
502 | Normally tables are referenced directly. Enable this option if you wish to\r | |
503 | cache pointers to the tables in the encrypt/decrypt code. Note that this\r | |
504 | only works if you are using FOUR_TABLES for the ROUND you enable this for.\r | |
505 | */\r | |
506 | #if 1\r | |
507 | #define ENC_ROUND_CACHE_TABLES\r | |
508 | #endif\r | |
509 | #if 1\r | |
510 | #define LAST_ENC_ROUND_CACHE_TABLES\r | |
511 | #endif\r | |
512 | #if 1\r | |
513 | #define DEC_ROUND_CACHE_TABLES\r | |
514 | #endif\r | |
515 | #if 1\r | |
516 | #define LAST_DEC_ROUND_CACHE_TABLES\r | |
517 | #endif\r | |
518 | \r | |
519 | \r | |
520 | /* END OF CONFIGURATION OPTIONS */\r | |
521 | \r | |
522 | #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))\r | |
523 | \r | |
524 | /* Disable or report errors on some combinations of options */\r | |
525 | \r | |
526 | #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES\r | |
527 | #undef LAST_ENC_ROUND\r | |
528 | #define LAST_ENC_ROUND NO_TABLES\r | |
529 | #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES\r | |
530 | #undef LAST_ENC_ROUND\r | |
531 | #define LAST_ENC_ROUND ONE_TABLE\r | |
532 | #endif\r | |
533 | \r | |
534 | #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE\r | |
535 | #undef ENC_UNROLL\r | |
536 | #define ENC_UNROLL NONE\r | |
537 | #endif\r | |
538 | \r | |
539 | #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES\r | |
540 | #undef LAST_DEC_ROUND\r | |
541 | #define LAST_DEC_ROUND NO_TABLES\r | |
542 | #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES\r | |
543 | #undef LAST_DEC_ROUND\r | |
544 | #define LAST_DEC_ROUND ONE_TABLE\r | |
545 | #endif\r | |
546 | \r | |
547 | #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE\r | |
548 | #undef DEC_UNROLL\r | |
549 | #define DEC_UNROLL NONE\r | |
550 | #endif\r | |
551 | \r | |
552 | /* upr(x,n): rotates bytes within words by n positions, moving bytes to\r | |
553 | higher index positions with wrap around into low positions\r | |
554 | ups(x,n): moves bytes by n positions to higher index positions in\r | |
555 | words but without wrap around\r | |
556 | bval(x,n): extracts a byte from a word\r | |
557 | \r | |
558 | NOTE: The definitions given here are intended only for use with\r | |
559 | unsigned variables and with shift counts that are compile\r | |
560 | time constants\r | |
561 | */\r | |
562 | \r | |
/* Byte-lane helpers, defined once for each supported ALGORITHM_BYTE_ORDER.

   upr(x,n)   rotate the bytes of word x by n positions towards higher
              byte indexes, wrapping around; n must be 1..3 because
              n == 0 would shift by 32 bits
   ups(x,n)   shift the bytes of word x by n positions towards higher
              byte indexes, filling the vacated low positions with zero
   bval(x,n)  extract the byte with index n from word x
   bytes2word(b0,b1,b2,b3)  assemble a word whose byte index i holds bi

   Little-endian: byte index 0 is the least significant byte of the word.
*/
#if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN)
#define upr(x,n)        (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n))))
#define ups(x,n)        ((aes_32t) (x) << (8 * (n)))
#define bval(x,n)       ((aes_08t)((x) >> (8 * (n))))
#define bytes2word(b0, b1, b2, b3)  \
        (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
#endif

/* Big-endian: byte index 0 is the most significant byte of the word, so
   moving to a higher byte index is a right shift. The ups definition
   previously carried an unbalanced closing parenthesis.
*/
#if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN)
#define upr(x,n)        (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n))))
#define ups(x,n)        ((aes_32t) (x) >> (8 * (n)))
#define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
#define bytes2word(b0, b1, b2, b3)  \
        (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
#endif
578 | \r | |
/* word_in(x,c):    read 32-bit word number c from the buffer at x
   word_out(x,c,v): write value v as 32-bit word number c of the buffer at x

   SAFE_IO builds the word from, and stores it as, four individual byte
   accesses. The other two variants address the buffer as an array of
   aes_32t, byte-swapping with aes_sw32 when the algorithm and platform
   byte orders differ. The word index c is parenthesised in every variant
   so that an expression argument is scaled as a whole.
*/
#if defined(SAFE_IO)

#define word_in(x,c)    bytes2word(((aes_08t*)(x)+4*(c))[0], ((aes_08t*)(x)+4*(c))[1], \
                                   ((aes_08t*)(x)+4*(c))[2], ((aes_08t*)(x)+4*(c))[3])
#define word_out(x,c,v) { ((aes_08t*)(x)+4*(c))[0] = bval(v,0); ((aes_08t*)(x)+4*(c))[1] = bval(v,1); \
                          ((aes_08t*)(x)+4*(c))[2] = bval(v,2); ((aes_08t*)(x)+4*(c))[3] = bval(v,3); }

#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)

#define word_in(x,c)    (*((aes_32t*)(x)+(c)))
#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v))

#else

#define word_in(x,c)    aes_sw32(*((aes_32t*)(x)+(c)))
#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v))

#endif
597 | \r | |
598 | /* the finite field modular polynomial and elements */\r | |
599 | \r | |
600 | #define WPOLY 0x011b\r | |
601 | #define BPOLY 0x1b\r | |
602 | \r | |
603 | /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */\r | |
604 | \r | |
605 | #define m1 0x80808080\r | |
606 | #define m2 0x7f7f7f7f\r | |
607 | #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))\r | |
608 | \r | |
609 | /* The following defines provide alternative definitions of gf_mulx that might\r | |
610 | give improved performance if a fast 32-bit multiply is not available. Note\r | |
611 | that a temporary variable u needs to be defined where gf_mulx is used.\r | |
612 | \r | |
613 | #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))\r | |
614 | #define m4 (0x01010101 * BPOLY)\r | |
615 | #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)\r | |
616 | */\r | |
617 | \r | |
618 | /* Work out which tables are needed for the different options */\r | |
619 | \r | |
620 | #if defined( AES_ASM )\r | |
621 | #if defined( ENC_ROUND )\r | |
622 | #undef ENC_ROUND\r | |
623 | #endif\r | |
624 | #define ENC_ROUND FOUR_TABLES\r | |
625 | #if defined( LAST_ENC_ROUND )\r | |
626 | #undef LAST_ENC_ROUND\r | |
627 | #endif\r | |
628 | #define LAST_ENC_ROUND FOUR_TABLES\r | |
629 | #if defined( DEC_ROUND )\r | |
630 | #undef DEC_ROUND\r | |
631 | #endif\r | |
632 | #define DEC_ROUND FOUR_TABLES\r | |
633 | #if defined( LAST_DEC_ROUND )\r | |
634 | #undef LAST_DEC_ROUND\r | |
635 | #endif\r | |
636 | #define LAST_DEC_ROUND FOUR_TABLES\r | |
637 | #if defined( KEY_SCHED )\r | |
638 | #undef KEY_SCHED\r | |
639 | #define KEY_SCHED FOUR_TABLES\r | |
640 | #endif\r | |
641 | #endif\r | |
642 | \r | |
643 | #if defined(ENCRYPTION) || defined(AES_ASM)\r | |
644 | #if ENC_ROUND == ONE_TABLE\r | |
645 | #define FT1_SET\r | |
646 | #elif ENC_ROUND == FOUR_TABLES\r | |
647 | #define FT4_SET\r | |
648 | #else\r | |
649 | #define SBX_SET\r | |
650 | #endif\r | |
651 | #if LAST_ENC_ROUND == ONE_TABLE\r | |
652 | #define FL1_SET\r | |
653 | #elif LAST_ENC_ROUND == FOUR_TABLES\r | |
654 | #define FL4_SET\r | |
655 | #elif !defined(SBX_SET)\r | |
656 | #define SBX_SET\r | |
657 | #endif\r | |
658 | #endif\r | |
659 | \r | |
660 | #if defined(DECRYPTION) || defined(AES_ASM)\r | |
661 | #if DEC_ROUND == ONE_TABLE\r | |
662 | #define IT1_SET\r | |
663 | #elif DEC_ROUND == FOUR_TABLES\r | |
664 | #define IT4_SET\r | |
665 | #else\r | |
666 | #define ISB_SET\r | |
667 | #endif\r | |
668 | #if LAST_DEC_ROUND == ONE_TABLE\r | |
669 | #define IL1_SET\r | |
670 | #elif LAST_DEC_ROUND == FOUR_TABLES\r | |
671 | #define IL4_SET\r | |
672 | #elif !defined(ISB_SET)\r | |
673 | #define ISB_SET\r | |
674 | #endif\r | |
675 | #endif\r | |
676 | \r | |
677 | #if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)\r | |
678 | #if KEY_SCHED == ONE_TABLE\r | |
679 | #define LS1_SET\r | |
680 | #define IM1_SET\r | |
681 | #elif KEY_SCHED == FOUR_TABLES\r | |
682 | #define LS4_SET\r | |
683 | #define IM4_SET\r | |
684 | #elif !defined(SBX_SET)\r | |
685 | #define SBX_SET\r | |
686 | #endif\r | |
687 | #endif\r | |
688 | \r | |
689 | /* generic definitions of Rijndael macros that use tables */\r | |
690 | \r | |
691 | #define no_table(x,box,vf,rf,c) bytes2word( \\r | |
692 | box[bval(vf(x,0,c),rf(0,c))], \\r | |
693 | box[bval(vf(x,1,c),rf(1,c))], \\r | |
694 | box[bval(vf(x,2,c),rf(2,c))], \\r | |
695 | box[bval(vf(x,3,c),rf(3,c))])\r | |
696 | \r | |
697 | #define one_table(x,op,tab,vf,rf,c) \\r | |
698 | ( tab[bval(vf(x,0,c),rf(0,c))] \\r | |
699 | ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \\r | |
700 | ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \\r | |
701 | ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))\r | |
702 | \r | |
703 | #define four_tables(x,tab,vf,rf,c) \\r | |
704 | ( tab[0][bval(vf(x,0,c),rf(0,c))] \\r | |
705 | ^ tab[1][bval(vf(x,1,c),rf(1,c))] \\r | |
706 | ^ tab[2][bval(vf(x,2,c),rf(2,c))] \\r | |
707 | ^ tab[3][bval(vf(x,3,c),rf(3,c))])\r | |
708 | \r | |
709 | #define four_cached_tables(x,tab,vf,rf,c) \\r | |
710 | ( tab##0[bval(vf(x,0,c),rf(0,c))] \\r | |
711 | ^ tab##1[bval(vf(x,1,c),rf(1,c))] \\r | |
712 | ^ tab##2[bval(vf(x,2,c),rf(2,c))] \\r | |
713 | ^ tab##3[bval(vf(x,3,c),rf(3,c))])\r | |
714 | \r | |
/* Selector helpers intended for the vf/rf arguments of the table macros:
     vf1(x,r,c)  yields x unchanged (r and c are ignored)
     rf1(r,c)    yields r (c is ignored)
     rf2(r,c)    yields (8 + r - c) reduced modulo 4
   The rf2 arguments are parenthesised so that expression arguments are
   evaluated as a whole before the subtraction.
*/
#define vf1(x,r,c)  (x)
#define rf1(r,c)    (r)
#define rf2(r,c)    ((8+(r)-(c))&3)
718 | \r | |
719 | /* perform forward and inverse column mix operation on four bytes in long word x in */\r | |
720 | /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */\r | |
721 | \r | |
722 | #if defined(FM4_SET) /* not currently used */\r | |
723 | #define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)\r | |
724 | #elif defined(FM1_SET) /* not currently used */\r | |
725 | #define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)\r | |
726 | #else\r | |
727 | #define dec_fmvars aes_32t g2\r | |
728 | #define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))\r | |
729 | #endif\r | |
730 | \r | |
731 | #if defined(IM4_SET)\r | |
732 | #define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)\r | |
733 | #elif defined(IM1_SET)\r | |
734 | #define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)\r | |
735 | #else\r | |
736 | #define dec_imvars aes_32t g2, g4, g9\r | |
737 | #define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \\r | |
738 | (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))\r | |
739 | #endif\r | |
740 | \r | |
741 | #if defined(FL4_SET)\r | |
742 | #define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)\r | |
743 | #elif defined(LS4_SET)\r | |
744 | #define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)\r | |
745 | #elif defined(FL1_SET)\r | |
746 | #define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)\r | |
747 | #elif defined(LS1_SET)\r | |
748 | #define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)\r | |
749 | #else\r | |
750 | #define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)\r | |
751 | #endif\r | |
752 | \r | |
753 | #endif\r |