]>
git.saurik.com Git - apple/xnu.git/blob - bsd/crypto/aes/i386/aesopt.h
2 ---------------------------------------------------------------------------
3 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
7 The free distribution and use of this software in both source and binary
8 form is allowed (with or without changes) provided that:
10 1. distributions of this source code include the above copyright
11 notice, this list of conditions and the following disclaimer;
13 2. distributions in binary form include the above copyright
14 notice, this list of conditions and the following disclaimer
15 in the documentation and/or other associated materials;
17 3. the copyright holder's name is not used to endorse products
18 built using this software without specific written permission.
20 ALTERNATIVELY, provided that this notice is retained in full, this product
21 may be distributed under the terms of the GNU General Public License (GPL),
22 in which case the provisions of the GPL apply INSTEAD OF those given above.
26 This software is provided 'as is' with no explicit or implied warranties
27 in respect of its properties, including, but not limited to, correctness
28 and/or fitness for purpose.
29 ---------------------------------------------------------------------------
32 This file contains the compilation options for AES (Rijndael) and code
33 that is common across encryption, key scheduling and table generation.
37 These source code files implement the AES algorithm Rijndael designed by
38 Joan Daemen and Vincent Rijmen. This version is designed for the standard
39 block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 and 32 bytes).
42 This version is designed for flexibility and speed using operations on
43 32-bit words rather than operations on bytes. It can be compiled with
44 either big or little endian internal byte order but is faster when the
45 native byte order for the processor is used.
49 The cipher interface is implemented as an array of bytes in which lower
50 AES bit sequence indexes map to higher numeric significance within bytes.
52 uint_8t (an unsigned 8-bit type)
53 uint_32t (an unsigned 32-bit type)
54 struct aes_encrypt_ctx (structure for the cipher encryption context)
55 struct aes_decrypt_ctx (structure for the cipher decryption context)
56 aes_rval the function return type
60 aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
61 aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
62 aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
63 aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,
64 const aes_encrypt_ctx cx[1]);
66 aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
67 aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
68 aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
69 aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,
70 const aes_decrypt_ctx cx[1]);
72 IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
73 you call gen_tabs() before AES is used so that the tables are initialised.
75 C++ aes class subroutines:
77 Class AESencrypt for encryption
81 AESencrypt(const unsigned char *key) - 128 bit key
83 aes_rval key128(const unsigned char *key)
84 aes_rval key192(const unsigned char *key)
85 aes_rval key256(const unsigned char *key)
86 aes_rval encrypt(const unsigned char *in, unsigned char *out) const
88 Class AESdecrypt for decryption
91 AESdecrypt(const unsigned char *key) - 128 bit key
93 aes_rval key128(const unsigned char *key)
94 aes_rval key192(const unsigned char *key)
95 aes_rval key256(const unsigned char *key)
96 aes_rval decrypt(const unsigned char *in, unsigned char *out) const
99 #if !defined( _AESOPT_H )
102 #if defined( __cplusplus )
105 #include "crypto/aes.h"
108 /* PLATFORM SPECIFIC INCLUDES */
112 /* CONFIGURATION - THE USE OF DEFINES
114 Later in this section there are a number of defines that control the
115 operation of the code. In each section, the purpose of each define is
116 explained so that the relevant form can be included or excluded by
117 setting either 1's or 0's respectively on the branches of the related
118 #if clauses. The following local defines should not be changed.
121 #define ENCRYPTION_IN_C 1
122 #define DECRYPTION_IN_C 2
123 #define ENC_KEYING_IN_C 4
124 #define DEC_KEYING_IN_C 8
128 #define FOUR_TABLES 4
133 /* --- START OF USER CONFIGURED OPTIONS --- */
135 /* 1. BYTE ORDER WITHIN 32 BIT WORDS
137 The fundamental data processing units in Rijndael are 8-bit bytes. The
138 input, output and key input are all enumerated arrays of bytes in which
139 bytes are numbered starting at zero and increasing to one less than the
140 number of bytes in the array in question. This enumeration is only used
141 for naming bytes and does not imply any adjacency or order relationship
142 from one byte to another. When these inputs and outputs are considered
143 as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
144 byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
145 In this implementation bits are numbered from 0 to 7 starting at the
146 numerically least significant end of each byte (bit n represents 2^n).
148 However, Rijndael can be implemented more efficiently using 32-bit
149 words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
150 into word[n]. While in principle these bytes can be assembled into words
151 in any positions, this implementation only supports the two formats in
152 which bytes in adjacent positions within words also have adjacent byte
153 numbers. This order is called big-endian if the lowest numbered bytes
154 in words have the highest numeric significance and little-endian if the opposite applies.
157 This code can work in either order irrespective of the order used by the
158 machine on which it runs. Normally the internal byte order will be set
159 to the order of the processor on which the code is to be run but this
160 define can be used to reverse this in special situations
162 WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
163 This define will hence be redefined later (in section 4) if necessary
167 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
169 #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
171 #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
173 #error The algorithm byte order is not defined
176 /* 2. VIA ACE SUPPORT
178 Define this option if support for the VIA ACE is required. This uses
179 inline assembler instructions and is only implemented for the Microsoft,
180 Intel and GCC compilers. If VIA ACE is known to be present, then defining
181 ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
182 code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
183 it is detected (both present and enabled) but the normal AES code will also be present.
186 When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
187 aligned; other input/output buffers do not need to be 16 byte aligned
188 but there are very large performance gains if this can be arranged.
189 VIA ACE also requires the decryption key schedule to be in reverse
190 order (which the following defines ensure).
193 #if 0 && !defined( _WIN64 ) && !defined( USE_VIA_ACE_IF_PRESENT )
194 #define USE_VIA_ACE_IF_PRESENT
197 #if 0 && !defined( _WIN64 ) && !defined( ASSUME_VIA_ACE_PRESENT )
198 #define ASSUME_VIA_ACE_PRESENT
201 /* 3. ASSEMBLER SUPPORT
203 This define (which can be on the command line) enables the use of the
204 assembler code routines for encryption, decryption and key scheduling
207 ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
208 encryption and decryption but with key scheduling in C
209 ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
210 encryption, decryption and key scheduling
211 ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
212 encryption and decryption but with key scheduling in C
213 ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
214 encryption and decryption but with key scheduling in C
216 Change one 'if 0' below to 'if 1' to select the version or define
217 as a compilation option.
220 #if defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )
221 # if defined( _M_IX86 )
222 # if 0 && !defined( ASM_X86_V1C )
224 # elif 0 && !defined( ASM_X86_V2 )
226 # elif 0 && !defined( ASM_X86_V2C )
230 # error Assembler code is only available for x86 and AMD64 systems
232 #elif defined( ASM_AMD64_C )
233 # if defined( _M_X64 )
234 # if 0 && !defined( ASM_AMD64_C )
238 # error Assembler code is only available for x86 and AMD64 systems
242 /* 4. FAST INPUT/OUTPUT OPERATIONS.
244 On some machines it is possible to improve speed by transferring the
245 bytes in the input and output arrays to and from the internal 32-bit
246 variables by addressing these arrays as if they are arrays of 32-bit
247 words. On some machines this will always be possible but there may
248 be a large performance penalty if the byte arrays are not aligned on
249 the normal word boundaries. On other machines this technique will
250 lead to memory access errors when such 32-bit word accesses are not
251 properly aligned. The option SAFE_IO avoids such problems but will
252 often be slower on those machines that support misaligned access
253 (especially so if care is taken to align the input and output byte
254 arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
255 assumed that access to byte arrays as if they are arrays of 32-bit
256 words will not cause problems when such accesses are misaligned.
258 #if 1 && !defined( _MSC_VER )
264 The code for encryption and decryption cycles through a number of rounds
265 that can be implemented either in a loop or by expanding the code into a
266 long sequence of instructions, the latter producing a larger program but
267 one that will often be much faster. The latter is called loop unrolling.
268 There are also potential speed advantages in expanding two iterations in
269 a loop with half the number of iterations, which is called partial loop
270 unrolling. The following options allow partial or full loop unrolling
271 to be set independently for encryption and decryption
274 #define ENC_UNROLL FULL
276 #define ENC_UNROLL PARTIAL
278 #define ENC_UNROLL NONE
282 #define DEC_UNROLL FULL
284 #define DEC_UNROLL PARTIAL
286 #define DEC_UNROLL NONE
289 /* 6. FAST FINITE FIELD OPERATIONS
291 If this section is included, tables are used to provide faster finite
292 field arithmetic (this has no effect if FIXED_TABLES is defined).
298 /* 7. INTERNAL STATE VARIABLE FORMAT
300 The internal state of Rijndael is stored in a number of local 32-bit
301 word variables which can be defined either as an array or as individual
302 named variables. Include this section if you want to store these local
303 variables in arrays. Otherwise individual local variables will be used.
309 /* 8. FIXED OR DYNAMIC TABLES
311 When this section is included the tables used by the code are compiled
312 statically into the binary file. Otherwise the subroutine gen_tabs()
313 must be called to compute them before the code is first used.
315 #if 0 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
319 /* 9. TABLE ALIGNMENT
321 On some systems speed will be improved by aligning the AES large lookup
322 tables on particular boundaries. This define should be set to a power of
323 two giving the desired alignment. It can be left undefined if alignment
324 is not needed. This option is specific to the Microsoft VC++ compiler -
325 it seems to sometimes cause trouble for the VC++ version 6 compiler.
328 #if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
329 #define TABLE_ALIGN 32
334 This cipher proceeds by repeating in a number of cycles known as 'rounds'
335 which are implemented by a round function which can optionally be speeded
336 up using tables. The basic tables are each 256 32-bit words, with either
337 one or four tables being required for each round function depending on
338 how much speed is required. The encryption and decryption round functions
339 are different and the last encryption and decryption round functions are
340 different again making four different round functions in all.
343 1. Normal encryption and decryption rounds can each use either 0, 1
344 or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
345 2. The last encryption and decryption rounds can also use either 0, 1
346 or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
348 Include or exclude the appropriate definitions below to set the number
349 of tables used by this implementation.
352 #if 1 /* set tables for the normal encryption round */
353 #define ENC_ROUND FOUR_TABLES
355 #define ENC_ROUND ONE_TABLE
357 #define ENC_ROUND NO_TABLES
360 #if 1 /* set tables for the last encryption round */
361 #define LAST_ENC_ROUND FOUR_TABLES
363 #define LAST_ENC_ROUND ONE_TABLE
365 #define LAST_ENC_ROUND NO_TABLES
368 #if 1 /* set tables for the normal decryption round */
369 #define DEC_ROUND FOUR_TABLES
371 #define DEC_ROUND ONE_TABLE
373 #define DEC_ROUND NO_TABLES
376 #if 1 /* set tables for the last decryption round */
377 #define LAST_DEC_ROUND FOUR_TABLES
379 #define LAST_DEC_ROUND ONE_TABLE
381 #define LAST_DEC_ROUND NO_TABLES
384 /* The decryption key schedule can be speeded up with tables in the same
385 way that the round functions can. Include or exclude the following
386 defines to set this requirement.
389 #define KEY_SCHED FOUR_TABLES
391 #define KEY_SCHED ONE_TABLE
393 #define KEY_SCHED NO_TABLES
396 /* ---- END OF USER CONFIGURED OPTIONS ---- */
398 /* VIA ACE support is only available for VC++ and GCC */
400 #if !defined( _MSC_VER ) && !defined( __GNUC__ )
401 # if defined( ASSUME_VIA_ACE_PRESENT )
402 # undef ASSUME_VIA_ACE_PRESENT
404 # if defined( USE_VIA_ACE_IF_PRESENT )
405 # undef USE_VIA_ACE_IF_PRESENT
409 #if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
410 #define USE_VIA_ACE_IF_PRESENT
413 #if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
417 /* Assembler support requires the use of platform byte order */
419 #if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
420 #undef ALGORITHM_BYTE_ORDER
421 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
424 /* In this implementation the columns of the state array are each held in
425 32-bit words. The state array can be held in various ways: in an array
426 of words, in a number of individual word variables or in a number of
427 processor registers. The following define maps a variable name x and
428 a column number c to the way the state array variable is to be held.
429 The first define below maps the state into an array x[c] whereas the
430 second form maps the state into a number of individual variables x0,
431 x1, etc. Another form could map individual state columns to machine register names.
435 #if defined( ARRAYS )
441 /* This implementation provides subroutines for encryption, decryption
442 and for setting the three key lengths (separately) for encryption
443 and decryption. Since not all functions are needed, masks are set
444 up here to determine which will be implemented in C
447 #if !defined( AES_ENCRYPT )
448 # define EFUNCS_IN_C 0
449 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
450 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
451 # define EFUNCS_IN_C ENC_KEYING_IN_C
452 #elif !defined( ASM_X86_V2 )
453 # define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
455 # define EFUNCS_IN_C 0
458 #if !defined( AES_DECRYPT )
459 # define DFUNCS_IN_C 0
460 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
461 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
462 # define DFUNCS_IN_C DEC_KEYING_IN_C
463 #elif !defined( ASM_X86_V2 )
464 # define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
466 # define DFUNCS_IN_C 0
469 #define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C )
471 /* END OF CONFIGURATION OPTIONS */
473 #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
475 /* Disable or report errors on some combinations of options */
477 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
478 #undef LAST_ENC_ROUND
479 #define LAST_ENC_ROUND NO_TABLES
480 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
481 #undef LAST_ENC_ROUND
482 #define LAST_ENC_ROUND ONE_TABLE
485 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
487 #define ENC_UNROLL NONE
490 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
491 #undef LAST_DEC_ROUND
492 #define LAST_DEC_ROUND NO_TABLES
493 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
494 #undef LAST_DEC_ROUND
495 #define LAST_DEC_ROUND ONE_TABLE
498 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
500 #define DEC_UNROLL NONE
503 #if defined( bswap32 )
504 #define aes_sw32 bswap32
505 #elif defined( bswap_32 )
506 #define aes_sw32 bswap_32
508 #define brot(x,n) (((uint_32t)(x) << n) | ((uint_32t)(x) >> (32 - n)))
509 #define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
512 /* upr(x,n): rotates bytes within words by n positions, moving bytes to
513 higher index positions with wrap around into low positions
514 ups(x,n): moves bytes by n positions to higher index positions in
515 words but without wrap around
516 bval(x,n): extracts a byte from a word
518 WARNING: The definitions given here are intended only for use with
519 unsigned variables and with shift counts that are compile time constants
523 #if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
524 #define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n))))
525 #define ups(x,n) ((uint_32t) (x) << (8 * (n)))
526 #define bval(x,n) ((uint_8t)((x) >> (8 * (n))))
527 #define bytes2word(b0, b1, b2, b3) \
528 (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0))
531 #if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
532 #define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n))))
533 #define ups(x,n) ((uint_32t) (x) >> (8 * (n)))
534 #define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n))))
535 #define bytes2word(b0, b1, b2, b3) \
536 (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3))
539 #if defined( SAFE_IO )
541 #define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \
542 ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3])
543 #define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \
544 ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); }
546 #elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
548 #define word_in(x,c) (*((uint_32t*)(x)+(c)))
549 #define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
553 #define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c)))
554 #define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
558 /* the finite field modular polynomial and elements */
563 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
565 #define m1 0x80808080
566 #define m2 0x7f7f7f7f
567 #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
569 /* The following defines provide alternative definitions of gf_mulx that might
570 give improved performance if a fast 32-bit multiply is not available. Note
571 that a temporary variable u needs to be defined where gf_mulx is used.
573 #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
574 #define m4 (0x01010101 * BPOLY)
575 #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
578 /* Work out which tables are needed for the different options */
580 #if defined( ASM_X86_V1C )
581 #if defined( ENC_ROUND )
584 #define ENC_ROUND FOUR_TABLES
585 #if defined( LAST_ENC_ROUND )
586 #undef LAST_ENC_ROUND
588 #define LAST_ENC_ROUND FOUR_TABLES
589 #if defined( DEC_ROUND )
592 #define DEC_ROUND FOUR_TABLES
593 #if defined( LAST_DEC_ROUND )
594 #undef LAST_DEC_ROUND
596 #define LAST_DEC_ROUND FOUR_TABLES
597 #if defined( KEY_SCHED )
599 #define KEY_SCHED FOUR_TABLES
603 #if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
604 #if ENC_ROUND == ONE_TABLE
606 #elif ENC_ROUND == FOUR_TABLES
611 #if LAST_ENC_ROUND == ONE_TABLE
613 #elif LAST_ENC_ROUND == FOUR_TABLES
615 #elif !defined( SBX_SET )
620 #if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
621 #if DEC_ROUND == ONE_TABLE
623 #elif DEC_ROUND == FOUR_TABLES
628 #if LAST_DEC_ROUND == ONE_TABLE
630 #elif LAST_DEC_ROUND == FOUR_TABLES
632 #elif !defined(ISB_SET)
637 #if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)
638 #if KEY_SCHED == ONE_TABLE
640 #elif KEY_SCHED == FOUR_TABLES
642 #elif !defined( SBX_SET )
647 #if (FUNCS_IN_C & DEC_KEYING_IN_C)
648 #if KEY_SCHED == ONE_TABLE
650 #elif KEY_SCHED == FOUR_TABLES
652 #elif !defined( SBX_SET )
657 /* generic definitions of Rijndael macros that use tables */
659 #define no_table(x,box,vf,rf,c) bytes2word( \
660 box[bval(vf(x,0,c),rf(0,c))], \
661 box[bval(vf(x,1,c),rf(1,c))], \
662 box[bval(vf(x,2,c),rf(2,c))], \
663 box[bval(vf(x,3,c),rf(3,c))])
665 #define one_table(x,op,tab,vf,rf,c) \
666 ( tab[bval(vf(x,0,c),rf(0,c))] \
667 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
668 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
669 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
671 #define four_tables(x,tab,vf,rf,c) \
672 ( tab[0][bval(vf(x,0,c),rf(0,c))] \
673 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
674 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
675 ^ tab[3][bval(vf(x,3,c),rf(3,c))])
677 #define vf1(x,r,c) (x)
679 #define rf2(r,c) ((8+r-c)&3)
681 /* perform forward and inverse column mix operation on four bytes in long word x in */
682 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */
684 #if defined( FM4_SET ) /* not currently used */
685 #define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
686 #elif defined( FM1_SET ) /* not currently used */
687 #define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
689 #define dec_fmvars uint_32t g2
690 #define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
693 #if defined( IM4_SET )
694 #define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
695 #elif defined( IM1_SET )
696 #define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
698 #define dec_imvars uint_32t g2, g4, g9
699 #define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
700 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
703 #if defined( FL4_SET )
704 #define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
705 #elif defined( LS4_SET )
706 #define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
707 #elif defined( FL1_SET )
708 #define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
709 #elif defined( LS1_SET )
710 #define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
712 #define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
715 #if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )