bsd/crypto/aes/ppc/aesopt.h

   1 /*
   2  ---------------------------------------------------------------------------
   3  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
   4
   5  LICENSE TERMS
   6
   7  The free distribution and use of this software in both source and binary
   8  form is allowed (with or without changes) provided that:
   9
  10    1. distributions of this source code include the above copyright
  11       notice, this list of conditions and the following disclaimer;
  12
  13    2. distributions in binary form include the above copyright
  14       notice, this list of conditions and the following disclaimer
  15       in the documentation and/or other associated materials;
  16
  17    3. the copyright holder's name is not used to endorse products
  18       built using this software without specific written permission.
  19
  20  ALTERNATIVELY, provided that this notice is retained in full, this product
  21  may be distributed under the terms of the GNU General Public License (GPL),
  22  in which case the provisions of the GPL apply INSTEAD OF those given above.
  23
  24  DISCLAIMER
  25
  26  This software is provided 'as is' with no explicit or implied warranties
  27  in respect of its properties, including, but not limited to, correctness
  28  and/or fitness for purpose.
  29  ---------------------------------------------------------------------------
  30  Issue 28/01/2004
  31
  32  My thanks go to Dag Arne Osvik for devising the schemes used here for key
  33  length derivation from the form of the key schedule
  34
  35  This file contains the compilation options for AES (Rijndael) and code
  36  that is common across encryption, key scheduling and table generation.
  37
  38  OPERATION
  39
  40  These source code files implement the AES algorithm Rijndael designed by
  41  Joan Daemen and Vincent Rijmen. This version is designed for the standard
  42  block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
  43  and 32 bytes).
  44
  45  This version is designed for flexibility and speed using operations on
  46  32-bit words rather than operations on bytes.  It can be compiled with
  47  either big or little endian internal byte order but is faster when the
  48  native byte order for the processor is used.
  49
  50  THE CIPHER INTERFACE
  51
  52  The cipher interface is implemented as an array of bytes in which lower
  53  AES bit sequence indexes map to higher numeric significance within bytes.
  54
  55   aes_08t                 (an unsigned  8-bit type)
  56   aes_32t                 (an unsigned 32-bit type)
  57   struct aes_encrypt_ctx  (structure for the cipher encryption context)
  58   struct aes_decrypt_ctx  (structure for the cipher decryption context)
  59   aes_rval                the function return type
  60
  61   C subroutine calls:
  62
  63   aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
  64   aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
  65   aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
  66   aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,
  67                                                   const aes_encrypt_ctx cx[1]);
  68
  69   aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
  70   aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
  71   aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
  72   aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,
  73                                                   const aes_decrypt_ctx cx[1]);
  74
  75  IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
  76  you call gen_tabs() before AES is used so that the tables are initialised.
  77
  78  C++ aes class subroutines:
  79
  80      Class AESencrypt  for encryption
  81
  82       Constructors:
  83           AESencrypt(void)
  84           AESencrypt(const unsigned char *key) - 128 bit key
  85       Members:
  86           aes_rval key128(const unsigned char *key)
  87           aes_rval key192(const unsigned char *key)
  88           aes_rval key256(const unsigned char *key)
  89           aes_rval encrypt(const unsigned char *in, unsigned char *out) const
  90
  91      Class AESdecrypt  for decryption
  92       Constructors:
  93           AESdecrypt(void)
  94           AESdecrypt(const unsigned char *key) - 128 bit key
  95       Members:
  96           aes_rval key128(const unsigned char *key)
  97           aes_rval key192(const unsigned char *key)
  98           aes_rval key256(const unsigned char *key)
  99           aes_rval decrypt(const unsigned char *in, unsigned char *out) const
 100
 101     COMPILATION
 102
 103     The files used to provide AES (Rijndael) are
 104
 105     a. aes.h for the definitions needed for use in C.
 106     b. aescpp.h for the definitions needed for use in C++.
 107     c. aesopt.h for setting compilation options (also includes common code).
 108     d. aescrypt.c for encryption and decryption, or
 109     e. aeskey.c for key scheduling.
 110     f. aestab.c for table loading or generation.
 111     g. aescrypt.asm for encryption and decryption using assembler code.
 112     h. aescrypt.mmx.asm for encryption and decryption using MMX assembler.
 113
 114     To compile AES (Rijndael) for use in C code use aes.h and set the
 115     defines here for the facilities you need (key lengths, encryption
 116     and/or decryption). Do not define AES_DLL or AES_CPP.  Set the options
 117     for optimisations and table sizes here.
 118
 119     To compile AES (Rijndael) for use in C++ code use aescpp.h but do
 120     not define AES_DLL
 121
 122     To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use
 123     aes.h and include the AES_DLL define.
 124
 125     CONFIGURATION OPTIONS (here and in aes.h)
 126
 127     a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL
 128     b. You may need to set PLATFORM_BYTE_ORDER to define the byte order.
 129     c. If you want the code to run in a specific internal byte order, then
 130        ALGORITHM_BYTE_ORDER must be set accordingly.
 131     d. set other configuration options described below.
 132 */
 133
 134 #if !defined( _AESOPT_H )
 135 #define _AESOPT_H
 136
 137 #include <crypto/aes/aes.h>
 138
 139 /*  CONFIGURATION - USE OF DEFINES
 140
 141     Later in this section there are a number of defines that control the
 142     operation of the code.  In each section, the purpose of each define is
 143     explained so that the relevant form can be included or excluded by
 144     setting either 1's or 0's respectively on the branches of the related
 145     #if clauses.
 146
 147     PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS
 148
 149     To obtain the highest speed on processors with 32-bit words, this code
 150     needs to determine the byte order of the target machine. The following
 151     block of code is an attempt to capture the most obvious ways in which
 152     various environments define byte order. It may well fail, in which case
 153     the definitions will need to be set by editing at the points marked
 154     **** EDIT HERE IF NECESSARY **** below.  My thanks go to Peter Gutmann
 155     for his assistance with this endian detection nightmare.
 156 */
 157
 158 #define BRG_LITTLE_ENDIAN   1234 /* order tag: byte 0 is least significant (e.g. i386) */
 159 #define BRG_BIG_ENDIAN      4321 /* order tag: byte 0 is most significant (e.g. mc68k) */
 160
 161 #if defined(__GNUC__) || defined(__GNU_LIBRARY__)   /* only attempted for GNU compilers / C library */
 162 #  if defined(__FreeBSD__) || defined(__OpenBSD__)
 163 #    include <sys/endian.h>
 164 #  elif defined( BSD ) && BSD >= 199103
 165 #      include <machine/endian.h>
 166 #  elif defined(__APPLE__)   /* no header is included here: mirror the compiler's __X_ENDIAN__ macro instead */
 167 #    if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN )
 168 #      define BIG_ENDIAN       /* empty marker; the detection that follows tests it with defined() */
 169 #    elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN )
 170 #      define LITTLE_ENDIAN    /* empty marker, as for BIG_ENDIAN */
 171 #    endif
 172 #  else
 173 #    include <endian.h>
 174 #    if defined(__BEOS__)
 175 #      include <byteswap.h>
 176 #    endif
 177 #  endif
 178 #endif
 179
 180 #if !defined(PLATFORM_BYTE_ORDER)   /* skipped entirely if the build already chose an order */
 181 #  if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)   /* tier 1: un-prefixed macro names */
 182 #    if    defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)   /* only one name defined: that name is the order */
 183 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 184 #    elif !defined(LITTLE_ENDIAN) &&  defined(BIG_ENDIAN)
 185 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 186 #    elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)   /* both names defined: compare them with BYTE_ORDER */
 187 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 188 #    elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
 189 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 190 #    endif
 191 #  elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)   /* tier 2: single-underscore names, same four tests */
 192 #    if    defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
 193 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 194 #    elif !defined(_LITTLE_ENDIAN) &&  defined(_BIG_ENDIAN)
 195 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 196 #    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN)
 197 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 198 #    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN)
 199 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 200 #   endif
 201 #  elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)   /* tier 3: double-underscore names, same four tests */
 202 #    if    defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
 203 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 204 #    elif !defined(__LITTLE_ENDIAN__) &&  defined(__BIG_ENDIAN__)
 205 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 206 #    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__)
 207 #      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
 208 #    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__)
 209 #      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
 210 #    endif
 211 #  endif
 212 #endif   /* PLATFORM_BYTE_ORDER can still be undefined at this point */
 213
 214 /*  if the platform is still unknown, try to find its byte order    */
 215 /*  from commonly used machine defines                              */
 216
 217 #if !defined(PLATFORM_BYTE_ORDER)
 218 /* Fallback: infer the byte order from compiler / architecture / OS macros. */
 219 #if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
 220       defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
 221       defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
 222       defined( vax )       || defined( vms )     || defined( VMS )        || \
 223       defined( __VMS )
 224 #  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN   /* any macro in the list above selects little endian */
 225
 226 #elif defined( AMIGA )    || defined( applec )  || defined( __AS400__ )  || \
 227       defined( _CRAY )    || defined( __hppa )  || defined( __hp9000 )   || \
 228       defined( ibm370 )   || defined( mc68000 ) || defined( m68k )       || \
 229       defined( __MRC__ )  || defined( __MVS__ ) || defined( __MWERKS__ ) || \
 230       defined( sparc )    || defined( __sparc)  || defined( SYMANTEC_C ) || \
 231       defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ )
 232 #  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN      /* any macro in the list above selects big endian */
 233
 234 #elif 0     /* **** EDIT HERE IF NECESSARY **** */
 235 #  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN   /* manual override: taken only if the 0 above is edited to 1 */
 236 #elif 0     /* **** EDIT HERE IF NECESSARY **** */
 237 #  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN      /* manual override: taken only if the 0 above is edited to 1 */
 238 #else       /* nothing matched: stop the build rather than guess */
 239 #  error Please edit aesopt.h (line 234 or 236) to set the platform byte order
 240 #endif
 241
 242 #endif
 243
 244 /*  SOME LOCAL DEFINITIONS  */
 245
 246 #define NO_TABLES              0   /* table-count option value (ENC_ROUND, DEC_ROUND, KEY_SCHED, ...): none */
 247 #define ONE_TABLE              1   /* table-count option value: a single table */
 248 #define FOUR_TABLES            4   /* table-count option value: four tables */
 249 #define NONE                   0   /* unroll option value (ENC_UNROLL, DEC_UNROLL): no unrolling */
 250 #define PARTIAL                1   /* unroll option value: partial unrolling */
 251 #define FULL                   2   /* unroll option value: full unrolling */
 252
 253 #if defined(bswap32)                /* aes_sw32(x): reverse the four bytes of a 32-bit word */
 254 #define aes_sw32    bswap32
 255 #elif defined(bswap_32)
 256 #define aes_sw32    bswap_32
 257 #else                               /* no bswap macro visible: build the swap from two rotations */
 258 #define brot(x,n)   (((aes_32t)(x) <<  (n)) | ((aes_32t)(x) >> (32 - (n))))   /* rotate left by n bits, 0 < n < 32; n is parenthesised so expression arguments expand correctly */
 259 #define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))   /* rot 8 places bytes 1 and 3, rot 24 places bytes 0 and 2 */
 260 #endif
 261
 262 /*  1. FUNCTIONS REQUIRED
 263
 264     This implementation provides subroutines for encryption, decryption
 265     and for setting the three key lengths (separately) for encryption
 266     and decryption. When the assembler code is not being used the following
 267     definition blocks allow the selection of the routines that are to be
 268     included in the compilation.
 269 */
 270 #if defined( AES_ENCRYPT )          /* AES_ENCRYPT is not set in this file; presumably from aes.h or the build - confirm */
 271 #define ENCRYPTION                  /* tested further down when choosing the forward round table sets */
 272 #define ENCRYPTION_KEY_SCHEDULE     /* tested further down when choosing the key schedule table sets */
 273 #endif
 274
 275 #if defined( AES_DECRYPT )          /* AES_DECRYPT is likewise expected from outside this file */
 276 #define DECRYPTION                  /* tested further down when choosing the inverse round table sets */
 277 #define DECRYPTION_KEY_SCHEDULE     /* tested further down when choosing the key schedule table sets */
 278 #endif
 279
 280 /*  2. ASSEMBLER SUPPORT
 281
 282     This define (which can be on the command line) enables the use of the
 283     assembler code routines for encryption and decryption with the C code
 284     only providing key scheduling
 285 */
 286 #if 0 && !defined(AES_ASM)          /* the leading 0 disables this block; AES_ASM is only set if defined externally */
 287 #define AES_ASM
 288 #endif
 289
 290 /*  3. BYTE ORDER WITHIN 32 BIT WORDS
 291
 292     The fundamental data processing units in Rijndael are 8-bit bytes. The
 293     input, output and key input are all enumerated arrays of bytes in which
 294     bytes are numbered starting at zero and increasing to one less than the
 295     number of bytes in the array in question. This enumeration is only used
 296     for naming bytes and does not imply any adjacency or order relationship
 297     from one byte to another. When these inputs and outputs are considered
 298     as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
 299     byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
 300     In this implementation bits are numbered from 0 to 7 starting at the
 301     numerically least significant end of each byte (bit n represents 2^n).
 302
 303     However, Rijndael can be implemented more efficiently using 32-bit
 304     words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
 305     into word[n]. While in principle these bytes can be assembled into words
 306     in any positions, this implementation only supports the two formats in
 307     which bytes in adjacent positions within words also have adjacent byte
 308     numbers. This order is called big-endian if the lowest numbered bytes
 309     in words have the highest numeric significance and little-endian if the
 310     opposite applies.
 311
 312     This code can work in either order irrespective of the order used by the
 313     machine on which it runs. Normally the internal byte order will be set
 314     to the order of the processor on which the code is to be run but this
 315     define can be used to reverse this in special situations
 316
 317     NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set
 318 */
 319 #if 1 || defined(AES_ASM)           /* always true as written: the internal order follows the platform order */
 320 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 321 #elif 0                             /* reachable only if the first test is edited to be false */
 322 #define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN
 323 #elif 0                             /* reachable only if the first test is edited to be false */
 324 #define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN
 325 #else
 326 #error The algorithm byte order is not defined
 327 #endif
 328
 329 /*  4. FAST INPUT/OUTPUT OPERATIONS.
 330
 331     On some machines it is possible to improve speed by transferring the
 332     bytes in the input and output arrays to and from the internal 32-bit
 333     variables by addressing these arrays as if they are arrays of 32-bit
 334     words.  On some machines this will always be possible but there may
 335     be a large performance penalty if the byte arrays are not aligned on
 336     the normal word boundaries. On other machines this technique will
 337     lead to memory access errors when such 32-bit word accesses are not
 338     properly aligned. The option SAFE_IO avoids such problems but will
 339     often be slower on those machines that support misaligned access
 340     (especially so if care is taken to align the input  and output byte
 341     arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
 342     assumed that access to byte arrays as if they are arrays of 32-bit
 343     words will not cause problems when such accesses are misaligned.
 344 */
 345 #if 0 && !defined(_MSC_VER)         /* the leading 0 disables this block, so SAFE_IO is not defined here */
 346 #define SAFE_IO                     /* when defined, word_in/word_out use byte-wise access */
 347 #endif
 348
 349 /*  5. LOOP UNROLLING
 350
 351     The code for encryption and decryption cycles through a number of rounds
 352     that can be implemented either in a loop or by expanding the code into a
 353     long sequence of instructions, the latter producing a larger program but
 354     one that will often be much faster. The latter is called loop unrolling.
 355     There are also potential speed advantages in expanding two iterations in
 356     a loop with half the number of iterations, which is called partial loop
 357     unrolling.  The following options allow partial or full loop unrolling
 358     to be set independently for encryption and decryption
 359 */
 360 #if 1                               /* first branch taken: encryption fully unrolled */
 361 #define ENC_UNROLL  FULL
 362 #elif 0                             /* to pick PARTIAL, set the first test to 0 and this one to 1 */
 363 #define ENC_UNROLL  PARTIAL
 364 #else                               /* taken when both tests above are 0 */
 365 #define ENC_UNROLL  NONE
 366 #endif
 367
 368 #if 1                               /* first branch taken: decryption fully unrolled */
 369 #define DEC_UNROLL  FULL
 370 #elif 0                             /* to pick PARTIAL, set the first test to 0 and this one to 1 */
 371 #define DEC_UNROLL  PARTIAL
 372 #else                               /* taken when both tests above are 0 */
 373 #define DEC_UNROLL  NONE
 374 #endif
 375
 376 /*  6. FAST FINITE FIELD OPERATIONS
 377
 378     If this section is included, tables are used to provide faster finite
 379     field arithmetic (this has no effect if FIXED_TABLES is defined).
 380 */
 381 #if 1                               /* enabled; change to 0 to leave FF_TABLES undefined */
 382 #define FF_TABLES
 383 #endif
 384
 385 /*  7. INTERNAL STATE VARIABLE FORMAT
 386
 387     The internal state of Rijndael is stored in a number of local 32-bit
 388     word variables which can be defined either as an array or as individual
 389     named variables. Include this section if you want to store these local
 390     variables in arrays. Otherwise individual local variables will be used.
 391 */
 392 #if 0                               /* disabled: ARRAYS stays undefined, so s(x,c) pastes names (x0, x1, ...) */
 393 #define ARRAYS
 394 #endif
 395
 396 /* In this implementation the columns of the state array are each held in
 397    32-bit words. The state array can be held in various ways: in an array
 398    of words, in a number of individual word variables or in a number of
 399    processor registers. The following define maps a variable name x and
 400    a column number c to the way the state array variable is to be held.
 401    The first define below maps the state into an array x[c] whereas the
 402    second form maps the state into a number of individual variables x0,
 403    x1, etc.  Another form could map individual state columns to machine
 404    register names.
 405 */
 406
 407 #if !defined(ARRAYS)
 408 #define s(v,col) v##col     /* separate variables: s(b,2) names the variable b2 */
 409 #else
 410 #define s(v,col) v[col]     /* array storage: s(b,2) is the element b[2] */
 411 #endif
 412
 413 /*  8. FIXED OR DYNAMIC TABLES
 414
 415     When this section is included the tables used by the code are compiled
 416     statically into the binary file.  Otherwise the subroutine gen_tabs()
 417     must be called to compute them before the code is first used.
 418 */
 419 #if 1                               /* enabled: tables are compiled in; change to 0 for run-time generation */
 420 #define FIXED_TABLES
 421 #endif
 422
 423 /*  9. TABLE ALIGNMENT
 424
 425     On some systems speed will be improved by aligning the AES large lookup
 426     tables on particular boundaries. This define should be set to a power of
 427     two giving the desired alignment. It can be left undefined if alignment
 428     is not needed.  This option is specific to the Microsoft VC++ compiler -
 429     it seems to sometimes cause trouble for the VC++ version 6 compiler.
 430 */
 431
 432 #if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300)   /* the leading 0 disables this block; TABLE_ALIGN is not set here */
 433 #define TABLE_ALIGN 64
 434 #endif
 435
 436 /*  10. INTERNAL TABLE CONFIGURATION
 437
 438     This cipher proceeds by repeating in a number of cycles known as 'rounds'
 439     which are implemented by a round function which can optionally be speeded
 440     up using tables.  The basic tables are each 256 32-bit words, with either
 441     one or four tables being required for each round function depending on
 442     how much speed is required. The encryption and decryption round functions
 443     are different and the last encryption and decryption round functions are
 444     different again making four different round functions in all.
 445
 446     This means that:
 447       1. Normal encryption and decryption rounds can each use either 0, 1
 448          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 449       2. The last encryption and decryption rounds can also use either 0, 1
 450          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 451
 452     Include or exclude the appropriate definitions below to set the number
 453     of tables used by this implementation.
 454 */
 455
 456 #if 1   /* set tables for the normal encryption round */
 457 #define ENC_ROUND   FOUR_TABLES     /* selected as written */
 458 #elif 0                             /* to pick ONE_TABLE, set the first test to 0 and this one to 1 */
 459 #define ENC_ROUND   ONE_TABLE
 460 #else                               /* taken when both tests above are 0 */
 461 #define ENC_ROUND   NO_TABLES
 462 #endif
 463
 464 #if 1   /* set tables for the last encryption round */
 465 #define LAST_ENC_ROUND  FOUR_TABLES /* selected as written; may be lowered later to match ENC_ROUND */
 466 #elif 0
 467 #define LAST_ENC_ROUND  ONE_TABLE
 468 #else
 469 #define LAST_ENC_ROUND  NO_TABLES
 470 #endif
 471
 472 #if 1   /* set tables for the normal decryption round */
 473 #define DEC_ROUND   FOUR_TABLES     /* selected as written */
 474 #elif 0
 475 #define DEC_ROUND   ONE_TABLE
 476 #else
 477 #define DEC_ROUND   NO_TABLES
 478 #endif
 479
 480 #if 1   /* set tables for the last decryption round */
 481 #define LAST_DEC_ROUND  FOUR_TABLES /* selected as written; may be lowered later to match DEC_ROUND */
 482 #elif 0
 483 #define LAST_DEC_ROUND  ONE_TABLE
 484 #else
 485 #define LAST_DEC_ROUND  NO_TABLES
 486 #endif
 487
 488 /*  The decryption key schedule can be speeded up with tables in the same
 489     way that the round functions can.  Include or exclude the following
 490     defines to set this requirement.
 491 */
 492 #if 1                               /* first branch taken: key schedule uses four tables */
 493 #define KEY_SCHED   FOUR_TABLES
 494 #elif 0                             /* to pick ONE_TABLE, set the first test to 0 and this one to 1 */
 495 #define KEY_SCHED   ONE_TABLE
 496 #else                               /* taken when both tests above are 0 */
 497 #define KEY_SCHED   NO_TABLES
 498 #endif
 499
 500 /*  11. TABLE POINTER CACHING
 501
 502     Normally tables are referenced directly. Enable this option if you wish to
 503     cache pointers to the tables in the encrypt/decrypt code.  Note that this
 504     only works if you are using FOUR_TABLES for the ROUND you enable this for.
 505 */
 506 #if 1                               /* enabled: table pointer caching for normal encryption rounds */
 507 #define ENC_ROUND_CACHE_TABLES
 508 #endif
 509 #if 1                               /* enabled: caching for the last encryption round */
 510 #define LAST_ENC_ROUND_CACHE_TABLES
 511 #endif
 512 #if 1                               /* enabled: caching for normal decryption rounds */
 513 #define DEC_ROUND_CACHE_TABLES
 514 #endif
 515 #if 1                               /* enabled: caching for the last decryption round */
 516 #define LAST_DEC_ROUND_CACHE_TABLES
 517 #endif
 518
 519
 520 /* END OF CONFIGURATION OPTIONS */
 521
 522 #define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))   /* evaluates to 10 when AES_BLOCK_SIZE is 16; presumably the round-constant table length - TODO confirm where it is used */
 523
 524 /* Disable or report errors on some combinations of options */
 525
 526 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES   /* lower the last-round setting so it never exceeds the normal-round setting */
 527 #undef  LAST_ENC_ROUND
 528 #define LAST_ENC_ROUND  NO_TABLES
 529 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
 530 #undef  LAST_ENC_ROUND
 531 #define LAST_ENC_ROUND  ONE_TABLE
 532 #endif
 533
 534 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE   /* table-free encryption rounds: unrolling is switched off */
 535 #undef  ENC_UNROLL
 536 #define ENC_UNROLL  NONE
 537 #endif
 538
 539 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES   /* same lowering rule for the decryption settings */
 540 #undef  LAST_DEC_ROUND
 541 #define LAST_DEC_ROUND  NO_TABLES
 542 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
 543 #undef  LAST_DEC_ROUND
 544 #define LAST_DEC_ROUND  ONE_TABLE
 545 #endif
 546
 547 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE   /* table-free decryption rounds: unrolling is switched off */
 548 #undef  DEC_UNROLL
 549 #define DEC_UNROLL  NONE
 550 #endif
 551
 552 /*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
 553                higher index positions with wrap around into low positions
 554     ups(x,n):  moves bytes by n positions to higher index positions in
 555                words but without wrap around
 556     bval(x,n): extracts a byte from a word
 557
 558     NOTE:      The definitions given here are intended only for use with
 559                unsigned variables and with shift counts that are compile
 560                time constants
 561 */
 562
 563 #if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN)   /* byte n of a word occupies bits 8n .. 8n+7 */
 564 #define upr(x,n)        (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n))))   /* n must be 1..3: n == 0 would shift by 32 */
 565 #define ups(x,n)        ((aes_32t) (x) << (8 * (n)))
 566 #define bval(x,n)       ((aes_08t)((x) >> (8 * (n))))
 567 #define bytes2word(b0, b1, b2, b3)  \
 568         (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
 569 #endif
 570 /* Big-endian counterparts: the same four operations with the shift directions mirrored. */
 571 #if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN)      /* byte n of a word occupies bits 24-8n .. 31-8n */
 572 #define upr(x,n)        (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n))))   /* n must be 1..3: n == 0 would shift by 32 */
 573 #define ups(x,n)        ((aes_32t) (x) >> (8 * (n)))
 574 #define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
 575 #define bytes2word(b0, b1, b2, b3)  \
 576         (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
 577 #endif
 578
 579 #if defined(SAFE_IO)                /* byte-wise form: each byte is read or written individually */
 580 /* word_in(x,c) reads word c of byte array x; word_out(x,c,v) stores v as word c of x. */
 581 #define word_in(x,c)    bytes2word(((aes_08t*)(x)+4*(c))[0], ((aes_08t*)(x)+4*(c))[1], \
 582                                    ((aes_08t*)(x)+4*(c))[2], ((aes_08t*)(x)+4*(c))[3])
 583 #define word_out(x,c,v) { ((aes_08t*)(x)+4*(c))[0] = bval(v,0); ((aes_08t*)(x)+4*(c))[1] = bval(v,1); \
 584                           ((aes_08t*)(x)+4*(c))[2] = bval(v,2); ((aes_08t*)(x)+4*(c))[3] = bval(v,3); }
 585
 586 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)   /* direct 32-bit access through a cast pointer; x may be misaligned */
 587
 588 #define word_in(x,c)    (*((aes_32t*)(x)+(c)))
 589 #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v))
 590
 591 #else                               /* direct 32-bit access with a byte swap on each load and store */
 592
 593 #define word_in(x,c)    aes_sw32(*((aes_32t*)(x)+(c)))
 594 #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v))
 595
 596 #endif                              /* note: the SAFE_IO word_out is a braced block, the other two are expressions */
 597
 598 /* the finite field modular polynomial and elements */
 599
 600 #define WPOLY   0x011b              /* bits 8,4,3,1,0 set: x^8 + x^4 + x^3 + x + 1 */
 601 #define BPOLY     0x1b              /* WPOLY without its bit 8 */
 602
 603 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
 604
 605 #define m1  0x80808080              /* top bit of each of the four bytes */
 606 #define m2  0x7f7f7f7f              /* low seven bits of each byte */
 607 #define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))   /* shift each byte left by one, then XOR BPOLY into every byte whose top bit was set */
 608
 609 /* The following defines provide alternative definitions of gf_mulx that might
 610    give improved performance if a fast 32-bit multiply is not available. Note
 611    that a temporary variable u needs to be defined where gf_mulx is used.
 612
 613 #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
 614 #define m4  (0x01010101 * BPOLY)
 615 #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
 616 */
 617
 618 /* Work out which tables are needed for the different options   */
 619
 620 #if defined( AES_ASM )              /* with assembler rounds every table setting is forced to FOUR_TABLES */
 621 #if defined( ENC_ROUND )
 622 #undef  ENC_ROUND
 623 #endif
 624 #define ENC_ROUND   FOUR_TABLES
 625 #if defined( LAST_ENC_ROUND )
 626 #undef  LAST_ENC_ROUND
 627 #endif
 628 #define LAST_ENC_ROUND  FOUR_TABLES
 629 #if defined( DEC_ROUND )
 630 #undef  DEC_ROUND
 631 #endif
 632 #define DEC_ROUND   FOUR_TABLES
 633 #if defined( LAST_DEC_ROUND )
 634 #undef  LAST_DEC_ROUND
 635 #endif
 636 #define LAST_DEC_ROUND  FOUR_TABLES
 637 #if defined( KEY_SCHED )            /* NOTE(review): unlike the four settings above, the #define sits inside this test, so KEY_SCHED stays undefined if it was not set earlier - confirm this is intended */
 638 #undef  KEY_SCHED
 639 #define KEY_SCHED   FOUR_TABLES
 640 #endif
 641 #endif
 642
 643 #if defined(ENCRYPTION) || defined(AES_ASM)   /* forward tables: one *_SET flag per table family that is needed */
 644 #if ENC_ROUND == ONE_TABLE
 645 #define FT1_SET
 646 #elif ENC_ROUND == FOUR_TABLES
 647 #define FT4_SET
 648 #else                               /* NO_TABLES: only the plain S-box flag is set */
 649 #define SBX_SET
 650 #endif
 651 #if LAST_ENC_ROUND == ONE_TABLE
 652 #define FL1_SET
 653 #elif LAST_ENC_ROUND == FOUR_TABLES
 654 #define FL4_SET
 655 #elif !defined(SBX_SET)             /* guard avoids defining SBX_SET twice */
 656 #define SBX_SET
 657 #endif
 658 #endif
 659
 660 #if defined(DECRYPTION) || defined(AES_ASM)   /* inverse tables: same scheme with IT, IL and ISB flags */
 661 #if DEC_ROUND == ONE_TABLE
 662 #define IT1_SET
 663 #elif DEC_ROUND == FOUR_TABLES
 664 #define IT4_SET
 665 #else                               /* NO_TABLES: only the inverse S-box flag is set */
 666 #define ISB_SET
 667 #endif
 668 #if LAST_DEC_ROUND == ONE_TABLE
 669 #define IL1_SET
 670 #elif LAST_DEC_ROUND == FOUR_TABLES
 671 #define IL4_SET
 672 #elif !defined(ISB_SET)             /* guard avoids defining ISB_SET twice */
 673 #define ISB_SET
 674 #endif
 675 #endif
 676
 677 #if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)   /* key schedule: LS and IM flags are always set as a pair */
 678 #if KEY_SCHED == ONE_TABLE
 679 #define LS1_SET
 680 #define IM1_SET
 681 #elif KEY_SCHED == FOUR_TABLES
 682 #define LS4_SET
 683 #define IM4_SET
 684 #elif !defined(SBX_SET)             /* table-free key schedule falls back to the plain S-box flag */
 685 #define SBX_SET
 686 #endif
 687 #endif
 688
 689 /* Generic table-lookup macros. vf(x,r,c) supplies the source word and rf(r,c) the byte index used for position r (0..3) of column c. */
 690 /* no_table: four byte lookups in 'box', packed into one word with bytes2word. */
 691 #define no_table(x,box,vf,rf,c) bytes2word( \
 692     box[bval(vf(x,0,c),rf(0,c))], \
 693     box[bval(vf(x,1,c),rf(1,c))], \
 694     box[bval(vf(x,2,c),rf(2,c))], \
 695     box[bval(vf(x,3,c),rf(3,c))])
 696 /* one_table: XOR of four lookups in a single table; lookups 1..3 are passed through op(value,n). */
 697 #define one_table(x,op,tab,vf,rf,c) \
 698  (     tab[bval(vf(x,0,c),rf(0,c))] \
 699   ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
 700   ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
 701   ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
 702 /* four_tables: XOR of one lookup from each of tab[0] .. tab[3]. */
 703 #define four_tables(x,tab,vf,rf,c) \
 704  (  tab[0][bval(vf(x,0,c),rf(0,c))] \
 705   ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
 706   ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
 707   ^ tab[3][bval(vf(x,3,c),rf(3,c))])
 708 /* four_cached_tables: as four_tables, but the tables are the identifiers tab0 .. tab3 formed by token pasting. */
 709 #define four_cached_tables(x,tab,vf,rf,c) \
 710 (  tab##0[bval(vf(x,0,c),rf(0,c))] \
 711    ^ tab##1[bval(vf(x,1,c),rf(1,c))] \
 712    ^ tab##2[bval(vf(x,2,c),rf(2,c))] \
 713    ^ tab##3[bval(vf(x,3,c),rf(3,c))])
 714 /* vf1: the word itself; rf1: byte r; rf2: byte (r - c) mod 4, with 8 added so the value stays non-negative before masking. */
 715 #define vf1(x,r,c)  (x)
 716 #define rf1(r,c)    (r)
 717 #define rf2(r,c)    ((8+(r)-(c))&3)
 718
 719 /* perform forward and inverse column mix operation on four bytes in long word x in */
 720 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
 721
 722 #if defined(FM4_SET)    /* not currently used */
 723 #define fwd_mcol(x)     four_tables(x,t_use(f,m),vf1,rf1,0)
 724 #elif defined(FM1_SET)  /* not currently used */
 725 #define fwd_mcol(x)     one_table(x,upr,t_use(f,m),vf1,rf1,0)
 726 #else                   /* table-free form: assigns the scratch variable g2 that dec_fmvars declares */
 727 #define dec_fmvars      aes_32t g2
 728 #define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
 729 #endif
 730 /* inv_mcol: table forms when IM4_SET or IM1_SET is defined (t_use is defined outside this file), otherwise arithmetic. */
 731 #if defined(IM4_SET)
 732 #define inv_mcol(x)     four_tables(x,t_use(i,m),vf1,rf1,0)
 733 #elif defined(IM1_SET)
 734 #define inv_mcol(x)     one_table(x,upr,t_use(i,m),vf1,rf1,0)
 735 #else                   /* table-free form: the comma expression assigns g2, g4, g9 in order, so dec_imvars must be in scope */
 736 #define dec_imvars      aes_32t g2, g4, g9
 737 #define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
 738                         (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
 739 #endif
 740 /* ls_box(x,c): the first available table set wins, in the order FL4, LS4, FL1, LS1; otherwise the plain S-box via no_table. */
 741 #if defined(FL4_SET)
 742 #define ls_box(x,c)     four_tables(x,t_use(f,l),vf1,rf2,c)
 743 #elif   defined(LS4_SET)
 744 #define ls_box(x,c)     four_tables(x,t_use(l,s),vf1,rf2,c)
 745 #elif defined(FL1_SET)
 746 #define ls_box(x,c)     one_table(x,upr,t_use(f,l),vf1,rf2,c)
 747 #elif defined(LS1_SET)
 748 #define ls_box(x,c)     one_table(x,upr,t_use(l,s),vf1,rf2,c)
 749 #else
 750 #define ls_box(x,c)     no_table(x,t_use(s,box),vf1,rf2,c)
 751 #endif
 752
 753 #endif