diff --git a/Security/libsecurity_apple_csp/lib/rijndael-alg-ref.c b/Security/libsecurity_apple_csp/lib/rijndael-alg-ref.c
new file mode 100644
index 00000000..e0038a10
--- /dev/null
+++ b/Security/libsecurity_apple_csp/lib/rijndael-alg-ref.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2000-2001,2011-2012,2014 Apple Inc. All Rights Reserved.
+ *
+ * The contents of this file constitute Original Code as defined in and are
+ * subject to the Apple Public Source License Version 1.2 (the 'License').
+ * You may not use this file except in compliance with the License. Please obtain
+ * a copy of the License at http://www.apple.com/publicsource and read it before
+ * using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
+ * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
+ * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
+ * specific language governing rights and limitations under the License.
+ */
+
+
+/* rijndael-alg-ref.c   v2.0   August '99
+ * Reference ANSI C code
+ * authors: Paulo Barreto
+ *          Vincent Rijmen
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "rijndael-alg-ref.h"
+#include <cspdebugging.h>
+
+#define SC	((BC - 4) >> 1)
+
+#include "boxes-ref.h"
+
+static const word8 shifts[3][4][2] = {
+ { { 0, 0 },
+   { 1, 3 },
+   { 2, 2 },
+   { 3, 1 }
+ },
+ { { 0, 0 },
+   { 1, 5 },
+   { 2, 4 },
+   { 3, 3 }
+ },
+ { { 0, 0 },
+   { 1, 7 },
+   { 3, 5 },
+   { 4, 4 }
+ }
+};
+
+#if	!GLADMAN_AES_128_ENABLE
+
+/* 128 bit key/word shift table in bits */
+static const word8 shifts128[4][2] = {
+ { 0, 0 },
+ { 8, 24 },
+ { 16, 16 },
+ { 24, 8 }
+};
+
+#endif	/* GLADMAN_AES_128_ENABLE */
+
+#if	!AES_MUL_BY_LOOKUP
+/*
+ * Profiling measurements showed that the mul routine is where a large proportion of
+ * the time is spent. Since the first argument to mul is always one of six
+ * constants (2, 3, 0xe, etc.), we implement six 256-byte lookup tables to
+ * do the multiplies. This eliminates the need for the log/antilog tables, so
+ * the net addition is only about one kilobyte of const data. Throughput improvement for this
+ * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
+ * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
+ * and 1.88 for decrypt. (Decrypt wins more from this enhancement because
+ * InvMixColumn does four muls vs. two muls for MixColumn.) Measurements taken
+ * on a 500 MHz G4 with 1 MB of L2 cache.
+ */
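+
+/*
+ * Illustrative sketch, not part of the original file: one way a
+ * multiply-by-constant table such as mulBy0x02 (precomputed const data in
+ * boxes-ref.h) could be generated offline from the log/antilog tables.
+ * The helper name buildMulTable is hypothetical, and the sketch assumes
+ * Logtable and Alogtable are available in the build that runs it.
+ */
+#if 0
+static void buildMulTable(word8 constant, word8 table[256])
+{
+	unsigned i;
+
+	/* zero times anything is zero; the log of zero is undefined */
+	table[0] = 0;
+	for(i = 1; i < 256; i++) {
+		/* constant * i in GF(2^8), via log/antilog */
+		table[i] = Alogtable[(Logtable[constant] + Logtable[i]) % 255];
+	}
+}
+#endif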
+
+/*
+ * The mod 255 op in mul is really expensive...
+ *
+ * We know that b <= (254 * 2), so there are only two cases. Either return b,
+ * or return b-255.
+ *
+ * On a G4 this single optimization results in a 24% speedup for encrypt and
+ * a 25% speedup for decrypt.
+ */
+static inline word8 mod255(word32 b)
+{
+	if(b >= 255) {
+		b -= 255;
+	}
+	return b;
+}
+
+word8 mul(word8 a, word8 b) {
+	/* multiply two elements of GF(2^m)
+	 * needed for MixColumn and InvMixColumn
+	 */
+	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
+	else return 0;
+}
+#endif	/* !AES_MUL_BY_LOOKUP */
+
+static
+void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
+	/* XOR corresponding text input and round key input bytes
+	 */
+	int i, j;
+
+	for(i = 0; i < 4; i++)
+		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
+}
+
+static
+void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
+	/* Row 0 remains unchanged
+	 * The other three rows are shifted a variable amount
+	 */
+	word8 tmp[MAXBC];
+	int i, j;
+
+	for(i = 1; i < 4; i++) {
+		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
+		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
+	}
+}
+
+static
+void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
+	/* Replace every byte of the input by the byte at that place
+	 * in the nonlinear S-box
+	 */
+	int i, j;
+
+	for(i = 0; i < 4; i++)
+		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]];
+}
+
+static
+void MixColumn(word8 a[4][MAXBC], word8 BC) {
+	/* Mix the four bytes of every column in a linear way
+	 */
+	word8 b[4][MAXBC];
+	int i, j;
+
+	for(j = 0; j < BC; j++) {
+		for(i = 0; i < 4; i++) {
+			#if	AES_MUL_BY_LOOKUP
+			b[i][j] = mulBy0x02[a[i][j]]
+				^ mulBy0x03[a[(i + 1) % 4][j]]
+				^ a[(i + 2) % 4][j]
+				^ a[(i + 3) % 4][j];
+			#else
+			b[i][j] = mul(2,a[i][j])
+				^ mul(3,a[(i + 1) % 4][j])
+				^ a[(i + 2) % 4][j]
+				^ a[(i + 3) % 4][j];
+			#endif
+		}
+	}
+	for(i = 0; i < 4; i++) {
+		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
+	}
+}
+
+static
+void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
+	/* Mix the four bytes of every column in a linear way
+	 * This is the opposite operation of MixColumn
+	 */
+	word8 b[4][MAXBC];
+	int i, j;
+
+	for(j = 0; j < BC; j++) {
+		for(i = 0; i < 4; i++) {
+			#if	AES_MUL_BY_LOOKUP
+			b[i][j] = mulBy0x0e[a[i][j]]
+				^ mulBy0x0b[a[(i + 1) % 4][j]]
+				^ mulBy0x0d[a[(i + 2) % 4][j]]
+				^ mulBy0x09[a[(i + 3) % 4][j]];
+			#else
+			b[i][j] = mul(0xe,a[i][j])
+				^ mul(0xb,a[(i + 1) % 4][j])
+				^ mul(0xd,a[(i + 2) % 4][j])
+				^ mul(0x9,a[(i + 3) % 4][j]);
+			#endif
+		}
+	}
+	for(i = 0; i < 4; i++) {
+		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
+	}
+}
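+
+/*
+ * Illustrative sketch, not part of the original file: MixColumn multiplies
+ * each column by the circulant matrix (02 03 01 01 / 01 02 03 01 /
+ * 01 01 02 03 / 03 01 01 02) over GF(2^8), and InvMixColumn by its inverse
+ * (0e 0b 0d 09 / ...), so applying one after the other returns the original
+ * state. The self-test below (hypothetical name, kept in this translation
+ * unit because both routines are static) checks that for a 128-bit block
+ * (BC = 4).
+ */
+#if 0
+static int mixColumnSelfTest(void)
+{
+	word8 state[4][MAXBC];
+	word8 saved[4][MAXBC];
+	int i, j;
+
+	/* arbitrary test pattern */
+	for(i = 0; i < 4; i++) {
+		for(j = 0; j < 4; j++) {
+			state[i][j] = (word8)(17 * i + 11 * j + 1);
+			saved[i][j] = state[i][j];
+		}
+	}
+
+	MixColumn(state, 4);
+	InvMixColumn(state, 4);
+
+	/* expect the original pattern back */
+	for(i = 0; i < 4; i++) {
+		for(j = 0; j < 4; j++) {
+			if(state[i][j] != saved[i][j]) {
+				return 0;	/* mismatch */
+			}
+		}
+	}
+	return 1;
+}
+#endif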
+
+int rijndaelKeySched (
+	word8 k[4][MAXKC],
+	int keyBits,
+	int blockBits,
+	word8 W[MAXROUNDS+1][4][MAXBC]) {
+
+	/* Calculate the necessary round keys
+	 * The number of calculations depends on keyBits and blockBits
+	 */
+	int KC, BC, ROUNDS;
+	int i, j, t, rconpointer = 0;
+	word8 tk[4][MAXKC];
+
+	switch (keyBits) {
+	case 128: KC = 4; break;
+	case 192: KC = 6; break;
+	case 256: KC = 8; break;
+	default : return (-1);
+	}
+
+	switch (blockBits) {
+	case 128: BC = 4; break;
+	case 192: BC = 6; break;
+	case 256: BC = 8; break;
+	default : return (-2);
+	}
+
+	switch (keyBits >= blockBits ? keyBits : blockBits) {
+	case 128: ROUNDS = 10; break;
+	case 192: ROUNDS = 12; break;
+	case 256: ROUNDS = 14; break;
+	default : return (-3); /* this cannot happen */
+	}
+
+
+	for(j = 0; j < KC; j++)
+		for(i = 0; i < 4; i++)
+			tk[i][j] = k[i][j];
+	t = 0;
+	/* copy values into round key array */
+	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
+		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
+
+	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
+		/* calculate new values */
+		for(i = 0; i < 4; i++)
+			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
+		tk[0][0] ^= rcon[rconpointer++];
+
+		if (KC != 8)
+			for(j = 1; j < KC; j++)
+				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
+		else {
+			for(j = 1; j < KC/2; j++)
+				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
+			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
+			for(j = KC/2 + 1; j < KC; j++)
+				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
+		}
+		/* copy values into round key array */
+		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
+			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
+	}
+
+	return 0;
+}
+
+int rijndaelEncrypt (
+	word8 a[4][MAXBC],
+	int keyBits,
+	int blockBits,
+	word8 rk[MAXROUNDS+1][4][MAXBC])
+{
+	/* Encryption of one block, general case.
+	 */
+	int r, BC, ROUNDS;
+
+	switch (blockBits) {
+	case 128: BC = 4; break;
+	case 192: BC = 6; break;
+	case 256: BC = 8; break;
+	default : return (-2);
+	}
+
+	switch (keyBits >= blockBits ? keyBits : blockBits) {
+	case 128: ROUNDS = 10; break;
+	case 192: ROUNDS = 12; break;
+	case 256: ROUNDS = 14; break;
+	default : return (-3); /* this cannot happen */
+	}
+
+	/* begin with a key addition
+	 */
+	KeyAddition(a,rk[0],BC);
+
+	/* ROUNDS-1 ordinary rounds
+	 */
+	for(r = 1; r < ROUNDS; r++) {
+		Substitution(a,S,BC);
+		ShiftRow(a,0,BC);
+		MixColumn(a,BC);
+		KeyAddition(a,rk[r],BC);
+	}
+
+	/* Last round is special: there is no MixColumn
+	 */
+	Substitution(a,S,BC);
+	ShiftRow(a,0,BC);
+	KeyAddition(a,rk[ROUNDS],BC);
+
+	return 0;
+}
+
+int rijndaelDecrypt (
+	word8 a[4][MAXBC],
+	int keyBits,
+	int blockBits,
+	word8 rk[MAXROUNDS+1][4][MAXBC])
+{
+	int r, BC, ROUNDS;
+
+	switch (blockBits) {
+	case 128: BC = 4; break;
+	case 192: BC = 6; break;
+	case 256: BC = 8; break;
+	default : return (-2);
+	}
+
+	switch (keyBits >= blockBits ? keyBits : blockBits) {
+	case 128: ROUNDS = 10; break;
+	case 192: ROUNDS = 12; break;
+	case 256: ROUNDS = 14; break;
+	default : return (-3); /* this cannot happen */
+	}
+
+	/* To decrypt: apply the inverse operations of the encrypt routine,
+	 * in opposite order
+	 *
+	 * (KeyAddition is an involution: it's equal to its inverse)
+	 * (the inverse of Substitution with table S is Substitution with the
+	 * inverse table of S)
+	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
+	 */
+
+	/* First the special round:
+	 *   without InvMixColumn
+	 *   with extra KeyAddition
+	 */
+	KeyAddition(a,rk[ROUNDS],BC);
+	Substitution(a,Si,BC);
+	ShiftRow(a,1,BC);
+
+	/* ROUNDS-1 ordinary rounds
+	 */
+	for(r = ROUNDS-1; r > 0; r--) {
+		KeyAddition(a,rk[r],BC);
+		InvMixColumn(a,BC);
+		Substitution(a,Si,BC);
+		ShiftRow(a,1,BC);
+	}
+
+	/* End with the extra key addition
+	 */
+	KeyAddition(a,rk[0],BC);
+
+	return 0;
+}
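+
+/*
+ * Illustrative sketch, not part of the original file: how the general-case
+ * entry points above fit together for one AES-128 block.  Key and block
+ * bytes are loaded column-major (byte n to row n % 4, column n / 4), the
+ * conventional AES state layout; an actual caller is free to load the state
+ * differently, and all names here are hypothetical.
+ */
+#if 0
+static int exampleRoundTrip(const word8 key[16], word8 block[16])
+{
+	word8 k[4][MAXKC];
+	word8 a[4][MAXBC];
+	word8 rk[MAXROUNDS+1][4][MAXBC];
+	int n;
+
+	/* load key and plaintext into the 4-row, column-major state layout */
+	for(n = 0; n < 16; n++) {
+		k[n % 4][n / 4] = key[n];
+		a[n % 4][n / 4] = block[n];
+	}
+
+	if(rijndaelKeySched(k, 128, 128, rk) != 0) return -1;
+	if(rijndaelEncrypt(a, 128, 128, rk) != 0) return -1;	/* a now holds the ciphertext */
+	if(rijndaelDecrypt(a, 128, 128, rk) != 0) return -1;	/* a holds the plaintext again */
+
+	/* copy the recovered plaintext back out */
+	for(n = 0; n < 16; n++) {
+		block[n] = a[n % 4][n / 4];
+	}
+	return 0;
+}
+#endif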
+
+#if	!GLADMAN_AES_128_ENABLE
+
+/*
+ * All of these 128-bit-key-and-block routines require 32-bit word-aligned
+ * char array pointers. The key schedule arrays are easy; they come from
+ * keyInstance which has a 4-byte-aligned element preceding the key schedule.
+ * Others require manual alignment of a local variable by the caller.
+ */
+
+static inline void KeyAddition128(
+	word8 a[4][BC_128_OPT],
+	word8 rk[4][MAXBC]) {
+
+	/* these casts are endian-independent */
+	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
+	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
+	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
+	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
+}
+
+static void Substitution128(
+	word8 a[4][BC_128_OPT],
+	const word8 box[256]) {
+	/* Replace every byte of the input by the byte at that place
+	 * in the nonlinear S-box
+	 */
+	int i, j;
+
+	/* still to be optimized - larger S boxes? */
+	for(i = 0; i < 4; i++) {
+		for(j = 0; j < BC_128_OPT; j++) {
+			a[i][j] = box[a[i][j]];
+		}
+	}
+}
+
+#if	defined(__ppc__) && defined(__GNUC__)
+
+static inline void rotateWordLeft(
+	word8 *word,			// known to be word aligned
+	unsigned rotCount)		// in bits
+{
+	word32 lword = *((word32 *)word);
+	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
+	*((word32 *)word) = lword;
+}
+
+#else
+
+/*
+ * Insert your machine/compiler dependent code here,
+ * or just use this, which works on any platform and compiler
+ * which supports the __attribute__((aligned(4))) directive.
+ */
+static void rotateWordLeft(
+	word8 *word,			// known to be word aligned
+	unsigned rotCount)		// in bits
+{
+	word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
+	unsigned bytes = rotCount / 8;
+
+	tmp[0] = word[bytes & (BC_128_OPT-1)];
+	tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
+	tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
+	tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
+	*((word32 *)word) = *((word32 *)tmp);
+}
+#endif
+
+static inline void ShiftRow128(
+	word8 a[4][BC_128_OPT],
+	word8 d) {
+	/* Row 0 remains unchanged
+	 * The other three rows are shifted (actually rotated) a variable amount
+	 */
+	int i;
+
+	for(i = 1; i < 4; i++) {
+		rotateWordLeft(a[i], shifts128[i][d]);
+	}
+}
+
+/*
+ * The following two routines are where most of the time is spent in this
+ * module. Further optimization would have to focus here.
+ */
+static void MixColumn128(word8 a[4][BC_128_OPT]) {
+	/* Mix the four bytes of every column in a linear way
+	 */
+	word8 b[4][BC_128_OPT];
+	int i, j;
+
+	for(j = 0; j < BC_128_OPT; j++) {
+		for(i = 0; i < 4; i++) {
+			#if	AES_MUL_BY_LOOKUP
+			b[i][j] = mulBy0x02[a[i][j]]
+				^ mulBy0x03[a[(i + 1) % 4][j]]
+				^ a[(i + 2) % 4][j]
+				^ a[(i + 3) % 4][j];
+			#else
+			b[i][j] = mul(2,a[i][j])
+				^ mul(3,a[(i + 1) % 4][j])
+				^ a[(i + 2) % 4][j]
+				^ a[(i + 3) % 4][j];
+			#endif
+		}
+	}
+	memmove(a, b, 4 * BC_128_OPT);
+}
+
+static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
+	/* Mix the four bytes of every column in a linear way
+	 * This is the opposite operation of MixColumn
+	 */
+	word8 b[4][BC_128_OPT];
+	int i, j;
+
+	for(j = 0; j < BC_128_OPT; j++) {
+		for(i = 0; i < 4; i++) {
+			#if	AES_MUL_BY_LOOKUP
+			b[i][j] = mulBy0x0e[a[i][j]]
+				^ mulBy0x0b[a[(i + 1) % 4][j]]
+				^ mulBy0x0d[a[(i + 2) % 4][j]]
+				^ mulBy0x09[a[(i + 3) % 4][j]];
+			#else
+			b[i][j] = mul(0xe,a[i][j])
+				^ mul(0xb,a[(i + 1) % 4][j])
+				^ mul(0xd,a[(i + 2) % 4][j])
+				^ mul(0x9,a[(i + 3) % 4][j]);
+			#endif
+		}
+	}
+	memmove(a, b, 4 * BC_128_OPT);
+}
+
+int rijndaelKeySched128 (
+	word8 k[4][KC_128_OPT],
+	word8 W[MAXROUNDS+1][4][MAXBC]) {
+
+	/* Calculate the necessary round keys for a 128-bit key and
+	 * 128-bit block
+	 */
+	int i, j, t, rconpointer = 0;
+	word8 tk[4][KC_128_OPT];
+	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;
+
+	for(j = 0; j < KC_128_OPT; j++)
+		for(i = 0; i < 4; i++)
+			tk[i][j] = k[i][j];
+	t = 0;
+	/* copy values into round key array */
+	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
+		for(i = 0; i < 4; i++) {
+			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
+		}
+	}
+
+	while (t < numSchedRows) {
+		/* while not enough round key material calculated */
+		/* calculate new values */
+		for(i = 0; i < 4; i++) {
+			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
+		}
+		tk[0][0] ^= rcon[rconpointer++];
+
+		for(j = 1; j < KC_128_OPT; j++) {
+			for(i = 0; i < 4; i++) {
+				tk[i][j] ^= tk[i][j-1];
+			}
+		}
+
+		/* copy values into round key array */
+		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
+			for(i = 0; i < 4; i++) {
+				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
+			}
+		}
+	}
+
+	return 0;
+}
+
+int rijndaelEncrypt128 (
+	word8 a[4][BC_128_OPT],
+	word8 rk[MAXROUNDS+1][4][MAXBC])
+{
+	/* Encryption of one block.
+	 */
+	int r;
+
+	/* begin with a key addition
+	 */
+	KeyAddition128(a,rk[0]);
+
+	/* ROUNDS-1 ordinary rounds
+	 */
+	for(r = 1; r < ROUNDS_128_OPT; r++) {
+		Substitution128(a,S);
+		ShiftRow128(a,0);
+		MixColumn128(a);
+		KeyAddition128(a,rk[r]);
+	}
+
+	/* Last round is special: there is no MixColumn
+	 */
+	Substitution128(a,S);
+	ShiftRow128(a,0);
+	KeyAddition128(a,rk[ROUNDS_128_OPT]);
+
+	return 0;
+}
+
+int rijndaelDecrypt128 (
+	word8 a[4][BC_128_OPT],
+	word8 rk[MAXROUNDS+1][4][MAXBC])
+{
+	int r;
+
+	/* To decrypt: apply the inverse operations of the encrypt routine,
+	 * in opposite order
+	 *
+	 * (KeyAddition is an involution: it's equal to its inverse)
+	 * (the inverse of Substitution with table S is Substitution with the
+	 * inverse table of S)
+	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
+	 */
+
+	/* First the special round:
+	 *   without InvMixColumn
+	 *   with extra KeyAddition
+	 */
+	KeyAddition128(a,rk[ROUNDS_128_OPT]);
+	Substitution128(a,Si);
+	ShiftRow128(a,1);
+
+	/* ROUNDS-1 ordinary rounds
+	 */
+	for(r = ROUNDS_128_OPT-1; r > 0; r--) {
+		KeyAddition128(a,rk[r]);
+		InvMixColumn128(a);
+		Substitution128(a,Si);
+		ShiftRow128(a,1);
+	}
+
+	/* End with the extra key addition
+	 */
+	KeyAddition128(a,rk[0]);
+
+	return 0;
+}
+
+#endif	/* !GLADMAN_AES_128_ENABLE */
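+
+/*
+ * Illustrative sketch, not part of the original file: driving the optimized
+ * 128-bit path.  Per the comments above, the arrays handed to the ...128
+ * routines must be 4-byte aligned, which a caller can obtain with
+ * __attribute__((aligned(4))) on locals.  The column-major state loading and
+ * all names here are hypothetical, as in the general-case sketch earlier in
+ * this file.
+ */
+#if 0
+static int exampleEncrypt128(const word8 key[16], word8 block[16])
+{
+	word8 k[4][KC_128_OPT] __attribute__((aligned(4)));
+	word8 a[4][BC_128_OPT] __attribute__((aligned(4)));
+	word8 rk[MAXROUNDS+1][4][MAXBC] __attribute__((aligned(4)));
+	int n;
+
+	/* column-major load, matching the general-case routines */
+	for(n = 0; n < 16; n++) {
+		k[n % 4][n / 4] = key[n];
+		a[n % 4][n / 4] = block[n];
+	}
+
+	if(rijndaelKeySched128(k, rk) != 0) return -1;
+	if(rijndaelEncrypt128(a, rk) != 0) return -1;	/* a now holds the ciphertext */
+
+	for(n = 0; n < 16; n++) {
+		block[n] = a[n % 4][n / 4];	/* ciphertext out */
+	}
+	return 0;
+}
+#endif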