/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */
/* rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code
 * authors: Paulo Barreto
 *          Vincent Rijmen
 *
 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
 */

#include <string.h>		/* for memmove() */

#include "rijndael-alg-ref.h"
#include <cspdebugging.h>

#define SC	((BC - 4) >> 1)

#include "boxes-ref.h"
/* shift distances per row, indexed by [SC][row][d]; d = 0 encrypt, 1 decrypt */
static const word8 shifts[3][4][2] = {
	{ { 0, 0 },
	  { 1, 3 },
	  { 2, 2 },
	  { 3, 1 } },
	{ { 0, 0 },
	  { 1, 5 },
	  { 2, 4 },
	  { 3, 3 } },
	{ { 0, 0 },
	  { 1, 7 },
	  { 3, 5 },
	  { 4, 4 } }
};

#if		!GLADMAN_AES_128_ENABLE

/* 128 bit key/word shift table in bits */
static const word8 shifts128[4][2] = {
	{  0,  0 },
	{  8, 24 },
	{ 16, 16 },
	{ 24,  8 }
};

#endif	/* GLADMAN_AES_128_ENABLE */
#if		!AES_MUL_BY_LOOKUP
/*
 * Profiling measurements showed that the mul routine is where a large proportion of
 * the time is spent. Since the first argument to mul is always one of six
 * constants (2, 3, 0xe, etc.), we implement six 256-byte lookup tables to
 * do the multiplies. This eliminates the need for the log/antilog tables, so
 * it's only adding one kilobyte of const data. Throughput improvement for this
 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
 * case. Improvement for the general case (with a 256-bit key) is 1.46 for encrypt
 * and 1.88 for decrypt. (Decrypt wins more from this enhancement because
 * InvMixColumn does four muls, vs. two muls for MixColumn.) Measurements taken
 * on a 500 MHz G4 with 1 MB of L2 cache.
 */

/*
 * The mod 255 op in mul is really expensive.
 *
 * We know that b <= (254 * 2), so there are only two cases: either return b,
 * or return b - 255.
 *
 * On a G4 this single optimization results in a 24% speedup for encrypt and
 * a 25% speedup for decrypt.
 */
static inline word8 mod255(word32 b)
{
	/* b is at most 254 + 254, so at most one subtraction is needed */
	if(b >= 255) {
		b -= 255;
	}
	return b;
}
word8 mul(word8 a, word8 b) {
	/* multiply two elements of GF(2^m)
	 * needed for MixColumn and InvMixColumn
	 */
	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
	else return 0;
}
#endif	/* !AES_MUL_BY_LOOKUP */
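
/*
 * Illustrative sketch (added note, not in the original source): the
 * mulBy0xNN tables used when AES_MUL_BY_LOOKUP is set (presumably provided
 * by boxes-ref.h) could be generated offline from the log/antilog mul()
 * above. The helper name below is hypothetical and never compiled.
 */
#if 0
static void genMulTable(word8 c, word8 table[256]) {
	unsigned x;

	table[0] = 0;			/* mul() treats a zero operand specially */
	for(x = 1; x < 256; x++) {
		table[x] = mul(c, (word8)x);
	}
}
/* e.g. genMulTable(0x02, table2); ... repeat for 3, 9, 0xb, 0xd, 0xe */
#endif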
void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
	/* Exor corresponding text input and round key input bytes
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
}
void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
	/* Row 0 remains unchanged
	 * The other three rows are shifted a variable amount
	 * (d selects the encrypt or decrypt shift distances)
	 */
	word8 tmp[MAXBC];
	int i, j;

	for(i = 1; i < 4; i++) {
		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
	}
}
void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]];
}
void MixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 * (coefficients 2, 3, 1, 1 in GF(2^8))
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
int rijndaelKeySched (
	word8 k[4][MAXKC],
	int keyBits,
	int blockBits,
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	word8 tk[4][MAXKC];

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	for(j = 0; j < KC; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
		tk[0][0] ^= rcon[rconpointer++];

		if (KC != 8) {
			for(j = 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		}
		else {
			/* 256-bit keys apply an extra S-box step mid-word */
			for(j = 1; j < KC/2; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
			for(j = KC/2 + 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		}

		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
	}

	return 0;
}
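
/*
 * Worked example (added note, not in the original source): for
 * keyBits = blockBits = 128, KC = BC = 4 and ROUNDS = 10, so the schedule
 * fills (ROUNDS+1)*BC = 44 columns of W -- eleven 4x4 round keys, with the
 * initial copy providing 4 columns and each while-loop pass 4 more.
 */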
int rijndaelEncrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block, general case.
	 */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* begin with a key addition
	 */
	KeyAddition(a,rk[0],BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS; r++) {
		Substitution(a,S,BC);
		ShiftRow(a,0,BC);
		MixColumn(a,BC);
		KeyAddition(a,rk[r],BC);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution(a,S,BC);
	ShiftRow(a,0,BC);
	KeyAddition(a,rk[ROUNDS],BC);

	return 0;
}
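
/*
 * Usage sketch (illustrative, not in the original source; buffer contents
 * hypothetical): expand a 128-bit key, then encrypt one 128-bit block in
 * place with the general-case routine.
 */
#if 0
static void exampleEncryptOneBlock(void) {
	word8 k[4][MAXKC];              /* key bytes, k[row][column] */
	word8 a[4][MAXBC];              /* plaintext block, same layout */
	word8 W[MAXROUNDS+1][4][MAXBC]; /* expanded round keys */

	/* ... fill k[][] and a[][] ... */
	if (rijndaelKeySched(k, 128, 128, W) != 0) {
		return;                     /* unsupported key/block size */
	}
	rijndaelEncrypt(a, 128, 128, W); /* a[][] now holds the ciphertext */
}
#endif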
int rijndaelDecrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 * in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 * inverse table of S)
	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition(a,rk[ROUNDS],BC);
	Substitution(a,Si,BC);
	ShiftRow(a,1,BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS-1; r > 0; r--) {
		KeyAddition(a,rk[r],BC);
		InvMixColumn(a,BC);
		Substitution(a,Si,BC);
		ShiftRow(a,1,BC);
	}

	/* End with the extra key addition
	 */
	KeyAddition(a,rk[0],BC);

	return 0;
}
#if		!GLADMAN_AES_128_ENABLE

/*
 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
 * char array pointers. The key schedule arrays are easy; they come from
 * keyInstance, which has a 4-byte-aligned element preceding the key schedule.
 * Others require manual alignment of a local variable by the caller.
 */

static inline void KeyAddition128(
	word8 a[4][BC_128_OPT],
	word8 rk[4][MAXBC]) {

	/* these casts are endian-independent */
	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
}

static void Substitution128(
	word8 a[4][BC_128_OPT],
	const word8 box[256]) {

	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	/* still to be optimized - larger S boxes? */
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC_128_OPT; j++) {
			a[i][j] = box[a[i][j]];
		}
	}
}
#if defined(__ppc__) && defined(__GNUC__)

static inline void rotateWordLeft(
	word8 *word,			// known to be word aligned
	unsigned rotCount)		// in bits
{
	word32 lword = *((word32 *)word);
	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
	*((word32 *)word) = lword;
}

#else

/*
 * Insert your machine/compiler dependent code here,
 * or just use this, which works on any platform and compiler
 * which supports the __attribute__((aligned(4))) directive.
 */
static void rotateWordLeft(
	word8 *word,			// known to be word aligned
	unsigned rotCount)		// in bits
{
	word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
	unsigned bytes = rotCount / 8;

	tmp[0] = word[ bytes    & (BC_128_OPT-1)];
	tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
	tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
	tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
	*((word32 *)word) = *((word32 *)tmp);
}

#endif	/* __ppc__ && __GNUC__ */
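
/*
 * Illustrative check (added note, not in the original source): with the
 * portable version above, rotating the aligned word {1,2,3,4} left by 8
 * bits yields {2,3,4,1}.
 */
#if 0
static void rotateWordLeftExample(void) {
	word8 w[BC_128_OPT] __attribute__((aligned(4))) = { 1, 2, 3, 4 };
	rotateWordLeft(w, 8);	/* w is now { 2, 3, 4, 1 } */
}
#endif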
static inline void ShiftRow128(
	word8 a[4][BC_128_OPT],
	word8 d) {

	/* Row 0 remains unchanged
	 * The other three rows are shifted (actually rotated) a variable amount
	 */
	int i;

	for(i = 1; i < 4; i++) {
		rotateWordLeft(a[i], shifts128[i][d]);
	}
}
/*
 * The following two routines are where most of the time is spent in this
 * module. Further optimization would have to focus here.
 */
static void MixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
int rijndaelKeySched128 (
	word8 k[4][KC_128_OPT],
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int i, j, t, rconpointer = 0;
	word8 tk[4][KC_128_OPT];
	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;

	for(j = 0; j < KC_128_OPT; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
		for(i = 0; i < 4; i++) {
			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
		}
	}

	while (t < numSchedRows) {
		/* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++) {
			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
		}
		tk[0][0] ^= rcon[rconpointer++];

		for(j = 1; j < KC_128_OPT; j++) {
			for(i = 0; i < 4; i++) {
				tk[i][j] ^= tk[i][j-1];
			}
		}

		/* copy values into round key array */
		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
			for(i = 0; i < 4; i++) {
				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
			}
		}
	}

	return 0;
}
int rijndaelEncrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block.
	 */
	int r;

	/* begin with a key addition
	 */
	KeyAddition128(a,rk[0]);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS_128_OPT; r++) {
		Substitution128(a,S);
		ShiftRow128(a,0);
		MixColumn128(a);
		KeyAddition128(a,rk[r]);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution128(a,S);
	ShiftRow128(a,0);
	KeyAddition128(a,rk[ROUNDS_128_OPT]);

	return 0;
}
int rijndaelDecrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r;

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 * in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 * inverse table of S)
	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition128(a,rk[ROUNDS_128_OPT]);
	Substitution128(a,Si);
	ShiftRow128(a,1);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS_128_OPT-1; r > 0; r--) {
		KeyAddition128(a,rk[r]);
		InvMixColumn128(a);
		Substitution128(a,Si);
		ShiftRow128(a,1);
	}

	/* End with the extra key addition
	 */
	KeyAddition128(a,rk[0]);

	return 0;
}
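
/*
 * Usage sketch (illustrative, not in the original source): the 128-bit
 * optimized entry points require 4-byte-aligned state, which a caller can
 * guarantee with the same aligned attribute used by rotateWordLeft above.
 */
#if 0
static void example128(word8 rk[MAXROUNDS+1][4][MAXBC]) {
	word8 block[4][BC_128_OPT] __attribute__((aligned(4)));

	/* ... fill block[][] with plaintext; rk comes from rijndaelKeySched128 ... */
	rijndaelEncrypt128(block, rk);
	rijndaelDecrypt128(block, rk);	/* recovers the original plaintext */
}
#endif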
#endif	/* !GLADMAN_AES_128_ENABLE */