/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */
/* rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code
 * authors: Paulo Barreto
 *          Vincent Rijmen
 *
 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
 */
#include <string.h>		/* memmove, memcmp */

#include "rijndael-alg-ref.h"
#include <AppleCSP/cspdebugging.h>

#define SC	((BC - 4) >> 1)

#include "boxes-ref.h"
/* per-row shift amounts in bytes, indexed by [SC][row][encrypt/decrypt] */
static const word8 shifts[3][4][2] = {
	{ { 0, 0 },
	  { 1, 3 },
	  { 2, 2 },
	  { 3, 1 } },

	{ { 0, 0 },
	  { 1, 5 },
	  { 2, 4 },
	  { 3, 3 } },

	{ { 0, 0 },
	  { 1, 7 },
	  { 3, 5 },
	  { 4, 4 } }
};

/* 128 bit key/word shift table in bits */
static const word8 shifts128[4][2] = {
	{  0,  0 },
	{  8, 24 },
	{ 16, 16 },
	{ 24,  8 }
};
#if	!AES_MUL_BY_LOOKUP

/*
 * Profiling measurements showed that the mul routine is where a large proportion of
 * the time is spent. Since the first argument to mul is always one of six
 * constants (2, 3, 0xe, etc.), we implement six 256-byte lookup tables to
 * do the multiplies. This eliminates the need for the log/antilog tables, so
 * it's only adding one kilobyte of const data. Throughput improvement for this
 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
 * case. Improvement for the general case (with a 256-bit key) is 1.46 for encrypt
 * and 1.88 for decrypt. (Decrypt gains more from this enhancement because
 * InvMixColumn does four muls, vs. two muls for MixColumn.) Measurements taken
 * on a 500 MHz G4 with 1 MB of L2 cache.
 */

/*
 * The mod 255 op in mul is really expensive...
 *
 * We know that b <= (254 * 2), so there are only two cases: either return b,
 * or return b - 255.
 *
 * On a G4 this single optimization results in a 24% speedup for encrypt and
 * a 25% speedup for decrypt.
 */
static inline word8 mod255(word32 b)
{
	if(b >= 255) {
		b -= 255;
	}
	return b;
}
word8 mul(word8 a, word8 b) {
	/* multiply two elements of GF(2^m)
	 * needed for MixColumn and InvMixColumn
	 */
	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
	else return 0;
}
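
/*
 * Illustrative sketch, not part of the original source: the mulBy0x02,
 * mulBy0x03, mulBy0x0e, mulBy0x0b, mulBy0x0d and mulBy0x09 tables used by
 * the AES_MUL_BY_LOOKUP build could be generated offline with mul() above.
 * genMulTable is a hypothetical helper name; the block is left disabled.
 */
#if 0
static void genMulTable(word8 c, word8 table[256])
{
	unsigned i;

	for (i = 0; i < 256; i++) {
		/* table[i] = c * i in GF(2^8) */
		table[i] = mul(c, (word8)i);
	}
}
#endif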
#endif	/* !AES_MUL_BY_LOOKUP */
void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
	/* Exor corresponding text input and round key input bytes
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
}
void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
	/* Row 0 remains unchanged
	 * The other three rows are shifted a variable amount
	 */
	word8 tmp[MAXBC];
	int i, j;

	for(i = 1; i < 4; i++) {
		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
	}
}
void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]];
}
void MixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
			#if	AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
			#if	AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
			#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
			#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
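
/*
 * Illustrative self-check sketch, not part of the original source: since
 * InvMixColumn is the inverse of MixColumn, a round trip must restore the
 * state. mixColumnInverseCheck is a hypothetical name; the block is left
 * disabled.
 */
#if 0
static int mixColumnInverseCheck(void)
{
	word8 s[4][MAXBC];
	word8 orig[4][MAXBC];
	word8 BC = 4;				/* 128-bit block */
	int i;

	for (i = 0; i < 4 * MAXBC; i++)
		((word8 *)s)[i] = (word8)i;		/* arbitrary test pattern */
	memmove(orig, s, sizeof(orig));

	MixColumn(s, BC);
	InvMixColumn(s, BC);
	return memcmp(orig, s, sizeof(orig));	/* 0 on success */
}
#endif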
int rijndaelKeySched (
	word8 k[4][MAXKC],
	int keyBits,
	int blockBits,
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	word8 tk[4][MAXKC];

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}
	for(j = 0; j < KC; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
		tk[0][0] ^= rcon[rconpointer++];

		if (KC != 8)
			for(j = 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		else {
			for(j = 1; j < KC/2; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
			for(j = KC/2 + 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		}

		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
	}

	return 0;
}
int rijndaelEncrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])

	/* Encryption of one block, general case.
	 */
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* begin with a key addition
	 */
	KeyAddition(a,rk[0],BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS; r++) {
		Substitution(a,S,BC);
		ShiftRow(a,0,BC);
		MixColumn(a,BC);
		KeyAddition(a,rk[r],BC);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution(a,S,BC);
	ShiftRow(a,0,BC);
	KeyAddition(a,rk[ROUNDS],BC);

	return 0;
}
int rijndaelDecrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 *             in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 *  inverse table of S)
	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition(a,rk[ROUNDS],BC);
	Substitution(a,Si,BC);
	ShiftRow(a,1,BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS-1; r > 0; r--) {
		KeyAddition(a,rk[r],BC);
		InvMixColumn(a,BC);
		Substitution(a,Si,BC);
		ShiftRow(a,1,BC);
	}

	/* End with the extra key addition
	 */
	KeyAddition(a,rk[0],BC);

	return 0;
}
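
/*
 * Illustrative usage sketch, not part of the original source: schedule a
 * 128-bit key, encrypt one block in place, then decrypt it back. Key and
 * state values are arbitrary; roundTripExample is a hypothetical name and
 * the block is left disabled.
 */
#if 0
static int roundTripExample(void)
{
	word8 k[4][MAXKC] = {{ 0 }};		/* all-zero test key */
	word8 a[4][MAXBC] = {{ 0 }};		/* state, one block */
	word8 rk[MAXROUNDS+1][4][MAXBC];	/* round key schedule */

	a[0][0] = 0x32;						/* arbitrary plaintext byte */
	if (rijndaelKeySched(k, 128, 128, rk)) return -1;
	if (rijndaelEncrypt(a, 128, 128, rk)) return -1;
	if (rijndaelDecrypt(a, 128, 128, rk)) return -1;
	return (a[0][0] == 0x32) ? 0 : -1;	/* expect original plaintext */
}
#endif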
#if	!GLADMAN_AES_128_ENABLE

/*
 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
 * char array pointers. The key schedule arrays are easy; they come from
 * keyInstance, which has a 4-byte-aligned element preceding the key schedule.
 * Others require manual alignment of a local variable by the caller.
 */
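
/*
 * Illustrative caller-side sketch, not part of the original source: one way
 * to get a word-aligned local state for the *128() routines, using the same
 * __attribute__((aligned(4))) directive as rotateWordLeft() below.
 * alignedCallExample is a hypothetical name; the block is left disabled.
 */
#if 0
static void alignedCallExample(
	const word8 *block,					/* arbitrary alignment */
	word8 rk[MAXROUNDS+1][4][MAXBC])	/* 4-byte aligned, per above */
{
	word8 state[4][BC_128_OPT] __attribute__((aligned(4)));
	int i;

	/* bytes enter the state column by column */
	for (i = 0; i < 4 * BC_128_OPT; i++)
		state[i % 4][i / 4] = block[i];
	rijndaelEncrypt128(state, rk);
}
#endif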
static inline void KeyAddition128(
	word8 a[4][BC_128_OPT],
	word8 rk[4][MAXBC]) {

	/* these casts are endian-independent */
	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
}
static void Substitution128(
	word8 a[4][BC_128_OPT],
	const word8 box[256]) {

	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	/* still to be optimized - larger S boxes? */
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC_128_OPT; j++) {
			a[i][j] = box[a[i][j]];
		}
	}
}
#if	defined(__ppc__) && defined(__GNUC__)

static inline void rotateWordLeft(
	word8 *word,		// known to be word aligned
	unsigned rotCount)	// in bits
{
	word32 lword = *((word32 *)word);
	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
	*((word32 *)word) = lword;
}

#else
/*
 * Insert your machine/compiler dependent code here,
 * or just use this, which works on any platform and compiler
 * which supports the __attribute__((aligned(4))) directive.
 */
static void rotateWordLeft(
	word8 *word,		// known to be word aligned
	unsigned rotCount)	// in bits
{
	word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
	unsigned bytes = rotCount / 8;

	tmp[0] = word[bytes     & (BC_128_OPT-1)];
	tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
	tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
	tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
	*((word32 *)word) = *((word32 *)tmp);
}
#endif
static inline void ShiftRow128(
	word8 a[4][BC_128_OPT],
	word8 d) {
	/* Row 0 remains unchanged
	 * The other three rows are shifted (actually rotated) a variable amount
	 */
	int i;

	for(i = 1; i < 4; i++) {
		rotateWordLeft(a[i], shifts128[i][d]);
	}
}
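
/*
 * Illustrative equivalence check, not part of the original source: for a
 * 128-bit block (BC = 4, so SC = 0), ShiftRow128 must agree with the general
 * ShiftRow above. shiftRow128Check is a hypothetical name; the block is
 * left disabled.
 */
#if 0
static int shiftRow128Check(word8 d)	/* d = 0 encrypt, 1 decrypt */
{
	word8 g[4][MAXBC] = {{ 0 }};
	word8 o[4][BC_128_OPT] __attribute__((aligned(4)));
	int i, j;

	for (i = 0; i < 4; i++)
		for (j = 0; j < BC_128_OPT; j++)
			g[i][j] = o[i][j] = (word8)(4*i + j);	/* arbitrary pattern */

	ShiftRow(g, d, 4);		/* general case */
	ShiftRow128(o, d);		/* optimized case */

	for (i = 0; i < 4; i++)
		for (j = 0; j < BC_128_OPT; j++)
			if (g[i][j] != o[i][j]) return -1;
	return 0;
}
#endif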
/*
 * The following two routines are where most of the time is spent in this
 * module. Further optimization would have to focus here.
 */
static void MixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
			#if	AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
			#if	AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
			#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
			#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
int rijndaelKeySched128 (
	word8 k[4][KC_128_OPT],
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys.
	 * Key and block size are fixed at 128 bits here, so the amount of
	 * round key material is known at compile time.
	 */
	int i, j, t, rconpointer = 0;
	word8 tk[4][KC_128_OPT];
	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;

	for(j = 0; j < KC_128_OPT; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
		for(i = 0; i < 4; i++) {
			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
		}
	}

	while (t < numSchedRows) {
		/* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++) {
			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
		}
		tk[0][0] ^= rcon[rconpointer++];

		for(j = 1; j < KC_128_OPT; j++) {
			for(i = 0; i < 4; i++) {
				tk[i][j] ^= tk[i][j-1];
			}
		}

		/* copy values into round key array */
		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
			for(i = 0; i < 4; i++) {
				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
			}
		}
	}

	return 0;
}
int rijndaelEncrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block.
	 */
	int r;

	/* begin with a key addition
	 */
	KeyAddition128(a,rk[0]);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS_128_OPT; r++) {
		Substitution128(a,S);
		ShiftRow128(a,0);
		MixColumn128(a);
		KeyAddition128(a,rk[r]);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution128(a,S);
	ShiftRow128(a,0);
	KeyAddition128(a,rk[ROUNDS_128_OPT]);

	return 0;
}
int rijndaelDecrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r;

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 *             in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 *  inverse table of S)
	 * (the inverse of ShiftRow is ShiftRow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition128(a,rk[ROUNDS_128_OPT]);
	Substitution128(a,Si);
	ShiftRow128(a,1);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS_128_OPT-1; r > 0; r--) {
		KeyAddition128(a,rk[r]);
		InvMixColumn128(a);
		Substitution128(a,Si);
		ShiftRow128(a,1);
	}

	/* End with the extra key addition
	 */
	KeyAddition128(a,rk[0]);

	return 0;
}

#endif	/* !GLADMAN_AES_128_ENABLE */