/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */
/* rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code
 * authors: Paulo Barreto
 *
 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
 */
31 #include "rijndael-alg-ref.h"
32 #include <AppleCSP/cspdebugging.h>
34 #define SC ((BC - 4) >> 1)
36 #include "boxes-ref.h"
38 static const word8 shifts
[3][4][2] = {
56 /* 128 bit key/word shift table in bits */
57 static const word8 shifts128
[4][2] = {
64 #if !AES_MUL_BY_LOOKUP
/*
 * Profiling measurements showed that the mul routine is where a large proportion of
 * the time is spent. Since the first argument to mul is always one of six
 * constants (2, 3, 0xe, etc.), we implement six 256-byte lookup tables to
 * do the multiplies. This eliminates the need for the log/antilog tables, so
 * it's only adding one kilobyte of const data. Throughput improvement for this
 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
 * on a 500 MHz G4 with 1 MB of L2 cache.
 */

/*
 * The mod 255 op in mul is really expensive.
 *
 * We know that b <= (254 * 2), so there are only two cases: either return b,
 * or return (b - 255).
 *
 * On a G4 this single optimization results in a 24% speedup for encrypt and
 * a 25% speedup for decrypt.
 */
86 static inline word8
mod255(word32 b
)
94 word8
mul(word8 a
, word8 b
) {
95 /* multiply two elements of GF(2^m)
96 * needed for MixColumn and InvMixColumn
98 if (a
&& b
) return Alogtable
[mod255(Logtable
[a
] + Logtable
[b
])];
101 #endif /* !AES_MUL_BY_LOOKUP */
103 void KeyAddition(word8 a
[4][MAXBC
], word8 rk
[4][MAXBC
], word8 BC
) {
104 /* Exor corresponding text input and round key input bytes
108 for(i
= 0; i
< 4; i
++)
109 for(j
= 0; j
< BC
; j
++) a
[i
][j
] ^= rk
[i
][j
];
112 void ShiftRow(word8 a
[4][MAXBC
], word8 d
, word8 BC
) {
113 /* Row 0 remains unchanged
114 * The other three rows are shifted a variable amount
119 for(i
= 1; i
< 4; i
++) {
120 for(j
= 0; j
< BC
; j
++) tmp
[j
] = a
[i
][(j
+ shifts
[SC
][i
][d
]) % BC
];
121 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = tmp
[j
];
125 void Substitution(word8 a
[4][MAXBC
], const word8 box
[256], word8 BC
) {
126 /* Replace every byte of the input by the byte at that place
127 * in the nonlinear S-box
131 for(i
= 0; i
< 4; i
++)
132 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = box
[a
[i
][j
]] ;
135 void MixColumn(word8 a
[4][MAXBC
], word8 BC
) {
136 /* Mix the four bytes of every column in a linear way
141 for(j
= 0; j
< BC
; j
++) {
142 for(i
= 0; i
< 4; i
++) {
143 #if AES_MUL_BY_LOOKUP
144 b
[i
][j
] = mulBy0x02
[a
[i
][j
]]
145 ^ mulBy0x03
[a
[(i
+ 1) % 4][j
]]
149 b
[i
][j
] = mul(2,a
[i
][j
])
150 ^ mul(3,a
[(i
+ 1) % 4][j
])
156 for(i
= 0; i
< 4; i
++) {
157 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = b
[i
][j
];
161 void InvMixColumn(word8 a
[4][MAXBC
], word8 BC
) {
162 /* Mix the four bytes of every column in a linear way
163 * This is the opposite operation of Mixcolumn
168 for(j
= 0; j
< BC
; j
++) {
169 for(i
= 0; i
< 4; i
++) {
170 #if AES_MUL_BY_LOOKUP
171 b
[i
][j
] = mulBy0x0e
[a
[i
][j
]]
172 ^ mulBy0x0b
[a
[(i
+ 1) % 4][j
]]
173 ^ mulBy0x0d
[a
[(i
+ 2) % 4][j
]]
174 ^ mulBy0x09
[a
[(i
+ 3) % 4][j
]];
176 b
[i
][j
] = mul(0xe,a
[i
][j
])
177 ^ mul(0xb,a
[(i
+ 1) % 4][j
])
178 ^ mul(0xd,a
[(i
+ 2) % 4][j
])
179 ^ mul(0x9,a
[(i
+ 3) % 4][j
]);
183 for(i
= 0; i
< 4; i
++) {
184 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = b
[i
][j
];
188 int rijndaelKeySched (
192 word8 W
[MAXROUNDS
+1][4][MAXBC
]) {
194 /* Calculate the necessary round keys
195 * The number of calculations depends on keyBits and blockBits
198 int i
, j
, t
, rconpointer
= 0;
202 case 128: KC
= 4; break;
203 case 192: KC
= 6; break;
204 case 256: KC
= 8; break;
205 default : return (-1);
209 case 128: BC
= 4; break;
210 case 192: BC
= 6; break;
211 case 256: BC
= 8; break;
212 default : return (-2);
215 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
216 case 128: ROUNDS
= 10; break;
217 case 192: ROUNDS
= 12; break;
218 case 256: ROUNDS
= 14; break;
219 default : return (-3); /* this cannot happen */
223 for(j
= 0; j
< KC
; j
++)
224 for(i
= 0; i
< 4; i
++)
227 /* copy values into round key array */
228 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
229 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
[i
][j
];
231 while (t
< (ROUNDS
+1)*BC
) { /* while not enough round key material calculated */
232 /* calculate new values */
233 for(i
= 0; i
< 4; i
++)
234 tk
[i
][0] ^= S
[tk
[(i
+1)%4
][KC
-1]];
235 tk
[0][0] ^= rcon
[rconpointer
++];
238 for(j
= 1; j
< KC
; j
++)
239 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
241 for(j
= 1; j
< KC
/2; j
++)
242 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
243 for(i
= 0; i
< 4; i
++) tk
[i
][KC
/2] ^= S
[tk
[i
][KC
/2 - 1]];
244 for(j
= KC
/2 + 1; j
< KC
; j
++)
245 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
247 /* copy values into round key array */
248 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
249 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
[i
][j
];
255 int rijndaelEncrypt (
259 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
261 /* Encryption of one block, general case.
266 case 128: BC
= 4; break;
267 case 192: BC
= 6; break;
268 case 256: BC
= 8; break;
269 default : return (-2);
272 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
273 case 128: ROUNDS
= 10; break;
274 case 192: ROUNDS
= 12; break;
275 case 256: ROUNDS
= 14; break;
276 default : return (-3); /* this cannot happen */
279 /* begin with a key addition
281 KeyAddition(a
,rk
[0],BC
);
283 /* ROUNDS-1 ordinary rounds
285 for(r
= 1; r
< ROUNDS
; r
++) {
286 Substitution(a
,S
,BC
);
289 KeyAddition(a
,rk
[r
],BC
);
292 /* Last round is special: there is no MixColumn
294 Substitution(a
,S
,BC
);
296 KeyAddition(a
,rk
[ROUNDS
],BC
);
301 int rijndaelDecrypt (
305 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
310 case 128: BC
= 4; break;
311 case 192: BC
= 6; break;
312 case 256: BC
= 8; break;
313 default : return (-2);
316 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
317 case 128: ROUNDS
= 10; break;
318 case 192: ROUNDS
= 12; break;
319 case 256: ROUNDS
= 14; break;
320 default : return (-3); /* this cannot happen */
323 /* To decrypt: apply the inverse operations of the encrypt routine,
326 * (KeyAddition is an involution: it 's equal to its inverse)
327 * (the inverse of Substitution with table S is Substitution with the
328 * inverse table of S)
329 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
332 /* First the special round:
333 * without InvMixColumn
334 * with extra KeyAddition
336 KeyAddition(a
,rk
[ROUNDS
],BC
);
337 Substitution(a
,Si
,BC
);
340 /* ROUNDS-1 ordinary rounds
342 for(r
= ROUNDS
-1; r
> 0; r
--) {
343 KeyAddition(a
,rk
[r
],BC
);
345 Substitution(a
,Si
,BC
);
349 /* End with the extra key addition
352 KeyAddition(a
,rk
[0],BC
);
/*
 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
 * char array pointers. The key schedule arrays are easy; they come from
 * keyInstance which has a 4-byte-aligned element preceding the key schedule.
 * Others require manual alignment of a local variable by the caller.
 */
364 static inline void KeyAddition128(
365 word8 a
[4][BC_128_OPT
],
366 word8 rk
[4][MAXBC
]) {
368 /* these casts are endian-independent */
369 ((word32
*)a
)[0] ^= *((word32
*)(&rk
[0]));
370 ((word32
*)a
)[1] ^= *((word32
*)(&rk
[1]));
371 ((word32
*)a
)[2] ^= *((word32
*)(&rk
[2]));
372 ((word32
*)a
)[3] ^= *((word32
*)(&rk
[3]));
375 static void Substitution128(
376 word8 a
[4][BC_128_OPT
],
377 const word8 box
[256]) {
378 /* Replace every byte of the input by the byte at that place
379 * in the nonlinear S-box
383 /* still to be optimized - larger S boxes? */
384 for(i
= 0; i
< 4; i
++) {
385 for(j
= 0; j
< BC_128_OPT
; j
++) {
386 a
[i
][j
] = box
[a
[i
][j
]];
391 #if defined(__ppc__) && defined(__GNUC__)
393 static inline void rotateWordLeft(
394 word8
*word
, // known to be word aligned
395 unsigned rotCount
) // in bits
397 word32 lword
= *((word32
*)word
);
398 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword
) : "0"(lword
), "r"(rotCount
));
399 *((word32
*)word
) = lword
;
405 * Insert your machine/compiler dependent code here,
406 * or just use this, which works on any platform and compiler
407 * which supports the __attribute__((aligned(4))) directive.
409 static void rotateWordLeft(
410 word8
*word
, // known to be word aligned
411 unsigned rotCount
) // in bits
413 word8 tmp
[BC_128_OPT
] __attribute__((aligned(4)));
414 unsigned bytes
= rotCount
/ 8;
416 tmp
[0] = word
[bytes
& (BC_128_OPT
-1)];
417 tmp
[1] = word
[(1+bytes
) & (BC_128_OPT
-1)];
418 tmp
[2] = word
[(2+bytes
) & (BC_128_OPT
-1)];
419 tmp
[3] = word
[(3+bytes
) & (BC_128_OPT
-1)];
420 *((word32
*)word
) = *((word32
*)tmp
);
424 static inline void ShiftRow128(
425 word8 a
[4][BC_128_OPT
],
427 /* Row 0 remains unchanged
428 * The other three rows are shifted (actually rotated) a variable amount
432 for(i
= 1; i
< 4; i
++) {
433 rotateWordLeft(a
[i
], shifts128
[i
][d
]);
/*
 * The following two routines are where most of the time is spent in this
 * module. Further optimization would have to focus here.
 */
441 static void MixColumn128(word8 a
[4][BC_128_OPT
]) {
442 /* Mix the four bytes of every column in a linear way
444 word8 b
[4][BC_128_OPT
];
447 for(j
= 0; j
< BC_128_OPT
; j
++) {
448 for(i
= 0; i
< 4; i
++) {
449 #if AES_MUL_BY_LOOKUP
450 b
[i
][j
] = mulBy0x02
[a
[i
][j
]]
451 ^ mulBy0x03
[a
[(i
+ 1) % 4][j
]]
455 b
[i
][j
] = mul(2,a
[i
][j
])
456 ^ mul(3,a
[(i
+ 1) % 4][j
])
462 memmove(a
, b
, 4 * BC_128_OPT
);
465 static void InvMixColumn128(word8 a
[4][BC_128_OPT
]) {
466 /* Mix the four bytes of every column in a linear way
467 * This is the opposite operation of Mixcolumn
469 word8 b
[4][BC_128_OPT
];
472 for(j
= 0; j
< BC_128_OPT
; j
++) {
473 for(i
= 0; i
< 4; i
++) {
474 #if AES_MUL_BY_LOOKUP
475 b
[i
][j
] = mulBy0x0e
[a
[i
][j
]]
476 ^ mulBy0x0b
[a
[(i
+ 1) % 4][j
]]
477 ^ mulBy0x0d
[a
[(i
+ 2) % 4][j
]]
478 ^ mulBy0x09
[a
[(i
+ 3) % 4][j
]];
480 b
[i
][j
] = mul(0xe,a
[i
][j
])
481 ^ mul(0xb,a
[(i
+ 1) % 4][j
])
482 ^ mul(0xd,a
[(i
+ 2) % 4][j
])
483 ^ mul(0x9,a
[(i
+ 3) % 4][j
]);
487 memmove(a
, b
, 4 * BC_128_OPT
);
490 int rijndaelKeySched128 (
491 word8 k
[4][KC_128_OPT
],
492 word8 W
[MAXROUNDS
+1][4][MAXBC
]) {
494 /* Calculate the necessary round keys
495 * The number of calculations depends on keyBits and blockBits
497 int i
, j
, t
, rconpointer
= 0;
498 word8 tk
[4][KC_128_OPT
];
499 unsigned numSchedRows
= (ROUNDS_128_OPT
+ 1) * BC_128_OPT
;
501 for(j
= 0; j
< KC_128_OPT
; j
++)
502 for(i
= 0; i
< 4; i
++)
505 /* copy values into round key array */
506 for(j
= 0; (j
< KC_128_OPT
) && (t
< numSchedRows
); j
++, t
++) {
507 for(i
= 0; i
< 4; i
++) {
508 W
[t
/ BC_128_OPT
][i
][t
% BC_128_OPT
] = tk
[i
][j
];
512 while (t
< numSchedRows
) {
513 /* while not enough round key material calculated */
514 /* calculate new values */
515 for(i
= 0; i
< 4; i
++) {
516 tk
[i
][0] ^= S
[tk
[(i
+1)%4
][KC_128_OPT
-1]];
518 tk
[0][0] ^= rcon
[rconpointer
++];
520 for(j
= 1; j
< KC_128_OPT
; j
++) {
521 for(i
= 0; i
< 4; i
++) {
522 tk
[i
][j
] ^= tk
[i
][j
-1];
526 /* copy values into round key array */
527 for(j
= 0; (j
< KC_128_OPT
) && (t
< numSchedRows
); j
++, t
++) {
528 for(i
= 0; i
< 4; i
++) {
529 W
[t
/ BC_128_OPT
][i
][t
% BC_128_OPT
] = tk
[i
][j
];
537 int rijndaelEncrypt128 (
538 word8 a
[4][BC_128_OPT
],
539 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
541 /* Encryption of one block.
545 /* begin with a key addition
547 KeyAddition128(a
,rk
[0]);
549 /* ROUNDS-1 ordinary rounds
551 for(r
= 1; r
< ROUNDS_128_OPT
; r
++) {
552 Substitution128(a
,S
);
555 KeyAddition128(a
,rk
[r
]);
558 /* Last round is special: there is no MixColumn
560 Substitution128(a
,S
);
562 KeyAddition128(a
,rk
[ROUNDS_128_OPT
]);
567 int rijndaelDecrypt128 (
568 word8 a
[4][BC_128_OPT
],
569 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
573 /* To decrypt: apply the inverse operations of the encrypt routine,
576 * (KeyAddition is an involution: it 's equal to its inverse)
577 * (the inverse of Substitution with table S is Substitution with the
578 * inverse table of S)
579 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
582 /* First the special round:
583 * without InvMixColumn
584 * with extra KeyAddition
586 KeyAddition128(a
,rk
[ROUNDS_128_OPT
]);
587 Substitution128(a
,Si
);
590 /* ROUNDS-1 ordinary rounds
592 for(r
= ROUNDS_128_OPT
-1; r
> 0; r
--) {
593 KeyAddition128(a
,rk
[r
]);
595 Substitution128(a
,Si
);
599 /* End with the extra key addition
602 KeyAddition128(a
,rk
[0]);