]>
git.saurik.com Git - apple/security.git/blob - libsecurity_apple_csp/lib/rijndael-alg-ref.c
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
19 /* rijndael-alg-ref.c v2.0 August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
24 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
31 #include "rijndael-alg-ref.h"
32 #include <cspdebugging.h>
34 #define SC ((BC - 4) >> 1)
36 #include "boxes-ref.h"
38 static const word8 shifts
[3][4][2] = {
56 #if !GLADMAN_AES_128_ENABLE
58 /* 128 bit key/word shift table in bits */
59 static const word8 shifts128
[4][2] = {
66 #endif /* GLADMAN_AES_128_ENABLE */
68 #if !AES_MUL_BY_LOOKUP
70 * Profiling measurements showed that the mul routine is where a large propertion of
71 * the time is spent. Since the first argument to mul is always one of six
72 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
73 * do the multiplies. This eliminates the need for the log/antilog tables, so
74 * it's only adding one kilobyte of const data. Throughput improvement for this
75 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
76 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
77 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
78 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
79 * on a 500 MHz G4 with 1 MB of L2 cache.
83 * The mod 255 op in mul is really expensive...
85 * We know that b <= (254 * 2), so there are only two cases. Either return b,
88 * On a G4 this single optimization results in a 24% speedup for encrypt and
89 * a 25% speedup for decrypt.
91 static inline word8
mod255(word32 b
)
99 word8
mul(word8 a
, word8 b
) {
100 /* multiply two elements of GF(2^m)
101 * needed for MixColumn and InvMixColumn
103 if (a
&& b
) return Alogtable
[mod255(Logtable
[a
] + Logtable
[b
])];
106 #endif /* !AES_MUL_BY_LOOKUP */
109 void KeyAddition(word8 a
[4][MAXBC
], word8 rk
[4][MAXBC
], word8 BC
) {
110 /* Exor corresponding text input and round key input bytes
114 for(i
= 0; i
< 4; i
++)
115 for(j
= 0; j
< BC
; j
++) a
[i
][j
] ^= rk
[i
][j
];
119 void ShiftRow(word8 a
[4][MAXBC
], word8 d
, word8 BC
) {
120 /* Row 0 remains unchanged
121 * The other three rows are shifted a variable amount
126 for(i
= 1; i
< 4; i
++) {
127 for(j
= 0; j
< BC
; j
++) tmp
[j
] = a
[i
][(j
+ shifts
[SC
][i
][d
]) % BC
];
128 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = tmp
[j
];
133 void Substitution(word8 a
[4][MAXBC
], const word8 box
[256], word8 BC
) {
134 /* Replace every byte of the input by the byte at that place
135 * in the nonlinear S-box
139 for(i
= 0; i
< 4; i
++)
140 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = box
[a
[i
][j
]] ;
144 void MixColumn(word8 a
[4][MAXBC
], word8 BC
) {
145 /* Mix the four bytes of every column in a linear way
150 for(j
= 0; j
< BC
; j
++) {
151 for(i
= 0; i
< 4; i
++) {
152 #if AES_MUL_BY_LOOKUP
153 b
[i
][j
] = mulBy0x02
[a
[i
][j
]]
154 ^ mulBy0x03
[a
[(i
+ 1) % 4][j
]]
158 b
[i
][j
] = mul(2,a
[i
][j
])
159 ^ mul(3,a
[(i
+ 1) % 4][j
])
165 for(i
= 0; i
< 4; i
++) {
166 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = b
[i
][j
];
171 void InvMixColumn(word8 a
[4][MAXBC
], word8 BC
) {
172 /* Mix the four bytes of every column in a linear way
173 * This is the opposite operation of Mixcolumn
178 for(j
= 0; j
< BC
; j
++) {
179 for(i
= 0; i
< 4; i
++) {
180 #if AES_MUL_BY_LOOKUP
181 b
[i
][j
] = mulBy0x0e
[a
[i
][j
]]
182 ^ mulBy0x0b
[a
[(i
+ 1) % 4][j
]]
183 ^ mulBy0x0d
[a
[(i
+ 2) % 4][j
]]
184 ^ mulBy0x09
[a
[(i
+ 3) % 4][j
]];
186 b
[i
][j
] = mul(0xe,a
[i
][j
])
187 ^ mul(0xb,a
[(i
+ 1) % 4][j
])
188 ^ mul(0xd,a
[(i
+ 2) % 4][j
])
189 ^ mul(0x9,a
[(i
+ 3) % 4][j
]);
193 for(i
= 0; i
< 4; i
++) {
194 for(j
= 0; j
< BC
; j
++) a
[i
][j
] = b
[i
][j
];
198 int rijndaelKeySched (
202 word8 W
[MAXROUNDS
+1][4][MAXBC
]) {
204 /* Calculate the necessary round keys
205 * The number of calculations depends on keyBits and blockBits
208 int i
, j
, t
, rconpointer
= 0;
212 case 128: KC
= 4; break;
213 case 192: KC
= 6; break;
214 case 256: KC
= 8; break;
215 default : return (-1);
219 case 128: BC
= 4; break;
220 case 192: BC
= 6; break;
221 case 256: BC
= 8; break;
222 default : return (-2);
225 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
226 case 128: ROUNDS
= 10; break;
227 case 192: ROUNDS
= 12; break;
228 case 256: ROUNDS
= 14; break;
229 default : return (-3); /* this cannot happen */
233 for(j
= 0; j
< KC
; j
++)
234 for(i
= 0; i
< 4; i
++)
237 /* copy values into round key array */
238 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
239 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
[i
][j
];
241 while (t
< (ROUNDS
+1)*BC
) { /* while not enough round key material calculated */
242 /* calculate new values */
243 for(i
= 0; i
< 4; i
++)
244 tk
[i
][0] ^= S
[tk
[(i
+1)%4
][KC
-1]];
245 tk
[0][0] ^= rcon
[rconpointer
++];
248 for(j
= 1; j
< KC
; j
++)
249 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
251 for(j
= 1; j
< KC
/2; j
++)
252 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
253 for(i
= 0; i
< 4; i
++) tk
[i
][KC
/2] ^= S
[tk
[i
][KC
/2 - 1]];
254 for(j
= KC
/2 + 1; j
< KC
; j
++)
255 for(i
= 0; i
< 4; i
++) tk
[i
][j
] ^= tk
[i
][j
-1];
257 /* copy values into round key array */
258 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
259 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
[i
][j
];
265 int rijndaelEncrypt (
269 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
271 /* Encryption of one block, general case.
276 case 128: BC
= 4; break;
277 case 192: BC
= 6; break;
278 case 256: BC
= 8; break;
279 default : return (-2);
282 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
283 case 128: ROUNDS
= 10; break;
284 case 192: ROUNDS
= 12; break;
285 case 256: ROUNDS
= 14; break;
286 default : return (-3); /* this cannot happen */
289 /* begin with a key addition
291 KeyAddition(a
,rk
[0],BC
);
293 /* ROUNDS-1 ordinary rounds
295 for(r
= 1; r
< ROUNDS
; r
++) {
296 Substitution(a
,S
,BC
);
299 KeyAddition(a
,rk
[r
],BC
);
302 /* Last round is special: there is no MixColumn
304 Substitution(a
,S
,BC
);
306 KeyAddition(a
,rk
[ROUNDS
],BC
);
311 int rijndaelDecrypt (
315 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
320 case 128: BC
= 4; break;
321 case 192: BC
= 6; break;
322 case 256: BC
= 8; break;
323 default : return (-2);
326 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
327 case 128: ROUNDS
= 10; break;
328 case 192: ROUNDS
= 12; break;
329 case 256: ROUNDS
= 14; break;
330 default : return (-3); /* this cannot happen */
333 /* To decrypt: apply the inverse operations of the encrypt routine,
336 * (KeyAddition is an involution: it 's equal to its inverse)
337 * (the inverse of Substitution with table S is Substitution with the
338 * inverse table of S)
339 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
342 /* First the special round:
343 * without InvMixColumn
344 * with extra KeyAddition
346 KeyAddition(a
,rk
[ROUNDS
],BC
);
347 Substitution(a
,Si
,BC
);
350 /* ROUNDS-1 ordinary rounds
352 for(r
= ROUNDS
-1; r
> 0; r
--) {
353 KeyAddition(a
,rk
[r
],BC
);
355 Substitution(a
,Si
,BC
);
359 /* End with the extra key addition
362 KeyAddition(a
,rk
[0],BC
);
367 #if !GLADMAN_AES_128_ENABLE
370 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
371 * char array pointers. The key schedule arrays are easy; they come from
372 * keyInstance which has a 4-byte-aligned element preceeding the key schedule.
373 * Others require manual alignment of a local variable by the caller.
376 static inline void KeyAddition128(
377 word8 a
[4][BC_128_OPT
],
378 word8 rk
[4][MAXBC
]) {
380 /* these casts are endian-independent */
381 ((word32
*)a
)[0] ^= *((word32
*)(&rk
[0]));
382 ((word32
*)a
)[1] ^= *((word32
*)(&rk
[1]));
383 ((word32
*)a
)[2] ^= *((word32
*)(&rk
[2]));
384 ((word32
*)a
)[3] ^= *((word32
*)(&rk
[3]));
387 static void Substitution128(
388 word8 a
[4][BC_128_OPT
],
389 const word8 box
[256]) {
390 /* Replace every byte of the input by the byte at that place
391 * in the nonlinear S-box
395 /* still to be optimized - larger S boxes? */
396 for(i
= 0; i
< 4; i
++) {
397 for(j
= 0; j
< BC_128_OPT
; j
++) {
398 a
[i
][j
] = box
[a
[i
][j
]];
403 #if defined(__ppc__) && defined(__GNUC__)
405 static inline void rotateWordLeft(
406 word8
*word
, // known to be word aligned
407 unsigned rotCount
) // in bits
409 word32 lword
= *((word32
*)word
);
410 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword
) : "0"(lword
), "r"(rotCount
));
411 *((word32
*)word
) = lword
;
417 * Insert your machine/compiler dependent code here,
418 * or just use this, which works on any platform and compiler
419 * which supports the __attribute__((aligned(4))) directive.
421 static void rotateWordLeft(
422 word8
*word
, // known to be word aligned
423 unsigned rotCount
) // in bits
425 word8 tmp
[BC_128_OPT
] __attribute__((aligned(4)));
426 unsigned bytes
= rotCount
/ 8;
428 tmp
[0] = word
[bytes
& (BC_128_OPT
-1)];
429 tmp
[1] = word
[(1+bytes
) & (BC_128_OPT
-1)];
430 tmp
[2] = word
[(2+bytes
) & (BC_128_OPT
-1)];
431 tmp
[3] = word
[(3+bytes
) & (BC_128_OPT
-1)];
432 *((word32
*)word
) = *((word32
*)tmp
);
436 static inline void ShiftRow128(
437 word8 a
[4][BC_128_OPT
],
439 /* Row 0 remains unchanged
440 * The other three rows are shifted (actually rotated) a variable amount
444 for(i
= 1; i
< 4; i
++) {
445 rotateWordLeft(a
[i
], shifts128
[i
][d
]);
450 * The following two routines are where most of the time is spent in this
451 * module. Further optimization would have to focus here.
453 static void MixColumn128(word8 a
[4][BC_128_OPT
]) {
454 /* Mix the four bytes of every column in a linear way
456 word8 b
[4][BC_128_OPT
];
459 for(j
= 0; j
< BC_128_OPT
; j
++) {
460 for(i
= 0; i
< 4; i
++) {
461 #if AES_MUL_BY_LOOKUP
462 b
[i
][j
] = mulBy0x02
[a
[i
][j
]]
463 ^ mulBy0x03
[a
[(i
+ 1) % 4][j
]]
467 b
[i
][j
] = mul(2,a
[i
][j
])
468 ^ mul(3,a
[(i
+ 1) % 4][j
])
474 memmove(a
, b
, 4 * BC_128_OPT
);
477 static void InvMixColumn128(word8 a
[4][BC_128_OPT
]) {
478 /* Mix the four bytes of every column in a linear way
479 * This is the opposite operation of Mixcolumn
481 word8 b
[4][BC_128_OPT
];
484 for(j
= 0; j
< BC_128_OPT
; j
++) {
485 for(i
= 0; i
< 4; i
++) {
486 #if AES_MUL_BY_LOOKUP
487 b
[i
][j
] = mulBy0x0e
[a
[i
][j
]]
488 ^ mulBy0x0b
[a
[(i
+ 1) % 4][j
]]
489 ^ mulBy0x0d
[a
[(i
+ 2) % 4][j
]]
490 ^ mulBy0x09
[a
[(i
+ 3) % 4][j
]];
492 b
[i
][j
] = mul(0xe,a
[i
][j
])
493 ^ mul(0xb,a
[(i
+ 1) % 4][j
])
494 ^ mul(0xd,a
[(i
+ 2) % 4][j
])
495 ^ mul(0x9,a
[(i
+ 3) % 4][j
]);
499 memmove(a
, b
, 4 * BC_128_OPT
);
502 int rijndaelKeySched128 (
503 word8 k
[4][KC_128_OPT
],
504 word8 W
[MAXROUNDS
+1][4][MAXBC
]) {
506 /* Calculate the necessary round keys
507 * The number of calculations depends on keyBits and blockBits
509 int i
, j
, t
, rconpointer
= 0;
510 word8 tk
[4][KC_128_OPT
];
511 unsigned numSchedRows
= (ROUNDS_128_OPT
+ 1) * BC_128_OPT
;
513 for(j
= 0; j
< KC_128_OPT
; j
++)
514 for(i
= 0; i
< 4; i
++)
517 /* copy values into round key array */
518 for(j
= 0; (j
< KC_128_OPT
) && (t
< numSchedRows
); j
++, t
++) {
519 for(i
= 0; i
< 4; i
++) {
520 W
[t
/ BC_128_OPT
][i
][t
% BC_128_OPT
] = tk
[i
][j
];
524 while (t
< numSchedRows
) {
525 /* while not enough round key material calculated */
526 /* calculate new values */
527 for(i
= 0; i
< 4; i
++) {
528 tk
[i
][0] ^= S
[tk
[(i
+1)%4
][KC_128_OPT
-1]];
530 tk
[0][0] ^= rcon
[rconpointer
++];
532 for(j
= 1; j
< KC_128_OPT
; j
++) {
533 for(i
= 0; i
< 4; i
++) {
534 tk
[i
][j
] ^= tk
[i
][j
-1];
538 /* copy values into round key array */
539 for(j
= 0; (j
< KC_128_OPT
) && (t
< numSchedRows
); j
++, t
++) {
540 for(i
= 0; i
< 4; i
++) {
541 W
[t
/ BC_128_OPT
][i
][t
% BC_128_OPT
] = tk
[i
][j
];
549 int rijndaelEncrypt128 (
550 word8 a
[4][BC_128_OPT
],
551 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
553 /* Encryption of one block.
557 /* begin with a key addition
559 KeyAddition128(a
,rk
[0]);
561 /* ROUNDS-1 ordinary rounds
563 for(r
= 1; r
< ROUNDS_128_OPT
; r
++) {
564 Substitution128(a
,S
);
567 KeyAddition128(a
,rk
[r
]);
570 /* Last round is special: there is no MixColumn
572 Substitution128(a
,S
);
574 KeyAddition128(a
,rk
[ROUNDS_128_OPT
]);
579 int rijndaelDecrypt128 (
580 word8 a
[4][BC_128_OPT
],
581 word8 rk
[MAXROUNDS
+1][4][MAXBC
])
585 /* To decrypt: apply the inverse operations of the encrypt routine,
588 * (KeyAddition is an involution: it 's equal to its inverse)
589 * (the inverse of Substitution with table S is Substitution with the
590 * inverse table of S)
591 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
594 /* First the special round:
595 * without InvMixColumn
596 * with extra KeyAddition
598 KeyAddition128(a
,rk
[ROUNDS_128_OPT
]);
599 Substitution128(a
,Si
);
602 /* ROUNDS-1 ordinary rounds
604 for(r
= ROUNDS_128_OPT
-1; r
> 0; r
--) {
605 KeyAddition128(a
,rk
[r
]);
607 Substitution128(a
,Si
);
611 /* End with the extra key addition
614 KeyAddition128(a
,rk
[0]);
619 #endif /* !GLADMAN_AES_128_ENABLE */