/*
 * Copyright (c) 2000-2001,2011-2012,2014 Apple Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */
/* rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code
 * authors: Paulo Barreto
 *          Vincent Rijmen
 */
#include "rijndael-alg-ref.h"
#include <cspdebugging.h>

#define SC	((BC - 4) >> 1)

#include "boxes-ref.h"
static const word8 shifts[3][4][2] = {
	{ { 0, 0 },		/* row shift amounts (encrypt, decrypt) for BC = 4 */
	  { 1, 3 },
	  { 2, 2 },
	  { 3, 1 } },
	{ { 0, 0 },		/* BC = 6 */
	  { 1, 5 },
	  { 2, 4 },
	  { 3, 3 } },
	{ { 0, 0 },		/* BC = 8 */
	  { 1, 7 },
	  { 3, 5 },
	  { 4, 4 } }
};
#if		!GLADMAN_AES_128_ENABLE

/* 128 bit key/word shift table in bits */
static const word8 shifts128[4][2] = {
	{  0,  0 },
	{  8, 24 },
	{ 16, 16 },
	{ 24,  8 }
};

#endif	/* GLADMAN_AES_128_ENABLE */
#if		!AES_MUL_BY_LOOKUP
/*
 * Profiling measurements showed that the mul routine is where a large proportion of
 * the time is spent. Since the first argument to mul is always one of six
 * constants (2, 3, 0xe, etc.), we implement six 256-byte lookup tables to
 * do the multiplies. This eliminates the need for the log/antilog tables, so
 * it's only adding one kilobyte of const data. Throughput improvement for this
 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because
 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
 * on a 500 MHz G4 with 1 MB of L2 cache.
 */
/*
 * The mod 255 op in mul is really expensive. Luckily we don't need a full
 * mod: we know that b <= (254 * 2), so there are only two cases. Either
 * return b, or return (b - 255).
 *
 * On a G4 this single optimization results in a 24% speedup for encrypt and
 * a 25% speedup for decrypt.
 */
static inline word8 mod255(word32 b)
{
	if(b >= 255) {
		b -= 255;
	}
	return b;
}
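
/*
 * Worked check: Logtable[] values lie in 0..254, so the sum passed to
 * mod255 is at most 508 = 254 * 2. One conditional subtraction therefore
 * always suffices: e.g. mod255(300) yields 300 - 255 = 45.
 */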
word8 mul(word8 a, word8 b) {
	/* multiply two elements of GF(2^m)
	 * needed for MixColumn and InvMixColumn
	 */
	if(a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
	else return 0;
}
#endif	/* !AES_MUL_BY_LOOKUP */
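
/*
 * Illustrative sketch, not part of the original source: one way the six
 * 256-byte multiply tables described above (mulBy0x02 etc., shipped as
 * precomputed constants in boxes-ref.h) could be generated offline from
 * mul(). The helper name below is hypothetical.
 */
#if 0
static void genMulTable(word8 c, word8 table[256]) {
	unsigned x;
	for(x = 0; x < 256; x++) {
		table[x] = mul(c, (word8)x);	/* table[x] = c * x in GF(2^8) */
	}
	/* e.g. genMulTable(0x02, buf02); genMulTable(0x03, buf03); ... */
}
#endif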
void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
	/* Exor corresponding text input and round key input bytes
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
}
void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
	/* Row 0 remains unchanged
	 * The other three rows are shifted a variable amount
	 */
	word8 tmp[MAXBC];
	int i, j;

	for(i = 1; i < 4; i++) {
		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
	}
}
void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]];
}
void MixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
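
/*
 * For reference, this implements the standard Rijndael MixColumn matrix:
 * each output column is this GF(2^8) matrix times the input column.
 *
 *   [ 02 03 01 01 ]
 *   [ 01 02 03 01 ]
 *   [ 01 01 02 03 ]
 *   [ 03 01 01 02 ]
 */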
void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][MAXBC];
	int i, j;

	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}
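
/*
 * For reference, this multiplies each column by the inverse of the
 * MixColumn matrix, again per the Rijndael specification:
 *
 *   [ 0e 0b 0d 09 ]
 *   [ 09 0e 0b 0d ]
 *   [ 0d 09 0e 0b ]
 *   [ 0b 0d 09 0e ]
 */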
int rijndaelKeySched (
	word8 k[4][MAXKC],
	int keyBits,
	int blockBits,
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	word8 tk[4][MAXKC];

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	for(j = 0; j < KC; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
		tk[0][0] ^= rcon[rconpointer++];

		if (KC != 8)
			for(j = 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		else {
			for(j = 1; j < KC/2; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
			for(j = KC/2 + 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		}

		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
	}

	return 0;
}
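
/*
 * Worked example: for keyBits = blockBits = 128, KC = BC = 4 and
 * ROUNDS = 10, so the loops above fill (ROUNDS+1)*BC = 44 columns of
 * round key material, i.e. 11 round keys of 16 bytes each.
 */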
int rijndaelEncrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block, general case.
	 */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* begin with a key addition
	 */
	KeyAddition(a,rk[0],BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS; r++) {
		Substitution(a,S,BC);
		ShiftRow(a,0,BC);
		MixColumn(a,BC);
		KeyAddition(a,rk[r],BC);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution(a,S,BC);
	ShiftRow(a,0,BC);
	KeyAddition(a,rk[ROUNDS],BC);

	return 0;
}
int rijndaelDecrypt (
	word8 a[4][MAXBC],
	int keyBits,
	int blockBits,
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 * in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 * inverse table of S)
	 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition(a,rk[ROUNDS],BC);
	Substitution(a,Si,BC);
	ShiftRow(a,1,BC);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS-1; r > 0; r--) {
		KeyAddition(a,rk[r],BC);
		InvMixColumn(a,BC);
		Substitution(a,Si,BC);
		ShiftRow(a,1,BC);
	}

	/* End with the extra key addition
	 */
	KeyAddition(a,rk[0],BC);

	return 0;
}
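
/*
 * Illustrative usage sketch, not part of the original source: scheduling a
 * 128-bit key and running one block through encrypt and decrypt. It assumes
 * the word8/MAXKC/MAXBC/MAXROUNDS definitions from rijndael-alg-ref.h; the
 * key and state arrays are indexed [row][column] as in the routines above,
 * and a real caller would fill them with actual key and plaintext bytes.
 */
#if 0
static int exampleOneBlock(void) {
	word8 k[4][MAXKC] = {{0}};			/* 128-bit key */
	word8 a[4][MAXBC] = {{0}};			/* one 128-bit block, processed in place */
	word8 W[MAXROUNDS+1][4][MAXBC];		/* round key schedule */

	if (rijndaelKeySched(k, 128, 128, W) != 0) return -1;
	if (rijndaelEncrypt(a, 128, 128, W) != 0) return -1;
	if (rijndaelDecrypt(a, 128, 128, W) != 0) return -1;	/* recovers the plaintext */
	return 0;
}
#endif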
#if		!GLADMAN_AES_128_ENABLE

/*
 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
 * char array pointers. The key schedule arrays are easy; they come from
 * keyInstance which has a 4-byte-aligned element preceding the key schedule.
 * Others require manual alignment of a local variable by the caller.
 */
static inline void KeyAddition128(
	word8 a[4][BC_128_OPT],
	word8 rk[4][MAXBC]) {

	/* these casts are endian-independent */
	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
}
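
/*
 * The casts are endian-independent because XOR operates bitwise: each byte
 * of the state word is XORed with the round key byte at the same address,
 * so the word-at-a-time form computes exactly what a byte loop would.
 */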
static void Substitution128(
	word8 a[4][BC_128_OPT],
	const word8 box[256]) {

	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;

	/* still to be optimized - larger S boxes? */
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC_128_OPT; j++) {
			a[i][j] = box[a[i][j]];
		}
	}
}
#if		defined(__ppc__) && defined(__GNUC__)

static inline void rotateWordLeft(
	word8 *word,			// known to be word aligned
	unsigned rotCount)		// in bits
{
	word32 lword = *((word32 *)word);
	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
	*((word32 *)word) = lword;
}

#else
/*
 * Insert your machine/compiler dependent code here,
 * or just use this, which works on any platform and compiler
 * which supports the __attribute__((aligned(4))) directive.
 */
static void rotateWordLeft(
	word8 *word,			// known to be word aligned
	unsigned rotCount)		// in bits
{
	word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
	unsigned bytes = rotCount / 8;

	tmp[0] = word[bytes     & (BC_128_OPT-1)];
	tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
	tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
	tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
	*((word32 *)word) = *((word32 *)tmp);
}
#endif
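
/*
 * Worked example: with rotCount = 8, bytes = 1, so a word {w0,w1,w2,w3}
 * becomes {w1,w2,w3,w0}, i.e. a one-byte left rotation. The masking with
 * (BC_128_OPT-1) wraps the indices, which is valid because BC_128_OPT is
 * 4, a power of two.
 */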
static inline void ShiftRow128(
	word8 a[4][BC_128_OPT],
	word8 d) {

	/* Row 0 remains unchanged
	 * The other three rows are shifted (actually rotated) a variable amount
	 */
	int i;

	for(i = 1; i < 4; i++) {
		rotateWordLeft(a[i], shifts128[i][d]);
	}
}
/*
 * The following two routines are where most of the time is spent in this
 * module. Further optimization would have to focus here.
 */
static void MixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of MixColumn
	 */
	word8 b[4][BC_128_OPT];
	int i, j;

	for(j = 0; j < BC_128_OPT; j++) {
		for(i = 0; i < 4; i++) {
#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];
#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);
#endif
		}
	}
	memmove(a, b, 4 * BC_128_OPT);
}
int rijndaelKeySched128 (
	word8 k[4][KC_128_OPT],
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Calculate the necessary round keys.
	 * Key and block size are fixed at 128 bits for this routine.
	 */
	int i, j, t, rconpointer = 0;
	word8 tk[4][KC_128_OPT];
	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;

	for(j = 0; j < KC_128_OPT; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
		for(i = 0; i < 4; i++) {
			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
		}
	}

	while (t < numSchedRows) {
		/* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++) {
			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
		}
		tk[0][0] ^= rcon[rconpointer++];

		for(j = 1; j < KC_128_OPT; j++) {
			for(i = 0; i < 4; i++) {
				tk[i][j] ^= tk[i][j-1];
			}
		}

		/* copy values into round key array */
		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
			for(i = 0; i < 4; i++) {
				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
			}
		}
	}

	return 0;
}
int rijndaelEncrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block.
	 */
	int r;

	/* begin with a key addition
	 */
	KeyAddition128(a,rk[0]);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS_128_OPT; r++) {
		Substitution128(a,S);
		ShiftRow128(a,0);
		MixColumn128(a);
		KeyAddition128(a,rk[r]);
	}

	/* Last round is special: there is no MixColumn
	 */
	Substitution128(a,S);
	ShiftRow128(a,0);
	KeyAddition128(a,rk[ROUNDS_128_OPT]);

	return 0;
}
int rijndaelDecrypt128 (
	word8 a[4][BC_128_OPT],
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r;

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 * in opposite order
	 *
	 * (KeyAddition is an involution: it's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the
	 * inverse table of S)
	 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition128(a,rk[ROUNDS_128_OPT]);
	Substitution128(a,Si);
	ShiftRow128(a,1);

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS_128_OPT-1; r > 0; r--) {
		KeyAddition128(a,rk[r]);
		InvMixColumn128(a);
		Substitution128(a,Si);
		ShiftRow128(a,1);
	}

	/* End with the extra key addition
	 */
	KeyAddition128(a,rk[0]);

	return 0;
}
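
/*
 * Illustrative usage sketch for the 128-bit-optimized path, not part of
 * the original source. Per the alignment note above, local arrays passed
 * to these routines must be 4-byte aligned; the attribute below handles
 * that. A real caller would fill k and a with actual key/plaintext bytes.
 */
#if 0
static int exampleOneBlock128(void) {
	word8 k[4][KC_128_OPT] __attribute__((aligned(4))) = {{0}};
	word8 a[4][BC_128_OPT] __attribute__((aligned(4))) = {{0}};
	word8 W[MAXROUNDS+1][4][MAXBC];		/* round key schedule */

	if (rijndaelKeySched128(k, W) != 0) return -1;
	if (rijndaelEncrypt128(a, W) != 0) return -1;		/* encrypts in place */
	if (rijndaelDecrypt128(a, W) != 0) return -1;		/* recovers the plaintext */
	return 0;
}
#endif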
#endif	/* !GLADMAN_AES_128_ENABLE */