]> git.saurik.com Git - apple/security.git/blob - OSX/libsecurity_apple_csp/lib/rijndael-alg-ref.c
Security-57337.40.85.tar.gz
[apple/security.git] / OSX / libsecurity_apple_csp / lib / rijndael-alg-ref.c
1 /*
2 * Copyright (c) 2000-2001,2011-2012,2014 Apple Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /* rijndael-alg-ref.c v2.0 August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
22 * Vincent Rijmen
23 *
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "rijndael-alg-ref.h"
31 #include <cspdebugging.h>
32
33 #define SC ((BC - 4) >> 1)
34
35 #include "boxes-ref.h"
36
37 static const word8 shifts[3][4][2] = {
38 { { 0, 0 },
39 { 1, 3 },
40 { 2, 2 },
41 { 3, 1 }
42 },
43 { { 0, 0 },
44 { 1, 5 },
45 { 2, 4 },
46 { 3, 3 }
47 },
48 { { 0, 0 },
49 { 1, 7 },
50 { 3, 5 },
51 { 4, 4 }
52 }
53 };
54
55 #if !GLADMAN_AES_128_ENABLE
56
57 /* 128 bit key/word shift table in bits */
58 static const word8 shifts128[4][2] = {
59 { 0, 0 },
60 { 8, 24 },
61 { 16, 16 },
62 { 24, 8 }
63 };
64
65 #endif /* GLADMAN_AES_128_ENABLE */
66
67 #if !AES_MUL_BY_LOOKUP
68 /*
69 * Profiling measurements showed that the mul routine is where a large propertion of
70 * the time is spent. Since the first argument to mul is always one of six
71 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
72 * do the multiplies. This eliminates the need for the log/antilog tables, so
73 * it's only adding one kilobyte of const data. Throughput improvement for this
74 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
75 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
76 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
77 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
78 * on a 500 MHz G4 with 1 MB of L2 cache.
79 */
80
81 /*
82 * The mod 255 op in mul is really expensive...
83 *
84 * We know that b <= (254 * 2), so there are only two cases. Either return b,
85 * or return b-255.
86 *
87 * On a G4 this single optimization results in a 24% speedup for encrypt and
88 * a 25% speedup for decrypt.
89 */
90 static inline word8 mod255(word32 b)
91 {
92 if(b >= 255) {
93 b -= 255;
94 }
95 return b;
96 }
97
98 word8 mul(word8 a, word8 b) {
99 /* multiply two elements of GF(2^m)
100 * needed for MixColumn and InvMixColumn
101 */
102 if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
103 else return 0;
104 }
105 #endif /* !AES_MUL_BY_LOOKUP */
106
107 static
108 void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
109 /* Exor corresponding text input and round key input bytes
110 */
111 int i, j;
112
113 for(i = 0; i < 4; i++)
114 for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
115 }
116
117 static
118 void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
119 /* Row 0 remains unchanged
120 * The other three rows are shifted a variable amount
121 */
122 word8 tmp[MAXBC];
123 int i, j;
124
125 for(i = 1; i < 4; i++) {
126 for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
127 for(j = 0; j < BC; j++) a[i][j] = tmp[j];
128 }
129 }
130
131 static
132 void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
133 /* Replace every byte of the input by the byte at that place
134 * in the nonlinear S-box
135 */
136 int i, j;
137
138 for(i = 0; i < 4; i++)
139 for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
140 }
141
142 static
143 void MixColumn(word8 a[4][MAXBC], word8 BC) {
144 /* Mix the four bytes of every column in a linear way
145 */
146 word8 b[4][MAXBC];
147 int i, j;
148
149 for(j = 0; j < BC; j++) {
150 for(i = 0; i < 4; i++) {
151 #if AES_MUL_BY_LOOKUP
152 b[i][j] = mulBy0x02[a[i][j]]
153 ^ mulBy0x03[a[(i + 1) % 4][j]]
154 ^ a[(i + 2) % 4][j]
155 ^ a[(i + 3) % 4][j];
156 #else
157 b[i][j] = mul(2,a[i][j])
158 ^ mul(3,a[(i + 1) % 4][j])
159 ^ a[(i + 2) % 4][j]
160 ^ a[(i + 3) % 4][j];
161 #endif
162 }
163 }
164 for(i = 0; i < 4; i++) {
165 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
166 }
167 }
168
169 static
170 void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
171 /* Mix the four bytes of every column in a linear way
172 * This is the opposite operation of Mixcolumn
173 */
174 word8 b[4][MAXBC];
175 int i, j;
176
177 for(j = 0; j < BC; j++) {
178 for(i = 0; i < 4; i++) {
179 #if AES_MUL_BY_LOOKUP
180 b[i][j] = mulBy0x0e[a[i][j]]
181 ^ mulBy0x0b[a[(i + 1) % 4][j]]
182 ^ mulBy0x0d[a[(i + 2) % 4][j]]
183 ^ mulBy0x09[a[(i + 3) % 4][j]];
184 #else
185 b[i][j] = mul(0xe,a[i][j])
186 ^ mul(0xb,a[(i + 1) % 4][j])
187 ^ mul(0xd,a[(i + 2) % 4][j])
188 ^ mul(0x9,a[(i + 3) % 4][j]);
189 #endif
190 }
191 }
192 for(i = 0; i < 4; i++) {
193 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
194 }
195 }
196
197 int rijndaelKeySched (
198 word8 k[4][MAXKC],
199 int keyBits,
200 int blockBits,
201 word8 W[MAXROUNDS+1][4][MAXBC]) {
202
203 /* Calculate the necessary round keys
204 * The number of calculations depends on keyBits and blockBits
205 */
206 int KC, BC, ROUNDS;
207 int i, j, t, rconpointer = 0;
208 word8 tk[4][MAXKC];
209
210 switch (keyBits) {
211 case 128: KC = 4; break;
212 case 192: KC = 6; break;
213 case 256: KC = 8; break;
214 default : return (-1);
215 }
216
217 switch (blockBits) {
218 case 128: BC = 4; break;
219 case 192: BC = 6; break;
220 case 256: BC = 8; break;
221 default : return (-2);
222 }
223
224 switch (keyBits >= blockBits ? keyBits : blockBits) {
225 case 128: ROUNDS = 10; break;
226 case 192: ROUNDS = 12; break;
227 case 256: ROUNDS = 14; break;
228 default : return (-3); /* this cannot happen */
229 }
230
231
232 for(j = 0; j < KC; j++)
233 for(i = 0; i < 4; i++)
234 tk[i][j] = k[i][j];
235 t = 0;
236 /* copy values into round key array */
237 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
238 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
239
240 while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
241 /* calculate new values */
242 for(i = 0; i < 4; i++)
243 tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
244 tk[0][0] ^= rcon[rconpointer++];
245
246 if (KC != 8)
247 for(j = 1; j < KC; j++)
248 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
249 else {
250 for(j = 1; j < KC/2; j++)
251 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
252 for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
253 for(j = KC/2 + 1; j < KC; j++)
254 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
255 }
256 /* copy values into round key array */
257 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
258 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
259 }
260
261 return 0;
262 }
263
264 int rijndaelEncrypt (
265 word8 a[4][MAXBC],
266 int keyBits,
267 int blockBits,
268 word8 rk[MAXROUNDS+1][4][MAXBC])
269 {
270 /* Encryption of one block, general case.
271 */
272 int r, BC, ROUNDS;
273
274 switch (blockBits) {
275 case 128: BC = 4; break;
276 case 192: BC = 6; break;
277 case 256: BC = 8; break;
278 default : return (-2);
279 }
280
281 switch (keyBits >= blockBits ? keyBits : blockBits) {
282 case 128: ROUNDS = 10; break;
283 case 192: ROUNDS = 12; break;
284 case 256: ROUNDS = 14; break;
285 default : return (-3); /* this cannot happen */
286 }
287
288 /* begin with a key addition
289 */
290 KeyAddition(a,rk[0],BC);
291
292 /* ROUNDS-1 ordinary rounds
293 */
294 for(r = 1; r < ROUNDS; r++) {
295 Substitution(a,S,BC);
296 ShiftRow(a,0,BC);
297 MixColumn(a,BC);
298 KeyAddition(a,rk[r],BC);
299 }
300
301 /* Last round is special: there is no MixColumn
302 */
303 Substitution(a,S,BC);
304 ShiftRow(a,0,BC);
305 KeyAddition(a,rk[ROUNDS],BC);
306
307 return 0;
308 }
309
310 int rijndaelDecrypt (
311 word8 a[4][MAXBC],
312 int keyBits,
313 int blockBits,
314 word8 rk[MAXROUNDS+1][4][MAXBC])
315 {
316 int r, BC, ROUNDS;
317
318 switch (blockBits) {
319 case 128: BC = 4; break;
320 case 192: BC = 6; break;
321 case 256: BC = 8; break;
322 default : return (-2);
323 }
324
325 switch (keyBits >= blockBits ? keyBits : blockBits) {
326 case 128: ROUNDS = 10; break;
327 case 192: ROUNDS = 12; break;
328 case 256: ROUNDS = 14; break;
329 default : return (-3); /* this cannot happen */
330 }
331
332 /* To decrypt: apply the inverse operations of the encrypt routine,
333 * in opposite order
334 *
335 * (KeyAddition is an involution: it 's equal to its inverse)
336 * (the inverse of Substitution with table S is Substitution with the
337 * inverse table of S)
338 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
339 */
340
341 /* First the special round:
342 * without InvMixColumn
343 * with extra KeyAddition
344 */
345 KeyAddition(a,rk[ROUNDS],BC);
346 Substitution(a,Si,BC);
347 ShiftRow(a,1,BC);
348
349 /* ROUNDS-1 ordinary rounds
350 */
351 for(r = ROUNDS-1; r > 0; r--) {
352 KeyAddition(a,rk[r],BC);
353 InvMixColumn(a,BC);
354 Substitution(a,Si,BC);
355 ShiftRow(a,1,BC);
356 }
357
358 /* End with the extra key addition
359 */
360
361 KeyAddition(a,rk[0],BC);
362
363 return 0;
364 }
365
366 #if !GLADMAN_AES_128_ENABLE
367
368 /*
369 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
370 * char array pointers.ÊThe key schedule arrays are easy; they come from
371 * keyInstance which has a 4-byte-aligned element preceeding the key schedule.
372 * Others require manual alignment of a local variable by the caller.
373 */
374
375 static inline void KeyAddition128(
376 word8 a[4][BC_128_OPT],
377 word8 rk[4][MAXBC]) {
378
379 /* these casts are endian-independent */
380 ((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
381 ((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
382 ((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
383 ((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
384 }
385
386 static void Substitution128(
387 word8 a[4][BC_128_OPT],
388 const word8 box[256]) {
389 /* Replace every byte of the input by the byte at that place
390 * in the nonlinear S-box
391 */
392 int i, j;
393
394 /* still to be optimized - larger S boxes? */
395 for(i = 0; i < 4; i++) {
396 for(j = 0; j < BC_128_OPT; j++) {
397 a[i][j] = box[a[i][j]];
398 }
399 }
400 }
401
402 #if defined(__ppc__) && defined(__GNUC__)
403
404 static inline void rotateWordLeft(
405 word8 *word, // known to be word aligned
406 unsigned rotCount) // in bits
407 {
408 word32 lword = *((word32 *)word);
409 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
410 *((word32 *)word) = lword;
411 }
412
413 #else
414
415 /*
416 * Insert your machine/compiler dependent code here,
417 * or just use this, which works on any platform and compiler
418 * which supports the __attribute__((aligned(4))) directive.
419 */
420 static void rotateWordLeft(
421 word8 *word, // known to be word aligned
422 unsigned rotCount) // in bits
423 {
424 word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
425 unsigned bytes = rotCount / 8;
426
427 tmp[0] = word[bytes & (BC_128_OPT-1)];
428 tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
429 tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
430 tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
431 *((word32 *)word) = *((word32 *)tmp);
432 }
433 #endif
434
435 static inline void ShiftRow128(
436 word8 a[4][BC_128_OPT],
437 word8 d) {
438 /* Row 0 remains unchanged
439 * The other three rows are shifted (actually rotated) a variable amount
440 */
441 int i;
442
443 for(i = 1; i < 4; i++) {
444 rotateWordLeft(a[i], shifts128[i][d]);
445 }
446 }
447
448 /*
449 * The following two routines are where most of the time is spent in this
450 * module. Further optimization would have to focus here.
451 */
452 static void MixColumn128(word8 a[4][BC_128_OPT]) {
453 /* Mix the four bytes of every column in a linear way
454 */
455 word8 b[4][BC_128_OPT];
456 int i, j;
457
458 for(j = 0; j < BC_128_OPT; j++) {
459 for(i = 0; i < 4; i++) {
460 #if AES_MUL_BY_LOOKUP
461 b[i][j] = mulBy0x02[a[i][j]]
462 ^ mulBy0x03[a[(i + 1) % 4][j]]
463 ^ a[(i + 2) % 4][j]
464 ^ a[(i + 3) % 4][j];
465 #else
466 b[i][j] = mul(2,a[i][j])
467 ^ mul(3,a[(i + 1) % 4][j])
468 ^ a[(i + 2) % 4][j]
469 ^ a[(i + 3) % 4][j];
470 #endif
471 }
472 }
473 memmove(a, b, 4 * BC_128_OPT);
474 }
475
476 static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
477 /* Mix the four bytes of every column in a linear way
478 * This is the opposite operation of Mixcolumn
479 */
480 word8 b[4][BC_128_OPT];
481 int i, j;
482
483 for(j = 0; j < BC_128_OPT; j++) {
484 for(i = 0; i < 4; i++) {
485 #if AES_MUL_BY_LOOKUP
486 b[i][j] = mulBy0x0e[a[i][j]]
487 ^ mulBy0x0b[a[(i + 1) % 4][j]]
488 ^ mulBy0x0d[a[(i + 2) % 4][j]]
489 ^ mulBy0x09[a[(i + 3) % 4][j]];
490 #else
491 b[i][j] = mul(0xe,a[i][j])
492 ^ mul(0xb,a[(i + 1) % 4][j])
493 ^ mul(0xd,a[(i + 2) % 4][j])
494 ^ mul(0x9,a[(i + 3) % 4][j]);
495 #endif
496 }
497 }
498 memmove(a, b, 4 * BC_128_OPT);
499 }
500
501 int rijndaelKeySched128 (
502 word8 k[4][KC_128_OPT],
503 word8 W[MAXROUNDS+1][4][MAXBC]) {
504
505 /* Calculate the necessary round keys
506 * The number of calculations depends on keyBits and blockBits
507 */
508 int i, j, t, rconpointer = 0;
509 word8 tk[4][KC_128_OPT];
510 unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;
511
512 for(j = 0; j < KC_128_OPT; j++)
513 for(i = 0; i < 4; i++)
514 tk[i][j] = k[i][j];
515 t = 0;
516 /* copy values into round key array */
517 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
518 for(i = 0; i < 4; i++) {
519 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
520 }
521 }
522
523 while (t < numSchedRows) {
524 /* while not enough round key material calculated */
525 /* calculate new values */
526 for(i = 0; i < 4; i++) {
527 tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
528 }
529 tk[0][0] ^= rcon[rconpointer++];
530
531 for(j = 1; j < KC_128_OPT; j++) {
532 for(i = 0; i < 4; i++) {
533 tk[i][j] ^= tk[i][j-1];
534 }
535 }
536
537 /* copy values into round key array */
538 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
539 for(i = 0; i < 4; i++) {
540 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
541 }
542 }
543 }
544
545 return 0;
546 }
547
548 int rijndaelEncrypt128 (
549 word8 a[4][BC_128_OPT],
550 word8 rk[MAXROUNDS+1][4][MAXBC])
551 {
552 /* Encryption of one block.
553 */
554 int r;
555
556 /* begin with a key addition
557 */
558 KeyAddition128(a,rk[0]);
559
560 /* ROUNDS-1 ordinary rounds
561 */
562 for(r = 1; r < ROUNDS_128_OPT; r++) {
563 Substitution128(a,S);
564 ShiftRow128(a,0);
565 MixColumn128(a);
566 KeyAddition128(a,rk[r]);
567 }
568
569 /* Last round is special: there is no MixColumn
570 */
571 Substitution128(a,S);
572 ShiftRow128(a,0);
573 KeyAddition128(a,rk[ROUNDS_128_OPT]);
574
575 return 0;
576 }
577
578 int rijndaelDecrypt128 (
579 word8 a[4][BC_128_OPT],
580 word8 rk[MAXROUNDS+1][4][MAXBC])
581 {
582 int r;
583
584 /* To decrypt: apply the inverse operations of the encrypt routine,
585 * in opposite order
586 *
587 * (KeyAddition is an involution: it 's equal to its inverse)
588 * (the inverse of Substitution with table S is Substitution with the
589 * inverse table of S)
590 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
591 */
592
593 /* First the special round:
594 * without InvMixColumn
595 * with extra KeyAddition
596 */
597 KeyAddition128(a,rk[ROUNDS_128_OPT]);
598 Substitution128(a,Si);
599 ShiftRow128(a,1);
600
601 /* ROUNDS-1 ordinary rounds
602 */
603 for(r = ROUNDS_128_OPT-1; r > 0; r--) {
604 KeyAddition128(a,rk[r]);
605 InvMixColumn128(a);
606 Substitution128(a,Si);
607 ShiftRow128(a,1);
608 }
609
610 /* End with the extra key addition
611 */
612
613 KeyAddition128(a,rk[0]);
614
615 return 0;
616 }
617
618 #endif /* !GLADMAN_AES_128_ENABLE */
619