]>
Commit | Line | Data |
---|---|---|
bac41a7b A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved. | |
3 | * | |
4 | * The contents of this file constitute Original Code as defined in and are | |
5 | * subject to the Apple Public Source License Version 1.2 (the 'License'). | |
6 | * You may not use this file except in compliance with the License. Please obtain | |
7 | * a copy of the License at http://www.apple.com/publicsource and read it before | |
8 | * using this file. | |
9 | * | |
10 | * This Original Code and all software distributed under the License are | |
11 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS | |
12 | * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT | |
13 | * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | |
14 | * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the | |
15 | * specific language governing rights and limitations under the License. | |
16 | */ | |
17 | ||
18 | ||
19 | /* rijndael-alg-ref.c v2.0 August '99 | |
20 | * Reference ANSI C code | |
21 | * authors: Paulo Barreto | |
22 | * Vincent Rijmen | |
23 | * | |
24 | * PPC and 128-bit block optimization by Doug Mitchell May 2001. | |
25 | */ | |
26 | ||
27 | #include <stdio.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | ||
31 | #include "rijndael-alg-ref.h" | |
32 | #include <AppleCSP/cspdebugging.h> | |
33 | ||
34 | #define SC ((BC - 4) >> 1) | |
35 | ||
36 | #include "boxes-ref.h" | |
37 | ||
38 | static const word8 shifts[3][4][2] = { | |
39 | { { 0, 0 }, | |
40 | { 1, 3 }, | |
41 | { 2, 2 }, | |
42 | { 3, 1 } | |
43 | }, | |
44 | { { 0, 0 }, | |
45 | { 1, 5 }, | |
46 | { 2, 4 }, | |
47 | { 3, 3 } | |
48 | }, | |
49 | { { 0, 0 }, | |
50 | { 1, 7 }, | |
51 | { 3, 5 }, | |
52 | { 4, 4 } | |
53 | } | |
54 | }; | |
55 | ||
df0e469f A |
56 | #if !GLADMAN_AES_128_ENABLE |
57 | ||
bac41a7b A |
58 | /* 128 bit key/word shift table in bits */ |
59 | static const word8 shifts128[4][2] = { | |
60 | { 0, 0 }, | |
61 | { 8, 24 }, | |
62 | { 16, 16 }, | |
63 | { 24, 8 } | |
64 | }; | |
65 | ||
df0e469f A |
66 | #endif /* GLADMAN_AES_128_ENABLE */ |
67 | ||
bac41a7b A |
68 | #if !AES_MUL_BY_LOOKUP |
69 | /* | |
70 | * Profiling measurements showed that the mul routine is where a large propertion of | |
71 | * the time is spent. Since the first argument to mul is always one of six | |
72 | * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to | |
73 | * do the multiplies. This eliminates the need for the log/antilog tables, so | |
74 | * it's only adding one kilobyte of const data. Throughput improvement for this | |
75 | * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized | |
76 | * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt | |
77 | * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the | |
78 | * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken | |
79 | * on a 500 MHz G4 with 1 MB of L2 cache. | |
df0e469f | 80 | */ |
bac41a7b A |
81 | |
82 | /* | |
83 | * The mod 255 op in mul is really expensive... | |
84 | * | |
85 | * We know that b <= (254 * 2), so there are only two cases. Either return b, | |
86 | * or return b-255. | |
87 | * | |
88 | * On a G4 this single optimization results in a 24% speedup for encrypt and | |
89 | * a 25% speedup for decrypt. | |
90 | */ | |
91 | static inline word8 mod255(word32 b) | |
92 | { | |
93 | if(b >= 255) { | |
94 | b -= 255; | |
95 | } | |
96 | return b; | |
97 | } | |
98 | ||
99 | word8 mul(word8 a, word8 b) { | |
100 | /* multiply two elements of GF(2^m) | |
101 | * needed for MixColumn and InvMixColumn | |
102 | */ | |
103 | if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])]; | |
104 | else return 0; | |
105 | } | |
106 | #endif /* !AES_MUL_BY_LOOKUP */ | |
107 | ||
108 | void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) { | |
109 | /* Exor corresponding text input and round key input bytes | |
110 | */ | |
111 | int i, j; | |
112 | ||
113 | for(i = 0; i < 4; i++) | |
114 | for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j]; | |
115 | } | |
116 | ||
117 | void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) { | |
118 | /* Row 0 remains unchanged | |
119 | * The other three rows are shifted a variable amount | |
120 | */ | |
121 | word8 tmp[MAXBC]; | |
122 | int i, j; | |
123 | ||
124 | for(i = 1; i < 4; i++) { | |
125 | for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC]; | |
126 | for(j = 0; j < BC; j++) a[i][j] = tmp[j]; | |
127 | } | |
128 | } | |
129 | ||
130 | void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) { | |
131 | /* Replace every byte of the input by the byte at that place | |
132 | * in the nonlinear S-box | |
133 | */ | |
134 | int i, j; | |
135 | ||
136 | for(i = 0; i < 4; i++) | |
137 | for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ; | |
138 | } | |
139 | ||
140 | void MixColumn(word8 a[4][MAXBC], word8 BC) { | |
141 | /* Mix the four bytes of every column in a linear way | |
142 | */ | |
143 | word8 b[4][MAXBC]; | |
144 | int i, j; | |
145 | ||
146 | for(j = 0; j < BC; j++) { | |
147 | for(i = 0; i < 4; i++) { | |
148 | #if AES_MUL_BY_LOOKUP | |
149 | b[i][j] = mulBy0x02[a[i][j]] | |
150 | ^ mulBy0x03[a[(i + 1) % 4][j]] | |
151 | ^ a[(i + 2) % 4][j] | |
152 | ^ a[(i + 3) % 4][j]; | |
153 | #else | |
154 | b[i][j] = mul(2,a[i][j]) | |
155 | ^ mul(3,a[(i + 1) % 4][j]) | |
156 | ^ a[(i + 2) % 4][j] | |
157 | ^ a[(i + 3) % 4][j]; | |
158 | #endif | |
159 | } | |
160 | } | |
161 | for(i = 0; i < 4; i++) { | |
162 | for(j = 0; j < BC; j++) a[i][j] = b[i][j]; | |
163 | } | |
164 | } | |
165 | ||
166 | void InvMixColumn(word8 a[4][MAXBC], word8 BC) { | |
167 | /* Mix the four bytes of every column in a linear way | |
168 | * This is the opposite operation of Mixcolumn | |
169 | */ | |
170 | word8 b[4][MAXBC]; | |
171 | int i, j; | |
172 | ||
173 | for(j = 0; j < BC; j++) { | |
174 | for(i = 0; i < 4; i++) { | |
175 | #if AES_MUL_BY_LOOKUP | |
176 | b[i][j] = mulBy0x0e[a[i][j]] | |
177 | ^ mulBy0x0b[a[(i + 1) % 4][j]] | |
178 | ^ mulBy0x0d[a[(i + 2) % 4][j]] | |
179 | ^ mulBy0x09[a[(i + 3) % 4][j]]; | |
180 | #else | |
181 | b[i][j] = mul(0xe,a[i][j]) | |
182 | ^ mul(0xb,a[(i + 1) % 4][j]) | |
183 | ^ mul(0xd,a[(i + 2) % 4][j]) | |
184 | ^ mul(0x9,a[(i + 3) % 4][j]); | |
185 | #endif | |
186 | } | |
187 | } | |
188 | for(i = 0; i < 4; i++) { | |
189 | for(j = 0; j < BC; j++) a[i][j] = b[i][j]; | |
190 | } | |
191 | } | |
192 | ||
193 | int rijndaelKeySched ( | |
194 | word8 k[4][MAXKC], | |
195 | int keyBits, | |
196 | int blockBits, | |
197 | word8 W[MAXROUNDS+1][4][MAXBC]) { | |
198 | ||
199 | /* Calculate the necessary round keys | |
200 | * The number of calculations depends on keyBits and blockBits | |
201 | */ | |
202 | int KC, BC, ROUNDS; | |
203 | int i, j, t, rconpointer = 0; | |
204 | word8 tk[4][MAXKC]; | |
205 | ||
206 | switch (keyBits) { | |
207 | case 128: KC = 4; break; | |
208 | case 192: KC = 6; break; | |
209 | case 256: KC = 8; break; | |
210 | default : return (-1); | |
211 | } | |
212 | ||
213 | switch (blockBits) { | |
214 | case 128: BC = 4; break; | |
215 | case 192: BC = 6; break; | |
216 | case 256: BC = 8; break; | |
217 | default : return (-2); | |
218 | } | |
219 | ||
220 | switch (keyBits >= blockBits ? keyBits : blockBits) { | |
221 | case 128: ROUNDS = 10; break; | |
222 | case 192: ROUNDS = 12; break; | |
223 | case 256: ROUNDS = 14; break; | |
224 | default : return (-3); /* this cannot happen */ | |
225 | } | |
226 | ||
227 | ||
228 | for(j = 0; j < KC; j++) | |
229 | for(i = 0; i < 4; i++) | |
230 | tk[i][j] = k[i][j]; | |
231 | t = 0; | |
232 | /* copy values into round key array */ | |
233 | for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++) | |
234 | for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j]; | |
235 | ||
236 | while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */ | |
237 | /* calculate new values */ | |
238 | for(i = 0; i < 4; i++) | |
239 | tk[i][0] ^= S[tk[(i+1)%4][KC-1]]; | |
240 | tk[0][0] ^= rcon[rconpointer++]; | |
241 | ||
242 | if (KC != 8) | |
243 | for(j = 1; j < KC; j++) | |
244 | for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; | |
245 | else { | |
246 | for(j = 1; j < KC/2; j++) | |
247 | for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; | |
248 | for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]]; | |
249 | for(j = KC/2 + 1; j < KC; j++) | |
250 | for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1]; | |
251 | } | |
252 | /* copy values into round key array */ | |
253 | for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++) | |
254 | for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j]; | |
255 | } | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
260 | int rijndaelEncrypt ( | |
261 | word8 a[4][MAXBC], | |
262 | int keyBits, | |
263 | int blockBits, | |
264 | word8 rk[MAXROUNDS+1][4][MAXBC]) | |
265 | { | |
266 | /* Encryption of one block, general case. | |
267 | */ | |
268 | int r, BC, ROUNDS; | |
269 | ||
270 | switch (blockBits) { | |
271 | case 128: BC = 4; break; | |
272 | case 192: BC = 6; break; | |
273 | case 256: BC = 8; break; | |
274 | default : return (-2); | |
275 | } | |
276 | ||
277 | switch (keyBits >= blockBits ? keyBits : blockBits) { | |
278 | case 128: ROUNDS = 10; break; | |
279 | case 192: ROUNDS = 12; break; | |
280 | case 256: ROUNDS = 14; break; | |
281 | default : return (-3); /* this cannot happen */ | |
282 | } | |
283 | ||
284 | /* begin with a key addition | |
285 | */ | |
286 | KeyAddition(a,rk[0],BC); | |
287 | ||
288 | /* ROUNDS-1 ordinary rounds | |
289 | */ | |
290 | for(r = 1; r < ROUNDS; r++) { | |
291 | Substitution(a,S,BC); | |
292 | ShiftRow(a,0,BC); | |
293 | MixColumn(a,BC); | |
294 | KeyAddition(a,rk[r],BC); | |
295 | } | |
296 | ||
297 | /* Last round is special: there is no MixColumn | |
298 | */ | |
299 | Substitution(a,S,BC); | |
300 | ShiftRow(a,0,BC); | |
301 | KeyAddition(a,rk[ROUNDS],BC); | |
302 | ||
303 | return 0; | |
304 | } | |
305 | ||
306 | int rijndaelDecrypt ( | |
307 | word8 a[4][MAXBC], | |
308 | int keyBits, | |
309 | int blockBits, | |
310 | word8 rk[MAXROUNDS+1][4][MAXBC]) | |
311 | { | |
312 | int r, BC, ROUNDS; | |
313 | ||
314 | switch (blockBits) { | |
315 | case 128: BC = 4; break; | |
316 | case 192: BC = 6; break; | |
317 | case 256: BC = 8; break; | |
318 | default : return (-2); | |
319 | } | |
320 | ||
321 | switch (keyBits >= blockBits ? keyBits : blockBits) { | |
322 | case 128: ROUNDS = 10; break; | |
323 | case 192: ROUNDS = 12; break; | |
324 | case 256: ROUNDS = 14; break; | |
325 | default : return (-3); /* this cannot happen */ | |
326 | } | |
327 | ||
328 | /* To decrypt: apply the inverse operations of the encrypt routine, | |
329 | * in opposite order | |
330 | * | |
331 | * (KeyAddition is an involution: it 's equal to its inverse) | |
332 | * (the inverse of Substitution with table S is Substitution with the | |
333 | * inverse table of S) | |
334 | * (the inverse of Shiftrow is Shiftrow over a suitable distance) | |
335 | */ | |
336 | ||
337 | /* First the special round: | |
338 | * without InvMixColumn | |
339 | * with extra KeyAddition | |
340 | */ | |
341 | KeyAddition(a,rk[ROUNDS],BC); | |
342 | Substitution(a,Si,BC); | |
343 | ShiftRow(a,1,BC); | |
344 | ||
345 | /* ROUNDS-1 ordinary rounds | |
346 | */ | |
347 | for(r = ROUNDS-1; r > 0; r--) { | |
348 | KeyAddition(a,rk[r],BC); | |
349 | InvMixColumn(a,BC); | |
350 | Substitution(a,Si,BC); | |
351 | ShiftRow(a,1,BC); | |
352 | } | |
353 | ||
354 | /* End with the extra key addition | |
355 | */ | |
356 | ||
357 | KeyAddition(a,rk[0],BC); | |
358 | ||
359 | return 0; | |
360 | } | |
361 | ||
29654253 A |
362 | #if !GLADMAN_AES_128_ENABLE |
363 | ||
bac41a7b A |
364 | /* |
365 | * All of these 128-bit-key-and-block routines require 32-bit word-aligned | |
366 |