/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */


/*
 * vRijndael-alg-ref.c
 *
 * Created by Robert A. Murley on Mon Jan 22 2001.
 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
 *
 */

#include "rijndaelApi.h"
#include "rijndael-alg-ref.h"
#include "boxes-ref.h"
#include <string.h>

/* debugger seems to have trouble with this code... */
#define VAES_DEBUG	1
#if VAES_DEBUG
#include <stdio.h>
#define vdprintf(s)	printf s
#else
#define vdprintf(s)
#endif

#define SC	((BC - 4) >> 1)
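/* SC selects a row of the shifts[] table below: BC = 4, 6, 8 map to 0, 1, 2 */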

#if defined(__ppc__) && defined(ALTIVEC_ENABLE)

typedef union {
	unsigned char			s[4][8];
	unsigned long			l[8];
	vector unsigned char	v[2];
} doubleVec;
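/* s[] addresses the state bytewise as four 8-byte rows: rows 0 and 1 overlay
   v[0], rows 2 and 3 overlay v[1], matching the row layout used throughout */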

typedef union {
	unsigned long			s[4];
	vector unsigned long	v;
} vecLong;

static word8 shifts[3][4][2] = {
 { { 0, 0 },
   { 1, 3 },
   { 2, 2 },
   { 3, 1 }
 },
 { { 0, 0 },
   { 1, 5 },
   { 2, 4 },
   { 3, 3 }
 },
 { { 0, 0 },
   { 1, 7 },
   { 3, 5 },
   { 4, 4 }
 }
};
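
/* shifts[SC][i][d] is the left-rotation applied to state row i by vShiftRow();
   d = 0 is used for encryption, d = 1 for decryption (see the calls below) */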

int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
		unsigned char W[MAXROUNDS+1][4][MAXBC])
{
	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int			KC, BC, ROUNDS;
	int			i, j, t, rconpointer = 0;
	doubleVec	tk;
	register vector unsigned char	v1, v2, mask;

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	tk.v[0] = vk[0];
	tk.v[1] = vk[1];

	t = 0;
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
		tk.s[0][0] ^= rcon[rconpointer++];

		if (KC != 8) {
			/* xor bytes 1-7 of each row with previous byte */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
			}
		}
		else {
			/* xor bytes 1-3 of each row with previous byte */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				/* apply the S-box to byte KC/2 of the two rows held in this
				   vector, per the reference key schedule */
				for(j = 0; j < 2; j++)
					tk.s[2*i+j][KC/2] ^= *((word8 *)S + tk.s[2*i+j][KC/2 - 1]);
				/* xor bytes 5-7 of each row with previous byte */
				mask = vec_sld( mask, mask, 4 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				mask = vec_sld( mask, mask, 4 );
			}
		}
		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
	}
	return 0;
}


void vMakeKey(BYTE *keyMaterial, keyInstance *key)
{
	register vector unsigned char	v1, v2, v3, mask;
	vector unsigned char	vk[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
	v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
	if ( (long) keyMaterial & 0x0fL )
	{	// this is required if keyMaterial is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
		mask = vec_lvsl( 0, keyMaterial );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
	memset( (char *) vk, 0, 4 * MAXKC);
}
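
/* The two permutes above implement the reference implementation's parsing of
 * the key byte stream into a four-row array; conceptually (a scalar sketch,
 * assuming the usual k[4][MAXKC] layout):
 *
 *	for(i = 0; i < keyLen/8; i++)
 *		k[i % 4][i / 4] = keyMaterial[i];
 *
 * i.e. row r holds keyMaterial[r], keyMaterial[r+4], keyMaterial[r+8], ...
 */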


/* This routine does 16 simultaneous lookups in a 256-byte table. */
vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
{
	register vector unsigned char	upperBits000, upperBits001, upperBits010, upperBits011,
									upperBits100, upperBits101, upperBits110, upperBits111,
									lookupBit00, lookupBit01, lookupBit10, lookupBit11,
									lookupBit0, lookupBit1, lookup,
									maskForBit6, maskForBit7, maskForBit8, seven;
	register vector unsigned char	*tabeven, *tabodd;

	seven = vec_splat_u8 ( 7 );
	tabeven = table++;
	tabodd = table;

	// Each variable contains the correct values for the corresponding bits 6, 7 and 8.
	upperBits000 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits001 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits010 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits011 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits100 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits101 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits110 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2;	tabodd += 2;
	upperBits111 = vec_perm ( *tabeven, *tabodd, v );

	// Here we extract all the correct values for bit 6.
	maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
	maskForBit6 = vec_sra ( maskForBit6, seven );
	lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
	lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
	lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
	lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );

	// Then we get the correct values for bit 7.
	maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
	maskForBit7 = vec_sra ( maskForBit7, seven );
	lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
	lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );

	// Finally, the entire correct result vector.
	maskForBit8 = vec_sra ( v, seven );

	lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );

	return lookup;
}
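
/* Conceptually, the routine above computes (a scalar sketch):
 *
 *	for ( i = 0; i < 16; i++ )
 *		lookup[i] = ((word8 *) table)[ v[i] ];
 *
 * vec_perm can only select from 32 bytes at a time, so the 256-byte table is
 * held as 8 pairs of 16-byte vectors, all indexed in parallel by the low five
 * bits of each byte of v; the three high bits then select among the eight
 * candidate results via vec_sel.
 */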

vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
{
	register vector unsigned char	x, y, zero;
	register vector unsigned short	xh, yh, zhi, zlo, two54, two55;

	zero = vec_splat_u8( 0 );
	two55 = vec_splat_u16( -1 );
	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
	two54 = vec_sub( two55, vec_splat_u16( 1 ) );

	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );	// Logtable[a]
	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );	// Logtable[b]

	// Convert upper 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergeh( zero, x );
	yh = (vector unsigned short) vec_mergeh( zero, y );
	xh = vec_add( xh, yh );								// xh = Logtable[a] + Logtable[b]
	yh = vec_sub( xh, two55 );
	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh % 255

	// Convert lower 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergel( zero, x );
	yh = (vector unsigned short) vec_mergel( zero, y );
	xh = vec_add( xh, yh );
	yh = vec_sub( xh, two55 );
	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );

	x = vec_pack( zhi, zlo );									// recombine into single byte vector
	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );	// Alogtable[x]
	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );				// check a = 0
	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );				// check b = 0
	return x;
}
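
/* vmul() is the vector form of the reference implementation's GF(2^8)
 * multiply via log/antilog tables (a scalar sketch):
 *
 *	word8 mul(word8 a, word8 b)
 *	{
 *		if (a && b) return Alogtable[(Logtable[a] + Logtable[b]) % 255];
 *		else return 0;
 *	}
 *
 * The vec_cmpgt/vec_sel pair performs the % 255 reduction (the sum of two
 * logs is at most 508, so one conditional subtraction suffices), and the two
 * trailing vec_sel calls handle the a == 0 / b == 0 cases.
 */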

void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
{
	v[0] = vec_xor( v[0], rk[0] );	// first vector contains rows 0 and 1
	v[1] = vec_xor( v[1], rk[1] );	// second vector contains rows 2 and 3
}


void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
{
	vecLong		sh;
	register vector unsigned char	mask, mask1, t;
	register vector bool char		c;
	register int	i, j;

	sh.s[0] = 0;
	for (i = 1; i < 4; i++)
		sh.s[i] = shifts[SC][i][d] % BC;	// contains the number of elements to shift each row

	// each vector contains two BC-byte long rows
	j = 0;
	for ( i = 0; i < 2; i++ ) {
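		/* vec_lvsl( 0, p ) returns the byte sequence { n, n+1, ..., n+15 }
		   where n = (long) p & 0xf, so passing the shift count cast to a
		   pointer yields a left-rotation permute mask directly */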
		mask = vec_lvsl( 0, (int *) sh.s[j++]);		// mask for even row
		mask1 = vec_lvsl( 0, (int *) sh.s[j++]);	// mask for odd row
		if (BC == 4) {
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 3 ) );
		} else if (BC == 6) {
			mask = vec_sld( mask, mask, 8 );
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			t = vec_sub( mask, vec_splat_u8( 6 ) );
			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
			mask = vec_sel( mask, t, c );
		} else {
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 7 ) );
		}
		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
		mask = vec_add( mask, mask1 );
		v[i] = vec_perm( v[i], v[i], mask );		// rotate each row as required
	}
}
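
/* Per the reference ShiftRow, each row i (except row 0) is rotated left by
 * shifts[SC][i][d] positions; a scalar sketch of the operation performed here:
 *
 *	for(i = 1; i < 4; i++) {
 *		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
 *		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
 *	}
 */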

void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
{
	v[0] = rimskyKorsakov( v[0], box );	// first vector contains rows 0 and 1
	v[1] = rimskyKorsakov( v[1], box );	// second vector contains rows 2 and 3
}

void vMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char	a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char	two, three;

	two = vec_splat_u8( 2 );
	three = vec_splat_u8( 3 );

	a1 = vec_sld( v[0], v[1], 8 );	// equivalent to a[(i+1) % 4]
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );		// equivalent to a[(i+2) % 4]
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );		// equivalent to a[(i+3) % 4]
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( two, v[0] );					// mul(2,a[i][j])
	a0 = vec_xor( a0, vmul( three, a1 ) );	// ^ mul(3,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, a2 );					// ^ a[(i + 2) % 4][j]
	v[0] = vec_xor( a0, a3 );				// ^ a[(i + 3) % 4][j]

	// Calculations for rows 2 and 3
	b0 = vmul( two, v[1] );
	b0 = vec_xor( b0, vmul( three, b1 ) );
	b0 = vec_xor( b0, b2 );
	v[1] = vec_xor( b0, b3 );
}
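
/* This is the reference MixColumn evaluated two rows per vector; a scalar
 * sketch of the computation (mul() as described above):
 *
 *	for(j = 0; j < BC; j++)
 *		for(i = 0; i < 4; i++)
 *			b[i][j] = mul(2, a[i][j])
 *				^ mul(3, a[(i + 1) % 4][j])
 *				^ a[(i + 2) % 4][j]
 *				^ a[(i + 3) % 4][j];
 */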

void vInvMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char	a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char	nine, eleven, thirteen, fourteen;

	nine = vec_splat_u8( 0x9 );
	eleven = vec_splat_u8( 0xb );
	thirteen = vec_splat_u8( 0xd );
	fourteen = vec_splat_u8( 0xe );

	a1 = vec_sld( v[0], v[1], 8 );	// equivalent to a[(i+1) % 4]
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );		// equivalent to a[(i+2) % 4]
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );		// equivalent to a[(i+3) % 4]
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( fourteen, v[0] );				// mul(0xe,a[i][j])
	a0 = vec_xor( a0, vmul( eleven, a1 ) );		// ^ mul(0xb,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, vmul( thirteen, a2 ) );	// ^ mul(0xd,a[(i + 2) % 4][j])
	v[0] = vec_xor( a0, vmul( nine, a3 ) );		// ^ mul(0x9,a[(i + 3) % 4][j])

	// Calculations for rows 2 and 3
	b0 = vmul( fourteen, v[1] );
	b0 = vec_xor( b0, vmul( eleven, b1 ) );
	b0 = vec_xor( b0, vmul( thirteen, b2 ) );
	v[1] = vec_xor( b0, vmul( nine, b3 ) );
}

int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	/* Encryption of one block.
	 */
	int	r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	vKeyAddition( a, rk[0] );
	for(r = 1; r < ROUNDS; r++) {
		vSubstitution( a, (vector unsigned char *)S);
		vShiftRow( a, 0, BC);
		vMixColumn( a );
		vKeyAddition( a, rk[r] );
	}
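	/* the last round omits MixColumn */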
	vSubstitution( a, (vector unsigned char *)S);
	vShiftRow( a, 0, BC);
	vKeyAddition( a, rk[ROUNDS] );

	return 0;
}

int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	int	r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	vKeyAddition( a, rk[ROUNDS] );
	vSubstitution( a, (vector unsigned char *)Si);
	vShiftRow( a, 1, BC);
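	/* undo the remaining rounds in reverse order */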
	for(r = ROUNDS-1; r > 0; r--) {
		vKeyAddition( a, rk[r] );
		vInvMixColumn( a );
		vSubstitution( a, (vector unsigned char *)Si);
		vShiftRow( a, 1, BC);
	}
	vKeyAddition( a, rk[0] );

	return 0;
}

#if 0
/* Murley's code, to be deleted */
void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	register vector unsigned char	v1, v2, v3, v4, mask;
	register vector bool char		cmp;

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );

	/* store into cipher structure */
	if (cipher->mode == MODE_CBC) {
		v3 = vec_xor( v3, *((vector unsigned char *) cipher->chainBlock ) );
		v4 = vec_xor( v4, *((vector unsigned char *) cipher->chainBlock + 1 ) );
	}
	vec_st( v3, 0, (vector unsigned char *) cipher->chainBlock );
	vec_st( v4, 16, (vector unsigned char *) cipher->chainBlock );

	vRijndaelEncrypt((vector unsigned char *) cipher->chainBlock, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = vec_ld( 0, (vector unsigned char *) cipher->chainBlock );
	v2 = vec_ld( 16, (vector unsigned char *) cipher->chainBlock );

	/* parse rectangular array into output ciphertext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
	return;
}

void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	// for vector machines
	register vector unsigned char	v1, v2, v3, v4, mask;
	register vector bool char		cmp;
	vector unsigned char	block[2], cblock[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	block[0] = v3;
	block[1] = v4;

	/* save a copy of incoming ciphertext for later chain */
	if (cipher->mode == MODE_CBC) {
		cblock[0] = v3;
		cblock[1] = v4;
	}

	vRijndaelDecrypt ((vector unsigned char *) block, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = block[0];
	v2 = block[1];

	/* XOR with last ciphertext */
	if (cipher->mode == MODE_CBC) {
		v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
		v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
		vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
		vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
	}

	/* parse rectangular array into output plaintext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{	/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
}
#endif	/* Murley's code, to be deleted */

/*
 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
 */
void vBlockEncrypt128(
	keyInstance	*key,
	BYTE		*input,
	BYTE		*outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;

	if ( (long) input & 0x0fL ) {
		BYTE localBuf[16];
		vdprintf(("vBlockEncrypt128: unaligned input\n"));
		/* manually re-align - the compiler is supposed to 16-byte align this for us */
		if((unsigned)localBuf & 0xf) {
			vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
		}
		memmove(localBuf, input, 16);
		v1 = vec_ld(0, (vector unsigned char *)localBuf);
	}
	else {
		vdprintf(("vBlockEncrypt128: aligned input\n"));
		v1 = vec_ld( 0, (vector unsigned char *) input );
	}

	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)?  For a 128-bit block the
	   bytes taken from v2 land only in the unused columns 4-7 of the BC = 4
	   state, so they appear never to reach the output permutes. */
	block[0] = vec_perm(v1, v2,
		(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
			5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2,
		(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
			7, 11, 15, 19, 23, 27, 31 ) );

	vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);

	/* parse rectangular array into output ciphertext bytes */
	v1 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
			10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
			14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		BYTE localBuf[16];
		vec_st(v1, 0, (vector unsigned char *) localBuf );
		memmove(outBuffer, localBuf, 16);
	} else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
	return;
}

void vBlockDecrypt128(
	keyInstance	*key,
	BYTE		*input,
	BYTE		*outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;

	if ( (long) input & 0x0fL ) {
		/* manually re-align - the compiler is supposed to 16-byte align this for us */
		BYTE localBuf[16];
		vdprintf(("vBlockDecrypt128: unaligned input\n"));
		if((unsigned)localBuf & 0xf) {
			vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
		}
		memmove(localBuf, input, 16);
		v1 = vec_ld(0, (vector unsigned char *)localBuf);
	}
	else {
		vdprintf(("vBlockDecrypt128: aligned input\n"));
		v1 = vec_ld( 0, (vector unsigned char *) input );
	}

	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)?  For a 128-bit block the
	   bytes taken from v2 land only in the unused columns 4-7 of the BC = 4
	   state, so they appear never to reach the output permutes. */
	block[0] = vec_perm(v1, v2,
		(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
			5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2,
		(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
			7, 11, 15, 19, 23, 27, 31 ) );

	vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);

	/* parse rectangular array into output plaintext bytes */
	v1 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
			10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
			14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL ) {
		/* store output data into a non-aligned buffer */
		BYTE localBuf[16];
		vec_st(v1, 0, (vector unsigned char *) localBuf );
		memmove(outBuffer, localBuf, 16);
	} else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
	return;
}
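
/* Usage sketch for the 128-bit entry points (illustrative only; assumes a
 * keyInstance whose keyLen and blockLen fields are set in bits, as read by
 * vMakeKey() above, and a hypothetical 16-byte key buffer):
 *
 *	keyInstance key;
 *	BYTE keyMaterial[16], pt[16], ct[16], out[16];
 *
 *	key.keyLen = 128;
 *	key.blockLen = 128;
 *	vMakeKey(keyMaterial, &key);
 *	vBlockEncrypt128(&key, pt, ct);
 *	vBlockDecrypt128(&key, ct, out);	// out should now match pt
 */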

#endif	/* defined(__ppc__) && defined(ALTIVEC_ENABLE) */