/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 *
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */

/*
 * vRijndael-alg-ref.c
 *
 * Created by Robert A. Murley on Mon Jan 22 2001.
 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
 */

#include "rijndaelApi.h"
#include "rijndael-alg-ref.h"
#include "boxes-ref.h"

/* debugger seems to have trouble with this code... */
#define vdprintf(s)	printf s

#define SC	((BC - 4) >> 1)
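/* SC maps BC = 4, 6, 8 to row 0, 1, 2 of the shifts table below */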

#if defined(__ppc__) && defined(ALTIVEC_ENABLE)

/* a 4 x 8 byte state viewed either as scalars or as two 16-byte vectors */
typedef union {
	unsigned char			s[4][8];
	vector unsigned char	v[2];
} doubleVec;

/* four longs viewed either as scalars or as one vector */
typedef union {
	unsigned long			s[4];
	vector unsigned long	v;
} vecLong;

/* per-row rotation amounts: shifts[SC][row][0] for encrypt, [1] for decrypt */
static word8 shifts[3][4][2] = {
 { { 0, 0 },
   { 1, 3 },
   { 2, 2 },
   { 3, 1 } },
 { { 0, 0 },
   { 1, 5 },
   { 2, 4 },
   { 3, 3 } },
 { { 0, 0 },
   { 1, 7 },
   { 3, 5 },
   { 4, 4 } }
};

int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
		unsigned char W[MAXROUNDS+1][4][MAXBC] )
{
	/* Calculate the necessary round keys.
	 * The number of calculations depends on keyBits and blockBits.
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	doubleVec tk;
	register vector unsigned char v1, v2, mask;

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3);		/* this cannot happen */
	}

	/* load the key material into the scalar/vector union */
	tk.v[0] = vk[0];
	tk.v[1] = vk[1];
	t = 0;

	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];

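	/*
	 * Expansion step: byte 0 of each row absorbs the S-box of the last
	 * byte of the next row (plus the round constant for row 0), and the
	 * masked vec_sld/vec_xor passes below fold each remaining byte into
	 * its right-hand neighbor.  Each tk.v[i] holds two 8-byte rows, so
	 * vec_sld(tk.v[i], tk.v[i], 15) lines every byte up with its
	 * predecessor while the mask protects byte 0 of each row.
	 */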
	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
		tk.s[0][0] ^= rcon[rconpointer++];

		/* xor bytes 1-7 of each row with previous byte */
		mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
										0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
		for ( i = 0; i < 2; i++ ) {
			v1 = vec_sld( tk.v[i], tk.v[i], 15 );
			v2 = vec_and( v1, mask );
			tk.v[i] = vec_xor( tk.v[i], v2 );
		}

		/* xor bytes 1-3 of each row with previous byte */
		mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0,
										0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
		for ( i = 0; i < 2; i++ ) {
			v1 = vec_sld( tk.v[i], tk.v[i], 15 );
			v2 = vec_and( v1, mask );
			tk.v[i] = vec_xor( tk.v[i], v2 );
			for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
			/* xor bytes 5-7 of each row with previous byte */
			mask = vec_sld( mask, mask, 4 );
			v2 = vec_and( v1, mask );
			tk.v[i] = vec_xor( tk.v[i], v2 );
			mask = vec_sld( mask, mask, 4 );
		}

		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
	}

	return 0;
}

void vMakeKey(BYTE *keyMaterial, keyInstance *key)
{
	register vector unsigned char v1, v2, v3, mask;
	vector unsigned char vk[2];

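	/*
	 * Classic AltiVec misaligned-load idiom: vec_ld ignores the low four
	 * address bits, so two (or, if unaligned, three) aligned loads
	 * bracket the data and vec_perm with the vec_lvsl shift mask splices
	 * out the 32 bytes starting at keyMaterial.
	 */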
	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
	v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
	if ( (long) keyMaterial & 0x0fL )
	{	// this is required if keyMaterial is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
		mask = vec_lvsl( 0, keyMaterial );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	vk[0] = vec_perm( v1, v2, (vector unsigned char)
				( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	vk[1] = vec_perm( v1, v2, (vector unsigned char)
				( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	vRijndaelKeySched( vk, key->keyLen, key->blockLen, key->keySched );
	memset( (char *) vk, 0, 4 * MAXKC );	/* zeroize the local key copy */
}

/* This routine does 16 simultaneous lookups in a 256-byte table. */
vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
{
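	/*
	 * vec_perm uses only the low five bits of each index byte, so a
	 * single vec_perm can look 16 indices up in a 32-byte (two-vector)
	 * slice of the table.  The 256-byte table is read as eight such
	 * slices, and bits 6, 7 and 8 of each index then pick the right
	 * slice through the tree of vec_sel merges below.
	 */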
	register vector unsigned char	upperBits000, upperBits001, upperBits010, upperBits011,
									upperBits100, upperBits101, upperBits110, upperBits111,
									lookupBit00, lookupBit01, lookupBit10, lookupBit11,
									lookupBit0, lookupBit1, lookup,
									maskForBit6, maskForBit7, maskForBit8, seven;
	register vector unsigned char	*tabeven, *tabodd;

	seven = vec_splat_u8 ( 7 );
	tabeven = table;		/* even and odd halves of the current 32-byte slice */
	tabodd = table + 1;

	// Each variable contains the correct values for the corresponding bits 6, 7 and 8.
	upperBits000 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits001 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits010 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits011 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits100 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits101 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits110 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits111 = vec_perm ( *tabeven, *tabodd, v );

	// Here we extract all the correct values for bit 6.
	maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
	maskForBit6 = vec_sra ( maskForBit6, seven );
	lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
	lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
	lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
	lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );

	// Then we get the correct values for bit 7.
	maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
	maskForBit7 = vec_sra ( maskForBit7, seven );
	lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
	lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );

	// Finally, the entire correct result vector.
	maskForBit8 = vec_sra ( v, seven );
	lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );

	return lookup;
}

vector unsigned char vmul( vector unsigned char a, vector unsigned char b )
{
	register vector unsigned char x, y, zero;
	register vector unsigned short xh, yh, zhi, zlo, two54, two55;

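	/*
	 * GF(2^8) multiplication via log/antilog tables, the vector analogue
	 * of the scalar reference routine:
	 *
	 *	word8 mul(word8 a, word8 b) {
	 *		if (a && b) return Alogtable[(Logtable[a] + Logtable[b]) % 255];
	 *		else return 0;
	 *	}
	 *
	 * The log sums are widened to 16-bit lanes and reduced mod 255 with a
	 * compare-and-select instead of a division.
	 */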
	zero = vec_splat_u8( 0 );
	two55 = vec_splat_u16( -1 );										// 0xffff in each lane
	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );	// 255
	two54 = vec_sub( two55, vec_splat_u16( 1 ) );						// 254

	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );		// Logtable[a]
	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );		// Logtable[b]

	// Convert upper 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergeh( zero, x );
	yh = (vector unsigned short) vec_mergeh( zero, y );
	xh = vec_add( xh, yh );								// xh = Logtable[a] + Logtable[b]
	yh = vec_sub( xh, two55 );
	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh % 255

	// Convert lower 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergel( zero, x );
	yh = (vector unsigned short) vec_mergel( zero, y );
	xh = vec_add( xh, yh );
	yh = vec_sub( xh, two55 );
	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );

	x = vec_pack( zhi, zlo );							// recombine into a single byte vector
	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );	// Alogtable[x]
	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );		// force result to 0 where a == 0
	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );		// force result to 0 where b == 0

	return x;
}

void vKeyAddition( vector unsigned char v[2], vector unsigned char rk[2] )
{
	v[0] = vec_xor( v[0], rk[0] );		// first vector contains rows 0 and 1
	v[1] = vec_xor( v[1], rk[1] );		// second vector contains rows 2 and 3
}

void vShiftRow( vector unsigned char v[2], word8 d, word8 BC )
{
	register vector unsigned char mask, mask1, t;
	register vector bool char c;
	vecLong sh;
	int i, j;

	sh.s[0] = 0;			// row 0 is never shifted
	for (i = 1; i < 4; i++)
		sh.s[i] = shifts[SC][i][d] % BC;	// contains the number of elements to shift each row

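	/*
	 * vec_lvsl(0, p) returns the byte sequence { p&15, p&15 + 1, ... },
	 * so feeding it the shift count itself (cast to a pointer) yields the
	 * permute map { n, n+1, ..., n+15 }: a left rotation by n once it is
	 * reduced modulo the row length and offset into each 8-byte half.
	 */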
	// each vector contains two BC-byte long rows
	j = 0;
	for ( i = 0; i < 2; i++ ) {
		mask = vec_lvsl( 0, (int *) sh.s[j++] );	// mask for even row
		mask1 = vec_lvsl( 0, (int *) sh.s[j++] );	// mask for odd row
		if (BC == 4) {
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 3 ) );
		} else if (BC == 6) {
			mask = vec_sld( mask, mask, 8 );
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			t = vec_sub( mask, vec_splat_u8( 6 ) );
			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
			mask = vec_sel( mask, t, c );			// reduce the shifts mod 6
		} else {
			mask = vec_sld( mask, mask1, 8 );		// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 7 ) );
		}
		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
		mask = vec_add( mask, mask1 );				// offset the odd row's indices into bytes 8-15
		v[i] = vec_perm( v[i], v[i], mask );		// rotate each row as required
	}
}

void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
{
	v[0] = rimskyKorsakov( v[0], box );		// first vector contains rows 0 and 1
	v[1] = rimskyKorsakov( v[1], box );		// second vector contains rows 2 and 3
}

void vMixColumn( vector unsigned char v[2] )
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char two, three;

	two = vec_splat_u8( 2 );
	three = vec_splat_u8( 3 );

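	/*
	 * Scalar reference for comparison: each output byte is
	 *	b[i][j] = mul(2,a[i][j]) ^ mul(3,a[(i+1)%4][j]) ^ a[(i+2)%4][j] ^ a[(i+3)%4][j]
	 * The vec_sld pairs below build the row-rotated operands for all
	 * columns of two rows at a time.
	 */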
	a1 = vec_sld( v[0], v[1], 8 );		// equivalent to a[(i+1) % 4]
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );			// equivalent to a[(i+2) % 4]
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );			// equivalent to a[(i+3) % 4]
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( two, v[0] );					// mul(2,a[i][j])
	a0 = vec_xor( a0, vmul( three, a1 ) );	// ^ mul(3,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, a2 );					// ^ a[(i + 2) % 4][j]
	v[0] = vec_xor( a0, a3 );				// ^ a[(i + 3) % 4][j]

	// Calculations for rows 2 and 3
	b0 = vmul( two, v[1] );
	b0 = vec_xor( b0, vmul( three, b1 ) );
	b0 = vec_xor( b0, b2 );
	v[1] = vec_xor( b0, b3 );
}

void vInvMixColumn( vector unsigned char v[2] )
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char nine, eleven, thirteen, fourteen;

	nine = vec_splat_u8( 0x9 );
	eleven = vec_splat_u8( 0xb );
	thirteen = vec_splat_u8( 0xd );
	fourteen = vec_splat_u8( 0xe );

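	/*
	 * InvMixColumn applies the inverse matrix coefficients:
	 *	b[i][j] = mul(0xe,a[i][j]) ^ mul(0xb,a[(i+1)%4][j])
	 *		^ mul(0xd,a[(i+2)%4][j]) ^ mul(0x9,a[(i+3)%4][j])
	 */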
	a1 = vec_sld( v[0], v[1], 8 );		// equivalent to a[(i+1) % 4]
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );			// equivalent to a[(i+2) % 4]
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );			// equivalent to a[(i+3) % 4]
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( fourteen, v[0] );				// mul(0xe,a[i][j])
	a0 = vec_xor( a0, vmul( eleven, a1 ) );		// ^ mul(0xb,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, vmul( thirteen, a2 ) );	// ^ mul(0xd,a[(i + 2) % 4][j])
	v[0] = vec_xor( a0, vmul( nine, a3 ) );		// ^ mul(0x9,a[(i + 3) % 4][j])

	// Calculations for rows 2 and 3
	b0 = vmul( fourteen, v[1] );
	b0 = vec_xor( b0, vmul( eleven, b1 ) );
	b0 = vec_xor( b0, vmul( thirteen, b2 ) );
	v[1] = vec_xor( b0, vmul( nine, b3 ) );
}

int vRijndaelEncrypt ( vector unsigned char a[2], int keyBits, int blockBits,
		vector unsigned char rk[MAXROUNDS+1][2] )
{
	/* Encryption of one block. */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3);		/* this cannot happen */
	}

	/* begin with a key addition */
	vKeyAddition( a, rk[0] );

	/* ROUNDS-1 ordinary rounds */
	for(r = 1; r < ROUNDS; r++) {
		vSubstitution( a, (vector unsigned char *)S );
		vShiftRow( a, 0, BC );
		vMixColumn( a );
		vKeyAddition( a, rk[r] );
	}

	/* last round is special: there is no MixColumn */
	vSubstitution( a, (vector unsigned char *)S );
	vShiftRow( a, 0, BC );
	vKeyAddition( a, rk[ROUNDS] );

	return 0;
}

int vRijndaelDecrypt ( vector unsigned char a[2], int keyBits, int blockBits,
		vector unsigned char rk[MAXROUNDS+1][2] )
{
	/* Decryption of one block. */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3);		/* this cannot happen */
	}

	/* first special round without InvMixColumn */
	vKeyAddition( a, rk[ROUNDS] );
	vSubstitution( a, (vector unsigned char *)Si );
	vShiftRow( a, 1, BC );

	/* ROUNDS-1 ordinary rounds */
	for(r = ROUNDS-1; r > 0; r--) {
		vKeyAddition( a, rk[r] );
		vInvMixColumn( a );
		vSubstitution( a, (vector unsigned char *)Si );
		vShiftRow( a, 1, BC );
	}

	/* end with a key addition */
	vKeyAddition( a, rk[0] );

	return 0;
}

#if 0	/* Murley's code, to be deleted */

void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input,
	int inputLen, BYTE *outBuffer)
{
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

460 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
461 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
463 /* store into cipher structure */
464 if (cipher
->mode
== MODE_CBC
) {
465 v3
= vec_xor( v3
, *((vector
unsigned char *) cipher
->chainBlock
) );
466 v4
= vec_xor( v4
, *((vector
unsigned char *) cipher
->chainBlock
+ 1 ) );
468 vec_st( v3
, 0, (vector
unsigned char *) cipher
->chainBlock
);
469 vec_st( v4
, 16, (vector
unsigned char *) cipher
->chainBlock
);
471 vRijndaelEncrypt((vector
unsigned char *) cipher
->chainBlock
, key
->keyLen
, cipher
->blockLen
, (vector
unsigned char *) key
->keySched
);
473 v1
= vec_ld( 0, (vector
unsigned char *) cipher
->chainBlock
);
474 v2
= vec_ld( 16, (vector
unsigned char *) cipher
->chainBlock
);
476 /* parse rectangular array into output ciphertext bytes */
477 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
478 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
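	/*
	 * Misaligned-store idiom: vec_lvsr supplies a right-rotation mask,
	 * and comparing its elements against 15 marks which bytes of each
	 * aligned destination vector belong to the output, so vec_sel can
	 * merge the rotated data into the three overlapping aligned stores.
	 */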
	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	}
	else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
}

void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input,
	int inputLen, BYTE *outBuffer)
{
	// for vector machines
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;
	vector unsigned char block[2], cblock[2];

511 v1
= vec_ld( 0, (vector
unsigned char *) input
);
512 v2
= vec_ld( 16, (vector
unsigned char *) input
);
513 if ( (long) input
& 0x0fL
)
514 { // this is required if input is not on a 16-byte boundary
515 v3
= vec_ld( 32, (vector
unsigned char *) input
);
516 mask
= vec_lvsl( 0, input
);
517 v1
= vec_perm( v1
, v2
, mask
);
518 v2
= vec_perm( v2
, v3
, mask
);
521 /* parse input stream into rectangular array */
522 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
523 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	/* save a copy of incoming ciphertext for later chain */
	if (cipher->mode == MODE_CBC) {
		cblock[0] = v3;
		cblock[1] = v4;
	}

	vRijndaelDecrypt( (vector unsigned char *) block, key->keyLen,
		cipher->blockLen, (vector unsigned char *) key->keySched );
	v1 = block[0];
	v2 = block[1];

	/* exor with last ciphertext */
	if (cipher->mode == MODE_CBC) {
		v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
		v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
		vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
		vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
	}

547 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
548 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
	if ( (long) outBuffer & 0x0fL )
	{	/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	}
	else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
}

#endif	/* Murley's code, to be deleted */

/*
 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
 */
void vBlockEncrypt128(
	keyInstance *key,
	BYTE *input,
	BYTE *outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;
	unsigned char localBuf[16];

	if ( (long) input & 0x0fL ) {
585 vdprintf(("vBlockEncrypt128: unaligned input\n"));
586 /* manually re-align - the compiler is supposed to 16-byte align this for us */
587 if((unsigned)localBuf
& 0xf) {
588 vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
590 memmove(localBuf
, input
, 16);
591 v1
= vec_ld(0, (vector
unsigned char *)localBuf
);
594 vdprintf(("vBlockEncrypt128: aligned input\n"));
595 v1
= vec_ld( 0, (vector
unsigned char *) input
);
	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)? */
	block[0] = vec_perm( v1, v2, (vector unsigned char)
				( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2, (vector unsigned char)
				( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
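	/*
	 * Note on the FIXME: with a 128-bit block only columns 0-3 carry
	 * state, and the lanes drawn from the uninitialized v2 land in
	 * columns 4-7, which the unparse below discards, so zeroing v2
	 * appears unnecessary in practice.
	 */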
	vRijndaelEncrypt( block, key->keyLen, 128, (vector unsigned char *) key->keySched );

	/* parse rectangular array into output ciphertext bytes */
	v1 = vec_perm( block[0], block[1], (vector unsigned char)
				( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm( block[0], block[1], (vector unsigned char)
				( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		vec_st(v1, 0, (vector unsigned char *) localBuf);
		memmove(outBuffer, localBuf, 16);
	}
	else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
}

void vBlockDecrypt128(
	keyInstance *key,
	BYTE *input,
	BYTE *outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;
	unsigned char localBuf[16];

	if ( (long) input & 0x0fL ) {
		/* manually re-align - the compiler is supposed to 16-byte align this for us */
		vdprintf(("vBlockDecrypt128: unaligned input\n"));
		if((unsigned)localBuf & 0xf) {
			vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
		}
		memmove(localBuf, input, 16);
		v1 = vec_ld(0, (vector unsigned char *)localBuf);
	}
	else {
		vdprintf(("vBlockDecrypt128: aligned input\n"));
		v1 = vec_ld( 0, (vector unsigned char *) input );
	}

	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)? */
	block[0] = vec_perm( v1, v2, (vector unsigned char)
				( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2, (vector unsigned char)
				( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );

	vRijndaelDecrypt( block, key->keyLen, 128, (vector unsigned char *) key->keySched );

	/* parse rectangular array into output plaintext bytes */
	v1 = vec_perm( block[0], block[1], (vector unsigned char)
				( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm( block[0], block[1], (vector unsigned char)
				( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL ) {
		/* store output data into a non-aligned buffer */
		vec_st(v1, 0, (vector unsigned char *) localBuf);
		memmove(outBuffer, localBuf, 16);
	}
	else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
}

#endif	/* defined(__ppc__) && defined(ALTIVEC_ENABLE) */