[apple/security.git] / AppleCSP / AES / rijndael-alg-ref.c

/*
 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
 * 
 * The contents of this file constitute Original Code as defined in and are
 * subject to the Apple Public Source License Version 1.2 (the 'License').
 * You may not use this file except in compliance with the License. Please obtain
 * a copy of the License at http://www.apple.com/publicsource and read it before
 * using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
 * specific language governing rights and limitations under the License.
 */


/* rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code
 * authors: Paulo Barreto
 *          Vincent Rijmen
 *
 * PPC and 128-bit block optimization by Doug Mitchell May 2001. 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "rijndael-alg-ref.h"
#include <AppleCSP/cspdebugging.h>

#define SC	((BC - 4) >> 1)

#include "boxes-ref.h"

static const word8 shifts[3][4][2] = {
 { { 0, 0 },
   { 1, 3 },
   { 2, 2 },
   { 3, 1 }
 },
 { { 0, 0 },
   { 1, 5 },
   { 2, 4 },
   { 3, 3 }
 },
 { { 0, 0 },
   { 1, 7 },
   { 3, 5 },
   { 4, 4 }
 }
}; 

#if 	!GLADMAN_AES_128_ENABLE

/* 128 bit key/word shift table in bits */
static const word8 shifts128[4][2] = {
 { 0,  0 },
 { 8,  24 },
 { 16, 16 },
 { 24, 8 }
};

#endif	/* GLADMAN_AES_128_ENABLE */

#if		!AES_MUL_BY_LOOKUP
/*
 * Profiling measurements showed that the mul routine is where a large propertion of 
 * the time is spent. Since the first argument to  mul is always one of six 
 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to 
 * do the multiplies. This eliminates the need for the log/antilog tables, so
 * it's only adding one kilobyte of const data. Throughput improvement for this
 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the 
 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
 * on a 500 MHz G4 with 1 MB of L2 cache. 
 */

/*
 * The mod 255 op in mul is really expensive...
 *
 * We know that b <= (254 * 2), so there are only two cases. Either return b, 
 * or return b-255. 
 *
 * On a G4 this single optimization results in a 24% speedup for encrypt and 
 * a 25% speedup for decrypt. 
 */
static inline word8 mod255(word32 b)
{
	if(b >= 255) {
		b -= 255;
	}
	return b;
}

word8 mul(word8 a, word8 b) {
   /* multiply two elements of GF(2^m)
    * needed for MixColumn and InvMixColumn
    */
	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
	else return 0;
}
#endif	/* !AES_MUL_BY_LOOKUP */

void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
	/* Exor corresponding text input and round key input bytes
	 */
	int i, j;
	
	for(i = 0; i < 4; i++)
   		for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
}

void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
	/* Row 0 remains unchanged
	 * The other three rows are shifted a variable amount
	 */
	word8 tmp[MAXBC];
	int i, j;
	
	for(i = 1; i < 4; i++) {
		for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
		for(j = 0; j < BC; j++) a[i][j] = tmp[j];
	}
}

void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
	/* Replace every byte of the input by the byte at that place
	 * in the nonlinear S-box
	 */
	int i, j;
	
	for(i = 0; i < 4; i++)
		for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
}
   
void MixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 */
	word8 b[4][MAXBC];
	int i, j;
		
	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {
			#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x02[a[i][j]]
				^ mulBy0x03[a[(i + 1) % 4][j]]
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#else
			b[i][j] = mul(2,a[i][j])
				^ mul(3,a[(i + 1) % 4][j])
				^ a[(i + 2) % 4][j]
				^ a[(i + 3) % 4][j];
			#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}

void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
	/* Mix the four bytes of every column in a linear way
	 * This is the opposite operation of Mixcolumn
	 */
	word8 b[4][MAXBC];
	int i, j;
	
	for(j = 0; j < BC; j++) {
		for(i = 0; i < 4; i++) {         
			#if		AES_MUL_BY_LOOKUP
			b[i][j] = mulBy0x0e[a[i][j]]
				^ mulBy0x0b[a[(i + 1) % 4][j]]
				^ mulBy0x0d[a[(i + 2) % 4][j]]
				^ mulBy0x09[a[(i + 3) % 4][j]];     
			#else
			b[i][j] = mul(0xe,a[i][j])
				^ mul(0xb,a[(i + 1) % 4][j])                 
				^ mul(0xd,a[(i + 2) % 4][j])
				^ mul(0x9,a[(i + 3) % 4][j]);     
			#endif
		}
	}
	for(i = 0; i < 4; i++) {
		for(j = 0; j < BC; j++) a[i][j] = b[i][j];
	}
}

int rijndaelKeySched (
	word8 k[4][MAXKC], 
	int keyBits, 
	int blockBits, 
	word8 W[MAXROUNDS+1][4][MAXBC]) {
	
	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	word8 tk[4][MAXKC];   

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	
	for(j = 0; j < KC; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
		
	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		for(i = 0; i < 4; i++)
			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
		tk[0][0] ^= rcon[rconpointer++];

		if (KC != 8)
			for(j = 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		else {
			for(j = 1; j < KC/2; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
			for(j = KC/2 + 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
	}
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
	}		

	return 0;
}
      
int rijndaelEncrypt (
	word8 a[4][MAXBC], 
	int keyBits, 
	int blockBits, 
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	/* Encryption of one block, general case. 
	 */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* begin with a key addition
	 */
	KeyAddition(a,rk[0],BC); 

	/* ROUNDS-1 ordinary rounds
	 */
	for(r = 1; r < ROUNDS; r++) {
		Substitution(a,S,BC);
		ShiftRow(a,0,BC);
		MixColumn(a,BC);
		KeyAddition(a,rk[r],BC);
	}
	
	/* Last round is special: there is no MixColumn
	 */
	Substitution(a,S,BC);
	ShiftRow(a,0,BC);
	KeyAddition(a,rk[ROUNDS],BC);

	return 0;
}   

int rijndaelDecrypt (
	word8 a[4][MAXBC], 
	int keyBits, 
	int blockBits, 
	word8 rk[MAXROUNDS+1][4][MAXBC])
{
	int r, BC, ROUNDS;
	
	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* To decrypt: apply the inverse operations of the encrypt routine,
	 *             in opposite order
	 * 
	 * (KeyAddition is an involution: it 's equal to its inverse)
	 * (the inverse of Substitution with table S is Substitution with the 
	 *  inverse table of S)
	 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
	 */

	/* First the special round:
	 *   without InvMixColumn
	 *   with extra KeyAddition
	 */
	KeyAddition(a,rk[ROUNDS],BC);
	Substitution(a,Si,BC);
	ShiftRow(a,1,BC);              
	
	/* ROUNDS-1 ordinary rounds
	 */
	for(r = ROUNDS-1; r > 0; r--) {
		KeyAddition(a,rk[r],BC);
		InvMixColumn(a,BC);      
		Substitution(a,Si,BC);
		ShiftRow(a,1,BC);                
	}
	
	/* End with the extra key addition
	 */
	
	KeyAddition(a,rk[0],BC);    

	return 0;
}

#if		!GLADMAN_AES_128_ENABLE

/*
 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
Commit	Line	Data
bac41a7b A	1	/*
	2	* Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
	3	*
	4	* The contents of this file constitute Original Code as defined in and are
	5	* subject to the Apple Public Source License Version 1.2 (the 'License').
	6	* You may not use this file except in compliance with the License. Please obtain
	7	* a copy of the License at http://www.apple.com/publicsource and read it before
	8	* using this file.
	9	*
	10	* This Original Code and all software distributed under the License are
	11	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
	12	* OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
	13	* LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
	14	* PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
	15	* specific language governing rights and limitations under the License.
	16	*/
	17
	18
	19	/* rijndael-alg-ref.c v2.0 August '99
	20	* Reference ANSI C code
	21	* authors: Paulo Barreto
	22	* Vincent Rijmen
	23	*
	24	* PPC and 128-bit block optimization by Doug Mitchell May 2001.
	25	*/
	26
	27	#include <stdio.h>
	28	#include <stdlib.h>
	29	#include <string.h>
	30
	31	#include "rijndael-alg-ref.h"
	32	#include <AppleCSP/cspdebugging.h>
	33
	34	#define SC ((BC - 4) >> 1)
	35
	36	#include "boxes-ref.h"
	37
	38	static const word8 shifts[3][4][2] = {
	39	{ { 0, 0 },
	40	{ 1, 3 },
	41	{ 2, 2 },
	42	{ 3, 1 }
	43	},
	44	{ { 0, 0 },
	45	{ 1, 5 },
	46	{ 2, 4 },
	47	{ 3, 3 }
	48	},
	49	{ { 0, 0 },
	50	{ 1, 7 },
	51	{ 3, 5 },
	52	{ 4, 4 }
	53	}
	54	};
	55
df0e469f A	56	#if !GLADMAN_AES_128_ENABLE
df0e469f A	57
bac41a7b A	58	/* 128 bit key/word shift table in bits */
	59	static const word8 shifts128[4][2] = {
	60	{ 0, 0 },
	61	{ 8, 24 },
	62	{ 16, 16 },
	63	{ 24, 8 }
	64	};
	65
df0e469f A	66	#endif /* GLADMAN_AES_128_ENABLE */
df0e469f A	67
bac41a7b A	68	#if !AES_MUL_BY_LOOKUP
	69	/*
	70	* Profiling measurements showed that the mul routine is where a large propertion of
	71	* the time is spent. Since the first argument to mul is always one of six
	72	* constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
	73	* do the multiplies. This eliminates the need for the log/antilog tables, so
	74	* it's only adding one kilobyte of const data. Throughput improvement for this
	75	* mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
	76	* case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
	77	* and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
	78	* InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
	79	* on a 500 MHz G4 with 1 MB of L2 cache.
df0e469f	80	*/
bac41a7b A	81
	82	/*
	83	* The mod 255 op in mul is really expensive...
	84	*
	85	* We know that b <= (254 * 2), so there are only two cases. Either return b,
	86	* or return b-255.
	87	*
	88	* On a G4 this single optimization results in a 24% speedup for encrypt and
	89	* a 25% speedup for decrypt.
	90	*/
	91	static inline word8 mod255(word32 b)
	92	{
	93	if(b >= 255) {
	94	b -= 255;
	95	}
	96	return b;
	97	}
	98
	99	word8 mul(word8 a, word8 b) {
	100	/* multiply two elements of GF(2^m)
	101	* needed for MixColumn and InvMixColumn
	102	*/
	103	if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
	104	else return 0;
	105	}
	106	#endif /* !AES_MUL_BY_LOOKUP */
	107
	108	void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
	109	/* Exor corresponding text input and round key input bytes
	110	*/
	111	int i, j;
	112
	113	for(i = 0; i < 4; i++)
	114	for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
	115	}
	116
	117	void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
	118	/* Row 0 remains unchanged
	119	* The other three rows are shifted a variable amount
	120	*/
	121	word8 tmp[MAXBC];
	122	int i, j;
	123
	124	for(i = 1; i < 4; i++) {
	125	for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
	126	for(j = 0; j < BC; j++) a[i][j] = tmp[j];
	127	}
	128	}
	129
	130	void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
	131	/* Replace every byte of the input by the byte at that place
	132	* in the nonlinear S-box
	133	*/
	134	int i, j;
	135
	136	for(i = 0; i < 4; i++)
	137	for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
	138	}
	139
	140	void MixColumn(word8 a[4][MAXBC], word8 BC) {
	141	/* Mix the four bytes of every column in a linear way
	142	*/
	143	word8 b[4][MAXBC];
	144	int i, j;
145
146	for(j = 0; j < BC; j++) {
147	for(i = 0; i < 4; i++) {
148	#if AES_MUL_BY_LOOKUP
149	b[i][j] = mulBy0x02[a[i][j]]
150	^ mulBy0x03[a[(i + 1) % 4][j]]
151	^ a[(i + 2) % 4][j]
152	^ a[(i + 3) % 4][j];
153	#else
154	b[i][j] = mul(2,a[i][j])
155	^ mul(3,a[(i + 1) % 4][j])
156	^ a[(i + 2) % 4][j]
157	^ a[(i + 3) % 4][j];
158	#endif
159	}
160	}
161	for(i = 0; i < 4; i++) {
162	for(j = 0; j < BC; j++) a[i][j] = b[i][j];
163	}
164	}
165
166	void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
167	/* Mix the four bytes of every column in a linear way
168	* This is the opposite operation of Mixcolumn
169	*/
170	word8 b[4][MAXBC];
171	int i, j;
172
173	for(j = 0; j < BC; j++) {
174	for(i = 0; i < 4; i++) {
175	#if AES_MUL_BY_LOOKUP
176	b[i][j] = mulBy0x0e[a[i][j]]
177	^ mulBy0x0b[a[(i + 1) % 4][j]]
178	^ mulBy0x0d[a[(i + 2) % 4][j]]
179	^ mulBy0x09[a[(i + 3) % 4][j]];
180	#else
181	b[i][j] = mul(0xe,a[i][j])
182	^ mul(0xb,a[(i + 1) % 4][j])
183	^ mul(0xd,a[(i + 2) % 4][j])
184	^ mul(0x9,a[(i + 3) % 4][j]);
185	#endif
186	}
187	}
188	for(i = 0; i < 4; i++) {
189	for(j = 0; j < BC; j++) a[i][j] = b[i][j];
190	}
191	}
192
193	int rijndaelKeySched (
194	word8 k[4][MAXKC],
195	int keyBits,
196	int blockBits,
197	word8 W[MAXROUNDS+1][4][MAXBC]) {
198
199	/* Calculate the necessary round keys
200	* The number of calculations depends on keyBits and blockBits
201	*/
202	int KC, BC, ROUNDS;
203	int i, j, t, rconpointer = 0;
204	word8 tk[4][MAXKC];
205
206	switch (keyBits) {
207	case 128: KC = 4; break;
208	case 192: KC = 6; break;
209	case 256: KC = 8; break;
210	default : return (-1);
211	}
212
213	switch (blockBits) {
214	case 128: BC = 4; break;
215	case 192: BC = 6; break;
216	case 256: BC = 8; break;
217	default : return (-2);
218	}
219
220	switch (keyBits >= blockBits ? keyBits : blockBits) {
221	case 128: ROUNDS = 10; break;
222	case 192: ROUNDS = 12; break;
223	case 256: ROUNDS = 14; break;
224	default : return (-3); /* this cannot happen */
225	}
226
227
228	for(j = 0; j < KC; j++)
229	for(i = 0; i < 4; i++)
230	tk[i][j] = k[i][j];
231	t = 0;
232	/* copy values into round key array */
233	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
234	for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
235
236	while (t < (ROUNDS+1)BC) { / while not enough round key material calculated */
237	/* calculate new values */
238	for(i = 0; i < 4; i++)
239	tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
240	tk[0][0] ^= rcon[rconpointer++];
241
242	if (KC != 8)
243	for(j = 1; j < KC; j++)
244	for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
245	else {
246	for(j = 1; j < KC/2; j++)
247	for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
248	for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
249	for(j = KC/2 + 1; j < KC; j++)
250	for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
251	}
252	/* copy values into round key array */
253	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
254	for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
255	}
256
257	return 0;
258	}
259
260	int rijndaelEncrypt (
261	word8 a[4][MAXBC],
262	int keyBits,
263	int blockBits,
264	word8 rk[MAXROUNDS+1][4][MAXBC])
265	{
266	/* Encryption of one block, general case.
267	*/
268	int r, BC, ROUNDS;
269
270	switch (blockBits) {
271	case 128: BC = 4; break;
272	case 192: BC = 6; break;
273	case 256: BC = 8; break;
274	default : return (-2);
275	}
276
277	switch (keyBits >= blockBits ? keyBits : blockBits) {
278	case 128: ROUNDS = 10; break;
279	case 192: ROUNDS = 12; break;
280	case 256: ROUNDS = 14; break;
281	default : return (-3); /* this cannot happen */
282	}
283
284	/* begin with a key addition
285	*/
286	KeyAddition(a,rk[0],BC);
287
288	/* ROUNDS-1 ordinary rounds
289	*/
290	for(r = 1; r < ROUNDS; r++) {
291	Substitution(a,S,BC);
292	ShiftRow(a,0,BC);
293	MixColumn(a,BC);
294	KeyAddition(a,rk[r],BC);
295	}
296
297	/* Last round is special: there is no MixColumn
298	*/
299	Substitution(a,S,BC);
300	ShiftRow(a,0,BC);
301	KeyAddition(a,rk[ROUNDS],BC);
302
303	return 0;
304	}
305
306	int rijndaelDecrypt (
307	word8 a[4][MAXBC],
308	int keyBits,
309	int blockBits,
310	word8 rk[MAXROUNDS+1][4][MAXBC])
311	{
312	int r, BC, ROUNDS;
313
314	switch (blockBits) {
315	case 128: BC = 4; break;
316	case 192: BC = 6; break;
317	case 256: BC = 8; break;
318	default : return (-2);
319	}
320
321	switch (keyBits >= blockBits ? keyBits : blockBits) {
322	case 128: ROUNDS = 10; break;
323	case 192: ROUNDS = 12; break;
324	case 256: ROUNDS = 14; break;
325	default : return (-3); /* this cannot happen */
326	}
327
328	/* To decrypt: apply the inverse operations of the encrypt routine,
329	* in opposite order
330	*
331	* (KeyAddition is an involution: it 's equal to its inverse)
332	* (the inverse of Substitution with table S is Substitution with the
333	* inverse table of S)
334	* (the inverse of Shiftrow is Shiftrow over a suitable distance)
335	*/
336
337	/* First the special round:
338	* without InvMixColumn
339	* with extra KeyAddition
340	*/
341	KeyAddition(a,rk[ROUNDS],BC);
342	Substitution(a,Si,BC);
343	ShiftRow(a,1,BC);
344
345	/* ROUNDS-1 ordinary rounds
346	*/
347	for(r = ROUNDS-1; r > 0; r--) {
348	KeyAddition(a,rk[r],BC);
349	InvMixColumn(a,BC);
350	Substitution(a,Si,BC);
351	ShiftRow(a,1,BC);
352	}
353
354	/* End with the extra key addition
355	*/
356
357	KeyAddition(a,rk[0],BC);
358
359	return 0;
360	}
361
29654253 A	362	#if !GLADMAN_AES_128_ENABLE
29654253 A	363
bac41a7b A	364	/*
	365	* All of these 128-bit-key-and-block routines require 32-bit word-aligned
	366