[apple/bootx.git] / bootx.tproj / sl.subproj / aescrypt.c

/*\r
 ---------------------------------------------------------------------------\r
 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.\r
\r
 LICENSE TERMS\r
\r
 The free distribution and use of this software in both source and binary\r
 form is allowed (with or without changes) provided that:\r
\r
   1. distributions of this source code include the above copyright\r
      notice, this list of conditions and the following disclaimer;\r
\r
   2. distributions in binary form include the above copyright\r
      notice, this list of conditions and the following disclaimer\r
      in the documentation and/or other associated materials;\r
\r
   3. the copyright holder's name is not used to endorse products\r
      built using this software without specific written permission.\r
\r
 ALTERNATIVELY, provided that this notice is retained in full, this product\r
 may be distributed under the terms of the GNU General Public License (GPL),\r
 in which case the provisions of the GPL apply INSTEAD OF those given above.\r
\r
 DISCLAIMER\r
\r
 This software is provided 'as is' with no explicit or implied warranties\r
 in respect of its properties, including, but not limited to, correctness\r
 and/or fitness for purpose.\r
 ---------------------------------------------------------------------------\r
 Issue 28/01/2004\r
\r
 This file contains the code for implementing encryption and decryption\r
 for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It\r
 can optionally be replaced by code written in assembler using NASM. For\r
 further details see the file aesopt.h\r
*/\r
\r
#include "aesopt.h"\r
#include "aestab.h"\r
\r
#if defined(__cplusplus)\r
extern "C"\r
{\r
#endif\r
\r
/* Per-column building blocks.  s(), word_in() and word_out() are not defined
   in this file (presumably they come from aesopt.h -- confirm there).  In all
   of them c is the column number 0..3.

     ki: column c of y = column c of x XOR word c of the round key k
     xo: XOR column c of x into column c of y
     si: load column c of y from the byte buffer x via word_in()
     so: store column c of x to the byte buffer y via word_out()
*/
#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
#define xo(y,x,c)   (s(y,c) ^= s(x, c))
#define si(y,x,c)   (s(y,c) = word_in(x, c))
#define so(y,x,c)   word_out(y, c, s(x,c))

/* Declarator list for two four-column state variables: either two arrays of
   four words or eight individually named scalars (x0..x3, y0..y3).  */
#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* dtables declares four local pointers tab0..tab3; itables points them at
   the four sub-tables tab[0]..tab[3].  */
#define dtables(tab)     const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
#define itables(tab)     do { tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]; } while (0)

/* The whole-state operations below apply a per-column primitive to columns
   0..3.  Each one is wrapped in do { } while (0) so that it behaves as a
   single statement: it must be followed by a semicolon and is safe as the
   body of an unbraced if/else or loop.  */

/* copy all four columns of x into y */
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while (0)

/* y = x XOR the four round-key words at k */
#define key_in(y,x,k)   do { ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3); } while (0)
/* y ^= x (the CBC chaining XOR) */
#define cbc(y,x)        do { xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3); } while (0)
/* load the state y from the byte buffer x */
#define state_in(y,x)   do { si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3); } while (0)
/* store the state x to the byte buffer y */
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while (0)
/* apply the round macro rm to every column: y = rm(x) using round key k */
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while (0)
\r
#if defined(ENCRYPTION) && !defined(AES_ASM)\r
\r
/* Visual C++ .Net v7.1 provides the fastest encryption code when using\r
   Pentium optimisation with small code but this is poor for decryption\r
   so we need to control this with the following VC++ pragmas\r
*/\r
\r
#if defined(_MSC_VER)\r
#pragma optimize( "s", on )\r
#endif\r
\r
/* Given the column (c) of the output state variable, the following\r
   macros give the input state variables which are needed in its\r
   computation for each row (r) of the state. All the alternative\r
   macros give the same end values but expand into different ways\r
   of calculating these values.  In particular the complex macro\r
   used for dynamically variable block sizes is designed to expand\r
   to a compile time constant whenever possible but will expand to\r
   conditional clauses on some branches (I am grateful to Frank\r
   Yellin for this construction)\r
*/\r
\r
/* fwd_var(x,r,c): the column of state x that feeds row r of output column c
   in a forward round, i.e. column (c + r) mod 4.  r and c are parenthesised
   in the comparisons so that expression arguments are compared as a whole;
   with literal arguments the macro still folds to a single s(x,n).  */
#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))

/* fwd_rnd(y,x,k,c): column c of a normal forward round, y[c] = k[c] ^ f(x).
   The form of f depends on which forward tables were built: four tables
   (optionally through locally cached pointers), one table, or no table, in
   which case fwd_mcol() is applied to the no_table() result.  dec_fmvars is
   undefined in the table-driven cases, so the function that tests it then
   declares no extra variables for fwd_mcol().  */
#if defined(FT4_SET)
#undef  dec_fmvars
#  if defined(ENC_ROUND_CACHE_TABLES)
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
#  else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
#  endif
#elif defined(FT1_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
#endif

/* fwd_lrnd(y,x,k,c): column c of the last forward round.  It uses the t_fl
   tables, or t_sbox directly with no fwd_mcol() step in the table-free case.  */
#if defined(FL4_SET)
#  if defined(LAST_ENC_ROUND_CACHE_TABLES)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
#  else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
#  endif
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
#endif
\r
aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,\r
					 unsigned char *out, const aes_encrypt_ctx cx[1])\r
{   aes_32t         locals(b0, b1);\r
    const aes_32t   *kp = cx->ks;\r
#if defined(ENC_ROUND_CACHE_TABLES)\r
	dtables(t_fn);\r
#endif\r
#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
	dtables(t_fl);\r
#endif\r
\r
#if defined( dec_fmvars )\r
    dec_fmvars; /* declare variables for fwd_mcol() if needed */\r
#endif\r
\r
#if defined( AES_ERR_CHK )\r
    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )\r
        return aes_error;\r
#endif\r
\r
	// Load IV into b0.\r
	state_in(b0, in_iv);\r
\r
	for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)\r
	{\r
#if 0\r
		// Read the plaintext into b1\r
		state_in(b1, in);\r
		// Do the CBC with b0 which is either the iv or the ciphertext of the previous block.\r
		cbc(b1, b0);\r
\r
		// Xor b1 with the key schedule to get things started.\r
		key_in(b0, b1, kp);\r
#else\r
		// Since xor is associative we mess with the ordering here to get the loads started early\r
		key_in(b1, b0, kp);  // Xor b0(IV) with the key schedule and assign to b1\r
		state_in(b0, in);    // Load block into b0\r
		cbc(b0, b1);         // Xor b0 with b1 and store in b0\r
#endif\r
\r
#if defined(ENC_ROUND_CACHE_TABLES)\r
		itables(t_fn);\r
#endif\r
\r
#if (ENC_UNROLL == FULL)\r
\r
		switch(cx->rn)\r
		{\r
		case 14:\r
			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);\r
			kp += 2 * N_COLS;\r
		case 12:\r
			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);\r
			kp += 2 * N_COLS;\r
		case 10:\r
		default:\r
			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);\r
			round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);\r
			round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);\r
			round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);\r
			round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);\r
			round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);\r
#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
			itables(t_fl);\r
#endif\r
			round(fwd_lrnd, b0, b1, kp +10 * N_COLS);\r
		}\r
\r
#else\r
\r
		{   aes_32t    rnd;\r
#if (ENC_UNROLL == PARTIAL)\r
			for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)\r
			{\r
				kp += N_COLS;\r
				round(fwd_rnd, b1, b0, kp);\r
				kp += N_COLS;\r
				round(fwd_rnd, b0, b1, kp);\r
			}\r
			kp += N_COLS;\r
			round(fwd_rnd,  b1, b0, kp);\r
#else\r
			for(rnd = 0; rnd < cx->rn - 1; ++rnd)\r
			{\r
				kp += N_COLS;\r
				round(fwd_rnd, b1, b0, kp);\r
				l_copy(b0, b1);\r
			}\r
#endif\r
#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
			itables(t_fl);\r
#endif\r
			kp += N_COLS;\r
			round(fwd_lrnd, b0, b1, kp);\r
		}\r
#endif\r
	\r
		state_out(out, b0);\r
	}\r
\r
#if defined( AES_ERR_CHK )\r
    return aes_good;\r
#endif\r
}\r
\r
#endif\r
\r
#if defined(DECRYPTION) && !defined(AES_ASM)\r
\r
/* Visual C++ .Net v7.1 provides the fastest encryption code when using\r
   Pentium optimisation with small code but this is poor for decryption\r
   so we need to control this with the following VC++ pragmas\r
*/\r
\r
#if defined(_MSC_VER)\r
#pragma optimize( "t", on )\r
#endif\r
\r
/* Given the column (c) of the output state variable, the following\r
   macros give the input state variables which are needed in its\r
   computation for each row (r) of the state. All the alternative\r
   macros give the same end values but expand into different ways\r
   of calculating these values.  In particular the complex macro\r
   used for dynamically variable block sizes is designed to expand\r
   to a compile time constant whenever possible but will expand to\r
   conditional clauses on some branches (I am grateful to Frank\r
   Yellin for this construction)\r
*/\r
\r
/* inv_var(x,r,c): the column of state x that feeds row r of output column c
   in an inverse round, i.e. column (c - r) mod 4.  r and c are parenthesised
   in the comparisons so that expression arguments are compared as a whole;
   with literal arguments the macro still folds to a single s(x,n).  */
#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))

/* inv_rnd(y,x,k,c): column c of a normal inverse round.  The table-driven
   forms compute k[c] ^ f(x); the table-free form applies inv_mcol() to
   k[c] ^ no_table(...).  dec_imvars is undefined in the table-driven cases,
   so the function that tests it then declares no extra variables for
   inv_mcol().  */
#if defined(IT4_SET)
#undef  dec_imvars
#  if defined(DEC_ROUND_CACHE_TABLES)
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
#  else
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
#  endif
#elif defined(IT1_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
#endif

/* inv_lrnd(y,x,k,c): column c of the last inverse round.  It uses the t_il
   tables, or t_ibox directly with no inv_mcol() step in the table-free case.  */
#if defined(IL4_SET)
#  if defined(LAST_DEC_ROUND_CACHE_TABLES)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
#  else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
#  endif
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
#endif
\r
aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,\r
					 unsigned char *out, const aes_decrypt_ctx cx[1])\r
{   aes_32t        locals(b0, b1);\r
    const aes_32t *kp = cx->ks + cx->rn * N_COLS;\r
#if defined(DEC_ROUND_CACHE_TABLES)\r
	dtables(t_in);\r
#endif\r
#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
	dtables(t_il);\r
#endif\r
\r
#if defined( dec_imvars )\r
    dec_imvars; /* declare variables for inv_mcol() if needed */\r
#endif\r
	\r
#if defined( AES_ERR_CHK )\r
    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )\r
        return aes_error;\r
#endif\r
\r
#if defined(DEC_ROUND_CACHE_TABLES)\r
	itables(t_in);\r
#endif	\r
	\r
	in += AES_BLOCK_SIZE * (num_blk - 1);\r
	out += AES_BLOCK_SIZE * (num_blk - 1);\r
	// Load the last block's ciphertext into b1\r
	state_in(b1, in);\r
\r
	for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)\r
	{\r
		// Do the xor part of state_in, where b1 is the previous block's ciphertext.\r
		key_in(b0, b1, kp);\r
\r
#if (DEC_UNROLL == FULL)\r
	\r
		switch(cx->rn)\r
		{\r
		case 14:\r
			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);\r
			kp -= 2 * N_COLS;\r
		case 12:\r
			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);\r
			kp -= 2 * N_COLS;\r
		case 10:\r
		default:\r
			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);\r
			round(inv_rnd,  b1, b0, kp -  3 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  4 * N_COLS);\r
			round(inv_rnd,  b1, b0, kp -  5 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  6 * N_COLS);\r
			round(inv_rnd,  b1, b0, kp -  7 * N_COLS);\r
			round(inv_rnd,  b0, b1, kp -  8 * N_COLS);\r
			round(inv_rnd,  b1, b0, kp -  9 * N_COLS);\r
#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
			itables(t_il);\r
#endif	\r
			round(inv_lrnd, b0, b1, kp - 10 * N_COLS);\r
		}\r
\r
#else\r
	\r
		{   aes_32t    rnd;\r
#if (DEC_UNROLL == PARTIAL)\r
			for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)\r
			{\r
				kp -= N_COLS;\r
				round(inv_rnd, b1, b0, kp);\r
				kp -= N_COLS;\r
				round(inv_rnd, b0, b1, kp);\r
			}\r
			kp -= N_COLS;\r
			round(inv_rnd, b1, b0, kp);\r
#else\r
			for(rnd = 0; rnd < cx->rn - 1; ++rnd)\r
			{\r
				kp -= N_COLS;\r
				round(inv_rnd, b1, b0, kp);\r
				l_copy(b0, b1);\r
			}\r
#endif\r
#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
			itables(t_il);\r
#endif	\r
			kp -= N_COLS;\r
			round(inv_lrnd, b0, b1, kp);\r
		}\r
#endif\r
\r
		if (num_blk == 1)\r
		{\r
			// We are doing the first block so we need the IV rather than the previous\r
			// block for CBC (there is no previous block)\r
			state_in(b1, in_iv);\r
		}\r
		else\r
		{\r
			in -= AES_BLOCK_SIZE;\r
			state_in(b1, in);\r
		}\r
\r
		// Do the CBC with b1 which is either the IV or the ciphertext of the previous block.\r
		cbc(b0, b1);\r
\r
		state_out(out, b0);\r
	}\r
#if defined( AES_ERR_CHK )\r
    return aes_good;\r
#endif\r
}\r
\r
#endif\r
\r
#if defined(__cplusplus)\r
}\r
#endif\r
Commit	Line	Data
8be739c0 A	1	/*\r
	2	---------------------------------------------------------------------------\r
	3	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.\r
	4	\r
	5	LICENSE TERMS\r
	6	\r
	7	The free distribution and use of this software in both source and binary\r
	8	form is allowed (with or without changes) provided that:\r
	9	\r
	10	1. distributions of this source code include the above copyright\r
	11	notice, this list of conditions and the following disclaimer;\r
	12	\r
	13	2. distributions in binary form include the above copyright\r
	14	notice, this list of conditions and the following disclaimer\r
	15	in the documentation and/or other associated materials;\r
	16	\r
	17	3. the copyright holder's name is not used to endorse products\r
	18	built using this software without specific written permission.\r
	19	\r
	20	ALTERNATIVELY, provided that this notice is retained in full, this product\r
	21	may be distributed under the terms of the GNU General Public License (GPL),\r
	22	in which case the provisions of the GPL apply INSTEAD OF those given above.\r
	23	\r
	24	DISCLAIMER\r
	25	\r
	26	This software is provided 'as is' with no explicit or implied warranties\r
	27	in respect of its properties, including, but not limited to, correctness\r
	28	and/or fitness for purpose.\r
	29	---------------------------------------------------------------------------\r
	30	Issue 28/01/2004\r
	31	\r
	32	This file contains the code for implementing encryption and decryption\r
	33	for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It\r
	34	can optionally be replaced by code written in assembler using NASM. For\r
	35	further details see the file aesopt.h\r
	36	*/\r
	37	\r
	38	#include "aesopt.h"\r
	39	#include "aestab.h"\r
	40	\r
	41	#if defined(__cplusplus)\r
	42	extern "C"\r
	43	{\r
	44	#endif\r
	45	\r
	46	#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])\r
	47	#define xo(y,x,c) (s(y,c) ^= s(x, c))\r
	48	#define si(y,x,c) (s(y,c) = word_in(x, c))\r
	49	#define so(y,x,c) word_out(y, c, s(x,c))\r
	50	\r
	51	#if defined(ARRAYS)\r
	52	#define locals(y,x) x[4],y[4]\r
	53	#else\r
	54	#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3\r
	55	#endif\r
	56	\r
	57	#define dtables(tab) const aes_32t tab##0, tab##1, tab##2, tab##3\r
	58	#define itables(tab) tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]\r
	59	\r
	60	#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \\r
	61	s(y,2) = s(x,2); s(y,3) = s(x,3);\r
	62	\r
	63	#define key_in(y,x,k) ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3)\r
	64	#define cbc(y,x) xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3)\r
65	#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)\r
66	#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)\r
67	#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)\r
68	\r
69	#if defined(ENCRYPTION) && !defined(AES_ASM)\r
70	\r
71	/* Visual C++ .Net v7.1 provides the fastest encryption code when using\r
72	Pentium optimiation with small code but this is poor for decryption\r
73	so we need to control this with the following VC++ pragmas\r
74	*/\r
75	\r
76	#if defined(_MSC_VER)\r
77	#pragma optimize( "s", on )\r
78	#endif\r
79	\r
80	/* Given the column (c) of the output state variable, the following\r
81	macros give the input state variables which are needed in its\r
82	computation for each row (r) of the state. All the alternative\r
83	macros give the same end values but expand into different ways\r
84	of calculating these values. In particular the complex macro\r
85	used for dynamically variable block sizes is designed to expand\r
86	to a compile time constant whenever possible but will expand to\r
87	conditional clauses on some branches (I am grateful to Frank\r
88	Yellin for this construction)\r
89	*/\r
90	\r
91	#define fwd_var(x,r,c)\\r
92	( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\\r
93	: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\\r
94	: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\\r
95	: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))\r
96	\r
97	#if defined(FT4_SET)\r
98	#undef dec_fmvars\r
99	# if defined(ENC_ROUND_CACHE_TABLES)\r
100	#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))\r
101	# else\r
102	#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))\r
103	# endif\r
104	#elif defined(FT1_SET)\r
105	#undef dec_fmvars\r
106	#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))\r
107	#else\r
108	#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))\r
109	#endif\r
110	\r
111	#if defined(FL4_SET)\r
112	# if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
113	#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))\r
114	# else\r
115	#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))\r
116	# endif\r
117	#elif defined(FL1_SET)\r
118	#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))\r
119	#else\r
120	#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))\r
121	#endif\r
122	\r
123	aes_rval aes_encrypt_cbc(const unsigned char in, const unsigned char in_iv, unsigned int num_blk,\r
124	unsigned char *out, const aes_encrypt_ctx cx[1])\r
125	{ aes_32t locals(b0, b1);\r
126	const aes_32t *kp = cx->ks;\r
127	#if defined(ENC_ROUND_CACHE_TABLES)\r
128	dtables(t_fn);\r
129	#endif\r
130	#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
131	dtables(t_fl);\r
132	#endif\r
133	\r
134	#if defined( dec_fmvars )\r
135	dec_fmvars; /* declare variables for fwd_mcol() if needed */\r
136	#endif\r
137	\r
138	#if defined( AES_ERR_CHK )\r
139	if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )\r
140	return aes_error;\r
141	#endif\r
142	\r
143	// Load IV into b0.\r
144	state_in(b0, in_iv);\r
145	\r
146	for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)\r
147	{\r
148	#if 0\r
149	// Read the plaintext into b1\r
150	state_in(b1, in);\r
151	// Do the CBC with b0 which is either the iv or the ciphertext of the previous block.\r
152	cbc(b1, b0);\r
153	\r
154	// Xor b1 with the key schedule to get things started.\r
155	key_in(b0, b1, kp);\r
156	#else\r
157	// Since xor is associative we mess with the ordering here to get the loads started early\r
158	key_in(b1, b0, kp); // Xor b0(IV) with the key schedule and assign to b1\r
159	state_in(b0, in); // Load block into b0\r
160	cbc(b0, b1); // Xor b0 with b1 and store in b0\r
161	#endif\r
162	\r
163	#if defined(ENC_ROUND_CACHE_TABLES)\r
164	itables(t_fn);\r
165	#endif\r
166	\r
167	#if (ENC_UNROLL == FULL)\r
168	\r
169	switch(cx->rn)\r
170	{\r
171	case 14:\r
172	round(fwd_rnd, b1, b0, kp + 1 * N_COLS);\r
173	round(fwd_rnd, b0, b1, kp + 2 * N_COLS);\r
174	kp += 2 * N_COLS;\r
175	case 12:\r
176	round(fwd_rnd, b1, b0, kp + 1 * N_COLS);\r
177	round(fwd_rnd, b0, b1, kp + 2 * N_COLS);\r
178	kp += 2 * N_COLS;\r
179	case 10:\r
180	default:\r
181	round(fwd_rnd, b1, b0, kp + 1 * N_COLS);\r
182	round(fwd_rnd, b0, b1, kp + 2 * N_COLS);\r
183	round(fwd_rnd, b1, b0, kp + 3 * N_COLS);\r
184	round(fwd_rnd, b0, b1, kp + 4 * N_COLS);\r
185	round(fwd_rnd, b1, b0, kp + 5 * N_COLS);\r
186	round(fwd_rnd, b0, b1, kp + 6 * N_COLS);\r
187	round(fwd_rnd, b1, b0, kp + 7 * N_COLS);\r
188	round(fwd_rnd, b0, b1, kp + 8 * N_COLS);\r
189	round(fwd_rnd, b1, b0, kp + 9 * N_COLS);\r
190	#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
191	itables(t_fl);\r
192	#endif\r
193	round(fwd_lrnd, b0, b1, kp +10 * N_COLS);\r
194	}\r
195	\r
196	#else\r
197	\r
198	{ aes_32t rnd;\r
199	#if (ENC_UNROLL == PARTIAL)\r
200	for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)\r
201	{\r
202	kp += N_COLS;\r
203	round(fwd_rnd, b1, b0, kp);\r
204	kp += N_COLS;\r
205	round(fwd_rnd, b0, b1, kp);\r
206	}\r
207	kp += N_COLS;\r
208	round(fwd_rnd, b1, b0, kp);\r
209	#else\r
210	for(rnd = 0; rnd < cx->rn - 1; ++rnd)\r
211	{\r
212	kp += N_COLS;\r
213	round(fwd_rnd, b1, b0, kp);\r
214	l_copy(b0, b1);\r
215	}\r
216	#endif\r
217	#if defined(LAST_ENC_ROUND_CACHE_TABLES)\r
218	itables(t_fl);\r
219	#endif\r
220	kp += N_COLS;\r
221	round(fwd_lrnd, b0, b1, kp);\r
222	}\r
223	#endif\r
224	\r
225	state_out(out, b0);\r
226	}\r
227	\r
228	#if defined( AES_ERR_CHK )\r
229	return aes_good;\r
230	#endif\r
231	}\r
232	\r
233	#endif\r
234	\r
235	#if defined(DECRYPTION) && !defined(AES_ASM)\r
236	\r
237	/* Visual C++ .Net v7.1 provides the fastest encryption code when using\r
238	Pentium optimiation with small code but this is poor for decryption\r
239	so we need to control this with the following VC++ pragmas\r
240	*/\r
241	\r
242	#if defined(_MSC_VER)\r
243	#pragma optimize( "t", on )\r
244	#endif\r
245	\r
246	/* Given the column (c) of the output state variable, the following\r
247	macros give the input state variables which are needed in its\r
248	computation for each row (r) of the state. All the alternative\r
249	macros give the same end values but expand into different ways\r
250	of calculating these values. In particular the complex macro\r
251	used for dynamically variable block sizes is designed to expand\r
252	to a compile time constant whenever possible but will expand to\r
253	conditional clauses on some branches (I am grateful to Frank\r
254	Yellin for this construction)\r
255	*/\r
256	\r
257	#define inv_var(x,r,c)\\r
258	( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\\r
259	: r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\\r
260	: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\\r
261	: ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))\r
262	\r
263	#if defined(IT4_SET)\r
264	#undef dec_imvars\r
265	# if defined(DEC_ROUND_CACHE_TABLES)\r
266	#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))\r
267	# else\r
268	#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))\r
269	# endif\r
270	#elif defined(IT1_SET)\r
271	#undef dec_imvars\r
272	#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))\r
273	#else\r
274	#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))\r
275	#endif\r
276	\r
277	#if defined(IL4_SET)\r
278	# if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
279	#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))\r
280	# else\r
281	#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))\r
282	# endif\r
283	#elif defined(IL1_SET)\r
284	#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))\r
285	#else\r
286	#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))\r
287	#endif\r
288	\r
289	aes_rval aes_decrypt_cbc(const unsigned char in, const unsigned char in_iv, unsigned int num_blk,\r
290	unsigned char *out, const aes_decrypt_ctx cx[1])\r
291	{ aes_32t locals(b0, b1);\r
292	const aes_32t kp = cx->ks + cx->rn N_COLS;\r
293	#if defined(DEC_ROUND_CACHE_TABLES)\r
294	dtables(t_in);\r
295	#endif\r
296	#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
297	dtables(t_il);\r
298	#endif\r
299	\r
300	#if defined( dec_imvars )\r
301	dec_imvars; /* declare variables for inv_mcol() if needed */\r
302	#endif\r
303	\r
304	#if defined( AES_ERR_CHK )\r
305	if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )\r
306	return aes_error;\r
307	#endif\r
308	\r
309	#if defined(DEC_ROUND_CACHE_TABLES)\r
310	itables(t_in);\r
311	#endif \r
312	\r
313	in += AES_BLOCK_SIZE * (num_blk - 1);\r
314	out += AES_BLOCK_SIZE * (num_blk - 1);\r
315	// Load the last block's ciphertext into b1\r
316	state_in(b1, in);\r
317	\r
318	for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)\r
319	{\r
320	// Do the xor part of state_in, where b1 is the previous block's ciphertext.\r
321	key_in(b0, b1, kp);\r
322	\r
323	#if (DEC_UNROLL == FULL)\r
324	\r
325	switch(cx->rn)\r
326	{\r
327	case 14:\r
328	round(inv_rnd, b1, b0, kp - 1 * N_COLS);\r
329	round(inv_rnd, b0, b1, kp - 2 * N_COLS);\r
330	kp -= 2 * N_COLS;\r
331	case 12:\r
332	round(inv_rnd, b1, b0, kp - 1 * N_COLS);\r
333	round(inv_rnd, b0, b1, kp - 2 * N_COLS);\r
334	kp -= 2 * N_COLS;\r
335	case 10:\r
336	default:\r
337	round(inv_rnd, b1, b0, kp - 1 * N_COLS);\r
338	round(inv_rnd, b0, b1, kp - 2 * N_COLS);\r
339	round(inv_rnd, b1, b0, kp - 3 * N_COLS);\r
340	round(inv_rnd, b0, b1, kp - 4 * N_COLS);\r
341	round(inv_rnd, b1, b0, kp - 5 * N_COLS);\r
342	round(inv_rnd, b0, b1, kp - 6 * N_COLS);\r
343	round(inv_rnd, b1, b0, kp - 7 * N_COLS);\r
344	round(inv_rnd, b0, b1, kp - 8 * N_COLS);\r
345	round(inv_rnd, b1, b0, kp - 9 * N_COLS);\r
346	#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
347	itables(t_il);\r
348	#endif \r
349	round(inv_lrnd, b0, b1, kp - 10 * N_COLS);\r
350	}\r
351	\r
352	#else\r
353	\r
354	{ aes_32t rnd;\r
355	#if (DEC_UNROLL == PARTIAL)\r
356	for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)\r
357	{\r
358	kp -= N_COLS;\r
359	round(inv_rnd, b1, b0, kp);\r
360	kp -= N_COLS;\r
361	round(inv_rnd, b0, b1, kp);\r
362	}\r
363	kp -= N_COLS;\r
364	round(inv_rnd, b1, b0, kp);\r
365	#else\r
366	for(rnd = 0; rnd < cx->rn - 1; ++rnd)\r
367	{\r
368	kp -= N_COLS;\r
369	round(inv_rnd, b1, b0, kp);\r
370	l_copy(b0, b1);\r
371	}\r
372	#endif\r
373	#if defined(LAST_DEC_ROUND_CACHE_TABLES)\r
374	itables(t_il);\r
375	#endif \r
376	kp -= N_COLS;\r
377	round(inv_lrnd, b0, b1, kp);\r
378	}\r
379	#endif\r
380	\r
381	if (num_blk == 1)\r
382	{\r
383	// We are doing the first block so we need the IV rather than the previous\r
384	// block for CBC (there is no previous block)\r
385	state_in(b1, in_iv);\r
386	}\r
387	else\r
388	{\r
389	in -= AES_BLOCK_SIZE;\r
390	state_in(b1, in);\r
391	}\r
392	\r
393	// Do the CBC with b1 which is either the IV or the ciphertext of the previous block.\r
394	cbc(b0, b1);\r
395	\r
396	state_out(out, b0);\r
397	}\r
398	#if defined( AES_ERR_CHK )\r
399	return aes_good;\r
400	#endif\r
401	}\r
402	\r
403	#endif\r
404	\r
405	#if defined(__cplusplus)\r
406	}\r
407	#endif\r