X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e2fac8b15b12a7979f72090454d850e612fc5b13..b0d623f7f2ae71ed96e60569f61f9a9a27016e80:/bsd/crypto/aes/gen/aescrypt.c
diff --git a/bsd/crypto/aes/gen/aescrypt.c b/bsd/crypto/aes/gen/aescrypt.c
new file mode 100644
index 000000000..31d4c81af
--- /dev/null
+++ b/bsd/crypto/aes/gen/aescrypt.c
@@ -0,0 +1,411 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 28/01/2004
+
+ This file contains the code for implementing encryption and decryption
+ for AES (Rijndael), with a block size of 16 bytes and key sizes of 16,
+ 24 and 32 bytes. It can optionally be replaced by code written in
+ assembler using NASM. For further details see the file aesopt.h.
+*/
+
+#include "aesopt.h"
+#include "aestab.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
+#define xo(y,x,c)   (s(y,c) ^= s(x, c))
+#define si(y,x,c)   (s(y,c) = word_in(x, c))
+#define so(y,x,c)   word_out(y, c, s(x,c))
+
+#if defined(ARRAYS)
+#define locals(y,x) x[4],y[4]
+#else
+#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
+#endif
+
+#define dtables(tab) const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
+#define itables(tab) tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]
+
+#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
+                     s(y,2) = s(x,2); s(y,3) = s(x,3);
+
+#define key_in(y,x,k)   ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3)
+#define cbc(y,x)        xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3)
+#define state_in(y,x)   si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
+#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
+
+#if defined(ENCRYPTION) && !defined(AES_ASM)
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code, but this is poor for decryption,
+   so we need to control this with the following VC++ pragmas.
+*/
+
+#if defined(_MSC_VER)
+#pragma optimize( "s", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values. In particular, the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile-time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction).
+*/
+
+#define fwd_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
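+/* Worked example (editor's note): because r and c are literal constants at
+   every use site, the compiler folds the conditionals above away entirely.
+   Row r of output column c reads column (c + r) mod 4 of the input state,
+   i.e. the AES ShiftRows offsets; for instance fwd_var(x,1,2) reduces at
+   compile time to just s(x,3), with no run-time branching.
+*/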
+#if defined(FT4_SET)
+#undef dec_fmvars
+# if defined(ENC_ROUND_CACHE_TABLES)
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
+# else
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
+# endif
+#elif defined(FT1_SET)
+#undef dec_fmvars
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
+#else
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
+#endif
+
+#if defined(FL4_SET)
+# if defined(LAST_ENC_ROUND_CACHE_TABLES)
+#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
+# else
+#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
+# endif
+#elif defined(FL1_SET)
+#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
+#else
+#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
+#endif
+
+aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
+                         unsigned char *out, const aes_encrypt_ctx cx[1])
+{   aes_32t locals(b0, b1);
+    const aes_32t *kp;
+    const aes_32t *kptr = cx->ks;
+#if defined(ENC_ROUND_CACHE_TABLES)
+    dtables(t_fn);
+#endif
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+    dtables(t_fl);
+#endif
+
+#if defined( dec_fmvars )
+    dec_fmvars; /* declare variables for fwd_mcol() if needed */
+#endif
+
+#if defined( AES_ERR_CHK )
+    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
+        return aes_error;
+#endif
+
+    // Load the IV into b0.
+    state_in(b0, in_iv);
+
+    for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)
+    {
+        kp = kptr;
+#if 0
+        // Read the plaintext into b1.
+        state_in(b1, in);
+        // Do the CBC with b0, which is either the IV or the ciphertext of the previous block.
+        cbc(b1, b0);
+
+        // Xor b1 with the key schedule to get things started.
+        key_in(b0, b1, kp);
+#else
+        // Since XOR is associative and commutative, we reorder the operations
+        // here so that the memory loads start early.
+        key_in(b1, b0, kp); // Xor b0 (the IV or previous ciphertext) with the key schedule; assign to b1
+        state_in(b0, in);   // Load the plaintext block into b0
+        cbc(b0, b1);        // Xor b0 with b1 and store in b0
+#endif
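+        // Editor's note: both paths above compute the same value, since
+        //     (C_prev ^ K0) ^ P == (P ^ C_prev) ^ K0
+        // for round-zero key K0, plaintext block P, and chaining value
+        // C_prev (the IV for the first block).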
+#if defined(ENC_ROUND_CACHE_TABLES)
+        itables(t_fn);
+#endif
+
+#if (ENC_UNROLL == FULL)
+
+        switch(cx->rn)
+        {
+        case 14:
+            round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
+            kp += 2 * N_COLS;
+            /* fall through */
+        case 12:
+            round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
+            kp += 2 * N_COLS;
+            /* fall through */
+        case 10:
+        default:
+            round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
+            round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
+            round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
+            round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
+            round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
+            round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+            itables(t_fl);
+#endif
+            round(fwd_lrnd, b0, b1, kp + 10 * N_COLS);
+        }
+
+#else
+
+        {   aes_32t rnd;
+#if (ENC_UNROLL == PARTIAL)
+            for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
+            {
+                kp += N_COLS;
+                round(fwd_rnd, b1, b0, kp);
+                kp += N_COLS;
+                round(fwd_rnd, b0, b1, kp);
+            }
+            kp += N_COLS;
+            round(fwd_rnd, b1, b0, kp);
+#else
+            for(rnd = 0; rnd < cx->rn - 1; ++rnd)
+            {
+                kp += N_COLS;
+                round(fwd_rnd, b1, b0, kp);
+                l_copy(b0, b1);
+            }
+#endif
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+            itables(t_fl);
+#endif
+            kp += N_COLS;
+            round(fwd_lrnd, b0, b1, kp);
+        }
+#endif
+
+        state_out(out, b0);
+    }
+
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
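+/* Example usage (editor's sketch, not part of the original file). The
+   key-schedule routine aes_encrypt_key128() is assumed to be the 128-bit
+   setup function declared alongside aes_encrypt_cbc() in this library's
+   headers:
+
+       aes_encrypt_ctx ctx[1];
+       unsigned char key[16];                  // 128-bit key
+       unsigned char iv[16];                   // one AES block
+       unsigned char pt[2 * 16], ct[2 * 16];   // two blocks of data
+
+       aes_encrypt_key128(key, ctx);
+       aes_encrypt_cbc(pt, iv, 2, ct, ctx);    // num_blk counts blocks, not bytes
+*/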
+#if defined(DECRYPTION) && !defined(AES_ASM)
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code, but this is poor for decryption,
+   so we need to control this with the following VC++ pragmas.
+*/
+
+#if defined(_MSC_VER)
+#pragma optimize( "t", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values. In particular, the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile-time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction).
+*/
+
+#define inv_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
+
+#if defined(IT4_SET)
+#undef dec_imvars
+# if defined(DEC_ROUND_CACHE_TABLES)
+#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
+# else
+#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
+# endif
+#elif defined(IT1_SET)
+#undef dec_imvars
+#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
+#else
+#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
+#endif
+
+#if defined(IL4_SET)
+# if defined(LAST_DEC_ROUND_CACHE_TABLES)
+#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
+# else
+#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
+# endif
+#elif defined(IL1_SET)
+#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
+#else
+#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
+#endif
+
+aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
+                         unsigned char *out, const aes_decrypt_ctx cx[1])
+{   aes_32t locals(b0, b1);
+    const aes_32t *kptr = cx->ks + cx->rn * N_COLS;
+    const aes_32t *kp;
+#if defined(DEC_ROUND_CACHE_TABLES)
+    dtables(t_in);
+#endif
+#if defined(LAST_DEC_ROUND_CACHE_TABLES)
+    dtables(t_il);
+#endif
+
+#if defined( dec_imvars )
+    dec_imvars; /* declare variables for inv_mcol() if needed */
+#endif
+
+#if defined( AES_ERR_CHK )
+    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
+        return aes_error;
+#endif
+
+#if defined(DEC_ROUND_CACHE_TABLES)
+    itables(t_in);
+#endif
+
+    // Process the blocks in reverse order, starting with the last block.
+    in += AES_BLOCK_SIZE * (num_blk - 1);
+    out += AES_BLOCK_SIZE * (num_blk - 1);
+    // Load the last block's ciphertext into b1.
+    state_in(b1, in);
+
+    for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)
+    {
+        kp = kptr;
+        // Xor the current ciphertext block (in b1) with the final round key
+        // to start the first inverse round.
+        key_in(b0, b1, kp);
+
+#if (DEC_UNROLL == FULL)
+
+        switch(cx->rn)
+        {
+        case 14:
+            round(inv_rnd, b1, b0, kp - 1 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 2 * N_COLS);
+            kp -= 2 * N_COLS;
+            /* fall through */
+        case 12:
+            round(inv_rnd, b1, b0, kp - 1 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 2 * N_COLS);
+            kp -= 2 * N_COLS;
+            /* fall through */
+        case 10:
+        default:
+            round(inv_rnd, b1, b0, kp - 1 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 2 * N_COLS);
+            round(inv_rnd, b1, b0, kp - 3 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 4 * N_COLS);
+            round(inv_rnd, b1, b0, kp - 5 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 6 * N_COLS);
+            round(inv_rnd, b1, b0, kp - 7 * N_COLS);
+            round(inv_rnd, b0, b1, kp - 8 * N_COLS);
+            round(inv_rnd, b1, b0, kp - 9 * N_COLS);
+#if defined(LAST_DEC_ROUND_CACHE_TABLES)
+            itables(t_il);
+#endif
+            round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
+        }
+
+#else
+
+        {   aes_32t rnd;
+#if (DEC_UNROLL == PARTIAL)
+            for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
+            {
+                kp -= N_COLS;
+                round(inv_rnd, b1, b0, kp);
+                kp -= N_COLS;
+                round(inv_rnd, b0, b1, kp);
+            }
+            kp -= N_COLS;
+            round(inv_rnd, b1, b0, kp);
+#else
+            for(rnd = 0; rnd < cx->rn - 1; ++rnd)
+            {
+                kp -= N_COLS;
+                round(inv_rnd, b1, b0, kp);
+                l_copy(b0, b1);
+            }
+#endif
+#if defined(LAST_DEC_ROUND_CACHE_TABLES)
+            itables(t_il);
+#endif
+            kp -= N_COLS;
+            round(inv_lrnd, b0, b1, kp);
+        }
+#endif
+
+        if (num_blk == 1)
+        {
+            // This is the first block, so use the IV for the CBC xor;
+            // there is no previous ciphertext block.
+            state_in(b1, in_iv);
+        }
+        else
+        {
+            in -= AES_BLOCK_SIZE;
+            state_in(b1, in);
+        }
+
+        // Do the CBC with b1, which is either the IV or the ciphertext of the previous block.
+        cbc(b0, b1);
+
+        state_out(out, b0);
+    }
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
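+/* Example usage (editor's sketch, not part of the original file). The
+   key-schedule routine aes_decrypt_key128() is assumed to be the 128-bit
+   setup function declared alongside aes_decrypt_cbc(). Continuing the
+   encryption example above, this recovers the original plaintext:
+
+       aes_decrypt_ctx dctx[1];
+       unsigned char recovered[2 * 16];
+
+       aes_decrypt_key128(key, dctx);
+       aes_decrypt_cbc(ct, iv, 2, recovered, dctx);   // recovered == pt
+*/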