2 ---------------------------------------------------------------------------
3 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
7 The free distribution and use of this software in both source and binary
8 form is allowed (with or without changes) provided that:
10 1. distributions of this source code include the above copyright
11 notice, this list of conditions and the following disclaimer;
13 2. distributions in binary form include the above copyright
14 notice, this list of conditions and the following disclaimer
15 in the documentation and/or other associated materials;
17 3. the copyright holder's name is not used to endorse products
18 built using this software without specific written permission.
20 ALTERNATIVELY, provided that this notice is retained in full, this product
21 may be distributed under the terms of the GNU General Public License (GPL),
22 in which case the provisions of the GPL apply INSTEAD OF those given above.
26 This software is provided 'as is' with no explicit or implied warranties
27 in respect of its properties, including, but not limited to, correctness
28 and/or fitness for purpose.
29 ---------------------------------------------------------------------------
32 These subroutines implement multiple block AES modes for ECB, CBC, CFB,
33 OFB and CTR encryption. The code provides support for the VIA Advanced
34 Cryptography Engine (ACE).
36 NOTE: In the following subroutines, the AES contexts (ctx) must be
37 16 byte aligned if VIA ACE is being used
40 /* modified 3/5/10 cclee */
41 /* Clean up those related to VIA ACE and hand optimize aes_encrypt_cbc and aes_decrypt_cbc */
42 /* move the xmm registers save/restore originally inside the callee functions into these 2 caller functions */
44 /* add code comments/description and HW AES detection and execution branch cclee 3-13-10 */
47 #include <i386/cpu_capabilities.h> // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
49 #include <System/i386/cpu_capabilities.h> // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
55 // aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c
56 // would add the implementation if needed
57 // they are now compiled from aes_modes.c
59 aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
60 int len, const aes_encrypt_ctx ctx[1])
63 if(len & (AES_BLOCK_SIZE - 1)) return 1;
65 aes_encrypt(ibuf, obuf, ctx);
66 ibuf += AES_BLOCK_SIZE;
67 obuf += AES_BLOCK_SIZE;
72 aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
73 int len, const aes_decrypt_ctx ctx[1])
76 if(len & (AES_BLOCK_SIZE - 1)) return 1;
78 aes_decrypt(ibuf, obuf, ctx);
79 ibuf += AES_BLOCK_SIZE;
80 obuf += AES_BLOCK_SIZE;
87 aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
88 unsigned char *obuf, const aes_encrypt_ctx ctx[1])
93 for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);
96 iv ^= ibuf; // 128-bit
97 aes_encrypt(iv, iv, ctx);
98 memcpy(obuf, iv, AES_BLOCK_SIZE);
99 ibuf += AES_BLOCK_SIZE;
100 obuf += AES_BLOCK_SIZE;
// ---------------------------------------------------------------------------
// aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv,
//                          unsigned int num_blk, unsigned char *obuf,
//                          const aes_encrypt_ctx ctx[1])
// CBC-encrypt num_blk 16-byte blocks: iv ^= ibuf; iv = AES(iv); obuf = iv; advance.
// Dispatches to _aes_encrypt_cbc_hw when the AES-NI capability bit is set;
// otherwise runs the SW path via _aes_encrypt_xmm_no_save.
// Returns 0 in %eax on success.
// NOTE(review): this excerpt interleaves the __x86_64__, i386-dylib and
// i386-kernel variants; the matching #else/#endif lines are elided here —
// confirm control flow against the full file before editing.
// ---------------------------------------------------------------------------
110 .globl _aes_encrypt_cbc
114 // if AES HW detected, branch to AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)
115 // o.w., fall through to the original AES-SW function
117 #if defined __x86_64__
118 movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities (PIC access via GOT)
119 mov (%rax), %eax // %eax = __cpu_capabilities
122 leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities (i386 non-PIC variant)
123 mov (%eax), %eax // %eax = __cpu_capabilities
125 mov _COMM_PAGE_CPU_CAPABILITIES, %eax // kernel/commpage variant: read capabilities directly
128 test $(kHasAES), %eax // kHasAES & __cpu_capabilities
129 jne _aes_encrypt_cbc_hw // if AES HW detected, branch to HW-specific code
131 // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
135 push %ebx // callee-saved; to be used as ibuf
136 push %edi // callee-saved; to be used as obuf
137 sub $(16+16+7*16), %esp // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
138 mov %esi, 12(%esp) // save %esi in the unused 4 bytes; %esi will be reused as num_blk
149 sub $(8+16+5*16+16), %rsp // x86_64: 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)
154 // save xmm registers for kernel use
155 // xmm6-xmm7 will be used locally
156 // xmm0-xmm2 (x86_64) or xmm0-/xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (non-restored)
157 // there is a hole not used for xmm, which is 48(sp).
158 // it has been used to store iv (16-bytes) in i386 code
159 // for consistency between i386 and x86_64, this hole is dummied in x86_64 code
160 // also the 1st 16 bytes (sp) is dummied in x86_64 code
169 movaps %xmm3, 112(sp) // spill caller xmm state (kernel has no lazy FP save)
170 movaps %xmm4, 128(sp)
174 // set up registers from calling arguments
178 mov 12(%ebp), %eax // in_iv (i386 cdecl: args at 8(%ebp)..24(%ebp))
179 mov 24(%ebp), %edx // ctx
180 movups (%eax), %xmm7 // xmm7 = chaining value (iv); unaligned-safe load
181 lea 48(%esp), %eax // &iv[0] (the 48(sp) hole described above)
182 mov %eax, (%esp) // 1st calling argument (iv) for aes_encrypt
183 mov %eax, 4(%esp) // 2nd calling argument (iv) for aes_encrypt; in-place encrypt
184 mov %edx, 8(%esp) // 3rd calling argument (ctx) for aes_encrypt
185 mov 8(%ebp), %ebx // ibuf
186 mov 16(%ebp), %esi // num_blk
187 mov 20(%ebp), %edi // obuf
193 #else // __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8
195 mov %rdi, %rbx // ibuf (callee-saved so it survives the encrypt call)
196 lea 48(sp), %r12 // &iv
197 movups (%rsi), %xmm7 // xmm7 = chaining value (iv)
198 mov %rdx, %r13 // num_blk
199 mov %rcx, %r14 // obuf
204 #define num_blk %r13d
210 cmp $1, num_blk // num_blk vs 1 (signed compare; num_blk is treated as int here)
211 jl 9f // if num_blk < 1, branch to bypass the main loop
213 movups (ibuf), %xmm6 // xmm6 = next plaintext block
215 lea 48(sp), %eax // &iv[0] (i386 arm of the loop)
216 pxor %xmm6, %xmm7 // iv ^= ibuf (CBC chaining)
217 movups %xmm7, (%eax) // store iv to stack for the call
219 pxor %xmm6, %xmm7 // iv ^= ibuf (x86_64 arm)
220 movups %xmm7, (iv) // store iv to stack for the call
221 mov iv, %rdi // 1st calling argument for aes_encrypt
222 mov iv, %rsi // 2nd calling argument for aes_encrypt (encrypt in place)
223 mov ctx, %rdx // 3rd calling argument for aes_encrypt
225 call _aes_encrypt_xmm_no_save // aes_encrypt(iv, iv, ctx); clobbers xmm0-xmm2/xmm4, not xmm6/xmm7
227 leal 48(%esp), %eax // &iv[0]
228 movups (%eax), %xmm7 // reload iv = ciphertext (becomes next chaining value)
230 movups (iv), %xmm7 // reload iv (x86_64 arm)
232 movups %xmm7, (obuf) // memcpy(obuf, iv, AES_BLOCK_SIZE);
233 add $16, ibuf // ibuf += AES_BLOCK_SIZE;
234 add $16, obuf // obuf += AES_BLOCK_SIZE;
235 sub $1, num_blk // num_blk --
236 jg 0b // if num_blk > 0, repeat the loop
241 // restore xmm registers due to kernel use
249 movaps 112(sp), %xmm3
250 movaps 128(sp), %xmm4
254 xor %eax, %eax // return 0 for SUCCESS (zeroing idiom)
257 mov 12(%esp), %esi // restore %esi (saved in the spare slot at entry)
258 add $(16+16+7*16), %esp // undo i386 frame: 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
262 add $(8+16+5*16+16), %rsp // undo x86_64 frame: 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (consistency)
273 aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
274 unsigned char *obuf, const aes_decrypt_ctx ctx[1])
276 unsigned char iv[16], tmp[16];
279 for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);
283 memcpy(tmp, ibuf, AES_BLOCK_SIZE);
284 aes_decrypt(ibuf, obuf, ctx);
286 memcpy(iv, tmp, AES_BLOCK_SIZE);
287 ibuf += AES_BLOCK_SIZE;
288 obuf += AES_BLOCK_SIZE;
// ---------------------------------------------------------------------------
// aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv,
//                          unsigned int num_blk, unsigned char *obuf,
//                          const aes_decrypt_ctx ctx[1])
// CBC-decrypt num_blk 16-byte blocks: tmp = ibuf; obuf = AES^-1(ibuf) ^ iv;
// iv = tmp; advance. Dispatches to _aes_decrypt_cbc_hw when AES-NI is present.
// Chaining value lives in xmm7; saved ciphertext (tmp) lives in xmm6, so
// neither needs a stack round-trip inside the loop.
// NOTE(review): this excerpt interleaves the __x86_64__, i386-dylib and
// i386-kernel variants; the matching #else/#endif lines and the shared
// finishing code (see last line) are elided — confirm against the full file.
// ---------------------------------------------------------------------------
297 .globl _aes_decrypt_cbc
301 // if AES HW detected, branch to AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)
302 // o.w., fall through to the original AES-SW function
304 #if defined __x86_64__
305 movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities (PIC access via GOT)
306 mov (%rax), %eax // %eax = __cpu_capabilities
309 leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities (i386 non-PIC variant)
310 mov (%eax), %eax // %eax = __cpu_capabilities
312 mov _COMM_PAGE_CPU_CAPABILITIES, %eax // kernel/commpage variant: read capabilities directly
315 test $(kHasAES), %eax // kHasAES & __cpu_capabilities
316 jne _aes_decrypt_cbc_hw // if AES HW detected, branch to HW-specific code
318 // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
322 push %ebx // callee-saved; to be used as ibuf
323 push %edi // callee-saved; to be used as obuf
324 sub $(16+16+7*16), %esp // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
325 mov %esi, 12(%esp) // save %esi in the unused 4 bytes; %esi will be reused as num_blk
336 sub $(8+16+5*16+16), %rsp // x86_64: 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)
341 // save xmm registers for kernel use
342 // xmm6-xmm7 will be used locally
343 // xmm0-xmm2 (x86_64) or xmm0-/xmm4 (i386) will be used inside _aes_decrypt_xmm_no_save (non-restored)
344 // there is a hole not used for xmm, which is 48(sp).
345 // it has been used to store iv (16-bytes) in i386 code
346 // for consistency between i386 and x86_64, this hole is dummied in x86_64 code
347 // also the 1st 16 bytes (sp) is dummied in x86_64 code
356 movaps %xmm3, 112(sp) // spill caller xmm state (kernel has no lazy FP save)
357 movaps %xmm4, 128(sp)
361 // set up registers from calling arguments
364 mov 12(%ebp), %eax // in_iv (i386 cdecl: args at 8(%ebp)..24(%ebp))
365 mov 24(%ebp), %edx // ctx
366 movups (%eax), %xmm7 // xmm7 = chaining value (iv); unaligned-safe load
367 mov %edx, 8(%esp) // 3rd calling argument (ctx) for aes_decrypt
368 mov 8(%ebp), %ebx // ibuf
369 mov 16(%ebp), %esi // num_blk
370 mov 20(%ebp), %edi // obuf
375 #else // __x86_64__, rdi/rsi/rdx/rcx/r8
376 mov %rdi, %rbx // ibuf (callee-saved so it survives the decrypt call)
377 movups (%rsi), %xmm7 // xmm7 = chaining value (iv)
378 mov %rdx, %r13 // num_blk
379 mov %rcx, %r14 // obuf
383 #define num_blk %r13d
388 // memcpy(tmp, ibuf, AES_BLOCK_SIZE);
389 // aes_decrypt(ibuf, obuf, ctx);
391 // memcpy(iv, tmp, AES_BLOCK_SIZE);
392 // ibuf += AES_BLOCK_SIZE;
393 // obuf += AES_BLOCK_SIZE;
395 cmp $1, num_blk // num_blk vs 1 (signed compare; num_blk is treated as int here)
396 jl L_crypt_cbc_done // if num_blk < 1, bypass the main loop, jump to finishing code
398 movups (ibuf), %xmm6 // tmp = current ciphertext block (next chaining value)
400 mov ibuf, (sp) // 1st calling argument (ibuf) for aes_decrypt (i386)
401 mov obuf, 4(sp) // 2nd calling argument (obuf) for aes_decrypt (i386)
403 mov ibuf, %rdi // 1st calling argument (ibuf) for aes_decrypt (x86_64)
404 mov obuf, %rsi // 2nd calling argument (obuf) for aes_decrypt (x86_64)
407 call _aes_decrypt_xmm_no_save // aes_decrypt(ibuf, obuf, ctx); preserves xmm6/xmm7
408 movups (obuf), %xmm0 // xmm0 = raw decrypted block
409 pxor %xmm7, %xmm0 // obuf ^= iv; (undo CBC chaining)
410 movaps %xmm6, %xmm7 // memcpy(iv, tmp, AES_BLOCK_SIZE); register-to-register, no stack trip
411 movups %xmm0, (obuf) // store finished plaintext block
412 add $16, ibuf // ibuf += AES_BLOCK_SIZE;
413 add $16, obuf // obuf += AES_BLOCK_SIZE;
414 sub $1, num_blk // num_blk --
415 jg 0b // if num_blk > 0, repeat the loop
418 // we are done here, the finishing code is identical to that in aes_encrypt_cbc, so just jump to there