/*
 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 * This file implements the following function for the arm64 architecture:
 *
 *     int memcmp_zero_ptr_aligned(const void *s, size_t n);
 *
 * The memcmp_zero_ptr_aligned function checks whether the string s of n
 * bytes contains all zeros. The address and size of the string s must be
 * pointer-aligned (8-byte for arm64). It returns 0 if true and 1 otherwise;
 * it also returns 0 if n is 0.
 */

/* this guard is used by tests */
#ifdef __arm64__

#include "asm.h"

.globl _memcmp_zero_ptr_aligned

/*****************************************************************************
 *  Macros                                                                   *
 *****************************************************************************/

.macro EstablishFrame
	ARM64_STACK_PROLOG
	stp       fp, lr, [sp, #-16]!
	mov       fp, sp
.endm

.macro ClearFrameAndReturn
	ldp       fp, lr, [sp], #16
	ARM64_STACK_EPILOG
.endm

/*****************************************************************************
 *  Constants                                                                *
 *****************************************************************************/

.text
.align 5

/*****************************************************************************
 *  memcmp_zero_ptr_aligned entrypoint                                       *
 *****************************************************************************/

_memcmp_zero_ptr_aligned:

//	For the use case in , memory corruption should be rare, so the check
//	for all zeros is fairly simple when an early out is not necessary.
//	We just load all the bytes and logical OR them together. If the result
//	is still zero, all the bytes are zero.
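//	A rough C equivalent of that strategy (an illustrative sketch only; the
//	function name and the word-at-a-time loop are assumptions, not part of
//	this file):
//
//	    int memcmp_zero_ptr_aligned_sketch(const void *s, size_t n)
//	    {
//	        const uint64_t *p = s;      // s and n are both 8-byte aligned
//	        uint64_t acc = 0;
//	        for (size_t i = 0; i < n / 8; i++) {
//	            acc |= p[i];            // OR keeps any non-zero bits
//	        }
//	        return acc != 0;            // 0 if all zero, 1 otherwise
//	    }
//
//	The assembly below follows the same idea, but ORs 64 bytes per iteration
//	into four NEON accumulators and reduces them once at the end.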
	EstablishFrame
	cmp       x1, #64
	b.lo      L_sizeIsSmall

//	Load the first 64 bytes, and compute the number of bytes to the
//	first 64-byte aligned location. Even though we are going to test
//	64 bytes, only those preceding that 64-byte location "count" towards
//	reducing the length of the buffer or advancing the pointers.
	mov       x2, x0           // copy the original addr
	add       x0, x0, #64
	and       x0, x0, #-64     // aligned addr
	ldp       q4, q5, [x2]
	ldp       q6, q7, [x2, #32]
	sub       x2, x0, x2       // bytes between original and aligned addr
	sub       x1, x1, x2       // update length
	subs      x1, x1, #64      // check length > 64
	b.ls      L_cleanup

L_loop:
	ldp       q0, q1, [x0]
	ldp       q2, q3, [x0, #32]
	orr.16b   v4, v4, v0       // use orr to keep non-zero bytes
	orr.16b   v5, v5, v1
	orr.16b   v6, v6, v2
	orr.16b   v7, v7, v3
	add       x0, x0, #64      // advance pointer
	subs      x1, x1, #64      // check length > 64
	b.hi      L_loop

L_cleanup:
//	Between 0 and 64 more bytes need to be tested. The exact number of
//	bytes to test is x1 + 64. Instead of using smaller conditional checks,
//	we simply check 64 unaligned bytes from x0+x1. This load may overlap
//	with the previous one, but that is OK.
	add       x0, x0, x1
	ldp       q0, q1, [x0]
	ldp       q2, q3, [x0, #32]
	orr.16b   v4, v4, v0       // use orr to keep non-zero bytes
	orr.16b   v5, v5, v1
	orr.16b   v6, v6, v2
	orr.16b   v7, v7, v3
	orr.16b   v4, v4, v5       // reduce four regs into two
	orr.16b   v6, v6, v7
	orr.16b   v4, v4, v6       // reduce two regs into one
	umaxv.16b b0, v4           // reduce 16 bytes into one
	umov      w0, v0.b[0]      // move byte to GPR for testing
	tst       w0, w0
	cset      x0, ne           // return 1 if non-zero, 0 otherwise
	ClearFrameAndReturn

L_sizeIsSmall:
	cbz       x1, L_sizeIsZero // return zero if length is zero

	mov       x3, #0
0:	ldr       x2, [x0], #8
	orr       x3, x3, x2       // use orr to keep non-zero bytes
	subs      x1, x1, #8       // update length
	b.hi      0b

	tst       x3, x3
	cset      x0, ne           // return 1 if non-zero, 0 otherwise
	ClearFrameAndReturn

L_sizeIsZero:
	mov       x0, #0
	ClearFrameAndReturn

#endif // __arm64__
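//	A minimal userland smoke-test sketch for this routine (a hypothetical
//	harness; the extern declaration and the test values are assumptions and
//	are not part of the kernel build):
//
//	    #include <assert.h>
//	    #include <stdint.h>
//	    #include <string.h>
//
//	    extern int memcmp_zero_ptr_aligned(const void *s, size_t n);
//
//	    int main(void)
//	    {
//	        uint64_t buf[32];                 // 256 bytes, 8-byte aligned
//	        memset(buf, 0, sizeof(buf));
//	        assert(memcmp_zero_ptr_aligned(buf, sizeof(buf)) == 0);
//	        assert(memcmp_zero_ptr_aligned(buf, 0) == 0);   // n == 0 returns 0
//	        ((uint8_t *)buf)[200] = 1;        // poison one byte in the tail
//	        assert(memcmp_zero_ptr_aligned(buf, sizeof(buf)) == 1);
//	        return 0;
//	    }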