2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 * This file implements the following function for the arm64 architecture:
30 * int memcmp_zero_ptr_aligned(const void *s, size_t n);
32 * The memcmp_zero_ptr_aligned function checks whether string s of n bytes contains all zeros.
33 * Address and size of the string s must be pointer-aligned (8-byte for arm64).
34 * Return 0 if true, 1 otherwise. Also return 0 if n is 0.
37 /* this guard is used by tests */
42 .globl _memcmp_zero_ptr_aligned
44 /*****************************************************************************
46 *****************************************************************************/
50 stp fp, lr, [sp, #-16]!
54 .macro ClearFrameAndReturn
59 /*****************************************************************************
61 *****************************************************************************/
66 /*****************************************************************************
67 * memcmp_zero_ptr_aligned entrypoint *
68 *****************************************************************************/
70 _memcmp_zero_ptr_aligned:
72 // For the use case in <rdar://problem/59523721>, memory corruption should be rare
73 // so the check for all zeros is fairly simple, since an early out is not necessary.
74 // We just load all the bytes and logical OR them together. If the result
75 // is still zero, all the bytes are zero.
81 // Load the first 64 bytes, and compute the number of bytes to the
82 // first 64-byte aligned location. Even though we are going to test
83 // 64 bytes, only those preceding that 64-byte location "count" towards
84 // reducing the length of the buffer or advancing the pointers.
85 mov x2, x0 // copy the original addr
87 and x0, x0, #-64 // aligned addr
90 sub x2, x0, x2 // bytes between original and aligned addr
91 sub x1, x1, x2 // update length
92 subs x1, x1, #64 // check length > 64
98 orr.16b v4, v4, v0 // use orr to keep non-zero bytes
102 add x0, x0, #64 // advance pointer
103 subs x1, x1, #64 // check length > 64
107 // Between 0 and 64 more bytes need to be tested. The exact
108 // number of bytes to test is x1 + 64. Instead of using smaller conditional
109 // checks, we simply check 64 unaligned bytes from x0+x1. This load may overlap
110 // with the previous one but it's ok.
113 ldp q2, q3, [x0, #32]
114 orr.16b v4, v4, v0 // use orr to keep non-zero bytes
119 orr.16b v4, v4, v5 // reduce four regs into two
121 orr.16b v4, v4, v6 // reduce two regs into one
122 umaxv.16b b0, v4 // reduce 16 bytes into one
123 umov w0, v0.b[0] // move byte to GPR for testing
125 cset x0, ne // return 1 if non-zero, 0 otherwise
129 cbz x1, L_sizeIsZero // return zero if length is zero
133 orr x3, x3, x2 // use orr to keep non-zero bytes
134 subs x1, x1, #8 // update length
138 cset x0, ne // return 1 if non-zero, 0 otherwise