2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 * This file implements the following function for the arm64 architecture:
30 * size_t strnlen(const char *string, size_t maxlen);
32 * The strnlen function returns either strlen(string) or maxlen, whichever
33 * is smaller, without reading beyond the first maxlen characters of string.
39 /*****************************************************************************
41 *****************************************************************************/
44 stp fp, lr, [sp, #-16]!
48 .macro ClearFrameAndReturn
53 /*****************************************************************************
55 *****************************************************************************/
60 .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
61 .quad 0x0000000000000000, 0x0000000000000000
63 /*****************************************************************************
64 * strnlen entrypoint *
65 *****************************************************************************/
68 // If n == 0, return 0 without loading any data from s. If n is so large
69 // that it exceeds the size of any buffer that can be allocated, jump into a
70 // simpler implementation that omits all length checks. This is both faster
71 // and lets us avoid some messy edge cases in the mainline.
76 // Load the 16-byte aligned vector containing the start of the string.
79 // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
80 // byte once we identify one. We don't use this vector until the very end
81 // of the routine; it simply falls out naturally to load it now.
84 // The aligned vector that we loaded to q0 contains the start of the string,
85 // but if the string was not originally aligned, it also contains bytes
86 // which precede the start of the string, and which may cause false positives
87 // when we search for the terminating NUL. We generate a mask to OR into the
88 // vector using an unaligned load to prevent this. The mask has non-zero
89 // values only in those bytes which correspond to bytes preceding the start
90 // of the string in the aligned vector load.
95 // Adjust maxlen to account for bytes which precede the start of the string,
96 // and jump into the main scanning loop.
100 // Main loop. Identical to strlen, except that we also need to check that we
101 // don't read more than maxlen bytes. To that end, we decrement maxlen by 16
102 // on each iteration, and exit the loop if the result is zero or negative.
104 0: ldr q0, [x2, #16]!
111 // We exhausted maxlen bytes without finding a terminating NUL character, so
112 // we need to return maxlen.
120 ret // No stack frame, so don't clear it.
123 // Compute the index of the NUL byte, and check if it occurs before maxlen
124 // bytes into the vector. If not, return maxlen. Otherwise, return the
125 // length of the string.
130 fmov w3, s1 // index of NUL byte in vector
131 sub x0, x2, x0 // index of vector in string
132 cmp x1, x3 // if NUL occurs before maxlen bytes
133 csel x1, x1, x3, cc // return strlen, else maxlen
137 /*****************************************************************************
138 * strlen entrypoint *
139 *****************************************************************************/
144 // Load the 16-byte aligned vector containing the start of the string.
147 // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
148 // byte once we identify one. We don't use this vector until the very end
149 // of the routine; it simply falls out naturally to load it now.
152 // The aligned vector that we loaded to q0 contains the start of the string,
153 // but if the string was not originally aligned, it also contains bytes
154 // which precede the start of the string, and which may cause false positives
155 // when we search for the terminating NUL. We generate a mask to OR into the
156 // vector using an unaligned load to prevent this. The mask has non-zero
157 // values only in those bytes which correspond to bytes preceding the start
158 // of the string in the aligned vector load.
165 // Main loop. On each iteration we do the following:
167 // q0 <-- next 16 aligned bytes of string
168 // b1 <-- unsigned minimum byte in q0
169 // if (b1 != 0) continue
171 // Thus, we continue the loop until the 16 bytes we load contain a zero byte.
173 0: ldr q0, [x1, #16]!
175 fmov w2, s1 // umov.b would be more natural, but requires 2 µops.
178 // A zero byte has been found. The following registers contain values that
179 // we need to compute the string's length:
181 // x0 pointer to start of string
182 // x1 pointer to vector containing terminating NUL byte
183 // v0 vector containing terminating NUL byte
184 // v2 {0, 1, 2, ... , 15}
186 // We compute the index of the terminating NUL byte in the string (which is
187 // precisely the length of the string) as follows:
189 // vec <-- mask(v0 != 0) | v2
190 // index <-- x1 - x0 + unsignedMinimum(vec)