2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 * This file implements the following function for the arm64 architecture:
30 * size_t strnlen(const char *string, size_t maxlen);
32 * The strnlen function returns either strlen(string) or maxlen, whichever
33 * is smaller, without reading beyond the first maxlen characters of string.
36 #include <arm64/asm.h>
41 /*****************************************************************************
43 *****************************************************************************/
47 stp fp, lr, [sp, #-16]!
51 .macro ClearFrameAndReturn
56 /*****************************************************************************
58 *****************************************************************************/
63 .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
64 .quad 0x0000000000000000, 0x0000000000000000
66 /*****************************************************************************
67 * strnlen entrypoint *
68 *****************************************************************************/
71 // If maxlen == 0, return 0 without loading any data from string. If maxlen
72 // is so large that it exceeds the size of any buffer that can be allocated,
73 // jump into a simpler implementation that omits all length checks. This is
74 // both faster and lets us avoid some messy edge cases in the mainline.
79 // Load the 16-byte aligned vector containing the start of the string.
82 // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
83 // byte once we identify one. We don't use this vector until the very end
84 // of the routine; it simply falls out naturally to load it now.
87 // The aligned vector that we loaded to q0 contains the start of the string,
88 // but if the string was not originally aligned, it also contains bytes
89 // which precede the start of the string, and which may cause false positives
90 // when we search for the terminating NUL. We generate a mask to OR into the
91 // vector using an unaligned load to prevent this. The mask has non-zero
92 // values only in those bytes which correspond to bytes preceding the start
93 // of the string in the aligned vector load.
98 // Adjust maxlen to account for bytes which precede the start of the string,
99 // and jump into the main scanning loop.
103 // Main loop. Identical to strlen, except that we also need to check that we
104 // don't read more than maxlen bytes. To that end, we decrement maxlen by 16
105 // on each iteration, and exit the loop if the result is zero or negative.
107 0: ldr q0, [x2, #16]!
114 // We exhausted maxlen bytes without finding a terminating NUL character, so
115 // we need to return maxlen.
123 ret // No stack frame, so don't clear it.
126 // Compute the index of the NUL byte, and check if it occurs before maxlen
127 // bytes into the vector. If not, return maxlen. Otherwise, return the
128 // length of the string.
133 fmov w3, s1 // index of NUL byte in vector
134 sub x0, x2, x0 // index of vector in string
135 cmp x1, x3 // if NUL occurs before maxlen bytes
136 csel x1, x1, x3, cc // return strlen, else maxlen
140 /*****************************************************************************
141 * strlen entrypoint *
142 *****************************************************************************/
147 // Load the 16-byte aligned vector containing the start of the string.
150 // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
151 // byte once we identify one. We don't use this vector until the very end
152 // of the routine; it simply falls out naturally to load it now.
155 // The aligned vector that we loaded to q0 contains the start of the string,
156 // but if the string was not originally aligned, it also contains bytes
157 // which precede the start of the string, and which may cause false positives
158 // when we search for the terminating NUL. We generate a mask to OR into the
159 // vector using an unaligned load to prevent this. The mask has non-zero
160 // values only in those bytes which correspond to bytes preceding the start
161 // of the string in the aligned vector load.
168 // Main loop. On each iteration we do the following:
170 // q0 <-- next 16 aligned bytes of string
171 // b1 <-- unsigned minimum byte in q0
172 // if (b1 != 0) continue
174 // Thus, we continue the loop until the 16 bytes we load contain a zero byte.
176 0: ldr q0, [x1, #16]!
178 fmov w2, s1 // umov.b would be more natural, but requires 2 µops.
181 // A zero byte has been found. The following registers contain values that
182 // we need to compute the string's length:
184 // x0 pointer to start of string
185 // x1 pointer to vector containing terminating NUL byte
186 // v0 vector containing terminating NUL byte
187 // v2 {0, 1, 2, ... , 15}
189 // We compute the index of the terminating NUL byte in the string (which is
190 // precisely the length of the string) as follows:
192 // vec <-- mask(v0 != 0) | v2
193 // index <-- x1 - x0 + unsignedMinimum(vec)