]> git.saurik.com Git - apple/xnu.git/blob - osfmk/arm64/strncmp.s
eee2de7225831677ccdd14cd961d29f548f7db43
[apple/xnu.git] / osfmk / arm64 / strncmp.s
1 /*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements the following function for the arm64 architecture:
29 *
30 * int strncmp(const char *s1, const char *s2, size_t n);
31 *
32 * Returns 0 if the two strings are equal up to the first n bytes or to the
33 * end of the string, whichever comes first. Otherwise, returns the difference
34 * of the first mismatched characters interpreted as uint8_t.
35 */
36
37 .globl _strncmp
38
39 /*****************************************************************************
40 * Macros *
41 *****************************************************************************/
42
43 .macro EstablishFrame
44 stp fp, lr, [sp, #-16]!
45 mov fp, sp
46 .endm
47
48 .macro ClearFrameAndReturn
49 ldp fp, lr, [sp], #16
50 ret
51 .endm
52
53 #include "../mach/arm/vm_param.h"
54 #define kVectorSize 16
55
56 /*****************************************************************************
57 * Constants *
58 *****************************************************************************/
59
60 .text
61 .align 5
62 L_mask:
63 .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
64
65 /*****************************************************************************
66 * Entrypoints *
67 *****************************************************************************/
68
69 _strncmp:
70 EstablishFrame
71 eor x3, x3, x3
72 cbz x2, L_scalarDone
73 // Compare one byte at a time until s1 has vector alignment.
74 0: tst x0, #(kVectorSize-1)
75 b.eq L_s1aligned
76 ldrb w4, [x0],#1 // load byte from src1
77 ldrb w5, [x1],#1 // load byte from src2
78 subs x3, x4, x5 // if the are not equal
79 ccmp w4, #0, #4, eq // or we find an EOS
80 b.eq L_scalarDone // return the difference
81 subs x2, x2, #1 // decrement length
82 b.ne 0b // continue loop if non-zero
83
84 // We found a mismatch or EOS before s1 became aligned. Simply return the
85 // difference between the last bytes that we loaded.
86 L_scalarDone:
87 mov x0, x3
88 ClearFrameAndReturn
89
90 L_s1aligned:
91 // If s2 is similarly aligned to s1, then we can use a naive vector comparison
92 // from this point on without worrying about spurious page faults; none of our
93 // loads will ever cross a page boundary, because they are all aligned.
94 tst x1, #(kVectorSize-1)
95 b.eq L_naiveVector
96
97 /*****************************************************************************
98 * Careful chunk comparison *
99 *****************************************************************************/
100
101 // Otherwise, we need to be careful; although vector loads from s1 cannot
102 // cross a page boundary because they are aligned, s2 is not aligned. We
103 // compute the multiple of vector size that we can safely load before reaching
104 // a page boundary, and compare only that far before switching over to scalar
105 // comparisons to step across the page boundary. If this number happens to
106 // be zero, we jump directly to the scalar comparison.
107 neg x7, x1
108 ands x7, x7, #(PAGE_MIN_SIZE-kVectorSize)
109 b.eq 2f
110
111 .align 4
112 // If n is less than the number of bytes before a page-crossing load, jump
113 // into the naive vector path instead, since we will not even reach a page
114 // crossing. Otherwise, decrement n by that number before we monkey with it,
115 // and set the decremented value aside.
116 0: cmp x2, x7
117 b.ls L_naiveVector
118 sub x6, x2, x7
119 // Use vector comparisons until a mismatch or EOS is encountered, or the next
120 // vector load from s2 would be page-crossing.
121 1: ldr q0, [x0],#(kVectorSize)
122 ldr q1, [x1],#(kVectorSize)
123 cmeq.16b v1, v0, v1
124 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS
125 uminv.16b b1, v0
126 fmov w3, s1 // zero only iff comparison is finished
127 cbz w3, L_vectorDone
128 subs x7, x7, #(kVectorSize)
129 b.ne 1b
130 // Restore the updated n to x2
131 mov x2, x6
132 // The next vector load will cross a page boundary. Instead, compare one byte
133 // at a time until s1 again has vector alignment, at which point we will have
134 // compared exactly 16 bytes.
135 2: ldrb w4, [x0],#1 // load byte from src1
136 ldrb w5, [x1],#1 // load byte from src2
137 subs x3, x4, x5 // if the are not equal
138 ccmp w4, #0, #4, eq // or we find an EOS
139 b.eq L_scalarDone // return the difference
140 subs x2, x2, #1 // decrement length
141 b.eq L_scalarDone // exit loop if zero.
142 tst x0, #(kVectorSize-1)
143 b.ne 2b
144 // Having compared one vector's worth of bytes using a scalar comparison, we
145 // know that we are safely across the page boundary. Initialize x7 and jump
146 // back into the vector comparison part of the loop.
147 mov x7, #(PAGE_MIN_SIZE-kVectorSize)
148 b 0b
149
150 /*****************************************************************************
151 * Naive vector comparison *
152 *****************************************************************************/
153
154 .align 4
155 L_naiveVector:
156 ldr q0, [x0],#(kVectorSize)
157 ldr q1, [x1],#(kVectorSize)
158 cmeq.16b v1, v0, v1
159 and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS
160 uminv.16b b1, v0
161 fmov w3, s1 // zero only iff comparison is finished
162 cbz w3, L_vectorDone
163 subs x2, x2, #16
164 b.hi L_naiveVector
165
166 L_readNBytes:
167 eor x0, x0, x0
168 ClearFrameAndReturn
169
170 L_vectorDone:
171 // Load the bytes corresponding to the first mismatch or EOS and return
172 // their difference.
173 eor.16b v1, v1, v1
174 cmhi.16b v0, v0, v1 // force non-zero lanes to 0xff
175 ldr q1, L_mask
176 orr.16b v0, v0, v1 // lane index in lanes containing mismatch or EOS
177 uminv.16b b1, v0
178 fmov w3, s1
179 // If the index of the mismatch or EOS is greater than or equal to n, it
180 // occurs after the first n bytes of the string, and doesn't count.
181 cmp x3, x2
182 b.cs L_readNBytes
183 sub x3, x3, #(kVectorSize)
184 ldrb w4, [x0, x3]
185 ldrb w5, [x1, x3]
186 sub x0, x4, x5
187 ClearFrameAndReturn