]>
Commit | Line | Data |
---|---|---|
5ba3f43e A |
1 | /* |
2 | * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | * This file implements the following function for the arm64 architecture: | |
29 | * | |
30 | * int strncmp(const char *s1, const char *s2, size_t n); | |
31 | * | |
32 | * Returns 0 if the two strings are equal up to the first n bytes or to the | |
33 | * end of the string, whichever comes first. Otherwise, returns the difference | |
34 | * of the first mismatched characters interpreted as uint8_t. | |
35 | */ | |
36 | ||
37 | .globl _strncmp | |
38 | ||
39 | /***************************************************************************** | |
40 | * Macros * | |
41 | *****************************************************************************/ | |
42 | ||
43 | .macro EstablishFrame | |
44 | stp fp, lr, [sp, #-16]! | |
45 | mov fp, sp | |
46 | .endm | |
47 | ||
48 | .macro ClearFrameAndReturn | |
49 | ldp fp, lr, [sp], #16 | |
50 | ret | |
51 | .endm | |
52 | ||
53 | #include "../mach/arm/vm_param.h" | |
54 | #define kVectorSize 16 | |
55 | ||
56 | /***************************************************************************** | |
57 | * Constants * | |
58 | *****************************************************************************/ | |
59 | ||
60 | .text | |
61 | .align 5 | |
62 | L_mask: | |
63 | .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 | |
64 | ||
65 | /***************************************************************************** | |
66 | * Entrypoints * | |
67 | *****************************************************************************/ | |
68 | ||
69 | _strncmp: | |
70 | EstablishFrame | |
71 | eor x3, x3, x3 | |
72 | cbz x2, L_scalarDone | |
73 | // Compare one byte at a time until s1 has vector alignment. | |
74 | 0: tst x0, #(kVectorSize-1) | |
75 | b.eq L_s1aligned | |
76 | ldrb w4, [x0],#1 // load byte from src1 | |
77 | ldrb w5, [x1],#1 // load byte from src2 | |
78 | subs x3, x4, x5 // if the are not equal | |
79 | ccmp w4, #0, #4, eq // or we find an EOS | |
80 | b.eq L_scalarDone // return the difference | |
81 | subs x2, x2, #1 // decrement length | |
82 | b.ne 0b // continue loop if non-zero | |
83 | ||
84 | // We found a mismatch or EOS before s1 became aligned. Simply return the | |
85 | // difference between the last bytes that we loaded. | |
86 | L_scalarDone: | |
87 | mov x0, x3 | |
88 | ClearFrameAndReturn | |
89 | ||
90 | L_s1aligned: | |
91 | // If s2 is similarly aligned to s1, then we can use a naive vector comparison | |
92 | // from this point on without worrying about spurious page faults; none of our | |
93 | // loads will ever cross a page boundary, because they are all aligned. | |
94 | tst x1, #(kVectorSize-1) | |
95 | b.eq L_naiveVector | |
96 | ||
97 | /***************************************************************************** | |
98 | * Careful chunk comparison * | |
99 | *****************************************************************************/ | |
100 | ||
101 | // Otherwise, we need to be careful; although vector loads from s1 cannot | |
102 | // cross a page boundary because they are aligned, s2 is not aligned. We | |
103 | // compute the multiple of vector size that we can safely load before reaching | |
104 | // a page boundary, and compare only that far before switching over to scalar | |
105 | // comparisons to step across the page boundary. If this number happens to | |
106 | // be zero, we jump directly to the scalar comparison. | |
107 | neg x7, x1 | |
108 | ands x7, x7, #(PAGE_MIN_SIZE-kVectorSize) | |
109 | b.eq 2f | |
110 | ||
111 | .align 4 | |
112 | // If n is less than the number of bytes before a page-crossing load, jump | |
113 | // into the naive vector path instead, since we will not even reach a page | |
114 | // crossing. Otherwise, decrement n by that number before we monkey with it, | |
115 | // and set the decremented value aside. | |
116 | 0: cmp x2, x7 | |
117 | b.ls L_naiveVector | |
118 | sub x6, x2, x7 | |
119 | // Use vector comparisons until a mismatch or EOS is encountered, or the next | |
120 | // vector load from s2 would be page-crossing. | |
121 | 1: ldr q0, [x0],#(kVectorSize) | |
122 | ldr q1, [x1],#(kVectorSize) | |
123 | cmeq.16b v1, v0, v1 | |
124 | and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS | |
125 | uminv.16b b1, v0 | |
126 | fmov w3, s1 // zero only iff comparison is finished | |
127 | cbz w3, L_vectorDone | |
128 | subs x7, x7, #(kVectorSize) | |
129 | b.ne 1b | |
130 | // Restore the updated n to x2 | |
131 | mov x2, x6 | |
132 | // The next vector load will cross a page boundary. Instead, compare one byte | |
133 | // at a time until s1 again has vector alignment, at which point we will have | |
134 | // compared exactly 16 bytes. | |
135 | 2: ldrb w4, [x0],#1 // load byte from src1 | |
136 | ldrb w5, [x1],#1 // load byte from src2 | |
137 | subs x3, x4, x5 // if the are not equal | |
138 | ccmp w4, #0, #4, eq // or we find an EOS | |
139 | b.eq L_scalarDone // return the difference | |
140 | subs x2, x2, #1 // decrement length | |
141 | b.eq L_scalarDone // exit loop if zero. | |
142 | tst x0, #(kVectorSize-1) | |
143 | b.ne 2b | |
144 | // Having compared one vector's worth of bytes using a scalar comparison, we | |
145 | // know that we are safely across the page boundary. Initialize x7 and jump | |
146 | // back into the vector comparison part of the loop. | |
147 | mov x7, #(PAGE_MIN_SIZE-kVectorSize) | |
148 | b 0b | |
149 | ||
150 | /***************************************************************************** | |
151 | * Naive vector comparison * | |
152 | *****************************************************************************/ | |
153 | ||
154 | .align 4 | |
155 | L_naiveVector: | |
156 | ldr q0, [x0],#(kVectorSize) | |
157 | ldr q1, [x1],#(kVectorSize) | |
158 | cmeq.16b v1, v0, v1 | |
159 | and.16b v0, v0, v1 // contains zero byte iff mismatch or EOS | |
160 | uminv.16b b1, v0 | |
161 | fmov w3, s1 // zero only iff comparison is finished | |
162 | cbz w3, L_vectorDone | |
163 | subs x2, x2, #16 | |
164 | b.hi L_naiveVector | |
165 | ||
166 | L_readNBytes: | |
167 | eor x0, x0, x0 | |
168 | ClearFrameAndReturn | |
169 | ||
170 | L_vectorDone: | |
171 | // Load the bytes corresponding to the first mismatch or EOS and return | |
172 | // their difference. | |
173 | eor.16b v1, v1, v1 | |
174 | cmhi.16b v0, v0, v1 // force non-zero lanes to 0xff | |
175 | ldr q1, L_mask | |
176 | orr.16b v0, v0, v1 // lane index in lanes containing mismatch or EOS | |
177 | uminv.16b b1, v0 | |
178 | fmov w3, s1 | |
179 | // If the index of the mismatch or EOS is greater than or equal to n, it | |
180 | // occurs after the first n bytes of the string, and doesn't count. | |
181 | cmp x3, x2 | |
182 | b.cs L_readNBytes | |
183 | sub x3, x3, #(kVectorSize) | |
184 | ldrb w4, [x0, x3] | |
185 | ldrb w5, [x1, x3] | |
186 | sub x0, x4, x5 | |
187 | ClearFrameAndReturn |