]> git.saurik.com Git - apple/libc.git/blob - ppc/string/strncmp.s
Libc-320.1.3.tar.gz
[apple/libc.git] / ppc / string / strncmp.s
1 /*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 #define ASSEMBLER // we need the defs for cr7_eq etc
24 #include <mach/ppc/asm.h>
25 #undef ASSEMBLER
26
27 // *****************
28 // * S T R N C M P *
29 // *****************
30 //
31 // int strncmp(const char *s1, const char *s2, size_t len);
32 //
33 // We optimize the compare by doing it word parallel. This introduces
34 // a complication: if we blindly did word loads from both sides until
35 // finding a difference (or 0), we might get a spurious page fault by
36 // reading bytes past the difference. To avoid this, we never do a "lwz"
37 // that crosses a page boundary.
38 //
39 // The test for 0s relies on the following non-obvious but very efficient
40 // word-parallel test:
41 // x = dataWord + 0xFEFEFEFF
42 // y = ~dataWord & 0x80808080
43 // if (x & y) == 0 then no zero found
44 // The test maps any non-zero byte to zero, and any zero byte to 0x80,
45 // with one exception: 0x01 bytes preceding the first zero are also
46 // mapped to 0x80.
47
// strncmp: compare at most len bytes of two strings, stopping at the first
// differing byte or at the first 0-byte in the LHS, whichever comes first.
// in: r3 = s1 (LHS), r4 = s2 (RHS), r5 = len
// out: r3 = (LHS byte - RHS byte) at the stopping point, or 0 if len is 0
// scratch: r0, r2, r6-r12, ctr, cr0, cr1, cr7
// Both pointers are kept biased by -4 throughout so the word loop can use lwzu.
48 .text
49 .globl EXT(strncmp)
50
51 .align 5
52 LEXT(strncmp) // int strncmp(const char *s1,const char *s2,size_t len);
53 cmplwi cr1,r5,8 // cr1 <- blt if len < 8 (unsigned), ie too short to bother with word compares
54 andi. r0,r3,3 // r0 <- LHS & 3, cr0_eq set if LHS is word aligned
55 dcbt 0,r3 // touch in LHS (cache hint)
56 subi r3,r3,4 // bias LHS ptr by -4, since we use lwzu in the word inner loop
57 subi r4,r4,4 // bias RHS ptr the same way
58 blt cr1,Lshort // short buffer, just compare a byte at a time
59 lis r2,hi16(0xFEFEFEFF) // start to load magic constants for the 0-byte test
60 lis r6,hi16(0x80808080) // upper half of the 0x80 mask
61 ori r2,r2,lo16(0xFEFEFEFF) // r2 <- 0xFEFEFEFF
62 ori r6,r6,lo16(0x80808080) // r6 <- 0x80808080
63 beq Laligned // LHS is aligned (cr0 is still the result of the andi. above)
64 subfic r0,r0,4 // r0 <- #bytes to word align LHS (1..3)
65 mtctr r0 // set up for byte loop
66 sub r5,r5,r0 // adjust length
67 b Lbyteloop // cr1 is bge, so if these bytes match the byte loop falls into Laligned
68
69 // Handle short operands or end-of-buffer.
70 // r3 = LHS ptr - 4 (unaligned)
71 // r4 = RHS ptr - 4 (unaligned)
72 // r5 = length remaining in buffer (0..7)
73 // cr1 = blt set
74
75 Lshort:
76 cmpwi r5,0 // nothing left to compare?
77 mtctr r5 // assume not null, set up to loop over all remaining bytes
78 bne Lbyteloop // buffer not null, cr1 blt makes this the final byte run
79 li r3,0 // if buffer null, say "equal"
80 blr // return 0
81
82 // We have run out of whole words before the end of the RHS page or of the buffer.
83 // Unless the buffer is ending, compare 4 bytes to cross the page but keep the LHS ptr word-aligned.
84 // r2 = 0xFEFEFEFF
85 // r3 = LHS ptr - 4 (aligned)
86 // r4 = RHS ptr - 4 (unaligned)
87 // r5 = length remaining in buffer (may be 0)
88 // r6 = 0x80808080
89
90 Lcrosspage:
91 cmplwi cr1,r5,8 // not enough left in buffer for word compares?
92 li r0,4 // get #bytes to cross RHS page
93 blt cr1,Lshort // buffer is about to end, finish it a byte at a time
94 mtctr r0 // set up to compare 4 bytes
95 sub r5,r5,r0 // adjust length
96 b Lbyteloop // cr1 is bge, so if the 4 bytes match we fall back into Laligned
97
98 // Compare bytes, until 0-byte or difference found.
99 // r2 = 0xFEFEFEFF (if cr1 bge)
100 // r3 = LHS ptr - 4 (unaligned)
101 // r4 = RHS ptr - 4 (unaligned)
102 // r5 = length remaining in buffer (may be 0)
103 // r6 = 0x80808080 (if cr1 bge)
104 // cr1 = blt if this is end of buffer
// ctr = number of bytes to compare in this run
105
106 .align 5 // align inner loop, which is 8 instruction words long
107 Lbyteloop:
108 lbz r7,4(r3) // next LHS byte (the +4 undoes the pointer bias)
109 addi r3,r3,1 // advance LHS ptr
110 lbz r8,4(r4) // next RHS byte
111 addi r4,r4,1 // advance RHS ptr
112 cmpwi cr0,r7,0 // LHS byte zero?
113 cmpw cr7,r7,r8 // bytes equal?
114 crandc cr0_eq,cr7_eq,cr0_eq// set cr0_eq if equal and not 0
115 bdnzt eq,Lbyteloop // loop until different, 0, or (ctr==0)
116
117 bne Ldifferent // done if bytes differ or are 0
118 blt cr1,Ldifferent // done if this run ended the buffer (cr1 blt), r7 == r8 so the result is 0
119
120 // LHS is now word aligned. Loop over words until end of RHS page or buffer.
121 // When we get to the end of the page, we compare 4 bytes, so that we keep
122 // the LHS word aligned.
123 // r2 = 0xFEFEFEFF
124 // r3 = LHS ptr - 4 (aligned)
125 // r4 = RHS ptr - 4 (unaligned)
126 // r5 = length remaining in buffer (may be 0)
127 // r6 = 0x80808080
128
129 Laligned:
130 addi r9,r4,4 // restore true address of next RHS byte
131 rlwinm r9,r9,0,0xFFF // get RHS offset in page (pages are taken to be 4096 bytes)
132 subfic r0,r9,4096 // get #bytes left in RHS page (1..4096)
133 subfc r7,r0,r5 // *** r7 <- r5 - r0, carry set iff r5 >= r0 (unsigned)
134 subfe r8,r5,r5 // * r8 <- 0 if carry set, else all ones
135 and r7,r7,r8 // * r7 <- (r5 - r0) if r5 < r0, else 0
136 add r9,r0,r7 // *** r9 <- min(r0,r5), using algorithm in Compiler Writer's Guide
137 srwi. r8,r9,2 // get #words we can compare
138 beq-- Lcrosspage // no whole words before page or buffer end, Lcrosspage decides which
139 slwi r9,r8,2 // convert #words to #bytes
140 mtctr r8 // set up loop count
141 sub r5,r5,r9 // decrement length remaining
142 b Lwordloop // branch over the alignment padding into the word loop
143
144 // Inner loop: compare a word at a time, until one of three conditions:
145 // - a difference is found
146 // - a zero byte is found
147 // - end of count (ie, end of buffer or RHS page, whichever is first)
148 // At this point, registers are as follows:
149 // r2 = 0xFEFEFEFF
150 // r3 = LHS ptr - 4 (aligned)
151 // r4 = RHS ptr - 4 (unaligned)
152 // r5 = length remaining in buffer (may be 0)
153 // r6 = 0x80808080
154 // ctr = count of words until end of buffer or RHS page
155
156 .align 5 // align inner loop, which is 8 instruction words long
157 Lwordloop:
158 lwzu r7,4(r3) // r7 <- next 4 LHS bytes, LHS ptr += 4
159 lwzu r8,4(r4) // r8 <- next 4 RHS bytes, RHS ptr += 4
160 add r10,r7,r2 // r10 <- LHS + 0xFEFEFEFF
161 andc r12,r6,r7 // r12 <- ~LHS & 0x80808080
162 xor r11,r7,r8 // r11 <- compare the words (nonzero bits mark differences)
163 and r9,r10,r12 // r9 <- nonzero iff LHS has a 0-byte
164 or. r12,r9,r11 // combine difference and 0-test vectors, cr0_eq if neither hit
165 bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq
166
167 beq-- Lcrosspage // no hit, so the count ran out: buffer or page end reached
168
169 // Found differing bytes and/or a 0-byte. Determine which comes first, and
170 // subtract the bytes to compute the return value. We also need to mask out the
171 // false hits in the 0-byte test, which consist of 0x01 bytes that precede
172 // the 0-byte.
173
174 slwi r0,r7,7 // move 0x01 bits in LHS into position 0x80 of the same byte
175 andc r9,r9,r0 // mask out the false 0-hits from 0x01 bytes
176 or r11,r11,r9 // recompute difference vector, now including only true 0-byte hits
177 cntlzw r0,r11 // find 1st difference or 0-byte (r0 = 0..31, counted from the MSB)
178 rlwinm r9,r0,0,0x18 // byte align bit offset (r9 = 0,8,16, or 24)
179 addi r0,r9,8 // now, r0 = 8, 16, 24, or 32 = left rotate that brings the byte to the low end
180 rlwnm r7,r7,r0,24,31 // right justify differing bytes and mask off rest
181 rlwnm r8,r8,r0,24,31 // same for the RHS byte
182
183 Ldifferent: // bytes in r7 and r8 differ or are 0 (or are equal at buffer end)
184 sub r3,r7,r8 // compute return value: LHS byte - RHS byte
185 blr // return
186