]>
Commit | Line | Data |
---|---|---|
5b2abdfb A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
734aad71 | 6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. |
5b2abdfb | 7 | * |
734aad71 A |
8 | * This file contains Original Code and/or Modifications of Original Code |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
5b2abdfb A |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
734aad71 A |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
5b2abdfb A |
22 | * |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | ; | |
26 | ; | |
27 | ; Strlen, optimized for PPC. The routine we use is 2-3x faster | |
28 | ; than the simple loop which checks each byte for zero. | |
29 | ; For 0- and 1-byte strings, the simple routine is faster, but | |
30 | ; only by a few cycles. The algorithm used was adapted from the | |
31 | ; Mac OS 9 stdCLib strcopy routine, which was originally | |
32 | ; written by Gary Davidian. It relies on the following rather | |
33 | ; non-obvious but very efficient test: | |
34 | ; | |
35 | ; y = dataWord + 0xFEFEFEFF | |
36 | ; z = ~dataWord & 0x80808080 | |
37 | ; if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
38 | ; | |
39 | ; The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
40 | ; with one exception: 0x01 bytes preceding the first zero are also | |
41 | ; mapped to 0x80. | |
42 | ; | |
e3cf15b6 | 43 | #include <mach/ppc/asm.h> |
5b2abdfb A |
44 | ; |
45 | ; int strlen(ptr) -- ptr is passed in r3; the length is returned in r3 | |
46 | ; | |
47 | ; | |
48 | ||
49 | .align 5 | |
50 | .globl EXT(strlen) | |
51 | LEXT(strlen) | |
52 | ||
53 | andi. r4,r3,0x03 ; r4 = low 2 bits of ptr; cr0 is EQ if ptr is already word-aligned | |
54 | mr r9,r3 ; r9 = original ptr, subtracted at the end to form the length | |
55 | bne LalignSource ; misaligned: check the 1-3 leading bytes one at a time first | |
56 | Llentry: | |
57 | lis r5,hi16(0xFEFEFEFF) | |
58 | lis r6,hi16(0x80808080) | |
59 | subi r3,r3,0x04 ; bias r3 by -4 so the first lwzu (which adds 4) loads the first aligned word | |
60 | ori r5,r5,lo16(0xFEFEFEFF) ; r5=0xFEFEFEFF (adding it is the same as subtracting 0x01010101) | |
61 | ori r6,r6,lo16(0x80808080) ; r6=0x80808080 (the top bit of every byte) | |
62 | ||
63 | LLoop: | |
64 | lwzu r8,4(r3) ; advance r3 by 4 and load the word at the new address into r8 | |
65 | add r4,r5,r8 ; r4= data + 0xFEFEFEFF | |
66 | andc r7,r6,r8 ; r7= ~data & 0x80808080 | |
67 | and. r4,r4,r7 ; r4= r4 & r7, and set cr0 for the branch below | |
68 | beq LLoop ; if r4 is zero, then all bytes are non-zero: keep scanning | |
69 | ||
70 | ; Now we know one of the bytes in r8 is zero, | |
71 | ; we just have to figure out which one. | |
72 | ; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, | |
73 | ; with one exception: | |
74 | ; 0x01 bytes preceding the first zero are also mapped to 0x80. | |
75 | ; So we have to mask out the 0x80s caused by 0x01s before | |
76 | ; counting leading zeroes to get the bytes in last word. | |
77 | ||
78 | rlwinm r5,r8,7,0,31 ; rotate left by 7: each byte's 0x01 bit lands on that byte's 0x80 bit | |
79 | subf r3,r9,r3 ; r3 = address of the last word loaded - original ptr | |
80 | andc r4,r4,r5 ; clear hits for bytes whose low bit was set (the 0x01 in the 0x0100 worst case) | |
81 | cntlzw r7,r4 ; leading zero bits before the first remaining 0x80 hit | |
82 | srwi r7,r7,3 ; divide by 8: convert 0,8,16,24 to 0,1,2,3 | |
83 | add r3,r3,r7 ; add in nonzero bytes in last word; r3 = string length | |
84 | blr | |
85 | ||
86 | ; We must align the source address for two reasons: to avoid spurious page | |
87 | ; faults, and for speed. | |
88 | ; r4 = low 2 bits of address (1,2, or 3) | |
89 | ; r3 = address | |
90 | ; r9 = original address (still same as r3) | |
91 | ||
92 | LalignSource: | |
93 | lbz r5,0(r3) ; get the first byte... | |
94 | subic. r4,r4,2 ; r4 = low bits - 2; cr0 is LT/EQ/GT for low bits 1/2/3 (tested by bgt/beq below) | |
95 | addi r3,r3,1 ; increment address | |
96 | addi r6,r9,1 ; r6 = original address + 1 (now r6==r3); Lreturn subtracts it from r3 | |
97 | cmpwi cr1,r5,0 ; zero? (uses cr1 so the cr0 result of subic. stays intact) | |
98 | beq cr1,Lreturn ; first byte is zero: r3==r6, so Lreturn returns zero | |
99 | bgt Llentry ; address is aligned now if low bits were 3 | |
100 | ||
101 | lbz r5,0(r3) ; get the next byte... | |
102 | addi r3,r3,1 ; increment address | |
103 | cmpwi cr1,r5,0 ; zero? | |
104 | beq cr1,Lreturn ; second byte is zero: r3==r6+1, so Lreturn returns one | |
105 | beq Llentry ; addr is aligned now if low bits were 2 | |
106 | ||
107 | lbz r5,0(r3) ; get the next byte (only reached when low bits were 1)... | |
108 | addi r3,r3,1 ; increment address | |
109 | cmpwi cr1,r5,0 ; zero? | |
110 | bne cr1,Llentry ; not zero, continue check (now aligned); else fall through and return two | |
111 | Lreturn: | |
112 | sub r3,r3,r6 ; get string length (0, 1, or 2) = r3 - r6 | |
113 | blr | |
114 |