]>
Commit | Line | Data |
---|---|---|
0b4e3aa0 A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
43866e37 | 6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. |
0b4e3aa0 | 7 | * |
43866e37 A |
8 | * This file contains Original Code and/or Modifications of Original Code |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
0b4e3aa0 A |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
43866e37 A |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
0b4e3aa0 A |
22 | * |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | ; | |
26 | ; | |
27 | ; Strlen, optimized for PPC. The routine we use is 2-3x faster | |
28 | ; than the simple loop which checks each byte for zero. | |
29 | ; For 0- and 1-byte strings, the simple routine is faster, but | |
30 | ; only by a few cycles. The algorithm used was adapted from the | |
31 | ; Mac OS 9 stdCLib strcopy routine, which was originally | |
32 | ; written by Gary Davidian. It relies on the following rather | |
33 | ; non-obvious but very efficient test: | |
34 | ; | |
35 | ; y = dataWord + 0xFEFEFEFF | |
36 | ; z = ~dataWord & 0x80808080 | |
37 | ; if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
38 | ; | |
39 | ; The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
40 | ; with one exception: 0x01 bytes preceding the first zero are also | |
41 | ; mapped to 0x80. | |
42 | ; | |
43 | #include <ppc/asm.h> | |
44 | #include <ppc/proc_reg.h> | |
45 | ; strlen: return the number of bytes that precede the first zero byte. | |
46 | ; int strlen(ptr) | |
47 | ; In: r3 = ptr. Out: r3 = length. | |
48 | ; Writes r4-r9, cr0 and cr1. Makes no calls and never references the stack. | |
49 | ||
50 | .align 5 | |
51 | .globl EXT(strlen) | |
52 | LEXT(strlen) | |
53 | ||
54 | andi. r4,r3,0x03 ; r4 = address & 3, cr0 records whether that is zero | |
55 | mr r9,r3 ; r9 = original address, needed later to compute the length | |
56 | bne LalignSource ; misaligned: test the 1-3 leading bytes one at a time | |
57 | Llentry: | |
58 | lis r5,hi16(0xFEFEFEFF) | |
59 | lis r6,hi16(0x80808080) | |
60 | subi r3,r3,0x04 ; bias r3 by -4 because the lwzu below adds 4 before loading | |
61 | ori r5,r5,lo16(0xFEFEFEFF) ; r5=0xFEFEFEFF | |
62 | ori r6,r6,lo16(0x80808080) ; r6=0x80808080 | |
63 | ||
64 | LLoop: | |
65 | lwzu r8,4(r3) ; r8 = word at r3+4, and r3 is advanced to that address | |
66 | add r4,r5,r8 ; r4= data + 0xFEFEFEFF | |
67 | andc r7,r6,r8 ; r7= ~data & 0x80808080 | |
68 | and. r4,r4,r7 ; r4= r4 & r7, and cr0 is set from the result | |
69 | beq LLoop ; r4 == 0 means no zero byte in this word, so keep scanning | |
70 | ||
71 | ; Now we know one of the bytes in r8 is zero, | |
72 | ; we just have to figure out which one. | |
73 | ; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, | |
74 | ; with one exception: | |
75 | ; 0x01 bytes preceding the first zero are also mapped to 0x80. | |
76 | ; So we have to mask out the 0x80s caused by 0x01s before | |
77 | ; counting leading zeroes to get the bytes in last word. | |
78 | ||
79 | rlwinm r5,r8,7,0,31 ; rotate left 7: each byte's 0x01 bit lands on its own 0x80 bit | |
80 | subf r3,r9,r3 ; r3 = byte offset of this final word from the original address | |
81 | andc r4,r4,r5 ; clear the 0x80 flag of any byte whose 0x01 bit is set (false hits) | |
82 | cntlzw r7,r4 ; leading zero bits before the first remaining 0x80 flag | |
83 | srwi r7,r7,3 ; bits to bytes: convert 0,8,16,24 to 0,1,2,3 | |
84 | add r3,r3,r7 ; add in nonzero bytes in last word | |
85 | blr | |
86 | ||
87 | ; We must align the source address for two reasons: to avoid spurious page | |
88 | ; faults, and for speed. | |
89 | ; r4 = low 2 bits of address (1,2, or 3) | |
90 | ; r3 = address | |
91 | ; r9 = original address (still same as r3) | |
92 | ||
93 | LalignSource: | |
94 | lbz r5,0(r3) ; get the first byte... | |
95 | subic. r4,r4,2 ; cr0 = low bits compared with 2: lt for 1, eq for 2, gt for 3 | |
96 | addi r3,r3,1 ; increment address | |
97 | addi r6,r9,1 ; r6 = original address + 1 (== r3 here), subtracted at Lreturn | |
98 | cmpwi cr1,r5,0 ; zero? (uses cr1 so the alignment result in cr0 survives) | |
99 | beq cr1,Lreturn ; if it's zero, return zero | |
100 | bgt Llentry ; address is aligned now if low bits were 3 | |
101 | ||
102 | lbz r5,0(r3) ; get the next byte... | |
103 | addi r3,r3,1 ; increment address | |
104 | cmpwi cr1,r5,0 ; zero? | |
105 | beq cr1,Lreturn ; if it's zero, return one | |
106 | beq Llentry ; addr is aligned now if low bits were 2 (cr0 from the subic.) | |
107 | ||
108 | lbz r5,0(r3) ; get the next byte... | |
109 | addi r3,r3,1 ; increment address | |
110 | cmpwi cr1,r5,0 ; zero? | |
111 | bne cr1,Llentry ; not zero, continue check (now aligned), else fall through and return two | |
112 | Lreturn: | |
113 | sub r3,r3,r6 ; length = r3 - (original address + 1), which is 0, 1, or 2 | |
114 | blr | |
115 |