]>
Commit | Line | Data |
---|---|---|
0b4e3aa0 A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
e5568f75 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
0b4e3aa0 | 11 | * |
e5568f75 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
0b4e3aa0 A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
e5568f75 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
0b4e3aa0 A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | ; | |
23 | ; | |
24 | ; Strlen, optimized for PPC. The routine we use is 2-3x faster | |
25 | ; than the simple loop which checks each byte for zero. | |
26 | ; For 0- and 1-byte strings, the simple routine is faster, but | |
27 | ; only by a few cycles. The algorithm used was adapted from the | |
28 | ; Mac OS 9 stdCLib strcopy routine, which was originally | |
29 | ; written by Gary Davidian. It relies on the following rather | |
30 | ; non-obvious but very efficient test: | |
31 | ; | |
32 | ; y = dataWord + 0xFEFEFEFF | |
33 | ; z = ~dataWord & 0x80808080 | |
34 | ; if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
35 | ; | |
36 | ; The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
37 | ; with one exception: 0x01 bytes preceding the first zero are also | |
38 | ; mapped to 0x80. | |
39 | ; | |
40 | #include <ppc/asm.h> | |
41 | #include <ppc/proc_reg.h> | |
42 | ; | |
43 | ; int strlen(ptr) | |
44 | ; | |
45 | ; | |
46 | ||
47 | .align 5 | |
48 | .globl EXT(strlen) | |
49 | LEXT(strlen) | |
50 ||
51 | andi. r4,r3,0x03 ; r4 = low 2 bits of the address; cr0 eq means already word-aligned | |
52 | mr r9,r3 ; r9 = original string address, kept for the final length computation | |
53 | bne LalignSource ; not aligned: test up to 3 leading bytes one at a time first | |
54 | Llentry: | |
55 | lis r5,hi16(0xFEFEFEFF) | |
56 | lis r6,hi16(0x80808080) | |
57 | subi r3,r3,0x04 ; pre-decrement r3 because lwzu adds 4 before each load | |
58 | ori r5,r5,lo16(0xFEFEFEFF) ; r5=0xFEFEFEFF | |
59 | ori r6,r6,lo16(0x80808080) ; r6=0x80808080 | |
60 ||
61 | LLoop: | |
62 | lwzu r8,4(r3) ; r8 = word at r3+4, and r3 is updated to that address | |
63 | add r4,r5,r8 ; r4= data + 0xFEFEFEFF | |
64 | andc r7,r6,r8 ; r7= ~data & 0x80808080 | |
65 | and. r4,r4,r7 ; r4= r4 & r7, result also recorded in cr0 for the beq | |
66 | beq LLoop ; if r4 is zero, then all bytes are non-zero: scan the next word | |
67 ||
68 | ; Now we know one of the bytes in r8 is zero, | |
69 | ; we just have to figure out which one. | |
70 | ; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, | |
71 | ; with one exception: | |
72 | ; 0x01 bytes preceding the first zero are also mapped to 0x80. | |
73 | ; So we have to mask out the 0x80s caused by 0x01s before | |
74 | ; counting leading zeroes to get the number of bytes in the last word. | |
75 ||
76 | rlwinm r5,r8,7,0,31 ; rotate data left by 7: each byte's 0x01 bit lands on its own 0x80 position | |
77 | subf r3,r9,r3 ; r3 = byte offset of this word from the original address | |
78 | andc r4,r4,r5 ; clear the 0x80 flag of every byte whose 0x01 bit is set (false hits from 0x01 bytes) | |
79 | cntlzw r7,r4 ; leading zero bits before the first remaining 0x80 flag | |
80 | srwi r7,r7,3 ; convert 0,8,16,24 to 0,1,2,3 | |
81 | add r3,r3,r7 ; add in nonzero bytes in last word; r3 = string length | |
82 | blr | |
83 ||
84 | ; We must align the source address for two reasons: to avoid spurious page | |
85 | ; faults, and for speed. | |
86 | ; r4 = low 2 bits of address (1,2, or 3) | |
87 | ; r3 = address | |
88 | ; r9 = original address (still same as r3) | |
89 ||
90 | LalignSource: | |
91 | lbz r5,0(r3) ; get the first byte... | |
92 | subic. r4,r4,2 ; cr0 = (low bits - 2) compared with 0: lt for 1, eq for 2, gt for 3 | |
93 | addi r3,r3,1 ; increment address | |
94 | addi r6,r9,1 ; r6 = original address + 1 (== r3 here); Lreturn subtracts it from r3 | |
95 | cmpwi cr1,r5,0 ; zero? (compare goes to cr1 so the cr0 result of subic. stays live) | |
96 | beq cr1,Lreturn ; first byte is zero: return zero | |
97 | bgt Llentry ; address is aligned now if low bits were 3 | |
98 ||
99 | lbz r5,0(r3) ; get the next byte... | |
100 | addi r3,r3,1 ; increment address | |
101 | cmpwi cr1,r5,0 ; zero? | |
102 | beq cr1,Lreturn ; second byte is zero: return one | |
103 | beq Llentry ; addr is aligned now if low bits were 2 (cr0 still from subic.) | |
104 ||
105 | lbz r5,0(r3) ; get the next byte... | |
106 | addi r3,r3,1 ; increment address | |
107 | cmpwi cr1,r5,0 ; zero? | |
108 | bne cr1,Llentry ; not zero, continue check (now aligned) | |
109 | Lreturn: | |
110 | sub r3,r3,r6 ; r3 = r3 - r6 = string length (0, 1, or 2) | |
111 | blr | |
112 |