]>
Commit | Line | Data |
---|---|---|
5b2abdfb | 1 | /* |
59e0d9fe | 2 | * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. |
5b2abdfb A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
3d9156a7 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
59e0d9fe | 11 | * |
3d9156a7 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
5b2abdfb A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
3d9156a7 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
5b2abdfb A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
5b2abdfb | 22 | |
59e0d9fe A |
23 | /* We use mode-independent "g" opcodes such as "srgi". These expand |
24 | * into word operations when targeting __ppc__, and into doubleword | |
25 | * operations when targeting __ppc64__. | |
26 | */ | |
27 | #include <architecture/ppc/mode_independent_asm.h> | |
5b2abdfb | 28 | |
59e0d9fe | 29 | #include <mach/ppc/asm.h> |
5b2abdfb | 30 | |
59e0d9fe A |
31 | #define __APPLE_API_PRIVATE |
32 | #include <machine/cpu_capabilities.h> | |
33 | #undef __APPLE_API_PRIVATE | |
5b2abdfb | 34 | |
5b2abdfb | 35 | |
59e0d9fe A |
36 | // Strlen, optimized for PPC. We use a non-obvious but very efficient |
37 | // word-parallel test for 0-bytes: | |
38 | // | |
39 | // y = dataWord + 0xFEFEFEFF | |
40 | // z = ~dataWord & 0x80808080 | |
41 | // if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
42 | // | |
43 | // The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
44 | // with one exception: 0x01 bytes preceding the first zero are also | |
45 | // mapped to 0x80. Using altivec is another possibility, but it turns | |
46 | // out that the overhead of maintaining VRSAVE and dealing with edge | |
47 | // cases pushes the crossover point out to around 30 bytes... longer | |
48 | // than the "typical" operand length. | |
49 | // | |
50 | // In 64-bit mode, the algorithm is doubleword parallel. | |
5b2abdfb | 51 | |
59e0d9fe A |
52 | .text |
53 | .align 5 | |
54 | .globl EXT(strlen) | |
55 | LEXT(strlen) // int strlen(ptr): in r3 = ptr, out r3 = count of bytes before the first 0-byte | |
56 | clrrgi r9,r3,LOG2_GPR_BYTES// r9 = ptr rounded down to a GPR boundary (right LOG2_GPR_BYTES bits zeroed) | |
57 | li r7,-1 // get 0xFFs; shifted below to build the first-word mask | |
58 | lg r8,0(r9) // r8 = aligned word or doubleword containing 1st operand byte | |
59 | rlwinm r4,r3,3,(GPR_BYTES-1)*8 // r4 = 8 * (byte offset of ptr within the GPR) = starting bit position of operand | |
60 | #if defined(__ppc__) | |
61 | lis r5,hi16(0xFEFEFEFF) // start to generate 32-bit magic constants | |
62 | lis r6,hi16(0x80808080) | |
63 | srw r7,r7,r4 // r7 = 0xFF in operand byte positions of r8, 0x00 in the bytes before the operand | |
64 | ori r5,r5,lo16(0xFEFEFEFF) | |
65 | ori r6,r6,lo16(0x80808080) | |
66 | #else | |
67 | ld r5,_COMM_PAGE_MAGIC_FE(0) // get 0xFEFEFEFE FEFEFEFF from commpage | |
68 | ld r6,_COMM_PAGE_MAGIC_80(0) // get 0x80808080 80808080 from commpage | |
69 | srd r7,r7,r4 // r7 = 0xFF in operand byte positions of r8, 0x00 in the bytes before the operand | |
70 | #endif | |
71 | orc r8,r8,r7 // r8 |= ~r7: force bytes preceding operand to 0xFF so they cannot test as zero | |
72 | b Lloop1 // enter loop at the test, skipping the load (first word is already in r8) | |
73 | ||
74 | // Loop over words or doublewords. | |
75 | // r3 = original address | |
76 | // r5 = 0xFEFEFEFE FEFEFEFF | |
77 | // r6 = 0x80808080 80808080 | |
78 | // r9 = address (aligned) of the word or doubleword currently in r8 | |
5b2abdfb | 79 | |
59e0d9fe A |
80 | .align 5 |
81 | Lloop: | |
82 | lgu r8,GPR_BYTES(r9) // get next word or doubleword, advancing r9 to its address | |
83 | Lloop1: // initial entry | |
84 | add r4,r5,r8 // r4 = data + 0xFEFEFEFF | |
85 | andc r7,r6,r8 // r7 = ~data & 0x80808080 | |
86 | and. r4,r4,r7 // r4 = r4 & r7 | |
87 | beq Lloop // if r4 is zero, then all bytes are non-zero, so keep scanning | |
5b2abdfb | 88 | |
59e0d9fe A |
89 | // Now we know one of the bytes in r8 is zero, we just have to figure out which one. |
90 | // We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, with one exception: | |
91 | // 0x01 bytes preceding the first zero are also mapped to 0x80. So we have to mask | |
92 | // out the 0x80s caused by 0x01s before searching for the 0x80 byte. | |
5b2abdfb | 93 | |
59e0d9fe A |
94 | slgi r5,r8,7 // move 0x01 bits to 0x80 position (r5's magic constant is no longer needed) |
95 | sub r3,r9,r3 // start to compute string length: r3 = address of last word - original address | |
96 | andc r4,r4,r5 // turn off false hits from 0x0100 worst case | |
97 | cntlzg r7,r4 // now we can count leading 0s: r7 = bit position of the first 0x80 marker | |
98 | srwi r7,r7,3 // convert 0,8,16,24 to 0,1,2,3, etc (bit position -> byte index) | |
99 | add r3,r3,r7 // add in nonzero bytes in last word | |
100 | blr // return with the length in r3 |