osfmk/ppc/commpage/bcopy_g3.s

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /* =======================================
  26  * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
  27  * =======================================
  28  *
  29  * Version of 2/20/2003, tuned for G3.
  30  *
  31  * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
  32  * environment.
  33  *
  34  *   r0  = "w7" or temp
  35  *   r2  = "w8"
  36  *   r3  = never modified (only read), as memcpy and memmove return 1st parameter as a value
  37  *   r4  = source ptr ("rs")
  38  *   r5  = count of bytes to move ("rc")
  39  *   r6  = "w1"
  40  *   r7  = "w2"
  41  *   r8  = "w3"
  42  *   r9  = "w4"
  43  *   r10 = "w5"
  44  *   r11 = "w6"
  45  *   r12 = destination ptr ("rd")
  46  * f0-f3 = used for moving 8-byte aligned data
  47  */
  48 #define rs      r4              // NB: we depend on rs==r4 in "lswx" instructions (several of them name r4 directly)
  49 #define rd      r12
  50 #define rc      r5
  51 // rc (r5) and rd (r12) are the first and last registers that a full-length lswx starting at r5 can overwrite.
     // w1-w8 below are the eight scratch words moved per 32-byte chunk by the integer loops.
  52 #define w1      r6
  53 #define w2      r7
  54 #define w3      r8
  55 #define w4      r9
  56 #define w5      r10
  57 #define w6      r11
  58 #define w7      r0
  59 #define w8      r2
  60
  61 #define ASSEMBLER
  62 #include <sys/appleapiopts.h>
  63 #include <ppc/asm.h>
  64 #include <machine/cpu_capabilities.h>
  65 #include <machine/commpage.h>
  66
  67         .text
  68         .globl  EXT(bcopy_g3)
  69
  70
  71 #define kLong   33                                      // too long for string ops: shortest length sent to the long-operand code (0..32 bytes fit one lswx into r5-r12)
  72
  73
  74 // Main entry points.
  75 //
     // bcopy_g3 -- void bcopy(const void *src, void *dst, size_t len)
     //      entry:  r3 = src, r4 = dst, r5 (rc) = len
     // Operands shorter than kLong are moved right here by one lswx/stswx pair:
     // all of the bytes are loaded into registers before any are stored, so the
     // short path does not need to look at operand overlap.  Longer operands go
     // to LLong0 with w1 = (dst-src) and rd = dst already set up.
     // This entry is exactly eight instructions (32 bytes); do not add any, since
     // the memcpy/memmove entry must follow this one by 32 bytes.
  76         .align  5
  77 bcopy_g3:                                                       // void bcopy(const void *src, void *dst, size_t len)
  78         cmplwi  rc,kLong                        // length > 32 bytes?  (unsigned compare against kLong)
  79         sub             w1,r4,r3                        // w1 <- (dst-src); must move in reverse if (rd-rs)<rc
  80         mr              rd,r4                           // rd <- dst, before LLong0 reuses r4 as the source ptr
  81         bge             LLong0                          // long operand: finish register setup at LLong0
  82         mtxer   rc                                      // set length for string ops (XER byte count <- len)
  83         lswx    r5,0,r3                         // load len bytes from src into r5-r12 (rc and rd are dead by now)
  84         stswx   r5,0,r4                         // store them at dst
  85         blr
  86
  87 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
  88 //
     // Lmemcpy_g3 / Lmemmove_g3 -- one shared entry point for both routines.
     //      entry:  r3 = dst, r4 (rs) = src, r5 (rc) = len
     //      exit:   r3 is never written, so it still holds dst as the return value
     // Operands shorter than kLong are moved here by one lswx/stswx pair (every
     // byte is loaded before any is stored, so overlap is harmless).  Longer
     // operands go to LLong1 with w1 = (dst-src) and rd = dst.
  89         .align  5
  90 Lmemcpy_g3:                                                     // void* memcpy(void *dst, void *src, size_t len)
  91 Lmemmove_g3:                                            // void* memmove(void *dst, const void *src, size_t len)
  92         cmplwi  rc,kLong                        // length > 32 bytes?  (unsigned compare against kLong)
  93         sub             w1,r3,rs                        // w1 <- (dst-src); must move in reverse if (rd-rs)<rc
  94         mr              rd,r3                           // work on a copy: must leave r3 alone, it is return value for memcpy etc
  95         bge             LLong1                          // longer than 32 bytes
  96         mtxer   rc                                      // set length for string ops (XER byte count <- len)
  97         lswx    r5,0,r4                         // load len bytes from src into r5-r12 (rc and rd are dead by now)
  98         stswx   r5,0,r3                         // store them at dst
  99         blr
 100
 101 // Long operands (more than 32 bytes, i.e. rc >= kLong).
 102 //              w1  = (rd-rs), used to check for overlap (reverse move) and for relative alignment
     //              rd  = destination ptr, rc = byte count (set up by both entry points)
     // This code picks one of four strategies:
     //   - (rd-rs) <  rc, unsigned   -> LLongReverse (destination starts inside the source)
     //   - relatively word aligned    -> LLongFloat   (forward, through FPRs)
     //   - otherwise                  -> falls through to the forward integer loop below
     // (LLongReverse makes its own aligned/unaligned choice from cr5.)
 103
 104 LLong0:                                                         // enter from bcopy()
 105         mr              rs,r3                           // bcopy passes src in r3: move it to rs (rd was copied out of r4 at entry)
 106 LLong1:                                                         // enter from memcpy() and memmove()
 107         cmplw   cr1,w1,rc                       // set cr1 blt iff we must move reverse (unsigned compare)
 108         rlwinm  r0,w1,0,0x3                     // are operands relatively word-aligned?  r0 <- (rd-rs) & 3
 109         neg             w2,rd                           // prepare to align destination: w2 <- -rd
 110         cmpwi   cr5,r0,0                        // set cr5 beq if relatively word aligned
 111         blt             cr1,LLongReverse        // handle reverse move
 112         andi.   w4,w2,3                         // w4 <- #bytes to word align destination; cr0 beq if none
 113         beq             cr5,LLongFloat          // relatively aligned so use FPRs
 114         sub             rc,rc,w4                        // adjust count for alignment
 115         srwi    r0,rc,5                         // get #chunks to xfer (can be 0 only when alignment bytes were subtracted)
 116         rlwinm  rc,rc,0,0x1F            // mask down to leftover bytes
 117         mtctr   r0                                      // set up loop count
 118         beq             1f                                      // dest already word aligned (cr0 still from the andi. above)
 119
 120 // Word align the destination.
 121
 122         mtxer   w4                                      // byte count to xer
 123         cmpwi   r0,0                            // any chunks to xfer?  (cr0 is tested by the beq- below)
 124         lswx    w1,0,rs                         // move w4 bytes to align dest
 125         add             rs,rs,w4
 126         stswx   w1,0,rd
 127         add             rd,rd,w4
 128         beq-    2f                                      // pathologic case, no chunks to xfer: must not enter the bdnz loop with ctr==0
 129
 130 // Forward, unaligned loop.
     // The destination is word aligned here; the source is not, because the
     // operands are not relatively word aligned.
 131
 132 1:                                                                      // one 32-byte chunk per pass: 8 word loads, then 8 word stores
 133         lwz             w1,0(rs)
 134         lwz             w2,4(rs)
 135         lwz             w3,8(rs)
 136         lwz             w4,12(rs)
 137         lwz             w5,16(rs)
 138         lwz             w6,20(rs)
 139         lwz             w7,24(rs)
 140         lwz             w8,28(rs)
 141         addi    rs,rs,32
 142         stw             w1,0(rd)
 143         stw             w2,4(rd)
 144         stw             w3,8(rd)
 145         stw             w4,12(rd)
 146         stw             w5,16(rd)
 147         stw             w6,20(rd)
 148         stw             w7,24(rd)
 149         stw             w8,28(rd)
 150         addi    rd,rd,32
 151         bdnz    1b
 152 2:                                                                      // rc = remaining bytes (0-31)
 153         mtxer   rc                                      // set up count for string ops
 154         mr              r0,rd                           // move dest ptr out of the way (rd is r12, inside the lswx target range)
 155         lswx    r5,0,rs                         // load xer bytes into r5-r12 (rs==r4)
 156         stswx   r5,0,r0                         // store them
 157         blr
 158
 159
 160
 161 // Forward, aligned loop.  We use FPRs.
     //      entry:  operands are relatively word aligned and need no reverse move;
     //              w2 = -rd (from LLong1), rs/rd/rc = source ptr, dest ptr, byte count
     // The destination is first brought to a doubleword boundary, then 32-byte
     // chunks are moved through f0-f3, then the 0..31 leftover bytes go out with
     // one lswx/stswx pair.
     // NOTE(review): only word-relative alignment has been established, so the
     // source can be 4 mod 8 once the destination is doubleword aligned; the lfd
     // loads are presumably expected to cope with that on the G3 -- confirm.
 162
 163 LLongFloat:
 164         andi.   w4,w2,7                         // W4 <- #bytes to doubleword-align destination; cr0 beq if none
 165         sub             rc,rc,w4                        // adjust count for alignment
 166         srwi    r0,rc,5                         // number of 32-byte chunks to xfer (can be 0 when alignment bytes were subtracted)
 167         rlwinm  rc,rc,0,0x1F            // mask down to leftover bytes
 168         mtctr   r0                                      // set up loop count
 169         beq             1f                                      // dest already doubleword aligned (cr0 still from the andi. above)
 170
 171 // Doubleword align the destination.
 172
 173         mtxer   w4                                      // byte count to xer
 174         cmpwi   r0,0                            // any chunks to xfer?  (cr0 is tested by the beq- below)
 175         lswx    w1,0,rs                         // move w4 bytes to align dest
 176         add             rs,rs,w4
 177         stswx   w1,0,rd
 178         add             rd,rd,w4
 179         beq-    2f                                      // pathologic case, no chunks to xfer: must not enter the bdnz loop with ctr==0
 180 1:                                                                      // loop over 32-byte chunks
 181         lfd             f0,0(rs)
 182         lfd             f1,8(rs)
 183         lfd             f2,16(rs)
 184         lfd             f3,24(rs)
 185         addi    rs,rs,32
 186         stfd    f0,0(rd)
 187         stfd    f1,8(rd)
 188         stfd    f2,16(rd)
 189         stfd    f3,24(rd)
 190         addi    rd,rd,32
 191         bdnz    1b
 192 2:                                                                      // rc = remaining bytes (0-31)
 193         mtxer   rc                                      // set up count for string ops
 194         mr              r0,rd                           // move dest ptr out of the way (rd is r12, inside the lswx target range)
 195         lswx    r5,0,rs                         // load xer bytes into r5-r12 (rs==r4)
 196         stswx   r5,0,r0                         // store them
 197         blr
 198
 199
 200 // Long, reverse moves.
 201 //              cr5 = beq if relatively word aligned
     //              rs/rd/rc = source ptr, dest ptr, byte count (rc >= kLong)
     // Taken when (rd-rs) < rc, so the copy runs from the high end of the operands
     // down.  Relatively aligned operands are handed to LReverseFloat; otherwise
     // the integer loop below moves 32 bytes per pass with no destination
     // alignment step, and the 0..31 lowest bytes go out with one lswx/stswx pair.
 202
 203 LLongReverse:
 204         add             rd,rd,rc                        // point to end of operands + 1
 205         add             rs,rs,rc
 206         beq             cr5,LReverseFloat       // aligned operands so can use FPRs
 207         srwi    r0,rc,5                         // get chunk count (>=1, nothing has been subtracted from rc)
 208         rlwinm  rc,rc,0,0x1F            // mask down to leftover bytes
 209         mtctr   r0                                      // set up loop count
 210         mtxer   rc                                      // set up for trailing bytes (used by the lswx/stswx after the loop)
 211 1:                                                                      // one 32-byte chunk per pass, highest word first; each word is stored only after it was loaded
 212         lwz             w1,-4(rs)
 213         lwz             w2,-8(rs)
 214         lwz             w3,-12(rs)
 215         lwz             w4,-16(rs)
 216         stw             w1,-4(rd)
 217         lwz             w5,-20(rs)
 218         stw             w2,-8(rd)
 219         lwz             w6,-24(rs)
 220         stw             w3,-12(rd)
 221         lwz             w7,-28(rs)
 222         stw             w4,-16(rd)
 223         lwzu    w8,-32(rs)                      // last load of the chunk also steps rs down by 32
 224         stw             w5,-20(rd)
 225         stw             w6,-24(rd)
 226         stw             w7,-28(rd)
 227         stwu    w8,-32(rd)                      // last store of the chunk also steps rd down by 32
 228         bdnz    1b
 229
 230         sub             r4,rs,rc                        // point to 1st (leftmost) leftover byte (0..31)
 231         sub             r0,rd,rc                        // move dest ptr out of way (rd is r12, inside the lswx target range)
 232         lswx    r5,0,r4                         // load xer bytes into r5-r12
 233         stswx   r5,0,r0                         // store them
 234         blr
 235
 236
 237 // Long, reverse aligned moves.  We use FPRs.
     //      entry:  rs and rd point one byte past the end of the operands (set up at
     //              LLongReverse), rc = byte count, operands relatively word aligned
     // The top (rd & 7) bytes are moved first so that the destination end sits on a
     // doubleword boundary, then 32-byte chunks go through f0-f3 from high to low
     // addresses, then the 0..31 lowest bytes go out with one lswx/stswx pair.
 238
 239 LReverseFloat:
 240         andi.   w4,rd,7                         // w4 <- #bytes to doubleword-align destination; cr0 beq if none
 241         sub             rc,rc,w4                        // adjust count for alignment
 242         srwi    r0,rc,5                         // number of 32-byte chunks to xfer (can be 0 when alignment bytes were subtracted)
 243         rlwinm  rc,rc,0,0x1F            // mask down to leftover bytes
 244         mtctr   r0                                      // set up loop count
 245         beq             1f                                      // dest already doubleword aligned (cr0 still from the andi. above)
 246
 247 // Doubleword align the destination.
 248
 249         mtxer   w4                                      // byte count to xer
 250         cmpwi   r0,0                            // any chunks to xfer?  (cr0 is tested by the beq- below)
 251         sub             rs,rs,w4                        // point to 1st bytes to xfer
 252         sub             rd,rd,w4
 253         lswx    w1,0,rs                         // move w4 bytes to align dest
 254         stswx   w1,0,rd
 255         beq-    2f                                      // pathologic case, no chunks to xfer: must not enter the bdnz loop with ctr==0
 256 1:                                                                      // one 32-byte chunk per pass, highest doubleword first
 257         lfd             f0,-8(rs)
 258         lfd             f1,-16(rs)
 259         lfd             f2,-24(rs)
 260         lfdu    f3,-32(rs)                      // last load of the chunk also steps rs down by 32
 261         stfd    f0,-8(rd)
 262         stfd    f1,-16(rd)
 263         stfd    f2,-24(rd)
 264         stfdu   f3,-32(rd)                      // last store of the chunk also steps rd down by 32
 265         bdnz    1b
 266 2:                                                                      // rc = remaining bytes (0-31)
 267         mtxer   rc                                      // set up count for string ops
 268         sub             r4,rs,rc                        // point to 1st (leftmost) leftover byte (0..31)
 269         sub             r0,rd,rc                        // move dest ptr out of way (rd is r12, inside the lswx target range)
 270         lswx    r5,0,r4                         // load xer bytes into r5-r12
 271         stswx   r5,0,r0                         // store them
 272         blr
 273 // Describe the routine starting at bcopy_g3 for placement at _COMM_PAGE_BCOPY.
     // NOTE(review): the macro is defined outside this file (presumably in
     // machine/commpage.h); the 0 / k64Bit+kHasAltivec arguments look like
     // "required" and "disqualifying" CPU capability bits -- confirm there.
 274         COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,0)