osfmk/ppc/commpage/bzero_128.s

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  */
  30
  31 #define ASSEMBLER
  32 #include <sys/appleapiopts.h>
  33 #include <ppc/asm.h>
  34 #include <machine/cpu_capabilities.h>
  35 #include <machine/commpage.h>
  36
  37         .text
  38         .align  2
  39 /*
  40  * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
  41  * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
  42  * simple transformations:
  43  *      - all word compares are changed to doubleword
  44  *      - all "srwi[.]" opcodes are changed to "srdi[.]"
  45  * Nothing else is done.  For this to work, the following rules must be
  46  * carefully followed:
  47  *      - do not use carry or overflow
  48  *      - only use record mode if you are sure the results are mode-invariant
  49  *        for example, all "andi." and almost all "rlwinm." are fine
  50  *      - do not use "slwi", "slw", or "srw"
  51  * An imaginative programmer could break the porting model in other ways, but the above
  52  * are the most likely problem areas.  It is perhaps surprising how well in practice
  53  * this simple method works.
  54  */
  55
  56 // **********************
  57 // * B Z E R O _ 1 2 8  *
  58 // **********************
  59 //
  60 // Zero r4 bytes starting at r3.  For 64-bit processors with a 128-byte cache line.
  61 // Lengths >= 128: store up to a 128-byte boundary, dcbz128 whole lines, then store the tail.
  62 // Register use (ctr, cr0, cr1, cr6 and cr7 are also written):
  63 //              r0 = zero
  64 //              r3 = original ptr, not changed since memset returns it
  65 //              r4 = count of bytes to set (r5 and r8 are scratch counts derived from it)
  66 //              r9 = working operand ptr
  67 // WARNING: We do not touch r2 and r10-r12, which some callers depend on.
  68
  69         .align  5
  70 bzero_128:                                              // void bzero(void *b, size_t len);
  71         cmplwi  cr7,r4,128              // too short for DCBZ128? (unsigned compare, result in cr7)
  72         li              r0,0                    // r0 <- 0, the value every store below writes
  73         neg             r5,r3                   // r5 <- -ptr; its low 7 bits are #bytes to 128-byte align
  74         mr              r9,r3                   // make copy of operand ptr (can't change r3)
  75         blt             cr7,Ltail               // length < 128, too short for DCBZ128: plain stores only
  76
  77 // At least 128 bytes long, so compute alignment and #cache blocks.
  78
  79         andi.   r5,r5,0x7F              // r5 <- #bytes to 128-byte align; sets cr0 for the beq below
  80         sub             r4,r4,r5                // r4 <- length remaining once the pointer is aligned
  81         srwi    r8,r4,7                 // r8 <- number of whole 128-byte chunks
  82         rlwinm  r4,r4,0,0x7F    // r4 <- residual bytes (0..127) left after the whole chunks
  83         mtctr   r8                              // set up loop count for Ldcbz
  84         beq             Ldcbz                   // already aligned (r5==0); r8!=0 here since length >= 128
  85
  86 // 128-byte align: store 1/2/4/8/16/32/64 bytes according to the bits set in r5.
  87
  88         mtcrf   0x01,r5                 // low 4 bits of r5 (8/4/2/1) -> cr7, i.e. CR bits 28-31
  89         cmpwi   cr1,r8,0                // any 128-byte cache lines to 0?  (tested at label 7 below)
  90         mtcrf   0x02,r5                 // next 4 bits of r5 (64/32/16) -> cr6, i.e. CR bits 24-27
  91
  92         bf              31,1f                   // byte?
  93         stb             r0,0(r9)
  94         addi    r9,r9,1
  95 1:
  96         bf              30,2f                   // halfword?
  97         sth             r0,0(r9)
  98         addi    r9,r9,2
  99 2:
 100         bf              29,3f                   // word?
 101         stw             r0,0(r9)
 102         addi    r9,r9,4
 103 3:
 104         bf              28,4f                   // doubleword?
 105         std             r0,0(r9)
 106         addi    r9,r9,8
 107 4:
 108         bf              27,5f                   // quadword (16 bytes)?
 109         std             r0,0(r9)
 110         std             r0,8(r9)
 111         addi    r9,r9,16
 112 5:
 113         bf              26,6f                   // 32-byte chunk?
 114         std             r0,0(r9)
 115         std             r0,8(r9)
 116         std             r0,16(r9)
 117         std             r0,24(r9)
 118         addi    r9,r9,32
 119 6:
 120         bf              25,7f                   // 64-byte chunk?
 121         std             r0,0(r9)
 122         std             r0,8(r9)
 123         std             r0,16(r9)
 124         std             r0,24(r9)
 125         std             r0,32(r9)
 126         std             r0,40(r9)
 127         std             r0,48(r9)
 128         std             r0,56(r9)
 129         addi    r9,r9,64
 130 7:
 131         beq             cr1,Ltail               // no chunks to dcbz128 (r8==0): go store the residual bytes
 132
 133 // Loop doing 128-byte version of DCBZ instruction.
 134 // NB: if the memory is cache-inhibited, the kernel will clear cr7
 135 // when it emulates the alignment exception.  Eventually, we may want
 136 // to check for this case.
 137
 138 Ldcbz:
 139         dcbz128 0,r9                    // zero another 128 bytes (r9 advances by 128 per pass)
 140         addi    r9,r9,128
 141         bdnz    Ldcbz                   // repeat until ctr (the r8 chunk count) reaches 0
 142
 143 // Store trailing bytes.
 144 //              r0 = 0
 145 //              r4 = count (below 128: checked by cmplwi on entry, or masked by rlwinm)
 146 //              r9 = ptr
 147
 148 Ltail:
 149         srwi.   r5,r4,4                 // r5 <- 16-byte chunks to 0; sets cr0 for the beq below
 150         mtcrf   0x01,r4                 // remaining byte count to cr7 (low 4 bits: 8/4/2/1)
 151         mtctr   r5                      // ctr <- number of 16-byte chunks
 152         beq             2f                              // skip if no 16-byte chunks
 153 1:                                                              // loop over 16-byte chunks
 154         std             r0,0(r9)
 155         std             r0,8(r9)
 156         addi    r9,r9,16
 157         bdnz    1b
 158 2:
 159         bf              28,4f                   // 8-byte chunk?
 160         std             r0,0(r9)
 161         addi    r9,r9,8
 162 4:
 163         bf              29,5f                   // word?
 164         stw             r0,0(r9)
 165         addi    r9,r9,4
 166 5:
 167         bf              30,6f                   // halfword?
 168         sth             r0,0(r9)
 169         addi    r9,r9,2
 170 6:
 171         bflr    31                              // byte?  if not, return right here
 172         stb             r0,0(r9)
 173         blr
 174
 175         COMMPAGE_DESCRIPTOR(bzero_128,_COMM_PAGE_BZERO,kCache128+k64Bit,0, \
 176                                 kCommPageMTCRF+kCommPageBoth+kPort32to64)