osfmk/ppc/commpage/bzero_128.s

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <sys/appleapiopts.h>
  30 #include <ppc/asm.h>
  31 #include <machine/cpu_capabilities.h>
  32 #include <machine/commpage.h>
  33
  34         .text
  35         .align  2
  36 /*
  37  * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
  38  * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
  39  * simple transformations:
  40  *      - all word compares are changed to doubleword
  41  *      - all "srwi[.]" opcodes are changed to "srdi[.]"
  42  * Nothing else is done.  For this to work, the following rules must be
  43  * carefully followed:
  44  *      - do not use carry or overflow
  45  *      - only use record mode if you are sure the results are mode-invariant
  46  *        for example, all "andi." and almost all "rlwinm." are fine
  47  *      - do not use "slwi", "slw", or "srw"
  48  * An imaginative programmer could break the porting model in other ways, but the above
  49  * are the most likely problem areas.  It is perhaps surprising how well in practice
  50  * this simple method works.
  51  */
  52
  53 // **********************
  54 // * B Z E R O _ 1 2 8  *
  55 // **********************
  56 //
  57 // For 64-bit processors with a 128-byte cache line.
  58 //
  59 // Register use:
  60 //              r0 = zero
  61 //              r3 = original ptr, not changed since memset returns it
  62 //              r4 = count of bytes to set
   63 //              r9 = working operand ptr (r5 and r8 are scratch; ctr, cr0, cr1, cr6 and cr7 are also modified)
  64 // WARNING: We do not touch r2 and r10-r12, which some callers depend on.
  65
  66         .align  5
   67 bzero_128:                                              // void bzero(void *b, size_t len);  r3 = b, r4 = len
   68         cmplwi  cr7,r4,128              // cr7 <- len vs 128: too short for DCBZ128?
   69         li              r0,0                    // get a 0 (the value every store below writes)
   70         neg             r5,r3                   // start to compute #bytes to align (r5 = -ptr)
   71         mr              r9,r3                   // make copy of operand ptr (can't change r3)
   72         blt             cr7,Ltail               // length < 128, too short for DCBZ128: use plain stores only
   73
   74 // At least 128 bytes long, so compute alignment and #cache blocks.
   75
   76         andi.   r5,r5,0x7F              // r5 <- #bytes to 128-byte align; cr0 "eq" is set if already aligned
   77         sub             r4,r4,r5                // adjust length for the alignment bytes
   78         srwi    r8,r4,7                 // r8 <- number of full 128-byte chunks
   79         rlwinm  r4,r4,0,0x7F    // mask length down to remaining (tail) bytes
   80         mtctr   r8                              // set up loop count for the Ldcbz loop
   81         beq             Ldcbz                   // skip if already aligned (then r8!=0, since length >= 128)
   82
   83 // 128-byte align: store 1, 2, 4, ... 64 bytes as selected by the bits of r5.
   84
   85         mtcrf   0x01,r5                 // cr7 <- low 4 bits of r5 (1/2/4/8-byte flags)
   86         cmpwi   cr1,r8,0                // any 128-byte cache lines to 0?  (cr1 is tested at label 7 below)
   87         mtcrf   0x02,r5                 // cr6 <- next 4 bits of r5 (16/32/64-byte flags)
   88
   89         bf              31,1f                   // byte?
   90         stb             r0,0(r9)
   91         addi    r9,r9,1
   92 1:
   93         bf              30,2f                   // halfword?
   94         sth             r0,0(r9)
   95         addi    r9,r9,2
   96 2:
   97         bf              29,3f                   // word?
   98         stw             r0,0(r9)
   99         addi    r9,r9,4
  100 3:
  101         bf              28,4f                   // doubleword?
  102         std             r0,0(r9)
  103         addi    r9,r9,8
  104 4:
  105         bf              27,5f                   // quadword? (16 bytes, as two doubleword stores)
  106         std             r0,0(r9)
  107         std             r0,8(r9)
  108         addi    r9,r9,16
  109 5:
  110         bf              26,6f                   // 32-byte chunk?
  111         std             r0,0(r9)
  112         std             r0,8(r9)
  113         std             r0,16(r9)
  114         std             r0,24(r9)
  115         addi    r9,r9,32
  116 6:
  117         bf              25,7f                   // 64-byte chunk?
  118         std             r0,0(r9)
  119         std             r0,8(r9)
  120         std             r0,16(r9)
  121         std             r0,24(r9)
  122         std             r0,32(r9)
  123         std             r0,40(r9)
  124         std             r0,48(r9)
  125         std             r0,56(r9)
  126         addi    r9,r9,64
  127 7:
  128         beq             cr1,Ltail               // no chunks to dcbz128 (r8 == 0): only tail bytes remain
  129
  130 // Loop doing 128-byte version of DCBZ instruction.
  131 // NB: if the memory is cache-inhibited, the kernel will clear cr7
  132 // when it emulates the alignment exception.  Eventually, we may want
  133 // to check for this case.
  134
  135 Ldcbz:
  136         dcbz128 0,r9                    // zero another 128 bytes (r9 is 128-byte aligned here)
  137         addi    r9,r9,128
  138         bdnz    Ldcbz                   // repeat for all r8 chunks counted in ctr
  139
  140 // Store trailing bytes.
  141 //              r0 = 0
  142 //              r4 = count (less than 128 on every path that reaches here)
  143 //              r9 = ptr
  144
  145 Ltail:
  146         srwi.   r5,r4,4                 // r5 <- 16-byte chunks to 0; cr0 "eq" is set if there are none
  147         mtcrf   0x01,r4                 // remaining byte count to cr7 (8/4/2/1-byte flags)
  148         mtctr   r5                      // loop count for the 16-byte loop
  149         beq             2f                              // skip if no 16-byte chunks
  150 1:                                                              // loop over 16-byte chunks
  151         std             r0,0(r9)
  152         std             r0,8(r9)
  153         addi    r9,r9,16
  154         bdnz    1b
  155 2:
  156         bf              28,4f                   // 8-byte chunk?
  157         std             r0,0(r9)
  158         addi    r9,r9,8
  159 4:
  160         bf              29,5f                   // word?
  161         stw             r0,0(r9)
  162         addi    r9,r9,4
  163 5:
  164         bf              30,6f                   // halfword?
  165         sth             r0,0(r9)
  166         addi    r9,r9,2
  167 6:
  168         bflr    31                              // byte?  if not, return now
  169         stb             r0,0(r9)
  170         blr
  171 // Commpage descriptor for bzero_128.  NOTE(review): the macro and its flag constants are defined outside this file -- confirm their meaning there.
  172         COMMPAGE_DESCRIPTOR(bzero_128,_COMM_PAGE_BZERO,kCache128+k64Bit,0, \
  173                                 kCommPageMTCRF+kCommPageBoth+kPort32to64)