osfmk/ppc/commpage/bzero_32.s

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  */
  30
  31 #define ASSEMBLER
  32 #include <sys/appleapiopts.h>
  33 #include <ppc/asm.h>
  34 #include <machine/cpu_capabilities.h>
  35 #include <machine/commpage.h>
  36
  37         .text
  38         .align  2
  39
  40
   41 // *******************
   42 // * B Z E R O _ 3 2 *
   43 // *******************
   44 //
   45 // For 32-bit processors with a 32-byte cache line.
      //
      // Zeroes r4 bytes starting at r3.  Operands shorter than 32 bytes are
      // cleared with ordinary stores only.  Longer operands are first brought
      // to a 32-byte boundary (if necessary) by storing 32 zero bytes, then
      // cleared one cache line at a time with DCBZ, and finally the last
      // 0-31 bytes are cleared with ordinary stores.
   46 //
   47 // Register use:
   48 //              r0 = zero
   49 //              r3 = original ptr, not changed since memset returns it
   50 //              r4 = count of bytes to set (reduced by r5 once aligned)
      //              r5 = #bytes needed to 32-byte align, then scratch
      //              r8 = #32-byte chunks
   51 //              r9 = working operand ptr
      //              ctr = DCBZ loop count
      //              cr0, cr1, cr7 = modified (used for the branches below)
   52 // We do not touch r2 and r10-r12, which some callers depend on.
   53
   54         .align  5
   55 bzero_32:                                               // void bzero(void *b, size_t len);
   56         cmplwi  cr7,r4,32               // too short for DCBZ? (unsigned compare)
   57         li              r0,0                    // get a 0
   58         neg             r5,r3                   // start to compute #bytes to align
   59         mr              r9,r3                   // make copy of operand ptr (can't change r3)
   60         blt             cr7,Ltail               // length < 32, too short for DCBZ
   61
   62 // At least 32 bytes long, so compute alignment and #cache blocks.
   63
   64         andi.   r5,r5,0x1F              // r5 <-  #bytes to 32-byte align (sets cr0 for the beq below)
   65         sub             r4,r4,r5                // adjust length
   66         srwi    r8,r4,5                 // r8 <- #32-byte chunks
   67         cmpwi   cr1,r8,0                // any chunks? (cr1 is tested after the alignment stores)
   68         mtctr   r8                              // set up loop count
   69         beq             1f                              // r5==0: already 32-byte aligned (so r8!=0, since length>=32)
   70
   71 // 32-byte align.  We just store 32 bytes of 0s, rather than test and use
   72 // conditional branches.  This stays inside the operand since length >= 32.
   73
   74         stw             r0,0(r9)
   75         stw             r0,4(r9)
   76         stw             r0,8(r9)
   77         stw             r0,12(r9)
   78         stw             r0,16(r9)
   79         stw             r0,20(r9)
   80         stw             r0,24(r9)
   81         stw             r0,28(r9)
   82         add             r9,r9,r5                // now r9 is 32-byte aligned
   83         beq             cr1,Ltail               // skip if no 32-byte chunks (r4 < 32 remains)
   84
   85 // Loop doing 32-byte version of DCBZ instruction.
   86 // NB: we take alignment exceptions on cache-inhibited memory.
   87 // The kernel could be changed to zero cr7 when emulating a
   88 // dcbz (as it does on 64-bit processors), so we could avoid all
   89 // but the first.
   90
   91 1:
   92         andi.   r5,r4,0x1F              // will there be trailing bytes? (cr0 is tested by beqlr after the loop)
   93         b               2f                      // branch over any padding emitted by the .align below
   94         .align  4                               // align the top of the loop
   95 2:
   96         dcbz    0,r9                    // zero another 32 bytes
   97         addi    r9,r9,32                // advance to the next 32-byte block
   98         bdnz    2b                      // repeat until ctr (initially r8) reaches 0
   99
  100         beqlr                                   // no trailing bytes: (r4 & 0x1F) == 0, so return
  101
  102 // Store trailing bytes.  Only the low 5 bits of r4 are examined from here on.
  103
  104 Ltail:
  105         andi.   r5,r4,0x10              // test bit 27 separately (mtcrf 0x01 only copies bits 28-31)
  106         mtcrf   0x01,r4                 // remaining byte count to cr7
  107
  108         beq             2f                              // cr0 from andi.: no 16-byte chunk
  109         stw             r0,0(r9)
  110         stw             r0,4(r9)
  111         stw             r0,8(r9)
  112         stw             r0,12(r9)
  113         addi    r9,r9,16                // advance past the 16 bytes just stored
  114 2:
  115         bf              28,4f                   // 8-byte chunk?
  116         stw             r0,0(r9)
  117         stw             r0,4(r9)
  118         addi    r9,r9,8
  119 4:
  120         bf              29,5f                   // word?
  121         stw             r0,0(r9)
  122         addi    r9,r9,4
  123 5:
  124         bf              30,6f                   // halfword?
  125         sth             r0,0(r9)
  126         addi    r9,r9,2
  127 6:
  128         bflr    31                              // byte?  if not, we are done
  129         stb             r0,0(r9)
  130         blr
  131
      // NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file (presumably
      // in <machine/commpage.h>).  The arguments appear to be the routine label, its
      // commpage address, required and excluded capability bits, and the commpage
      // type -- confirm against the macro definition.
  132         COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32)