/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#define ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text
        .align  2

// *******************
// * B Z E R O _ 3 2 *
// *******************
//
// For 32-bit processors with a 32-byte cache line.
//
// Register use:
//      r0 = zero
//      r3 = original ptr, not changed since memset returns it
//      r4 = count of bytes to set
//      r9 = working operand ptr
// We do not touch r2 and r10-r12, which some callers depend on.

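// As an orientation aid, the control flow below corresponds roughly to the
// following C-style sketch (illustrative only; the local names are ad hoc):
//
//      void bzero(void *b, size_t len) {
//          char *p = b;
//          if (len >= 32) {                            // long enough for DCBZ
//              size_t align  = (-(uintptr_t)p) & 0x1F; // bytes to next 32-byte boundary
//              size_t chunks = (len - align) >> 5;     // whole cache blocks
//              if (align) { /* store 32 zero bytes at p */ p += align; }
//              while (chunks--) { /* dcbz p */ p += 32; }
//              len = (len - align) & 0x1F;             // bytes left after the blocks
//          }
//          /* store the remaining len (< 32) bytes with 16/8/4/2/1-byte stores */
//      }
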
        .align  5
bzero_32:                       // void bzero(void *b, size_t len);
        cmplwi  cr7,r4,32       // too short for DCBZ?
        li      r0,0            // get a 0
        neg     r5,r3           // start to compute #bytes to align
        mr      r9,r3           // make copy of operand ptr (can't change r3)
        blt     cr7,Ltail       // length < 32, too short for DCBZ

// At least 32 bytes long, so compute alignment and #cache blocks.

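// (Note: r5 was loaded with 0-b by the "neg" above, so masking it with 0x1F
// here yields the number of bytes from b up to the next 32-byte boundary.)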
        andi.   r5,r5,0x1F      // r5 <- #bytes to 32-byte align
        sub     r4,r4,r5        // adjust length
        srwi    r8,r4,5         // r8 <- #32-byte chunks
        cmpwi   cr1,r8,0        // any chunks?
        mtctr   r8              // set up loop count
        beq     1f              // skip if already 32-byte aligned (r8 is nonzero here, so the loop runs)

// 32-byte align.  We just store 32 0s rather than test and use conditional
// branches.  Any of these bytes that land past the alignment point are simply
// zeroed again by the DCBZ loop or the trailing-byte stores.

        stw     r0,0(r9)
        stw     r0,4(r9)
        stw     r0,8(r9)
        stw     r0,12(r9)
        stw     r0,16(r9)
        stw     r0,20(r9)
        stw     r0,24(r9)
        stw     r0,28(r9)
        add     r9,r9,r5        // now r9 is 32-byte aligned
        beq     cr1,Ltail       // skip if no 32-byte chunks

// Loop doing 32-byte version of DCBZ instruction.
// NB: we take alignment exceptions on cache-inhibited memory.
// The kernel could be changed to zero cr7 when emulating a
// dcbz (as it does on 64-bit processors), so we could avoid all
// but the first such exception.
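// (DCBZ establishes the addressed 32-byte cache block in the data cache and
// zeroes all of it in a single operation, without first fetching the line
// from memory, which is what makes it faster than a sequence of stores.)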

1:
        andi.   r5,r4,0x1F      // will there be trailing bytes?
        b       2f              // branch over the alignment padding into the loop
        .align  4               // align the DCBZ loop on a 16-byte boundary
2:
        dcbz    0,r9            // zero another 32 bytes
        addi    r9,r9,32
        bdnz    2b

        beqlr                   // no trailing bytes

// Store trailing bytes.
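//
// mtcrf 0x01,r4 copies bits 28-31 of the count (the 8-, 4-, 2-, and 1-byte
// flags) into cr7, so "bf 28/29/30/31" can test them directly.  Bit 27 (the
// 0x10 or 16-byte flag) lies in a different CR field, so it is tested with
// "andi." instead.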

Ltail:
        andi.   r5,r4,0x10      // test bit 27 separately
        mtcrf   0x01,r4         // remaining byte count to cr7

        beq     2f              // no 16-byte chunks
        stw     r0,0(r9)
        stw     r0,4(r9)
        stw     r0,8(r9)
        stw     r0,12(r9)
        addi    r9,r9,16
2:
        bf      28,4f           // 8-byte chunk?
        stw     r0,0(r9)
        stw     r0,4(r9)
        addi    r9,r9,8
4:
        bf      29,5f           // word?
        stw     r0,0(r9)
        addi    r9,r9,4
5:
        bf      30,6f           // halfword?
        sth     r0,0(r9)
        addi    r9,r9,2
6:
        bflr    31              // byte?
        stb     r0,0(r9)
        blr

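// The descriptor below registers this routine with the commpage mechanism:
// it is installed at the _COMM_PAGE_BZERO slot on processors that report the
// kCache32 capability (32-byte cache lines), with no prohibited capability
// bits, and only in the 32-bit commpage.  (This reading of the arguments is
// inferred from how COMMPAGE_DESCRIPTOR is used elsewhere in the commpage
// sources, rather than stated here.)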
        COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32)