/*
 * Source: osfmk/ppc/commpage/memset_64.s (apple/xnu, xnu-792.6.61)
 * Obtained via the git.saurik.com mirror of apple/xnu.git.
 */
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 #define ASSEMBLER
24 #include <sys/appleapiopts.h>
25 #include <ppc/asm.h>
26 #include <machine/cpu_capabilities.h>
27 #include <machine/commpage.h>
28
29 /*
30 * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
31 * to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following
32 * simple transformations:
33 * - all word compares are changed to doubleword
34 * - all "srwi[.]" opcodes are changed to "srdi[.]"
35 * Nothing else is done. For this to work, the following rules must be
36 * carefully followed:
37 * - do not use carry or overflow
38 * - only use record mode if you are sure the results are mode-invariant
39 * for example, all "andi." and almost all "rlwinm." are fine
40 * - do not use "slwi", "slw", or "srw"
41 * An imaginative programmer could break the porting model in other ways, but the above
42 * are the most likely problem areas. It is perhaps surprising how well in practice
43 * this simple method works.
44 */
45
46 .text
47 .align 2
48
49
50 /* *********************
51 * * M E M S E T _ 6 4 *
52 * *********************
53 *
54 * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
55 * operands (zero operands are funneled into bzero.) This version is for a
56 * hypothetical processor that is 64-bit but not Altivec.
57 * It is not optimized, since it would only be used during bringup.
58 *
59 * Registers at entry:
60 * r4 = count of bytes to store (must be >= 32)
61 * r8 = ptr to the 1st byte to store (16-byte aligned)
62 * r9 = ptr to 16-byte pattern to store (16-byte aligned)
63 * When we return:
64 * r3 = not changed, since memset returns it
65 * r4 = bytes remaining to store (will be <32)
66 * r7 = not changed
67 * r8 = ptr to next byte to store (still 16-byte aligned)
68 * r12 = not changed (holds return value for memset)
69 */
70
71 memset_64:
72 srwi r0,r4,5 // get number of 32-byte chunks (>0); kernel rewrites this to "srdi" for the 64-bit commpage
73 ld r10,0(r9) // load the 16-byte pattern into r10 (first 8 bytes) ...
74 ld r11,8(r9) // ... and r11 (last 8 bytes)
75 rlwinm r4,r4,0,0x1F // mask down count: r4 = residual bytes (<32), returned to caller per the contract above
76 mtctr r0 // set up loop count (number of 32-byte chunks)
77
78 // Loop over 32-byte chunks: store the 16-byte pattern twice per iteration.
79 1:
80 std r10,0(r8)
81 std r11,8(r8)
82 std r10,16(r8)
83 std r11,24(r8)
84 addi r8,r8,32 // advance dst; r8 stays 16-byte aligned as promised to the caller
85 bdnz++ 1b // decrement CTR, branch if chunks remain ("++" = predict taken)
86
87 blr // return with r3/r7/r12 untouched, r4 = leftover byte count
88
89
// Register this routine with the commpage at _COMM_PAGE_MEMSET_PATTERN.
// NOTE(review): per the header comment ("64-bit but not Altivec"), the third
// argument (k64Bit) appears to be required CPU features and the fourth
// (kHasAltivec) features that must be absent — confirm against commpage.h.
// kCommPageBoth+kPort32to64 requests inclusion in both 32- and 64-bit
// commpages, with the mechanical 32->64 opcode port described above applied.
90 COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \
91 kCommPageBoth+kPort32to64)