osfmk/ppc/commpage/memset_64.s

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 #define ASSEMBLER
  25 #include <sys/appleapiopts.h>
  26 #include <ppc/asm.h>
  27 #include <machine/cpu_capabilities.h>
  28 #include <machine/commpage.h>
  29
  30 /*
  31  * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
  32  * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
  33  * simple transformations:
  34  *      - all word compares are changed to doubleword
  35  *      - all "srwi[.]" opcodes are changed to "srdi[.]"
  36  * Nothing else is done.  For this to work, the following rules must be
  37  * carefully followed:
  38  *      - do not use carry or overflow
  39  *      - only use record mode if you are sure the results are mode-invariant
  40  *        for example, all "andi." and almost all "rlwinm." are fine
  41  *      - do not use "slwi", "slw", or "srw"
  42  * An imaginative programmer could break the porting model in other ways, but the above
  43  * are the most likely problem areas.  It is perhaps surprising how well in practice
  44  * this simple method works.
  45  */
  46
  47         .text
  48         .align  2
  49
  50
  51 /* *********************
  52  * * M E M S E T _ 6 4 *
  53  * *********************
  54  *
  55  * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
  56  * operands (zero operands are funneled into bzero).  This version is for a
  57  * hypothetical processor that is 64-bit but does not have Altivec.
  58  * It is not optimized, since it would only be used during bringup.
  59  *
  60  * Registers at entry:
  61  *              r4 = count of bytes to store (must be >= 32)
  62  *      r8 = ptr to the 1st byte to store (16-byte aligned)
  63  *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
  64  * When we return:
  65  *              r3 = not changed, since memset returns it
  66  *      r4 = bytes remaining to store (will be <32)
  67  *      r7 = not changed
  68  *      r8 = ptr to next byte to store (still 16-byte aligned)
  69  *     r12 = not changed (holds return value for memset)
  70  */
  71
  72 memset_64:
  73         ld      r10,0(r9)               // r10 = pattern bytes 0-7
  74         ld      r11,8(r9)               // r11 = pattern bytes 8-15
  75         srwi    r0,r4,5                 // r0 = r4/32 = whole 32-byte chunks (nonzero, caller passes r4 >= 32)
  76         rlwinm  r4,r4,0,27,31           // r4 = r4 & 0x1F = leftover bytes (<32) handed back to the caller
  77         mtctr   r0                      // CTR = chunk count, consumed by the bdnz below
  78
  79         // Store loop: each pass lays down the 16-byte pattern twice (32 bytes).
  80 1:
  81         std     r10,0(r8)               // first copy of the pattern
  82         std     r11,8(r8)
  83         std     r10,16(r8)              // second copy of the pattern
  84         std     r11,24(r8)
  85         addi    r8,r8,32                // step r8 past the chunk just written
  86         bdnz++  1b                      // decrement CTR, repeat while chunks remain
  87
  88         blr                             // return; r0, r10, r11 and CTR have been overwritten
  89
  90
  91         COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \
  92                                 kCommPageBoth+kPort32to64)      // NOTE(review): presumably k64Bit is the required feature and kHasAltivec the excluded one (the routine header says "64-bit but not Altivec"), and kPort32to64 requests the 32->64 port described in the WARNING comment of this file -- confirm against the macro definition in <machine/commpage.h>