libsecurity_cryptkit/lib/giantPort_PPC_Gnu.s

   1 /*
   2  * giantPort_X_PPC.s - PPC/OS X giant port module
   3  *
   4  * Created 3/19/2001 by Doug Mitchell.
   5  */
   6
   7 /*
   8  * As of 3/19/2001, using this module results in no change in runtime
   9  * performance compared to using the inline C functions in
  10  * giantPort_Generic.h. Examination of the compiled code shows that
  11  * the GNU C compiler, when configured for -O2, generates almost
  12  * exactly the same code as we have here.
  13  * We'll leave this code in, to protect against changes in gcc, changes
  14  * in CFLAGS, and to serve as an example for other PPC implementations.
  15  */
  16
  17 #if             defined(__ppc__) && defined(__MACH__)
  18
  /*********************************************

Add two digits, return sum. Carry bit returned as an out parameter.

giantDigit giantAddDigits(
        register giantDigit dig1,
        register giantDigit dig2,
        register giantDigit *carry)     ...RETURNED, 0 or 1

The sum is the 32-bit (word) result of dig1 + dig2; *carry receives the
carry out of that word add.
**********************************************/
        .text
        .align 2
.globl _giantAddDigits
_giantAddDigits:
        /*
         * In:   r3 = dig1
         *       r4 = dig2
         *       r5 = carry (pointer to the out parameter)
         * Out:  r3 = sum
         * Uses: r7 (scratch), XER[CA]
         */

        /*
         * sum = dig1 + dig2, recording the carry out in XER[CA].
         * For an unsigned add, (sum < dig1) and (sum < dig2) are both
         * true exactly when the add wrapped, so the carry bit alone
         * carries the same information as the two compares.
         */
        addc    r3, r3, r4

        /* *carry = CA, materialized as exactly 0 or 1 */
        li      r7, 0
        addze   r7, r7                  /* r7 = 0 + CA */
        stw     r7, 0(r5)

        /* sum is already in r3 */
        blr
  63
  /*********************************************

Add a single digit value to a double digit accumulator in place.
Carry out of the MSD of the accumulator is not handled.

void giantAddDouble(
        giantDigit *accLow,             -- IN/OUT
        giantDigit *accHigh,            -- IN/OUT
        giantDigit val);

*accHigh is only read and written when the low-digit add produces a carry.
**********************************************/

        .align 2
.globl _giantAddDouble
_giantAddDouble:
        /*
         * In:   r3 = accLow  (pointer)
         *       r4 = accHigh (pointer)
         *       r5 = val
         * Uses: r6 = sumLo
         *       r7 = *accLow, then *accHigh
         *       cr0
         */

        /* giantDigit sumLo = *accLow + val; */
        lwz     r7, 0(r3)
        add     r6, r7, r5

        /*
         * Carry out of the low digit?  An unsigned sum that wrapped is
         * below both addends, and one that did not wrap is below neither,
         * so comparing against *accLow alone decides it.
         */
        cmplw   cr0, r6, r7
        bge     L_addDoubleStoreLow     /* sumLo >= *accLow: no carry */

        /* (*accHigh)++; */
        lwz     r7, 0(r4)
        addi    r7, r7, 1
        stw     r7, 0(r4)

L_addDoubleStoreLow:
        /* *accLow = sumLo; */
        stw     r6, 0(r3)
        blr
 106
 /*****************************************************************************

Subtract a - b, return difference. Borrow bit returned as an out parameter.

giantDigit giantSubDigits(
        giantDigit a,
        giantDigit b,
        giantDigit *borrow)             -- RETURNED, 0 or 1

The difference is the 32-bit (word) result of a - b; *borrow is 1 when
a < b as unsigned values, else 0.
******************************************************************************/

        .align 2
.globl _giantSubDigits
_giantSubDigits:

        /*
         * In:   r3 = a
         *       r4 = b
         *       r5 = borrow (pointer to the out parameter)
         * Out:  r3 = diff
         * Uses: r7 (scratch), XER[CA]
         */

        /*
         * diff = a - b.  subfc computes ~b + a + 1 and records the carry
         * out of that add in XER[CA]: CA = 1 when a >= b (no borrow),
         * CA = 0 when a < b (borrow).
         */
        subfc   r3, r4, r3

        /* *borrow = !CA, materialized as exactly 0 or 1 */
        li      r7, 0
        addze   r7, r7                  /* r7 = CA */
        xori    r7, r7, 1               /* r7 = 1 - CA */
        stw     r7, 0(r5)

        /* diff is already in r3 */
        blr
 148
 /*****************************************************************************

Multiply two digits, return two digits.

void giantMulDigits(
        giantDigit      dig1,
        giantDigit      dig2,
        giantDigit      *lowProduct,    -- RETURNED, low digit
        giantDigit      *hiProduct)     -- RETURNED, high digit

The two digits are the high and low words of the unsigned product
dig1 * dig2.
******************************************************************************/

        .align 2
.globl _giantMulDigits
_giantMulDigits:

        /*
         * In:   r3 = dig1
         *       r4 = dig2
         *       r5 = lowProduct (pointer)
         *       r6 = hiProduct  (pointer)
         * Uses: r7 = high word of the product
         *       r8 = low word of the product
         */

        /* dprod = (unsigned long long)dig1 * (unsigned long long)dig2; */
        mulhwu  r7, r3, r4              /* r7 = hi(dig1 * dig2), unsigned */
        mullw   r8, r3, r4              /* r8 = low(dig1 * dig2) */

        /* high digit is written first, then the low digit */
        stw     r7, 0(r6)               /* *hiProduct  = hi word */
        stw     r8, 0(r5)               /* *lowProduct = low word */
        blr

 180
 181
 /*****************************************************************************

Multiply a vector of giantDigits, candVector, by a single giantDigit,
plierDigit, adding results into prodVector. Returns m.s. digit from
final multiply; only candLength digits of *prodVector will be written.

giantDigit VectorMultiply(
        giantDigit plierDigit,
        giantDigit *candVector,
        unsigned candLength,
        giantDigit *prodVector)

Each iteration computes the double-digit value

        prod = *candVector * plierDigit + *prodVector + lastCarry

stores its low digit to *prodVector, and keeps its high digit as the
lastCarry for the next iteration. With candLength == 0 nothing is
written and 0 is returned.
******************************************************************************/

/*
 * Register definitions
 * Input parameters:
 */
#define plierDigit      r3
#define candVector      r4
#define candLength      r5
#define prodVector      r6

/*
 * PPC ABI specifies:
 *    r3..r10 for parameter passing
 *    r11, r12 volatile (caller saved, we can write)
 *
 * We'll use the remainder of the registers normally used for parameter passing
 * and also one of the other volatile registers for local variables. The loop
 * count lives in CTR; carries between digits travel through XER[CA].
 */
#define lastCarry       r8
#define prodLo          r9
#define prodHi          r10
#define scr1            r11

        .align 2
.globl _VectorMultiply
_VectorMultiply:

        /* giantDigit lastCarry = 0; */
        li      lastCarry, 0

        /*
         * for(candDex=0; candDex<candLength; ++candDex) {
         *
         * bdnz decrements CTR before testing it, so a zero count must
         * bypass the loop entirely.
         */
        cmplwi  cr0, candLength, 0
        beq     L_vmDone
        mtctr   candLength

L_vmLoop:
        /* prodHi:prodLo = *(candVector++) * plierDigit */
        lwz     scr1, 0(candVector)             /* scr1 = *candVector */
        addi    candVector, candVector, 4       /* candVector++ */
        mullw   prodLo, scr1, plierDigit        /* low word of the product */
        mulhwu  prodHi, scr1, plierDigit        /* high word, unsigned */

        /* giantAddDouble(&prodLo, &prodHi, *prodVector); */
        lwz     scr1, 0(prodVector)             /* scr1 = *prodVector */
        addc    prodLo, prodLo, scr1            /* prodLo += *prodVector, CA = carry */
        addze   prodHi, prodHi                  /* prodHi += CA */

        /*
         * giantAddDouble(&prodLo, &prodHi, lastCarry); lastCarry = prodHi;
         *
         * The high digit cannot overflow here: the largest possible value
         * (2^32-1)*(2^32-1) + 2*(2^32-1) is exactly 2^64-1.
         */
        addc    prodLo, prodLo, lastCarry       /* prodLo += lastCarry, CA = carry */
        addze   lastCarry, prodHi               /* lastCarry = prodHi + CA */

        /* *(prodVector++) = prodLo; */
        stw     prodLo, 0(prodVector)
        addi    prodVector, prodVector, 4

        /* } */
        bdnz    L_vmLoop

L_vmDone:
        /* return lastCarry; */
        mr      r3, lastCarry
        blr
 281
 282 #endif  /* defined(__ppc__) && defined(__MACH__) */