src/freetype/base/ftcalc.c

   1 /***************************************************************************/
   2 /*                                                                         */
   3 /*  ftcalc.c                                                               */
   4 /*                                                                         */
   5 /*    Arithmetic computations (body).                                      */
   6 /*                                                                         */
   7 /*  Copyright 1996-2000 by                                                 */
   8 /*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
   9 /*                                                                         */
  10 /*  This file is part of the FreeType project, and may only be used,       */
  11 /*  modified, and distributed under the terms of the FreeType project      */
  12 /*  license, LICENSE.TXT.  By continuing to use, modify, or distribute     */
  13 /*  this file you indicate that you have read the license and              */
  14 /*  understand and accept it fully.                                        */
  15 /*                                                                         */
  16 /***************************************************************************/
  17
  18   /*************************************************************************/
  19   /*                                                                       */
  20   /* Support for 1-complement arithmetic has been totally dropped in this  */
  21   /* release.  You can still write your own code if you need it.           */
  22   /*                                                                       */
  23   /*************************************************************************/
  24
  25   /*************************************************************************/
  26   /*                                                                       */
  27   /* Implementing basic computation routines.                              */
  28   /*                                                                       */
  29   /* FT_MulDiv(), FT_MulFix(), and FT_DivFix() are declared in freetype.h. */
  30   /*                                                                       */
  31   /*************************************************************************/
  32
  33
  34 #include <freetype/internal/ftcalc.h>
  35 #include <freetype/internal/ftdebug.h>
  36 #include <freetype/internal/ftobjs.h>  /* for ABS() */
  37
  38
  39   /*************************************************************************/
  40   /*                                                                       */
  41   /* The macro FT_COMPONENT is used in trace mode.  It is an implicit      */
  42   /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log  */
  43   /* messages during execution.                                            */
  44   /*                                                                       */
  45 #undef  FT_COMPONENT
  46 #define FT_COMPONENT  trace_calc
  47
  48
  49 #ifdef FT_CONFIG_OPTION_OLD_CALCS
  50
  51   static const FT_Long  ft_square_roots[63] =
  52   {
  53        1L,    1L,    2L,     3L,     4L,     5L,     8L,    11L,
  54       16L,   22L,   32L,    45L,    64L,    90L,   128L,   181L,
  55      256L,  362L,  512L,   724L,  1024L,  1448L,  2048L,  2896L,
  56     4096L, 5892L, 8192L, 11585L, 16384L, 23170L, 32768L, 46340L,
  57
  58       65536L,   92681L,  131072L,   185363L,   262144L,   370727L,
  59      524288L,  741455L, 1048576L,  1482910L,  2097152L,  2965820L,
  60     4194304L, 5931641L, 8388608L, 11863283L, 16777216L, 23726566L,
  61
  62       33554432L,   47453132L,   67108864L,   94906265L,
  63      134217728L,  189812531L,  268435456L,  379625062L,
  64      536870912L,  759250125L, 1073741824L, 1518500250L,
  65     2147483647L
  66   };
  67
  68 #else
  69
  70   /*************************************************************************/
  71   /*                                                                       */
  72   /* <Function>                                                            */
  73   /*    FT_Sqrt32                                                          */
  74   /*                                                                       */
  75   /* <Description>                                                         */
  76   /*    Computes the square root of an Int32 integer (which will be        */
  77   /*    handled as an unsigned long value).                                */
  78   /*                                                                       */
  79   /* <Input>                                                               */
  80   /*    x :: The value to compute the root for.                            */
  81   /*                                                                       */
  82   /* <Return>                                                              */
  83   /*    The result of `sqrt(x)'.                                           */
  84   /*                                                                       */
  85   FT_EXPORT_FUNC( FT_Int32 )  FT_Sqrt32( FT_Int32  x )
  86   {
  87     FT_ULong  val, root, newroot, mask;
  88
  89
  90     root = 0;
  91     mask = 0x40000000L;
  92     val  = (FT_ULong)x;
  93
  94     do
  95     {
  96       newroot = root + mask;
  97       if ( newroot <= val )
  98       {
  99         val -= newroot;
 100         root = newroot + mask;
 101       }
 102
 103       root >>= 1;
 104       mask >>= 2;
 105
 106     } while ( mask != 0 );
 107
 108     return root;
 109   }
 110
 111 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
 112
 113
 114 #ifdef FT_LONG64
 115
 116   /*************************************************************************/
 117   /*                                                                       */
 118   /* <Function>                                                            */
 119   /*    FT_MulDiv                                                          */
 120   /*                                                                       */
 121   /* <Description>                                                         */
 122   /*    A very simple function used to perform the computation `(a*b)/c'   */
 123   /*    with maximal accuracy (it uses a 64-bit intermediate integer       */
 124   /*    whenever necessary).                                               */
 125   /*                                                                       */
 126   /*    This function isn't necessarily as fast as some processor specific */
 127   /*    operations, but is at least completely portable.                   */
 128   /*                                                                       */
 129   /* <Input>                                                               */
 130   /*    a :: The first multiplier.                                         */
 131   /*    b :: The second multiplier.                                        */
 132   /*    c :: The divisor.                                                  */
 133   /*                                                                       */
 134   /* <Return>                                                              */
 135   /*    The result of `(a*b)/c'.  This function never traps when trying to */
 136   /*    divide by zero; it simply returns `MaxInt' or `MinInt' depending   */
 137   /*    on the signs of `a' and `b'.                                       */
 138   /*                                                                       */
 139   FT_EXPORT_FUNC( FT_Long )  FT_MulDiv( FT_Long  a,
 140                                         FT_Long  b,
 141                                         FT_Long  c )
 142   {
 143     FT_Int s;
 144
 145
 146     s = 1;
 147     if ( a < 0 ) { a = -a; s = -s; }
 148     if ( b < 0 ) { b = -b; s = -s; }
 149     if ( c < 0 ) { c = -c; s = -s; }
 150
 151     return s * ( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c
 152                        : 0x7FFFFFFFL );
 153   }
 154
 155
 156   /*************************************************************************/
 157   /*                                                                       */
 158   /* <Function>                                                            */
 159   /*    FT_MulFix                                                          */
 160   /*                                                                       */
 161   /* <Description>                                                         */
 162   /*    A very simple function used to perform the computation             */
 163   /*    `(a*b)/0x10000' with maximal accuracy.  Most of the time this is   */
 164   /*    used to multiply a given value by a 16.16 fixed float factor.      */
 165   /*                                                                       */
 166   /* <Input>                                                               */
 167   /*    a :: The first multiplier.                                         */
 168   /*    b :: The second multiplier.  Use a 16.16 factor here whenever      */
 169   /*         possible (see note below).                                    */
 170   /*                                                                       */
 171   /* <Return>                                                              */
 172   /*    The result of `(a*b)/0x10000'.                                     */
 173   /*                                                                       */
 174   /* <Note>                                                                */
 175   /*    This function has been optimized for the case where the absolute   */
 176   /*    value of `a' is less than 2048, and `b' is a 16.16 scaling factor. */
 177   /*    As this happens mainly when scaling from notional units to         */
 178   /*    fractional pixels in FreeType, it resulted in noticeable speed     */
 179   /*    improvements between versions 2.x and 1.x.                         */
 180   /*                                                                       */
 181   /*    As a conclusion, always try to place a 16.16 factor as the         */
 182   /*    _second_ argument of this function; this can make a great          */
 183   /*    difference.                                                        */
 184   /*                                                                       */
 185   FT_EXPORT_FUNC( FT_Long )  FT_MulFix( FT_Long  a,
 186                                         FT_Long  b )
 187   {
 188     FT_Int  s;
 189
 190
 191     s = 1;
 192     if ( a < 0 ) { a = -a; s = -s; }
 193     if ( b < 0 ) { b = -b; s = -s; }
 194
 195     return s * (FT_Long)( ( (FT_Int64)a * b + 0x8000 ) >> 16 );
 196   }
 197
 198
 199   /*************************************************************************/
 200   /*                                                                       */
 201   /* <Function>                                                            */
 202   /*    FT_DivFix                                                          */
 203   /*                                                                       */
 204   /* <Description>                                                         */
 205   /*    A very simple function used to perform the computation             */
 206   /*    `(a*0x10000)/b' with maximal accuracy.  Most of the time, this is  */
 207   /*    used to divide a given value by a 16.16 fixed float factor.        */
 208   /*                                                                       */
 209   /* <Input>                                                               */
 210   /*    a :: The first multiplier.                                         */
 211   /*    b :: The second multiplier.  Use a 16.16 factor here whenever      */
 212   /*         possible (see note below).                                    */
 213   /*                                                                       */
 214   /* <Return>                                                              */
 215   /*    The result of `(a*0x10000)/b'.                                     */
 216   /*                                                                       */
 217   /* <Note>                                                                */
 218   /*    The optimization for FT_DivFix() is simple: If (a << 16) fits in   */
 219   /*    32 bits, then the division is computed directly.  Otherwise, we    */
 220   /*    use a specialized version of the old FT_MulDiv64().                */
 221   /*                                                                       */
 222   FT_EXPORT_FUNC( FT_Long )  FT_DivFix( FT_Long  a,
 223                                         FT_Long  b )
 224   {
 225     FT_Int32   s;
 226     FT_UInt32  q;
 227
 228
 229     s  = a; a = ABS(a);
 230     s ^= b; b = ABS(b);
 231
 232     if ( b == 0 )
 233       /* check for division by 0 */
 234       q = 0x7FFFFFFFL;
 235     else
 236       /* compute result directly */
 237       q = ( (FT_Int64)a << 16 ) / b;
 238
 239     return (FT_Int32)( s < 0 ? -q : q );
 240   }
 241
 242
 243 #ifdef FT_CONFIG_OPTION_OLD_CALCS
 244
 245   /* a helper function for FT_Sqrt64() */
 246
 247   static
 248   int  ft_order64( FT_Int64  z )
 249   {
 250     int  j = 0;
 251
 252
 253     while ( z )
 254     {
 255       z = (unsigned FT_INT64)z >> 1;
 256       j++;
 257     }
 258     return j - 1;
 259   }
 260
 261
 262   /*************************************************************************/
 263   /*                                                                       */
 264   /* <Function>                                                            */
 265   /*    FT_Sqrt64                                                          */
 266   /*                                                                       */
 267   /* <Description>                                                         */
 268   /*    Computes the square root of a 64-bit value.  That sounds stupid,   */
 269   /*    but it is needed to obtain maximal accuracy in the TrueType        */
 270   /*    bytecode interpreter.                                              */
 271   /*                                                                       */
 272   /* <Input>                                                               */
 273   /*    l :: A 64-bit integer.                                             */
 274   /*                                                                       */
 275   /* <Return>                                                              */
 276   /*    The 32-bit square-root.                                            */
 277   /*                                                                       */
 278   FT_EXPORT_FUNC( FT_Int32 )  FT_Sqrt64( FT_Int64  l )
 279   {
 280     FT_Int64  r, s;
 281
 282
 283     if ( l <= 0 ) return 0;
 284     if ( l == 1 ) return 1;
 285
 286     r = ft_square_roots[ft_order64( l )];
 287
 288     do
 289     {
 290       s = r;
 291       r = ( r + l / r ) >> 1;
 292
 293     } while ( r > s || r * r > l );
 294
 295     return r;
 296   }
 297
 298 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
 299
 300
 301 #else /* FT_LONG64 */
 302
 303
 304   /*************************************************************************/
 305   /*                                                                       */
 306   /* <Function>                                                            */
 307   /*    FT_MulDiv                                                          */
 308   /*                                                                       */
 309   /* <Description>                                                         */
 310   /*    A very simple function used to perform the computation `(a*b)/c'   */
 311   /*    with maximal accuracy (it uses a 64-bit intermediate integer       */
 312   /*    whenever necessary).                                               */
 313   /*                                                                       */
 314   /*    This function isn't necessarily as fast as some processor specific */
 315   /*    operations, but is at least completely portable.                   */
 316   /*                                                                       */
 317   /* <Input>                                                               */
 318   /*    a :: The first multiplier.                                         */
 319   /*    b :: The second multiplier.                                        */
 320   /*    c :: The divisor.                                                  */
 321   /*                                                                       */
 322   /* <Return>                                                              */
 323   /*    The result of `(a*b)/c'.  This function never traps when trying to */
 324   /*    divide by zero; it simply returns `MaxInt' or `MinInt' depending   */
 325   /*    on the signs of `a' and `b'.                                       */
 326   /*                                                                       */
 327   /* <Note>                                                                */
 328   /*    The FT_MulDiv() function has been optimized thanks to ideas from   */
 329   /*    Graham Asher.  The trick is to optimize computation if everything  */
 330   /*    fits within 32 bits (a rather common case).                        */
 331   /*                                                                       */
 332   /*    We compute `a*b+c/2', then divide it by `c' (positive values).     */
 333   /*                                                                       */
 334   /*      46340 is FLOOR(SQRT(2^31-1)).                                    */
 335   /*                                                                       */
 336   /*      if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 )       */
 337   /*                                                                       */
 338   /*      0x7FFFFFFF - 0x7FFEA810 = 0x157F0                                */
 339   /*                                                                       */
 340   /*      if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF )              */
 341   /*                                                                       */
 342   /*      and 2*0x157F0 = 176096.                                          */
 343   /*                                                                       */
 344   FT_EXPORT_FUNC( FT_Long )  FT_MulDiv( FT_Long  a,
 345                                         FT_Long  b,
 346                                         FT_Long  c )
 347   {
 348     long   s;
 349
 350
 351     if ( a == 0 || b == c )
 352       return a;
 353
 354     s  = a; a = ABS( a );
 355     s ^= b; b = ABS( b );
 356     s ^= c; c = ABS( c );
 357
 358     if ( a <= 46340 && b <= 46340 && c <= 176095L && c > 0 )
 359     {
 360       a = ( a * b + ( c >> 1 ) ) / c;
 361     }
 362     else if ( c > 0 )
 363     {
 364       FT_Int64  temp, temp2;
 365
 366
 367       FT_MulTo64( a, b, &temp );
 368       temp2.hi = (FT_Int32)( c >> 31 );
 369       temp2.lo = (FT_UInt32)( c / 2 );
 370       FT_Add64( &temp, &temp2, &temp );
 371       a = FT_Div64by32( &temp, c );
 372     }
 373     else
 374       a = 0x7FFFFFFFL;
 375
 376     return ( s < 0 ? -a : a );
 377   }
 378
 379
 380   /*************************************************************************/
 381   /*                                                                       */
 382   /* <Function>                                                            */
 383   /*    FT_MulFix                                                          */
 384   /*                                                                       */
 385   /* <Description>                                                         */
 386   /*    A very simple function used to perform the computation             */
 387   /*    `(a*b)/0x10000' with maximal accuracy.  Most of the time, this is  */
 388   /*    used to multiply a given value by a 16.16 fixed float factor.      */
 389   /*                                                                       */
 390   /* <Input>                                                               */
 391   /*    a :: The first multiplier.                                         */
 392   /*    b :: The second multiplier.  Use a 16.16 factor here whenever      */
 393   /*         possible (see note below).                                    */
 394   /*                                                                       */
 395   /* <Return>                                                              */
 396   /*    The result of `(a*b)/0x10000'.                                     */
 397   /*                                                                       */
 398   /* <Note>                                                                */
 399   /*    The optimization for FT_MulFix() is different.  We could simply be */
 400   /*    happy by applying the same principles as with FT_MulDiv(), because */
 401   /*                                                                       */
 402   /*      c = 0x10000 < 176096                                             */
 403   /*                                                                       */
 404   /*    However, in most cases, we have a `b' with a value around 0x10000  */
 405   /*    which is greater than 46340.                                       */
 406   /*                                                                       */
 407   /*    According to some testing, most cases have `a' < 2048, so a good   */
 408   /*    idea is to use bounds like 2048 and 1048576 (=floor((2^31-1)/2048) */
 409   /*    for `a' and `b', respectively.                                     */
 410   /*                                                                       */
 411   FT_EXPORT_FUNC( FT_Long )  FT_MulFix( FT_Long  a,
 412                                         FT_Long  b )
 413   {
 414     FT_Long   s;
 415     FT_ULong  ua, ub;
 416
 417
 418     if ( a == 0 || b == 0x10000L )
 419       return a;
 420
 421     s  = a; a = ABS(a);
 422     s ^= b; b = ABS(b);
 423
 424     ua = (FT_ULong)a;
 425     ub = (FT_ULong)b;
 426
 427     if ( ua <= 2048 && ub <= 1048576L )
 428     {
 429       ua = ( ua * ub + 0x8000 ) >> 16;
 430     }
 431     else
 432     {
 433       FT_ULong  al = ua & 0xFFFF;
 434
 435
 436       ua = ( ua >> 16 ) * ub +
 437            al * ( ub >> 16 ) +
 438            ( al * ( ub & 0xFFFF ) >> 16 );
 439     }
 440
 441     return ( s < 0 ? -(FT_Long)ua : ua );
 442   }
 443
 444
 445   /*************************************************************************/
 446   /*                                                                       */
 447   /* <Function>                                                            */
 448   /*    FT_DivFix                                                          */
 449   /*                                                                       */
 450   /* <Description>                                                         */
 451   /*    A very simple function used to perform the computation             */
 452   /*    `(a*0x10000)/b' with maximal accuracy.  Most of the time, this is  */
 453   /*    used to divide a given value by a 16.16 fixed float factor.        */
 454   /*                                                                       */
 455   /* <Input>                                                               */
 456   /*    a :: The first multiplier.                                         */
 457   /*    b :: The second multiplier.  Use a 16.16 factor here whenever      */
 458   /*         possible (see note below).                                    */
 459   /*                                                                       */
 460   /* <Return>                                                              */
 461   /*    The result of `(a*0x10000)/b'.                                     */
 462   /*                                                                       */
 463   /* <Note>                                                                */
 464   /*    The optimization for FT_DivFix() is simple: If (a << 16) fits into */
 465   /*    32 bits, then the division is computed directly.  Otherwise, we    */
 466   /*    use a specialized version of the old FT_MulDiv64().                */
 467   /*                                                                       */
 468   FT_EXPORT_FUNC( FT_Long )  FT_DivFix( FT_Long  a,
 469                                         FT_Long  b )
 470   {
 471     FT_Int32   s;
 472     FT_UInt32  q;
 473
 474
 475     s  = a; a = ABS(a);
 476     s ^= b; b = ABS(b);
 477
 478     if ( b == 0 )
 479     {
 480       /* check for division by 0 */
 481       q = 0x7FFFFFFFL;
 482     }
 483     else if ( ( a >> 16 ) == 0 )
 484     {
 485       /* compute result directly */
 486       q = (FT_UInt32)( a << 16 ) / (FT_UInt32)b;
 487     }
 488     else
 489     {
 490       /* we need more bits; we have to do it by hand */
 491       FT_UInt32  c;
 492
 493
 494       q  = ( a / b ) << 16;
 495       c  = a % b;
 496
 497       /* we must compute C*0x10000/B: we simply shift C and B so */
 498       /* C becomes smaller than 16 bits                          */
 499       while ( c >> 16 )
 500       {
 501         c >>= 1;
 502         b <<= 1;
 503       }
 504
 505       q += ( c << 16 ) / b;
 506     }
 507
 508     return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
 509   }
 510
 511
 512   /*************************************************************************/
 513   /*                                                                       */
 514   /* <Function>                                                            */
 515   /*    FT_Add64                                                           */
 516   /*                                                                       */
 517   /* <Description>                                                         */
 518   /*    Add two Int64 values.                                              */
 519   /*                                                                       */
 520   /* <Input>                                                               */
 521   /*    x :: A pointer to the first value to be added.                     */
 522   /*    y :: A pointer to the second value to be added.                    */
 523   /*                                                                       */
 524   /* <Output>                                                              */
 525   /*    z :: A pointer to the result of `x + y'.                           */
 526   /*                                                                       */
 527   /* <Note>                                                                */
 528   /*    Will be wrapped by the ADD_64() macro.                             */
 529   /*                                                                       */
 530   FT_EXPORT_FUNC( void )  FT_Add64( FT_Int64*  x,
 531                                     FT_Int64*  y,
 532                                     FT_Int64*  z )
 533   {
 534     register FT_UInt32  lo, hi;
 535
 536
 537     lo = x->lo + y->lo;
 538     hi = x->hi + y->hi + ( lo < x->lo );
 539
 540     z->lo = lo;
 541     z->hi = hi;
 542   }
 543
 544
 545   /*************************************************************************/
 546   /*                                                                       */
 547   /* <Function>                                                            */
 548   /*    FT_MulTo64                                                         */
 549   /*                                                                       */
 550   /* <Description>                                                         */
 551   /*    Multiplies two Int32 integers.  Returns an Int64 integer.          */
 552   /*                                                                       */
 553   /* <Input>                                                               */
 554   /*    x :: The first multiplier.                                         */
 555   /*    y :: The second multiplier.                                        */
 556   /*                                                                       */
 557   /* <Output>                                                              */
 558   /*    z :: A pointer to the result of `x * y'.                           */
 559   /*                                                                       */
 560   /* <Note>                                                                */
 561   /*    Will be wrapped by the MUL_64() macro.                             */
 562   /*                                                                       */
 563   FT_EXPORT_FUNC( void )  FT_MulTo64( FT_Int32   x,
 564                                       FT_Int32   y,
 565                                       FT_Int64*  z )
 566   {
 567     FT_Int32   s;
 568
 569
 570     s  = x; x = ABS( x );
 571     s ^= y; y = ABS( y );
 572
 573     {
 574       FT_UInt32  lo1, hi1, lo2, hi2, lo, hi, i1, i2;
 575
 576
 577       lo1 = x & 0x0000FFFF;  hi1 = x >> 16;
 578       lo2 = y & 0x0000FFFF;  hi2 = y >> 16;
 579
 580       lo = lo1 * lo2;
 581       i1 = lo1 * hi2;
 582       i2 = lo2 * hi1;
 583       hi = hi1 * hi2;
 584
 585       /* Check carry overflow of i1 + i2 */
 586       i1 += i2;
 587       if ( i1 < i2 )
 588         hi += 1L << 16;
 589
 590       hi += i1 >> 16;
 591       i1  = i1 << 16;
 592
 593       /* Check carry overflow of i1 + lo */
 594       lo += i1;
 595       hi += ( lo < i1 );
 596
 597       z->lo = lo;
 598       z->hi = hi;
 599     }
 600
 601     if ( s < 0 )
 602     {
 603       z->lo = (FT_UInt32)-(FT_Int32)z->lo;
 604       z->hi = ~z->hi + !( z->lo );
 605     }
 606   }
 607
 608
 609   /*************************************************************************/
 610   /*                                                                       */
 611   /* <Function>                                                            */
 612   /*    FT_Div64by32                                                       */
 613   /*                                                                       */
 614   /* <Description>                                                         */
 615   /*    Divides an Int64 value by an Int32 value.  Returns an Int32        */
 616   /*    integer.                                                           */
 617   /*                                                                       */
 618   /* <Input>                                                               */
 619   /*    x :: A pointer to the dividend.                                    */
 620   /*    y :: The divisor.                                                  */
 621   /*                                                                       */
 622   /* <Return>                                                              */
 623   /*    The result of `x / y'.                                             */
 624   /*                                                                       */
 625   /* <Note>                                                                */
 626   /*    Will be wrapped by the DIV_64() macro.                             */
 627   /*                                                                       */
 628   FT_EXPORT_FUNC( FT_Int32 )  FT_Div64by32( FT_Int64*  x,
 629                                             FT_Int32   y )
 630   {
 631     FT_Int32   s;
 632     FT_UInt32  q, r, i, lo;
 633
 634
 635     s  = x->hi;
 636     if ( s < 0 )
 637     {
 638       x->lo = (FT_UInt32)-(FT_Int32)x->lo;
 639       x->hi = ~x->hi + !( x->lo );
 640     }
 641     s ^= y;  y = ABS( y );
 642
 643     /* Shortcut */
 644     if ( x->hi == 0 )
 645     {
 646       if ( y > 0 )
 647         q = x->lo / y;
 648       else
 649         q = 0x7FFFFFFFL;
 650
 651       return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
 652     }
 653
 654     r  = x->hi;
 655     lo = x->lo;
 656
 657     if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */
 658       return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL );
 659                              /* Return Max/Min Int32 if division overflow.  */
 660                              /* This includes division by zero!             */
 661     q = 0;
 662     for ( i = 0; i < 32; i++ )
 663     {
 664       r <<= 1;
 665       q <<= 1;
 666       r  |= lo >> 31;
 667
 668       if ( r >= (FT_UInt32)y )
 669       {
 670         r -= y;
 671         q |= 1;
 672       }
 673       lo <<= 1;
 674     }
 675
 676     return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
 677   }
 678
 679
 680 #ifdef FT_CONFIG_OPTION_OLD_CALCS
 681
 682
 683   /* two helper functions for FT_Sqrt64() */
 684
 685   static
 686   void  FT_Sub64( FT_Int64*  x,
 687                   FT_Int64*  y,
 688                   FT_Int64*  z )
 689   {
 690     register FT_UInt32  lo, hi;
 691
 692
 693     lo = x->lo - y->lo;
 694     hi = x->hi - y->hi - ( (FT_Int32)lo < 0 );
 695
 696     z->lo = lo;
 697     z->hi = hi;
 698   }
 699
 700
 701   static
 702   int  ft_order64( FT_Int64*  z )
 703   {
 704     FT_UInt32  i;
 705     int        j;
 706
 707
 708     i = z->lo;
 709     j = 0;
 710     if ( z->hi )
 711     {
 712       i = z->hi;
 713       j = 32;
 714     }
 715
 716     while ( i > 0 )
 717     {
 718       i >>= 1;
 719       j++;
 720     }
 721     return j - 1;
 722   }
 723
 724
 725   /*************************************************************************/
 726   /*                                                                       */
 727   /* <Function>                                                            */
 728   /*    FT_Sqrt64                                                          */
 729   /*                                                                       */
 730   /* <Description>                                                         */
 731   /*    Computes the square root of a 64-bits value.  That sounds stupid,  */
 732   /*    but it is needed to obtain maximal accuracy in the TrueType        */
 733   /*    bytecode interpreter.                                              */
 734   /*                                                                       */
 735   /* <Input>                                                               */
 736   /*    z :: A pointer to a 64-bit integer.                                */
 737   /*                                                                       */
 738   /* <Return>                                                              */
 739   /*    The 32-bit square-root.                                            */
 740   /*                                                                       */
 741   FT_EXPORT_FUNC( FT_Int32 )  FT_Sqrt64( FT_Int64*  l )
 742   {
 743     FT_Int64  l2;
 744     FT_Int32  r, s;
 745
 746
 747     if ( (FT_Int32)l->hi < 0          ||
 748          ( l->hi == 0 && l->lo == 0 ) )
 749       return 0;
 750
 751     s = ft_order64( l );
 752     if ( s == 0 )
 753       return 1;
 754
 755     r = ft_square_roots[s];
 756     do
 757     {
 758       s = r;
 759       r = ( r + FT_Div64by32( l, r ) ) >> 1;
 760       FT_MulTo64( r, r,   &l2 );
 761       FT_Sub64  ( l, &l2, &l2 );
 762
 763     } while ( r > s || (FT_Int32)l2.hi < 0 );
 764
 765     return r;
 766   }
 767
 768 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
 769
 770 #endif /* FT_LONG64 */
 771
 772
 773 /* END */