1 /***************************************************************************/
5 /* Arithmetic computations (body). */
7 /* Copyright 1996-2000 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
18 /*************************************************************************/
20 /* Support for 1-complement arithmetic has been totally dropped in this */
21 /* release. You can still write your own code if you need it. */
23 /*************************************************************************/
25 /*************************************************************************/
27 /* Implementing basic computation routines. */
29 /* FT_MulDiv(), FT_MulFix(), and FT_DivFix() are declared in freetype.h. */
31 /*************************************************************************/
34 #include <freetype/internal/ftcalc.h>
35 #include <freetype/internal/ftdebug.h>
36 #include <freetype/internal/ftobjs.h> /* for ABS() */
39 /*************************************************************************/
41 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
42 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
43 /* messages during execution. */
46 #define FT_COMPONENT trace_calc
49 #ifdef FT_CONFIG_OPTION_OLD_CALCS
51 static const FT_Long ft_square_roots
[63] =
53 1L, 1L, 2L, 3L, 4L, 5L, 8L, 11L,
54 16L, 22L, 32L, 45L, 64L, 90L, 128L, 181L,
55 256L, 362L, 512L, 724L, 1024L, 1448L, 2048L, 2896L,
56 4096L, 5892L, 8192L, 11585L, 16384L, 23170L, 32768L, 46340L,
58 65536L, 92681L, 131072L, 185363L, 262144L, 370727L,
59 524288L, 741455L, 1048576L, 1482910L, 2097152L, 2965820L,
60 4194304L, 5931641L, 8388608L, 11863283L, 16777216L, 23726566L,
62 33554432L, 47453132L, 67108864L, 94906265L,
63 134217728L, 189812531L, 268435456L, 379625062L,
64 536870912L, 759250125L, 1073741824L, 1518500250L,
70 /*************************************************************************/
76 /* Computes the square root of an Int32 integer (which will be */
77 /* handled as an unsigned long value). */
80 /* x :: The value to compute the root for. */
83 /* The result of `sqrt(x)'. */
85 FT_EXPORT_FUNC( FT_Int32
) FT_Sqrt32( FT_Int32 x
)
87 FT_ULong val
, root
, newroot
, mask
;
96 newroot
= root
+ mask
;
100 root
= newroot
+ mask
;
106 } while ( mask
!= 0 );
111 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
116 /*************************************************************************/
122 /* A very simple function used to perform the computation `(a*b)/c' */
123 /* with maximal accuracy (it uses a 64-bit intermediate integer */
124 /* whenever necessary). */
126 /* This function isn't necessarily as fast as some processor specific */
127 /* operations, but is at least completely portable. */
130 /* a :: The first multiplier. */
131 /* b :: The second multiplier. */
132 /* c :: The divisor. */
135 /* The result of `(a*b)/c'. This function never traps when trying to */
136 /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */
137 /* on the signs of `a' and `b'. */
139 FT_EXPORT_FUNC( FT_Long
) FT_MulDiv( FT_Long a
,
147 if ( a
< 0 ) { a
= -a
; s
= -s
; }
148 if ( b
< 0 ) { b
= -b
; s
= -s
; }
149 if ( c
< 0 ) { c
= -c
; s
= -s
; }
151 return s
* ( c
> 0 ? ( (FT_Int64
)a
* b
+ ( c
>> 1 ) ) / c
156 /*************************************************************************/
162 /* A very simple function used to perform the computation */
163 /* `(a*b)/0x10000' with maximal accuracy. Most of the time this is */
164 /* used to multiply a given value by a 16.16 fixed float factor. */
167 /* a :: The first multiplier. */
168 /* b :: The second multiplier. Use a 16.16 factor here whenever */
169 /* possible (see note below). */
172 /* The result of `(a*b)/0x10000'. */
175 /* This function has been optimized for the case where the absolute */
176 /* value of `a' is less than 2048, and `b' is a 16.16 scaling factor. */
177 /* As this happens mainly when scaling from notional units to */
178 /* fractional pixels in FreeType, it resulted in noticeable speed */
179 /* improvements between versions 2.x and 1.x. */
181 /* As a conclusion, always try to place a 16.16 factor as the */
182 /* _second_ argument of this function; this can make a great */
185 FT_EXPORT_FUNC( FT_Long
) FT_MulFix( FT_Long a
,
192 if ( a
< 0 ) { a
= -a
; s
= -s
; }
193 if ( b
< 0 ) { b
= -b
; s
= -s
; }
195 return s
* (FT_Long
)( ( (FT_Int64
)a
* b
+ 0x8000 ) >> 16 );
199 /*************************************************************************/
205 /* A very simple function used to perform the computation */
206 /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */
207 /* used to divide a given value by a 16.16 fixed float factor. */
210 /* a :: The first multiplier. */
211 /* b :: The second multiplier. Use a 16.16 factor here whenever */
212 /* possible (see note below). */
215 /* The result of `(a*0x10000)/b'. */
218 /* The optimization for FT_DivFix() is simple: If (a << 16) fits in */
219 /* 32 bits, then the division is computed directly. Otherwise, we */
220 /* use a specialized version of the old FT_MulDiv64(). */
222 FT_EXPORT_FUNC( FT_Long
) FT_DivFix( FT_Long a
,
233 /* check for division by 0 */
236 /* compute result directly */
237 q
= ( (FT_Int64
)a
<< 16 ) / b
;
239 return (FT_Int32
)( s
< 0 ? -q
: q
);
243 #ifdef FT_CONFIG_OPTION_OLD_CALCS
245 /* a helper function for FT_Sqrt64() */
248 int ft_order64( FT_Int64 z
)
255 z
= (unsigned FT_INT64
)z
>> 1;
262 /*************************************************************************/
268 /* Computes the square root of a 64-bit value. That sounds stupid, */
269 /* but it is needed to obtain maximal accuracy in the TrueType */
270 /* bytecode interpreter. */
273 /* l :: A 64-bit integer. */
276 /* The 32-bit square-root. */
278 FT_EXPORT_FUNC( FT_Int32
) FT_Sqrt64( FT_Int64 l
)
283 if ( l
<= 0 ) return 0;
284 if ( l
== 1 ) return 1;
286 r
= ft_square_roots
[ft_order64( l
)];
291 r
= ( r
+ l
/ r
) >> 1;
293 } while ( r
> s
|| r
* r
> l
);
298 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
301 #else /* FT_LONG64 */
304 /*************************************************************************/
310 /* A very simple function used to perform the computation `(a*b)/c' */
311 /* with maximal accuracy (it uses a 64-bit intermediate integer */
312 /* whenever necessary). */
314 /* This function isn't necessarily as fast as some processor specific */
315 /* operations, but is at least completely portable. */
318 /* a :: The first multiplier. */
319 /* b :: The second multiplier. */
320 /* c :: The divisor. */
323 /* The result of `(a*b)/c'. This function never traps when trying to */
324 /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */
325 /* on the signs of `a' and `b'. */
328 /* The FT_MulDiv() function has been optimized thanks to ideas from */
329 /* Graham Asher. The trick is to optimize computation if everything */
330 /* fits within 32 bits (a rather common case). */
332 /* We compute `a*b+c/2', then divide it by `c' (positive values). */
334 /* 46340 is FLOOR(SQRT(2^31-1)). */
336 /* if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 ) */
338 /* 0x7FFFFFFF - 0x7FFEA810 = 0x157F0 */
340 /* if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF ) */
342 /* and 2*0x157F0 = 176096. */
344 FT_EXPORT_FUNC( FT_Long
) FT_MulDiv( FT_Long a
,
351 if ( a
== 0 || b
== c
)
355 s
^= b
; b
= ABS( b
);
356 s
^= c
; c
= ABS( c
);
358 if ( a
<= 46340 && b
<= 46340 && c
<= 176095L && c
> 0 )
360 a
= ( a
* b
+ ( c
>> 1 ) ) / c
;
364 FT_Int64 temp
, temp2
;
367 FT_MulTo64( a
, b
, &temp
);
368 temp2
.hi
= (FT_Int32
)( c
>> 31 );
369 temp2
.lo
= (FT_UInt32
)( c
/ 2 );
370 FT_Add64( &temp
, &temp2
, &temp
);
371 a
= FT_Div64by32( &temp
, c
);
376 return ( s
< 0 ? -a
: a
);
380 /*************************************************************************/
386 /* A very simple function used to perform the computation */
387 /* `(a*b)/0x10000' with maximal accuracy. Most of the time, this is */
388 /* used to multiply a given value by a 16.16 fixed float factor. */
391 /* a :: The first multiplier. */
392 /* b :: The second multiplier. Use a 16.16 factor here whenever */
393 /* possible (see note below). */
396 /* The result of `(a*b)/0x10000'. */
399 /* The optimization for FT_MulFix() is different. We could simply be */
400 /* happy by applying the same principles as with FT_MulDiv(), because */
402 /* c = 0x10000 < 176096 */
404 /* However, in most cases, we have a `b' with a value around 0x10000 */
405 /* which is greater than 46340. */
407 /* According to some testing, most cases have `a' < 2048, so a good */
408 /* idea is to use bounds like 2048 and 1048576 (=floor((2^31-1)/2048) */
409 /* for `a' and `b', respectively. */
411 FT_EXPORT_FUNC( FT_Long
) FT_MulFix( FT_Long a
,
418 if ( a
== 0 || b
== 0x10000L
)
427 if ( ua
<= 2048 && ub
<= 1048576L )
429 ua
= ( ua
* ub
+ 0x8000 ) >> 16;
433 FT_ULong al
= ua
& 0xFFFF;
436 ua
= ( ua
>> 16 ) * ub
+
438 ( al
* ( ub
& 0xFFFF ) >> 16 );
441 return ( s
< 0 ? -(FT_Long
)ua
: ua
);
445 /*************************************************************************/
451 /* A very simple function used to perform the computation */
452 /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */
453 /* used to divide a given value by a 16.16 fixed float factor. */
456 /* a :: The first multiplier. */
457 /* b :: The second multiplier. Use a 16.16 factor here whenever */
458 /* possible (see note below). */
461 /* The result of `(a*0x10000)/b'. */
464 /* The optimization for FT_DivFix() is simple: If (a << 16) fits into */
465 /* 32 bits, then the division is computed directly. Otherwise, we */
466 /* use a specialized version of the old FT_MulDiv64(). */
468 FT_EXPORT_FUNC( FT_Long
) FT_DivFix( FT_Long a
,
480 /* check for division by 0 */
483 else if ( ( a
>> 16 ) == 0 )
485 /* compute result directly */
486 q
= (FT_UInt32
)( a
<< 16 ) / (FT_UInt32
)b
;
490 /* we need more bits; we have to do it by hand */
497 /* we must compute C*0x10000/B: we simply shift C and B so */
498 /* C becomes smaller than 16 bits */
505 q
+= ( c
<< 16 ) / b
;
508 return ( s
< 0 ? -(FT_Int32
)q
: (FT_Int32
)q
);
512 /*************************************************************************/
518 /* Add two Int64 values. */
521 /* x :: A pointer to the first value to be added. */
522 /* y :: A pointer to the second value to be added. */
525 /* z :: A pointer to the result of `x + y'. */
528 /* Will be wrapped by the ADD_64() macro. */
530 FT_EXPORT_FUNC( void ) FT_Add64( FT_Int64
* x
,
534 register FT_UInt32 lo
, hi
;
538 hi
= x
->hi
+ y
->hi
+ ( lo
< x
->lo
);
545 /*************************************************************************/
551 /* Multiplies two Int32 integers. Returns an Int64 integer. */
554 /* x :: The first multiplier. */
555 /* y :: The second multiplier. */
558 /* z :: A pointer to the result of `x * y'. */
561 /* Will be wrapped by the MUL_64() macro. */
563 FT_EXPORT_FUNC( void ) FT_MulTo64( FT_Int32 x
,
571 s
^= y
; y
= ABS( y
);
574 FT_UInt32 lo1
, hi1
, lo2
, hi2
, lo
, hi
, i1
, i2
;
577 lo1
= x
& 0x0000FFFF; hi1
= x
>> 16;
578 lo2
= y
& 0x0000FFFF; hi2
= y
>> 16;
585 /* Check carry overflow of i1 + i2 */
593 /* Check carry overflow of i1 + lo */
603 z
->lo
= (FT_UInt32
)-(FT_Int32
)z
->lo
;
604 z
->hi
= ~z
->hi
+ !( z
->lo
);
609 /*************************************************************************/
615 /* Divides an Int64 value by an Int32 value. Returns an Int32 */
619 /* x :: A pointer to the dividend. */
620 /* y :: The divisor. */
623 /* The result of `x / y'. */
626 /* Will be wrapped by the DIV_64() macro. */
628 FT_EXPORT_FUNC( FT_Int32
) FT_Div64by32( FT_Int64
* x
,
632 FT_UInt32 q
, r
, i
, lo
;
638 x
->lo
= (FT_UInt32
)-(FT_Int32
)x
->lo
;
639 x
->hi
= ~x
->hi
+ !( x
->lo
);
641 s
^= y
; y
= ABS( y
);
651 return ( s
< 0 ? -(FT_Int32
)q
: (FT_Int32
)q
);
657 if ( r
>= (FT_UInt32
)y
) /* we know y is to be treated as unsigned here */
658 return ( s
< 0 ? 0x80000001UL
: 0x7FFFFFFFUL
);
659 /* Return Max/Min Int32 if division overflow. */
660 /* This includes division by zero! */
662 for ( i
= 0; i
< 32; i
++ )
668 if ( r
>= (FT_UInt32
)y
)
676 return ( s
< 0 ? -(FT_Int32
)q
: (FT_Int32
)q
);
680 #ifdef FT_CONFIG_OPTION_OLD_CALCS
683 /* two helper functions for FT_Sqrt64() */
686 void FT_Sub64( FT_Int64
* x
,
690 register FT_UInt32 lo
, hi
;
694 hi
= x
->hi
- y
->hi
- ( (FT_Int32
)lo
< 0 );
702 int ft_order64( FT_Int64
* z
)
725 /*************************************************************************/
731 /* Computes the square root of a 64-bits value. That sounds stupid, */
732 /* but it is needed to obtain maximal accuracy in the TrueType */
733 /* bytecode interpreter. */
736 /* z :: A pointer to a 64-bit integer. */
739 /* The 32-bit square-root. */
741 FT_EXPORT_FUNC( FT_Int32
) FT_Sqrt64( FT_Int64
* l
)
747 if ( (FT_Int32
)l
->hi
< 0 ||
748 ( l
->hi
== 0 && l
->lo
== 0 ) )
755 r
= ft_square_roots
[s
];
759 r
= ( r
+ FT_Div64by32( l
, r
) ) >> 1;
760 FT_MulTo64( r
, r
, &l2
);
761 FT_Sub64 ( l
, &l2
, &l2
);
763 } while ( r
> s
|| (FT_Int32
)l2
.hi
< 0 );
768 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
770 #endif /* FT_LONG64 */