]> git.saurik.com Git - wxWidgets.git/blob - src/freetype/base/ftcalc.c
nicer version compilation fix for wxUSE_WCHAR_T=0 (why should we duplicate definition...
[wxWidgets.git] / src / freetype / base / ftcalc.c
1 /***************************************************************************/
2 /* */
3 /* ftcalc.c */
4 /* */
5 /* Arithmetic computations (body). */
6 /* */
7 /* Copyright 1996-2000 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
9 /* */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
15 /* */
16 /***************************************************************************/
17
18 /*************************************************************************/
19 /* */
20 /* Support for 1-complement arithmetic has been totally dropped in this */
21 /* release. You can still write your own code if you need it. */
22 /* */
23 /*************************************************************************/
24
25 /*************************************************************************/
26 /* */
27 /* Implementing basic computation routines. */
28 /* */
29 /* FT_MulDiv(), FT_MulFix(), and FT_DivFix() are declared in freetype.h. */
30 /* */
31 /*************************************************************************/
32
33
34 #include <freetype/internal/ftcalc.h>
35 #include <freetype/internal/ftdebug.h>
36 #include <freetype/internal/ftobjs.h> /* for ABS() */
37
38
39 /*************************************************************************/
40 /* */
41 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
42 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
43 /* messages during execution. */
44 /* */
45 #undef FT_COMPONENT
46 #define FT_COMPONENT trace_calc
47
48
49 #ifdef FT_CONFIG_OPTION_OLD_CALCS
50
51 static const FT_Long ft_square_roots[63] =
52 {
53 1L, 1L, 2L, 3L, 4L, 5L, 8L, 11L,
54 16L, 22L, 32L, 45L, 64L, 90L, 128L, 181L,
55 256L, 362L, 512L, 724L, 1024L, 1448L, 2048L, 2896L,
56 4096L, 5892L, 8192L, 11585L, 16384L, 23170L, 32768L, 46340L,
57
58 65536L, 92681L, 131072L, 185363L, 262144L, 370727L,
59 524288L, 741455L, 1048576L, 1482910L, 2097152L, 2965820L,
60 4194304L, 5931641L, 8388608L, 11863283L, 16777216L, 23726566L,
61
62 33554432L, 47453132L, 67108864L, 94906265L,
63 134217728L, 189812531L, 268435456L, 379625062L,
64 536870912L, 759250125L, 1073741824L, 1518500250L,
65 2147483647L
66 };
67
68 #else
69
70 /*************************************************************************/
71 /* */
72 /* <Function> */
73 /* FT_Sqrt32 */
74 /* */
75 /* <Description> */
76 /* Computes the square root of an Int32 integer (which will be */
77 /* handled as an unsigned long value). */
78 /* */
79 /* <Input> */
80 /* x :: The value to compute the root for. */
81 /* */
82 /* <Return> */
83 /* The result of `sqrt(x)'. */
84 /* */
85 FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt32( FT_Int32 x )
86 {
87 FT_ULong val, root, newroot, mask;
88
89
90 root = 0;
91 mask = 0x40000000L;
92 val = (FT_ULong)x;
93
94 do
95 {
96 newroot = root + mask;
97 if ( newroot <= val )
98 {
99 val -= newroot;
100 root = newroot + mask;
101 }
102
103 root >>= 1;
104 mask >>= 2;
105
106 } while ( mask != 0 );
107
108 return root;
109 }
110
111 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
112
113
114 #ifdef FT_LONG64
115
116 /*************************************************************************/
117 /* */
118 /* <Function> */
119 /* FT_MulDiv */
120 /* */
121 /* <Description> */
122 /* A very simple function used to perform the computation `(a*b)/c' */
123 /* with maximal accuracy (it uses a 64-bit intermediate integer */
124 /* whenever necessary). */
125 /* */
126 /* This function isn't necessarily as fast as some processor specific */
127 /* operations, but is at least completely portable. */
128 /* */
129 /* <Input> */
130 /* a :: The first multiplier. */
131 /* b :: The second multiplier. */
132 /* c :: The divisor. */
133 /* */
134 /* <Return> */
135 /* The result of `(a*b)/c'. This function never traps when trying to */
136 /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */
137 /* on the signs of `a' and `b'. */
138 /* */
139 FT_EXPORT_FUNC( FT_Long ) FT_MulDiv( FT_Long a,
140 FT_Long b,
141 FT_Long c )
142 {
143 FT_Int s;
144
145
146 s = 1;
147 if ( a < 0 ) { a = -a; s = -s; }
148 if ( b < 0 ) { b = -b; s = -s; }
149 if ( c < 0 ) { c = -c; s = -s; }
150
151 return s * ( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c
152 : 0x7FFFFFFFL );
153 }
154
155
156 /*************************************************************************/
157 /* */
158 /* <Function> */
159 /* FT_MulFix */
160 /* */
161 /* <Description> */
162 /* A very simple function used to perform the computation */
163 /* `(a*b)/0x10000' with maximal accuracy. Most of the time this is */
164 /* used to multiply a given value by a 16.16 fixed float factor. */
165 /* */
166 /* <Input> */
167 /* a :: The first multiplier. */
168 /* b :: The second multiplier. Use a 16.16 factor here whenever */
169 /* possible (see note below). */
170 /* */
171 /* <Return> */
172 /* The result of `(a*b)/0x10000'. */
173 /* */
174 /* <Note> */
175 /* This function has been optimized for the case where the absolute */
176 /* value of `a' is less than 2048, and `b' is a 16.16 scaling factor. */
177 /* As this happens mainly when scaling from notional units to */
178 /* fractional pixels in FreeType, it resulted in noticeable speed */
179 /* improvements between versions 2.x and 1.x. */
180 /* */
181 /* As a conclusion, always try to place a 16.16 factor as the */
182 /* _second_ argument of this function; this can make a great */
183 /* difference. */
184 /* */
185 FT_EXPORT_FUNC( FT_Long ) FT_MulFix( FT_Long a,
186 FT_Long b )
187 {
188 FT_Int s;
189
190
191 s = 1;
192 if ( a < 0 ) { a = -a; s = -s; }
193 if ( b < 0 ) { b = -b; s = -s; }
194
195 return s * (FT_Long)( ( (FT_Int64)a * b + 0x8000 ) >> 16 );
196 }
197
198
199 /*************************************************************************/
200 /* */
201 /* <Function> */
202 /* FT_DivFix */
203 /* */
204 /* <Description> */
205 /* A very simple function used to perform the computation */
206 /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */
207 /* used to divide a given value by a 16.16 fixed float factor. */
208 /* */
209 /* <Input> */
210 /* a :: The first multiplier. */
211 /* b :: The second multiplier. Use a 16.16 factor here whenever */
212 /* possible (see note below). */
213 /* */
214 /* <Return> */
215 /* The result of `(a*0x10000)/b'. */
216 /* */
217 /* <Note> */
218 /* The optimization for FT_DivFix() is simple: If (a << 16) fits in */
219 /* 32 bits, then the division is computed directly. Otherwise, we */
220 /* use a specialized version of the old FT_MulDiv64(). */
221 /* */
222 FT_EXPORT_FUNC( FT_Long ) FT_DivFix( FT_Long a,
223 FT_Long b )
224 {
225 FT_Int32 s;
226 FT_UInt32 q;
227
228
229 s = a; a = ABS(a);
230 s ^= b; b = ABS(b);
231
232 if ( b == 0 )
233 /* check for division by 0 */
234 q = 0x7FFFFFFFL;
235 else
236 /* compute result directly */
237 q = ( (FT_Int64)a << 16 ) / b;
238
239 return (FT_Int32)( s < 0 ? -q : q );
240 }
241
242
243 #ifdef FT_CONFIG_OPTION_OLD_CALCS
244
245 /* a helper function for FT_Sqrt64() */
246
247 static
248 int ft_order64( FT_Int64 z )
249 {
250 int j = 0;
251
252
253 while ( z )
254 {
255 z = (unsigned FT_INT64)z >> 1;
256 j++;
257 }
258 return j - 1;
259 }
260
261
262 /*************************************************************************/
263 /* */
264 /* <Function> */
265 /* FT_Sqrt64 */
266 /* */
267 /* <Description> */
268 /* Computes the square root of a 64-bit value. That sounds stupid, */
269 /* but it is needed to obtain maximal accuracy in the TrueType */
270 /* bytecode interpreter. */
271 /* */
272 /* <Input> */
273 /* l :: A 64-bit integer. */
274 /* */
275 /* <Return> */
276 /* The 32-bit square-root. */
277 /* */
278 FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt64( FT_Int64 l )
279 {
280 FT_Int64 r, s;
281
282
283 if ( l <= 0 ) return 0;
284 if ( l == 1 ) return 1;
285
286 r = ft_square_roots[ft_order64( l )];
287
288 do
289 {
290 s = r;
291 r = ( r + l / r ) >> 1;
292
293 } while ( r > s || r * r > l );
294
295 return r;
296 }
297
298 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
299
300
301 #else /* FT_LONG64 */
302
303
304 /*************************************************************************/
305 /* */
306 /* <Function> */
307 /* FT_MulDiv */
308 /* */
309 /* <Description> */
310 /* A very simple function used to perform the computation `(a*b)/c' */
311 /* with maximal accuracy (it uses a 64-bit intermediate integer */
312 /* whenever necessary). */
313 /* */
314 /* This function isn't necessarily as fast as some processor specific */
315 /* operations, but is at least completely portable. */
316 /* */
317 /* <Input> */
318 /* a :: The first multiplier. */
319 /* b :: The second multiplier. */
320 /* c :: The divisor. */
321 /* */
322 /* <Return> */
323 /* The result of `(a*b)/c'. This function never traps when trying to */
324 /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */
325 /* on the signs of `a' and `b'. */
326 /* */
327 /* <Note> */
328 /* The FT_MulDiv() function has been optimized thanks to ideas from */
329 /* Graham Asher. The trick is to optimize computation if everything */
330 /* fits within 32 bits (a rather common case). */
331 /* */
332 /* We compute `a*b+c/2', then divide it by `c' (positive values). */
333 /* */
334 /* 46340 is FLOOR(SQRT(2^31-1)). */
335 /* */
336 /* if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 ) */
337 /* */
338 /* 0x7FFFFFFF - 0x7FFEA810 = 0x157F0 */
339 /* */
340 /* if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF ) */
341 /* */
342 /* and 2*0x157F0 = 176096. */
343 /* */
344 FT_EXPORT_FUNC( FT_Long ) FT_MulDiv( FT_Long a,
345 FT_Long b,
346 FT_Long c )
347 {
348 long s;
349
350
351 if ( a == 0 || b == c )
352 return a;
353
354 s = a; a = ABS( a );
355 s ^= b; b = ABS( b );
356 s ^= c; c = ABS( c );
357
358 if ( a <= 46340 && b <= 46340 && c <= 176095L && c > 0 )
359 {
360 a = ( a * b + ( c >> 1 ) ) / c;
361 }
362 else if ( c > 0 )
363 {
364 FT_Int64 temp, temp2;
365
366
367 FT_MulTo64( a, b, &temp );
368 temp2.hi = (FT_Int32)( c >> 31 );
369 temp2.lo = (FT_UInt32)( c / 2 );
370 FT_Add64( &temp, &temp2, &temp );
371 a = FT_Div64by32( &temp, c );
372 }
373 else
374 a = 0x7FFFFFFFL;
375
376 return ( s < 0 ? -a : a );
377 }
378
379
380 /*************************************************************************/
381 /* */
382 /* <Function> */
383 /* FT_MulFix */
384 /* */
385 /* <Description> */
386 /* A very simple function used to perform the computation */
387 /* `(a*b)/0x10000' with maximal accuracy. Most of the time, this is */
388 /* used to multiply a given value by a 16.16 fixed float factor. */
389 /* */
390 /* <Input> */
391 /* a :: The first multiplier. */
392 /* b :: The second multiplier. Use a 16.16 factor here whenever */
393 /* possible (see note below). */
394 /* */
395 /* <Return> */
396 /* The result of `(a*b)/0x10000'. */
397 /* */
398 /* <Note> */
399 /* The optimization for FT_MulFix() is different. We could simply be */
400 /* happy by applying the same principles as with FT_MulDiv(), because */
401 /* */
402 /* c = 0x10000 < 176096 */
403 /* */
404 /* However, in most cases, we have a `b' with a value around 0x10000 */
405 /* which is greater than 46340. */
406 /* */
407 /* According to some testing, most cases have `a' < 2048, so a good */
408 /* idea is to use bounds like 2048 and 1048576 (=floor((2^31-1)/2048) */
409 /* for `a' and `b', respectively. */
410 /* */
411 FT_EXPORT_FUNC( FT_Long ) FT_MulFix( FT_Long a,
412 FT_Long b )
413 {
414 FT_Long s;
415 FT_ULong ua, ub;
416
417
418 if ( a == 0 || b == 0x10000L )
419 return a;
420
421 s = a; a = ABS(a);
422 s ^= b; b = ABS(b);
423
424 ua = (FT_ULong)a;
425 ub = (FT_ULong)b;
426
427 if ( ua <= 2048 && ub <= 1048576L )
428 {
429 ua = ( ua * ub + 0x8000 ) >> 16;
430 }
431 else
432 {
433 FT_ULong al = ua & 0xFFFF;
434
435
436 ua = ( ua >> 16 ) * ub +
437 al * ( ub >> 16 ) +
438 ( al * ( ub & 0xFFFF ) >> 16 );
439 }
440
441 return ( s < 0 ? -(FT_Long)ua : ua );
442 }
443
444
445 /*************************************************************************/
446 /* */
447 /* <Function> */
448 /* FT_DivFix */
449 /* */
450 /* <Description> */
451 /* A very simple function used to perform the computation */
452 /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */
453 /* used to divide a given value by a 16.16 fixed float factor. */
454 /* */
455 /* <Input> */
456 /* a :: The first multiplier. */
457 /* b :: The second multiplier. Use a 16.16 factor here whenever */
458 /* possible (see note below). */
459 /* */
460 /* <Return> */
461 /* The result of `(a*0x10000)/b'. */
462 /* */
463 /* <Note> */
464 /* The optimization for FT_DivFix() is simple: If (a << 16) fits into */
465 /* 32 bits, then the division is computed directly. Otherwise, we */
466 /* use a specialized version of the old FT_MulDiv64(). */
467 /* */
468 FT_EXPORT_FUNC( FT_Long ) FT_DivFix( FT_Long a,
469 FT_Long b )
470 {
471 FT_Int32 s;
472 FT_UInt32 q;
473
474
475 s = a; a = ABS(a);
476 s ^= b; b = ABS(b);
477
478 if ( b == 0 )
479 {
480 /* check for division by 0 */
481 q = 0x7FFFFFFFL;
482 }
483 else if ( ( a >> 16 ) == 0 )
484 {
485 /* compute result directly */
486 q = (FT_UInt32)( a << 16 ) / (FT_UInt32)b;
487 }
488 else
489 {
490 /* we need more bits; we have to do it by hand */
491 FT_UInt32 c;
492
493
494 q = ( a / b ) << 16;
495 c = a % b;
496
497 /* we must compute C*0x10000/B: we simply shift C and B so */
498 /* C becomes smaller than 16 bits */
499 while ( c >> 16 )
500 {
501 c >>= 1;
502 b <<= 1;
503 }
504
505 q += ( c << 16 ) / b;
506 }
507
508 return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
509 }
510
511
512 /*************************************************************************/
513 /* */
514 /* <Function> */
515 /* FT_Add64 */
516 /* */
517 /* <Description> */
518 /* Add two Int64 values. */
519 /* */
520 /* <Input> */
521 /* x :: A pointer to the first value to be added. */
522 /* y :: A pointer to the second value to be added. */
523 /* */
524 /* <Output> */
525 /* z :: A pointer to the result of `x + y'. */
526 /* */
527 /* <Note> */
528 /* Will be wrapped by the ADD_64() macro. */
529 /* */
530 FT_EXPORT_FUNC( void ) FT_Add64( FT_Int64* x,
531 FT_Int64* y,
532 FT_Int64* z )
533 {
534 register FT_UInt32 lo, hi;
535
536
537 lo = x->lo + y->lo;
538 hi = x->hi + y->hi + ( lo < x->lo );
539
540 z->lo = lo;
541 z->hi = hi;
542 }
543
544
545 /*************************************************************************/
546 /* */
547 /* <Function> */
548 /* FT_MulTo64 */
549 /* */
550 /* <Description> */
551 /* Multiplies two Int32 integers. Returns an Int64 integer. */
552 /* */
553 /* <Input> */
554 /* x :: The first multiplier. */
555 /* y :: The second multiplier. */
556 /* */
557 /* <Output> */
558 /* z :: A pointer to the result of `x * y'. */
559 /* */
560 /* <Note> */
561 /* Will be wrapped by the MUL_64() macro. */
562 /* */
563 FT_EXPORT_FUNC( void ) FT_MulTo64( FT_Int32 x,
564 FT_Int32 y,
565 FT_Int64* z )
566 {
567 FT_Int32 s;
568
569
570 s = x; x = ABS( x );
571 s ^= y; y = ABS( y );
572
573 {
574 FT_UInt32 lo1, hi1, lo2, hi2, lo, hi, i1, i2;
575
576
577 lo1 = x & 0x0000FFFF; hi1 = x >> 16;
578 lo2 = y & 0x0000FFFF; hi2 = y >> 16;
579
580 lo = lo1 * lo2;
581 i1 = lo1 * hi2;
582 i2 = lo2 * hi1;
583 hi = hi1 * hi2;
584
585 /* Check carry overflow of i1 + i2 */
586 i1 += i2;
587 if ( i1 < i2 )
588 hi += 1L << 16;
589
590 hi += i1 >> 16;
591 i1 = i1 << 16;
592
593 /* Check carry overflow of i1 + lo */
594 lo += i1;
595 hi += ( lo < i1 );
596
597 z->lo = lo;
598 z->hi = hi;
599 }
600
601 if ( s < 0 )
602 {
603 z->lo = (FT_UInt32)-(FT_Int32)z->lo;
604 z->hi = ~z->hi + !( z->lo );
605 }
606 }
607
608
609 /*************************************************************************/
610 /* */
611 /* <Function> */
612 /* FT_Div64by32 */
613 /* */
614 /* <Description> */
615 /* Divides an Int64 value by an Int32 value. Returns an Int32 */
616 /* integer. */
617 /* */
618 /* <Input> */
619 /* x :: A pointer to the dividend. */
620 /* y :: The divisor. */
621 /* */
622 /* <Return> */
623 /* The result of `x / y'. */
624 /* */
625 /* <Note> */
626 /* Will be wrapped by the DIV_64() macro. */
627 /* */
628 FT_EXPORT_FUNC( FT_Int32 ) FT_Div64by32( FT_Int64* x,
629 FT_Int32 y )
630 {
631 FT_Int32 s;
632 FT_UInt32 q, r, i, lo;
633
634
635 s = x->hi;
636 if ( s < 0 )
637 {
638 x->lo = (FT_UInt32)-(FT_Int32)x->lo;
639 x->hi = ~x->hi + !( x->lo );
640 }
641 s ^= y; y = ABS( y );
642
643 /* Shortcut */
644 if ( x->hi == 0 )
645 {
646 if ( y > 0 )
647 q = x->lo / y;
648 else
649 q = 0x7FFFFFFFL;
650
651 return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
652 }
653
654 r = x->hi;
655 lo = x->lo;
656
657 if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */
658 return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL );
659 /* Return Max/Min Int32 if division overflow. */
660 /* This includes division by zero! */
661 q = 0;
662 for ( i = 0; i < 32; i++ )
663 {
664 r <<= 1;
665 q <<= 1;
666 r |= lo >> 31;
667
668 if ( r >= (FT_UInt32)y )
669 {
670 r -= y;
671 q |= 1;
672 }
673 lo <<= 1;
674 }
675
676 return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
677 }
678
679
680 #ifdef FT_CONFIG_OPTION_OLD_CALCS
681
682
683 /* two helper functions for FT_Sqrt64() */
684
685 static
686 void FT_Sub64( FT_Int64* x,
687 FT_Int64* y,
688 FT_Int64* z )
689 {
690 register FT_UInt32 lo, hi;
691
692
693 lo = x->lo - y->lo;
694 hi = x->hi - y->hi - ( (FT_Int32)lo < 0 );
695
696 z->lo = lo;
697 z->hi = hi;
698 }
699
700
701 static
702 int ft_order64( FT_Int64* z )
703 {
704 FT_UInt32 i;
705 int j;
706
707
708 i = z->lo;
709 j = 0;
710 if ( z->hi )
711 {
712 i = z->hi;
713 j = 32;
714 }
715
716 while ( i > 0 )
717 {
718 i >>= 1;
719 j++;
720 }
721 return j - 1;
722 }
723
724
725 /*************************************************************************/
726 /* */
727 /* <Function> */
728 /* FT_Sqrt64 */
729 /* */
730 /* <Description> */
731 /* Computes the square root of a 64-bits value. That sounds stupid, */
732 /* but it is needed to obtain maximal accuracy in the TrueType */
733 /* bytecode interpreter. */
734 /* */
735 /* <Input> */
736 /* z :: A pointer to a 64-bit integer. */
737 /* */
738 /* <Return> */
739 /* The 32-bit square-root. */
740 /* */
741 FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt64( FT_Int64* l )
742 {
743 FT_Int64 l2;
744 FT_Int32 r, s;
745
746
747 if ( (FT_Int32)l->hi < 0 ||
748 ( l->hi == 0 && l->lo == 0 ) )
749 return 0;
750
751 s = ft_order64( l );
752 if ( s == 0 )
753 return 1;
754
755 r = ft_square_roots[s];
756 do
757 {
758 s = r;
759 r = ( r + FT_Div64by32( l, r ) ) >> 1;
760 FT_MulTo64( r, r, &l2 );
761 FT_Sub64 ( l, &l2, &l2 );
762
763 } while ( r > s || (FT_Int32)l2.hi < 0 );
764
765 return r;
766 }
767
768 #endif /* FT_CONFIG_OPTION_OLD_CALCS */
769
770 #endif /* FT_LONG64 */
771
772
773 /* END */