]>
Commit | Line | Data |
---|---|---|
cabec872 RR |
1 | /***************************************************************************/ |
2 | /* */ | |
3 | /* ftcalc.c */ | |
4 | /* */ | |
5 | /* Arithmetic computations (body). */ | |
6 | /* */ | |
7 | /* Copyright 1996-2000 by */ | |
8 | /* David Turner, Robert Wilhelm, and Werner Lemberg. */ | |
9 | /* */ | |
10 | /* This file is part of the FreeType project, and may only be used, */ | |
11 | /* modified, and distributed under the terms of the FreeType project */ | |
12 | /* license, LICENSE.TXT. By continuing to use, modify, or distribute */ | |
13 | /* this file you indicate that you have read the license and */ | |
14 | /* understand and accept it fully. */ | |
15 | /* */ | |
16 | /***************************************************************************/ | |
17 | ||
18 | /*************************************************************************/ | |
19 | /* */ | |
20 | /* Support for 1-complement arithmetic has been totally dropped in this */ | |
21 | /* release. You can still write your own code if you need it. */ | |
22 | /* */ | |
23 | /*************************************************************************/ | |
24 | ||
25 | /*************************************************************************/ | |
26 | /* */ | |
27 | /* Implementing basic computation routines. */ | |
28 | /* */ | |
29 | /* FT_MulDiv(), FT_MulFix(), and FT_DivFix() are declared in freetype.h. */ | |
30 | /* */ | |
31 | /*************************************************************************/ | |
32 | ||
33 | ||
34 | #include <freetype/internal/ftcalc.h> | |
35 | #include <freetype/internal/ftdebug.h> | |
36 | #include <freetype/internal/ftobjs.h> /* for ABS() */ | |
37 | ||
38 | ||
39 | /*************************************************************************/ | |
40 | /* */ | |
41 | /* The macro FT_COMPONENT is used in trace mode. It is an implicit */ | |
42 | /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */ | |
43 | /* messages during execution. */ | |
44 | /* */ | |
45 | #undef FT_COMPONENT | |
46 | #define FT_COMPONENT trace_calc | |
47 | ||
48 | ||
49 | #ifdef FT_CONFIG_OPTION_OLD_CALCS | |
50 | ||
51 | static const FT_Long ft_square_roots[63] = | |
52 | { | |
53 | 1L, 1L, 2L, 3L, 4L, 5L, 8L, 11L, | |
54 | 16L, 22L, 32L, 45L, 64L, 90L, 128L, 181L, | |
55 | 256L, 362L, 512L, 724L, 1024L, 1448L, 2048L, 2896L, | |
56 | 4096L, 5892L, 8192L, 11585L, 16384L, 23170L, 32768L, 46340L, | |
57 | ||
58 | 65536L, 92681L, 131072L, 185363L, 262144L, 370727L, | |
59 | 524288L, 741455L, 1048576L, 1482910L, 2097152L, 2965820L, | |
60 | 4194304L, 5931641L, 8388608L, 11863283L, 16777216L, 23726566L, | |
61 | ||
62 | 33554432L, 47453132L, 67108864L, 94906265L, | |
63 | 134217728L, 189812531L, 268435456L, 379625062L, | |
64 | 536870912L, 759250125L, 1073741824L, 1518500250L, | |
65 | 2147483647L | |
66 | }; | |
67 | ||
68 | #else | |
69 | ||
70 | /*************************************************************************/ | |
71 | /* */ | |
72 | /* <Function> */ | |
73 | /* FT_Sqrt32 */ | |
74 | /* */ | |
75 | /* <Description> */ | |
76 | /* Computes the square root of an Int32 integer (which will be */ | |
77 | /* handled as an unsigned long value). */ | |
78 | /* */ | |
79 | /* <Input> */ | |
80 | /* x :: The value to compute the root for. */ | |
81 | /* */ | |
82 | /* <Return> */ | |
83 | /* The result of `sqrt(x)'. */ | |
84 | /* */ | |
85 | FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt32( FT_Int32 x ) | |
86 | { | |
87 | FT_ULong val, root, newroot, mask; | |
88 | ||
89 | ||
90 | root = 0; | |
91 | mask = 0x40000000L; | |
92 | val = (FT_ULong)x; | |
93 | ||
94 | do | |
95 | { | |
96 | newroot = root + mask; | |
97 | if ( newroot <= val ) | |
98 | { | |
99 | val -= newroot; | |
100 | root = newroot + mask; | |
101 | } | |
102 | ||
103 | root >>= 1; | |
104 | mask >>= 2; | |
105 | ||
106 | } while ( mask != 0 ); | |
107 | ||
108 | return root; | |
109 | } | |
110 | ||
111 | #endif /* FT_CONFIG_OPTION_OLD_CALCS */ | |
112 | ||
113 | ||
114 | #ifdef FT_LONG64 | |
115 | ||
116 | /*************************************************************************/ | |
117 | /* */ | |
118 | /* <Function> */ | |
119 | /* FT_MulDiv */ | |
120 | /* */ | |
121 | /* <Description> */ | |
122 | /* A very simple function used to perform the computation `(a*b)/c' */ | |
123 | /* with maximal accuracy (it uses a 64-bit intermediate integer */ | |
124 | /* whenever necessary). */ | |
125 | /* */ | |
126 | /* This function isn't necessarily as fast as some processor specific */ | |
127 | /* operations, but is at least completely portable. */ | |
128 | /* */ | |
129 | /* <Input> */ | |
130 | /* a :: The first multiplier. */ | |
131 | /* b :: The second multiplier. */ | |
132 | /* c :: The divisor. */ | |
133 | /* */ | |
134 | /* <Return> */ | |
135 | /* The result of `(a*b)/c'. This function never traps when trying to */ | |
136 | /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */ | |
137 | /* on the signs of `a' and `b'. */ | |
138 | /* */ | |
139 | FT_EXPORT_FUNC( FT_Long ) FT_MulDiv( FT_Long a, | |
140 | FT_Long b, | |
141 | FT_Long c ) | |
142 | { | |
143 | FT_Int s; | |
144 | ||
145 | ||
146 | s = 1; | |
147 | if ( a < 0 ) { a = -a; s = -s; } | |
148 | if ( b < 0 ) { b = -b; s = -s; } | |
149 | if ( c < 0 ) { c = -c; s = -s; } | |
150 | ||
151 | return s * ( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c | |
152 | : 0x7FFFFFFFL ); | |
153 | } | |
154 | ||
155 | ||
156 | /*************************************************************************/ | |
157 | /* */ | |
158 | /* <Function> */ | |
159 | /* FT_MulFix */ | |
160 | /* */ | |
161 | /* <Description> */ | |
162 | /* A very simple function used to perform the computation */ | |
163 | /* `(a*b)/0x10000' with maximal accuracy. Most of the time this is */ | |
164 | /* used to multiply a given value by a 16.16 fixed float factor. */ | |
165 | /* */ | |
166 | /* <Input> */ | |
167 | /* a :: The first multiplier. */ | |
168 | /* b :: The second multiplier. Use a 16.16 factor here whenever */ | |
169 | /* possible (see note below). */ | |
170 | /* */ | |
171 | /* <Return> */ | |
172 | /* The result of `(a*b)/0x10000'. */ | |
173 | /* */ | |
174 | /* <Note> */ | |
175 | /* This function has been optimized for the case where the absolute */ | |
176 | /* value of `a' is less than 2048, and `b' is a 16.16 scaling factor. */ | |
177 | /* As this happens mainly when scaling from notional units to */ | |
178 | /* fractional pixels in FreeType, it resulted in noticeable speed */ | |
179 | /* improvements between versions 2.x and 1.x. */ | |
180 | /* */ | |
181 | /* As a conclusion, always try to place a 16.16 factor as the */ | |
182 | /* _second_ argument of this function; this can make a great */ | |
183 | /* difference. */ | |
184 | /* */ | |
185 | FT_EXPORT_FUNC( FT_Long ) FT_MulFix( FT_Long a, | |
186 | FT_Long b ) | |
187 | { | |
188 | FT_Int s; | |
189 | ||
190 | ||
191 | s = 1; | |
192 | if ( a < 0 ) { a = -a; s = -s; } | |
193 | if ( b < 0 ) { b = -b; s = -s; } | |
194 | ||
195 | return s * (FT_Long)( ( (FT_Int64)a * b + 0x8000 ) >> 16 ); | |
196 | } | |
197 | ||
198 | ||
199 | /*************************************************************************/ | |
200 | /* */ | |
201 | /* <Function> */ | |
202 | /* FT_DivFix */ | |
203 | /* */ | |
204 | /* <Description> */ | |
205 | /* A very simple function used to perform the computation */ | |
206 | /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */ | |
207 | /* used to divide a given value by a 16.16 fixed float factor. */ | |
208 | /* */ | |
209 | /* <Input> */ | |
210 | /* a :: The first multiplier. */ | |
211 | /* b :: The second multiplier. Use a 16.16 factor here whenever */ | |
212 | /* possible (see note below). */ | |
213 | /* */ | |
214 | /* <Return> */ | |
215 | /* The result of `(a*0x10000)/b'. */ | |
216 | /* */ | |
217 | /* <Note> */ | |
218 | /* The optimization for FT_DivFix() is simple: If (a << 16) fits in */ | |
219 | /* 32 bits, then the division is computed directly. Otherwise, we */ | |
220 | /* use a specialized version of the old FT_MulDiv64(). */ | |
221 | /* */ | |
222 | FT_EXPORT_FUNC( FT_Long ) FT_DivFix( FT_Long a, | |
223 | FT_Long b ) | |
224 | { | |
225 | FT_Int32 s; | |
226 | FT_UInt32 q; | |
227 | ||
228 | ||
229 | s = a; a = ABS(a); | |
230 | s ^= b; b = ABS(b); | |
231 | ||
232 | if ( b == 0 ) | |
233 | /* check for division by 0 */ | |
234 | q = 0x7FFFFFFFL; | |
235 | else | |
236 | /* compute result directly */ | |
237 | q = ( (FT_Int64)a << 16 ) / b; | |
238 | ||
239 | return (FT_Int32)( s < 0 ? -q : q ); | |
240 | } | |
241 | ||
242 | ||
243 | #ifdef FT_CONFIG_OPTION_OLD_CALCS | |
244 | ||
245 | /* a helper function for FT_Sqrt64() */ | |
246 | ||
247 | static | |
248 | int ft_order64( FT_Int64 z ) | |
249 | { | |
250 | int j = 0; | |
251 | ||
252 | ||
253 | while ( z ) | |
254 | { | |
255 | z = (unsigned FT_INT64)z >> 1; | |
256 | j++; | |
257 | } | |
258 | return j - 1; | |
259 | } | |
260 | ||
261 | ||
262 | /*************************************************************************/ | |
263 | /* */ | |
264 | /* <Function> */ | |
265 | /* FT_Sqrt64 */ | |
266 | /* */ | |
267 | /* <Description> */ | |
268 | /* Computes the square root of a 64-bit value. That sounds stupid, */ | |
269 | /* but it is needed to obtain maximal accuracy in the TrueType */ | |
270 | /* bytecode interpreter. */ | |
271 | /* */ | |
272 | /* <Input> */ | |
273 | /* l :: A 64-bit integer. */ | |
274 | /* */ | |
275 | /* <Return> */ | |
276 | /* The 32-bit square-root. */ | |
277 | /* */ | |
278 | FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt64( FT_Int64 l ) | |
279 | { | |
280 | FT_Int64 r, s; | |
281 | ||
282 | ||
283 | if ( l <= 0 ) return 0; | |
284 | if ( l == 1 ) return 1; | |
285 | ||
286 | r = ft_square_roots[ft_order64( l )]; | |
287 | ||
288 | do | |
289 | { | |
290 | s = r; | |
291 | r = ( r + l / r ) >> 1; | |
292 | ||
293 | } while ( r > s || r * r > l ); | |
294 | ||
295 | return r; | |
296 | } | |
297 | ||
298 | #endif /* FT_CONFIG_OPTION_OLD_CALCS */ | |
299 | ||
300 | ||
301 | #else /* FT_LONG64 */ | |
302 | ||
303 | ||
304 | /*************************************************************************/ | |
305 | /* */ | |
306 | /* <Function> */ | |
307 | /* FT_MulDiv */ | |
308 | /* */ | |
309 | /* <Description> */ | |
310 | /* A very simple function used to perform the computation `(a*b)/c' */ | |
311 | /* with maximal accuracy (it uses a 64-bit intermediate integer */ | |
312 | /* whenever necessary). */ | |
313 | /* */ | |
314 | /* This function isn't necessarily as fast as some processor specific */ | |
315 | /* operations, but is at least completely portable. */ | |
316 | /* */ | |
317 | /* <Input> */ | |
318 | /* a :: The first multiplier. */ | |
319 | /* b :: The second multiplier. */ | |
320 | /* c :: The divisor. */ | |
321 | /* */ | |
322 | /* <Return> */ | |
323 | /* The result of `(a*b)/c'. This function never traps when trying to */ | |
324 | /* divide by zero; it simply returns `MaxInt' or `MinInt' depending */ | |
325 | /* on the signs of `a' and `b'. */ | |
326 | /* */ | |
327 | /* <Note> */ | |
328 | /* The FT_MulDiv() function has been optimized thanks to ideas from */ | |
329 | /* Graham Asher. The trick is to optimize computation if everything */ | |
330 | /* fits within 32 bits (a rather common case). */ | |
331 | /* */ | |
332 | /* We compute `a*b+c/2', then divide it by `c' (positive values). */ | |
333 | /* */ | |
334 | /* 46340 is FLOOR(SQRT(2^31-1)). */ | |
335 | /* */ | |
336 | /* if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 ) */ | |
337 | /* */ | |
338 | /* 0x7FFFFFFF - 0x7FFEA810 = 0x157F0 */ | |
339 | /* */ | |
340 | /* if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF ) */ | |
341 | /* */ | |
342 | /* and 2*0x157F0 = 176096. */ | |
343 | /* */ | |
344 | FT_EXPORT_FUNC( FT_Long ) FT_MulDiv( FT_Long a, | |
345 | FT_Long b, | |
346 | FT_Long c ) | |
347 | { | |
348 | long s; | |
349 | ||
350 | ||
351 | if ( a == 0 || b == c ) | |
352 | return a; | |
353 | ||
354 | s = a; a = ABS( a ); | |
355 | s ^= b; b = ABS( b ); | |
356 | s ^= c; c = ABS( c ); | |
357 | ||
358 | if ( a <= 46340 && b <= 46340 && c <= 176095L && c > 0 ) | |
359 | { | |
360 | a = ( a * b + ( c >> 1 ) ) / c; | |
361 | } | |
362 | else if ( c > 0 ) | |
363 | { | |
364 | FT_Int64 temp, temp2; | |
365 | ||
366 | ||
367 | FT_MulTo64( a, b, &temp ); | |
368 | temp2.hi = (FT_Int32)( c >> 31 ); | |
369 | temp2.lo = (FT_UInt32)( c / 2 ); | |
370 | FT_Add64( &temp, &temp2, &temp ); | |
371 | a = FT_Div64by32( &temp, c ); | |
372 | } | |
373 | else | |
374 | a = 0x7FFFFFFFL; | |
375 | ||
376 | return ( s < 0 ? -a : a ); | |
377 | } | |
378 | ||
379 | ||
380 | /*************************************************************************/ | |
381 | /* */ | |
382 | /* <Function> */ | |
383 | /* FT_MulFix */ | |
384 | /* */ | |
385 | /* <Description> */ | |
386 | /* A very simple function used to perform the computation */ | |
387 | /* `(a*b)/0x10000' with maximal accuracy. Most of the time, this is */ | |
388 | /* used to multiply a given value by a 16.16 fixed float factor. */ | |
389 | /* */ | |
390 | /* <Input> */ | |
391 | /* a :: The first multiplier. */ | |
392 | /* b :: The second multiplier. Use a 16.16 factor here whenever */ | |
393 | /* possible (see note below). */ | |
394 | /* */ | |
395 | /* <Return> */ | |
396 | /* The result of `(a*b)/0x10000'. */ | |
397 | /* */ | |
398 | /* <Note> */ | |
399 | /* The optimization for FT_MulFix() is different. We could simply be */ | |
400 | /* happy by applying the same principles as with FT_MulDiv(), because */ | |
401 | /* */ | |
402 | /* c = 0x10000 < 176096 */ | |
403 | /* */ | |
404 | /* However, in most cases, we have a `b' with a value around 0x10000 */ | |
405 | /* which is greater than 46340. */ | |
406 | /* */ | |
407 | /* According to some testing, most cases have `a' < 2048, so a good */ | |
408 | /* idea is to use bounds like 2048 and 1048576 (=floor((2^31-1)/2048) */ | |
409 | /* for `a' and `b', respectively. */ | |
410 | /* */ | |
411 | FT_EXPORT_FUNC( FT_Long ) FT_MulFix( FT_Long a, | |
412 | FT_Long b ) | |
413 | { | |
414 | FT_Long s; | |
415 | FT_ULong ua, ub; | |
416 | ||
417 | ||
418 | if ( a == 0 || b == 0x10000L ) | |
419 | return a; | |
420 | ||
421 | s = a; a = ABS(a); | |
422 | s ^= b; b = ABS(b); | |
423 | ||
424 | ua = (FT_ULong)a; | |
425 | ub = (FT_ULong)b; | |
426 | ||
427 | if ( ua <= 2048 && ub <= 1048576L ) | |
428 | { | |
429 | ua = ( ua * ub + 0x8000 ) >> 16; | |
430 | } | |
431 | else | |
432 | { | |
433 | FT_ULong al = ua & 0xFFFF; | |
434 | ||
435 | ||
436 | ua = ( ua >> 16 ) * ub + | |
437 | al * ( ub >> 16 ) + | |
438 | ( al * ( ub & 0xFFFF ) >> 16 ); | |
439 | } | |
440 | ||
441 | return ( s < 0 ? -(FT_Long)ua : ua ); | |
442 | } | |
443 | ||
444 | ||
445 | /*************************************************************************/ | |
446 | /* */ | |
447 | /* <Function> */ | |
448 | /* FT_DivFix */ | |
449 | /* */ | |
450 | /* <Description> */ | |
451 | /* A very simple function used to perform the computation */ | |
452 | /* `(a*0x10000)/b' with maximal accuracy. Most of the time, this is */ | |
453 | /* used to divide a given value by a 16.16 fixed float factor. */ | |
454 | /* */ | |
455 | /* <Input> */ | |
456 | /* a :: The first multiplier. */ | |
457 | /* b :: The second multiplier. Use a 16.16 factor here whenever */ | |
458 | /* possible (see note below). */ | |
459 | /* */ | |
460 | /* <Return> */ | |
461 | /* The result of `(a*0x10000)/b'. */ | |
462 | /* */ | |
463 | /* <Note> */ | |
464 | /* The optimization for FT_DivFix() is simple: If (a << 16) fits into */ | |
465 | /* 32 bits, then the division is computed directly. Otherwise, we */ | |
466 | /* use a specialized version of the old FT_MulDiv64(). */ | |
467 | /* */ | |
468 | FT_EXPORT_FUNC( FT_Long ) FT_DivFix( FT_Long a, | |
469 | FT_Long b ) | |
470 | { | |
471 | FT_Int32 s; | |
472 | FT_UInt32 q; | |
473 | ||
474 | ||
475 | s = a; a = ABS(a); | |
476 | s ^= b; b = ABS(b); | |
477 | ||
478 | if ( b == 0 ) | |
479 | { | |
480 | /* check for division by 0 */ | |
481 | q = 0x7FFFFFFFL; | |
482 | } | |
483 | else if ( ( a >> 16 ) == 0 ) | |
484 | { | |
485 | /* compute result directly */ | |
486 | q = (FT_UInt32)( a << 16 ) / (FT_UInt32)b; | |
487 | } | |
488 | else | |
489 | { | |
490 | /* we need more bits; we have to do it by hand */ | |
491 | FT_UInt32 c; | |
492 | ||
493 | ||
494 | q = ( a / b ) << 16; | |
495 | c = a % b; | |
496 | ||
497 | /* we must compute C*0x10000/B: we simply shift C and B so */ | |
498 | /* C becomes smaller than 16 bits */ | |
499 | while ( c >> 16 ) | |
500 | { | |
501 | c >>= 1; | |
502 | b <<= 1; | |
503 | } | |
504 | ||
505 | q += ( c << 16 ) / b; | |
506 | } | |
507 | ||
508 | return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); | |
509 | } | |
510 | ||
511 | ||
512 | /*************************************************************************/ | |
513 | /* */ | |
514 | /* <Function> */ | |
515 | /* FT_Add64 */ | |
516 | /* */ | |
517 | /* <Description> */ | |
518 | /* Add two Int64 values. */ | |
519 | /* */ | |
520 | /* <Input> */ | |
521 | /* x :: A pointer to the first value to be added. */ | |
522 | /* y :: A pointer to the second value to be added. */ | |
523 | /* */ | |
524 | /* <Output> */ | |
525 | /* z :: A pointer to the result of `x + y'. */ | |
526 | /* */ | |
527 | /* <Note> */ | |
528 | /* Will be wrapped by the ADD_64() macro. */ | |
529 | /* */ | |
530 | FT_EXPORT_FUNC( void ) FT_Add64( FT_Int64* x, | |
531 | FT_Int64* y, | |
532 | FT_Int64* z ) | |
533 | { | |
534 | register FT_UInt32 lo, hi; | |
535 | ||
536 | ||
537 | lo = x->lo + y->lo; | |
538 | hi = x->hi + y->hi + ( lo < x->lo ); | |
539 | ||
540 | z->lo = lo; | |
541 | z->hi = hi; | |
542 | } | |
543 | ||
544 | ||
545 | /*************************************************************************/ | |
546 | /* */ | |
547 | /* <Function> */ | |
548 | /* FT_MulTo64 */ | |
549 | /* */ | |
550 | /* <Description> */ | |
551 | /* Multiplies two Int32 integers. Returns an Int64 integer. */ | |
552 | /* */ | |
553 | /* <Input> */ | |
554 | /* x :: The first multiplier. */ | |
555 | /* y :: The second multiplier. */ | |
556 | /* */ | |
557 | /* <Output> */ | |
558 | /* z :: A pointer to the result of `x * y'. */ | |
559 | /* */ | |
560 | /* <Note> */ | |
561 | /* Will be wrapped by the MUL_64() macro. */ | |
562 | /* */ | |
563 | FT_EXPORT_FUNC( void ) FT_MulTo64( FT_Int32 x, | |
564 | FT_Int32 y, | |
565 | FT_Int64* z ) | |
566 | { | |
567 | FT_Int32 s; | |
568 | ||
569 | ||
570 | s = x; x = ABS( x ); | |
571 | s ^= y; y = ABS( y ); | |
572 | ||
573 | { | |
574 | FT_UInt32 lo1, hi1, lo2, hi2, lo, hi, i1, i2; | |
575 | ||
576 | ||
577 | lo1 = x & 0x0000FFFF; hi1 = x >> 16; | |
578 | lo2 = y & 0x0000FFFF; hi2 = y >> 16; | |
579 | ||
580 | lo = lo1 * lo2; | |
581 | i1 = lo1 * hi2; | |
582 | i2 = lo2 * hi1; | |
583 | hi = hi1 * hi2; | |
584 | ||
585 | /* Check carry overflow of i1 + i2 */ | |
586 | i1 += i2; | |
587 | if ( i1 < i2 ) | |
588 | hi += 1L << 16; | |
589 | ||
590 | hi += i1 >> 16; | |
591 | i1 = i1 << 16; | |
592 | ||
593 | /* Check carry overflow of i1 + lo */ | |
594 | lo += i1; | |
595 | hi += ( lo < i1 ); | |
596 | ||
597 | z->lo = lo; | |
598 | z->hi = hi; | |
599 | } | |
600 | ||
601 | if ( s < 0 ) | |
602 | { | |
603 | z->lo = (FT_UInt32)-(FT_Int32)z->lo; | |
604 | z->hi = ~z->hi + !( z->lo ); | |
605 | } | |
606 | } | |
607 | ||
608 | ||
609 | /*************************************************************************/ | |
610 | /* */ | |
611 | /* <Function> */ | |
612 | /* FT_Div64by32 */ | |
613 | /* */ | |
614 | /* <Description> */ | |
615 | /* Divides an Int64 value by an Int32 value. Returns an Int32 */ | |
616 | /* integer. */ | |
617 | /* */ | |
618 | /* <Input> */ | |
619 | /* x :: A pointer to the dividend. */ | |
620 | /* y :: The divisor. */ | |
621 | /* */ | |
622 | /* <Return> */ | |
623 | /* The result of `x / y'. */ | |
624 | /* */ | |
625 | /* <Note> */ | |
626 | /* Will be wrapped by the DIV_64() macro. */ | |
627 | /* */ | |
628 | FT_EXPORT_FUNC( FT_Int32 ) FT_Div64by32( FT_Int64* x, | |
629 | FT_Int32 y ) | |
630 | { | |
631 | FT_Int32 s; | |
632 | FT_UInt32 q, r, i, lo; | |
633 | ||
634 | ||
635 | s = x->hi; | |
636 | if ( s < 0 ) | |
637 | { | |
638 | x->lo = (FT_UInt32)-(FT_Int32)x->lo; | |
639 | x->hi = ~x->hi + !( x->lo ); | |
640 | } | |
641 | s ^= y; y = ABS( y ); | |
642 | ||
643 | /* Shortcut */ | |
644 | if ( x->hi == 0 ) | |
645 | { | |
646 | if ( y > 0 ) | |
647 | q = x->lo / y; | |
648 | else | |
649 | q = 0x7FFFFFFFL; | |
650 | ||
651 | return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); | |
652 | } | |
653 | ||
654 | r = x->hi; | |
655 | lo = x->lo; | |
656 | ||
657 | if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */ | |
658 | return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL ); | |
659 | /* Return Max/Min Int32 if division overflow. */ | |
660 | /* This includes division by zero! */ | |
661 | q = 0; | |
662 | for ( i = 0; i < 32; i++ ) | |
663 | { | |
664 | r <<= 1; | |
665 | q <<= 1; | |
666 | r |= lo >> 31; | |
667 | ||
668 | if ( r >= (FT_UInt32)y ) | |
669 | { | |
670 | r -= y; | |
671 | q |= 1; | |
672 | } | |
673 | lo <<= 1; | |
674 | } | |
675 | ||
676 | return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); | |
677 | } | |
678 | ||
679 | ||
680 | #ifdef FT_CONFIG_OPTION_OLD_CALCS | |
681 | ||
682 | ||
683 | /* two helper functions for FT_Sqrt64() */ | |
684 | ||
685 | static | |
686 | void FT_Sub64( FT_Int64* x, | |
687 | FT_Int64* y, | |
688 | FT_Int64* z ) | |
689 | { | |
690 | register FT_UInt32 lo, hi; | |
691 | ||
692 | ||
693 | lo = x->lo - y->lo; | |
694 | hi = x->hi - y->hi - ( (FT_Int32)lo < 0 ); | |
695 | ||
696 | z->lo = lo; | |
697 | z->hi = hi; | |
698 | } | |
699 | ||
700 | ||
701 | static | |
702 | int ft_order64( FT_Int64* z ) | |
703 | { | |
704 | FT_UInt32 i; | |
705 | int j; | |
706 | ||
707 | ||
708 | i = z->lo; | |
709 | j = 0; | |
710 | if ( z->hi ) | |
711 | { | |
712 | i = z->hi; | |
713 | j = 32; | |
714 | } | |
715 | ||
716 | while ( i > 0 ) | |
717 | { | |
718 | i >>= 1; | |
719 | j++; | |
720 | } | |
721 | return j - 1; | |
722 | } | |
723 | ||
724 | ||
725 | /*************************************************************************/ | |
726 | /* */ | |
727 | /* <Function> */ | |
728 | /* FT_Sqrt64 */ | |
729 | /* */ | |
730 | /* <Description> */ | |
731 | /* Computes the square root of a 64-bits value. That sounds stupid, */ | |
732 | /* but it is needed to obtain maximal accuracy in the TrueType */ | |
733 | /* bytecode interpreter. */ | |
734 | /* */ | |
735 | /* <Input> */ | |
736 | /* z :: A pointer to a 64-bit integer. */ | |
737 | /* */ | |
738 | /* <Return> */ | |
739 | /* The 32-bit square-root. */ | |
740 | /* */ | |
741 | FT_EXPORT_FUNC( FT_Int32 ) FT_Sqrt64( FT_Int64* l ) | |
742 | { | |
743 | FT_Int64 l2; | |
744 | FT_Int32 r, s; | |
745 | ||
746 | ||
747 | if ( (FT_Int32)l->hi < 0 || | |
748 | ( l->hi == 0 && l->lo == 0 ) ) | |
749 | return 0; | |
750 | ||
751 | s = ft_order64( l ); | |
752 | if ( s == 0 ) | |
753 | return 1; | |
754 | ||
755 | r = ft_square_roots[s]; | |
756 | do | |
757 | { | |
758 | s = r; | |
759 | r = ( r + FT_Div64by32( l, r ) ) >> 1; | |
760 | FT_MulTo64( r, r, &l2 ); | |
761 | FT_Sub64 ( l, &l2, &l2 ); | |
762 | ||
763 | } while ( r > s || (FT_Int32)l2.hi < 0 ); | |
764 | ||
765 | return r; | |
766 | } | |
767 | ||
768 | #endif /* FT_CONFIG_OPTION_OLD_CALCS */ | |
769 | ||
770 | #endif /* FT_LONG64 */ | |
771 | ||
772 | ||
773 | /* END */ |