]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/double-conversion-strtod.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / double-conversion-strtod.cpp
CommitLineData
0f5d89e8
A
1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3//
4// From the double-conversion library. Original license:
5//
6// Copyright 2010 the V8 project authors. All rights reserved.
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
10//
11// * Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// * Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following
15// disclaimer in the documentation and/or other materials provided
16// with the distribution.
17// * Neither the name of Google Inc. nor the names of its
18// contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
34#include "unicode/utypes.h"
35#if !UCONFIG_NO_FORMATTING
36
37#include <stdarg.h>
38#include <limits.h>
39
40// ICU PATCH: Customize header file paths for ICU.
41// The file fixed-dtoa.h is not needed.
42
43#include "double-conversion-strtod.h"
44#include "double-conversion-bignum.h"
45#include "double-conversion-cached-powers.h"
46#include "double-conversion-ieee.h"
47
48// ICU PATCH: Wrap in ICU namespace
49U_NAMESPACE_BEGIN
50
51namespace double_conversion {
52
// A double carries a 53-bit significand and 2^53 = 9007199254740992 has 16
// decimal digits, so every integer written with at most 15 decimal digits is
// representable without loss of precision.
static const int kMaxExactDoubleIntegerDecimalDigits = 15;
// 10^19 < 2^64 = 18446744073709551616, hence every integer written with at
// most 19 decimal digits fits into a uint64_t.
static const int kMaxUint64DecimalDigits = 19;

// Decimal magnitude limits of a double:
//   largest finite value:    1.7976931348623157 x 10^308
//   smallest non-zero value: 4.9406564584124654 x 10^-324
// Any x >= 10^309 is read as +infinity, any x <= 10^-324 is read as 0.
// Note that 2.5e-324, although smaller than the smallest non-zero double, is
// still read as non-zero (it becomes the smallest non-zero double).
static const int kMaxDecimalPower = 309;
static const int kMinDecimalPower = -324;
68
69// 2^64 = 18446744073709551616
70static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
71
72
// Powers of ten that a double represents exactly; the entry at index i holds
// 10^i. The table ends at 10^22, which is still exact because
// 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22.
static const double exact_powers_of_ten[] = {
  1e0,  1e1,  1e2,  1e3,  1e4,   // 10^0  .. 10^4
  1e5,  1e6,  1e7,  1e8,  1e9,   // 10^5  .. 10^9
  1e10, 1e11, 1e12, 1e13, 1e14,  // 10^10 .. 10^14
  1e15, 1e16, 1e17, 1e18, 1e19,  // 10^15 .. 10^19
  1e20, 1e21, 1e22               // 10^20 .. 10^22
};
99static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
100
101// Maximum number of significant digits in the decimal representation.
102// In fact the value is 772 (see conversions.cc), but to give us some margin
103// we round up to 780.
104static const int kMaxSignificantDecimalDigits = 780;
105
106static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
107 for (int i = 0; i < buffer.length(); i++) {
108 if (buffer[i] != '0') {
109 return buffer.SubVector(i, buffer.length());
110 }
111 }
112 return Vector<const char>(buffer.start(), 0);
113}
114
115
116static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
117 for (int i = buffer.length() - 1; i >= 0; --i) {
118 if (buffer[i] != '0') {
119 return buffer.SubVector(0, i + 1);
120 }
121 }
122 return Vector<const char>(buffer.start(), 0);
123}
124
125
126static void CutToMaxSignificantDigits(Vector<const char> buffer,
127 int exponent,
128 char* significant_buffer,
129 int* significant_exponent) {
130 for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
131 significant_buffer[i] = buffer[i];
132 }
133 // The input buffer has been trimmed. Therefore the last digit must be
134 // different from '0'.
135 ASSERT(buffer[buffer.length() - 1] != '0');
136 // Set the last digit to be non-zero. This is sufficient to guarantee
137 // correct rounding.
138 significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
139 *significant_exponent =
140 exponent + (buffer.length() - kMaxSignificantDecimalDigits);
141}
142
143
144// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
145// If possible the input-buffer is reused, but if the buffer needs to be
146// modified (due to cutting), then the input needs to be copied into the
147// buffer_copy_space.
148static void TrimAndCut(Vector<const char> buffer, int exponent,
149 char* buffer_copy_space, int space_size,
150 Vector<const char>* trimmed, int* updated_exponent) {
151 Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
152 Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
153 exponent += left_trimmed.length() - right_trimmed.length();
154 if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
155 (void) space_size; // Mark variable as used.
156 ASSERT(space_size >= kMaxSignificantDecimalDigits);
157 CutToMaxSignificantDigits(right_trimmed, exponent,
158 buffer_copy_space, updated_exponent);
159 *trimmed = Vector<const char>(buffer_copy_space,
160 kMaxSignificantDecimalDigits);
161 } else {
162 *trimmed = right_trimmed;
163 *updated_exponent = exponent;
164 }
165}
166
167
168// Reads digits from the buffer and converts them to a uint64.
169// Reads in as many digits as fit into a uint64.
170// When the string starts with "1844674407370955161" no further digit is read.
171// Since 2^64 = 18446744073709551616 it would still be possible read another
172// digit if it was less or equal than 6, but this would complicate the code.
173static uint64_t ReadUint64(Vector<const char> buffer,
174 int* number_of_read_digits) {
175 uint64_t result = 0;
176 int i = 0;
177 while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
178 int digit = buffer[i++] - '0';
179 ASSERT(0 <= digit && digit <= 9);
180 result = 10 * result + digit;
181 }
182 *number_of_read_digits = i;
183 return result;
184}
185
186
187// Reads a DiyFp from the buffer.
188// The returned DiyFp is not necessarily normalized.
189// If remaining_decimals is zero then the returned DiyFp is accurate.
190// Otherwise it has been rounded and has error of at most 1/2 ulp.
191static void ReadDiyFp(Vector<const char> buffer,
192 DiyFp* result,
193 int* remaining_decimals) {
194 int read_digits;
195 uint64_t significand = ReadUint64(buffer, &read_digits);
196 if (buffer.length() == read_digits) {
197 *result = DiyFp(significand, 0);
198 *remaining_decimals = 0;
199 } else {
200 // Round the significand.
201 if (buffer[read_digits] >= '5') {
202 significand++;
203 }
204 // Compute the binary exponent.
205 int exponent = 0;
206 *result = DiyFp(significand, exponent);
207 *remaining_decimals = buffer.length() - read_digits;
208 }
209}
210
211
212static bool DoubleStrtod(Vector<const char> trimmed,
213 int exponent,
214 double* result) {
215#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
216 // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
217 // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
218 // result is not accurate.
219 // We know that Windows32 uses 64 bits and is therefore accurate.
220 // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
221 // the same problem.
222 return false;
223#endif
224 if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
225 int read_digits;
226 // The trimmed input fits into a double.
227 // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
228 // can compute the result-double simply by multiplying (resp. dividing) the
229 // two numbers.
230 // This is possible because IEEE guarantees that floating-point operations
231 // return the best possible approximation.
232 if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
233 // 10^-exponent fits into a double.
234 *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
235 ASSERT(read_digits == trimmed.length());
236 *result /= exact_powers_of_ten[-exponent];
237 return true;
238 }
239 if (0 <= exponent && exponent < kExactPowersOfTenSize) {
240 // 10^exponent fits into a double.
241 *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
242 ASSERT(read_digits == trimmed.length());
243 *result *= exact_powers_of_ten[exponent];
244 return true;
245 }
246 int remaining_digits =
247 kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
248 if ((0 <= exponent) &&
249 (exponent - remaining_digits < kExactPowersOfTenSize)) {
250 // The trimmed string was short and we can multiply it with
251 // 10^remaining_digits. As a result the remaining exponent now fits
252 // into a double too.
253 *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
254 ASSERT(read_digits == trimmed.length());
255 *result *= exact_powers_of_ten[remaining_digits];
256 *result *= exact_powers_of_ten[exponent - remaining_digits];
257 return true;
258 }
259 }
260 return false;
261}
262
263
264// Returns 10^exponent as an exact DiyFp.
265// The given exponent must be in the range [1; kDecimalExponentDistance[.
266static DiyFp AdjustmentPowerOfTen(int exponent) {
267 ASSERT(0 < exponent);
268 ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
269 // Simply hardcode the remaining powers for the given decimal exponent
270 // distance.
271 ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
272 switch (exponent) {
273 case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
274 case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
275 case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
276 case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
277 case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
278 case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
279 case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
280 default:
281 UNREACHABLE();
282 }
283}
284
285
286// If the function returns true then the result is the correct double.
287// Otherwise it is either the correct double or the double that is just below
288// the correct double.
289static bool DiyFpStrtod(Vector<const char> buffer,
290 int exponent,
291 double* result) {
292 DiyFp input;
293 int remaining_decimals;
294 ReadDiyFp(buffer, &input, &remaining_decimals);
295 // Since we may have dropped some digits the input is not accurate.
296 // If remaining_decimals is different than 0 than the error is at most
297 // .5 ulp (unit in the last place).
298 // We don't want to deal with fractions and therefore keep a common
299 // denominator.
300 const int kDenominatorLog = 3;
301 const int kDenominator = 1 << kDenominatorLog;
302 // Move the remaining decimals into the exponent.
303 exponent += remaining_decimals;
304 uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
305
306 int old_e = input.e();
307 input.Normalize();
308 error <<= old_e - input.e();
309
310 ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
311 if (exponent < PowersOfTenCache::kMinDecimalExponent) {
312 *result = 0.0;
313 return true;
314 }
315 DiyFp cached_power;
316 int cached_decimal_exponent;
317 PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
318 &cached_power,
319 &cached_decimal_exponent);
320
321 if (cached_decimal_exponent != exponent) {
322 int adjustment_exponent = exponent - cached_decimal_exponent;
323 DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
324 input.Multiply(adjustment_power);
325 if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
326 // The product of input with the adjustment power fits into a 64 bit
327 // integer.
328 ASSERT(DiyFp::kSignificandSize == 64);
329 } else {
330 // The adjustment power is exact. There is hence only an error of 0.5.
331 error += kDenominator / 2;
332 }
333 }
334
335 input.Multiply(cached_power);
336 // The error introduced by a multiplication of a*b equals
337 // error_a + error_b + error_a*error_b/2^64 + 0.5
338 // Substituting a with 'input' and b with 'cached_power' we have
339 // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
340 // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
341 int error_b = kDenominator / 2;
342 int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
343 int fixed_error = kDenominator / 2;
344 error += error_b + error_ab + fixed_error;
345
346 old_e = input.e();
347 input.Normalize();
348 error <<= old_e - input.e();
349
350 // See if the double's significand changes if we add/subtract the error.
351 int order_of_magnitude = DiyFp::kSignificandSize + input.e();
352 int effective_significand_size =
353 Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
354 int precision_digits_count =
355 DiyFp::kSignificandSize - effective_significand_size;
356 if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
357 // This can only happen for very small denormals. In this case the
358 // half-way multiplied by the denominator exceeds the range of an uint64.
359 // Simply shift everything to the right.
360 int shift_amount = (precision_digits_count + kDenominatorLog) -
361 DiyFp::kSignificandSize + 1;
362 input.set_f(input.f() >> shift_amount);
363 input.set_e(input.e() + shift_amount);
364 // We add 1 for the lost precision of error, and kDenominator for
365 // the lost precision of input.f().
366 error = (error >> shift_amount) + 1 + kDenominator;
367 precision_digits_count -= shift_amount;
368 }
369 // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
370 ASSERT(DiyFp::kSignificandSize == 64);
371 ASSERT(precision_digits_count < 64);
372 uint64_t one64 = 1;
373 uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
374 uint64_t precision_bits = input.f() & precision_bits_mask;
375 uint64_t half_way = one64 << (precision_digits_count - 1);
376 precision_bits *= kDenominator;
377 half_way *= kDenominator;
378 DiyFp rounded_input(input.f() >> precision_digits_count,
379 input.e() + precision_digits_count);
380 if (precision_bits >= half_way + error) {
381 rounded_input.set_f(rounded_input.f() + 1);
382 }
383 // If the last_bits are too close to the half-way case than we are too
384 // inaccurate and round down. In this case we return false so that we can
385 // fall back to a more precise algorithm.
386
387 *result = Double(rounded_input).value();
388 if (half_way - error < precision_bits && precision_bits < half_way + error) {
389 // Too imprecise. The caller will have to fall back to a slower version.
390 // However the returned number is guaranteed to be either the correct
391 // double, or the next-lower double.
392 return false;
393 } else {
394 return true;
395 }
396}
397
398
399// Returns
400// - -1 if buffer*10^exponent < diy_fp.
401// - 0 if buffer*10^exponent == diy_fp.
402// - +1 if buffer*10^exponent > diy_fp.
403// Preconditions:
404// buffer.length() + exponent <= kMaxDecimalPower + 1
405// buffer.length() + exponent > kMinDecimalPower
406// buffer.length() <= kMaxDecimalSignificantDigits
407static int CompareBufferWithDiyFp(Vector<const char> buffer,
408 int exponent,
409 DiyFp diy_fp) {
410 ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
411 ASSERT(buffer.length() + exponent > kMinDecimalPower);
412 ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
413 // Make sure that the Bignum will be able to hold all our numbers.
414 // Our Bignum implementation has a separate field for exponents. Shifts will
415 // consume at most one bigit (< 64 bits).
416 // ln(10) == 3.3219...
417 ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
418 Bignum buffer_bignum;
419 Bignum diy_fp_bignum;
420 buffer_bignum.AssignDecimalString(buffer);
421 diy_fp_bignum.AssignUInt64(diy_fp.f());
422 if (exponent >= 0) {
423 buffer_bignum.MultiplyByPowerOfTen(exponent);
424 } else {
425 diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
426 }
427 if (diy_fp.e() > 0) {
428 diy_fp_bignum.ShiftLeft(diy_fp.e());
429 } else {
430 buffer_bignum.ShiftLeft(-diy_fp.e());
431 }
432 return Bignum::Compare(buffer_bignum, diy_fp_bignum);
433}
434
435
436// Returns true if the guess is the correct double.
437// Returns false, when guess is either correct or the next-lower double.
438static bool ComputeGuess(Vector<const char> trimmed, int exponent,
439 double* guess) {
440 if (trimmed.length() == 0) {
441 *guess = 0.0;
442 return true;
443 }
444 if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
445 *guess = Double::Infinity();
446 return true;
447 }
448 if (exponent + trimmed.length() <= kMinDecimalPower) {
449 *guess = 0.0;
450 return true;
451 }
452
453 if (DoubleStrtod(trimmed, exponent, guess) ||
454 DiyFpStrtod(trimmed, exponent, guess)) {
455 return true;
456 }
457 if (*guess == Double::Infinity()) {
458 return true;
459 }
460 return false;
461}
462
463double Strtod(Vector<const char> buffer, int exponent) {
464 char copy_buffer[kMaxSignificantDecimalDigits];
465 Vector<const char> trimmed;
466 int updated_exponent;
467 TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
468 &trimmed, &updated_exponent);
469 exponent = updated_exponent;
470
471 double guess;
472 bool is_correct = ComputeGuess(trimmed, exponent, &guess);
473 if (is_correct) return guess;
474
475 DiyFp upper_boundary = Double(guess).UpperBoundary();
476 int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
477 if (comparison < 0) {
478 return guess;
479 } else if (comparison > 0) {
480 return Double(guess).NextDouble();
481 } else if ((Double(guess).Significand() & 1) == 0) {
482 // Round towards even.
483 return guess;
484 } else {
485 return Double(guess).NextDouble();
486 }
487}
488
489float Strtof(Vector<const char> buffer, int exponent) {
490 char copy_buffer[kMaxSignificantDecimalDigits];
491 Vector<const char> trimmed;
492 int updated_exponent;
493 TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
494 &trimmed, &updated_exponent);
495 exponent = updated_exponent;
496
497 double double_guess;
498 bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
499
500 float float_guess = static_cast<float>(double_guess);
501 if (float_guess == double_guess) {
502 // This shortcut triggers for integer values.
503 return float_guess;
504 }
505
506 // We must catch double-rounding. Say the double has been rounded up, and is
507 // now a boundary of a float, and rounds up again. This is why we have to
508 // look at previous too.
509 // Example (in decimal numbers):
510 // input: 12349
511 // high-precision (4 digits): 1235
512 // low-precision (3 digits):
513 // when read from input: 123
514 // when rounded from high precision: 124.
515 // To do this we simply look at the neigbors of the correct result and see
516 // if they would round to the same float. If the guess is not correct we have
517 // to look at four values (since two different doubles could be the correct
518 // double).
519
520 double double_next = Double(double_guess).NextDouble();
521 double double_previous = Double(double_guess).PreviousDouble();
522
523 float f1 = static_cast<float>(double_previous);
524 float f2 = float_guess;
525 float f3 = static_cast<float>(double_next);
526 float f4;
527 if (is_correct) {
528 f4 = f3;
529 } else {
530 double double_next2 = Double(double_next).NextDouble();
531 f4 = static_cast<float>(double_next2);
532 }
533 (void) f2; // Mark variable as used.
534 ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
535
536 // If the guess doesn't lie near a single-precision boundary we can simply
537 // return its float-value.
538 if (f1 == f4) {
539 return float_guess;
540 }
541
542 ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
543 (f1 == f2 && f2 != f3 && f3 == f4) ||
544 (f1 == f2 && f2 == f3 && f3 != f4));
545
546 // guess and next are the two possible canditates (in the same way that
547 // double_guess was the lower candidate for a double-precision guess).
548 float guess = f1;
549 float next = f4;
550 DiyFp upper_boundary;
551 if (guess == 0.0f) {
552 float min_float = 1e-45f;
553 upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
554 } else {
555 upper_boundary = Single(guess).UpperBoundary();
556 }
557 int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
558 if (comparison < 0) {
559 return guess;
560 } else if (comparison > 0) {
561 return next;
562 } else if ((Single(guess).Significand() & 1) == 0) {
563 // Round towards even.
564 return guess;
565 } else {
566 return next;
567 }
568}
569
570} // namespace double_conversion
571
572// ICU PATCH: Close ICU namespace
573U_NAMESPACE_END
574#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING