OpenLexocad  27.1
dtoa.h
Go to the documentation of this file.
1 #pragma once
2 #include <assert.h>
3 #include <math.h>
4 
5 #if defined(_MSC_VER)
6 #include <intrin.h>
7 #else
8 #include <stdint.h>
9 #endif
10 
11 #define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))
12 
13 struct DiyFp
14 {
15  DiyFp() {}
16 
17  DiyFp(uint64_t f, int e) : f(f), e(e) {}
18 
19  DiyFp(double d)
20  {
21  union {
22  double d;
23  uint64_t u64;
24  } u = {d};
25 
26  int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
27  uint64_t significand = (u.u64 & kDpSignificandMask);
28  if (biased_e != 0)
29  {
30  f = significand + kDpHiddenBit;
31  e = biased_e - kDpExponentBias;
32  }
33  else
34  {
35  f = significand;
36  e = kDpMinExponent + 1;
37  }
38  }
39 
40  DiyFp operator-(const DiyFp& rhs) const
41  {
42  assert(e == rhs.e);
43  assert(f >= rhs.f);
44  return DiyFp(f - rhs.f, e);
45  }
46 
47  DiyFp operator*(const DiyFp& rhs) const
48  {
49 #if defined(_MSC_VER) && defined(_M_AMD64)
50  uint64_t h;
51  uint64_t l = _umul128(f, rhs.f, &h);
52  if (l & (uint64_t(1) << 63)) // rounding
53  h++;
54  return DiyFp(h, e + rhs.e + 64);
55 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
56  unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
57  uint64_t h = p >> 64;
58  uint64_t l = static_cast<uint64_t>(p);
59  if (l & (uint64_t(1) << 63)) // rounding
60  h++;
61  return DiyFp(h, e + rhs.e + 64);
62 #else
63  const uint64_t M32 = 0xFFFFFFFF;
64  const uint64_t a = f >> 32;
65  const uint64_t b = f & M32;
66  const uint64_t c = rhs.f >> 32;
67  const uint64_t d = rhs.f & M32;
68  const uint64_t ac = a * c;
69  const uint64_t bc = b * c;
70  const uint64_t ad = a * d;
71  const uint64_t bd = b * d;
72  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
73  tmp += 1U << 31;
74  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
75 #endif
76  }
77 
78  DiyFp Normalize() const
79  {
80 #if defined(_MSC_VER) && defined(_M_AMD64)
81  unsigned long index;
82  _BitScanReverse64(&index, f);
83  return DiyFp(f << (63 - index), e - (63 - index));
84 #elif defined(__GNUC__)
85  int s = __builtin_clzll(f);
86  return DiyFp(f << s, e - s);
87 #else
88  DiyFp res = *this;
89  while (!(res.f & kDpHiddenBit))
90  {
91  res.f <<= 1;
92  res.e--;
93  }
95  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
96  return res;
97 #endif
98  }
99 
101  {
102 #if defined(_MSC_VER) && defined(_M_AMD64)
103  unsigned long index;
104  _BitScanReverse64(&index, f);
105  return DiyFp(f << (63 - index), e - (63 - index));
106 #else
107  DiyFp res = *this;
108  while (!(res.f & (kDpHiddenBit << 1)))
109  {
110  res.f <<= 1;
111  res.e--;
112  }
113  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
114  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
115  return res;
116 #endif
117  }
118 
119  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const
120  {
121  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
122  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
123  mi.f <<= mi.e - pl.e;
124  mi.e = pl.e;
125  *plus = pl;
126  *minus = mi;
127  }
128 
129  static const int kDiySignificandSize = 64;
130  static const int kDpSignificandSize = 52;
131  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
132  static const int kDpMinExponent = -kDpExponentBias;
133  static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
134  static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
135  static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);
136 
137  uint64_t f;
138  int e;
139 };
140 
141 inline DiyFp GetCachedPower(int e, int* K)
142 {
143  // 10^-348, 10^-340, ..., 10^340
144  static const uint64_t kCachedPowers_F[] = {
145  UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76), UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
146  UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df), UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
147  UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c), UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
148  UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d), UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
149  UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7), UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
150  UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b), UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
151  UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6), UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
152  UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053), UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
153  UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94), UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
154  UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac), UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
155  UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb), UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
156  UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000), UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
157  UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70), UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
158  UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8), UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
159  UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea), UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
160  UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2), UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
161  UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25), UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
162  UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5), UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
163  UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c), UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
164  UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129), UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
165  UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d), UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
166  UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9), UINT64_C2(0xaf87023b, 0x9bf0ee6b)};
167  static const int16_t kCachedPowers_E[] = {
168  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
169  -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77,
170  -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508,
171  534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
172 
173  // int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
174  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
175  int k = static_cast<int>(dk);
176  if (k != dk)
177  k++;
178 
179  unsigned index = static_cast<unsigned>((k >> 3) + 1);
180  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
181 
182  assert(index < sizeof(kCachedPowers_F) / sizeof(kCachedPowers_F[0]));
183  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
184 }
185 
186 inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
187 {
188  while (rest < wp_w && delta - rest >= ten_kappa &&
189  (rest + ten_kappa < wp_w ||
190  wp_w - rest > rest + ten_kappa - wp_w))
191  {
192  buffer[len - 1]--;
193  rest += ten_kappa;
194  }
195 }
196 
197 inline unsigned CountDecimalDigit32(uint32_t n)
198 {
199  // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
200  if (n < 10)
201  return 1;
202  if (n < 100)
203  return 2;
204  if (n < 1000)
205  return 3;
206  if (n < 10000)
207  return 4;
208  if (n < 100000)
209  return 5;
210  if (n < 1000000)
211  return 6;
212  if (n < 10000000)
213  return 7;
214  if (n < 100000000)
215  return 8;
216  if (n < 1000000000)
217  return 9;
218  return 10;
219 }
220 
221 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K)
222 {
223  static const uint32_t kPow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
224  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
225  const DiyFp wp_w = Mp - W;
226  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
227  uint64_t p2 = Mp.f & (one.f - 1);
228  int kappa = static_cast<int>(CountDecimalDigit32(p1));
229  *len = 0;
230 
231  while (kappa > 0)
232  {
233  uint32_t d;
234  switch (kappa)
235  {
236  case 10:
237  d = p1 / 1000000000;
238  p1 %= 1000000000;
239  break;
240  case 9:
241  d = p1 / 100000000;
242  p1 %= 100000000;
243  break;
244  case 8:
245  d = p1 / 10000000;
246  p1 %= 10000000;
247  break;
248  case 7:
249  d = p1 / 1000000;
250  p1 %= 1000000;
251  break;
252  case 6:
253  d = p1 / 100000;
254  p1 %= 100000;
255  break;
256  case 5:
257  d = p1 / 10000;
258  p1 %= 10000;
259  break;
260  case 4:
261  d = p1 / 1000;
262  p1 %= 1000;
263  break;
264  case 3:
265  d = p1 / 100;
266  p1 %= 100;
267  break;
268  case 2:
269  d = p1 / 10;
270  p1 %= 10;
271  break;
272  case 1:
273  d = p1;
274  p1 = 0;
275  break;
276  default:
277 #if defined(_MSC_VER)
278  __assume(0);
279 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
280  __builtin_unreachable();
281 #else
282  d = 0;
283 #endif
284  }
285  if (d || *len)
286  buffer[(*len)++] = '0' + static_cast<char>(d);
287  kappa--;
288  uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
289  if (tmp <= delta)
290  {
291  *K += kappa;
292  GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
293  return;
294  }
295  }
296 
297  // kappa = 0
298  for (;;)
299  {
300  p2 *= 10;
301  delta *= 10;
302  char d = static_cast<char>(p2 >> -one.e);
303  if (d || *len)
304  buffer[(*len)++] = '0' + d;
305  p2 &= one.f - 1;
306  kappa--;
307  if (p2 < delta)
308  {
309  *K += kappa;
310  GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
311  return;
312  }
313  }
314 }
315 
316 inline void Grisu2(double value, char* buffer, int* length, int* K)
317 {
318  const DiyFp v(value);
319  DiyFp w_m, w_p;
320  v.NormalizedBoundaries(&w_m, &w_p);
321 
322  const DiyFp c_mk = GetCachedPower(w_p.e, K);
323  const DiyFp W = v.Normalize() * c_mk;
324  DiyFp Wp = w_p * c_mk;
325  DiyFp Wm = w_m * c_mk;
326  Wm.f++;
327  Wp.f--;
328  DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
329 }
330 
331 inline const char* GetDigitsLut()
332 {
333  static const char cDigitsLut[200] = {
334  '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1',
335  '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4',
336  '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3',
337  '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
338  '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6',
339  '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
340  '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8',
341  '7', '8', '8', '8', '9', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'};
342  return cDigitsLut;
343 }
344 
345 inline void WriteExponent(int K, char* buffer)
346 {
347  if (K < 0)
348  {
349  *buffer++ = '-';
350  K = -K;
351  }
352 
353  if (K >= 100)
354  {
355  *buffer++ = '0' + static_cast<char>(K / 100);
356  K %= 100;
357  const char* d = GetDigitsLut() + K * 2;
358  *buffer++ = d[0];
359  *buffer++ = d[1];
360  }
361  else if (K >= 10)
362  {
363  const char* d = GetDigitsLut() + K * 2;
364  *buffer++ = d[0];
365  *buffer++ = d[1];
366  }
367  else
368  *buffer++ = '0' + static_cast<char>(K);
369 
370  *buffer = '\0';
371 }
372 
373 inline void Prettify(char* buffer, int length, int k)
374 {
375  const int kk = length + k; // 10^(kk-1) <= v < 10^kk
376 
377  if (length <= kk && kk <= 21)
378  {
379  // 1234e7 -> 12340000000
380  for (int i = length; i < kk; i++)
381  buffer[i] = '0';
382  buffer[kk] = '.';
383  buffer[kk + 1] = '0';
384  buffer[kk + 2] = '\0';
385  }
386  else if (0 < kk && kk <= 21)
387  {
388  // 1234e-2 -> 12.34
389  memmove(&buffer[kk + 1], &buffer[kk], length - kk);
390  buffer[kk] = '.';
391  buffer[length + 1] = '\0';
392  }
393  else if (-6 < kk && kk <= 0)
394  {
395  // 1234e-6 -> 0.001234
396  const int offset = 2 - kk;
397  memmove(&buffer[offset], &buffer[0], length);
398  buffer[0] = '0';
399  buffer[1] = '.';
400  for (int i = 2; i < offset; i++)
401  buffer[i] = '0';
402  buffer[length + offset] = '\0';
403  }
404  else if (length == 1)
405  {
406  // 1e30
407  buffer[1] = 'e';
408  WriteExponent(kk - 1, &buffer[2]);
409  }
410  else
411  {
412  // 1234e30 -> 1.234e33
413  memmove(&buffer[2], &buffer[1], length - 1);
414  buffer[1] = '.';
415  buffer[length + 1] = 'e';
416  WriteExponent(kk - 1, &buffer[0 + length + 2]);
417  }
418 }
419 
420 inline void dtoa_milo(double value, char* buffer)
421 {
422  // Not handling NaN and inf
423  assert(!isnan(value));
424  assert(!isinf(value));
425 
426  if (value == 0)
427  {
428  buffer[0] = '0';
429  buffer[1] = '.';
430  buffer[2] = '0';
431  buffer[3] = '\0';
432  }
433  else
434  {
435  if (value < 0)
436  {
437  *buffer++ = '-';
438  value = -value;
439  }
440  int length, K;
441  Grisu2(value, buffer, &length, &K);
442  Prettify(buffer, length, K);
443  }
444 }
static const uint64_t kDpHiddenBit
Definition: dtoa.h:135
static const int kDiySignificandSize
Definition: dtoa.h:129
DiyFp operator-(const DiyFp &rhs) const
Definition: dtoa.h:40
static const uint64_t kDpExponentMask
Definition: dtoa.h:133
int e
Definition: dtoa.h:138
void DigitGen(const DiyFp &W, const DiyFp &Mp, uint64_t delta, char *buffer, int *len, int *K)
Definition: dtoa.h:221
static const int kDpSignificandSize
Definition: dtoa.h:130
DiyFp Normalize() const
Definition: dtoa.h:78
void dtoa_milo(double value, char *buffer)
Definition: dtoa.h:420
Definition: dtoa.h:13
void Grisu2(double value, char *buffer, int *length, int *K)
Definition: dtoa.h:316
void GrisuRound(char *buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
Definition: dtoa.h:186
DiyFp(uint64_t f, int e)
Definition: dtoa.h:17
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: dtoa.h:119
static const uint64_t kDpSignificandMask
Definition: dtoa.h:134
#define UINT64_C2(h, l)
Definition: dtoa.h:11
static const int kDpExponentBias
Definition: dtoa.h:131
DiyFp()
Definition: dtoa.h:15
unsigned CountDecimalDigit32(uint32_t n)
Definition: dtoa.h:197
DiyFp operator *(const DiyFp &rhs) const
Definition: dtoa.h:47
void Prettify(char *buffer, int length, int k)
Definition: dtoa.h:373
void WriteExponent(int K, char *buffer)
Definition: dtoa.h:345
uint64_t f
Definition: dtoa.h:137
const char * GetDigitsLut()
Definition: dtoa.h:331
DiyFp(double d)
Definition: dtoa.h:19
static const int kDpMinExponent
Definition: dtoa.h:132
DiyFp GetCachedPower(int e, int *K)
Definition: dtoa.h:141
DiyFp NormalizeBoundary() const
Definition: dtoa.h:100