Replace strtod with a ragel implementation

Use a ragel-based number parser when strtod_l couldn't be found,
as libc's strtod may be locale-sensitive, which we need to avoid.
This commit is contained in:
Ebrahim Byagowi 2019-09-06 20:33:10 +04:30
parent 80613e5b9e
commit 47d82713a1
5 changed files with 379 additions and 1 deletions

View File

@ -169,6 +169,7 @@ HB_BASE_sources = \
HB_BASE_RAGEL_GENERATED_sources = \
hb-buffer-deserialize-json.hh \
hb-buffer-deserialize-text.hh \
hb-number-parser.hh \
hb-ot-shape-complex-indic-machine.hh \
hb-ot-shape-complex-khmer-machine.hh \
hb-ot-shape-complex-myanmar-machine.hh \
@ -177,6 +178,7 @@ HB_BASE_RAGEL_GENERATED_sources = \
HB_BASE_RAGEL_sources = \
hb-buffer-deserialize-json.rl \
hb-buffer-deserialize-text.rl \
hb-number-parser.rl \
hb-ot-shape-complex-indic-machine.rl \
hb-ot-shape-complex-khmer-machine.rl \
hb-ot-shape-complex-myanmar-machine.rl \

223
src/hb-number-parser.hh Normal file
View File

@ -0,0 +1,223 @@
#line 1 "hb-number-parser.rl"
/*
* Copyright © 2019 Ebrahim Byagowi
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
*/
#ifndef HB_NUMBER_PARSER_HH
#define HB_NUMBER_PARSER_HH
#include "hb.hh"
#line 35 "hb-number-parser.hh"
/*
 * Transition tables of the "double_parser" state machine (the #line
 * directive at the top of this file names hb-number-parser.rl as their
 * origin).  They are read only by the scanning loop in strtod_rl ():
 *
 *   _double_parser_trans_keys     per state, a [low, high] pair of input
 *                                 bytes, addressed as (cs << 1);
 *   _double_parser_key_spans      per state, the index used when the input
 *                                 byte lies outside [low, high];
 *   _double_parser_index_offsets  per state, where its slice of
 *                                 _double_parser_indicies begins;
 *   _double_parser_indicies       maps (state, byte - low) to a transition;
 *   _double_parser_trans_targs    per transition, the next state;
 *   _double_parser_trans_actions  per transition, the action id dispatched
 *                                 by the switch in strtod_rl () (0 = none).
 */
static const unsigned char _double_parser_trans_keys[] = {
0u, 0u, 43u, 57u, 46u, 57u, 48u, 57u, 43u, 57u, 48u, 57u, 48u, 101u, 48u, 57u,
46u, 101u, 0
};
static const char _double_parser_key_spans[] = {
0, 15, 12, 10, 15, 10, 54, 10,
56
};
static const unsigned char _double_parser_index_offsets[] = {
0, 0, 16, 29, 40, 56, 67, 122,
133
};
static const char _double_parser_indicies[] = {
0, 1, 2, 3, 1, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
1, 3, 1, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 1, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5,
1, 6, 1, 7, 1, 1, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
1, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 1, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 9, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 9, 1, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 1, 3, 1,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 9, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 9, 1, 0
};
static const char _double_parser_trans_targs[] = {
2, 0, 2, 3, 8, 6, 5, 5,
7, 4
};
static const char _double_parser_trans_actions[] = {
0, 0, 1, 0, 2, 3, 0, 4,
5, 0
};
/* State numbers the scanner starts in, accepts from, and treats as failure. */
static const int double_parser_start = 1;
static const int double_parser_first_final = 6;
static const int double_parser_error = 0;
static const int double_parser_en_main = 1;
#line 55 "hb-number-parser.rl"
/* Returns x squared; used to build the power table at compile time. */
constexpr double _pow2 (double x) { return x * x; }

/*
 * Returns 10^n for n a power of two, computed by repeated squaring.
 * n == 0 yields 1 (10^0) instead of recursing without end.
 */
constexpr double _pow10_of_2i (unsigned int n)
{ return n == 0 ? 1. : n == 1 ? 10. : _pow2 (_pow10_of_2i (n >> 1)); }

/* 10^(2^i), highest power first; entry k matches bit (count - 1 - k). */
static const double _powers_of_10[] =
{
  _pow10_of_2i (0x100),
  _pow10_of_2i (0x80),
  _pow10_of_2i (0x40),
  _pow10_of_2i (0x20),
  _pow10_of_2i (0x10),
  _pow10_of_2i (0x8),
  _pow10_of_2i (0x4),
  _pow10_of_2i (0x2),
  _pow10_of_2i (0x1),
};

/*
 * Returns 10^exponent as a double by multiplying the table entries whose
 * bit is set in exponent.
 *
 * The table covers exponents below 512.  Larger exponents used to have
 * their high bits silently ignored (600 was treated as 88); they are now
 * clamped to the largest covered value, whose product already overflows
 * to +infinity, so out-of-range requests saturate instead of wrapping.
 */
inline double
_pow10 (unsigned int exponent)
{
  const unsigned int count = sizeof (_powers_of_10) / sizeof (_powers_of_10[0]);
  /* Bit matching the first (largest) table entry. */
  unsigned int mask = 1u << (count - 1);
  const unsigned int max_exponent = (mask << 1) - 1;

  if (exponent > max_exponent)
    exponent = max_exponent;

  double result = 1;
  for (const double *power = _powers_of_10; mask; ++power, mask >>= 1)
    if (exponent & mask) result *= *power;
  return result;
}
/*
 * Locale-independent stand-in for strtod ().
 *
 * Skips leading whitespace in the NUL-terminated string buf, then scans
 * an optional sign, decimal digits with an optional '.' fraction (or a
 * bare '.' fraction), and an optional 'e'/'E' exponent with its own
 * optional sign.
 *
 * If end_ptr is not NULL, *end_ptr receives the position where scanning
 * stopped: the first character the state machine rejected, or the end of
 * the string.  Returns the parsed value, or 0 when no digits were read.
 *
 * Exponents are saturated at 511, where _pow10 () already yields infinity,
 * so huge exponents give +/-infinity or 0 rather than a wrapped result.
 * Only the first 300 fraction digits are accumulated; further digits are
 * still consumed but cannot affect a double and would otherwise overflow
 * the accumulator.
 *
 * The action bodies in the switch correspond to the actions named by the
 * #line directives; keep hb-number-parser.rl in sync when regenerating.
 */
inline double
strtod_rl (const char *buf, char **end_ptr)
{
  const char *p, *pe;
  double value = 0;
  double frac = 0;
  unsigned int frac_count = 0;
  unsigned int exp = 0;
  bool neg = false, exp_neg = false;
  p = buf;
  pe = p + strlen (p);

  while (p < pe && ISSPACE (*p))
    p++;

  int cs;

#line 143 "hb-number-parser.hh"
	{
	cs = double_parser_start;
	}

#line 148 "hb-number-parser.hh"
	{
	int _slen;
	int _trans;
	const unsigned char *_keys;
	const char *_inds;
	if ( p == pe )
		goto _test_eof;
	if ( cs == 0 )
		goto _out;
_resume:
	_keys = _double_parser_trans_keys + (cs<<1);
	_inds = _double_parser_indicies + _double_parser_index_offsets[cs];

	_slen = _double_parser_key_spans[cs];
	_trans = _inds[ _slen > 0 && _keys[0] <=(*p) &&
		(*p) <= _keys[1] ?
		(*p) - _keys[0] : _slen ];

	cs = _double_parser_trans_targs[_trans];

	if ( _double_parser_trans_actions[_trans] == 0 )
		goto _again;

	switch ( _double_parser_trans_actions[_trans] ) {
	case 1:
#line 37 "hb-number-parser.rl"
	{ neg = true; }
	break;
	case 4:
#line 38 "hb-number-parser.rl"
	{ exp_neg = true; }
	break;
	case 2:
#line 40 "hb-number-parser.rl"
	{ value = value * 10. + ((*p) - '0'); }
	break;
	case 3:
#line 41 "hb-number-parser.rl"
	{
	  /* Stop accumulating before frac or its divisor can overflow. */
	  if (frac_count < 300)
	  {
	    frac = frac * 10. + ((*p) - '0');
	    ++frac_count;
	  }
	}
	break;
	case 5:
#line 42 "hb-number-parser.rl"
	{
	  exp = exp * 10 + ((*p) - '0');
	  /* Saturate: keeps exp inside _pow10 ()'s range and prevents wrap-around. */
	  if (exp > 511) exp = 511;
	}
	break;
#line 193 "hb-number-parser.hh"
	}

_again:
	if ( cs == 0 )
		goto _out;
	if ( ++p != pe )
		goto _resume;
	_test_eof: {}
	_out: {}
	}

#line 104 "hb-number-parser.rl"

  /* Like strtod (), tolerate callers that do not want the end position. */
  if (end_ptr) *end_ptr = (char *) p;

  if (frac_count) value += frac / _pow10 (frac_count);
  if (neg) value *= -1.;

  /* Skip scaling a zero mantissa: 0 * infinity would produce NaN. */
  if (exp && value != 0)
  {
    if (exp_neg)
      value /= _pow10 (exp);
    else
      value *= _pow10 (exp);
  }

  return value;
}
#endif /* HB_NUMBER_PARSER_HH */

122
src/hb-number-parser.rl Normal file
View File

@ -0,0 +1,122 @@
/*
* Copyright © 2019 Ebrahim Byagowi
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
*/
#ifndef HB_NUMBER_PARSER_HH
#define HB_NUMBER_PARSER_HH
#include "hb.hh"
%%{

# Scanner for decimal floating-point literals:
#   [sign] digits ['.' digits] [('e'|'E') [sign] digits]
#   [sign] '.' digits          [('e'|'E') [sign] digits]
# The actions update locals (neg, exp_neg, value, frac, frac_count, exp)
# that the host function embedding "write exec" must declare.

machine double_parser;
alphtype unsigned char;
write data;

action see_neg     { neg = true; }
action see_exp_neg { exp_neg = true; }

action add_int  { value = value * 10. + (fc - '0'); }

# Only the first 300 fraction digits are accumulated: later digits cannot
# change a double, and without the cap both frac and 10^frac_count
# overflow to infinity, whose quotient is NaN.
action add_frac {
	if (frac_count < 300)
	{
	  frac = frac * 10. + (fc - '0');
	  ++frac_count;
	}
}

# Saturate the exponent at 511: the power-of-ten helper only covers
# exponents below 512, and an unbounded accumulator would wrap around.
action add_exp {
	exp = exp * 10 + (fc - '0');
	if (exp > 511) exp = 511;
}

num = [0-9]+;

main := (
  (
    (('+'|'-'@see_neg)? num @add_int) ('.' num @add_frac)?
    |
    (('+'|'-'@see_neg)? '.' num @add_frac)
  )
  (('e'|'E') (('+'|'-'@see_exp_neg)? num @add_exp))?
);

}%%
/* Returns x squared; used to build the power table at compile time. */
constexpr double _pow2 (double x) { return x * x; }

/*
 * Returns 10^n for n a power of two, computed by repeated squaring.
 * n == 0 yields 1 (10^0) instead of recursing without end.
 */
constexpr double _pow10_of_2i (unsigned int n)
{ return n == 0 ? 1. : n == 1 ? 10. : _pow2 (_pow10_of_2i (n >> 1)); }

/* 10^(2^i), highest power first; entry k matches bit (count - 1 - k). */
static const double _powers_of_10[] =
{
  _pow10_of_2i (0x100),
  _pow10_of_2i (0x80),
  _pow10_of_2i (0x40),
  _pow10_of_2i (0x20),
  _pow10_of_2i (0x10),
  _pow10_of_2i (0x8),
  _pow10_of_2i (0x4),
  _pow10_of_2i (0x2),
  _pow10_of_2i (0x1),
};

/*
 * Returns 10^exponent as a double by multiplying the table entries whose
 * bit is set in exponent.
 *
 * The table covers exponents below 512.  Larger exponents used to have
 * their high bits silently ignored (600 was treated as 88); they are now
 * clamped to the largest covered value, whose product already overflows
 * to +infinity, so out-of-range requests saturate instead of wrapping.
 */
inline double
_pow10 (unsigned int exponent)
{
  const unsigned int count = sizeof (_powers_of_10) / sizeof (_powers_of_10[0]);
  /* Bit matching the first (largest) table entry. */
  unsigned int mask = 1u << (count - 1);
  const unsigned int max_exponent = (mask << 1) - 1;

  if (exponent > max_exponent)
    exponent = max_exponent;

  double result = 1;
  for (const double *power = _powers_of_10; mask; ++power, mask >>= 1)
    if (exponent & mask) result *= *power;
  return result;
}
/*
 * Locale-independent stand-in for strtod ().
 *
 * Skips leading whitespace in the NUL-terminated string buf, then runs
 * the double_parser machine over the rest.  If end_ptr is not NULL,
 * *end_ptr receives the position where scanning stopped.  Returns the
 * parsed value, or 0 when no digits were read.
 *
 * Exponents above 511 are clamped before calling _pow10 (), which is only
 * valid below 512 and already yields infinity there, so such inputs give
 * +/-infinity or 0 instead of a value scaled by the wrong power.
 */
inline double
strtod_rl (const char *buf, char **end_ptr)
{
  const char *p, *pe;
  double value = 0;
  double frac = 0;
  unsigned int frac_count = 0;
  unsigned int exp = 0;
  bool neg = false, exp_neg = false;
  p = buf;
  pe = p + strlen (p);

  while (p < pe && ISSPACE (*p))
    p++;

  int cs;
  %%{
    write init;
    write exec;
  }%%

  /* Like strtod (), tolerate callers that do not want the end position. */
  if (end_ptr) *end_ptr = (char *) p;

  if (frac_count) value += frac / _pow10 (frac_count);
  if (neg) value *= -1.;

  /* Skip scaling a zero mantissa: 0 * infinity would produce NaN. */
  if (exp && value != 0)
  {
    if (exp > 511) exp = 511;

    if (exp_neg)
      value /= _pow10 (exp);
    else
      value *= _pow10 (exp);
  }

  return value;
}
#endif /* HB_NUMBER_PARSER_HH */

View File

@ -25,6 +25,7 @@
#include "hb.hh"
#include "hb-machinery.hh"
#include "hb-number-parser.hh"
#include <locale.h>
#ifdef HAVE_XLOCALE_H
@ -140,7 +141,7 @@ hb_parse_double (const char **pp, const char *end, double *pv,
#ifdef USE_XLOCALE
return strtod_l (p, end, get_C_locale ());
#else
return strtod (p, end);
return strtod_rl (p, end);
#endif
});
}

View File

@ -25,6 +25,7 @@
#include "hb.hh"
#include "hb-number.hh"
#include "hb-number-parser.hh"
int
@ -145,6 +146,11 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == 123);
assert (pp - str == 4);
assert (end - pp == 1);
/* Test strtod_rl even if libc's strtod_l is used */
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123);
assert (pend - str == 4);
}
{
@ -157,6 +163,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == 123);
assert (pp - str == 5);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123);
assert (pend - str == 5);
}
{
@ -169,6 +179,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == 123);
assert (pp - str == 7);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123);
assert (pend - str == 7);
}
{
@ -181,6 +195,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == 123);
assert (pp - str == 6);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123);
assert (pend - str == 6);
}
{
@ -193,6 +211,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == 123);
assert (pp - str == 10);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123);
assert (pend - str == 10);
}
{
@ -205,6 +227,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == -123);
assert (pp - str == 13);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == -123);
assert (pend - str == 13);
}
{
@ -217,6 +243,10 @@ main (int argc, char **argv)
assert ((int) roundf (pv * 1000.) == -123);
assert (pp - str == 8);
assert (end - pp == 0);
char *pend;
assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == -123);
assert (pend - str == 8);
}
return 0;