From 47d82713a144e06e00a486b3aa7d3934d62d68fd Mon Sep 17 00:00:00 2001 From: Ebrahim Byagowi Date: Fri, 6 Sep 2019 20:33:10 +0430 Subject: [PATCH] Replace strtod with a ragel implementation Use a Ragel-based number parser when strtod_l can't be found, since libc's strtod may be locale-sensitive, which we need to avoid. --- src/Makefile.sources | 2 + src/hb-number-parser.hh | 223 ++++++++++++++++++++++++++++++++++++++++ src/hb-number-parser.rl | 122 ++++++++++++++++++++++ src/hb-number.cc | 3 +- src/test-number.cc | 30 ++++++ 5 files changed, 379 insertions(+), 1 deletion(-) create mode 100644 src/hb-number-parser.hh create mode 100644 src/hb-number-parser.rl diff --git a/src/Makefile.sources b/src/Makefile.sources index c9883f833..eff0ad347 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -169,6 +169,7 @@ HB_BASE_sources = \ HB_BASE_RAGEL_GENERATED_sources = \ hb-buffer-deserialize-json.hh \ hb-buffer-deserialize-text.hh \ + hb-number-parser.hh \ hb-ot-shape-complex-indic-machine.hh \ hb-ot-shape-complex-khmer-machine.hh \ hb-ot-shape-complex-myanmar-machine.hh \ @@ -177,6 +178,7 @@ HB_BASE_RAGEL_GENERATED_sources = \ HB_BASE_RAGEL_sources = \ hb-buffer-deserialize-json.rl \ hb-buffer-deserialize-text.rl \ + hb-number-parser.rl \ hb-ot-shape-complex-indic-machine.rl \ hb-ot-shape-complex-khmer-machine.rl \ hb-ot-shape-complex-myanmar-machine.rl \ diff --git a/src/hb-number-parser.hh b/src/hb-number-parser.hh new file mode 100644 index 000000000..b666d00c5 --- /dev/null +++ b/src/hb-number-parser.hh @@ -0,0 +1,223 @@ + +#line 1 "hb-number-parser.rl" +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. 
+ * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#ifndef HB_NUMBER_PARSER_HH +#define HB_NUMBER_PARSER_HH + +#include "hb.hh" + + +#line 35 "hb-number-parser.hh" +static const unsigned char _double_parser_trans_keys[] = { + 0u, 0u, 43u, 57u, 46u, 57u, 48u, 57u, 43u, 57u, 48u, 57u, 48u, 101u, 48u, 57u, + 46u, 101u, 0 +}; + +static const char _double_parser_key_spans[] = { + 0, 15, 12, 10, 15, 10, 54, 10, + 56 +}; + +static const unsigned char _double_parser_index_offsets[] = { + 0, 0, 16, 29, 40, 56, 67, 122, + 133 +}; + +static const char _double_parser_indicies[] = { + 0, 1, 2, 3, 1, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 1, 3, 1, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 1, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 1, 6, 1, 7, 1, 1, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 1, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 1, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 9, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 9, 1, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 1, 3, 1, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 1, 
1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 0 +}; + +static const char _double_parser_trans_targs[] = { + 2, 0, 2, 3, 8, 6, 5, 5, + 7, 4 +}; + +static const char _double_parser_trans_actions[] = { + 0, 0, 1, 0, 2, 3, 0, 4, + 5, 0 +}; + +static const int double_parser_start = 1; +static const int double_parser_first_final = 6; +static const int double_parser_error = 0; + +static const int double_parser_en_main = 1; + + +#line 55 "hb-number-parser.rl" + + +constexpr double _pow2 (double x) { return x * x; } +constexpr double _pow10_of_2i (unsigned int n) +{ return n == 1 ? 10. : _pow2 (_pow10_of_2i (n >> 1)); } /* 10^n by repeated squaring; n must be a power of two */ + +static const double _powers_of_10[] = +{ + _pow10_of_2i (0x100), + _pow10_of_2i (0x80), + _pow10_of_2i (0x40), + _pow10_of_2i (0x20), + _pow10_of_2i (0x10), + _pow10_of_2i (0x8), + _pow10_of_2i (0x4), + _pow10_of_2i (0x2), + _pow10_of_2i (0x1), +}; + +/* Multiplies the _powers_of_10 entries selected by the low 9 bits of exponent, so it works only for exponent < 512; higher bits are ignored. NOTE(review): strtod_rl passes exp and frac_count here unclamped. */ +inline double +_pow10 (unsigned int exponent) +{ + unsigned int mask = 0x100; /* Bit for the first table entry; must match its argument (0x100) */ + double result = 1; + for (const double *power = _powers_of_10; mask; ++power, mask >>= 1) + if (exponent & mask) result *= *power; + return result; +} + +inline double +strtod_rl (const char *buf, char **end_ptr) /* Parses a decimal floating-point number from NUL-terminated buf with the double_parser machine; *end_ptr receives where parsing stopped. 
*/ +{ + const char *p, *pe; + double value = 0; /* integer digits; becomes the final result */ + double frac = 0; /* fractional digits accumulated as a whole number */ + double frac_count = 0; /* how many fractional digits were accumulated */ + unsigned int exp = 0; /* exponent magnitude; its sign is kept in exp_neg */ + bool neg = false, exp_neg = false; + p = buf; + pe = p + strlen (p); + + while (p < pe && ISSPACE (*p)) + p++; + + int cs; /* current state of the Ragel machine */ + +#line 143 "hb-number-parser.hh" + { + cs = double_parser_start; + } + +#line 148 "hb-number-parser.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _double_parser_trans_keys + (cs<<1); + _inds = _double_parser_indicies + _double_parser_index_offsets[cs]; + + _slen = _double_parser_key_spans[cs]; + 
_trans = _inds[ _slen > 0 && _keys[0] <=(*p) && + (*p) <= _keys[1] ? + (*p) - _keys[0] : _slen ]; + + cs = _double_parser_trans_targs[_trans]; + + if ( _double_parser_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _double_parser_trans_actions[_trans] ) { + case 1: +#line 37 "hb-number-parser.rl" + { neg = true; } + break; + case 4: +#line 38 "hb-number-parser.rl" + { exp_neg = true; } + break; + case 2: +#line 40 "hb-number-parser.rl" + { value = value * 10. + ((*p) - '0'); } + break; + case 3: +#line 41 "hb-number-parser.rl" + { frac = frac * 10. + ((*p) - '0'); ++frac_count; } + break; + case 5: +#line 42 "hb-number-parser.rl" + { exp = exp * 10 + ((*p) - '0'); } + break; +#line 193 "hb-number-parser.hh" + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} + } + +#line 104 "hb-number-parser.rl" + + + *end_ptr = (char *) p; /* where the state machine stopped */ + + if (frac_count) value += frac / _pow10 (frac_count); /* append the fraction: frac / 10^frac_count */ + if (neg) value *= -1.; + + if (exp) /* scale by 10^exp, dividing when the exponent was negative */ + { + if (exp_neg) + value /= _pow10 (exp); + else + value *= _pow10 (exp); + } + + return value; +} + +#endif /* HB_NUMBER_PARSER_HH */ diff --git a/src/hb-number-parser.rl b/src/hb-number-parser.rl new file mode 100644 index 000000000..6f0830670 --- /dev/null +++ b/src/hb-number-parser.rl @@ -0,0 +1,122 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. 
+ * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#ifndef HB_NUMBER_PARSER_HH +#define HB_NUMBER_PARSER_HH + +#include "hb.hh" + +%%{ + +machine double_parser; +alphtype unsigned char; +write data; + +action see_neg { neg = true; } +action see_exp_neg { exp_neg = true; } + +action add_int { value = value * 10. + (fc - '0'); } +action add_frac { frac = frac * 10. + (fc - '0'); ++frac_count; } +action add_exp { exp = exp * 10 + (fc - '0'); } + +num = [0-9]+; + +main := ( + ( + (('+'|'-'@see_neg)? num @add_int) ('.' num @add_frac)? + | + (('+'|'-'@see_neg)? '.' num @add_frac) + ) + (('e'|'E') (('+'|'-'@see_exp_neg)? num @add_exp))? +); + +}%% + +constexpr double _pow2 (double x) { return x * x; } +constexpr double _pow10_of_2i (unsigned int n) +{ return n == 1 ? 10. 
: _pow2 (_pow10_of_2i (n >> 1)); } /* 10^n by repeated squaring; n must be a power of two */ + +static const double _powers_of_10[] = +{ + _pow10_of_2i (0x100), + _pow10_of_2i (0x80), + _pow10_of_2i (0x40), + _pow10_of_2i (0x20), + _pow10_of_2i (0x10), + _pow10_of_2i (0x8), + _pow10_of_2i (0x4), + _pow10_of_2i (0x2), + _pow10_of_2i (0x1), +}; + +/* Multiplies the _powers_of_10 entries selected by the low 9 bits of exponent, so it works only for exponent < 512; higher bits are ignored. NOTE(review): strtod_rl passes exp and frac_count here unclamped. */ +inline double +_pow10 (unsigned int exponent) +{ + unsigned int mask = 0x100; /* Bit for the first table entry; must match its argument (0x100) */ + double result = 1; + for (const double *power = _powers_of_10; mask; ++power, mask >>= 1) + if (exponent & mask) result *= *power; + return result; +} + +inline double +strtod_rl (const char *buf, char **end_ptr) /* Parses a decimal floating-point number from NUL-terminated buf with the double_parser machine; *end_ptr receives where parsing stopped. */ +{ + const char *p, *pe; + double value = 0; /* integer digits; becomes the final result */ + double frac = 0; /* fractional digits accumulated as a whole number */ + double frac_count = 0; /* how many fractional digits were accumulated */ + unsigned int exp = 0; /* exponent magnitude; its sign is kept in exp_neg */ + bool neg = false, exp_neg = false; + p = buf; + pe = p + strlen (p); + + while (p < pe && ISSPACE (*p)) + p++; + + int cs; /* current state of the Ragel machine */ + %%{ + write init; + write exec; + }%% + + *end_ptr = (char *) p; /* where the state machine stopped */ + + if (frac_count) value += frac / _pow10 (frac_count); /* append the fraction: frac / 10^frac_count */ + if (neg) value *= -1.; + + if (exp) /* scale by 10^exp, dividing when the exponent was negative */ + { + if (exp_neg) + value /= _pow10 (exp); + else + value *= _pow10 (exp); + } + + return value; +} + +#endif /* HB_NUMBER_PARSER_HH */ diff --git 
a/src/hb-number.cc b/src/hb-number.cc index bf6eca756..4f84d4ad5 100644 --- a/src/hb-number.cc +++ b/src/hb-number.cc @@ -25,6 +25,7 @@ #include "hb.hh" #include "hb-machinery.hh" +#include "hb-number-parser.hh" #include #ifdef HAVE_XLOCALE_H @@ -140,7 +141,7 @@ hb_parse_double (const char **pp, const char *end, double *pv, #ifdef USE_XLOCALE return strtod_l (p, end, get_C_locale ()); #else - return strtod (p, end); + return strtod_rl (p, end); #endif }); } diff --git a/src/test-number.cc b/src/test-number.cc index 74f905993..3591b13f2 100644 --- a/src/test-number.cc +++ b/src/test-number.cc @@ -25,6 +25,7 @@ #include "hb.hh" #include "hb-number.hh" +#include "hb-number-parser.hh" int @@ -145,6 +146,11 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) 
== 123); assert (pp - str == 4); assert (end - pp == 1); + + /* Test strtod_rl even if libc's strtod_l is used */ + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123); + assert (pend - str == 4); } { @@ -157,6 +163,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == 123); assert (pp - str == 5); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123); + assert (pend - str == 5); } { @@ -169,6 +179,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == 123); assert (pp - str == 7); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123); + assert (pend - str == 7); } { @@ -181,6 +195,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == 123); assert (pp - str == 6); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123); + assert (pend - str == 6); } { @@ -193,6 +211,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == 123); assert (pp - str == 10); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == 123); + assert (pend - str == 10); } { @@ -205,6 +227,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == -123); assert (pp - str == 13); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == -123); + assert (pend - str == 13); } { @@ -217,6 +243,10 @@ main (int argc, char **argv) assert ((int) roundf (pv * 1000.) == -123); assert (pp - str == 8); assert (end - pp == 0); + + char *pend; + assert ((int) roundf (strtod_rl (str, &pend) * 1000.) == -123); + assert (pend - str == 8); } return 0;