From 883e67f0081fedb758f5447382d3e75a26645662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Fri, 4 Dec 2015 21:26:30 +0100 Subject: [PATCH] Create src/suffixes_dafsa.c with DAFSA C array --- list | 2 +- src/Makefile.am | 1 + src/psl2c.c | 51 +++++++++++++++++++++++++++++++-------------- tools/make_dafsa.py | 18 +++++++++------- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/list b/list index e801df4..c749cdf 160000 --- a/list +++ b/list @@ -1 +1 @@ -Subproject commit e801df4a56ac8c7519d349ad5125433206930d6e +Subproject commit c749cdfe6847c7c299045d160d379117caf47bd3 diff --git a/src/Makefile.am b/src/Makefile.am index 62cb87d..1111bb3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -37,3 +37,4 @@ endif # PSL_FILE can be set by ./configure --with-psl-file=[PATH] suffixes.c: $(PSL_FILE) psl2c$(EXEEXT) ./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c + ./psl2c$(EXEEXT) --dafsa "$(PSL_FILE)" suffixes_dafsa.c \ No newline at end of file diff --git a/src/psl2c.c b/src/psl2c.c index 285fee0..33a85f4 100644 --- a/src/psl2c.c +++ b/src/psl2c.c @@ -161,8 +161,9 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char * fprintf(fpout, "};\n"); } -static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const char *varname) +static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v) { + FILE *fp; int it; #ifdef BUILTIN_GENERATOR_LIBICU @@ -182,12 +183,30 @@ static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); #endif - for (it = 0; it < v->cur; it++) { - _psl_entry_t *e = _vector_get(v, it); + if ((fp = fopen("in.tmp", "w"))) { + for (it = 0; it < v->cur; it++) { + _psl_entry_t *e = _vector_get(v, it); + unsigned char *s = (unsigned char *)e->label_buf; + /* search for non-ASCII label and skip it */ + while (*s && *s < 128) s++; + if (*s) continue; - fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n", - e->label_buf, e->length, (int) e->nlabels, (int) e->flags); + fprintf(fp, "%s, %d\n", e->label_buf, (int) e->flags); + } + + fclose(fp); + } + + system("../tools/make_dafsa.py in.tmp out.tmp"); + + if ((fp = fopen("out.tmp", "r"))) { + char buf[256]; + + while (fgets(buf, sizeof(buf), fp)) + fputs(buf, fpout); + + fclose(fp); } } @@ -267,10 +286,10 @@ int main(int argc, const char **argv) return 5; } */ - if ((fpout = fopen(argv[2], "w"))) { + if ((fpout = fopen(argv[argpos + 1], "w"))) { FILE *pp; struct stat st; - size_t cmdsize = 16 + strlen(argv[1]); + size_t cmdsize = 16 + strlen(argv[argpos]); char *cmd = alloca(cmdsize), checksum[64] = ""; char *abs_srcfile; const char *source_date_epoch = NULL; @@ -281,18 +300,18 @@ int main(int argc, const char **argv) #endif if (dafsa) - _print_psl_entries(fpout, psl->suffixes, "suffixes"); + _print_psl_entries_dafsa(fpout, psl->suffixes); else - _print_psl_entries_dafsa(fpout, psl->suffixes, "suffixes_dafsa"); + _print_psl_entries(fpout, psl->suffixes, "suffixes"); - snprintf(cmd, cmdsize, "sha1sum %s", argv[1]); + snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1) *checksum = 0; pclose(pp); } - if (stat(argv[1], &st) != 0) + if (stat(argv[argpos], &st) != 0) st.st_mtime = 0; fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime); @@ -307,22 +326,22 @@ int main(int argc, const char **argv) /* We need an absolute path here, else psl_builtin_outdated() won't work reliable */ /* Caveat: symbolic links are resolved by realpath() */ - if ((abs_srcfile = realpath(argv[1], NULL))) { + if ((abs_srcfile = realpath(argv[argpos], NULL))) { fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile); free(abs_srcfile); } else - fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]); + fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]); if (fclose(fpout) != 0) ret = 4; } else { - fprintf(stderr, "Failed to write open '%s'\n", argv[2]); + fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]); ret = 3; } psl_free(psl); #else - if ((fpout = fopen(argv[2], "w"))) { + if ((fpout = fopen(argv[argpos + 1], "w"))) { fprintf(fpout, "static _psl_entry_t suffixes[1];\n"); fprintf(fpout, "static time_t _psl_file_time;\n"); fprintf(fpout, "static time_t _psl_compile_time;\n"); @@ -335,7 +354,7 @@ int main(int argc, const char **argv) if (fclose(fpout) != 0) ret = 4; } else { - fprintf(stderr, "Failed to write open '%s'\n", argv[2]); + fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]); ret = 3; } #endif /* GENERATE_BUILTIN_DATA */ diff --git a/tools/make_dafsa.py b/tools/make_dafsa.py index 78358ef..6a04bf1 100755 --- a/tools/make_dafsa.py +++ b/tools/make_dafsa.py @@ -442,16 +442,16 @@ def parse_gperf(infile): """Parses gperf file and extract strings and return code""" lines = [line.strip() for line in infile] # Extract strings after the first '%%' and before the second '%%'. - begin = lines.index('%%') + 1 - end = lines.index('%%', begin) - lines = lines[begin:end] + #begin = lines.index('%%') + 1 + #end = lines.index('%%', begin) + #lines = lines[begin:end] for line in lines: if line[-3:-1] != ', ': raise InputError('Expected "domainname, ", found "%s"' % line) # Technically the DAFSA format could support return values in range [0-31], # but the values below are the only with a defined meaning. - if line[-1] not in '0124': - raise InputError('Expected value to be one of {0,1,2,4}, found "%s"' % + if line[-1] not in '01245': + raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' % line[-1]) return [line[:-3] + line[-1] for line in lines] @@ -460,8 +460,12 @@ def main(): if len(sys.argv) != 3: print('usage: %s infile outfile' % sys.argv[0]) return 1 - with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile: - outfile.write(words_to_cxx(parse_gperf(infile))) + if sys.argv[1] == '-': + with open(sys.argv[2], 'w') as outfile: + outfile.write(words_to_cxx(parse_gperf(sys.stdin))) + else: + with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile: + outfile.write(words_to_cxx(parse_gperf(infile))) return 0