Create src/suffixes_dafsa.c with DAFSA C array

This commit is contained in:
Tim Rühsen 2015-12-04 21:26:30 +01:00
parent 375aef05ae
commit 883e67f008
4 changed files with 48 additions and 24 deletions

2
list

@ -1 +1 @@
Subproject commit e801df4a56ac8c7519d349ad5125433206930d6e Subproject commit c749cdfe6847c7c299045d160d379117caf47bd3

View File

@ -37,3 +37,4 @@ endif
# PSL_FILE can be set by ./configure --with-psl-file=[PATH] # PSL_FILE can be set by ./configure --with-psl-file=[PATH]
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT) suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c ./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
./psl2c$(EXEEXT) --dafsa "$(PSL_FILE)" suffixes_dafsa.c

View File

@ -161,8 +161,9 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
fprintf(fpout, "};\n"); fprintf(fpout, "};\n");
} }
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const char *varname) static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
{ {
FILE *fp;
int it; int it;
#ifdef BUILTIN_GENERATOR_LIBICU #ifdef BUILTIN_GENERATOR_LIBICU
@ -182,12 +183,30 @@ static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
#endif #endif
if ((fp = fopen("in.tmp", "w"))) {
for (it = 0; it < v->cur; it++) { for (it = 0; it < v->cur; it++) {
_psl_entry_t *e = _vector_get(v, it); _psl_entry_t *e = _vector_get(v, it);
unsigned char *s = (unsigned char *)e->label_buf;
/* search for non-ASCII label and skip it */
while (*s && *s < 128) s++;
if (*s) continue;
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n", fprintf(fp, "%s, %d\n", e->label_buf, (int) e->flags);
e->label_buf, e->length, (int) e->nlabels, (int) e->flags); }
fclose(fp);
}
system("../tools/make_dafsa.py in.tmp out.tmp");
if ((fp = fopen("out.tmp", "r"))) {
char buf[256];
while (fgets(buf, sizeof(buf), fp))
fputs(buf, fpout);
fclose(fp);
} }
} }
@ -267,10 +286,10 @@ int main(int argc, const char **argv)
return 5; return 5;
} }
*/ */
if ((fpout = fopen(argv[2], "w"))) { if ((fpout = fopen(argv[argpos + 1], "w"))) {
FILE *pp; FILE *pp;
struct stat st; struct stat st;
size_t cmdsize = 16 + strlen(argv[1]); size_t cmdsize = 16 + strlen(argv[argpos]);
char *cmd = alloca(cmdsize), checksum[64] = ""; char *cmd = alloca(cmdsize), checksum[64] = "";
char *abs_srcfile; char *abs_srcfile;
const char *source_date_epoch = NULL; const char *source_date_epoch = NULL;
@ -281,18 +300,18 @@ int main(int argc, const char **argv)
#endif #endif
if (dafsa) if (dafsa)
_print_psl_entries(fpout, psl->suffixes, "suffixes"); _print_psl_entries_dafsa(fpout, psl->suffixes);
else else
_print_psl_entries_dafsa(fpout, psl->suffixes, "suffixes_dafsa"); _print_psl_entries(fpout, psl->suffixes, "suffixes");
snprintf(cmd, cmdsize, "sha1sum %s", argv[1]); snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]);
if ((pp = popen(cmd, "r"))) { if ((pp = popen(cmd, "r"))) {
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1) if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
*checksum = 0; *checksum = 0;
pclose(pp); pclose(pp);
} }
if (stat(argv[1], &st) != 0) if (stat(argv[argpos], &st) != 0)
st.st_mtime = 0; st.st_mtime = 0;
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime); fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
@ -307,22 +326,22 @@ int main(int argc, const char **argv)
/* We need an absolute path here, else psl_builtin_outdated() won't work reliable */ /* We need an absolute path here, else psl_builtin_outdated() won't work reliable */
/* Caveat: symbolic links are resolved by realpath() */ /* Caveat: symbolic links are resolved by realpath() */
if ((abs_srcfile = realpath(argv[1], NULL))) { if ((abs_srcfile = realpath(argv[argpos], NULL))) {
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile); fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
free(abs_srcfile); free(abs_srcfile);
} else } else
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]); fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]);
if (fclose(fpout) != 0) if (fclose(fpout) != 0)
ret = 4; ret = 4;
} else { } else {
fprintf(stderr, "Failed to write open '%s'\n", argv[2]); fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
ret = 3; ret = 3;
} }
psl_free(psl); psl_free(psl);
#else #else
if ((fpout = fopen(argv[2], "w"))) { if ((fpout = fopen(argv[argpos + 1], "w"))) {
fprintf(fpout, "static _psl_entry_t suffixes[1];\n"); fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
fprintf(fpout, "static time_t _psl_file_time;\n"); fprintf(fpout, "static time_t _psl_file_time;\n");
fprintf(fpout, "static time_t _psl_compile_time;\n"); fprintf(fpout, "static time_t _psl_compile_time;\n");
@ -335,7 +354,7 @@ int main(int argc, const char **argv)
if (fclose(fpout) != 0) if (fclose(fpout) != 0)
ret = 4; ret = 4;
} else { } else {
fprintf(stderr, "Failed to write open '%s'\n", argv[2]); fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
ret = 3; ret = 3;
} }
#endif /* GENERATE_BUILTIN_DATA */ #endif /* GENERATE_BUILTIN_DATA */

View File

@ -442,16 +442,16 @@ def parse_gperf(infile):
"""Parses gperf file and extract strings and return code""" """Parses gperf file and extract strings and return code"""
lines = [line.strip() for line in infile] lines = [line.strip() for line in infile]
# Extract strings after the first '%%' and before the second '%%'. # Extract strings after the first '%%' and before the second '%%'.
begin = lines.index('%%') + 1 #begin = lines.index('%%') + 1
end = lines.index('%%', begin) #end = lines.index('%%', begin)
lines = lines[begin:end] #lines = lines[begin:end]
for line in lines: for line in lines:
if line[-3:-1] != ', ': if line[-3:-1] != ', ':
raise InputError('Expected "domainname, <digit>", found "%s"' % line) raise InputError('Expected "domainname, <digit>", found "%s"' % line)
# Technically the DAFSA format could support return values in range [0-31], # Technically the DAFSA format could support return values in range [0-31],
# but the values below are the only with a defined meaning. # but the values below are the only with a defined meaning.
if line[-1] not in '0124': if line[-1] not in '01245':
raise InputError('Expected value to be one of {0,1,2,4}, found "%s"' % raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' %
line[-1]) line[-1])
return [line[:-3] + line[-1] for line in lines] return [line[:-3] + line[-1] for line in lines]
@ -460,6 +460,10 @@ def main():
if len(sys.argv) != 3: if len(sys.argv) != 3:
print('usage: %s infile outfile' % sys.argv[0]) print('usage: %s infile outfile' % sys.argv[0])
return 1 return 1
if sys.argv[1] == '-':
with open(sys.argv[2], 'w') as outfile:
outfile.write(words_to_cxx(parse_gperf(sys.stdin)))
else:
with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile: with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
outfile.write(words_to_cxx(parse_gperf(infile))) outfile.write(words_to_cxx(parse_gperf(infile)))
return 0 return 0