Create src/suffixes_dafsa.c with DAFSA C array
This commit is contained in:
parent
375aef05ae
commit
883e67f008
2
list
2
list
|
@ -1 +1 @@
|
||||||
Subproject commit e801df4a56ac8c7519d349ad5125433206930d6e
|
Subproject commit c749cdfe6847c7c299045d160d379117caf47bd3
|
|
@ -37,3 +37,4 @@ endif
|
||||||
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
||||||
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
|
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
|
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
|
||||||
|
./psl2c$(EXEEXT) --dafsa "$(PSL_FILE)" suffixes_dafsa.c
|
47
src/psl2c.c
47
src/psl2c.c
|
@ -161,8 +161,9 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
||||||
fprintf(fpout, "};\n");
|
fprintf(fpout, "};\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
|
||||||
{
|
{
|
||||||
|
FILE *fp;
|
||||||
int it;
|
int it;
|
||||||
|
|
||||||
#ifdef BUILTIN_GENERATOR_LIBICU
|
#ifdef BUILTIN_GENERATOR_LIBICU
|
||||||
|
@ -182,12 +183,30 @@ static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const
|
||||||
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if ((fp = fopen("in.tmp", "w"))) {
|
||||||
for (it = 0; it < v->cur; it++) {
|
for (it = 0; it < v->cur; it++) {
|
||||||
_psl_entry_t *e = _vector_get(v, it);
|
_psl_entry_t *e = _vector_get(v, it);
|
||||||
|
unsigned char *s = (unsigned char *)e->label_buf;
|
||||||
|
|
||||||
|
/* search for non-ASCII label and skip it */
|
||||||
|
while (*s && *s < 128) s++;
|
||||||
|
if (*s) continue;
|
||||||
|
|
||||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
|
fprintf(fp, "%s, %d\n", e->label_buf, (int) e->flags);
|
||||||
e->label_buf, e->length, (int) e->nlabels, (int) e->flags);
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
system("../tools/make_dafsa.py in.tmp out.tmp");
|
||||||
|
|
||||||
|
if ((fp = fopen("out.tmp", "r"))) {
|
||||||
|
char buf[256];
|
||||||
|
|
||||||
|
while (fgets(buf, sizeof(buf), fp))
|
||||||
|
fputs(buf, fpout);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,10 +286,10 @@ int main(int argc, const char **argv)
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
if ((fpout = fopen(argv[2], "w"))) {
|
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||||
FILE *pp;
|
FILE *pp;
|
||||||
struct stat st;
|
struct stat st;
|
||||||
size_t cmdsize = 16 + strlen(argv[1]);
|
size_t cmdsize = 16 + strlen(argv[argpos]);
|
||||||
char *cmd = alloca(cmdsize), checksum[64] = "";
|
char *cmd = alloca(cmdsize), checksum[64] = "";
|
||||||
char *abs_srcfile;
|
char *abs_srcfile;
|
||||||
const char *source_date_epoch = NULL;
|
const char *source_date_epoch = NULL;
|
||||||
|
@ -281,18 +300,18 @@ int main(int argc, const char **argv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (dafsa)
|
if (dafsa)
|
||||||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
_print_psl_entries_dafsa(fpout, psl->suffixes);
|
||||||
else
|
else
|
||||||
_print_psl_entries_dafsa(fpout, psl->suffixes, "suffixes_dafsa");
|
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
||||||
|
|
||||||
snprintf(cmd, cmdsize, "sha1sum %s", argv[1]);
|
snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]);
|
||||||
if ((pp = popen(cmd, "r"))) {
|
if ((pp = popen(cmd, "r"))) {
|
||||||
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
||||||
*checksum = 0;
|
*checksum = 0;
|
||||||
pclose(pp);
|
pclose(pp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stat(argv[1], &st) != 0)
|
if (stat(argv[argpos], &st) != 0)
|
||||||
st.st_mtime = 0;
|
st.st_mtime = 0;
|
||||||
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
|
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
|
||||||
|
|
||||||
|
@ -307,22 +326,22 @@ int main(int argc, const char **argv)
|
||||||
|
|
||||||
/* We need an absolute path here, else psl_builtin_outdated() won't work reliable */
|
/* We need an absolute path here, else psl_builtin_outdated() won't work reliable */
|
||||||
/* Caveat: symbolic links are resolved by realpath() */
|
/* Caveat: symbolic links are resolved by realpath() */
|
||||||
if ((abs_srcfile = realpath(argv[1], NULL))) {
|
if ((abs_srcfile = realpath(argv[argpos], NULL))) {
|
||||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
|
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
|
||||||
free(abs_srcfile);
|
free(abs_srcfile);
|
||||||
} else
|
} else
|
||||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]);
|
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]);
|
||||||
|
|
||||||
if (fclose(fpout) != 0)
|
if (fclose(fpout) != 0)
|
||||||
ret = 4;
|
ret = 4;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||||
ret = 3;
|
ret = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
psl_free(psl);
|
psl_free(psl);
|
||||||
#else
|
#else
|
||||||
if ((fpout = fopen(argv[2], "w"))) {
|
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||||
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
|
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
|
||||||
fprintf(fpout, "static time_t _psl_file_time;\n");
|
fprintf(fpout, "static time_t _psl_file_time;\n");
|
||||||
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
||||||
|
@ -335,7 +354,7 @@ int main(int argc, const char **argv)
|
||||||
if (fclose(fpout) != 0)
|
if (fclose(fpout) != 0)
|
||||||
ret = 4;
|
ret = 4;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||||
ret = 3;
|
ret = 3;
|
||||||
}
|
}
|
||||||
#endif /* GENERATE_BUILTIN_DATA */
|
#endif /* GENERATE_BUILTIN_DATA */
|
||||||
|
|
|
@ -442,16 +442,16 @@ def parse_gperf(infile):
|
||||||
"""Parses gperf file and extract strings and return code"""
|
"""Parses gperf file and extract strings and return code"""
|
||||||
lines = [line.strip() for line in infile]
|
lines = [line.strip() for line in infile]
|
||||||
# Extract strings after the first '%%' and before the second '%%'.
|
# Extract strings after the first '%%' and before the second '%%'.
|
||||||
begin = lines.index('%%') + 1
|
#begin = lines.index('%%') + 1
|
||||||
end = lines.index('%%', begin)
|
#end = lines.index('%%', begin)
|
||||||
lines = lines[begin:end]
|
#lines = lines[begin:end]
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if line[-3:-1] != ', ':
|
if line[-3:-1] != ', ':
|
||||||
raise InputError('Expected "domainname, <digit>", found "%s"' % line)
|
raise InputError('Expected "domainname, <digit>", found "%s"' % line)
|
||||||
# Technically the DAFSA format could support return values in range [0-31],
|
# Technically the DAFSA format could support return values in range [0-31],
|
||||||
# but the values below are the only with a defined meaning.
|
# but the values below are the only with a defined meaning.
|
||||||
if line[-1] not in '0124':
|
if line[-1] not in '01245':
|
||||||
raise InputError('Expected value to be one of {0,1,2,4}, found "%s"' %
|
raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' %
|
||||||
line[-1])
|
line[-1])
|
||||||
return [line[:-3] + line[-1] for line in lines]
|
return [line[:-3] + line[-1] for line in lines]
|
||||||
|
|
||||||
|
@ -460,6 +460,10 @@ def main():
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 3:
|
||||||
print('usage: %s infile outfile' % sys.argv[0])
|
print('usage: %s infile outfile' % sys.argv[0])
|
||||||
return 1
|
return 1
|
||||||
|
if sys.argv[1] == '-':
|
||||||
|
with open(sys.argv[2], 'w') as outfile:
|
||||||
|
outfile.write(words_to_cxx(parse_gperf(sys.stdin)))
|
||||||
|
else:
|
||||||
with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
|
with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
|
||||||
outfile.write(words_to_cxx(parse_gperf(infile)))
|
outfile.write(words_to_cxx(parse_gperf(infile)))
|
||||||
return 0
|
return 0
|
||||||
|
|
Loading…
Reference in New Issue