Create src/suffixes_dafsa.c with DAFSA C array
This commit is contained in:
parent
375aef05ae
commit
883e67f008
2
list
2
list
|
@ -1 +1 @@
|
|||
Subproject commit e801df4a56ac8c7519d349ad5125433206930d6e
|
||||
Subproject commit c749cdfe6847c7c299045d160d379117caf47bd3
|
|
@ -37,3 +37,4 @@ endif
|
|||
# PSL_FILE can be set by ./configure --with-psl-file=[PATH]
|
||||
suffixes.c: $(PSL_FILE) psl2c$(EXEEXT)
|
||||
./psl2c$(EXEEXT) "$(PSL_FILE)" suffixes.c
|
||||
./psl2c$(EXEEXT) --dafsa "$(PSL_FILE)" suffixes_dafsa.c
|
51
src/psl2c.c
51
src/psl2c.c
|
@ -161,8 +161,9 @@ static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *
|
|||
fprintf(fpout, "};\n");
|
||||
}
|
||||
|
||||
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const char *varname)
|
||||
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
|
||||
{
|
||||
FILE *fp;
|
||||
int it;
|
||||
|
||||
#ifdef BUILTIN_GENERATOR_LIBICU
|
||||
|
@ -182,12 +183,30 @@ static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v, const
|
|||
fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
|
||||
#endif
|
||||
|
||||
for (it = 0; it < v->cur; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
if ((fp = fopen("in.tmp", "w"))) {
|
||||
for (it = 0; it < v->cur; it++) {
|
||||
_psl_entry_t *e = _vector_get(v, it);
|
||||
unsigned char *s = (unsigned char *)e->label_buf;
|
||||
|
||||
/* search for non-ASCII label and skip it */
|
||||
while (*s && *s < 128) s++;
|
||||
if (*s) continue;
|
||||
|
||||
fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
|
||||
e->label_buf, e->length, (int) e->nlabels, (int) e->flags);
|
||||
fprintf(fp, "%s, %d\n", e->label_buf, (int) e->flags);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
system("../tools/make_dafsa.py in.tmp out.tmp");
|
||||
|
||||
if ((fp = fopen("out.tmp", "r"))) {
|
||||
char buf[256];
|
||||
|
||||
while (fgets(buf, sizeof(buf), fp))
|
||||
fputs(buf, fpout);
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -267,10 +286,10 @@ int main(int argc, const char **argv)
|
|||
return 5;
|
||||
}
|
||||
*/
|
||||
if ((fpout = fopen(argv[2], "w"))) {
|
||||
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||
FILE *pp;
|
||||
struct stat st;
|
||||
size_t cmdsize = 16 + strlen(argv[1]);
|
||||
size_t cmdsize = 16 + strlen(argv[argpos]);
|
||||
char *cmd = alloca(cmdsize), checksum[64] = "";
|
||||
char *abs_srcfile;
|
||||
const char *source_date_epoch = NULL;
|
||||
|
@ -281,18 +300,18 @@ int main(int argc, const char **argv)
|
|||
#endif
|
||||
|
||||
if (dafsa)
|
||||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
||||
_print_psl_entries_dafsa(fpout, psl->suffixes);
|
||||
else
|
||||
_print_psl_entries_dafsa(fpout, psl->suffixes, "suffixes_dafsa");
|
||||
_print_psl_entries(fpout, psl->suffixes, "suffixes");
|
||||
|
||||
snprintf(cmd, cmdsize, "sha1sum %s", argv[1]);
|
||||
snprintf(cmd, cmdsize, "sha1sum %s", argv[argpos]);
|
||||
if ((pp = popen(cmd, "r"))) {
|
||||
if (fscanf(pp, "%63[0-9a-zA-Z]", checksum) < 1)
|
||||
*checksum = 0;
|
||||
pclose(pp);
|
||||
}
|
||||
|
||||
if (stat(argv[1], &st) != 0)
|
||||
if (stat(argv[argpos], &st) != 0)
|
||||
st.st_mtime = 0;
|
||||
fprintf(fpout, "static time_t _psl_file_time = %lu;\n", st.st_mtime);
|
||||
|
||||
|
@ -307,22 +326,22 @@ int main(int argc, const char **argv)
|
|||
|
||||
/* We need an absolute path here, else psl_builtin_outdated() won't work reliably */
|
||||
/* Caveat: symbolic links are resolved by realpath() */
|
||||
if ((abs_srcfile = realpath(argv[1], NULL))) {
|
||||
if ((abs_srcfile = realpath(argv[argpos], NULL))) {
|
||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", abs_srcfile);
|
||||
free(abs_srcfile);
|
||||
} else
|
||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[1]);
|
||||
fprintf(fpout, "static const char _psl_filename[] = \"%s\";\n", argv[argpos]);
|
||||
|
||||
if (fclose(fpout) != 0)
|
||||
ret = 4;
|
||||
} else {
|
||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
||||
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||
ret = 3;
|
||||
}
|
||||
|
||||
psl_free(psl);
|
||||
#else
|
||||
if ((fpout = fopen(argv[2], "w"))) {
|
||||
if ((fpout = fopen(argv[argpos + 1], "w"))) {
|
||||
fprintf(fpout, "static _psl_entry_t suffixes[1];\n");
|
||||
fprintf(fpout, "static time_t _psl_file_time;\n");
|
||||
fprintf(fpout, "static time_t _psl_compile_time;\n");
|
||||
|
@ -335,7 +354,7 @@ int main(int argc, const char **argv)
|
|||
if (fclose(fpout) != 0)
|
||||
ret = 4;
|
||||
} else {
|
||||
fprintf(stderr, "Failed to write open '%s'\n", argv[2]);
|
||||
fprintf(stderr, "Failed to write open '%s'\n", argv[argpos + 1]);
|
||||
ret = 3;
|
||||
}
|
||||
#endif /* GENERATE_BUILTIN_DATA */
|
||||
|
|
|
@ -442,16 +442,16 @@ def parse_gperf(infile):
|
|||
"""Parses gperf file and extract strings and return code"""
|
||||
lines = [line.strip() for line in infile]
|
||||
# Extract strings after the first '%%' and before the second '%%'.
|
||||
begin = lines.index('%%') + 1
|
||||
end = lines.index('%%', begin)
|
||||
lines = lines[begin:end]
|
||||
#begin = lines.index('%%') + 1
|
||||
#end = lines.index('%%', begin)
|
||||
#lines = lines[begin:end]
|
||||
for line in lines:
|
||||
if line[-3:-1] != ', ':
|
||||
raise InputError('Expected "domainname, <digit>", found "%s"' % line)
|
||||
# Technically the DAFSA format could support return values in range [0-31],
|
||||
# but the values below are the only with a defined meaning.
|
||||
if line[-1] not in '0124':
|
||||
raise InputError('Expected value to be one of {0,1,2,4}, found "%s"' %
|
||||
if line[-1] not in '01245':
|
||||
raise InputError('Expected value to be one of {0,1,2,4,5}, found "%s"' %
|
||||
line[-1])
|
||||
return [line[:-3] + line[-1] for line in lines]
|
||||
|
||||
|
@ -460,8 +460,12 @@ def main():
|
|||
if len(sys.argv) != 3:
|
||||
print('usage: %s infile outfile' % sys.argv[0])
|
||||
return 1
|
||||
with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
|
||||
outfile.write(words_to_cxx(parse_gperf(infile)))
|
||||
if sys.argv[1] == '-':
|
||||
with open(sys.argv[2], 'w') as outfile:
|
||||
outfile.write(words_to_cxx(parse_gperf(sys.stdin)))
|
||||
else:
|
||||
with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
|
||||
outfile.write(words_to_cxx(parse_gperf(infile)))
|
||||
return 0
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue