From 73dd2967c8e1e4f6d7334ee9e539a323d6e66cbd Mon Sep 17 00:00:00 2001 From: Bartek Fabiszewski Date: Mon, 27 Nov 2017 12:19:26 +0100 Subject: [PATCH] Discard too long lines in dictionary file (#14) * Discard too long lines in dictionary file * Discard too long lines in dictionary file: add warning and test --- hyphen.c | 22 ++++++++++++++++++---- tests/Makefile.am | 5 +++++ tests/longlines.hyph | 1 + tests/longlines.pat | 10 ++++++++++ tests/longlines.test | 4 ++++ tests/longlines.word | 1 + 6 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 tests/longlines.hyph create mode 100644 tests/longlines.pat create mode 100755 tests/longlines.test create mode 100644 tests/longlines.word diff --git a/hyphen.c b/hyphen.c index 9a132d0..9f2b711 100644 --- a/hyphen.c +++ b/hyphen.c @@ -438,11 +438,25 @@ for (k = 0; k < 2; k++) { } if (k == 0 || nextlevel) { - while (fgets (buf, sizeof(buf), f) != NULL) { + while (fgets(buf, sizeof(buf), f) != NULL) { + + /* discard lines that don't fit in buffer */ + if (!feof(f) && strchr(buf, '\n') == NULL) { + int c; + while ((c = fgetc(f)) != '\n' && c != EOF); + /* issue warning if not a comment */ + if (buf[0] != '%') { + fprintf(stderr, "Warning: skipping too long pattern (more than %lu chars)\n", sizeof(buf)); + } + continue; + } + if (strncmp(buf, "NEXTLEVEL", 9) == 0) { - nextlevel = 1; - break; - } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab); + nextlevel = 1; + break; + } else if (buf[0] != '%') { + hnj_hyphen_load_line(buf, dict[k], hashtab); + } } } else if (k == 1) { /* default first level: hyphen and ASCII apostrophe */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 2b08c67..e72863c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -11,6 +11,7 @@ alt4.test \ alt5.test \ alt6.test \ alt7.test \ +longlines.test \ compound.test \ compound2.test \ compound3.test \ @@ -50,6 +51,9 @@ alt6.word \ alt7.hyph \ alt7.pat \ alt7.word \ +longlines.hyph \ +longlines.pat \ 
+longlines.word \ alt.hyph \ alt.pat \ alt.word \ @@ -105,6 +109,7 @@ alt4.test \ alt5.test \ alt6.test \ alt7.test \ +longlines.test \ alt.test \ basealt2.test \ basealt.test \ diff --git a/tests/longlines.hyph b/tests/longlines.hyph new file mode 100644 index 0000000..8b0ee7f --- /dev/null +++ b/tests/longlines.hyph @@ -0,0 +1 @@ +a=bc=d diff --git a/tests/longlines.pat b/tests/longlines.pat new file mode 100644 index 0000000..290aef4 --- /dev/null +++ b/tests/longlines.pat @@ -0,0 +1,10 @@ +ISO8859-1 +LEFTHYPHENMIN 1 +RIGHTHYPHENMIN 1 +% Check whether characters over MAX_CHARS are not treated as new line +% This test is valid as long as MAX_CHARS is 100 +% +% Following pattern should result in a=bc=d hyphenation +a1b2c1d +% and should not be overridden by pattern from too long comment (over MAX_CHARS characters) +%|------------------------------ this part is 100 characters long --------------------------------|a8b9c8d diff --git a/tests/longlines.test b/tests/longlines.test new file mode 100755 index 0000000..f144145 --- /dev/null +++ b/tests/longlines.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph diff --git a/tests/longlines.word b/tests/longlines.word new file mode 100644 index 0000000..acbe86c --- /dev/null +++ b/tests/longlines.word @@ -0,0 +1 @@ +abcd