From a8d50da0cc93a28fa05bd892f49bf074e11280e6 Mon Sep 17 00:00:00 2001 From: Bartek Fabiszewski Date: Tue, 2 May 2017 18:00:42 +0200 Subject: [PATCH] hnj_hyphen_hyphword: fix buffer overflow (#13) * Fix buffer overflow * hnj_hyphen_hyphword: rewrite, improve overflow checking * hnj_hyphen_hyphword: add test to detect overflows --- example.c | 7 ++++++- hyphen.c | 45 ++++++++++++++++++++++++++++++++------------- tests/Makefile.am | 5 +++++ tests/alt7.hyph | 2 ++ tests/alt7.pat | 7 +++++++ tests/alt7.test | 4 ++++ tests/alt7.word | 2 ++ 7 files changed, 58 insertions(+), 14 deletions(-) create mode 100644 tests/alt7.hyph create mode 100644 tests/alt7.pat create mode 100755 tests/alt7.test create mode 100644 tests/alt7.word diff --git a/example.c b/example.c index 939c6cc..3e9dea8 100644 --- a/example.c +++ b/example.c @@ -67,7 +67,7 @@ main(int argc, char** argv) char *hyphens; char *lcword; char *hyphword; - char hword[BUFSIZE * 2]; + char *hword; int arg = 1; int optd = 1; int optn = 0; @@ -151,12 +151,16 @@ main(int argc, char** argv) rep = NULL; pos = NULL; cut = NULL; + + /* set minimum required output buffer size (2 * word_size) */ + hword = (char *) malloc((n-1)*2); hword[0] = '\0'; if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) || (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) { free(hyphens); free(lcword); + free(hword); fprintf(stderr, "hyphenation error\n"); exit(1); } @@ -202,6 +206,7 @@ main(int argc, char** argv) } free(hyphens); free(lcword); + free(hword); } fclose(wtclst); diff --git a/hyphen.c b/hyphen.c index 06363b5..9a132d0 100644 --- a/hyphen.c +++ b/hyphen.c @@ -39,6 +39,7 @@ #include <stdlib.h> /* for NULL, malloc */ #include <stdio.h> /* for fprintf */ #include <string.h> /* for strdup */ +#include <limits.h> /* for INT_MAX */ #ifdef UNX #include <unistd.h> /* for exit */ @@ -1073,23 +1074,41 @@ int hnj_hyphen_norm(const char *word, int word_size, char * hyphens, } /* get the word with all possible hyphenations (output: hyphword) */ -void 
hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, +void hnj_hyphen_hyphword(const char * word, int word_size, const char * hyphens, char * hyphword, char *** rep, int ** pos, int ** cut) { - int hyphenslen = l + 5; + + if (word_size <= 0 || word_size > INT_MAX / 2) { + hyphword[0] = '\0'; + return; + } + + /* hyphword buffer size must be at least 2 * word_size */ + int hyphword_size = 2 * word_size - 1; - int i, j; - for (i = 0, j = 0; i < l; i++, j++) { - if (hyphens[i]&1) { - hyphword[j] = word[i]; - if (*rep && *pos && *cut && (*rep)[i]) { - size_t offset = j - (*pos)[i] + 1; - strncpy(hyphword + offset, (*rep)[i], hyphenslen - offset - 1); - hyphword[hyphenslen-1] = '\0'; - j += strlen((*rep)[i]) - (*pos)[i]; + int nonstandard = 0; + if (*rep && *pos && *cut) { + nonstandard = 1; + } + + int i; + int j = 0; + for (i = 0; i < word_size && j < hyphword_size; i++) { + hyphword[j++] = word[i]; + if (hyphens[i]&1 && j < hyphword_size) { + if (nonstandard && (*rep)[i] && j >= (*pos)[i]) { + /* non-standard */ + j -= (*pos)[i]; + char *s = (*rep)[i]; + while (*s && j < hyphword_size) { + hyphword[j++] = *s++; + } i += (*cut)[i] - (*pos)[i]; - } else hyphword[++j] = '='; - } else hyphword[j] = word[i]; + } else { + /* standard */ + hyphword[j++] = '='; + } + } } hyphword[j] = '\0'; } diff --git a/tests/Makefile.am b/tests/Makefile.am index 37dc74f..2b08c67 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -10,6 +10,7 @@ alt3.test \ alt4.test \ alt5.test \ alt6.test \ +alt7.test \ compound.test \ compound2.test \ compound3.test \ @@ -46,6 +47,9 @@ alt5.word \ alt6.hyph \ alt6.pat \ alt6.word \ +alt7.hyph \ +alt7.pat \ +alt7.word \ alt.hyph \ alt.pat \ alt.word \ @@ -100,6 +104,7 @@ alt3.test \ alt4.test \ alt5.test \ alt6.test \ +alt7.test \ alt.test \ basealt2.test \ basealt.test \ diff --git a/tests/alt7.hyph b/tests/alt7.hyph new file mode 100644 index 0000000..c8b4364 --- /dev/null +++ b/tests/alt7.hyph @@ -0,0 +1,2 @@ 
+b=bbbbbbbbbbbbbbbbbbbbbba=a=a=a=a=a=a=a=a=a=a=a=a=a=a=a +b=bbbbbbbbbbbbbbbbbbbbbba=b=bbbbbbbbbbbbbbbbbbbbbba=b=b diff --git a/tests/alt7.pat b/tests/alt7.pat new file mode 100644 index 0000000..e295cf2 --- /dev/null +++ b/tests/alt7.pat @@ -0,0 +1,7 @@ +ISO8859-1 +LEFTHYPHENMIN 1 +RIGHTHYPHENMIN 1 +% Check for buffer overflows +.a1b. +a9a +ab3a/b=bbbbbbbbbbbbbbbbbbbbbb,1,2 diff --git a/tests/alt7.test b/tests/alt7.test new file mode 100755 index 0000000..f144145 --- /dev/null +++ b/tests/alt7.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph diff --git a/tests/alt7.word b/tests/alt7.word new file mode 100644 index 0000000..43d4bc3 --- /dev/null +++ b/tests/alt7.word @@ -0,0 +1,2 @@ +abaaaaaaaaaaaaaaaaaaaaaaaaaa +abaabaababababababababababab