NOHYPHEN feature, see README.compound
This commit is contained in:
parent
06bfd5b5b3
commit
f86ce87baa
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
2010-11-27 László Németh <nemeth at OOo>:
|
||||
* hyphen.c: add NOHYPHEN feature
|
||||
to handle special hyphenation at hyphens and apostrophes,
|
||||
see README.compound
|
||||
|
||||
* tests/{rhmin, hyphen}: new test files
|
||||
|
||||
* ooopatch.sed, hyph_en_US.dic: replace and improve old hack
|
||||
with NOHYPHEN
|
||||
|
||||
2010-07-18 Caolán McNamara <cmc at OOo>:
|
||||
* remove csutil.* as more trouble than it's
|
||||
worth for just the testsuite
|
||||
|
|
|
@ -257,6 +257,7 @@ libdir = @libdir@
|
|||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
lt_ECHO = @lt_ECHO@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
|
|
8
NEWS
8
NEWS
|
@ -1,3 +1,11 @@
|
|||
2010-11-27 Hyphen 2.7 release:
|
||||
- The new hyphenation problem of OpenOffice.org 3.2, related to its
|
||||
modified word breaking of words with hyphen characters, can be fixed
|
||||
with the new NOHYPHEN feature. Also it's possible to solve the similar old
|
||||
problem with apostrophes. More information: README.compound.
|
||||
|
||||
- improved English dictionaries
|
||||
|
||||
2010-08-10 Hyphen 2.6 release:
|
||||
- maintenance release, fix all warnings, tidy up
|
||||
make check with VALGRIND=memcheck, etc.
|
||||
|
|
|
@ -1,3 +1,23 @@
|
|||
New option of Libhyphen 2.7: NOHYPHEN
|
||||
|
||||
Hyphen, apostrophe and other characters may be word boundary characters,
|
||||
but they don't need (extra) hyphenation. With the NOHYPHEN option
|
||||
it's possible to hyphenate the word parts correctly.
|
||||
|
||||
Example:
|
||||
|
||||
ISO8859-1
|
||||
NOHYPHEN -,'
|
||||
1-1
|
||||
1'1
|
||||
NEXTLEVEL
|
||||
|
||||
Description:
|
||||
|
||||
1-1 and 1'1 declare hyphen and apostrophe as word boundary characters
|
||||
and NOHYPHEN with the comma-separated character (or character sequence)
|
||||
list forbids the (extra) hyphens at the hyphen and apostrophe characters.
|
||||
|
||||
Compound word hyphenation
|
||||
|
||||
Hyphen library supports better compound word hyphenation and special
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,5 +1,5 @@
|
|||
AC_INIT([hyphen],[2.6],[nemeth@openoffice.org])
|
||||
AM_INIT_AUTOMAKE(hyphen,2.6)
|
||||
AC_INIT([hyphen],[2.7],[nemeth@openoffice.org])
|
||||
AM_INIT_AUTOMAKE(hyphen,2.7)
|
||||
AC_PROG_CC
|
||||
AC_PROG_INSTALL
|
||||
AC_PROG_LIBTOOL
|
||||
|
|
116
hyph_en_US.dic
116
hyph_en_US.dic
|
@ -3,119 +3,11 @@ LEFTHYPHENMIN 2
|
|||
RIGHTHYPHENMIN 3
|
||||
COMPOUNDLEFTHYPHENMIN 2
|
||||
COMPOUNDRIGHTHYPHENMIN 3
|
||||
1'.
|
||||
1's./'=s,1,2
|
||||
1't./'=t,1,2
|
||||
1’.
|
||||
1’s./’=s,1,2
|
||||
1’t./’=t,1,2
|
||||
NOHYPHEN -,',’
|
||||
1-1
|
||||
1'1
|
||||
1’1
|
||||
NEXTLEVEL
|
||||
4'4
|
||||
4a4'4
|
||||
4b4'4
|
||||
4c4'4
|
||||
4d4'4
|
||||
4e4'4
|
||||
4f4'4
|
||||
4g4'4
|
||||
4h4'4
|
||||
4i4'4
|
||||
4j4'4
|
||||
4k4'4
|
||||
4l4'4
|
||||
4m4'4
|
||||
4n4'4
|
||||
4o4'4
|
||||
4p4'4
|
||||
4q4'4
|
||||
4r4'4
|
||||
4s4'4
|
||||
4t4'4
|
||||
4u4'4
|
||||
4v4'4
|
||||
4w4'4
|
||||
4x4'4
|
||||
4y4'4
|
||||
4z4'4
|
||||
'a4
|
||||
'b4
|
||||
'c4
|
||||
'd4
|
||||
'e4
|
||||
'f4
|
||||
'g4
|
||||
'h4
|
||||
'i4
|
||||
'j4
|
||||
'k4
|
||||
'l4
|
||||
'm4
|
||||
'n4
|
||||
'o4
|
||||
'p4
|
||||
'q4
|
||||
'r4
|
||||
's4
|
||||
't4
|
||||
'u4
|
||||
'v4
|
||||
'w4
|
||||
'x4
|
||||
'y4
|
||||
'z4
|
||||
4’4
|
||||
4a4’4
|
||||
4b4’4
|
||||
4c4’4
|
||||
4d4’4
|
||||
4e4’4
|
||||
4f4’4
|
||||
4g4’4
|
||||
4h4’4
|
||||
4i4’4
|
||||
4j4’4
|
||||
4k4’4
|
||||
4l4’4
|
||||
4m4’4
|
||||
4n4’4
|
||||
4o4’4
|
||||
4p4’4
|
||||
4q4’4
|
||||
4r4’4
|
||||
4s4’4
|
||||
4t4’4
|
||||
4u4’4
|
||||
4v4’4
|
||||
4w4’4
|
||||
4x4’4
|
||||
4y4’4
|
||||
4z4’4
|
||||
’a4
|
||||
’b4
|
||||
’c4
|
||||
’d4
|
||||
’e4
|
||||
’f4
|
||||
’g4
|
||||
’h4
|
||||
’i4
|
||||
’j4
|
||||
’k4
|
||||
’l4
|
||||
’m4
|
||||
’n4
|
||||
’o4
|
||||
’p4
|
||||
’q4
|
||||
’r4
|
||||
’s4
|
||||
’t4
|
||||
’u4
|
||||
’v4
|
||||
’w4
|
||||
’x4
|
||||
’y4
|
||||
’z4
|
||||
.a2ch4
|
||||
.ad4der
|
||||
.a2d
|
||||
|
|
51
hyphen.c
51
hyphen.c
|
@ -285,6 +285,8 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
|
|||
dict[k]->rhmin = 0;
|
||||
dict[k]->clhmin = 0;
|
||||
dict[k]->crhmin = 0;
|
||||
dict[k]->nohyphen = NULL;
|
||||
dict[k]->nohyphenl = 0;
|
||||
|
||||
/* read in character set info */
|
||||
if (k == 0) {
|
||||
|
@ -321,6 +323,21 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
|
|||
} else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
|
||||
dict[k]->crhmin = atoi(buf + 22);
|
||||
continue;
|
||||
} else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
|
||||
char * space = buf + 8;
|
||||
while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
|
||||
if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
|
||||
if (dict[k]->nohyphen) {
|
||||
char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
|
||||
*nhe = 0;
|
||||
for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
|
||||
if (*nhe == ',') {
|
||||
dict[k]->nohyphenl++;
|
||||
*nhe = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
j = 0;
|
||||
pattern[j] = '0';
|
||||
|
@ -483,6 +500,8 @@ void hnj_hyphen_free (HyphenDict *dict)
|
|||
}
|
||||
if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel);
|
||||
|
||||
if (dict->nohyphen) hnj_free(dict->nohyphen);
|
||||
|
||||
hnj_free (dict->states);
|
||||
|
||||
hnj_free (dict);
|
||||
|
@ -1050,6 +1069,22 @@ int hnj_hyphen_hyphenate2 (HyphenDict *dict,
|
|||
hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2));
|
||||
hnj_hyphen_rhmin(dict->utf8, word, word_size,
|
||||
hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2));
|
||||
|
||||
/* nohyphen */
|
||||
if (dict->nohyphen) {
|
||||
char * nh = dict->nohyphen;
|
||||
int nhi;
|
||||
for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
|
||||
char * nhy = (char *) strstr(word, nh);
|
||||
while (nhy) {
|
||||
hyphens[nhy - word + strlen(nh) - 1] = 0;
|
||||
hyphens[nhy - word - 1] = 0;
|
||||
nhy = (char *) strstr(nhy + 1, nh);
|
||||
}
|
||||
nh = nh + strlen(nh) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
|
||||
if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
|
||||
return 0;
|
||||
|
@ -1070,6 +1105,22 @@ int hnj_hyphen_hyphenate3 (HyphenDict *dict,
|
|||
hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
|
||||
rep, pos, cut, (rhmin > 0 ? rhmin : 2));
|
||||
if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
|
||||
|
||||
/* nohyphen */
|
||||
if (dict->nohyphen) {
|
||||
char * nh = dict->nohyphen;
|
||||
int nhi;
|
||||
for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
|
||||
char * nhy = (char *) strstr(word, nh);
|
||||
while (nhy) {
|
||||
hyphens[nhy - word + strlen(nh) - 1] = 0;
|
||||
hyphens[nhy - word - 1] = 0;
|
||||
nhy = (char *) strstr(nhy + 1, nh);
|
||||
}
|
||||
nh = nh + strlen(nh) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
|
||||
return 0;
|
||||
}
|
||||
|
|
3
hyphen.h
3
hyphen.h
|
@ -67,6 +67,9 @@ struct _HyphenDict {
|
|||
char rhmin; /* righthyphenmin: min. hyph. distance from the right side */
|
||||
char clhmin; /* min. hyph. distance from the left compound boundary */
|
||||
char crhmin; /* min. hyph. distance from the right compound boundary */
|
||||
char * nohyphen; /* comma separated list of characters or character
|
||||
sequences with forbidden hyphenation */
|
||||
int nohyphenl; /* count of elements in nohyphen */
|
||||
/* system variables */
|
||||
int num_states;
|
||||
char cset[MAX_NAME];
|
||||
|
|
118
ooopatch.sed
118
ooopatch.sed
|
@ -2,116 +2,8 @@
|
|||
s/\(RIGHTHYPHENMIN.*\)/\1\
|
||||
COMPOUNDLEFTHYPHENMIN 2\
|
||||
COMPOUNDRIGHTHYPHENMIN 3\
|
||||
1'.\
|
||||
1's.\/'=s,1,2\
|
||||
1't.\/'=t,1,2\
|
||||
1’.\
|
||||
1’s.\/’=s,1,2\
|
||||
1’t.\/’=t,1,2\
|
||||
NEXTLEVEL\
|
||||
4'4\
|
||||
4a4'4\
|
||||
4b4'4\
|
||||
4c4'4\
|
||||
4d4'4\
|
||||
4e4'4\
|
||||
4f4'4\
|
||||
4g4'4\
|
||||
4h4'4\
|
||||
4i4'4\
|
||||
4j4'4\
|
||||
4k4'4\
|
||||
4l4'4\
|
||||
4m4'4\
|
||||
4n4'4\
|
||||
4o4'4\
|
||||
4p4'4\
|
||||
4q4'4\
|
||||
4r4'4\
|
||||
4s4'4\
|
||||
4t4'4\
|
||||
4u4'4\
|
||||
4v4'4\
|
||||
4w4'4\
|
||||
4x4'4\
|
||||
4y4'4\
|
||||
4z4'4\
|
||||
'a4\
|
||||
'b4\
|
||||
'c4\
|
||||
'd4\
|
||||
'e4\
|
||||
'f4\
|
||||
'g4\
|
||||
'h4\
|
||||
'i4\
|
||||
'j4\
|
||||
'k4\
|
||||
'l4\
|
||||
'm4\
|
||||
'n4\
|
||||
'o4\
|
||||
'p4\
|
||||
'q4\
|
||||
'r4\
|
||||
's4\
|
||||
't4\
|
||||
'u4\
|
||||
'v4\
|
||||
'w4\
|
||||
'x4\
|
||||
'y4\
|
||||
'z4\
|
||||
4’4\
|
||||
4a4’4\
|
||||
4b4’4\
|
||||
4c4’4\
|
||||
4d4’4\
|
||||
4e4’4\
|
||||
4f4’4\
|
||||
4g4’4\
|
||||
4h4’4\
|
||||
4i4’4\
|
||||
4j4’4\
|
||||
4k4’4\
|
||||
4l4’4\
|
||||
4m4’4\
|
||||
4n4’4\
|
||||
4o4’4\
|
||||
4p4’4\
|
||||
4q4’4\
|
||||
4r4’4\
|
||||
4s4’4\
|
||||
4t4’4\
|
||||
4u4’4\
|
||||
4v4’4\
|
||||
4w4’4\
|
||||
4x4’4\
|
||||
4y4’4\
|
||||
4z4’4\
|
||||
’a4\
|
||||
’b4\
|
||||
’c4\
|
||||
’d4\
|
||||
’e4\
|
||||
’f4\
|
||||
’g4\
|
||||
’h4\
|
||||
’i4\
|
||||
’j4\
|
||||
’k4\
|
||||
’l4\
|
||||
’m4\
|
||||
’n4\
|
||||
’o4\
|
||||
’p4\
|
||||
’q4\
|
||||
’r4\
|
||||
’s4\
|
||||
’t4\
|
||||
’u4\
|
||||
’v4\
|
||||
’w4\
|
||||
’x4\
|
||||
’y4\
|
||||
’z4/
|
||||
NOHYPHEN -,',’\
|
||||
1-1\
|
||||
1'1\
|
||||
1’1\
|
||||
NEXTLEVEL/
|
||||
|
|
|
@ -19,7 +19,9 @@ settings2.test \
|
|||
settings3.test \
|
||||
settings4.test \
|
||||
lhmin.test \
|
||||
lig.test
|
||||
rhmin.test \
|
||||
lig.test \
|
||||
hyphen.test
|
||||
|
||||
distclean-local:
|
||||
-rm -rf testSubDir
|
||||
|
@ -107,7 +109,15 @@ lhmin.hyph \
|
|||
lhmin.pat \
|
||||
lhmin.word \
|
||||
lhmin.test \
|
||||
rhmin.hyph \
|
||||
rhmin.pat \
|
||||
rhmin.word \
|
||||
rhmin.test \
|
||||
lig.hyph \
|
||||
lig.pat \
|
||||
lig.test \
|
||||
lig.word
|
||||
lig.word \
|
||||
hyphen.hyph \
|
||||
hyphen.pat \
|
||||
hyphen.test \
|
||||
hyphen.word
|
||||
|
|
|
@ -178,7 +178,9 @@ settings2.test \
|
|||
settings3.test \
|
||||
settings4.test \
|
||||
lhmin.test \
|
||||
lig.test
|
||||
rhmin.test \
|
||||
lig.test \
|
||||
hyphen.test
|
||||
|
||||
EXTRA_DIST = \
|
||||
test.sh \
|
||||
|
@ -263,10 +265,18 @@ lhmin.hyph \
|
|||
lhmin.pat \
|
||||
lhmin.word \
|
||||
lhmin.test \
|
||||
rhmin.hyph \
|
||||
rhmin.pat \
|
||||
rhmin.word \
|
||||
rhmin.test \
|
||||
lig.hyph \
|
||||
lig.pat \
|
||||
lig.test \
|
||||
lig.word
|
||||
lig.word \
|
||||
hyphen.hyph \
|
||||
hyphen.pat \
|
||||
hyphen.test \
|
||||
hyphen.word
|
||||
|
||||
all: all-am
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
foobar'foobar-foobar’foobar
|
|
@ -0,0 +1,6 @@
|
|||
UTF-8
|
||||
NOHYPHEN -,',’
|
||||
1-1
|
||||
1'1
|
||||
1’1
|
||||
NEXTLEVEL
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
|
||||
DIR="`dirname $0`"
|
||||
NAME="`basename $0 .test`"
|
||||
$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph
|
|
@ -0,0 +1 @@
|
|||
foobar'foobar-foobar’foobar
|
|
@ -0,0 +1 @@
|
|||
övéit
|
|
@ -0,0 +1,4 @@
|
|||
UTF-8
|
||||
RIGHTHYPHENMIN 3
|
||||
% test patterns for righthyphenmin fix for UTF-8 patterns
|
||||
övé1it
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
|
||||
DIR="`dirname $0`"
|
||||
NAME="`basename $0 .test`"
|
||||
$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph
|
|
@ -0,0 +1 @@
|
|||
övéit
|
Loading…
Reference in New Issue