sync 2.8.3 into CVS

This commit is contained in:
Caolán McNamara 2012-06-29 07:10:58 +00:00
parent 058b24a9c3
commit 39bf406090
23 changed files with 3607 additions and 6243 deletions

View File

@ -6,12 +6,11 @@ and http://packages.debian.org/unstable/source/libhnj
This subset of Libhnj was extended by
Peter Novodvorsky <nidd at alt-linux dot org> (OOo integration),
László Németh <nemeth at OOo> (non-standard and compound hyphenation
with Unicode support),
László Németh <nemeth at numbertext dot org> (non-standard and compound
hyphenation with Unicode support),
Nanning Buitenhuis <nanning at elvenkind dot com> (substrings.c)
Write bug reports to László Németh or to the lingu-dev mailing list of
OpenOffice.org (after subscription: http://lingucomponent.openoffice.org)
Write bug reports to László Németh or in the bug tracker of hunspell.sf.net.
---
Please contact Raph Levien for information about licensing for

View File

@ -1,3 +1,36 @@
2011-10-10 László Németh <nemeth at numbertext dot org>:
* hyphen.c:
- fix NOHYPHEN (hyphenation vector
modified by 0 instead of '0')
- fix non-balanced hyphenation of LibreOffice/OOo 3.3 by
1-1/=,1,1 alternative pattern (removing hyphen)
- set default COMPOUNDHYPHENMIN=3 at hyphens and apostrophes
- fix VERBOSE to support multilevel patterns
* example.c: new option: -n to print hyphenation vector
2011-10-07 László Németh <nemeth at numbertext dot org>:
* hyphen.c: use explicit compoundhyphenmin values of the
dictionary also for the implicit NOHYPHEN hyphenation
2011-10-06 László Németh <nemeth at numbertext dot org>:
* hyphen.c: force minimal lefthyphenmin and righthyphenmin values
of the dictionary
2011-10-04 László Németh <nemeth at numbertext dot org>:
* hyphen.c: ignore leading and ending numbers (eg. fix
hyphenation of words with manual or field-based
indexes in LibreOffice/OpenOffice.org)
* tests/num.*: new test files
* hyphen.c: default NOHYPHEN separators: hyphen and
ASCII apostrophe + (only in UTF-8 encoding) endash (U+2013)
and typographical apostrophe (U+2019): fix hyphenation of
words with hyphens, n-dashes or apostrophes in LibreOffice/OOo.
* README.compound: description
* hyph_en_US.dic: remove explicit NOHYPHEN declaration
2010-12-01 László Németh <nemeth at OOo>:
* hyphen.c: NOHYPHEN fix for a special case (NOHYPHEN
character in starting position, eg. "-example")

View File

@ -2,7 +2,7 @@ SUBDIRS= doc tests
lib_LTLIBRARIES = libhyphen.la
libhyphen_la_SOURCES = hnjalloc.c hyphen.c
libhyphen_la_LDFLAGS = -version-info 2:0:2
libhyphen_la_LDFLAGS = -version-info 2:1:2
include_HEADERS = hyphen.h
noinst_HEADERS = hnjalloc.h
@ -21,7 +21,7 @@ hyph_DATA = hyph_en_US.dic
EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \
README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \
COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \
ligpatch.txt ooopatch.sed
ligpatch.txt
hyphen.us3:
cp -f $(srcdir)/hyphen.tex hyphen.us
@ -31,8 +31,7 @@ hyphen.us3:
cat $(srcdir)/ligpatch.txt >>hyphen.us3
hyph_en_US.dic: hyphen.us3
perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
clean-local:
rm -rf hyphen.us* hyph_en_US.dic

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am.
# Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
# Foundation, Inc.
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@ -75,12 +75,6 @@ am__nobase_list = $(am__nobase_strip_setup); \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
"$(DESTDIR)$(hyphdir)" "$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
@ -136,11 +130,9 @@ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
if test -d "$(distdir)"; then \
find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -rf "$(distdir)" \
|| { sleep 5 && rm -rf "$(distdir)"; }; \
else :; fi
{ test ! -d "$(distdir)" \
|| { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -fr "$(distdir)"; }; }
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
@ -169,8 +161,6 @@ am__relativize = \
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
distuninstallcheck_listfiles = find . -type f -print
am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
| sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
@ -187,7 +177,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
@ -211,7 +200,6 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
@ -237,7 +225,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
@ -270,6 +257,7 @@ libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
@ -288,7 +276,7 @@ top_srcdir = @top_srcdir@
SUBDIRS = doc tests
lib_LTLIBRARIES = libhyphen.la
libhyphen_la_SOURCES = hnjalloc.c hyphen.c
libhyphen_la_LDFLAGS = -version-info 2:0:2
libhyphen_la_LDFLAGS = -version-info 2:1:2
include_HEADERS = hyphen.h
noinst_HEADERS = hnjalloc.h
substrings_SOURCES = substrings.c
@ -302,13 +290,13 @@ hyph_DATA = hyph_en_US.dic
EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \
README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \
COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \
ligpatch.txt ooopatch.sed
ligpatch.txt
all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
am--refresh: Makefile
am--refresh:
@:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
@ -373,7 +361,7 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
libhyphen.la: $(libhyphen_la_OBJECTS) $(libhyphen_la_DEPENDENCIES) $(EXTRA_libhyphen_la_DEPENDENCIES)
libhyphen.la: $(libhyphen_la_OBJECTS) $(libhyphen_la_DEPENDENCIES)
$(libhyphen_la_LINK) -rpath $(libdir) $(libhyphen_la_OBJECTS) $(libhyphen_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS:
@ -384,10 +372,10 @@ clean-noinstPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES) $(EXTRA_example_DEPENDENCIES)
example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES)
@rm -f example$(EXEEXT)
$(LINK) $(example_OBJECTS) $(example_LDADD) $(LIBS)
substrings$(EXEEXT): $(substrings_OBJECTS) $(substrings_DEPENDENCIES) $(EXTRA_substrings_DEPENDENCIES)
substrings$(EXEEXT): $(substrings_OBJECTS) $(substrings_DEPENDENCIES)
@rm -f substrings$(EXEEXT)
$(LINK) $(substrings_OBJECTS) $(substrings_LDADD) $(LIBS)
install-binSCRIPTS: $(bin_SCRIPTS)
@ -421,7 +409,9 @@ uninstall-binSCRIPTS:
@list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
files=`for p in $$list; do echo "$$p"; done | \
sed -e 's,.*/,,;$(transform)'`; \
dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir)
test -n "$$list" || exit 0; \
echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(bindir)" && rm -f $$files
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@ -480,7 +470,9 @@ uninstall-hyphDATA:
@$(NORMAL_UNINSTALL)
@list='$(hyph_DATA)'; test -n "$(hyphdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(hyphdir)'; $(am__uninstall_files_from_dir)
test -n "$$files" || exit 0; \
echo " ( cd '$(DESTDIR)$(hyphdir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(hyphdir)" && rm -f $$files
install-includeHEADERS: $(include_HEADERS)
@$(NORMAL_INSTALL)
test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
@ -498,7 +490,9 @@ uninstall-includeHEADERS:
@$(NORMAL_UNINSTALL)
@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
test -n "$$files" || exit 0; \
echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(includedir)" && rm -f $$files
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
@ -707,11 +701,7 @@ dist-gzip: distdir
$(am__remove_distdir)
dist-bzip2: distdir
tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
$(am__remove_distdir)
dist-lzip: distdir
tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
$(am__remove_distdir)
dist-lzma: distdir
@ -719,7 +709,7 @@ dist-lzma: distdir
$(am__remove_distdir)
dist-xz: distdir
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
$(am__remove_distdir)
dist-tarZ: distdir
@ -750,8 +740,6 @@ distcheck: dist
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lzma*) \
lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
*.tar.lz*) \
lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \
@ -771,7 +759,6 @@ distcheck: dist
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
@ -800,16 +787,8 @@ distcheck: dist
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
distuninstallcheck:
@test -n '$(distuninstallcheck_dir)' || { \
echo 'ERROR: trying to run $@ with an empty' \
'$$(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
$(am__cd) '$(distuninstallcheck_dir)' || { \
echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
@$(am__cd) '$(distuninstallcheck_dir)' \
&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
@ -844,15 +823,10 @@ install-am: all-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
@ -946,8 +920,8 @@ uninstall-am: uninstall-binSCRIPTS uninstall-hyphDATA \
all all-am am--refresh check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-local \
clean-noinstPROGRAMS ctags ctags-recursive dist dist-all \
dist-bzip2 dist-gzip dist-lzip dist-lzma dist-shar dist-tarZ \
dist-xz dist-zip distcheck distclean distclean-compile \
dist-bzip2 dist-gzip dist-lzma dist-shar dist-tarZ dist-xz \
dist-zip distcheck distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags \
distcleancheck distdir distuninstallcheck dvi dvi-am html \
html-am info info-am install install-am install-binSCRIPTS \
@ -972,8 +946,7 @@ hyphen.us3:
cat $(srcdir)/ligpatch.txt >>hyphen.us3
hyph_en_US.dic: hyphen.us3
perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
clean-local:
rm -rf hyphen.us* hyph_en_US.dic

26
NEWS
View File

@ -1,4 +1,28 @@
2010-12-01 Hyphen 2,7,1 bug fix release
2011-10-10 Hyphen 2.8.3:
- fix NOHYPHEN
- fix unbalanced hyphenation of LibreOffice/OOo
- set default COMPOUNDHYPHENMIN=3 at hyphens and apostrophes
- fix VERBOSE in hyphen.c
- new ./example option: -n to print hyphenation vector
2011-10-07 Hyphen 2.8.2:
- fix for explicit COMPOUNDHYPHENMIN values
2011-10-06 Hyphen 2.8.1:
- force minimal lefthyphenmin and righthyphenmin values of the dictionary
(eg. righthyphenmin=3 of English dictionaries in LibreOffice/OOo,
also the original TeX hyphenation patterns are correct only with this
righthyphenmin value).
2011-10-04 Hyphen 2.8:
- Ignore leading and ending numbers (eg. manual/field based indexes
in LibreOffice/OOo)
- Fix LibreOffice/OpenOffice.org hyphenation errors at apostrophes and
hyphens, n-dashes with default NOHYPHEN separators.
Eg. *o'c=lock -> o'clock.
2010-12-01 Hyphen 2.7.1 bug fix release
2010-11-27 Hyphen 2.7 release:
- The new hyphenation problem of OpenOffice.org 3.2, related to its

4
README
View File

@ -2,7 +2,7 @@ Hyphen - hyphenation library to use converted TeX hyphenation patterns
(C) 1998 Raph Levien
(C) 2001 ALTLinux, Moscow
(C) 2006, 2007, 2008, 2010 László Németh
(C) 2006, 2007, 2008, 2010, 2011 László Németh
This was part of libHnj library by Raph Levien.
@ -124,4 +124,4 @@ is released in binary form as jar files and in source form as zip files.
See http://sourceforge.net/project/showfiles.php?group_id=119136
László Németh
<nemeth (at) openoffice (dot) org>
<nemeth (at) numbertext (dot) org>

View File

@ -18,6 +18,16 @@ Description:
and NOHYPHEN with the comma separated character (or character sequence)
list forbid the (extra) hyphens at the hyphen and apostrophe characters.
Implicit NOHYPHEN declaration
Without an explicit NEXTLEVEL declaration, Hyphen 2.8 uses the
previous settings; in addition, in UTF-8 encoding, the en dash (U+2013)
and the typographical apostrophe (U+2019) are NOHYPHEN characters, too.
It's possible to enlarge the hyphenation distance from these
NOHYPHEN characters by using COMPOUNDLEFTHYPHENMIN and
COMPOUNDRIGHTHYPHENMIN attributes.
Compound word hyphenation
Hyphen library supports better compound word hyphenation and special

2183
aclocal.m4 vendored

File diff suppressed because it is too large Load Diff

2634
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
AC_INIT([hyphen],[2.7.1],[nemeth@openoffice.org])
AM_INIT_AUTOMAKE(hyphen,2.7.1)
AC_INIT([hyphen],[2.8.3],[nemeth@numbertext.org])
AM_INIT_AUTOMAKE(hyphen,2.8.3)
AC_PROG_CC
AC_PROG_INSTALL
AC_PROG_LIBTOOL

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am.
# Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
# Foundation, Inc.
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@ -60,7 +60,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
@ -84,7 +83,6 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
@ -110,7 +108,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
@ -143,6 +140,7 @@ libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
@ -249,15 +247,10 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:

View File

@ -12,6 +12,7 @@ void help() {
fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n");
fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
fprintf(stderr,"-n = print hyphenation vector\n");
}
/* get a pointer to the nth 8-bit or UTF-8 character of the word */
@ -69,6 +70,7 @@ main(int argc, char** argv)
char hword[BUFSIZE * 2];
int arg = 1;
int optd = 1;
int optn = 0;
int optdd = 0;
char ** rep;
int * pos;
@ -82,6 +84,10 @@ main(int argc, char** argv)
optd = 0;
arg++;
}
if (strcmp(argv[arg], "-n") == 0) {
optn = 1;
arg++;
}
if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
optd = 1;
optdd = 1;
@ -152,6 +158,8 @@ main(int argc, char** argv)
exit(1);
}
if (optn) fprintf(stderr, "%s\n", hyphens);
if (!optd) {
/* now backfill hyphens[] for any removed periods */
for (c = n; c < k; c++) hyphens[c] = '0';
@ -176,7 +184,6 @@ main(int argc, char** argv)
fflush(stdout);
free(hyphword);
} else {
/* fprintf(stderr, "vasz: %s", hyphens); */
fprintf(stdout,"%s\n", hword);

View File

@ -1,13 +1,6 @@
UTF-8
LEFTHYPHENMIN 2
RIGHTHYPHENMIN 3
COMPOUNDLEFTHYPHENMIN 2
COMPOUNDRIGHTHYPHENMIN 3
NOHYPHEN -,',
1-1
1'1
11
NEXTLEVEL
.a2ch4
.ad4der
.a2d

257
hyphen.c
View File

@ -226,118 +226,61 @@ hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
}
#ifdef VERBOSE
HashTab *global;
HashTab *global[1];
static char *
get_state_str (int state)
get_state_str (int state, int level)
{
int i;
HashEntry *e;
for (i = 0; i < HASH_SIZE; i++)
for (e = global->entries[i]; e; e = e->next)
for (e = global[level]->entries[i]; e; e = e->next)
if (e->val == state)
return e->key;
return NULL;
}
#endif
HyphenDict *
hnj_hyphen_load (const char *fn)
{
HyphenDict *dict[2];
HashTab *hashtab;
FILE *f;
char buf[MAX_CHARS];
void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
int i, j;
char word[MAX_CHARS];
char pattern[MAX_CHARS];
char * repl;
signed char replindex;
signed char replcut;
int state_num = 0, last_state;
int i, j, k;
int state_num = 0;
int last_state;
char ch;
int found;
HashEntry *e;
int nextlevel = 0;
f = fopen (fn, "r");
if (f == NULL)
return NULL;
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
hashtab = hnj_hash_new ();
#ifdef VERBOSE
global = hashtab;
#endif
hnj_hash_insert (hashtab, "", 0);
dict[k] = hnj_malloc (sizeof(HyphenDict));
dict[k]->num_states = 1;
dict[k]->states = hnj_malloc (sizeof(HyphenState));
dict[k]->states[0].match = NULL;
dict[k]->states[0].repl = NULL;
dict[k]->states[0].fallback_state = -1;
dict[k]->states[0].num_trans = 0;
dict[k]->states[0].trans = NULL;
dict[k]->nextlevel = NULL;
dict[k]->lhmin = 0;
dict[k]->rhmin = 0;
dict[k]->clhmin = 0;
dict[k]->crhmin = 0;
dict[k]->nohyphen = NULL;
dict[k]->nohyphenl = 0;
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
} else {
dict[k]->cset[0] = 0;
}
dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
} else {
strcpy(dict[k]->cset, dict[0]->cset);
dict[k]->utf8 = dict[0]->utf8;
}
while (fgets (buf, sizeof(buf), f) != NULL)
{
if (buf[0] != '%')
{
if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
nextlevel = 1;
break;
} else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
dict[k]->lhmin = atoi(buf + 13);
continue;
if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
dict->lhmin = atoi(buf + 13);
return;
} else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
dict[k]->rhmin = atoi(buf + 14);
continue;
dict->rhmin = atoi(buf + 14);
return;
} else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
dict[k]->clhmin = atoi(buf + 21);
continue;
dict->clhmin = atoi(buf + 21);
return;
} else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
dict[k]->crhmin = atoi(buf + 22);
continue;
dict->crhmin = atoi(buf + 22);
return;
} else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
char * space = buf + 8;
while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
if (dict[k]->nohyphen) {
char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
if (dict->nohyphen) {
char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
*nhe = 0;
for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
if (*nhe == ',') {
dict[k]->nohyphenl++;
dict->nohyphenl++;
*nhe = 0;
}
}
}
continue;
return;
}
j = 0;
pattern[j] = '0';
@ -382,7 +325,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
} else {
if (*word == '.') i++;
/* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
if (dict[k]->utf8) {
if (dict->utf8) {
int pu = -1; /* unicode character position */
int ps = -1; /* unicode start position (original replindex) */
int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
@ -406,14 +349,14 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);
#endif
found = hnj_hash_lookup (hashtab, word);
state_num = hnj_get_state (dict[k], hashtab, word);
dict[k]->states[state_num].match = hnj_strdup (pattern + i);
dict[k]->states[state_num].repl = repl;
dict[k]->states[state_num].replindex = replindex;
state_num = hnj_get_state (dict, hashtab, word);
dict->states[state_num].match = hnj_strdup (pattern + i);
dict->states[state_num].repl = repl;
dict->states[state_num].replindex = replindex;
if (!replcut) {
dict[k]->states[state_num].replcut = (signed char) strlen(word);
dict->states[state_num].replcut = (signed char) strlen(word);
} else {
dict[k]->states[state_num].replcut = replcut;
dict->states[state_num].replcut = replcut;
}
/* now, put in the prefix transitions */
@ -423,10 +366,84 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
ch = word[j - 1];
word[j - 1] = '\0';
found = hnj_hash_lookup (hashtab, word);
state_num = hnj_get_state (dict[k], hashtab, word);
hnj_add_trans (dict[k], state_num, last_state, ch);
state_num = hnj_get_state (dict, hashtab, word);
hnj_add_trans (dict, state_num, last_state, ch);
}
}
HyphenDict *
hnj_hyphen_load (const char *fn)
{
HyphenDict *dict[2];
HashTab *hashtab;
FILE *f;
char buf[MAX_CHARS];
int nextlevel = 0;
int i, j, k;
HashEntry *e;
int state_num = 0;
f = fopen (fn, "r");
if (f == NULL)
return NULL;
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
for (k = 0; k < 2; k++) {
hashtab = hnj_hash_new ();
#ifdef VERBOSE
global[k] = hashtab;
#endif
hnj_hash_insert (hashtab, "", 0);
dict[k] = hnj_malloc (sizeof(HyphenDict));
dict[k]->num_states = 1;
dict[k]->states = hnj_malloc (sizeof(HyphenState));
dict[k]->states[0].match = NULL;
dict[k]->states[0].repl = NULL;
dict[k]->states[0].fallback_state = -1;
dict[k]->states[0].num_trans = 0;
dict[k]->states[0].trans = NULL;
dict[k]->nextlevel = NULL;
dict[k]->lhmin = 0;
dict[k]->rhmin = 0;
dict[k]->clhmin = 0;
dict[k]->crhmin = 0;
dict[k]->nohyphen = NULL;
dict[k]->nohyphenl = 0;
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
} else {
dict[k]->cset[0] = 0;
}
dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
} else {
strcpy(dict[k]->cset, dict[0]->cset);
dict[k]->utf8 = dict[0]->utf8;
}
if (k == 0 || nextlevel) {
while (fgets (buf, sizeof(buf), f) != NULL) {
if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
nextlevel = 1;
break;
} else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
}
} else if (k == 1) {
/* default first level: hyphen and ASCII apostrophe */
if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
if (dict[0]->utf8) {
hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
}
}
/* Could do unioning of matches here (instead of the preprocessor script).
@ -479,7 +496,20 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
state_num = 0;
}
fclose(f);
if (k == 2) dict[0]->nextlevel = dict[1];
if (nextlevel) dict[0]->nextlevel = dict[1];
else {
dict[1] -> nextlevel = dict[0];
dict[1]->lhmin = dict[0]->lhmin;
dict[1]->rhmin = dict[0]->rhmin;
dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
#ifdef VERBOSE
HashTab *r = global[0];
global[0] = global[1];
global[1] = r;
#endif
return dict[1];
}
return dict[0];
}
@ -530,8 +560,13 @@ int hnj_hyphen_hyphenate (HyphenDict *dict,
j = 0;
prep_word[j++] = '.';
for (i = 0; i < word_size; i++)
for (i = 0; i < word_size; i++) {
if (word[i] <= '9' && word[i] >= '0') {
prep_word[j++] = '.';
} else {
prep_word[j++] = word[i];
}
}
prep_word[j++] = '.';
prep_word[j] = '\0';
@ -560,7 +595,7 @@ int hnj_hyphen_hyphenate (HyphenDict *dict,
#ifdef VERBOSE
char *state_str;
state_str = get_state_str (state);
state_str = get_state_str (state, 0);
for (k = 0; k < i - strlen (state_str); k++)
putchar (' ');
@ -673,6 +708,9 @@ int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
i += hnj_ligature(word[2]);
}
// ignore numbers
for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
// check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) {
@ -699,9 +737,13 @@ int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
char *** rep, int ** pos, int ** cut, int rhmin)
{
int i;
int j = word_size - 2;
for (i = 1; i < rhmin && j > 0; j--) {
int i = 1;
int j;
// ignore numbers
for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
for (j = word_size - 2; i < rhmin && j > 0; j--) {
// check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) {
char * rh = strchr((*rep)[j], '=');
@ -759,8 +801,15 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
j = 0;
prep_word[j++] = '.';
for (i = 0; i < word_size; i++)
for (i = 0; i < word_size; i++) {
if (word[i] <= '9' && word[i] >= '0') {
prep_word[j++] = '.';
} else {
prep_word[j++] = word[i];
}
}
prep_word[j++] = '.';
prep_word[j] = '\0';
@ -789,7 +838,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
#ifdef VERBOSE
char *state_str;
state_str = get_state_str (state);
state_str = get_state_str (state, 1);
for (k = 0; k < i - strlen (state_str); k++)
putchar (' ');
@ -1036,6 +1085,9 @@ int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
}
}
hyphens[j + 1] = '\0';
#ifdef VERBOSE
printf ("nums: %s\n", hyphens);
#endif
return 0;
}
@ -1077,8 +1129,8 @@ int hnj_hyphen_hyphenate2 (HyphenDict *dict,
for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
char * nhy = (char *) strstr(word, nh);
while (nhy) {
hyphens[nhy - word + strlen(nh) - 1] = 0;
if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0;
hyphens[nhy - word + strlen(nh) - 1] = '0';
if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0';
nhy = (char *) strstr(nhy + 1, nh);
}
nh = nh + strlen(nh) + 1;
@ -1087,6 +1139,9 @@ int hnj_hyphen_hyphenate2 (HyphenDict *dict,
if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
#ifdef VERBOSE
printf ("nums: %s\n", hyphens);
#endif
return 0;
}
@ -1096,8 +1151,10 @@ int hnj_hyphen_hyphenate3 (HyphenDict *dict,
char *hyphword, char *** rep, int ** pos, int ** cut,
int lhmin, int rhmin, int clhmin, int crhmin)
{
lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
clhmin, crhmin, 1, 1);
hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,

3852
ltmain.sh

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,8 @@ settings4.test \
lhmin.test \
rhmin.test \
lig.test \
hyphen.test
hyphen.test \
num.test
distclean-local:
-rm -rf testSubDir
@ -120,4 +121,8 @@ lig.word \
hyphen.hyph \
hyphen.pat \
hyphen.test \
hyphen.word
hyphen.word \
num.hyph \
num.pat \
num.test \
num.word

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am.
# Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
# Foundation, Inc.
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@ -62,7 +62,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
@ -86,7 +85,6 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
@ -112,7 +110,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
@ -145,6 +142,7 @@ libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
@ -183,7 +181,8 @@ settings4.test \
lhmin.test \
rhmin.test \
lig.test \
hyphen.test
hyphen.test \
num.test
EXTRA_DIST = \
test.sh \
@ -279,7 +278,11 @@ lig.word \
hyphen.hyph \
hyphen.pat \
hyphen.test \
hyphen.word
hyphen.word \
num.hyph \
num.pat \
num.test \
num.word
all: all-am
@ -408,15 +411,14 @@ check-TESTS: $(TESTS)
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
col="$$grn"; \
echo "$$grn$$dashes"; \
else \
col="$$red"; \
echo "$$red$$dashes"; \
fi; \
echo "$${col}$$dashes$${std}"; \
echo "$${col}$$banner$${std}"; \
test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
test -z "$$report" || echo "$${col}$$report$${std}"; \
echo "$${col}$$dashes$${std}"; \
echo "$$banner"; \
test -z "$$skipped" || echo "$$skipped"; \
test -z "$$report" || echo "$$report"; \
echo "$$dashes$$std"; \
test "$$failed" -eq 0; \
else :; fi
@ -465,15 +467,10 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:

View File

@ -2839,7 +2839,7 @@ oc=tag=o=nal
octets
oddly
odi=ous
o'd=wyer
o'dwyer
of=fended
of=fer
of=fi=cer

9
tests/num.hyph Normal file
View File

@ -0,0 +1,9 @@
foobar
foobar=foobar
barfoo=barfoo
123foobar=foobar
foobar=foobar123
123foobar=foobar123
123barfoo=barfoo
barfoo=barfoo123
123barfoo=barfoo123

6
tests/num.pat Normal file
View File

@ -0,0 +1,6 @@
UTF-8
LEFTHYPHENMIN 4
RIGHTHYPHENMIN 4
foo1
NEXTLEVEL
bar1

4
tests/num.test Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Test driver for the "num" case: runs the shared test.sh harness (located
# next to this script) on num.pat / num.word / num.hyph.
#
# All expansions are quoted so the test still works when the source tree
# path (and therefore $0) contains whitespace or glob characters.
DIR="$(dirname "$0")"
NAME="$(basename "$0" .test)"
"$DIR/test.sh" "$NAME.pat" "$NAME.word" "$NAME.hyph"

9
tests/num.word Normal file
View File

@ -0,0 +1,9 @@
foobar
foobarfoobar
barfoobarfoo
123foobarfoobar
foobarfoobar123
123foobarfoobar123
123barfoobarfoo
barfoobarfoo123
123barfoobarfoo123

View File

@ -1,4 +1,5 @@
UTF-8
NEXTLEVEL
l·1l/l=l,1,3
e1ë/e=e,1,2
a1atje./a=t,1,3