sync 2.8.3 into CVS

This commit is contained in:
Caolán McNamara 2012-06-29 07:10:58 +00:00
parent 058b24a9c3
commit 39bf406090
23 changed files with 3607 additions and 6243 deletions

View File

@ -6,12 +6,11 @@ and http://packages.debian.org/unstable/source/libhnj
This subset of Libhnj was extended by This subset of Libhnj was extended by
Peter Novodvorsky <nidd at alt-linux dot org> (OOo integration), Peter Novodvorsky <nidd at alt-linux dot org> (OOo integration),
László Németh <nemeth at OOo> (non-standard and compound hyphenation László Németh <nemeth at numbertext dot org> (non-standard and compound
with Unicode support), hyphenation with Unicode support),
Nanning Buitenhuis <nanning at elvenkind dot com> (substrings.c) Nanning Buitenhuis <nanning at elvenkind dot com> (substrings.c)
Write bug reports to László Németh or to the lingu-dev mailing list of Write bug reports to László Németh or in the bug tracker of hunspell.sf.net.
OpenOffice.org (after subscription: http://lingucomponent.openoffice.org)
--- ---
Please contact Raph Levien for information about licensing for Please contact Raph Levien for information about licensing for

View File

@ -1,16 +1,49 @@
2011-10-10 László Németh <nemeth at numbertext dot org>:
* hyphen.c:
- fix NOHYPHEN (hyphenation vector
modified by 0 instead of '0')
- fix non-balanced hyphenation of LibreOffice/OOo 3.3 by
1-1/=,1,1 alternative pattern (removing hyphen)
- set default COMPOUNDHYPHENMIN=3 at hyphen and apostrophes
- fix VERBOSE to support multilevel patterns
* example.c: new option: -n to print hyphenation vector
2011-10-07 László Németh <nemeth at numbertext dot org>:
* hyphen.c: use explicit compoundhyphenmin values of the
dictionary also for the implicit NOHYPHEN hyphenation
2011-10-06 László Németh <nemeth at numbertext dot org>:
* hyphen.c: force minimal lefthyphenmin and righthyphenmin values
of the dictionary
2011-10-04 László Németh <nemeth at numbertext dot org>:
* hyphen.c: ignore leading and ending numbers (eg. fix
hyphenation of words with manual or field-based
indexes in LibreOffice/OpenOffice.org)
* tests/num.*: new test files
* hyphen.c: default NOHYPHEN separators: hyphen and
ASCII apostrophe + (only in UTF-8 encoding) endash (U+2013)
and typographical apostrophe (U+2019): fix hyphenation of
words with hyphens, n-dashes or apostrophes in LibreOffice/OOo.
* README.compound: description
* hyph_en_US.dic: remove explicit NOHYPHEN declaration
2010-12-01 László Németh <nemeth at OOo>: 2010-12-01 László Németh <nemeth at OOo>:
* hyphen.c: NOHYPHEN fix for a special case (NOHYPHEN * hyphen.c: NOHYPHEN fix for a special case (NOHYPHEN
character in starting position, eg. "-example") character in starting position, eg. "-example")
2010-11-27 László Németh <nemeth at OOo>: 2010-11-27 László Németh <nemeth at OOo>:
* hyphen.c: add NOHYPHEN feature * hyphen.c: add NOHYPHEN feature
to handle special hyphenation at hyphens and apostrophes, to handle special hyphenation at hyphens and apostrophes,
see README.compound see README.compound
* tests/{rhmin, hyphen}: new test files * tests/{rhmin, hyphen}: new test files
* ooopatch.sed. hyph_en_US.dic: replace and improve old hack * ooopatch.sed. hyph_en_US.dic: replace and improve old hack
with NOHYPHEN with NOHYPHEN
2010-07-18 Caolán McNamara <cmc at OOo>: 2010-07-18 Caolán McNamara <cmc at OOo>:
* remove csutil.* as more trouble that its * remove csutil.* as more trouble that its

View File

@ -2,7 +2,7 @@ SUBDIRS= doc tests
lib_LTLIBRARIES = libhyphen.la lib_LTLIBRARIES = libhyphen.la
libhyphen_la_SOURCES = hnjalloc.c hyphen.c libhyphen_la_SOURCES = hnjalloc.c hyphen.c
libhyphen_la_LDFLAGS = -version-info 2:0:2 libhyphen_la_LDFLAGS = -version-info 2:1:2
include_HEADERS = hyphen.h include_HEADERS = hyphen.h
noinst_HEADERS = hnjalloc.h noinst_HEADERS = hnjalloc.h
@ -21,7 +21,7 @@ hyph_DATA = hyph_en_US.dic
EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \ EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \
README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \ README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \
COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \ COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \
ligpatch.txt ooopatch.sed ligpatch.txt
hyphen.us3: hyphen.us3:
cp -f $(srcdir)/hyphen.tex hyphen.us cp -f $(srcdir)/hyphen.tex hyphen.us
@ -31,8 +31,7 @@ hyphen.us3:
cat $(srcdir)/ligpatch.txt >>hyphen.us3 cat $(srcdir)/ligpatch.txt >>hyphen.us3
hyph_en_US.dic: hyphen.us3 hyph_en_US.dic: hyphen.us3
perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
clean-local: clean-local:
rm -rf hyphen.us* hyph_en_US.dic rm -rf hyphen.us* hyph_en_US.dic

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am. # Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Foundation, Inc. # Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved. # with or without modifications, as long as this notice is preserved.
@ -75,12 +75,6 @@ am__nobase_list = $(am__nobase_strip_setup); \
am__base_list = \ am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
"$(DESTDIR)$(hyphdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(hyphdir)" "$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES) LTLIBRARIES = $(lib_LTLIBRARIES)
@ -136,11 +130,9 @@ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION) distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir) top_distdir = $(distdir)
am__remove_distdir = \ am__remove_distdir = \
if test -d "$(distdir)"; then \ { test ! -d "$(distdir)" \
find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -rf "$(distdir)" \ && rm -fr "$(distdir)"; }; }
|| { sleep 5 && rm -rf "$(distdir)"; }; \
else :; fi
am__relativize = \ am__relativize = \
dir0=`pwd`; \ dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
@ -169,8 +161,6 @@ am__relativize = \
DIST_ARCHIVES = $(distdir).tar.gz DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best GZIP_ENV = --best
distuninstallcheck_listfiles = find . -type f -print distuninstallcheck_listfiles = find . -type f -print
am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
| sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@ ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@ AMTAR = @AMTAR@
@ -187,7 +177,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@ CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@ DEFS = @DEFS@
DEPDIR = @DEPDIR@ DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@ DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@ DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@ ECHO_C = @ECHO_C@
@ -211,7 +200,6 @@ LIPO = @LIPO@
LN_S = @LN_S@ LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@ MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@ MKDIR_P = @MKDIR_P@
NM = @NM@ NM = @NM@
NMEDIT = @NMEDIT@ NMEDIT = @NMEDIT@
@ -237,7 +225,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@ abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@ abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@ abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@ ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@ am__include = @am__include@
@ -270,6 +257,7 @@ libdir = @libdir@
libexecdir = @libexecdir@ libexecdir = @libexecdir@
localedir = @localedir@ localedir = @localedir@
localstatedir = @localstatedir@ localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@ mandir = @mandir@
mkdir_p = @mkdir_p@ mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@ oldincludedir = @oldincludedir@
@ -288,7 +276,7 @@ top_srcdir = @top_srcdir@
SUBDIRS = doc tests SUBDIRS = doc tests
lib_LTLIBRARIES = libhyphen.la lib_LTLIBRARIES = libhyphen.la
libhyphen_la_SOURCES = hnjalloc.c hyphen.c libhyphen_la_SOURCES = hnjalloc.c hyphen.c
libhyphen_la_LDFLAGS = -version-info 2:0:2 libhyphen_la_LDFLAGS = -version-info 2:1:2
include_HEADERS = hyphen.h include_HEADERS = hyphen.h
noinst_HEADERS = hnjalloc.h noinst_HEADERS = hnjalloc.h
substrings_SOURCES = substrings.c substrings_SOURCES = substrings.c
@ -302,13 +290,13 @@ hyph_DATA = hyph_en_US.dic
EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \ EXTRA_DIST = checkme.lst hyphen.tex tbhyphext.tex tbhyphext.sh \
README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \ README.compound README.hyphen README.nonstandard substrings.pl hyphen.patch \
COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \ COPYING.MPL COPYING.LGPL hyph_en_US.dic README_hyph_en_US.txt lig.awk \
ligpatch.txt ooopatch.sed ligpatch.txt
all: all-recursive all: all-recursive
.SUFFIXES: .SUFFIXES:
.SUFFIXES: .c .lo .o .obj .SUFFIXES: .c .lo .o .obj
am--refresh: Makefile am--refresh:
@: @:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \ @for dep in $?; do \
@ -373,7 +361,7 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \ echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \ rm -f "$${dir}/so_locations"; \
done done
libhyphen.la: $(libhyphen_la_OBJECTS) $(libhyphen_la_DEPENDENCIES) $(EXTRA_libhyphen_la_DEPENDENCIES) libhyphen.la: $(libhyphen_la_OBJECTS) $(libhyphen_la_DEPENDENCIES)
$(libhyphen_la_LINK) -rpath $(libdir) $(libhyphen_la_OBJECTS) $(libhyphen_la_LIBADD) $(LIBS) $(libhyphen_la_LINK) -rpath $(libdir) $(libhyphen_la_OBJECTS) $(libhyphen_la_LIBADD) $(LIBS)
clean-noinstPROGRAMS: clean-noinstPROGRAMS:
@ -384,10 +372,10 @@ clean-noinstPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \ echo " rm -f" $$list; \
rm -f $$list rm -f $$list
example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES) $(EXTRA_example_DEPENDENCIES) example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES)
@rm -f example$(EXEEXT) @rm -f example$(EXEEXT)
$(LINK) $(example_OBJECTS) $(example_LDADD) $(LIBS) $(LINK) $(example_OBJECTS) $(example_LDADD) $(LIBS)
substrings$(EXEEXT): $(substrings_OBJECTS) $(substrings_DEPENDENCIES) $(EXTRA_substrings_DEPENDENCIES) substrings$(EXEEXT): $(substrings_OBJECTS) $(substrings_DEPENDENCIES)
@rm -f substrings$(EXEEXT) @rm -f substrings$(EXEEXT)
$(LINK) $(substrings_OBJECTS) $(substrings_LDADD) $(LIBS) $(LINK) $(substrings_OBJECTS) $(substrings_LDADD) $(LIBS)
install-binSCRIPTS: $(bin_SCRIPTS) install-binSCRIPTS: $(bin_SCRIPTS)
@ -421,7 +409,9 @@ uninstall-binSCRIPTS:
@list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
files=`for p in $$list; do echo "$$p"; done | \ files=`for p in $$list; do echo "$$p"; done | \
sed -e 's,.*/,,;$(transform)'`; \ sed -e 's,.*/,,;$(transform)'`; \
dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) test -n "$$list" || exit 0; \
echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(bindir)" && rm -f $$files
mostlyclean-compile: mostlyclean-compile:
-rm -f *.$(OBJEXT) -rm -f *.$(OBJEXT)
@ -480,7 +470,9 @@ uninstall-hyphDATA:
@$(NORMAL_UNINSTALL) @$(NORMAL_UNINSTALL)
@list='$(hyph_DATA)'; test -n "$(hyphdir)" || list=; \ @list='$(hyph_DATA)'; test -n "$(hyphdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(hyphdir)'; $(am__uninstall_files_from_dir) test -n "$$files" || exit 0; \
echo " ( cd '$(DESTDIR)$(hyphdir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(hyphdir)" && rm -f $$files
install-includeHEADERS: $(include_HEADERS) install-includeHEADERS: $(include_HEADERS)
@$(NORMAL_INSTALL) @$(NORMAL_INSTALL)
test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
@ -498,7 +490,9 @@ uninstall-includeHEADERS:
@$(NORMAL_UNINSTALL) @$(NORMAL_UNINSTALL)
@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) test -n "$$files" || exit 0; \
echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
cd "$(DESTDIR)$(includedir)" && rm -f $$files
# This directory's subdirectories are mostly independent; you can cd # This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile. # into them and run `make' without going through this Makefile.
@ -707,11 +701,7 @@ dist-gzip: distdir
$(am__remove_distdir) $(am__remove_distdir)
dist-bzip2: distdir dist-bzip2: distdir
tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
$(am__remove_distdir)
dist-lzip: distdir
tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
$(am__remove_distdir) $(am__remove_distdir)
dist-lzma: distdir dist-lzma: distdir
@ -719,7 +709,7 @@ dist-lzma: distdir
$(am__remove_distdir) $(am__remove_distdir)
dist-xz: distdir dist-xz: distdir
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
$(am__remove_distdir) $(am__remove_distdir)
dist-tarZ: distdir dist-tarZ: distdir
@ -750,8 +740,6 @@ distcheck: dist
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lzma*) \ *.tar.lzma*) \
lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\ lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
*.tar.lz*) \
lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \ *.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\ xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \ *.tar.Z*) \
@ -771,7 +759,6 @@ distcheck: dist
&& am__cwd=`pwd` \ && am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \ && $(am__cd) $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \ && ../configure --srcdir=.. --prefix="$$dc_install_base" \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) dvi \
@ -800,16 +787,8 @@ distcheck: dist
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
distuninstallcheck: distuninstallcheck:
@test -n '$(distuninstallcheck_dir)' || { \ @$(am__cd) '$(distuninstallcheck_dir)' \
echo 'ERROR: trying to run $@ with an empty' \ && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
'$$(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
$(am__cd) '$(distuninstallcheck_dir)' || { \
echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left after uninstall:" ; \ || { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \ if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \ echo " (check DESTDIR support)"; \
@ -844,15 +823,10 @@ install-am: all-am
installcheck: installcheck-recursive installcheck: installcheck-recursive
install-strip: install-strip:
if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \
install; \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic: mostlyclean-generic:
clean-generic: clean-generic:
@ -946,8 +920,8 @@ uninstall-am: uninstall-binSCRIPTS uninstall-hyphDATA \
all all-am am--refresh check check-am clean clean-generic \ all all-am am--refresh check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-local \ clean-libLTLIBRARIES clean-libtool clean-local \
clean-noinstPROGRAMS ctags ctags-recursive dist dist-all \ clean-noinstPROGRAMS ctags ctags-recursive dist dist-all \
dist-bzip2 dist-gzip dist-lzip dist-lzma dist-shar dist-tarZ \ dist-bzip2 dist-gzip dist-lzma dist-shar dist-tarZ dist-xz \
dist-xz dist-zip distcheck distclean distclean-compile \ dist-zip distcheck distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags \ distclean-generic distclean-libtool distclean-tags \
distcleancheck distdir distuninstallcheck dvi dvi-am html \ distcleancheck distdir distuninstallcheck dvi dvi-am html \
html-am info info-am install install-am install-binSCRIPTS \ html-am info info-am install install-am install-binSCRIPTS \
@ -972,8 +946,7 @@ hyphen.us3:
cat $(srcdir)/ligpatch.txt >>hyphen.us3 cat $(srcdir)/ligpatch.txt >>hyphen.us3
hyph_en_US.dic: hyphen.us3 hyph_en_US.dic: hyphen.us3
perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
clean-local: clean-local:
rm -rf hyphen.us* hyph_en_US.dic rm -rf hyphen.us* hyph_en_US.dic

26
NEWS
View File

@ -1,4 +1,28 @@
2010-12-01 Hyphen 2,7,1 bug fix release 2011-10-10 Hyphen 2.8.3:
- fix NOHYPHEN
- fix unbalanced hyphenation of LibreOffice/OOo
- set default COMPOUNDHYPHENMIN=3 at hyphens and apostrophes
- fix VERBOSE in hyphen.c
- new ./example option: -n to print hyphenation vector
2011-10-07 Hyphen 2.8.2:
- fix for explicit COMPOUNDHYPHENMIN values
2011-10-06 Hyphen 2.8.1:
- force minimal lefthyphenmin and righthyphenmin values of the dictionary
(eg. righthyphenmin=3 of English dictionaries in LibreOffice/OOo,
also the original TeX hyphenation patterns are correct only with this
righthyphenmin value).
2011-10-04 Hyphen 2.8:
- Ignore leading and ending numbers (eg. manual/field based indexes
in LibreOffice/OOo)
- Fix LibreOffice/OpenOffice.org hyphenation errors at apostrophes and
hyphens, n-dashes with default NOHYPHEN separators.
Eg. *o'c=lock -> o'clock.
2010-12-01 Hyphen 2.7.1 bug fix release
2010-11-27 Hyphen 2.7 release: 2010-11-27 Hyphen 2.7 release:
- The new hyphenation problem of OpenOffice.org 3.2, related to its - The new hyphenation problem of OpenOffice.org 3.2, related to its

4
README
View File

@ -2,7 +2,7 @@ Hyphen - hyphenation library to use converted TeX hyphenation patterns
(C) 1998 Raph Levien (C) 1998 Raph Levien
(C) 2001 ALTLinux, Moscow (C) 2001 ALTLinux, Moscow
(C) 2006, 2007, 2008, 2010 László Németh (C) 2006, 2007, 2008, 2010, 2011 László Németh
This was part of libHnj library by Raph Levien. This was part of libHnj library by Raph Levien.
@ -124,4 +124,4 @@ is released in binary form as jar files and in source form as zip files.
See http://sourceforge.net/project/showfiles.php?group_id=119136 See http://sourceforge.net/project/showfiles.php?group_id=119136
László Németh László Németh
<nemeth (at) openoffice (dot) org> <nemeth (at) numbertext (dot) org>

View File

@ -18,6 +18,16 @@ Description:
and NOHYPHEN with the comma separated character (or character sequence) and NOHYPHEN with the comma separated character (or character sequence)
list forbid the (extra) hyphens at the hyphen and apostrophe characters. list forbid the (extra) hyphens at the hyphen and apostrophe characters.
Implicit NOHYPHEN declaration
Without explicit NEXTLEVEL declaration, Hyphen 2.8 uses the
previous settings, plus in UTF-8 encoding, endash (U+2013) and
typographical apostrophe (U+2019) are NOHYPHEN characters, too.
It's possible to enlarge the hyphenation distance from these
NOHYPHEN characters by using COMPOUNDLEFTHYPHENMIN and
COMPOUNDRIGHTHYPHENMIN attributes.
Compound word hyphenation Compound word hyphenation
Hyphen library supports better compound word hyphenation and special Hyphen library supports better compound word hyphenation and special

2381
aclocal.m4 vendored

File diff suppressed because it is too large Load Diff

2826
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
AC_INIT([hyphen],[2.7.1],[nemeth@openoffice.org]) AC_INIT([hyphen],[2.8.3],[nemeth@numbertext.org])
AM_INIT_AUTOMAKE(hyphen,2.7.1) AM_INIT_AUTOMAKE(hyphen,2.8.3)
AC_PROG_CC AC_PROG_CC
AC_PROG_INSTALL AC_PROG_INSTALL
AC_PROG_LIBTOOL AC_PROG_LIBTOOL

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am. # Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Foundation, Inc. # Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved. # with or without modifications, as long as this notice is preserved.
@ -60,7 +60,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@ CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@ DEFS = @DEFS@
DEPDIR = @DEPDIR@ DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@ DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@ DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@ ECHO_C = @ECHO_C@
@ -84,7 +83,6 @@ LIPO = @LIPO@
LN_S = @LN_S@ LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@ MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@ MKDIR_P = @MKDIR_P@
NM = @NM@ NM = @NM@
NMEDIT = @NMEDIT@ NMEDIT = @NMEDIT@
@ -110,7 +108,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@ abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@ abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@ abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@ ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@ am__include = @am__include@
@ -143,6 +140,7 @@ libdir = @libdir@
libexecdir = @libexecdir@ libexecdir = @libexecdir@
localedir = @localedir@ localedir = @localedir@
localstatedir = @localstatedir@ localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@ mandir = @mandir@
mkdir_p = @mkdir_p@ mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@ oldincludedir = @oldincludedir@
@ -249,15 +247,10 @@ install-am: all-am
installcheck: installcheck-am installcheck: installcheck-am
install-strip: install-strip:
if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \
install; \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic: mostlyclean-generic:
clean-generic: clean-generic:

View File

@ -12,6 +12,7 @@ void help() {
fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n"); fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n");
fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n"); fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n"); fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
fprintf(stderr,"-n = print hyphenation vector\n");
} }
/* get a pointer to the nth 8-bit or UTF-8 character of the word */ /* get a pointer to the nth 8-bit or UTF-8 character of the word */
@ -69,6 +70,7 @@ main(int argc, char** argv)
char hword[BUFSIZE * 2]; char hword[BUFSIZE * 2];
int arg = 1; int arg = 1;
int optd = 1; int optd = 1;
int optn = 0;
int optdd = 0; int optdd = 0;
char ** rep; char ** rep;
int * pos; int * pos;
@ -82,6 +84,10 @@ main(int argc, char** argv)
optd = 0; optd = 0;
arg++; arg++;
} }
if (strcmp(argv[arg], "-n") == 0) {
optn = 1;
arg++;
}
if (argv[arg] && strcmp(argv[arg], "-d") == 0) { if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
optd = 1; optd = 1;
optdd = 1; optdd = 1;
@ -152,6 +158,8 @@ main(int argc, char** argv)
exit(1); exit(1);
} }
if (optn) fprintf(stderr, "%s\n", hyphens);
if (!optd) { if (!optd) {
/* now backfill hyphens[] for any removed periods */ /* now backfill hyphens[] for any removed periods */
for (c = n; c < k; c++) hyphens[c] = '0'; for (c = n; c < k; c++) hyphens[c] = '0';
@ -176,7 +184,6 @@ main(int argc, char** argv)
fflush(stdout); fflush(stdout);
free(hyphword); free(hyphword);
} else { } else {
/* fprintf(stderr, "vasz: %s", hyphens); */
fprintf(stdout,"%s\n", hword); fprintf(stdout,"%s\n", hword);

View File

@ -1,13 +1,6 @@
UTF-8 UTF-8
LEFTHYPHENMIN 2 LEFTHYPHENMIN 2
RIGHTHYPHENMIN 3 RIGHTHYPHENMIN 3
COMPOUNDLEFTHYPHENMIN 2
COMPOUNDRIGHTHYPHENMIN 3
NOHYPHEN -,',
1-1
1'1
11
NEXTLEVEL
.a2ch4 .a2ch4
.ad4der .ad4der
.a2d .a2d

259
hyphen.c
View File

@ -226,118 +226,61 @@ hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
} }
#ifdef VERBOSE #ifdef VERBOSE
HashTab *global; HashTab *global[1];
static char * static char *
get_state_str (int state) get_state_str (int state, int level)
{ {
int i; int i;
HashEntry *e; HashEntry *e;
for (i = 0; i < HASH_SIZE; i++) for (i = 0; i < HASH_SIZE; i++)
for (e = global->entries[i]; e; e = e->next) for (e = global[level]->entries[i]; e; e = e->next)
if (e->val == state) if (e->val == state)
return e->key; return e->key;
return NULL; return NULL;
} }
#endif #endif
HyphenDict * void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
hnj_hyphen_load (const char *fn) int i, j;
{
HyphenDict *dict[2];
HashTab *hashtab;
FILE *f;
char buf[MAX_CHARS];
char word[MAX_CHARS]; char word[MAX_CHARS];
char pattern[MAX_CHARS]; char pattern[MAX_CHARS];
char * repl; char * repl;
signed char replindex; signed char replindex;
signed char replcut; signed char replcut;
int state_num = 0, last_state; int state_num = 0;
int i, j, k; int last_state;
char ch; char ch;
int found; int found;
HashEntry *e;
int nextlevel = 0;
f = fopen (fn, "r"); if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
if (f == NULL) dict->lhmin = atoi(buf + 13);
return NULL; return;
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
hashtab = hnj_hash_new ();
#ifdef VERBOSE
global = hashtab;
#endif
hnj_hash_insert (hashtab, "", 0);
dict[k] = hnj_malloc (sizeof(HyphenDict));
dict[k]->num_states = 1;
dict[k]->states = hnj_malloc (sizeof(HyphenState));
dict[k]->states[0].match = NULL;
dict[k]->states[0].repl = NULL;
dict[k]->states[0].fallback_state = -1;
dict[k]->states[0].num_trans = 0;
dict[k]->states[0].trans = NULL;
dict[k]->nextlevel = NULL;
dict[k]->lhmin = 0;
dict[k]->rhmin = 0;
dict[k]->clhmin = 0;
dict[k]->crhmin = 0;
dict[k]->nohyphen = NULL;
dict[k]->nohyphenl = 0;
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
} else {
dict[k]->cset[0] = 0;
}
dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
} else {
strcpy(dict[k]->cset, dict[0]->cset);
dict[k]->utf8 = dict[0]->utf8;
}
while (fgets (buf, sizeof(buf), f) != NULL)
{
if (buf[0] != '%')
{
if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
nextlevel = 1;
break;
} else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
dict[k]->lhmin = atoi(buf + 13);
continue;
} else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
dict[k]->rhmin = atoi(buf + 14); dict->rhmin = atoi(buf + 14);
continue; return;
} else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
dict[k]->clhmin = atoi(buf + 21); dict->clhmin = atoi(buf + 21);
continue; return;
} else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
dict[k]->crhmin = atoi(buf + 22); dict->crhmin = atoi(buf + 22);
continue; return;
} else if (strncmp(buf, "NOHYPHEN", 8) == 0) { } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
char * space = buf + 8; char * space = buf + 8;
while (*space != '\0' && (*space == ' ' || *space == '\t')) space++; while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space); if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
if (dict[k]->nohyphen) { if (dict->nohyphen) {
char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1; char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
*nhe = 0; *nhe = 0;
for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) { for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
if (*nhe == ',') { if (*nhe == ',') {
dict[k]->nohyphenl++; dict->nohyphenl++;
*nhe = 0; *nhe = 0;
} }
} }
} }
continue; return;
} }
j = 0; j = 0;
pattern[j] = '0'; pattern[j] = '0';
@ -382,7 +325,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
} else { } else {
if (*word == '.') i++; if (*word == '.') i++;
/* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
if (dict[k]->utf8) { if (dict->utf8) {
int pu = -1; /* unicode character position */ int pu = -1; /* unicode character position */
int ps = -1; /* unicode start position (original replindex) */ int ps = -1; /* unicode start position (original replindex) */
int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
@ -406,14 +349,14 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);
#endif #endif
found = hnj_hash_lookup (hashtab, word); found = hnj_hash_lookup (hashtab, word);
state_num = hnj_get_state (dict[k], hashtab, word); state_num = hnj_get_state (dict, hashtab, word);
dict[k]->states[state_num].match = hnj_strdup (pattern + i); dict->states[state_num].match = hnj_strdup (pattern + i);
dict[k]->states[state_num].repl = repl; dict->states[state_num].repl = repl;
dict[k]->states[state_num].replindex = replindex; dict->states[state_num].replindex = replindex;
if (!replcut) { if (!replcut) {
dict[k]->states[state_num].replcut = (signed char) strlen(word); dict->states[state_num].replcut = (signed char) strlen(word);
} else { } else {
dict[k]->states[state_num].replcut = replcut; dict->states[state_num].replcut = replcut;
} }
/* now, put in the prefix transitions */ /* now, put in the prefix transitions */
@ -423,11 +366,85 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
ch = word[j - 1]; ch = word[j - 1];
word[j - 1] = '\0'; word[j - 1] = '\0';
found = hnj_hash_lookup (hashtab, word); found = hnj_hash_lookup (hashtab, word);
state_num = hnj_get_state (dict[k], hashtab, word); state_num = hnj_get_state (dict, hashtab, word);
hnj_add_trans (dict[k], state_num, last_state, ch); hnj_add_trans (dict, state_num, last_state, ch);
} }
} }
HyphenDict *
hnj_hyphen_load (const char *fn)
{
HyphenDict *dict[2];
HashTab *hashtab;
FILE *f;
char buf[MAX_CHARS];
int nextlevel = 0;
int i, j, k;
HashEntry *e;
int state_num = 0;
f = fopen (fn, "r");
if (f == NULL)
return NULL;
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
for (k = 0; k < 2; k++) {
hashtab = hnj_hash_new ();
#ifdef VERBOSE
global[k] = hashtab;
#endif
hnj_hash_insert (hashtab, "", 0);
dict[k] = hnj_malloc (sizeof(HyphenDict));
dict[k]->num_states = 1;
dict[k]->states = hnj_malloc (sizeof(HyphenState));
dict[k]->states[0].match = NULL;
dict[k]->states[0].repl = NULL;
dict[k]->states[0].fallback_state = -1;
dict[k]->states[0].num_trans = 0;
dict[k]->states[0].trans = NULL;
dict[k]->nextlevel = NULL;
dict[k]->lhmin = 0;
dict[k]->rhmin = 0;
dict[k]->clhmin = 0;
dict[k]->crhmin = 0;
dict[k]->nohyphen = NULL;
dict[k]->nohyphenl = 0;
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
} else {
dict[k]->cset[0] = 0;
} }
dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
} else {
strcpy(dict[k]->cset, dict[0]->cset);
dict[k]->utf8 = dict[0]->utf8;
}
if (k == 0 || nextlevel) {
while (fgets (buf, sizeof(buf), f) != NULL) {
if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
nextlevel = 1;
break;
} else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
}
} else if (k == 1) {
/* default first level: hyphen and ASCII apostrophe */
if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
if (dict[0]->utf8) {
hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
}
}
/* Could do unioning of matches here (instead of the preprocessor script). /* Could do unioning of matches here (instead of the preprocessor script).
If we did, the pseudocode would look something like this: If we did, the pseudocode would look something like this:
@ -479,7 +496,20 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
state_num = 0; state_num = 0;
} }
fclose(f); fclose(f);
if (k == 2) dict[0]->nextlevel = dict[1]; if (nextlevel) dict[0]->nextlevel = dict[1];
else {
dict[1] -> nextlevel = dict[0];
dict[1]->lhmin = dict[0]->lhmin;
dict[1]->rhmin = dict[0]->rhmin;
dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
#ifdef VERBOSE
HashTab *r = global[0];
global[0] = global[1];
global[1] = r;
#endif
return dict[1];
}
return dict[0]; return dict[0];
} }
@ -530,8 +560,13 @@ int hnj_hyphen_hyphenate (HyphenDict *dict,
j = 0; j = 0;
prep_word[j++] = '.'; prep_word[j++] = '.';
for (i = 0; i < word_size; i++) for (i = 0; i < word_size; i++) {
if (word[i] <= '9' && word[i] >= '0') {
prep_word[j++] = '.';
} else {
prep_word[j++] = word[i]; prep_word[j++] = word[i];
}
}
prep_word[j++] = '.'; prep_word[j++] = '.';
prep_word[j] = '\0'; prep_word[j] = '\0';
@ -560,7 +595,7 @@ int hnj_hyphen_hyphenate (HyphenDict *dict,
#ifdef VERBOSE #ifdef VERBOSE
char *state_str; char *state_str;
state_str = get_state_str (state); state_str = get_state_str (state, 0);
for (k = 0; k < i - strlen (state_str); k++) for (k = 0; k < i - strlen (state_str); k++)
putchar (' '); putchar (' ');
@ -673,6 +708,9 @@ int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
i += hnj_ligature(word[2]); i += hnj_ligature(word[2]);
} }
// ignore numbers
for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
for (j = 0; i < lhmin && word[j] != '\0'; i++) do { for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
// check length of the non-standard part // check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) { if (*rep && *pos && *cut && (*rep)[j]) {
@ -699,9 +737,13 @@ int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
char *** rep, int ** pos, int ** cut, int rhmin) char *** rep, int ** pos, int ** cut, int rhmin)
{ {
int i; int i = 1;
int j = word_size - 2; int j;
for (i = 1; i < rhmin && j > 0; j--) {
// ignore numbers
for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
for (j = word_size - 2; i < rhmin && j > 0; j--) {
// check length of the non-standard part // check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) { if (*rep && *pos && *cut && (*rep)[j]) {
char * rh = strchr((*rep)[j], '='); char * rh = strchr((*rep)[j], '=');
@ -759,8 +801,15 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
j = 0; j = 0;
prep_word[j++] = '.'; prep_word[j++] = '.';
for (i = 0; i < word_size; i++) for (i = 0; i < word_size; i++) {
if (word[i] <= '9' && word[i] >= '0') {
prep_word[j++] = '.';
} else {
prep_word[j++] = word[i]; prep_word[j++] = word[i];
}
}
prep_word[j++] = '.'; prep_word[j++] = '.';
prep_word[j] = '\0'; prep_word[j] = '\0';
@ -789,7 +838,7 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
#ifdef VERBOSE #ifdef VERBOSE
char *state_str; char *state_str;
state_str = get_state_str (state); state_str = get_state_str (state, 1);
for (k = 0; k < i - strlen (state_str); k++) for (k = 0; k < i - strlen (state_str); k++)
putchar (' '); putchar (' ');
@ -1036,6 +1085,9 @@ int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
} }
} }
hyphens[j + 1] = '\0'; hyphens[j + 1] = '\0';
#ifdef VERBOSE
printf ("nums: %s\n", hyphens);
#endif
return 0; return 0;
} }
@ -1077,8 +1129,8 @@ int hnj_hyphen_hyphenate2 (HyphenDict *dict,
for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
char * nhy = (char *) strstr(word, nh); char * nhy = (char *) strstr(word, nh);
while (nhy) { while (nhy) {
hyphens[nhy - word + strlen(nh) - 1] = 0; hyphens[nhy - word + strlen(nh) - 1] = '0';
if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0; if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0';
nhy = (char *) strstr(nhy + 1, nh); nhy = (char *) strstr(nhy + 1, nh);
} }
nh = nh + strlen(nh) + 1; nh = nh + strlen(nh) + 1;
@ -1087,6 +1139,9 @@ int hnj_hyphen_hyphenate2 (HyphenDict *dict,
if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
#ifdef VERBOSE
printf ("nums: %s\n", hyphens);
#endif
return 0; return 0;
} }
@ -1096,8 +1151,10 @@ int hnj_hyphen_hyphenate3 (HyphenDict *dict,
char *hyphword, char *** rep, int ** pos, int ** cut, char *hyphword, char *** rep, int ** pos, int ** cut,
int lhmin, int rhmin, int clhmin, int crhmin) int lhmin, int rhmin, int clhmin, int crhmin)
{ {
lhmin = (lhmin > 0 ? lhmin : dict->lhmin); lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
rhmin = (rhmin > 0 ? rhmin : dict->rhmin); rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
clhmin, crhmin, 1, 1); clhmin, crhmin, 1, 1);
hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,

4060
ltmain.sh

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,8 @@ settings4.test \
lhmin.test \ lhmin.test \
rhmin.test \ rhmin.test \
lig.test \ lig.test \
hyphen.test hyphen.test \
num.test
distclean-local: distclean-local:
-rm -rf testSubDir -rm -rf testSubDir
@ -120,4 +121,8 @@ lig.word \
hyphen.hyph \ hyphen.hyph \
hyphen.pat \ hyphen.pat \
hyphen.test \ hyphen.test \
hyphen.word hyphen.word \
num.hyph \
num.pat \
num.test \
num.word

View File

@ -1,9 +1,9 @@
# Makefile.in generated by automake 1.11.3 from Makefile.am. # Makefile.in generated by automake 1.11.1 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
# Foundation, Inc. # Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved. # with or without modifications, as long as this notice is preserved.
@ -62,7 +62,6 @@ CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@ CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@ DEFS = @DEFS@
DEPDIR = @DEPDIR@ DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@ DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@ DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@ ECHO_C = @ECHO_C@
@ -86,7 +85,6 @@ LIPO = @LIPO@
LN_S = @LN_S@ LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@ MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@ MKDIR_P = @MKDIR_P@
NM = @NM@ NM = @NM@
NMEDIT = @NMEDIT@ NMEDIT = @NMEDIT@
@ -112,7 +110,6 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@ abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@ abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@ abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@ ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@ am__include = @am__include@
@ -145,6 +142,7 @@ libdir = @libdir@
libexecdir = @libexecdir@ libexecdir = @libexecdir@
localedir = @localedir@ localedir = @localedir@
localstatedir = @localstatedir@ localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@ mandir = @mandir@
mkdir_p = @mkdir_p@ mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@ oldincludedir = @oldincludedir@
@ -183,7 +181,8 @@ settings4.test \
lhmin.test \ lhmin.test \
rhmin.test \ rhmin.test \
lig.test \ lig.test \
hyphen.test hyphen.test \
num.test
EXTRA_DIST = \ EXTRA_DIST = \
test.sh \ test.sh \
@ -279,7 +278,11 @@ lig.word \
hyphen.hyph \ hyphen.hyph \
hyphen.pat \ hyphen.pat \
hyphen.test \ hyphen.test \
hyphen.word hyphen.word \
num.hyph \
num.pat \
num.test \
num.word
all: all-am all: all-am
@ -408,15 +411,14 @@ check-TESTS: $(TESTS)
fi; \ fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \ dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \ if test "$$failed" -eq 0; then \
col="$$grn"; \ echo "$$grn$$dashes"; \
else \ else \
col="$$red"; \ echo "$$red$$dashes"; \
fi; \ fi; \
echo "$${col}$$dashes$${std}"; \ echo "$$banner"; \
echo "$${col}$$banner$${std}"; \ test -z "$$skipped" || echo "$$skipped"; \
test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \ test -z "$$report" || echo "$$report"; \
test -z "$$report" || echo "$${col}$$report$${std}"; \ echo "$$dashes$$std"; \
echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \ test "$$failed" -eq 0; \
else :; fi else :; fi
@ -465,15 +467,10 @@ install-am: all-am
installcheck: installcheck-am installcheck: installcheck-am
install-strip: install-strip:
if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \
install; \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic: mostlyclean-generic:
clean-generic: clean-generic:

View File

@ -2839,7 +2839,7 @@ oc=tag=o=nal
octets octets
oddly oddly
odi=ous odi=ous
o'd=wyer o'dwyer
of=fended of=fended
of=fer of=fer
of=fi=cer of=fi=cer

9
tests/num.hyph Normal file
View File

@ -0,0 +1,9 @@
foobar
foobar=foobar
barfoo=barfoo
123foobar=foobar
foobar=foobar123
123foobar=foobar123
123barfoo=barfoo
barfoo=barfoo123
123barfoo=barfoo123

6
tests/num.pat Normal file
View File

@ -0,0 +1,6 @@
UTF-8
LEFTHYPHENMIN 4
RIGHTHYPHENMIN 4
foo1
NEXTLEVEL
bar1

4
tests/num.test Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
DIR="`dirname $0`"
NAME="`basename $0 .test`"
$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph

9
tests/num.word Normal file
View File

@ -0,0 +1,9 @@
foobar
foobarfoobar
barfoobarfoo
123foobarfoobar
foobarfoobar123
123foobarfoobar123
123barfoobarfoo
barfoobarfoo123
123barfoobarfoo123

View File

@ -1,4 +1,5 @@
UTF-8 UTF-8
NEXTLEVEL
l·1l/l=l,1,3 l·1l/l=l,1,3
e1ë/e=e,1,2 e1ë/e=e,1,2
a1atje./a=t,1,3 a1atje./a=t,1,3