Adopt some RedHat suggestions for standard font configuration.

Add new helper program 'fc-case' to construct case folding tables from
    standard Unicode CaseFolding.txt file
Re-implement case insensitive functions with Unicode aware versions
    (including full case folding mappings)
This commit is contained in:
Keith Packard 2004-12-29 09:15:17 +00:00
parent 5cf8c5364f
commit 192296d852
11 changed files with 1687 additions and 96 deletions

View File

@ -1,3 +1,29 @@
2004-12-29 Keith Packard <keithp@keithp.com>
* fonts.conf.in:
Adopt some RedHat suggestions for standard font configuration.
* Makefile.am:
* configure.in:
* fc-case/CaseFolding.txt:
* fc-case/Makefile.am:
* fc-case/fc-case.c: (panic), (addFold), (ucs4_to_utf8),
(utf8_size), (addChar), (foldExtends), (case_fold_method_name),
(dump), (parseRaw), (caseFoldReadRaw), (main):
* fc-case/fccase.tmpl.h:
Add new helper program 'fc-case' to construct case folding
tables from standard Unicode CaseFolding.txt file
* src/fcint.h:
* src/fclist.c: (FcListValueHash):
* src/fcstr.c: (FcStrCaseWalkerInit), (FcStrCaseWalkerLong),
(FcStrCaseWalkerNext), (FcStrCaseWalkerNextIgnoreBlanks),
(FcStrCmpIgnoreCase), (FcStrCmpIgnoreBlanksAndCase),
(FcStrHashIgnoreCase), (FcStrIsAtIgnoreBlanksAndCase),
(FcStrIsAtIgnoreCase), (FcStrStrIgnoreCase):
Re-implement case insensitive functions with Unicode
aware versions (including full case folding mappings)
2004-12-13 Keith Packard <keithp@keithp.com>
reviewed by: Owen Taylor <otaylor@redhat.com>

View File

@ -22,7 +22,7 @@
# PERFORMANCE OF THIS SOFTWARE.
DOCSRC=@DOCSRC@
-SUBDIRS=fontconfig fc-lang fc-glyphname src fc-cache fc-list fc-match $(DOCSRC) test
+SUBDIRS=fontconfig fc-lang fc-glyphname fc-case src fc-cache fc-list fc-match $(DOCSRC) test
EXTRA_DIST = \
fontconfig.pc.in \

View File

@ -411,6 +411,7 @@ Makefile
fontconfig/Makefile
fc-lang/Makefile
fc-glyphname/Makefile
fc-case/Makefile
src/Makefile
src/fontconfig.def
fc-cache/Makefile

924
fc-case/CaseFolding.txt Normal file
View File

@ -0,0 +1,924 @@
# CaseFolding-4.0.1.txt
# Date: 2004-03-02, 02:41:24 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2004 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# Case Folding Properties
#
# This file is a supplement to the UnicodeData file.
# It provides a case folding mapping generated from the Unicode Character Database.
# If all characters are mapped according to the full mapping below, then
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
# are eliminated.
#
# The data supports both implementations that require simple case foldings
# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
# All code points not listed in this file map to themselves.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, see
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
#
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <status>; <mapping>; # <name>
#
# The status field is:
# C: common case folding, common mappings shared by both simple and full mappings.
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
# S: simple case folding, mappings to single characters where different from F.
# T: special case for uppercase I and dotted uppercase I
# - For non-Turkic languages, this mapping is normally not used.
# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
# Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
# See the discussions of case mapping in the Unicode Standard for more information.
#
# Usage:
# A. To do a simple case folding, use the mappings with status C + S.
# B. To do a full case folding, use the mappings with status C + F.
#
# The mappings with status T can be used or omitted depending on the desired case-folding
# behavior. (The default option is to exclude them.)
#
# =================================================================
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
0043; C; 0063; # LATIN CAPITAL LETTER C
0044; C; 0064; # LATIN CAPITAL LETTER D
0045; C; 0065; # LATIN CAPITAL LETTER E
0046; C; 0066; # LATIN CAPITAL LETTER F
0047; C; 0067; # LATIN CAPITAL LETTER G
0048; C; 0068; # LATIN CAPITAL LETTER H
0049; C; 0069; # LATIN CAPITAL LETTER I
0049; T; 0131; # LATIN CAPITAL LETTER I
004A; C; 006A; # LATIN CAPITAL LETTER J
004B; C; 006B; # LATIN CAPITAL LETTER K
004C; C; 006C; # LATIN CAPITAL LETTER L
004D; C; 006D; # LATIN CAPITAL LETTER M
004E; C; 006E; # LATIN CAPITAL LETTER N
004F; C; 006F; # LATIN CAPITAL LETTER O
0050; C; 0070; # LATIN CAPITAL LETTER P
0051; C; 0071; # LATIN CAPITAL LETTER Q
0052; C; 0072; # LATIN CAPITAL LETTER R
0053; C; 0073; # LATIN CAPITAL LETTER S
0054; C; 0074; # LATIN CAPITAL LETTER T
0055; C; 0075; # LATIN CAPITAL LETTER U
0056; C; 0076; # LATIN CAPITAL LETTER V
0057; C; 0077; # LATIN CAPITAL LETTER W
0058; C; 0078; # LATIN CAPITAL LETTER X
0059; C; 0079; # LATIN CAPITAL LETTER Y
005A; C; 007A; # LATIN CAPITAL LETTER Z
00B5; C; 03BC; # MICRO SIGN
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
014A; C; 014B; # LATIN CAPITAL LETTER ENG
014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0152; C; 0153; # LATIN CAPITAL LIGATURE OE
0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
017F; C; 0073; # LATIN SMALL LETTER LONG S
0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
0196; C; 0269; # LATIN CAPITAL LETTER IOTA
0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
01A2; C; 01A3; # LATIN CAPITAL LETTER OI
01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
01A6; C; 0280; # LATIN LETTER YR
01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
01A9; C; 0283; # LATIN CAPITAL LETTER ESH
01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
01B7; C; 0292; # LATIN CAPITAL LETTER EZH
01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
021C; C; 021D; # LATIN CAPITAL LETTER YOGH
021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
0222; C; 0223; # LATIN CAPITAL LETTER OU
0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
0392; C; 03B2; # GREEK CAPITAL LETTER BETA
0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
0397; C; 03B7; # GREEK CAPITAL LETTER ETA
0398; C; 03B8; # GREEK CAPITAL LETTER THETA
0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
039C; C; 03BC; # GREEK CAPITAL LETTER MU
039D; C; 03BD; # GREEK CAPITAL LETTER NU
039E; C; 03BE; # GREEK CAPITAL LETTER XI
039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
03A0; C; 03C0; # GREEK CAPITAL LETTER PI
03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
03D0; C; 03B2; # GREEK BETA SYMBOL
03D1; C; 03B8; # GREEK THETA SYMBOL
03D5; C; 03C6; # GREEK PHI SYMBOL
03D6; C; 03C0; # GREEK PI SYMBOL
03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
03DA; C; 03DB; # GREEK LETTER STIGMA
03DC; C; 03DD; # GREEK LETTER DIGAMMA
03DE; C; 03DF; # GREEK LETTER KOPPA
03E0; C; 03E1; # GREEK LETTER SAMPI
03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
03F0; C; 03BA; # GREEK KAPPA SYMBOL
03F1; C; 03C1; # GREEK RHO SYMBOL
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
0410; C; 0430; # CYRILLIC CAPITAL LETTER A
0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
0418; C; 0438; # CYRILLIC CAPITAL LETTER I
0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
041E; C; 043E; # CYRILLIC CAPITAL LETTER O
041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
0423; C; 0443; # CYRILLIC CAPITAL LETTER U
0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
042D; C; 044D; # CYRILLIC CAPITAL LETTER E
042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2126; C; 03C9; # OHM SIGN
212A; C; 006B; # KELVIN SIGN
212B; C; 00E5; # ANGSTROM SIGN
2160; C; 2170; # ROMAN NUMERAL ONE
2161; C; 2171; # ROMAN NUMERAL TWO
2162; C; 2172; # ROMAN NUMERAL THREE
2163; C; 2173; # ROMAN NUMERAL FOUR
2164; C; 2174; # ROMAN NUMERAL FIVE
2165; C; 2175; # ROMAN NUMERAL SIX
2166; C; 2176; # ROMAN NUMERAL SEVEN
2167; C; 2177; # ROMAN NUMERAL EIGHT
2168; C; 2178; # ROMAN NUMERAL NINE
2169; C; 2179; # ROMAN NUMERAL TEN
216A; C; 217A; # ROMAN NUMERAL ELEVEN
216B; C; 217B; # ROMAN NUMERAL TWELVE
216C; C; 217C; # ROMAN NUMERAL FIFTY
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10400; C; 10428; # DESERET CAPITAL LETTER LONG I
10401; C; 10429; # DESERET CAPITAL LETTER LONG E
10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
1040C; C; 10434; # DESERET CAPITAL LETTER AY
1040D; C; 10435; # DESERET CAPITAL LETTER OW
1040E; C; 10436; # DESERET CAPITAL LETTER WU
1040F; C; 10437; # DESERET CAPITAL LETTER YEE
10410; C; 10438; # DESERET CAPITAL LETTER H
10411; C; 10439; # DESERET CAPITAL LETTER PEE
10412; C; 1043A; # DESERET CAPITAL LETTER BEE
10413; C; 1043B; # DESERET CAPITAL LETTER TEE
10414; C; 1043C; # DESERET CAPITAL LETTER DEE
10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
10416; C; 1043E; # DESERET CAPITAL LETTER JEE
10417; C; 1043F; # DESERET CAPITAL LETTER KAY
10418; C; 10440; # DESERET CAPITAL LETTER GAY
10419; C; 10441; # DESERET CAPITAL LETTER EF
1041A; C; 10442; # DESERET CAPITAL LETTER VEE
1041B; C; 10443; # DESERET CAPITAL LETTER ETH
1041C; C; 10444; # DESERET CAPITAL LETTER THEE
1041D; C; 10445; # DESERET CAPITAL LETTER ES
1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
1041F; C; 10447; # DESERET CAPITAL LETTER ESH
10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
10421; C; 10449; # DESERET CAPITAL LETTER ER
10422; C; 1044A; # DESERET CAPITAL LETTER EL
10423; C; 1044B; # DESERET CAPITAL LETTER EM
10424; C; 1044C; # DESERET CAPITAL LETTER EN
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW

52
fc-case/Makefile.am Normal file
View File

@ -0,0 +1,52 @@
#
# $Id $
#
# Copyright © 2003 Keith Packard
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of Keith Packard not be used in
# advertising or publicity pertaining to distribution of the software without
# specific, written prior permission. Keith Packard makes no
# representations about the suitability of this software for any purpose. It
# is provided "as is" without express or implied warranty.
#
# KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
# EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
# Header search path: the src and top-level source directories, plus
# the FreeType and warning flags
INCLUDES=-I${top_srcdir}/src -I${top_srcdir} $(FREETYPE_CFLAGS) $(WARN_CFLAGS)
# Template header, its location in the source tree, and the header
# generated from it
TMPL=fccase.tmpl.h
STMPL=${top_srcdir}/fc-case/${TMPL}
TARG=fccase.h
# noinst_ prefix: built here but not installed
noinst_PROGRAMS=fc-case
noinst_HEADERS=$(TARG)
noinst_MANS=fc-case.man
# NOTE(review): automake normally takes per-program sources from
# fc_case_SOURCES; confirm that this _SRCS variable is consumed at all
# and that fccaseint.h and fccaseread.c actually exist.
fc_case_SRCS= \
fc-case.c \
fccaseint.h \
fccaseread.c
# Unicode case folding data file and its location in the source tree
CASEFOLDING=CaseFolding.txt
SCASEFOLDING=${top_srcdir}/fc-case/CaseFolding.txt
# Extra files to ship with the distribution
EXTRA_DIST=$(TMPL) $(CASEFOLDING)
# Regenerate the header: fc-case is given the data file on its command
# line, reads the template on stdin and writes the result to stdout
$(TARG): $(STMPL) fc-case $(SCASEFOLDING)
rm -f $(TARG)
./fc-case $(SCASEFOLDING) < $(STMPL) > $(TARG)
# The generated header is removed by "make clean"
CLEANFILES=$(TARG)

363
fc-case/fc-case.c Normal file
View File

@ -0,0 +1,363 @@
/*
* $Id$
*
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include "fcint.h"
#include <ctype.h>
/* Upper bound on the number of result code points parsed for one entry */
#define MAX_OUT 32
/* Size of the buffers used to hold one line of input */
#define MAX_LINE 8192
/* Class of a case folding entry; caseFoldClassMap gives the letter for each */
typedef enum _caseFoldClass { CaseFoldCommon, CaseFoldFull, CaseFoldSimple, CaseFoldTurkic } CaseFoldClass;
/* Associates a class letter as written in the input file with its CaseFoldClass */
typedef struct _caseFoldClassMap {
char *name;
CaseFoldClass class;
} CaseFoldClassMap;
/* Table searched by parseRaw; the entry with a null name terminates it */
static CaseFoldClassMap caseFoldClassMap[] = {
{ "C", CaseFoldCommon },
{ "F", CaseFoldFull },
{ "S", CaseFoldSimple },
{ "T", CaseFoldTurkic },
{ 0, 0 }
};
/*
 * One parsed input line: the source code point, its class, and the
 * nout result code points stored in lower[0..nout-1]
 */
typedef struct _caseFoldRaw {
FcChar32 upper;
CaseFoldClass class;
int nout;
FcChar32 lower[MAX_OUT];
} CaseFoldRaw;
/*
 * Report a fatal error on stderr and terminate the program with exit
 * status 1.  This function does not return.
 *
 * reason is only read, so it is const-qualified; every caller passes a
 * string literal.
 */
static void
panic (const char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
/*
 * Largest difference seen so far between the UTF-8 length of a folded
 * result and the UTF-8 length of its source character (updated in main)
 */
int maxExpand;
/* Growable table of fold entries, and the number of entries in use */
static FcCaseFold *folds;
int nfolds;

/*
 * Append one entry to the folds table and return a pointer to the new,
 * uninitialized slot.  Exits through panic() when memory runs out.
 */
static FcCaseFold *
addFold (void)
{
    FcCaseFold *slot;

    /* realloc on a null pointer behaves like malloc, covering the first call */
    folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
    if (!folds)
	panic ("out of memory");
    slot = &folds[nfolds];
    nfolds++;
    return slot;
}
/*
 * Encode one UCS-4 value as UTF-8 into dest.
 *
 * Returns the number of bytes written, or 0 when ucs4 is 0x80000000 or
 * larger and therefore cannot be encoded.
 */
static int
ucs4_to_utf8 (FcChar32 ucs4,
	      FcChar8 dest[FC_UTF8_MAX_LEN])
{
    FcChar8 *out = dest;
    int shift;

    /*
     * Emit the lead byte and set shift to the bit position of the first
     * continuation byte; -6 means no continuation bytes follow.
     */
    if (ucs4 < 0x80)
    {
	*out++ = ucs4;
	shift = -6;
    }
    else if (ucs4 < 0x800)
    {
	*out++ = ((ucs4 >> 6) & 0x1F) | 0xC0;
	shift = 0;
    }
    else if (ucs4 < 0x10000)
    {
	*out++ = ((ucs4 >> 12) & 0x0F) | 0xE0;
	shift = 6;
    }
    else if (ucs4 < 0x200000)
    {
	*out++ = ((ucs4 >> 18) & 0x07) | 0xF0;
	shift = 12;
    }
    else if (ucs4 < 0x4000000)
    {
	*out++ = ((ucs4 >> 24) & 0x03) | 0xF8;
	shift = 18;
    }
    else if (ucs4 < 0x80000000)
    {
	*out++ = ((ucs4 >> 30) & 0x01) | 0xFC;
	shift = 24;
    }
    else
	return 0;

    /* Each continuation byte carries six payload bits */
    while (shift >= 0)
    {
	*out++ = ((ucs4 >> shift) & 0x3F) | 0x80;
	shift -= 6;
    }
    return out - dest;
}
/*
 * Return the number of bytes ucs4 occupies when encoded as UTF-8
 * (0 when ucs4_to_utf8 rejects the value).
 */
static int
utf8_size (FcChar32 ucs4)
{
    FcChar8 scratch[FC_UTF8_MAX_LEN];
    int nbytes;

    /* The encoded bytes are discarded; only their count is wanted */
    nbytes = ucs4_to_utf8 (ucs4, scratch);
    return nbytes;
}
/* UTF-8 bytes of the multi-character fold results, packed back to back */
static FcChar8 *foldChars;
int nfoldChars;
/* Longest single fold result stored in foldChars, in bytes */
int maxFoldChars;
/* Source code point of the first non-zero and of the latest entry read */
FcChar32 minFoldChar;
FcChar32 maxFoldChar;

/*
 * Append the UTF-8 encoding of c to foldChars, growing the buffer as
 * needed.  Exits through panic() when memory runs out.
 */
static void
addChar (FcChar32 c)
{
    FcChar8 encoded[FC_UTF8_MAX_LEN];
    FcChar8 *tail;
    int nbytes;
    int k;

    nbytes = ucs4_to_utf8 (c, encoded);
    /* realloc on a null pointer behaves like malloc, covering the first call */
    foldChars = realloc (foldChars, (nfoldChars + nbytes) * sizeof (FcChar8));
    if (!foldChars)
	panic ("out of memory");
    tail = foldChars + nfoldChars;
    for (k = 0; k < nbytes; k++)
	*tail++ = encoded[k];
    nfoldChars += nbytes;
}
/*
 * Decide whether the single-character mapping in raw continues the run
 * described by fold.  Returns 1 when it does, 0 otherwise.
 *
 * RANGE runs need the same offset and the next consecutive source code
 * point; EVEN_ODD runs need an offset of 1 and a source code point one
 * past the end of the run.  FULL entries are never extended.
 */
static int
foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
{
    /* Distance from source to result, truncated to short as in the table */
    short delta = (short) (raw->lower[0] - raw->upper);

    switch (fold->method) {
    case FC_CASE_FOLD_RANGE:
	return delta == fold->offset &&
	       raw->upper == fold->upper + fold->count;
    case FC_CASE_FOLD_EVEN_ODD:
	return delta == 1 &&
	       raw->upper == fold->upper + fold->count + 1;
    case FC_CASE_FOLD_FULL:
	break;
    }
    return 0;
}
/*
 * Return the text written into the generated table for a fold method.
 * Known methods include the trailing comma; anything else yields
 * "unknown".
 */
static char *
case_fold_method_name (FcChar16 method)
{
    char *label = "unknown";

    if (method == FC_CASE_FOLD_RANGE)
	label = "FC_CASE_FOLD_RANGE,";
    else if (method == FC_CASE_FOLD_EVEN_ODD)
	label = "FC_CASE_FOLD_EVEN_ODD,";
    else if (method == FC_CASE_FOLD_FULL)
	label = "FC_CASE_FOLD_FULL,";
    return label;
}
static void
dump (void)
{
int i;
printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
printf ( "\n");
/*
* Dump out ranges
*/
printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
for (i = 0; i < nfolds; i++)
{
printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
folds[i].upper, case_fold_method_name (folds[i].method),
folds[i].count, folds[i].offset);
}
printf ("};\n\n");
/*
* Dump out "other" values
*/
printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
for (i = 0; i < nfoldChars; i++)
{
printf ("0x%02x", foldChars[i]);
if (i != nfoldChars - 1)
{
if ((i & 0xf) == 0xf)
printf (",\n");
else
printf (",");
}
}
printf ("\n};\n");
}
/*
 * Read the standard Unicode CaseFolding.txt file
 */
#define SEP "; \t\n"

/*
 * Parse one line of the case folding file into raw.
 *
 * A usable line has the form "<hex>; <class>; <hex> [<hex> ...]; # name".
 * line is modified in place by strtok.
 *
 * Returns 1 when raw has been filled in, 0 when the line should be
 * skipped (it does not start with a hex digit, names an unknown class,
 * or carries no result code points).
 */
static int
parseRaw (char *line, CaseFoldRaw *raw)
{
    char *tok, *end;
    int i;

    /*
     * <ctype.h> functions are only defined for unsigned char values and
     * EOF, so a plain char must be converted before the call.
     */
    if (!isxdigit ((unsigned char) line[0]))
	return 0;
    /*
     * Get upper case value
     */
    tok = strtok (line, SEP);
    if (!tok || tok[0] == '#')
	return 0;
    raw->upper = strtol (tok, &end, 16);
    if (end == tok)
	return 0;
    /*
     * Get class
     */
    tok = strtok (NULL, SEP);
    if (!tok || tok[0] == '#')
	return 0;
    for (i = 0; caseFoldClassMap[i].name; i++)
	if (!strcmp (tok, caseFoldClassMap[i].name))
	{
	    raw->class = caseFoldClassMap[i].class;
	    break;
	}
    /* Reached the terminating entry: the class letter is unknown */
    if (!caseFoldClassMap[i].name)
	return 0;
    /*
     * Get list of result characters; the list ends at the trailing
     * comment, at a token that is not hexadecimal, or after MAX_OUT
     * values
     */
    for (i = 0; i < MAX_OUT; i++)
    {
	tok = strtok (NULL, SEP);
	if (!tok || tok[0] == '#')
	    break;
	raw->lower[i] = strtol (tok, &end, 16);
	if (end == tok)
	    break;
    }
    if (i == 0)
	return 0;
    raw->nout = i;
    return 1;
}
/*
 * Read lines from in until one parses as a case folding entry.
 *
 * Returns 1 with raw filled in, or 0 once fgets reports that no more
 * input is available.
 */
static int
caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
{
    char buf[MAX_LINE];

    while (fgets (buf, sizeof (buf) - 1, in))
    {
	/* Lines that parseRaw rejects are skipped */
	if (parseRaw (buf, raw))
	    return 1;
    }
    return 0;
}
/*
 * fc-case: build the case folding tables.
 *
 * argv[1] names the case folding data file.  The template is read from
 * stdin and copied to stdout, with the line starting with "@@@" replaced
 * by the generated tables.
 *
 * Exit status is 0 on success and 1 on a usage error, an unreadable
 * data file, or an output error.
 */
int
main (int argc, char **argv)
{
    FcCaseFold *fold = 0;
    CaseFoldRaw raw;
    int i;
    FILE *caseFile;
    char line[MAX_LINE];
    int expand;

    if (argc != 2)
	panic ("usage: fc-case CaseFolding.txt");
    caseFile = fopen (argv[1], "r");
    if (!caseFile)
	panic ("can't open case folding file");
    while (caseFoldReadRaw (caseFile, &raw))
    {
	if (!minFoldChar)
	    minFoldChar = raw.upper;
	maxFoldChar = raw.upper;
	switch (raw.class) {
	case CaseFoldCommon:
	case CaseFoldFull:
	    if (raw.nout == 1)
	    {
		/* Single result: extend the current run or start a new one */
		if (fold && foldExtends (fold, &raw))
		    fold->count = raw.upper - fold->upper + 1;
		else
		{
		    fold = addFold ();
		    fold->upper = raw.upper;
		    fold->offset = raw.lower[0] - raw.upper;
		    if (fold->offset == 1)
			fold->method = FC_CASE_FOLD_EVEN_ODD;
		    else
			fold->method = FC_CASE_FOLD_RANGE;
		    fold->count = 1;
		}
		expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
	    }
	    else
	    {
		/*
		 * Several results: store their UTF-8 bytes in foldChars;
		 * offset and count locate them there
		 */
		fold = addFold ();
		fold->upper = raw.upper;
		fold->method = FC_CASE_FOLD_FULL;
		fold->offset = nfoldChars;
		for (i = 0; i < raw.nout; i++)
		    addChar (raw.lower[i]);
		fold->count = nfoldChars - fold->offset;
		if (fold->count > maxFoldChars)
		    maxFoldChars = fold->count;
		expand = fold->count - utf8_size (raw.upper);
	    }
	    if (expand > maxExpand)
		maxExpand = expand;
	    break;
	case CaseFoldSimple:
	    /* not used in the generated tables */
	    break;
	case CaseFoldTurkic:
	    /* not used in the generated tables */
	    break;
	}
    }
    /*
     * fgets reports read errors the same way as end of file; refuse to
     * emit tables built from a partially read data file
     */
    if (ferror (caseFile))
	panic ("error reading case folding file");
    fclose (caseFile);
    /*
     * Scan the input until the marker is found
     */
    while (fgets (line, sizeof (line), stdin))
    {
	if (!strncmp (line, "@@@", 3))
	    break;
	fputs (line, stdout);
    }
    /*
     * Dump these tables
     */
    dump ();
    /*
     * And flush out the rest of the input file
     */
    while (fgets (line, sizeof (line), stdin))
	fputs (line, stdout);
    /*
     * ferror only promises a non-zero value on error, so map the
     * result onto a well defined exit status
     */
    if (fflush (stdout) != 0 || ferror (stdout))
	exit (1);
    exit (0);
}

25
fc-case/fccase.tmpl.h Normal file
View File

@ -0,0 +1,25 @@
/*
* $Id$
*
* Copyright © 2003 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
@@@

View File

@ -73,8 +73,9 @@
--> -->
<alias> <alias>
<family>Bitstream Vera Serif</family> <family>Bitstream Vera Serif</family>
<family>Times</family>
<family>Times New Roman</family> <family>Times New Roman</family>
<family>Thorndale AMT</family>
<family>Times</family>
<family>Nimbus Roman No9 L</family> <family>Nimbus Roman No9 L</family>
<family>Luxi Serif</family> <family>Luxi Serif</family>
<family>Kochi Mincho</family> <family>Kochi Mincho</family>
@ -92,6 +93,7 @@
<family>Helvetica</family> <family>Helvetica</family>
<family>Arial</family> <family>Arial</family>
<family>Verdana</family> <family>Verdana</family>
<family>Albany AMT</family>
<family>Nimbus Sans L</family> <family>Nimbus Sans L</family>
<family>Luxi Sans</family> <family>Luxi Sans</family>
<family>Kochi Gothic</family> <family>Kochi Gothic</family>
@ -111,6 +113,7 @@
<family>Courier New</family> <family>Courier New</family>
<family>Andale Mono</family> <family>Andale Mono</family>
<family>Luxi Mono</family> <family>Luxi Mono</family>
<family>Cumberland AMT</family>
<family>Nimbus Mono L</family> <family>Nimbus Mono L</family>
<family>NSimSun</family> <family>NSimSun</family>
<family>FreeMono</family> <family>FreeMono</family>
@ -134,6 +137,48 @@
</edit> </edit>
</match> </match>
<!--
URW provides metric and shape compatible fonts for these 3 Adobe families.
Mark these as effective replacements by binding the replacement
family names strongly
-->
<match target="pattern">
<test name="family"><string>Times</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Roman No9 L</string>
</edit>
</match>
<match target="pattern">
<test name="family"><string>Helvetica</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Sans L</string>
</edit>
</match>
<match target="pattern">
<test name="family"><string>Courier</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Mono L</string>
</edit>
</match>
<!--
AMT provides metric and shape compatible fonts for these three web font
families. Bind them weakly as matching here is not as important as
with web fonts.
-->
<alias>
<family>Times New Roman</family>
<accept><family>Thorndale AMT</family></accept>
</alias>
<alias>
<family>Arial</family>
<accept><family>Albany AMT</family></accept>
</alias>
<alias>
<family>Courier New</family>
<accept><family>Cumberland AMT</family></accept>
</alias>
<!-- <!--
Some Asian fonts misadvertise themselves as monospaced when Some Asian fonts misadvertise themselves as monospaced when
in fact they are dual-spaced (half and full). This makes in fact they are dual-spaced (half and full). This makes
@ -214,22 +259,6 @@
<include ignore_missing="yes">conf.d</include> <include ignore_missing="yes">conf.d</include>
<include ignore_missing="yes">local.conf</include> <include ignore_missing="yes">local.conf</include>
<!--
Alias well known Type1 font names to metric equivalent TrueType fonts
-->
<alias>
<family>Times</family>
<accept><family>Times New Roman</family></accept>
</alias>
<alias>
<family>Helvetica</family>
<accept><family>Arial</family></accept>
</alias>
<alias>
<family>Courier</family>
<accept><family>Courier New</family></accept>
</alias>
<!-- <!--
Provide required aliases for standard names Provide required aliases for standard names
--> -->
@ -238,8 +267,9 @@
<prefer> <prefer>
<family>Bitstream Vera Serif</family> <family>Bitstream Vera Serif</family>
<family>Times New Roman</family> <family>Times New Roman</family>
<family>Nimbus Roman No9 L</family> <family>Thorndale AMT</family>
<family>Luxi Serif</family> <family>Luxi Serif</family>
<family>Nimbus Roman No9 L</family>
<family>Times</family> <family>Times</family>
<family>Frank Ruehl</family> <family>Frank Ruehl</family>
<family>Kochi Mincho</family> <family>Kochi Mincho</family>
@ -254,9 +284,10 @@
<prefer> <prefer>
<family>Bitstream Vera Sans</family> <family>Bitstream Vera Sans</family>
<family>Verdana</family> <family>Verdana</family>
<family>Nimbus Sans L</family>
<family>Luxi Sans</family>
<family>Arial</family> <family>Arial</family>
<family>Albany AMT</family>
<family>Luxi Sans</family>
<family>Nimbus Sans L</family>
<family>Helvetica</family> <family>Helvetica</family>
<family>Nachlieli</family> <family>Nachlieli</family>
<family>Kochi Gothic</family> <family>Kochi Gothic</family>
@ -273,8 +304,10 @@
<family>Bitstream Vera Sans Mono</family> <family>Bitstream Vera Sans Mono</family>
<family>Andale Mono</family> <family>Andale Mono</family>
<family>Courier New</family> <family>Courier New</family>
<family>Cumberland AMT</family>
<family>Luxi Mono</family> <family>Luxi Mono</family>
<family>Nimbus Mono L</family> <family>Nimbus Mono L</family>
<family>Courier</family>
<family>Miriam Mono</family> <family>Miriam Mono</family>
<family>Kochi Gothic</family> <family>Kochi Gothic</family>
<family>AR PL KaitiM GB</family> <family>AR PL KaitiM GB</family>

View File

@ -231,6 +231,37 @@ typedef struct _FcGlyphName {
FcChar8 name[1]; /* name extends beyond struct */ FcChar8 name[1]; /* name extends beyond struct */
} FcGlyphName; } FcGlyphName;
/*
* To perform case-insensitive string comparisons, a table
* is used which holds three different kinds of folding data.
*
* The first is a range of upper case values mapping to a range
* of their lower case equivalents. Within each range, the offset
* between upper and lower case is constant.
*
* The second is a range of upper case values which are interleaved
* with their lower case equivalents.
*
* The third is a set of raw unicode values mapping to a list
* of unicode values for comparison purposes. This allows conversion
* of ß to "ss" so that SS, ss and ß all match. A separate array
* holds the list of unicode values for each entry.
*
* These are packed into a single table. Using a binary search,
* the appropriate entry can be located.
*/
/* Values for FcCaseFold.method; they select how `offset` and `count` are read */
#define FC_CASE_FOLD_RANGE 0
#define FC_CASE_FOLD_EVEN_ODD 1
#define FC_CASE_FOLD_FULL 2
/*
 * One entry of the case folding table.  Entries are located by binary
 * search on `upper`.
 */
typedef struct _FcCaseFold {
FcChar32 upper; /* first code point covered by this entry */
FcChar16 method : 2; /* one of the FC_CASE_FOLD_* values */
FcChar16 count : 14; /* RANGE/EVEN_ODD: code points covered; FULL: byte length of the replacement */
short offset; /* lower - upper for RANGE and EVEN_ODD, index into the replacement byte array for FULL */
} FcCaseFold;
#define FC_MAX_FILE_LEN 4096 #define FC_MAX_FILE_LEN 4096
/* /*
@ -746,4 +777,7 @@ FcStrUsesHome (const FcChar8 *s);
FcChar8 * FcChar8 *
FcStrLastSlash (const FcChar8 *path); FcStrLastSlash (const FcChar8 *path);
FcChar32
FcStrHashIgnoreCase (const FcChar8 *s);
#endif /* _FC_INT_H_ */ #endif /* _FC_INT_H_ */

View File

@ -219,20 +219,6 @@ FcListPatternMatchAny (const FcPattern *p,
return FcTrue; return FcTrue;
} }
static FcChar32
FcListStringHash (const FcChar8 *s)
{
FcChar32 h = 0;
FcChar8 c;
while ((c = *s++))
{
c = FcToLower (c);
h = ((h << 3) ^ (h >> 3)) ^ c;
}
return h;
}
static FcChar32 static FcChar32
FcListMatrixHash (const FcMatrix *m) FcListMatrixHash (const FcMatrix *m)
{ {
@ -255,7 +241,7 @@ FcListValueHash (FcValue v)
case FcTypeDouble: case FcTypeDouble:
return (FcChar32) (int) v.u.d; return (FcChar32) (int) v.u.d;
case FcTypeString: case FcTypeString:
return FcListStringHash (v.u.s); return FcStrHashIgnoreCase (v.u.s);
case FcTypeBool: case FcTypeBool:
return (FcChar32) v.u.b; return (FcChar32) v.u.b;
case FcTypeMatrix: case FcTypeMatrix:

View File

@ -63,16 +63,147 @@ FcStrFree (FcChar8 *s)
free (s); free (s);
} }
#include "../fc-case/fccase.h"
/*
 * Number of source code points covered by a fold table entry.  A FULL
 * entry maps exactly one code point (its `count` holds the replacement
 * length instead); any other entry covers `count` code points.
 */
#define FcCaseFoldUpperCount(cf) \
((cf)->method == FC_CASE_FOLD_FULL ? 1 : (cf)->count)
#define FC_STR_CANON_BUF_LEN 1024
/*
 * Cursor used to read a string one case-folded byte at a time.
 */
typedef struct _FcCaseWalker {
const FcChar8 *read; /* next pending byte inside utf8[], or 0 when reading directly from src */
const FcChar8 *src; /* next unread byte of the input string */
int len; /* input bytes not yet consumed; starts at strlen (src) */
FcChar8 utf8[FC_MAX_CASE_FOLD_CHARS + 1]; /* NUL-terminated folded bytes for the current character */
} FcCaseWalker;
/*
 * Prepare walker `w` to read the NUL-terminated string `src` from its
 * first byte.  No folded bytes are pending after initialisation.
 */
static void
FcStrCaseWalkerInit (const FcChar8 *src, FcCaseWalker *w)
{
    w->src = src;
    w->read = 0;
    /* FcChar8 is unsigned char; strlen wants a plain char pointer */
    w->len = (int) strlen ((const char *) src);
}
/*
 * Handle a multi-byte UTF-8 character whose lead byte `r` has just been
 * consumed from w->src.
 *
 * The character is decoded and looked up in the case folding table.  When
 * a mapping exists, the folded UTF-8 bytes are stored in w->utf8, the
 * remaining source bytes of the character are consumed, and the first
 * folded byte is returned; the rest are delivered through w->read.
 * When the character cannot be decoded or has no mapping, `r` is returned
 * unchanged and the caller keeps reading the raw bytes.
 */
static FcChar8
FcStrCaseWalkerLong (FcCaseWalker *w, FcChar8 r)
{
    FcChar32 ucs4;
    int slen;

    /*
     * The lead byte was already consumed and w->len decremented for it,
     * so the sequence starts one byte back and has w->len + 1 bytes
     * available.  Passing w->len alone made the final multi-byte
     * character of a string impossible to decode.
     */
    slen = FcUtf8ToUcs4 (w->src - 1, &ucs4, w->len + 1);
    if (slen <= 0)
        return r;
    if (FC_MIN_FOLD_CHAR <= ucs4 && ucs4 <= FC_MAX_FOLD_CHAR)
    {
        /*
         * Inclusive binary search over the table.
         * NOTE(review): assumes fcCaseFold holds FC_NUM_CASE_FOLD entries,
         * so the last valid index is FC_NUM_CASE_FOLD - 1 -- confirm
         * against the generated fccase.h.
         */
        int min = 0;
        int max = FC_NUM_CASE_FOLD - 1;

        while (min <= max)
        {
            int mid = (min + max) >> 1;
            const FcCaseFold *fold = &fcCaseFold[mid];
            FcChar32 low = fold->upper;
            FcChar32 high = low + FcCaseFoldUpperCount (fold);
            int dlen;

            if (high <= ucs4)
            {
                min = mid + 1;
                continue;
            }
            if (ucs4 < low)
            {
                max = mid - 1;
                continue;
            }

            /* ucs4 lies within [low, high): apply this entry */
            switch (fold->method) {
            case FC_CASE_FOLD_EVEN_ODD:
                /* only code points with the same parity as `upper` fold */
                if ((ucs4 & 1) != (fold->upper & 1))
                    return r;
                /* fall through ... */
            default:
                dlen = FcUcs4ToUtf8 (ucs4 + fold->offset, w->utf8);
                break;
            case FC_CASE_FOLD_FULL:
                /* replacement is a ready-made byte sequence */
                dlen = fold->count;
                memcpy (w->utf8, fcCaseFoldChars + fold->offset, dlen);
                break;
            }

            /* consume rest of src utf-8 bytes */
            w->src += slen - 1;
            w->len -= slen - 1;

            /* read from temp buffer */
            w->utf8[dlen] = '\0';
            w->read = w->utf8;
            return *w->read++;
        }
    }
    return r;
}
/*
 * Return the next case-folded byte of the walked string, or 0 at its end.
 *
 * Bytes left over from a previously folded multi-byte character are
 * delivered first.  Otherwise one source byte is consumed: UTF-8 lead
 * bytes are handed to FcStrCaseWalkerLong, ASCII upper case letters are
 * lowered, and everything else is returned as is.
 */
static FcChar8
FcStrCaseWalkerNext (FcCaseWalker *w)
{
    FcChar8 ch;

    /* drain the pending folded bytes, if any */
    if (w->read != 0)
    {
        ch = *w->read++;
        if (ch != 0)
            return ch;
        w->read = 0;
    }

    ch = *w->src++;
    w->len--;

    /* top two bits set: lead byte of a multi-byte sequence */
    if ((ch & 0xc0) == 0xc0)
        return FcStrCaseWalkerLong (w, ch);

    if (ch >= 'A' && ch <= 'Z')
        ch = ch - 'A' + 'a';
    return ch;
}
/*
 * Like FcStrCaseWalkerNext, but space characters (' ') in the source
 * string are skipped before a byte is returned.  Returns 0 at the end
 * of the string.
 */
static FcChar8
FcStrCaseWalkerNextIgnoreBlanks (FcCaseWalker *w)
{
    FcChar8 ch;

    /* drain the pending folded bytes, if any */
    if (w->read != 0)
    {
        ch = *w->read++;
        if (ch != 0)
            return ch;
        w->read = 0;
    }

    /* consume source bytes until one that is not a space */
    for (;;)
    {
        ch = *w->src++;
        w->len--;
        if (ch != ' ')
            break;
    }

    /* top two bits set: lead byte of a multi-byte sequence */
    if ((ch & 0xc0) == 0xc0)
        return FcStrCaseWalkerLong (w, ch);

    if (ch >= 'A' && ch <= 'Z')
        ch = ch - 'A' + 'a';
    return ch;
}
int int
FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2) FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{ {
FcChar8 c1, c2; FcCaseWalker w1, w2;
FcChar8 c1, c2;
if (s1 == s2) return 0;
FcStrCaseWalkerInit (s1, &w1);
FcStrCaseWalkerInit (s2, &w2);
for (;;) for (;;)
{ {
c1 = *s1++; c1 = FcStrCaseWalkerNext (&w1);
c2 = *s2++; c2 = FcStrCaseWalkerNext (&w2);
if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2)))) if (!c1 || (c1 != c2))
break; break;
} }
return (int) c1 - (int) c2; return (int) c1 - (int) c2;
@ -81,17 +212,19 @@ FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
int int
FcStrCmpIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2) FcStrCmpIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
{ {
FcChar8 c1, c2; FcCaseWalker w1, w2;
FcChar8 c1, c2;
if (s1 == s2) return 0;
FcStrCaseWalkerInit (s1, &w1);
FcStrCaseWalkerInit (s2, &w2);
for (;;) for (;;)
{ {
do c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
c1 = *s1++; c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
while (c1 == ' '); if (!c1 || (c1 != c2))
do
c2 = *s2++;
while (c2 == ' ');
if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
break; break;
} }
return (int) c1 - (int) c2; return (int) c1 - (int) c2;
@ -114,6 +247,23 @@ FcStrCmp (const FcChar8 *s1, const FcChar8 *s2)
return (int) c1 - (int) c2; return (int) c1 - (int) c2;
} }
/*
 * Compute a hash of string `s` over its case-folded bytes, so strings
 * differing only in case hash to the same value.
 */
FcChar32
FcStrHashIgnoreCase (const FcChar8 *s)
{
    FcCaseWalker walker;
    FcChar32 hash = 0;
    FcChar8 ch;

    FcStrCaseWalkerInit (s, &walker);
    for (ch = FcStrCaseWalkerNext (&walker);
         ch != 0;
         ch = FcStrCaseWalkerNext (&walker))
    {
        /* mix each folded byte into the running value */
        hash = ((hash << 3) ^ (hash >> 3)) ^ ch;
    }
    return hash;
}
/* /*
* Is the head of s1 equal to s2? * Is the head of s1 equal to s2?
*/ */
@ -121,17 +271,17 @@ FcStrCmp (const FcChar8 *s1, const FcChar8 *s2)
static FcBool static FcBool
FcStrIsAtIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2) FcStrIsAtIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
{ {
FcChar8 c1, c2; FcCaseWalker w1, w2;
FcChar8 c1, c2;
FcStrCaseWalkerInit (s1, &w1);
FcStrCaseWalkerInit (s2, &w2);
for (;;) for (;;)
{ {
do c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
c1 = *s1++; c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
while (c1 == ' '); if (!c1 || (c1 != c2))
do
c2 = *s2++;
while (c2 == ' ');
if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
break; break;
} }
return c1 == c2 || !c2; return c1 == c2 || !c2;
@ -160,13 +310,17 @@ FcStrContainsIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
static FcBool static FcBool
FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2) FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{ {
FcChar8 c1, c2; FcCaseWalker w1, w2;
FcChar8 c1, c2;
FcStrCaseWalkerInit (s1, &w1);
FcStrCaseWalkerInit (s2, &w2);
for (;;) for (;;)
{ {
c1 = *s1++; c1 = FcStrCaseWalkerNext (&w1);
c2 = *s2++; c2 = FcStrCaseWalkerNext (&w2);
if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2)))) if (!c1 || (c1 != c2))
break; break;
} }
return c1 == c2 || !c2; return c1 == c2 || !c2;
@ -191,52 +345,45 @@ FcStrContainsIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
const FcChar8 * const FcChar8 *
FcStrStrIgnoreCase (const FcChar8 *s1, const FcChar8 *s2) FcStrStrIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{ {
FcChar8 c1, c2; FcCaseWalker w1, w2;
const FcChar8 * p = s1; FcChar8 c1, c2;
const FcChar8 * b = s2; const FcChar8 *cur;
if (!s1 || !s2) if (!s1 || !s2)
return 0; return 0;
if (s1 == s2) if (s1 == s2)
return s1; return s1;
again: FcStrCaseWalkerInit (s1, &w1);
c2 = *s2++; FcStrCaseWalkerInit (s2, &w2);
c2 = FcToLower (c2);
c2 = FcStrCaseWalkerNext (&w2);
if (!c2)
return 0;
for (;;)
{
p = s1;
c1 = *s1++;
if (!c1 || (c1 = FcToLower (c1)) == c2)
break;
}
if (c1 != c2)
return 0;
for (;;) for (;;)
{ {
c1 = *s1; cur = w1.src;
c2 = *s2; c1 = FcStrCaseWalkerNext (&w1);
if (c1 && c2 && (c1 = FcToLower (c1)) != (c2 = FcToLower (c2)))
{
s1 = p + 1;
s2 = b;
goto again;
}
if (!c2)
return p;
if (!c1) if (!c1)
return 0; break;
++ s1; if (c1 == c2)
++ s2; {
} FcCaseWalker w1t = w1;
FcCaseWalker w2t = w2;
FcChar8 c1t, c2t;
for (;;)
{
c1t = FcStrCaseWalkerNext (&w1t);
c2t = FcStrCaseWalkerNext (&w2t);
if (!c2t)
return cur;
if (c2t != c1t)
break;
}
}
}
return 0; return 0;
} }