Update CaseFolding.txt to Unicode 5.1.0

This commit is contained in:
Behdad Esfahbod 2007-10-25 14:20:06 -07:00 committed by Keith Packard
parent 4ee9ca6786
commit fc990b2e86
2 changed files with 178 additions and 8 deletions

View File

@ -1,10 +1,11 @@
# CaseFolding-4.0.1.txt
# Date: 2004-03-02, 02:41:24 GMT [MD]
# CaseFolding-5.1.0.txt
# Date: 2007-04-26, 20:59:40 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2004 Unicode, Inc.
# Copyright (c) 1991-2007 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
#
# Case Folding Properties
#
# This file is a supplement to the UnicodeData file.
@ -16,14 +17,15 @@
# The data supports both implementations that require simple case foldings
# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
# All code points not listed in this file map to themselves.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, see
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard, Version 5.0.
#
# ================================================================================
# Format
@ -50,7 +52,7 @@
# behavior. (The default option is to exclude them.)
#
# =================================================================
# @missing 0000..10FFFF; <codepoint>
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
0043; C; 0063; # LATIN CAPITAL LETTER C
@ -271,6 +273,19 @@
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
0244; C; 0289; # LATIN CAPITAL LETTER U BAR
0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
@ -331,6 +346,9 @@
03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
@ -423,6 +441,7 @@
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
@ -449,7 +468,11 @@
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
@ -458,6 +481,8 @@
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
@ -497,6 +522,44 @@
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
@ -802,6 +865,7 @@
2126; C; 03C9; # OHM SIGN
212A; C; 006B; # KELVIN SIGN
212B; C; 00E5; # ANGSTROM SIGN
2132; C; 214E; # TURNED CAPITAL F
2160; C; 2170; # ROMAN NUMERAL ONE
2161; C; 2171; # ROMAN NUMERAL TWO
2162; C; 2172; # ROMAN NUMERAL THREE
@ -818,6 +882,7 @@
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
@ -844,6 +909,111 @@
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL

View File

@ -367,7 +367,7 @@ FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
}
/*
* Does s1 contain an instance of s2 (ignoring case)?
* Does s1 contain an instance of s2 (ignoring blanks and case)?
*/
const FcChar8 *