From 600110ee8c3e9bdd18cd5bc27555d1f1114e4880 Mon Sep 17 00:00:00 2001 From: Akira TAGOH Date: Thu, 9 Jun 2016 14:22:31 +0900 Subject: [PATCH] Add the static raw data to generate fcblanks.h https://bugs.freedesktop.org/show_bug.cgi?id=91406 --- fc-blanks/fc-blanks.py | 21 +++++- fc-blanks/list-unicodeset.html | 119 +++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 fc-blanks/list-unicodeset.html diff --git a/fc-blanks/fc-blanks.py b/fc-blanks/fc-blanks.py index 81b07d2..b88a0aa 100755 --- a/fc-blanks/fc-blanks.py +++ b/fc-blanks/fc-blanks.py @@ -4,12 +4,27 @@ from __future__ import absolute_import from __future__ import print_function import urllib2 import sys +import os from lxml import html from six.moves import range -fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g') -data = fp.read() -fp.close() +datafile = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'list-unicodeset.html') +try: + fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g') + data = fp.read() + fp.close() + fp = open(datafile, 'w'); + fp.write(data); + fp.close(); +except urllib2.URLError: + # fall back reading the static data in repo + try: + fp = open(datafile) + data = fp.read() + fp.close() + except IOError: + sys.stderr.write("Error: No static data to generate the blank data. please make sure the network connection is reachable to Unicode.org\n") + sys.exit(1) dom = html.fromstring(data) x = dom.xpath('/html/body/form/p/text()') diff --git a/fc-blanks/list-unicodeset.html b/fc-blanks/list-unicodeset.html new file mode 100644 index 0000000..6e95efa --- /dev/null +++ b/fc-blanks/list-unicodeset.html @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +Unicode Utilities: UnicodeSet + + + +

Unicode Utilities: UnicodeSet

+

help | character + | properties + | confusables + | unicode-set + | compare-sets + | regex + | bnf-regex + | breaks + | transform + | bidi + | idna + | languageid

+
+ + + + + + + + + + +
Input
+    +    +    +    +    + + + + +
+

4,190 Code Points

+
+

[\ \u00A0\u00AD\u034F\u061C\u115F\u1160\u1680\u17B4\u17B5\u180B-\u180E\u2000-\u200F\u202A-\u202F\u205F-\u206F\u3000\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001BCA0-\U0001BCA3\U0001D173-\U0001D17A\U000E0000-\U000E0FFF]

+
+ + + + + + + + + + +180B.. +2000.. +202A.. +205F.. + + +FE00.. + + +FFF0.. +1BCA0.. +1D173.. +E0000.. +
   0020SPACE
   00A0NO-BREAK SPACE
  00ADSOFT HYPHEN
  034FCOMBINING GRAPHEME JOINER
  061CARABIC LETTER MARK
  115FHANGUL CHOSEONG FILLER
  1160HANGUL JUNGSEONG FILLER
   1680OGHAM SPACE MARK
  17B4KHMER VOWEL INHERENT AQ
  17B5KHMER VOWEL INHERENT AA
  180EMONGOLIAN VOWEL SEPARATOR
 ‎‏‎ 200FRIGHT-TO-LEFT MARK
   202FNARROW NO-BREAK SPACE
  206FNOMINAL DIGIT SHAPES
   3000IDEOGRAPHIC SPACE
  3164HANGUL FILLER
  FE0FVARIATION SELECTOR-16
  FEFFZERO WIDTH NO-BREAK SPACE
  FFA0HALFWIDTH HANGUL FILLER
  FFF8<unassigned-FFF8>
  1BCA3SHORTHAND FORMAT UP STEP
  1D17AMUSICAL SYMBOL END PHRASE
  E0FFF<unassigned-E0FFF>
+
+
+

Fonts and Display. If you don't have a good set of Unicode fonts (and modern browser), +you may not be able to read some of the characters. +Some suggested fonts that you can add for coverage are: +Unicode Fonts for Ancient Scripts, +Noto Fonts site, +Large, multi-script Unicode fonts. +See also: Unicode Display Problems.

+

Version 3.7; +ICU version: 57.0.1.0; +Unicode version: 8.0.0.0 +

+ + +
+ +