Add the static raw data to generate fcblanks.h

https://bugs.freedesktop.org/show_bug.cgi?id=91406
This commit is contained in:
Akira TAGOH 2016-06-09 14:22:31 +09:00
parent ea26c5e9f8
commit 600110ee8c
2 changed files with 137 additions and 3 deletions

View File

@ -4,12 +4,27 @@ from __future__ import absolute_import
from __future__ import print_function
import urllib2
import sys
import os
from lxml import html
from six.moves import range
fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g')
data = fp.read()
fp.close()
datafile = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'list-unicodeset.html')
try:
fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g')
data = fp.read()
fp.close()
fp = open(datafile, 'w');
fp.write(data);
fp.close();
except urllib2.URLError:
# fall back reading the static data in repo
try:
fp = open(datafile)
data = fp.read()
fp.close()
except IOError:
sys.stderr.write("Error: No static data to generate the blank data. please make sure the network connection is reachable to Unicode.org\n")
sys.exit(1)
dom = html.fromstring(data)
x = dom.xpath('/html/body/form/p/text()')

View File

@ -0,0 +1,119 @@
<html>
<head>
<meta http-equiv="Content-Language" content="en-us">
<meta name="GENERATOR" content="Microsoft FrontPage 6.0">
<meta name="ProgId" content="FrontPage.Editor.Document">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="index.css">
<title>Unicode Utilities: UnicodeSet</title>
</head>
<body>
<h1>Unicode Utilities: UnicodeSet </h1>
<p><a target="help" href="http://cldr.unicode.org/unicode-utilities/list-unicodeset"><b>help</b></a> | <a target="character" href="character.jsp">character</a>
| <a target="properties" href="properties.jsp">properties</a>
| <a target="confusables" href="confusables.jsp">confusables</a>
| <a target="list" href="list-unicodeset.jsp">unicode-set</a>
| <a target="compare" href="unicodeset.jsp">compare-sets</a>
| <a target="regex" href="regex.jsp">regex</a>
| <a target="bnf" href="bnf.jsp">bnf-regex</a>
| <a target="breaks" href="breaks.jsp">breaks</a>
| <a target="transform" href="transform.jsp">transform</a>
| <a target="bidi" href="bidi.jsp">bidi</a>
| <a target="idna" href="idna.jsp">idna</a>
| <a target="languageid" href="languageid.jsp">languageid</a></p>
<form name="myform">
<table border="1" cellpadding="0" cellspacing="0" style="border-collapse: collapse; width:100%">
<tr>
<th style="width: 50%">Input</th>
</tr>
<tr>
<td><textarea name="a" rows="8" cols="10" style="width: 100%">[:GC=Zs:][:DI:]</textarea></td>
</tr>
<tr>
<td>
<input id='main' type="submit" value="Show Set" onClick="window.location.href='list-unicodeset.jsp?a='+document.getElementById('main').value"/>&nbsp;&nbsp;
<input type="checkbox" checked name="abb"><label for="abb">Abbreviate</label>&nbsp;&nbsp;
<input type="checkbox" name="c"><label for="c">Collate</label>&nbsp;&nbsp;
<input type="checkbox" checked name="ucd"><label for="ucd">UCD format</label>&nbsp;&nbsp;
<input type="checkbox" checked name="esc"><label for="esc">Escape</label>&nbsp;&nbsp;
<label for="g">Group by:</label>
<input type="text" checked name="g" size="25" value="">
<label for="i">Info:</label>
<input type="text" checked name="i" size="25" value="">
</td>
</tr>
</table>
<p>4,190 Code Points</p>
<hr>
<p>[\ \u00A0\u00AD\u034F\u061C\u115F\u1160\u1680\u17B4\u17B5\u180B-\u180E\u2000-\u200F\u202A-\u202F\u205F-\u206F\u3000\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001BCA0-\U0001BCA3\U0001D173-\U0001D17A\U000E0000-\U000E0FFF]</p>
<hr>
<table width='100%'><tr><td colSpan='4'><tr><td class='charCell' width='3m'>   </td><td width='7m'><code><a target='c' href='character.jsp?a=0020'>0020</a></code></td><td>SPACE</td></tr>
<tr><td class='charCell' width='3m'>   </td><td width='7m'><code><a target='c' href='character.jsp?a=00A0'>00A0</a></code></td><td>NO-BREAK SPACE</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=00AD'>00AD</a></code></td><td>SOFT HYPHEN</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=034F'>034F</a></code></td><td>COMBINING GRAPHEME JOINER</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=061C'>061C</a></code></td><td>ARABIC LETTER MARK</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=115F'>115F</a></code></td><td>HANGUL CHOSEONG FILLER</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1160'>1160</a></code></td><td>HANGUL JUNGSEONG FILLER</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1680'>1680</a></code></td><td>OGHAM SPACE MARK</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=17B4'>17B4</a></code></td><td>KHMER VOWEL INHERENT AQ</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=17B5'>17B5</a></code></td><td>KHMER VOWEL INHERENT AA</td></tr>
<code><a target='c' href='character.jsp?a=180B'>180B</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=180E'>180E</a></code></td><td>MONGOLIAN VOWEL SEPARATOR</td></tr>
<code><a target='c' href='character.jsp?a=2000'>2000</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=200F'>200F</a></code></td><td>RIGHT-TO-LEFT MARK</td></tr>
<code><a target='c' href='character.jsp?a=202A'>202A</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=202F'>202F</a></code></td><td>NARROW NO-BREAK SPACE</td></tr>
<code><a target='c' href='character.jsp?a=205F'>205F</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=206F'>206F</a></code></td><td>NOMINAL DIGIT SHAPES</td></tr>
<tr><td class='charCell' width='3m'>   </td><td width='7m'><code><a target='c' href='character.jsp?a=3000'>3000</a></code></td><td>IDEOGRAPHIC SPACE</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=3164'>3164</a></code></td><td>HANGUL FILLER</td></tr>
<code><a target='c' href='character.jsp?a=FE00'>FE00</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FE0F'>FE0F</a></code></td><td>VARIATION SELECTOR-16</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FEFF'>FEFF</a></code></td><td>ZERO WIDTH NO-BREAK SPACE</td></tr>
<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FFA0'>FFA0</a></code></td><td>HALFWIDTH HANGUL FILLER</td></tr>
<code><a target='c' href='character.jsp?a=FFF0'>FFF0</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FFF8'>FFF8</a></code></td><td><i>&lt;unassigned-FFF8&gt;</i></td></tr>
<code><a target='c' href='character.jsp?a=1BCA0'>1BCA0</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1BCA3'>1BCA3</a></code></td><td>SHORTHAND FORMAT UP STEP</td></tr>
<code><a target='c' href='character.jsp?a=1D173'>1D173</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1D17A'>1D17A</a></code></td><td>MUSICAL SYMBOL END PHRASE</td></tr>
<code><a target='c' href='character.jsp?a=E0000'>E0000</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=E0FFF'>E0FFF</a></code></td><td><i>&lt;unassigned-E0FFF&gt;</i></td></tr>
</td></tr></table>
</form>
<hr>
<p style="font-size:80%"><b><a name="fonts">Fonts and Display.</a></b> If you don't have a good set of Unicode fonts (and modern browser),
you may not be able to read some of the characters.
Some suggested fonts that you can add for coverage are:
<a href="http://greekfonts.teilar.gr/" target="_blank">Unicode Fonts for Ancient Scripts</a>,
<a href="https://www.google.com/get/noto/" target="_blank">Noto Fonts site</a>,
<a href="http://www.alanwood.net/unicode/fonts.html" target="_blank">Large, multi-script Unicode fonts</a>.
See also: <a href="http://www.unicode.org/help/display_problems.html" target="_blank">Unicode Display Problems</a>.</p>
<p style="font-size:80%">Version 3.7;
ICU version: 57.0.1.0;
Unicode version: 8.0.0.0
</p>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-8314904-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
<hr>
</body>
</html>