83 lines
2.6 KiB
Python
Executable File
83 lines
2.6 KiB
Python
Executable File
#! /usr/bin/python
|
|
|
|
# PCRE2 UNICODE PROPERTY SUPPORT
|
|
# ------------------------------
|
|
|
|
# This script generates the pcre2_ucp.h file from Unicode data files. This
|
|
# header uses enumerations to give names to Unicode property types and script
|
|
# names.
|
|
|
|
# This script was created in December 2021 as part of the Unicode data
|
|
# generation refactoring.
|
|
|
|
|
|
# Import common data lists and functions
|
|
|
|
from GenerateCommon import \
|
|
bidi_classes, \
|
|
break_properties, \
|
|
category_names, \
|
|
general_category_names, \
|
|
script_names, \
|
|
open_output
|
|
|
|
# Create the output file. open_output() does not return if the file cannot be
# opened, and it has already written the standard header boilerplate by the
# time it hands back the file object.

f = open_output("pcre2_ucp.h")

# Emit the include guard followed by the introductory comment of the header.
# The text is spelled out one output line per literal.

f.write(
    "#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD\n"
    "#define PCRE2_UCP_H_IDEMPOTENT_GUARD\n"
    "\n"
    "/* This file contains definitions of the Unicode property values that are\n"
    "returned by the UCD access macros and used throughout PCRE2.\n"
    "\n"
    "IMPORTANT: The specific values of the first two enums (general and particular\n"
    "character categories) are assumed by the table called catposstab in the file\n"
    "pcre2_auto_possess.c. They are unlikely to change, but should be checked after\n"
    "an update. */\n"
    "\n"
)
|
|
|
|
# Emit the enum of general character categories: one "ucp_<name>" enumerator
# per entry of general_category_names, in list order.

f.write(
    "/* These are the general character categories. */\n"
    "\n"
    "enum {\n"
)
f.write("".join(" ucp_%s,\n" % name for name in general_category_names))
f.write("};\n\n")
|
|
|
|
# Emit the enum of particular character categories. category_names is read as
# a flat list of pairs: the even-indexed entry becomes the enumerator name and
# the entry that follows it is emitted as a trailing C comment.

f.write(
    "/* These are the particular character categories. */\n"
    "\n"
    "enum {\n"
)
for pos in range(0, len(category_names), 2):
    enumerator = category_names[pos]
    remark = category_names[pos + 1]
    f.write(" ucp_%s, /* %s */\n" % (enumerator, remark))
f.write("};\n\n")
|
|
|
|
# Emit the enum of bidi class values. bidi_classes is read as a flat list of
# pairs: the even-indexed entry becomes the "ucp_bidi<name>" enumerator and
# the entry that follows it is emitted as a trailing C comment.

f.write(
    "/* These are the bidi class values. */\n"
    "\n"
    "enum {\n"
)
for pos in range(0, len(bidi_classes), 2):
    class_name = bidi_classes[pos]
    remark = bidi_classes[pos + 1]
    # Names shorter than 4 characters are padded so the comments line up.
    padding = ' ' * (4 - len(class_name))
    f.write(" ucp_bidi%s,%s /* %s */\n" % (class_name, padding, remark))
f.write("};\n\n")
|
|
|
|
# Emit the enum of grapheme break properties. break_properties is read as a
# flat list of pairs: the even-indexed entry becomes the "ucp_gb<name>"
# enumerator and the entry that follows it is emitted as a trailing C comment.

f.write(
    "/* These are grapheme break properties. The Extended Pictographic property\n"
    "comes from the emoji-data.txt file. */\n"
    "\n"
    "enum {\n"
)
for pos in range(0, len(break_properties), 2):
    property_name = break_properties[pos]
    remark = break_properties[pos + 1]
    # Names shorter than 21 characters are padded so the comments line up.
    padding = ' ' * (21 - len(property_name))
    f.write(" ucp_gb%s,%s /* %s */\n" % (property_name, padding, remark))
f.write("};\n\n")
|
|
|
|
# Emit the enum of script identifications: one "ucp_<name>" enumerator per
# entry of script_names, in list order. A second section comment is inserted
# immediately before the "Unknown" entry, and ucp_Script_Count closes the enum
# so that it always comes last.

f.write(
    "/* These are the script identifications. */\n"
    "\n"
    "enum {\n"
    " /* Scripts which has characters in other scripts. */\n"
)
for script in script_names:
    if script == "Unknown":
        f.write("\n /* Scripts which has no characters in other scripts. */\n")
    f.write(" ucp_%s,\n" % script)

f.write(
    "\n"
    " /* This must be last */\n"
    " ucp_Script_Count\n"
    "};\n"
    "\n"
)
|
|
|
|
# Close the include guard and mark the end of the generated header.

f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
f.write("/* End of pcre2_ucp.h */\n")

# Explicitly close the output file. The parentheses matter: a bare "f.close"
# only looks the method up and never runs it, so the file would not be
# explicitly closed by this script.

f.close()
|
|
|
|
# End
|