From e708e97c351d3bc9f7030ef22ac2f007d5114730 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod
Date: Thu, 3 Jul 2014 17:52:54 -0400
Subject: [PATCH] Change charset parse/unparse format to be human readable

Previous format was unusable.  New format is ranges of hex values.
To choose space character and Latin capital letters for example:

$ fc-pattern ':charset=20 41-5a'
Pattern has 1 elts (size 16)
	charset:
	0000: 00000000 00000001 07fffffe 00000000 00000000 00000000 00000000 00000000
(s)
---
Review notes (differences from the original commit e708e97c):
 * FcNameParseRange: cast to unsigned char before calling isspace(), and
   reject input where strtol consumed no digits (a lone "-" used to be
   accepted as the range 0-0).
 * FcNameParseCharSet: check the result of FcCharSetAddChar.
 * Added comments describing the new format and the new helpers.
 * Line breaks and indentation were rebuilt from a whitespace-collapsed
   copy of this patch, so removed and context lines may differ from the
   tree in whitespace only (use "git apply --ignore-whitespace").  The
   post-image blob id on the index line predates these changes.

 src/fccharset.c | 257 +++++++++++++++++++++-----------------------------
 1 file changed, 110 insertions(+), 147 deletions(-)

diff --git a/src/fccharset.c b/src/fccharset.c
index 43a3cc0..d1d976a 100644
--- a/src/fccharset.c
+++ b/src/fccharset.c
@@ -815,188 +815,151 @@ FcCharSetCoverage (const FcCharSet *a, FcChar32 page, FcChar32 *result)
     return page;
 }
 
-/*
- * ASCII representation of charsets.
- *
- * Each leaf is represented as 9 32-bit values, the code of the first character followed
- * by 8 32 bit values for the leaf itself.  Each value is encoded as 5 ASCII characters,
- * only 85 different values are used to avoid control characters as well as the other
- * characters used to encode font names.  85**5 > 2^32 so things work out, but
- * it's not exactly human readable output.  As a special case, 0 is encoded as a space
- */
-
-static const unsigned char charToValue[256] = {
-    /* "" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\b" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\020" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\030" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* " " */ 0xff, 0x00, 0xff, 0x01, 0x02, 0x03, 0x04, 0xff,
-    /* "(" */ 0x05, 0x06, 0x07, 0x08, 0xff, 0xff, 0x09, 0x0a,
-    /* "0" */ 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
-    /* "8" */ 0x13, 0x14, 0xff, 0x15, 0x16, 0xff, 0x17, 0x18,
-    /* "@" */ 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
-    /* "H" */ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
-    /* "P" */ 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
-    /* "X" */ 0x31, 0x32, 0x33, 0x34, 0xff, 0x35, 0x36, 0xff,
-    /* "`" */ 0xff, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d,
-    /* "h" */ 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
-    /* "p" */ 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d,
-    /* "x" */ 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0xff,
-    /* "\200" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\210" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\220" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\230" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\240" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\250" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\260" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\270" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\300" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\310" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\320" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\330" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\340" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\350" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\360" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    /* "\370" */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-};
-
-static const FcChar8 valueToChar[0x55] = {
-    /* 0x00 */ '!', '#', '$', '%', '&', '(', ')', '*',
-    /* 0x08 */ '+', '.', '/', '0', '1', '2', '3', '4',
-    /* 0x10 */ '5', '6', '7', '8', '9', ';', '<', '>',
-    /* 0x18 */ '?', '@', 'A', 'B', 'C', 'D', 'E', 'F',
-    /* 0x20 */ 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
-    /* 0x28 */ 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
-    /* 0x30 */ 'W', 'X', 'Y', 'Z', '[', ']', '^', 'a',
-    /* 0x38 */ 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
-    /* 0x40 */ 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
-    /* 0x48 */ 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
-    /* 0x50 */ 'z', '{', '|', '}', '~',
-};
-
-static FcChar8 *
-FcCharSetParseValue (FcChar8 *string, FcChar32 *value)
-{
-    int		i;
-    FcChar32	v;
-    FcChar32	c;
-
-    if (*string == ' ')
-    {
-	v = 0;
-	string++;
-    }
-    else
-    {
-	v = 0;
-	for (i = 0; i < 5; i++)
-	{
-	    if (!(c = (FcChar32) (unsigned char) *string++))
-		return 0;
-	    c = charToValue[c];
-	    if (c == 0xff)
-		return 0;
-	    v = v * 85 + c;
-	}
-    }
-    *value = v;
-    return string;
-}
-
+/*
+ * Textual representation of charsets.
+ *
+ * A charset is written as a list of hexadecimal code points and inclusive
+ * "first-last" ranges, for example "20 41-5a" for the space character
+ * followed by the Latin capital letters.
+ */
+
+/*
+ * Parse one hexadecimal code point, or an inclusive "first-last" range,
+ * from *string after skipping leading whitespace and commas.
+ *
+ * On success, stores the bounds in *pfirst and *plast, advances *string
+ * past the consumed text and returns FcTrue.  Returns FcFalse, leaving
+ * all three outputs untouched, if a number is missing, strtol reports
+ * an error, a bound is negative, last < first or last > 0x10ffff.
+ */
 static FcBool
-FcCharSetUnparseValue (FcStrBuf *buf, FcChar32 value)
+FcNameParseRange (FcChar8 **string, FcChar32 *pfirst, FcChar32 *plast)
 {
-    int	    i;
-    if (value == 0)
-    {
-	return FcStrBufChar (buf, ' ');
-    }
-    else
-    {
-	FcChar8	string[6];
-	FcChar8	*s = string + 5;
-	string[5] = '\0';
-	for (i = 0; i < 5; i++)
-	{
-	    *--s = valueToChar[value % 85];
-	    value /= 85;
-	}
-	for (i = 0; i < 5; i++)
-	    if (!FcStrBufChar (buf, *s++))
-		return FcFalse;
-    }
-    return FcTrue;
+    char *s = (char *) *string;
+    char *end;
+    long first, last;
+
+    /* <ctype.h> functions need an unsigned char value, not a plain char. */
+    while (isspace ((unsigned char) *s) || *s == ',')
+	s++;
+    errno = 0;
+    first = last = strtol (s, &end, 16);
+    /* end == s means strtol found no digits; do not read that as 0. */
+    if (errno || end == s)
+	return FcFalse;
+    s = end;
+    while (isspace ((unsigned char) *s) || *s == ',')
+	s++;
+    if (*s == '-')
+    {
+	s++;
+	errno = 0;
+	last = strtol (s, &end, 16);
+	if (errno || end == s)
+	    return FcFalse;
+	s = end;
+    }
+
+    if (first < 0 || last < 0 || last < first || last > 0x10ffff)
+	return FcFalse;
+
+    *string = (FcChar8 *) s;
+    *pfirst = first;
+    *plast = last;
+    return FcTrue;
 }
 
+/*
+ * Build a charset from a string of hexadecimal code points and ranges,
+ * each item in the form accepted by FcNameParseRange.  Returns NULL if
+ * FcCharSetCreate fails, an item does not parse or FcCharSetAddChar fails.
+ */
 FcCharSet *
 FcNameParseCharSet (FcChar8 *string)
 {
     FcCharSet	*c;
-    FcChar32	ucs4;
-    FcCharLeaf	*leaf;
-    FcCharLeaf	temp;
-    FcChar32	bits;
-    int		i;
+    FcChar32	first, last;
 
     c = FcCharSetCreate ();
     if (!c)
 	goto bail0;
     while (*string)
     {
-	string = FcCharSetParseValue (string, &ucs4);
-	if (!string)
-	    goto bail1;
-	bits = 0;
-	for (i = 0; i < 256/32; i++)
-	{
-	    string = FcCharSetParseValue (string, &temp.map[i]);
-	    if (!string)
-		goto bail1;
-	    bits |= temp.map[i];
-	}
-	if (bits)
-	{
-	    leaf = malloc (sizeof (FcCharLeaf));
-	    if (!leaf)
-		goto bail1;
-	    *leaf = temp;
-	    if (!FcCharSetInsertLeaf (c, ucs4, leaf))
-		goto bail1;
-	}
+	FcChar32 u;
+
+	if (!FcNameParseRange (&string, &first, &last))
+	    goto bail1;
+
+	/* last <= 0x10ffff here, so last + 1 cannot wrap around. */
+	for (u = first; u < last + 1; u++)
+	    if (!FcCharSetAddChar (c, u))
+		goto bail1;
     }
     return c;
 bail1:
-    if (c->num)
-    {
-	free (FcCharSetLeaves (c));
-    }
-    if (c->num)
-    {
-	free (FcCharSetNumbers (c));
-    }
-    free (c);
+    FcCharSetDestroy (c);
 bail0:
     return NULL;
 }
 
+/* Append u to buf as lowercase hexadecimal digits, without a prefix. */
+static void
+FcNameUnparseUnicode (FcStrBuf *buf, FcChar32 u)
+{
+    FcChar8 buf_static[64];
+    snprintf ((char *) buf_static, sizeof (buf_static), "%x", u);
+    FcStrBufString (buf, buf_static);
+}
+
 FcBool
 FcNameUnparseCharSet (FcStrBuf *buf, const FcCharSet *c)
 {
     FcCharSetIter   ci;
+    FcChar32	    first, last;
     int		    i;
 #ifdef CHECK
     int		    len = buf->len;
 #endif
 
+    first = last = 0x7FFFFFFF;
+
     for (FcCharSetIterStart (c, &ci);
 	 ci.leaf;
 	 FcCharSetIterNext (c, &ci))
     {
-	if (!FcCharSetUnparseValue (buf, ci.ucs4))
-	    return FcFalse;
 	for (i = 0; i < 256/32; i++)
-	    if (!FcCharSetUnparseValue (buf, ci.leaf->map[i]))
-		return FcFalse;
+	{
+	    FcChar32 bits = ci.leaf->map[i];
+	    FcChar32 u = ci.ucs4 + i * 32;
+
+	    while (bits)
+	    {
+		if (bits & 1)
+		{
+			if (u != last + 1)
+			{
+			    if (last != first)
+			    {
+				FcStrBufChar (buf, '-');
+				FcNameUnparseUnicode (buf, last);
+			    }
+			    if (last != 0x7FFFFFFF)
+				FcStrBufChar (buf, ',');
+			    /* Start new range. */
+			    first = u;
+			    FcNameUnparseUnicode (buf, u);
+			}
+			last = u;
+		}
+		bits >>= 1;
+		u++;
+	    }
+	}
+    }
+    if (last != first)
+    {
+	FcStrBufChar (buf, '-');
+	FcNameUnparseUnicode (buf, last);
     }
 #ifdef CHECK
     {