Slightly modified UTF-8 handling. Based on http://man7.org/linux/man-pages/man7/utf-8.7.html.

This commit is contained in:
Steve 2019-11-06 19:18:03 +00:00
parent cbd7425a31
commit 98274ffa34
1 changed file with 129 additions and 108 deletions

View File

@ -397,26 +397,47 @@ static char *nextCharacter(const char *str, int *i)
static char character[MAX_NAME_LENGTH];
unsigned char bit;
int n;
int n, numBits;
memset(character, '\0', MAX_NAME_LENGTH);
n = 0;
while (1)
{
bit = (unsigned char)str[*i];
if ((bit >= ' ' && bit <= '~') || bit >= 0xC0 || bit == '\0')
numBits = 0;
if (bit == '\0')
{
if (n > 0)
{
return character[0] != '\0' ? character : NULL;
return NULL;
}
else if (bit <= 0x0000007F)
{
numBits = 1;
}
else if (bit <= 0x000007FF)
{
numBits = 2;
}
else if (bit <= 0x0000FFFF)
{
numBits = 3;
}
else if (bit <= 0x001FFFFF)
{
numBits = 4;
}
else if (bit <= 0x03FFFFFF)
{
numBits = 5;
}
else if (bit <= 0x7FFFFFFF)
{
numBits = 6;
}
character[n++] = str[*i];
*i = *i + 1;
for (n = 0 ; n < numBits ; n++)
{
character[n] = str[(*i)++];
}
return character;
}