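Slightly modified UTF-8 handling: nextCharacter now determines the byte length of the next character first and then copies that many bytes. Based on http://man7.org/linux/man-pages/man7/utf-8.7.html.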
This commit is contained in:
parent
cbd7425a31
commit
98274ffa34
|
@ -397,26 +397,47 @@ static char *nextCharacter(const char *str, int *i)
|
||||||
static char character[MAX_NAME_LENGTH];
|
static char character[MAX_NAME_LENGTH];
|
||||||
|
|
||||||
unsigned char bit;
|
unsigned char bit;
|
||||||
int n;
|
int n, numBits;
|
||||||
|
|
||||||
memset(character, '\0', MAX_NAME_LENGTH);
|
memset(character, '\0', MAX_NAME_LENGTH);
|
||||||
|
|
||||||
n = 0;
|
|
||||||
|
|
||||||
while (1)
|
|
||||||
{
|
|
||||||
bit = (unsigned char)str[*i];
|
bit = (unsigned char)str[*i];
|
||||||
|
|
||||||
if ((bit >= ' ' && bit <= '~') || bit >= 0xC0 || bit == '\0')
|
numBits = 0;
|
||||||
|
|
||||||
|
if (bit == '\0')
|
||||||
{
|
{
|
||||||
if (n > 0)
|
return NULL;
|
||||||
{
|
|
||||||
return character[0] != '\0' ? character : NULL;
|
|
||||||
}
|
}
|
||||||
|
else if (bit <= 0x0000007F)
|
||||||
|
{
|
||||||
|
numBits = 1;
|
||||||
|
}
|
||||||
|
else if (bit <= 0x000007FF)
|
||||||
|
{
|
||||||
|
numBits = 2;
|
||||||
|
}
|
||||||
|
else if (bit <= 0x0000FFFF)
|
||||||
|
{
|
||||||
|
numBits = 3;
|
||||||
|
}
|
||||||
|
else if (bit <= 0x001FFFFF)
|
||||||
|
{
|
||||||
|
numBits = 4;
|
||||||
|
}
|
||||||
|
else if (bit <= 0x03FFFFFF)
|
||||||
|
{
|
||||||
|
numBits = 5;
|
||||||
|
}
|
||||||
|
else if (bit <= 0x7FFFFFFF)
|
||||||
|
{
|
||||||
|
numBits = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
character[n++] = str[*i];
|
for (n = 0 ; n < numBits ; n++)
|
||||||
|
{
|
||||||
*i = *i + 1;
|
character[n] = str[(*i)++];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return character;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue