Slightly modified UTF-8 handling, based on the utf-8(7) manual page: http://man7.org/linux/man-pages/man7/utf-8.7.html.
This commit is contained in:
parent
cbd7425a31
commit
98274ffa34
|
@ -397,26 +397,47 @@ static char *nextCharacter(const char *str, int *i)
|
|||
static char character[MAX_NAME_LENGTH];
|
||||
|
||||
unsigned char bit;
|
||||
int n;
|
||||
int n, numBits;
|
||||
|
||||
memset(character, '\0', MAX_NAME_LENGTH);
|
||||
|
||||
n = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
bit = (unsigned char)str[*i];
|
||||
|
||||
if ((bit >= ' ' && bit <= '~') || bit >= 0xC0 || bit == '\0')
|
||||
numBits = 0;
|
||||
|
||||
if (bit == '\0')
|
||||
{
|
||||
if (n > 0)
|
||||
{
|
||||
return character[0] != '\0' ? character : NULL;
|
||||
return NULL;
|
||||
}
|
||||
else if (bit <= 0x0000007F)
|
||||
{
|
||||
numBits = 1;
|
||||
}
|
||||
else if (bit <= 0x000007FF)
|
||||
{
|
||||
numBits = 2;
|
||||
}
|
||||
else if (bit <= 0x0000FFFF)
|
||||
{
|
||||
numBits = 3;
|
||||
}
|
||||
else if (bit <= 0x001FFFFF)
|
||||
{
|
||||
numBits = 4;
|
||||
}
|
||||
else if (bit <= 0x03FFFFFF)
|
||||
{
|
||||
numBits = 5;
|
||||
}
|
||||
else if (bit <= 0x7FFFFFFF)
|
||||
{
|
||||
numBits = 6;
|
||||
}
|
||||
|
||||
character[n++] = str[*i];
|
||||
|
||||
*i = *i + 1;
|
||||
for (n = 0 ; n < numBits ; n++)
|
||||
{
|
||||
character[n] = str[(*i)++];
|
||||
}
|
||||
|
||||
return character;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue