Backport from dev branch: utf8codepoint() should always advance pointer.
This commit is contained in:
parent
76ffb5dde1
commit
4043d09295
|
@ -63,12 +63,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 224) /* two octets */
|
else if (octet < 224) /* two octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet -= (128+64);
|
octet -= (128+64);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 2; /* skip to next possible start of codepoint. */
|
*_str += 1; /* skip to next possible start of codepoint. */
|
||||||
retval = ((octet << 6) | (octet2 - 128));
|
retval = ((octet << 6) | (octet2 - 128));
|
||||||
if ((retval >= 0x80) && (retval <= 0x7FF))
|
if ((retval >= 0x80) && (retval <= 0x7FF))
|
||||||
return retval;
|
return retval;
|
||||||
|
@ -76,6 +77,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 240) /* three octets */
|
else if (octet < 240) /* three octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; // advance at least one byte in case of an error
|
||||||
octet -= (128+64+32);
|
octet -= (128+64+32);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
@ -85,7 +87,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 3; /* skip to next possible start of codepoint. */
|
*_str += 2; /* skip to next possible start of codepoint. */
|
||||||
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
|
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
|
||||||
|
|
||||||
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
|
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
|
||||||
|
@ -108,6 +110,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 248) /* four octets */
|
else if (octet < 248) /* four octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; // advance at least one byte in case of an error
|
||||||
octet -= (128+64+32+16);
|
octet -= (128+64+32+16);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
@ -121,7 +124,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 4; /* skip to next possible start of codepoint. */
|
*_str += 3; /* skip to next possible start of codepoint. */
|
||||||
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
|
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
|
||||||
((octet3 - 128) << 6) | ((octet4 - 128)) );
|
((octet3 - 128) << 6) | ((octet4 - 128)) );
|
||||||
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
|
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
|
||||||
|
@ -136,6 +139,34 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 252) /* five octets */
|
else if (octet < 252) /* five octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; // advance at least one byte in case of an error
|
||||||
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
|
*_str += 4; /* skip to next possible start of codepoint. */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
} /* else if */
|
||||||
|
|
||||||
|
else /* six octets */
|
||||||
|
{
|
||||||
|
(*_str)++; // advance at least one byte in case of an error
|
||||||
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
@ -156,32 +187,6 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
} /* else if */
|
} /* else if */
|
||||||
|
|
||||||
else /* six octets */
|
|
||||||
{
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
|
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
|
|
||||||
*_str += 6; /* skip to next possible start of codepoint. */
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
|
||||||
} /* else if */
|
|
||||||
|
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
} /* utf8codepoint */
|
} /* utf8codepoint */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue