Make sure utf8codepoint() advances the pointer even on bogus input.
This patch was from Francois Gouget, who found this bug in MojoSetup's copy of this function.
This commit is contained in:
parent
c6ca60eb20
commit
6a1aca5db5
|
@ -61,12 +61,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
|
||||
else if (octet < 224) /* two octets */
|
||||
{
|
||||
+ (*_str)++; /* advance at least one byte in case of an error */
|
||||
octet -= (128+64);
|
||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
||||
- *_str += 2; /* skip to next possible start of codepoint. */
|
||||
+ *_str += 1; /* skip to next possible start of codepoint. */
|
||||
retval = ((octet << 6) | (octet2 - 128));
|
||||
if ((retval >= 0x80) && (retval <= 0x7FF))
|
||||
return retval;
|
||||
|
@ -74,6 +75,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
|
||||
else if (octet < 240) /* three octets */
|
||||
{
|
||||
+ (*_str)++; /* advance at least one byte in case of an error */
|
||||
octet -= (128+64+32);
|
||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
|
@ -83,7 +85,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
||||
- *_str += 3; /* skip to next possible start of codepoint. */
|
||||
+ *_str += 2; /* skip to next possible start of codepoint. */
|
||||
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
|
||||
|
||||
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
|
||||
|
@ -106,6 +108,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
|
||||
else if (octet < 248) /* four octets */
|
||||
{
|
||||
+ (*_str)++; /* advance at least one byte in case of an error */
|
||||
octet -= (128+64+32+16);
|
||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
|
@ -119,7 +122,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
||||
- *_str += 4; /* skip to next possible start of codepoint. */
|
||||
+ *_str += 3; /* skip to next possible start of codepoint. */
|
||||
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
|
||||
((octet3 - 128) << 6) | ((octet4 - 128)) );
|
||||
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
|
||||
|
@ -134,6 +137,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
|
||||
else if (octet < 252) /* five octets */
|
||||
{
|
||||
+ (*_str)++; /* advance at least one byte in case of an error */
|
||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
@ -150,12 +154,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
|||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
||||
- *_str += 5; /* skip to next possible start of codepoint. */
|
||||
+ *_str += 4; /* skip to next possible start of codepoint. */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
} /* else if */
|
||||
|
||||
else /* six octets */
|
||||
{
|
||||
+ (*_str)++; /* advance at least one byte in case of an error */
|
||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||
return UNICODE_BOGUS_CHAR_VALUE;
|
||||
|
|
Loading…
Reference in New Issue