Make sure utf8codepoint() advances the pointer even on bogus input.

This patch was from Francois Gouget, who found this bug in MojoSetup's copy of this function.
This commit is contained in:
Ryan C. Gordon 2011-06-01 03:10:08 -04:00
parent c6ca60eb20
commit 6a1aca5db5
1 changed file with 9 additions and 4 deletions

View File

@ -61,12 +61,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 224) /* two octets */
{
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
*_str += 2; /* skip to next possible start of codepoint. */
*_str += 1; /* skip to next possible start of codepoint. */
retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF))
return retval;
@ -74,6 +75,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 240) /* three octets */
{
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@ -83,7 +85,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
*_str += 3; /* skip to next possible start of codepoint. */
*_str += 2; /* skip to next possible start of codepoint. */
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
@ -106,6 +108,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 248) /* four octets */
{
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32+16);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@ -119,7 +122,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
*_str += 4; /* skip to next possible start of codepoint. */
*_str += 3; /* skip to next possible start of codepoint. */
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
@ -134,6 +137,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 252) /* five octets */
{
(*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
@ -150,12 +154,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
*_str += 5; /* skip to next possible start of codepoint. */
*_str += 4; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */
else /* six octets */
{
(*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;