Make sure utf8codepoint() advances the pointer even on bogus input.
This patch is from Francois Gouget, who found the bug in MojoSetup's copy of this function.
This commit is contained in:
parent
c6ca60eb20
commit
6a1aca5db5
|
@ -61,12 +61,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 224) /* two octets */
|
else if (octet < 224) /* two octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet -= (128+64);
|
octet -= (128+64);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 2; /* skip to next possible start of codepoint. */
|
*_str += 1; /* skip to next possible start of codepoint. */
|
||||||
retval = ((octet << 6) | (octet2 - 128));
|
retval = ((octet << 6) | (octet2 - 128));
|
||||||
if ((retval >= 0x80) && (retval <= 0x7FF))
|
if ((retval >= 0x80) && (retval <= 0x7FF))
|
||||||
return retval;
|
return retval;
|
||||||
|
@ -74,6 +75,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 240) /* three octets */
|
else if (octet < 240) /* three octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet -= (128+64+32);
|
octet -= (128+64+32);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
@ -83,7 +85,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 3; /* skip to next possible start of codepoint. */
|
*_str += 2; /* skip to next possible start of codepoint. */
|
||||||
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
|
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
|
||||||
|
|
||||||
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
|
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
|
||||||
|
@ -106,6 +108,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 248) /* four octets */
|
else if (octet < 248) /* four octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet -= (128+64+32+16);
|
octet -= (128+64+32+16);
|
||||||
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
|
@ -119,7 +122,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 4; /* skip to next possible start of codepoint. */
|
*_str += 3; /* skip to next possible start of codepoint. */
|
||||||
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
|
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
|
||||||
((octet3 - 128) << 6) | ((octet4 - 128)) );
|
((octet3 - 128) << 6) | ((octet4 - 128)) );
|
||||||
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
|
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
|
||||||
|
@ -134,6 +137,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
|
|
||||||
else if (octet < 252) /* five octets */
|
else if (octet < 252) /* five octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
@ -150,12 +154,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
||||||
*_str += 5; /* skip to next possible start of codepoint. */
|
*_str += 4; /* skip to next possible start of codepoint. */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
} /* else if */
|
} /* else if */
|
||||||
|
|
||||||
else /* six octets */
|
else /* six octets */
|
||||||
{
|
{
|
||||||
|
(*_str)++; /* advance at least one byte in case of an error */
|
||||||
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
|
||||||
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
|
||||||
return UNICODE_BOGUS_CHAR_VALUE;
|
return UNICODE_BOGUS_CHAR_VALUE;
|
||||||
|
|
Loading…
Reference in New Issue