Make sure utf8codepoint() advances the pointer even on bogus input.

This patch was from Francois Gouget, who found this bug in MojoSetup's copy
of this function.
This commit is contained in:
Ryan C. Gordon 2011-06-01 03:10:08 -04:00
parent c6ca60eb20
commit 6a1aca5db5
1 changed file with 9 additions and 4 deletions

View File

@ -61,12 +61,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 224) /* two octets */ else if (octet < 224) /* two octets */
{ {
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64); octet -= (128+64);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str)); octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
*_str += 2; /* skip to next possible start of codepoint. */ *_str += 1; /* skip to next possible start of codepoint. */
retval = ((octet << 6) | (octet2 - 128)); retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF)) if ((retval >= 0x80) && (retval <= 0x7FF))
return retval; return retval;
@ -74,6 +75,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 240) /* three octets */ else if (octet < 240) /* three octets */
{ {
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32); octet -= (128+64+32);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str)); octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@ -83,7 +85,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
*_str += 3; /* skip to next possible start of codepoint. */ *_str += 2; /* skip to next possible start of codepoint. */
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) ); retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */ /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
@ -106,6 +108,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 248) /* four octets */ else if (octet < 248) /* four octets */
{ {
(*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32+16); octet -= (128+64+32+16);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str)); octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@ -119,7 +122,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
*_str += 4; /* skip to next possible start of codepoint. */ *_str += 3; /* skip to next possible start of codepoint. */
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) | retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) ); ((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF)) if ((retval >= 0x10000) && (retval <= 0x10FFFF))
@ -134,6 +137,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
else if (octet < 252) /* five octets */ else if (octet < 252) /* five octets */
{ {
(*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str)); octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
@ -150,12 +154,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
*_str += 5; /* skip to next possible start of codepoint. */ *_str += 4; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */ } /* else if */
else /* six octets */ else /* six octets */
{ {
(*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str)); octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */ if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE; return UNICODE_BOGUS_CHAR_VALUE;