Handle UTF-16 files. Partial fix for ticket #2083
This commit is contained in:
parent
905615e991
commit
171f570639
|
@ -59,15 +59,32 @@ void Preprocessor::writeError(const std::string &fileName, const unsigned int li
|
||||||
false));
|
false));
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned char readChar(std::istream &istr)
|
static unsigned char readChar(std::istream &istr, unsigned int bom)
|
||||||
{
|
{
|
||||||
unsigned char ch = (unsigned char)istr.get();
|
unsigned char ch = (unsigned char)istr.get();
|
||||||
|
|
||||||
|
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
|
||||||
|
// character is non-ASCII, replace it with 0xff
|
||||||
|
if (bom == 0xfeff || bom == 0xfffe) {
|
||||||
|
unsigned char ch2 = (unsigned char)istr.get();
|
||||||
|
int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
|
||||||
|
ch = (unsigned char)((ch16 >= 0x80) ? 0xff : ch16);
|
||||||
|
}
|
||||||
|
|
||||||
// Handling of newlines..
|
// Handling of newlines..
|
||||||
if (ch == '\r') {
|
if (ch == '\r') {
|
||||||
ch = '\n';
|
ch = '\n';
|
||||||
if ((char)istr.peek() == '\n')
|
if (bom == 0 && (char)istr.peek() == '\n')
|
||||||
(void)istr.get();
|
(void)istr.get();
|
||||||
|
else if (bom == 0xfeff || bom == 0xfffe) {
|
||||||
|
int c1 = istr.get();
|
||||||
|
int c2 = istr.get();
|
||||||
|
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
|
||||||
|
if (ch16 != '\n') {
|
||||||
|
istr.unget();
|
||||||
|
istr.unget();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ch;
|
return ch;
|
||||||
|
@ -108,6 +125,14 @@ static std::string unify(const std::string &s, char separator)
|
||||||
/** Just read the code into a string. Perform simple cleanup of the code */
|
/** Just read the code into a string. Perform simple cleanup of the code */
|
||||||
std::string Preprocessor::read(std::istream &istr, const std::string &filename)
|
std::string Preprocessor::read(std::istream &istr, const std::string &filename)
|
||||||
{
|
{
|
||||||
|
// The UTF-16 BOM is 0xfffe or 0xfeff.
|
||||||
|
unsigned int bom = 0;
|
||||||
|
if (istr.peek() >= 0xfe) {
|
||||||
|
bom = (istr.get() << 8);
|
||||||
|
if (istr.peek() >= 0xfe)
|
||||||
|
bom |= istr.get();
|
||||||
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// handling <backslash><newline>
|
// handling <backslash><newline>
|
||||||
|
@ -115,7 +140,7 @@ std::string Preprocessor::read(std::istream &istr, const std::string &filename)
|
||||||
// on the next <newline>, extra newlines will be added
|
// on the next <newline>, extra newlines will be added
|
||||||
std::ostringstream code;
|
std::ostringstream code;
|
||||||
unsigned int newlines = 0;
|
unsigned int newlines = 0;
|
||||||
for (unsigned char ch = readChar(istr); istr.good(); ch = readChar(istr)) {
|
for (unsigned char ch = readChar(istr,bom); istr.good(); ch = readChar(istr,bom)) {
|
||||||
// Replace assorted special chars with spaces..
|
// Replace assorted special chars with spaces..
|
||||||
if (((ch & 0x80) == 0) && (ch != '\n') && (std::isspace(ch) || std::iscntrl(ch)))
|
if (((ch & 0x80) == 0) && (ch != '\n') && (std::isspace(ch) || std::iscntrl(ch)))
|
||||||
ch = ' ';
|
ch = ' ';
|
||||||
|
@ -135,7 +160,7 @@ std::string Preprocessor::read(std::istream &istr, const std::string &filename)
|
||||||
if (chNext != '\n' && chNext != '\r' &&
|
if (chNext != '\n' && chNext != '\r' &&
|
||||||
(std::isspace(chNext) || std::iscntrl(chNext))) {
|
(std::isspace(chNext) || std::iscntrl(chNext))) {
|
||||||
// Skip whitespace between <backslash> and <newline>
|
// Skip whitespace between <backslash> and <newline>
|
||||||
(void)readChar(istr);
|
(void)readChar(istr,bom);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,7 +172,7 @@ std::string Preprocessor::read(std::istream &istr, const std::string &filename)
|
||||||
#endif
|
#endif
|
||||||
if (chNext == '\n' || chNext == '\r') {
|
if (chNext == '\n' || chNext == '\r') {
|
||||||
++newlines;
|
++newlines;
|
||||||
(void)readChar(istr); // Skip the "<backslash><newline>"
|
(void)readChar(istr,bom); // Skip the "<backslash><newline>"
|
||||||
} else
|
} else
|
||||||
code << "\\";
|
code << "\\";
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -62,6 +62,9 @@ private:
|
||||||
TEST_CASE(readCode1);
|
TEST_CASE(readCode1);
|
||||||
TEST_CASE(readCode2);
|
TEST_CASE(readCode2);
|
||||||
|
|
||||||
|
// reading utf-16 file
|
||||||
|
TEST_CASE(utf16);
|
||||||
|
|
||||||
// The bug that started the whole work with the new preprocessor
|
// The bug that started the whole work with the new preprocessor
|
||||||
TEST_CASE(Bug2190219);
|
TEST_CASE(Bug2190219);
|
||||||
|
|
||||||
|
@ -284,6 +287,59 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Checks Preprocessor::read() on input that starts with a UTF-16 byte order
// mark, once for each byte order (FF FE = low byte first, FE FF = high byte
// first). Each case feeds the raw bytes through a std::istringstream and
// compares the returned string with the expected 8-bit text.
void utf16() {
|
||||||
|
Settings settings;
|
||||||
|
Preprocessor preprocessor(&settings, this);
|
||||||
|
|
||||||
|
// a => a: an ASCII letter stored as one 16-bit unit is expected back as "a"
|
||||||
|
{
|
||||||
|
// BOM FF FE, then 'a' with the low byte first ('a', 0x00)
const char code[] = { (char)0xff, (char)0xfe, 'a', '\0' };
|
||||||
|
// sizeof(code) is passed explicitly so the embedded '\0' byte is kept in the string
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
ASSERT_EQUALS("a", preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// BOM FE FF, then 'a' with the high byte first (0x00, 'a')
const char code[] = { (char)0xfe, (char)0xff, '\0', 'a' };
|
||||||
|
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
ASSERT_EQUALS("a", preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// extended char => 0xff
// (the byte pair 'a','a' forms the 16-bit value 0x6161 in either byte order,
// which is outside the ASCII range)
|
||||||
|
{
|
||||||
|
const char code[] = { (char)0xff, (char)0xfe, 'a', 'a' };
|
||||||
|
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
// Expected result: a single 0xff byte, followed by a terminating 0
const char expected[] = { (char)0xff, 0 };
|
||||||
|
ASSERT_EQUALS(expected, preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Same check with the FE FF byte order mark
const char code[] = { (char)0xfe, (char)0xff, 'a', 'a' };
|
||||||
|
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
const char expected[] = { (char)0xff, 0 };
|
||||||
|
ASSERT_EQUALS(expected, preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// \r\n => \n: a UTF-16 encoded CR LF pair is expected back as one "\n"
|
||||||
|
{
|
||||||
|
// BOM FF FE, then CR and LF with the low byte first
const char code[] = { (char)0xff, (char)0xfe, '\r', '\0', '\n', '\0' };
|
||||||
|
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
ASSERT_EQUALS("\n", preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// BOM FE FF, then CR and LF with the high byte first
const char code[] = { (char)0xfe, (char)0xff, '\0', '\r', '\0', '\n' };
|
||||||
|
std::string s(code, sizeof(code));
|
||||||
|
std::istringstream istr(s);
|
||||||
|
ASSERT_EQUALS("\n", preprocessor.read(istr, "test.c"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Bug2190219() {
|
void Bug2190219() {
|
||||||
const char filedata[] = "int main()\n"
|
const char filedata[] = "int main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
|
|
Loading…
Reference in New Issue