/*
 * validatexml -- small, strict well-formedness check for the XML that the
 * archive web service accepts.
 *
 * Provenance: commit 79fa4e72262ce4cf2e64aa3bcae986f1d5ef6537,
 * "htdocs/archive: improved xml validation" (Daniel Marjamäki, 2013-07-21),
 * which added htdocs/archive/validatexml.c and htdocs/archive/validatexml.h
 * and replaced the inline '<' / '>' state machine in
 * validate_name_version_data() (htdocs/archive/webarchive.h) with a call to
 * validatexml().  That caller treats any result >= 1 as the number of the
 * offending line and reports "Invalid query: Invalid XML at line %i\n";
 * any other result means "no error".
 */

/* ---- validatexml.h ---------------------------------------------------- */
#ifndef VALIDATEXMLH
#define VALIDATEXMLH

/** validate xml data */
int validatexml(const char xmldata[]);

#endif

/* ---- validatexml.c ---------------------------------------------------- */
#include <ctype.h>
#include <string.h>

enum {
    XML_MAX_DEPTH = 10, /* deepest element nesting that is accepted */
    XML_MAX_NAME = 64   /* element-name buffer size, terminating NUL included */
};

/* Every document must start with exactly this prolog (21 characters). */
static const char xml_prolog[] = "<?xml version=\"1.0\"?>";

/* Element and attribute names may contain only 'a'-'z' and '-'. */
static int is_name_char(char c)
{
    return (c >= 'a' && c <= 'z') || c == '-';
}

/* Names must begin with a lower-case letter. */
static int is_name_start(char c)
{
    return c >= 'a' && c <= 'z';
}

/*
 * Characters allowed inside a quoted attribute value.  The explicit NUL
 * test matters: strchr() also matches the terminator of its first
 * argument, so without it the scan would run off the end of the input.
 */
static int is_value_char(char c)
{
    return c != '\0' &&
           (isalnum((unsigned char)c) || strchr(":-.,", c) != NULL);
}

/* Characters that may directly follow an element name or attribute value. */
static int ends_token(char c)
{
    return c == '>' || c == ' ' || c == '\r' || c == '\n';
}

/**
 * Skip white space starting at xmldata[*pos].
 *
 * @param xmldata  NUL-terminated input
 * @param pos      in/out: index into xmldata; on return it is the index of
 *                 the first non-white-space character (possibly the NUL)
 * @param linenr   in/out: incremented once per line break that is skipped;
 *                 "\r\n" counts as a single break
 */
void skipspaces(const char xmldata[], int *pos, int *linenr)
{
    const char *p = &xmldata[*pos];

    /* Cast: passing a negative plain char to isspace() is undefined. */
    while (isspace((unsigned char)*p)) {
        if (p[0] == '\r' && p[1] == '\n')
            ++p;
        if (*p == '\r' || *p == '\n')
            ++(*linenr);
        ++p;
    }
    *pos = (int)(p - xmldata);
}

/*
 * Parse one  name="value"  attribute whose first character is at
 * xmldata[*pos].  On success *pos is left on the character after the
 * closing quote.  Returns 1 on success, 0 if the attribute is malformed.
 */
static int parse_attribute(const char xmldata[], int *pos)
{
    int p = *pos;

    while (is_name_char(xmldata[p]))
        ++p;
    if (xmldata[p++] != '=')
        return 0;
    if (xmldata[p++] != '"')
        return 0;
    while (is_value_char(xmldata[p]))
        ++p;
    if (xmldata[p++] != '"')
        return 0;
    if (!ends_token(xmldata[p]))
        return 0;

    *pos = p;
    return 1;
}

/*
 * Parse an opening tag whose name starts at xmldata[*pos] (the '<' and any
 * white space after it have already been consumed).  The element name is
 * stored NUL-terminated in name[].  On success *pos is left on the closing
 * '>'.  Returns 1 on success, 0 if the tag is malformed.
 */
static int parse_open_tag(const char xmldata[], int *pos, int *linenr,
                          char name[XML_MAX_NAME])
{
    int p = *pos;
    int len = 0;

    if (!is_name_start(xmldata[p]))
        return 0;

    while (len < XML_MAX_NAME && is_name_char(xmldata[p + len]))
        ++len;
    if (len >= XML_MAX_NAME)
        return 0; /* would not fit together with the terminating NUL */

    memcpy(name, &xmldata[p], (size_t)len);
    name[len] = '\0';
    p += len;

    if (!ends_token(xmldata[p]))
        return 0;

    while (xmldata[p] != '>') {
        skipspaces(xmldata, &p, linenr);
        if (is_name_start(xmldata[p])) {
            if (!parse_attribute(xmldata, &p))
                return 0;
        } else if (xmldata[p] != '>') {
            return 0; /* includes running into the end of the input */
        }
    }

    *pos = p;
    return 1;
}

/*
 * Parse a closing tag; xmldata[*pos] is the '/'.  The tag must be exactly
 * "/name>".  On success *pos is left on the closing '>'.
 * Returns 1 on success, 0 on mismatch.
 */
static int parse_close_tag(const char xmldata[], int *pos, const char *name)
{
    const size_t len = strlen(name);
    const char *p = &xmldata[*pos + 1];

    /* p[len] is only read when the first len characters matched, so it
     * is inside the string (at worst it is the terminating NUL). */
    if (strncmp(p, name, len) != 0 || p[len] != '>')
        return 0;

    *pos += 1 + (int)len;
    return 1;
}

/**
 * Validate XML data.
 *
 * Accepted input: the prolog <?xml version="1.0"?> followed by text and
 * properly nested elements.  Element and attribute names consist of
 * 'a'-'z' and '-' and start with a letter; element names are at most 63
 * characters; nesting is at most 10 levels deep; attribute values are
 * double-quoted and limited to alphanumerics and ":-.,".  Self-closing
 * tags, comments and a bare '>' in text are rejected.
 *
 * @param xmldata  NUL-terminated document (NULL is treated as invalid)
 * @return -1 if the document is accepted, otherwise the 1-based number of
 *         the line on which validation failed (1 when the prolog is
 *         missing).
 */
int validatexml(const char xmldata[])
{
    const int prolog_len = (int)(sizeof xml_prolog - 1);
    char names[XML_MAX_DEPTH][XML_MAX_NAME]; /* stack of open elements */
    int level = 0;
    int linenr = 1;

    if (xmldata == NULL ||
        strncmp(xmldata, xml_prolog, (size_t)prolog_len) != 0)
        return 1;

    for (int pos = prolog_len; xmldata[pos] != '\0'; pos++) {
        const char c = xmldata[pos];

        if (c == '\r' && xmldata[pos + 1] == '\n') {
            ++linenr;
            ++pos; /* CRLF is one line break */
        } else if (c == '\r' || c == '\n') {
            ++linenr;
        } else if (c == '>') {
            return linenr; /* '>' outside of a tag */
        } else if (c == '<') {
            ++pos;
            skipspaces(xmldata, &pos, &linenr);

            if (xmldata[pos] == '/') {
                if (level <= 0)
                    return linenr; /* nothing left to close */
                --level;
                if (!parse_close_tag(xmldata, &pos, names[level]))
                    return linenr;
            } else {
                if (level >= XML_MAX_DEPTH)
                    return linenr; /* nested too deeply */
                if (!parse_open_tag(xmldata, &pos, &linenr, names[level]))
                    return linenr;
                ++level;
            }
            /* pos is on the tag's '>'; the loop increment steps past it. */
        }
    }

    if (level != 0)
        return linenr; /* unclosed element(s) at end of input */

    return -1;
}