htdocs/archive: improved xml validation

This commit is contained in:
Daniel Marjamäki 2013-07-21 17:00:12 +02:00
parent cb73a1d0c7
commit 79fa4e7226
3 changed files with 112 additions and 34 deletions

View File

@ -0,0 +1,95 @@
#include "validatexml.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
/* Limits of the validator: maximum element nesting depth, and the size of
 * the buffer holding one element name (including the terminating NUL). */
enum { VALIDATEXML_MAX_LEVELS = 10, VALIDATEXML_MAX_NAME = 64 };

/* True for characters accepted in element and attribute names. */
static int xml_is_name_char(char c)
{
    return (c >= 'a' && c <= 'z') || c == '-';
}

/* True for characters that may directly follow an element name or a quoted
 * attribute value. Written out explicitly (instead of strchr) so that the
 * terminating NUL is never accepted. */
static int xml_ends_token(char c)
{
    return c == '>' || c == ' ' || c == '\r' || c == '\n';
}

/* True for characters accepted inside a quoted attribute value.
 * strchr() matches the terminating NUL of its search set, so NUL has to be
 * rejected first; otherwise the scan would run past the end of the input. */
static int xml_is_value_char(char c)
{
    if (c == '\0')
        return 0;
    return isalnum((unsigned char)c) || strchr(":-.,", c) != NULL;
}

/**
 * Advance *pos past any whitespace in xmldata, counting line breaks.
 *
 * Every "\r\n" pair, lone '\r' or lone '\n' that is skipped increments
 * *linenr by one. On return *pos is the index of the first non-whitespace
 * character, which may be the terminating NUL.
 *
 * @param xmldata NUL-terminated input buffer
 * @param pos     in/out: index into xmldata
 * @param linenr  in/out: current line number
 */
void skipspaces(const char xmldata[], int *pos, int *linenr)
{
    const char *p = &xmldata[*pos];
    /* isspace() already covers '\r' and '\n'. The cast is required because
     * passing a negative char value to a <ctype.h> function is undefined. */
    while (isspace((unsigned char)*p)) {
        if (p[0] == '\r' && p[1] == '\n')
            ++p;                /* CRLF counts as a single line break */
        if (*p == '\r' || *p == '\n')
            ++(*linenr);
        ++p;
    }
    *pos = (int)(p - xmldata);
}

/**
 * Validate a restricted subset of XML.
 *
 * The data must start with the exact declaration <?xml version="1.0"?>.
 * After that, elements must be properly nested and closed. Element and
 * attribute names consist of 'a'..'z' and '-'. Attribute values must be
 * double-quoted and contain only alphanumerics and the characters ":-.,".
 * A '>' outside of a tag is rejected. At most VALIDATEXML_MAX_LEVELS
 * nested elements and names shorter than VALIDATEXML_MAX_NAME are accepted.
 *
 * @param xmldata NUL-terminated XML text (NULL is treated as invalid)
 * @return -1 if the data is valid, otherwise the 1-based line number at
 *         which validation failed (1 if the XML declaration is missing).
 */
int validatexml(const char xmldata[])
{
    static const char xmlheader[] = "<?xml version=\"1.0\"?>";
    const int headerlen = (int)(sizeof xmlheader - 1);

    if (xmldata == NULL || strncmp(xmldata, xmlheader, sizeof xmlheader - 1) != 0)
        return 1;

    int linenr = 1;
    char elementNames[VALIDATEXML_MAX_LEVELS][VALIDATEXML_MAX_NAME];
    int level = 0;  /* number of currently open elements */

    for (int pos = headerlen; xmldata[pos]; pos++) {
        if (xmldata[pos] == '\r' && xmldata[pos + 1] == '\n') {
            ++linenr;
            ++pos;
        } else if (xmldata[pos] == '\r' || xmldata[pos] == '\n') {
            ++linenr;
        } else if (xmldata[pos] == '<') {
            ++pos;
            skipspaces(xmldata, &pos, &linenr);

            if (xmldata[pos] == '/') {
                /* Closing tag: must match the innermost open element. */
                if (level <= 0)
                    return linenr;
                --level;
                const size_t len = strlen(elementNames[level]);
                if (strncmp(&xmldata[pos + 1], elementNames[level], len) != 0 ||
                    xmldata[pos + 1 + len] != '>')
                    return linenr;
                pos += 1 + (int)len;    /* leave pos on the '>' */
            } else {
                /* Opening tag. */
                if (level >= VALIDATEXML_MAX_LEVELS)
                    return linenr;
                if (!isalpha((unsigned char)xmldata[pos]))
                    return linenr;

                /* Copy the element name, always leaving room for the NUL.
                 * An over-long name stops on a name character and is then
                 * rejected by the delimiter check below. */
                int len = 0;
                while (len < VALIDATEXML_MAX_NAME - 1 && xml_is_name_char(xmldata[pos + len])) {
                    elementNames[level][len] = xmldata[pos + len];
                    ++len;
                }
                elementNames[level][len] = '\0';
                pos += len;

                if (!xml_ends_token(xmldata[pos]))
                    return linenr;
                level++;

                /* Attributes: name="value", separated by whitespace. */
                while (xmldata[pos] != '>') {
                    skipspaces(xmldata, &pos, &linenr);
                    if (xmldata[pos] >= 'a' && xmldata[pos] <= 'z') {
                        while (xml_is_name_char(xmldata[pos]))
                            ++pos;
                        if (xmldata[pos++] != '=')
                            return linenr;
                        if (xmldata[pos++] != '\"')
                            return linenr;
                        while (xml_is_value_char(xmldata[pos]))
                            ++pos;
                        if (xmldata[pos++] != '\"')
                            return linenr;
                        if (!xml_ends_token(xmldata[pos]))
                            return linenr;
                    } else if (xmldata[pos] != '>') {
                        return linenr;
                    }
                }
            }
        } else if (xmldata[pos] == '>') {
            /* '>' without a preceding '<' */
            return linenr;
        }
    }

    /* Any element still open at end of input is an error. */
    if (level != 0)
        return linenr;
    return -1;
}

View File

@ -0,0 +1,7 @@
#ifndef VALIDATEXMLH
#define VALIDATEXMLH
/**
 * Validate XML data.
 *
 * The data must begin with the declaration <?xml version="1.0"?>; the
 * elements that follow must be properly nested and closed, within a
 * bounded nesting depth.
 *
 * @param xmldata NUL-terminated XML text
 * @return -1 if no error was found, otherwise the 1-based line number at
 *         which validation failed (1 if the XML declaration is missing)
 */
int validatexml(const char xmldata[]);
#endif

View File

@ -1,3 +1,4 @@
#include "validatexml.h"
#include <string.h>
#include <ctype.h>
#include <stdio.h>
@ -174,42 +175,17 @@ const char *validate_name_version_data(const char *data)
i += 6;
// validate xml
char xml[strlen(data+i)];
memset(xml, 0, strlen(data+i));
unencode(data+i, xml);
if (strncmp(xml,"<?xml version=\"1.0\"?>",21)!=0)
return "invalid query string: XML must start with '&lt;?xml version=\"1.0\"?&gt;'";
int linenr = 1;
enum {TEXT,ELEMENT} state = TEXT;
for (int pos = 21; xml[pos]; pos++) {
if (strncmp(&xml[pos], "\r\n", 2)==0) {
++linenr;
++pos;
} else if (xml[pos]=='\r' || xml[pos]=='\n') {
++linenr;
} else if (xml[pos] == '<') {
if (state != TEXT) {
static char errmsg[256];
sprintf(errmsg, "invalid query string: Invalid XML at line %i", linenr);
return errmsg;
}
state = ELEMENT;
} else if (xml[pos] == '>') {
if (state != ELEMENT) {
static char errmsg[256];
sprintf(errmsg, "invalid query string: Invalid XML at line %i", linenr);
return errmsg;
}
state = TEXT;
}
}
if (state != TEXT) {
static char errmsg[256];
sprintf(errmsg, "invalid query string: Invalid XML at line %i", linenr);
return errmsg;
char xmldata[strlen(data+i)];
memset(xmldata, 0, strlen(data+i));
unencode(data+i, xmldata);
const int badline = validatexml(xmldata);
if (badline >= 1) {
static char buf[256];
sprintf(buf, "Invalid query: Invalid XML at line %i\n", badline);
return buf;
}
// No error
return NULL;
}