Update http-parser to 39ff0975c220ef76a2d98c8ac61b0d36f4dce80f

This commit is contained in:
Tatsuhiro Tsujikawa 2015-05-22 01:55:47 +09:00
parent bcf9e66dbc
commit d983dd81ec
5 changed files with 217 additions and 19 deletions

View File

@ -61,3 +61,7 @@ Marc O'Morain <github.com@marcomorain.com>
Jeff Pinner <jpinner@twitter.com>
Timothy J Fontaine <tjfontaine@gmail.com>
Akagi201 <akagi201@gmail.com>
Romain Giraud <giraud.romain@gmail.com>
Jay Satiro <raysatiro@yahoo.com>
Arne Steen <Arne.Steen@gmx.de>
Kjell Schubert <kjell.schubert@gmail.com>

View File

@ -14,7 +14,7 @@ dump_url (const char *url, const struct http_parser_url *u)
continue;
}
printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n",
printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
i,
u->field_data[i].off,
u->field_data[i].len,
@ -24,16 +24,18 @@ dump_url (const char *url, const struct http_parser_url *u)
}
int main(int argc, char ** argv) {
struct http_parser_url u;
int len, connect, result;
if (argc != 3) {
printf("Syntax : %s connect|get url\n", argv[0]);
return 1;
}
struct http_parser_url u;
int len = strlen(argv[2]);
int connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
len = strlen(argv[2]);
connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
printf("Parsing %s, connect %d\n", argv[2], connect);
int result = http_parser_parse_url(argv[2], len, connect, &u);
result = http_parser_parse_url(argv[2], len, connect, &u);
if (result != 0) {
printf("Parse error : %d\n", result);
return result;

View File

@ -1782,9 +1782,9 @@ reexecute:
if (parser->flags & F_TRAILING) {
/* End of a chunked request */
UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete);
break;
UPDATE_STATE(s_message_done);
CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
REEXECUTE();
}
UPDATE_STATE(s_headers_done);
@ -1832,8 +1832,11 @@ reexecute:
parser->nread = 0;
/* Exit, the rest of the connect is in a different protocol. */
if (parser->upgrade) {
int hasBody = parser->flags & F_CHUNKED ||
(parser->content_length > 0 && parser->content_length != ULLONG_MAX);
if (parser->upgrade && (parser->method == HTTP_CONNECT ||
(parser->flags & F_SKIPBODY) || !hasBody)) {
/* Exit, the rest of the message is in a different protocol. */
UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete);
RETURN((p - data) + 1);
@ -1854,8 +1857,7 @@ reexecute:
/* Content-Length header given and non-zero */
UPDATE_STATE(s_body_identity);
} else {
if (parser->type == HTTP_REQUEST ||
!http_message_needs_eof(parser)) {
if (!http_message_needs_eof(parser)) {
/* Assume content-length 0 - read the next */
UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete);
@ -1915,6 +1917,10 @@ reexecute:
case s_message_done:
UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete);
if (parser->upgrade) {
/* Exit, the rest of the message is in a different protocol. */
RETURN((p - data) + 1);
}
break;
case s_chunk_size_start:
@ -1994,6 +2000,7 @@ reexecute:
} else {
UPDATE_STATE(s_chunk_data);
}
CALLBACK_NOTIFY(chunk_header);
break;
}
@ -2033,6 +2040,7 @@ reexecute:
STRICT_CHECK(ch != LF);
parser->nread = 0;
UPDATE_STATE(s_chunk_size_start);
CALLBACK_NOTIFY(chunk_complete);
break;
default:
@ -2144,13 +2152,15 @@ http_parser_settings_init(http_parser_settings *settings)
/* Return the `name` member of the http_strerror_tab entry selected by err.
 * The assertions require err to be smaller than the number of entries in
 * http_strerror_tab; the second form converts err to size_t before the
 * comparison against the sizeof-derived element count. */
const char *
http_errno_name(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
assert(((size_t) err) <
(sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0])));
return http_strerror_tab[err].name;
}
/* Return the `description` member of the http_strerror_tab entry selected by
 * err. The assertions require err to be smaller than the number of entries
 * in http_strerror_tab; the second form converts err to size_t before the
 * comparison against the sizeof-derived element count. */
const char *
http_errno_description(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
assert(((size_t) err) <
(sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0])));
return http_strerror_tab[err].description;
}
@ -2221,6 +2231,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
static int
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
assert(u->field_set & (1 << UF_HOST));
enum http_host_state s;
const char *p;
@ -2365,7 +2376,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
/* host must be present if there is a schema */
/* parsing http:///toto will fail */
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
if ((u->field_set & (1 << UF_SCHEMA)) &&
(u->field_set & (1 << UF_HOST)) == 0) {
return 1;
}
if (u->field_set & (1 << UF_HOST)) {
if (http_parse_host(buf, u, found_at) != 0) {
return 1;
}

View File

@ -26,8 +26,8 @@ extern "C" {
/* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 4
#define HTTP_PARSER_VERSION_PATCH 2
#define HTTP_PARSER_VERSION_MINOR 5
#define HTTP_PARSER_VERSION_PATCH 0
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
@ -160,6 +160,8 @@ enum flags
XX(CB_body, "the on_body callback failed") \
XX(CB_message_complete, "the on_message_complete callback failed") \
XX(CB_status, "the on_status callback failed") \
XX(CB_chunk_header, "the on_chunk_header callback failed") \
XX(CB_chunk_complete, "the on_chunk_complete callback failed") \
\
/* Parsing-related errors */ \
XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
@ -240,6 +242,11 @@ struct http_parser_settings {
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
/* When on_chunk_header is called, the current chunk length is stored
* in parser->content_length.
*/
http_cb on_chunk_header;
http_cb on_chunk_complete;
};

View File

@ -39,6 +39,7 @@
#define MAX_HEADERS 13
#define MAX_ELEMENT_SIZE 2048
#define MAX_CHUNKS 16
#define MIN(a,b) ((a) < (b) ? (a) : (b))
@ -65,6 +66,10 @@ struct message {
char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE];
int should_keep_alive;
int num_chunks;
int num_chunks_complete;
int chunk_lengths[MAX_CHUNKS];
const char *upgrade; // upgraded body
unsigned short http_major;
@ -301,6 +306,8 @@ const struct message requests[] =
{ { "Transfer-Encoding" , "chunked" }
}
,.body= "all your base are belong to us"
,.num_chunks_complete= 2
,.chunk_lengths= { 0x1e }
}
#define TWO_CHUNKS_MULT_ZERO_END 9
@ -327,6 +334,8 @@ const struct message requests[] =
{ { "Transfer-Encoding", "chunked" }
}
,.body= "hello world"
,.num_chunks_complete= 3
,.chunk_lengths= { 5, 6 }
}
#define CHUNKED_W_TRAILING_HEADERS 10
@ -357,6 +366,8 @@ const struct message requests[] =
, { "Content-Type", "text/plain" }
}
,.body= "hello world"
,.num_chunks_complete= 3
,.chunk_lengths= { 5, 6 }
}
#define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11
@ -383,6 +394,8 @@ const struct message requests[] =
{ { "Transfer-Encoding", "chunked" }
}
,.body= "hello world"
,.num_chunks_complete= 3
,.chunk_lengths= { 5, 6 }
}
#define WITH_QUOTES 12
@ -1036,6 +1049,58 @@ const struct message requests[] =
,.body= ""
}
#define UPGRADE_POST_REQUEST 38
, {.name = "upgrade post request"
,.type= HTTP_REQUEST
,.raw= "POST /demo HTTP/1.1\r\n"
"Host: example.com\r\n"
"Connection: Upgrade\r\n"
"Upgrade: HTTP/2.0\r\n"
"Content-Length: 15\r\n"
"\r\n"
"sweet post body"
"Hot diggity dogg"
,.should_keep_alive= TRUE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.request_path= "/demo"
,.request_url= "/demo"
,.num_headers= 4
,.upgrade="Hot diggity dogg"
,.headers= { { "Host", "example.com" }
, { "Connection", "Upgrade" }
, { "Upgrade", "HTTP/2.0" }
, { "Content-Length", "15" }
}
,.body= "sweet post body"
}
#define CONNECT_WITH_BODY_REQUEST 39
, {.name = "connect with body request"
,.type= HTTP_REQUEST
,.raw= "CONNECT foo.bar.com:443 HTTP/1.0\r\n"
"User-agent: Mozilla/1.1N\r\n"
"Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
"Content-Length: 10\r\n"
"\r\n"
"blarfcicle"
,.should_keep_alive= FALSE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.request_url= "foo.bar.com:443"
,.num_headers= 3
,.upgrade="blarfcicle"
,.headers= { { "User-agent", "Mozilla/1.1N" }
, { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
, { "Content-Length", "10" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */
};
@ -1195,7 +1260,8 @@ const struct message responses[] =
,.body =
"This is the data in the first chunk\r\n"
"and this is the second one\r\n"
,.num_chunks_complete= 3
,.chunk_lengths= { 0x25, 0x1c }
}
#define NO_CARRIAGE_RET 5
@ -1349,6 +1415,8 @@ const struct message responses[] =
, { "Connection", "close" }
}
,.body= ""
,.num_chunks_complete= 1
,.chunk_lengths= {}
}
#define NON_ASCII_IN_STATUS_LINE 10
@ -1531,6 +1599,7 @@ const struct message responses[] =
}
,.body_size= 0
,.body= ""
,.num_chunks_complete= 1
}
#if !HTTP_PARSER_STRICT
@ -1604,6 +1673,8 @@ const struct message responses[] =
, { "Transfer-Encoding", "chunked" }
}
,.body= "\n"
,.num_chunks_complete= 2
,.chunk_lengths= { 1 }
}
#define EMPTY_REASON_PHRASE_AFTER_SPACE 20
@ -1839,6 +1910,35 @@ response_status_cb (http_parser *p, const char *buf, size_t len)
return 0;
}
/* on_chunk_header test callback.
 *
 * Counts one more chunk for the message currently being collected and, as
 * long as the chunk's index still fits in chunk_lengths (MAX_CHUNKS slots),
 * records the value the parser holds in content_length for it. Chunks past
 * MAX_CHUNKS are counted but their lengths are not stored.
 *
 * Always returns 0.
 */
int
chunk_header_cb (http_parser *p)
{
  assert(p == parser);

  /* Post-increment: `slot` is the index of this chunk, i.e. the count
   * before this call. */
  int slot = messages[num_messages].num_chunks++;
  if (slot >= MAX_CHUNKS) {
    return 0;
  }

  messages[num_messages].chunk_lengths[slot] = p->content_length;
  return 0;
}
/* on_chunk_complete test callback.
 *
 * Bumps the completed-chunk counter of the message currently being
 * collected. Always returns 0.
 */
int
chunk_complete_cb (http_parser *p)
{
  assert(p == parser);

  /* Header and completion callbacks must strictly alternate: at this point
   * exactly one chunk header has been seen that has not been completed yet.
   * Checking this here (rather than only comparing final totals) catches
   * callbacks that fire the right number of times but in the wrong order. */
  assert(messages[num_messages].num_chunks_complete + 1 ==
         messages[num_messages].num_chunks);

  messages[num_messages].num_chunks_complete += 1;
  return 0;
}
/* These dontcall_* callbacks exist so that we can verify that when we're
* paused, no additional callbacks are invoked */
int
@ -1907,6 +2007,23 @@ dontcall_response_status_cb (http_parser *p, const char *buf, size_t len)
abort();
}
/* on_chunk_header stand-in that must never run: prints a diagnostic to
 * stderr and terminates the process with exit status 1. */
int
dontcall_chunk_header_cb (http_parser *p)
{
  (void) p; /* parameter intentionally unused */
  fputs("\n\n*** on_chunk_header() called on paused parser ***\n\n", stderr);
  exit(1);
}
/* on_chunk_complete stand-in that must never run: prints a diagnostic to
 * stderr and terminates the process with exit status 1. */
int
dontcall_chunk_complete_cb (http_parser *p)
{
  (void) p; /* parameter intentionally unused */
  fputs("\n\n*** on_chunk_complete() called on paused parser ***\n\n",
        stderr);
  exit(1);
}
static http_parser_settings settings_dontcall =
{.on_message_begin = dontcall_message_begin_cb
,.on_header_field = dontcall_header_field_cb
@ -1916,6 +2033,8 @@ static http_parser_settings settings_dontcall =
,.on_body = dontcall_body_cb
,.on_headers_complete = dontcall_headers_complete_cb
,.on_message_complete = dontcall_message_complete_cb
,.on_chunk_header = dontcall_chunk_header_cb
,.on_chunk_complete = dontcall_chunk_complete_cb
};
/* These pause_* callbacks always pause the parser and just invoke the regular
@ -1986,6 +2105,22 @@ pause_response_status_cb (http_parser *p, const char *buf, size_t len)
return response_status_cb(p, buf, len);
}
/* Pausing variant of chunk_header_cb.
 *
 * Pauses the parser, overwrites the settings object that
 * current_pause_parser points at with settings_dontcall, and then delegates
 * to chunk_header_cb, whose result is returned unchanged.
 */
int
pause_chunk_header_cb (http_parser *p)
{
  int rc;

  http_parser_pause(p, 1);
  *current_pause_parser = settings_dontcall;

  rc = chunk_header_cb(p);
  return rc;
}
/* Pausing variant of chunk_complete_cb.
 *
 * Pauses the parser, overwrites the settings object that
 * current_pause_parser points at with settings_dontcall, and then delegates
 * to chunk_complete_cb, whose result is returned unchanged.
 */
int
pause_chunk_complete_cb (http_parser *p)
{
  int rc;

  http_parser_pause(p, 1);
  *current_pause_parser = settings_dontcall;

  rc = chunk_complete_cb(p);
  return rc;
}
static http_parser_settings settings_pause =
{.on_message_begin = pause_message_begin_cb
,.on_header_field = pause_header_field_cb
@ -1995,6 +2130,8 @@ static http_parser_settings settings_pause =
,.on_body = pause_body_cb
,.on_headers_complete = pause_headers_complete_cb
,.on_message_complete = pause_message_complete_cb
,.on_chunk_header = pause_chunk_header_cb
,.on_chunk_complete = pause_chunk_complete_cb
};
static http_parser_settings settings =
@ -2006,6 +2143,8 @@ static http_parser_settings settings =
,.on_body = body_cb
,.on_headers_complete = headers_complete_cb
,.on_message_complete = message_complete_cb
,.on_chunk_header = chunk_header_cb
,.on_chunk_complete = chunk_complete_cb
};
static http_parser_settings settings_count_body =
@ -2017,6 +2156,8 @@ static http_parser_settings settings_count_body =
,.on_body = count_body_cb
,.on_headers_complete = headers_complete_cb
,.on_message_complete = message_complete_cb
,.on_chunk_header = chunk_header_cb
,.on_chunk_complete = chunk_complete_cb
};
static http_parser_settings settings_null =
@ -2028,6 +2169,8 @@ static http_parser_settings settings_null =
,.on_body = 0
,.on_headers_complete = 0
,.on_message_complete = 0
,.on_chunk_header = 0
,.on_chunk_complete = 0
};
void
@ -2196,6 +2339,12 @@ message_eq (int index, const struct message *expected)
MESSAGE_CHECK_STR_EQ(expected, m, body);
}
assert(m->num_chunks == m->num_chunks_complete);
MESSAGE_CHECK_NUM_EQ(expected, m, num_chunks_complete);
for (i = 0; i < m->num_chunks && i < MAX_CHUNKS; i++) {
MESSAGE_CHECK_NUM_EQ(expected, m, chunk_lengths[i]);
}
MESSAGE_CHECK_NUM_EQ(expected, m, num_headers);
int r;
@ -2909,7 +3058,7 @@ test_message (const struct message *message)
if (msg1len) {
read = parse(msg1, msg1len);
if (message->upgrade && parser->upgrade) {
if (message->upgrade && parser->upgrade && num_messages > 0) {
messages[num_messages - 1].upgrade = msg1 + read;
goto test;
}
@ -3488,7 +3637,11 @@ main (void)
, { "Content-Type", "text/plain" }
}
,.body_size= 31337*1024
,.num_chunks_complete= 31338
};
for (i = 0; i < MAX_CHUNKS; i++) {
large_chunked.chunk_lengths[i] = 1024;
}
test_message_count_body(&large_chunked);
free(msg);
}
@ -3619,6 +3772,22 @@ main (void)
"\r\n";
test_simple(dumbfuck2, HPE_OK);
const char *corrupted_connection =
"GET / HTTP/1.1\r\n"
"Host: www.example.com\r\n"
"Connection\r\033\065\325eep-Alive\r\n"
"Accept-Encoding: gzip\r\n"
"\r\n";
test_simple(corrupted_connection, HPE_INVALID_HEADER_TOKEN);
const char *corrupted_header_name =
"GET / HTTP/1.1\r\n"
"Host: www.example.com\r\n"
"X-Some-Header\r\033\065\325eep-Alive\r\n"
"Accept-Encoding: gzip\r\n"
"\r\n";
test_simple(corrupted_header_name, HPE_INVALID_HEADER_TOKEN);
#if 0
// NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body
// until EOF.