diff --git a/third-party/http-parser/AUTHORS b/third-party/http-parser/AUTHORS index 29cdbb16..8e2df1d0 100644 --- a/third-party/http-parser/AUTHORS +++ b/third-party/http-parser/AUTHORS @@ -61,3 +61,7 @@ Marc O'Morain Jeff Pinner Timothy J Fontaine Akagi201 +Romain Giraud +Jay Satiro +Arne Steen +Kjell Schubert diff --git a/third-party/http-parser/contrib/url_parser.c b/third-party/http-parser/contrib/url_parser.c index b1f9c979..6650b414 100644 --- a/third-party/http-parser/contrib/url_parser.c +++ b/third-party/http-parser/contrib/url_parser.c @@ -14,7 +14,7 @@ dump_url (const char *url, const struct http_parser_url *u) continue; } - printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n", + printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n", i, u->field_data[i].off, u->field_data[i].len, @@ -24,16 +24,18 @@ dump_url (const char *url, const struct http_parser_url *u) } int main(int argc, char ** argv) { + struct http_parser_url u; + int len, connect, result; + if (argc != 3) { printf("Syntax : %s connect|get url\n", argv[0]); return 1; } - struct http_parser_url u; - int len = strlen(argv[2]); - int connect = strcmp("connect", argv[1]) == 0 ? 1 : 0; + len = strlen(argv[2]); + connect = strcmp("connect", argv[1]) == 0 ? 1 : 0; printf("Parsing %s, connect %d\n", argv[2], connect); - int result = http_parser_parse_url(argv[2], len, connect, &u); + result = http_parser_parse_url(argv[2], len, connect, &u); if (result != 0) { printf("Parse error : %d\n", result); return result; diff --git a/third-party/http-parser/http_parser.c b/third-party/http-parser/http_parser.c index aa6310f7..cbe8a90b 100644 --- a/third-party/http-parser/http_parser.c +++ b/third-party/http-parser/http_parser.c @@ -1782,9 +1782,9 @@ reexecute: if (parser->flags & F_TRAILING) { /* End of a chunked request */ - UPDATE_STATE(NEW_MESSAGE()); - CALLBACK_NOTIFY(message_complete); - break; + UPDATE_STATE(s_message_done); + CALLBACK_NOTIFY_NOADVANCE(chunk_complete); + REEXECUTE(); } UPDATE_STATE(s_headers_done); @@ -1832,8 +1832,11 @@ reexecute: parser->nread = 0; - /* Exit, the rest of the connect is in a different protocol. */ - if (parser->upgrade) { + int hasBody = parser->flags & F_CHUNKED || + (parser->content_length > 0 && parser->content_length != ULLONG_MAX); + if (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) { + /* Exit, the rest of the message is in a different protocol. */ UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); RETURN((p - data) + 1); @@ -1854,8 +1857,7 @@ reexecute: /* Content-Length header given and non-zero */ UPDATE_STATE(s_body_identity); } else { - if (parser->type == HTTP_REQUEST || - !http_message_needs_eof(parser)) { + if (!http_message_needs_eof(parser)) { /* Assume content-length 0 - read the next */ UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); @@ -1915,6 +1917,10 @@ reexecute: case s_message_done: UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); + if (parser->upgrade) { + /* Exit, the rest of the message is in a different protocol. */ + RETURN((p - data) + 1); + } break; case s_chunk_size_start: @@ -1994,6 +2000,7 @@ reexecute: } else { UPDATE_STATE(s_chunk_data); } + CALLBACK_NOTIFY(chunk_header); break; } @@ -2033,6 +2040,7 @@ reexecute: STRICT_CHECK(ch != LF); parser->nread = 0; UPDATE_STATE(s_chunk_size_start); + CALLBACK_NOTIFY(chunk_complete); break; default: @@ -2144,13 +2152,15 @@ http_parser_settings_init(http_parser_settings *settings) const char * http_errno_name(enum http_errno err) { - assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + assert(((size_t) err) < + (sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0]))); return http_strerror_tab[err].name; } const char * http_errno_description(enum http_errno err) { - assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + assert(((size_t) err) < + (sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0]))); return http_strerror_tab[err].description; } @@ -2221,6 +2231,7 @@ http_parse_host_char(enum http_host_state s, const char ch) { static int http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + assert(u->field_set & (1 << UF_HOST)); enum http_host_state s; const char *p; @@ -2365,7 +2376,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, /* host must be present if there is a schema */ /* parsing http:///toto will fail */ - if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { if (http_parse_host(buf, u, found_at) != 0) { return 1; } diff --git a/third-party/http-parser/http_parser.h b/third-party/http-parser/http_parser.h index 99c533ae..eb71bf99 100644 --- a/third-party/http-parser/http_parser.h +++ b/third-party/http-parser/http_parser.h @@ -26,8 +26,8 @@ extern "C" { /* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 -#define HTTP_PARSER_VERSION_MINOR 4 -#define HTTP_PARSER_VERSION_PATCH 2 +#define HTTP_PARSER_VERSION_MINOR 5 +#define HTTP_PARSER_VERSION_PATCH 0 #include #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) @@ -160,6 +160,8 @@ enum flags XX(CB_body, "the on_body callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \ XX(CB_status, "the on_status callback failed") \ + XX(CB_chunk_header, "the on_chunk_header callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ \ /* Parsing-related errors */ \ XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ @@ -240,6 +242,11 @@ struct http_parser_settings { http_cb on_headers_complete; http_data_cb on_body; http_cb on_message_complete; + /* When on_chunk_header is called, the current chunk length is stored + * in parser->content_length. + */ + http_cb on_chunk_header; + http_cb on_chunk_complete; }; diff --git a/third-party/http-parser/test.c b/third-party/http-parser/test.c index 58c1955a..4c00571e 100644 --- a/third-party/http-parser/test.c +++ b/third-party/http-parser/test.c @@ -39,6 +39,7 @@ #define MAX_HEADERS 13 #define MAX_ELEMENT_SIZE 2048 +#define MAX_CHUNKS 16 #define MIN(a,b) ((a) < (b) ? (a) : (b)) @@ -65,6 +66,10 @@ struct message { char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; int should_keep_alive; + int num_chunks; + int num_chunks_complete; + int chunk_lengths[MAX_CHUNKS]; + const char *upgrade; // upgraded body unsigned short http_major; @@ -301,6 +306,8 @@ const struct message requests[] = { { "Transfer-Encoding" , "chunked" } } ,.body= "all your base are belong to us" + ,.num_chunks_complete= 2 + ,.chunk_lengths= { 0x1e } } #define TWO_CHUNKS_MULT_ZERO_END 9 @@ -327,6 +334,8 @@ const struct message requests[] = { { "Transfer-Encoding", "chunked" } } ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } } #define CHUNKED_W_TRAILING_HEADERS 10 @@ -357,6 +366,8 @@ const struct message requests[] = , { "Content-Type", "text/plain" } } ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } } #define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 @@ -383,6 +394,8 @@ const struct message requests[] = { { "Transfer-Encoding", "chunked" } } ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } } #define WITH_QUOTES 12 @@ -1036,6 +1049,58 @@ const struct message requests[] = ,.body= "" } +#define UPGRADE_POST_REQUEST 38 +, {.name = "upgrade post request" + ,.type= HTTP_REQUEST + ,.raw= "POST /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Upgrade\r\n" + "Upgrade: HTTP/2.0\r\n" + "Content-Length: 15\r\n" + "\r\n" + "sweet post body" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 4 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Host", "example.com" } + , { "Connection", "Upgrade" } + , { "Upgrade", "HTTP/2.0" } + , { "Content-Length", "15" } + } + ,.body= "sweet post body" + } + +#define CONNECT_WITH_BODY_REQUEST 39 +, {.name = "connect with body request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT foo.bar.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "Content-Length: 10\r\n" + "\r\n" + "blarfcicle" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.request_url= "foo.bar.com:443" + ,.num_headers= 3 + ,.upgrade="blarfcicle" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + , { "Content-Length", "10" } + } + ,.body= "" + } + , {.name= NULL } /* sentinel */ }; @@ -1195,7 +1260,8 @@ const struct message responses[] = ,.body = "This is the data in the first chunk\r\n" "and this is the second one\r\n" - + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 0x25, 0x1c } } #define NO_CARRIAGE_RET 5 @@ -1349,6 +1415,8 @@ const struct message responses[] = , { "Connection", "close" } } ,.body= "" + ,.num_chunks_complete= 1 + ,.chunk_lengths= {} } #define NON_ASCII_IN_STATUS_LINE 10 @@ -1531,6 +1599,7 @@ const struct message responses[] = } ,.body_size= 0 ,.body= "" + ,.num_chunks_complete= 1 } #if !HTTP_PARSER_STRICT @@ -1604,6 +1673,8 @@ const struct message responses[] = , { "Transfer-Encoding", "chunked" } } ,.body= "\n" + ,.num_chunks_complete= 2 + ,.chunk_lengths= { 1 } } #define EMPTY_REASON_PHRASE_AFTER_SPACE 20 @@ -1839,6 +1910,35 @@ response_status_cb (http_parser *p, const char *buf, size_t len) return 0; } +int +chunk_header_cb (http_parser *p) +{ + assert(p == parser); + int chunk_idx = messages[num_messages].num_chunks; + messages[num_messages].num_chunks++; + if (chunk_idx < MAX_CHUNKS) { + messages[num_messages].chunk_lengths[chunk_idx] = p->content_length; + } + + return 0; +} + +int +chunk_complete_cb (http_parser *p) +{ + assert(p == parser); + + /* Here we want to verify that each chunk_header_cb is matched by a + * chunk_complete_cb, so not only should the total number of calls to + * both callbacks be the same, but they also should be interleaved + * properly */ + assert(messages[num_messages].num_chunks == + messages[num_messages].num_chunks_complete + 1); + + messages[num_messages].num_chunks_complete++; + return 0; +} + /* These dontcall_* callbacks exist so that we can verify that when we're * paused, no additional callbacks are invoked */ int @@ -1907,6 +2007,23 @@ dontcall_response_status_cb (http_parser *p, const char *buf, size_t len) abort(); } +int +dontcall_chunk_header_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_header() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_chunk_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_complete() " + "called on paused parser ***\n\n"); + exit(1); +} + static http_parser_settings settings_dontcall = {.on_message_begin = dontcall_message_begin_cb ,.on_header_field = dontcall_header_field_cb @@ -1916,6 +2033,8 @@ static http_parser_settings settings_dontcall = ,.on_body = dontcall_body_cb ,.on_headers_complete = dontcall_headers_complete_cb ,.on_message_complete = dontcall_message_complete_cb + ,.on_chunk_header = dontcall_chunk_header_cb + ,.on_chunk_complete = dontcall_chunk_complete_cb }; /* These pause_* callbacks always pause the parser and just invoke the regular @@ -1986,6 +2105,22 @@ pause_response_status_cb (http_parser *p, const char *buf, size_t len) return response_status_cb(p, buf, len); } +int +pause_chunk_header_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_header_cb(p); +} + +int +pause_chunk_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_complete_cb(p); +} + static http_parser_settings settings_pause = {.on_message_begin = pause_message_begin_cb ,.on_header_field = pause_header_field_cb @@ -1995,6 +2130,8 @@ static http_parser_settings settings_pause = ,.on_body = pause_body_cb ,.on_headers_complete = pause_headers_complete_cb ,.on_message_complete = pause_message_complete_cb + ,.on_chunk_header = pause_chunk_header_cb + ,.on_chunk_complete = pause_chunk_complete_cb }; static http_parser_settings settings = @@ -2006,6 +2143,8 @@ static http_parser_settings settings = ,.on_body = body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_chunk_header = chunk_header_cb + ,.on_chunk_complete = chunk_complete_cb }; static http_parser_settings settings_count_body = @@ -2017,6 +2156,8 @@ static http_parser_settings settings_count_body = ,.on_body = count_body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_chunk_header = chunk_header_cb + ,.on_chunk_complete = chunk_complete_cb }; static http_parser_settings settings_null = @@ -2028,6 +2169,8 @@ static http_parser_settings settings_null = ,.on_body = 0 ,.on_headers_complete = 0 ,.on_message_complete = 0 + ,.on_chunk_header = 0 + ,.on_chunk_complete = 0 }; void @@ -2196,6 +2339,12 @@ message_eq (int index, const struct message *expected) MESSAGE_CHECK_STR_EQ(expected, m, body); } + assert(m->num_chunks == m->num_chunks_complete); + MESSAGE_CHECK_NUM_EQ(expected, m, num_chunks_complete); + for (i = 0; i < m->num_chunks && i < MAX_CHUNKS; i++) { + MESSAGE_CHECK_NUM_EQ(expected, m, chunk_lengths[i]); + } + MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); int r; @@ -2909,7 +3058,7 @@ test_message (const struct message *message) if (msg1len) { read = parse(msg1, msg1len); - if (message->upgrade && parser->upgrade) { + if (message->upgrade && parser->upgrade && num_messages > 0) { messages[num_messages - 1].upgrade = msg1 + read; goto test; } @@ -3488,7 +3637,11 @@ main (void) , { "Content-Type", "text/plain" } } ,.body_size= 31337*1024 + ,.num_chunks_complete= 31338 }; + for (i = 0; i < MAX_CHUNKS; i++) { + large_chunked.chunk_lengths[i] = 1024; + } test_message_count_body(&large_chunked); free(msg); } @@ -3619,6 +3772,22 @@ main (void) "\r\n"; test_simple(dumbfuck2, HPE_OK); + const char *corrupted_connection = + "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Connection\r\033\065\325eep-Alive\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + test_simple(corrupted_connection, HPE_INVALID_HEADER_TOKEN); + + const char *corrupted_header_name = + "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "X-Some-Header\r\033\065\325eep-Alive\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + test_simple(corrupted_header_name, HPE_INVALID_HEADER_TOKEN); + #if 0 // NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body // until EOF.