From be5066c450c81a2a674b691e3bdb2187dc5a4f33 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Sun, 23 Dec 2012 01:13:02 +0900 Subject: [PATCH] Updated http-parser --- src/http-parser/AUTHORS | 5 +- src/http-parser/contrib/parsertrace.c | 156 +++++++++++++++++++++ src/http-parser/{ => contrib}/url_parser.c | 0 src/http-parser/http_parser.c | 3 +- src/http-parser/http_parser.h | 5 +- src/http-parser/test.c | 139 ++++++++++++++++-- src/shrpx_http_downstream_connection.cc | 1 + src/shrpx_https_upstream.cc | 1 + 8 files changed, 297 insertions(+), 13 deletions(-) create mode 100644 src/http-parser/contrib/parsertrace.c rename src/http-parser/{ => contrib}/url_parser.c (100%) diff --git a/src/http-parser/AUTHORS b/src/http-parser/AUTHORS index 590d8080..1fdbda8c 100644 --- a/src/http-parser/AUTHORS +++ b/src/http-parser/AUTHORS @@ -28,7 +28,7 @@ Andre Caron Ivo Raisr James McLaughlin David Gwynne -LE ROUX Thomas +Thomas LE ROUX Randy Rizun Andre Louis Caron Simon Zimmermann @@ -36,3 +36,6 @@ Erik Dubbelboer Martell Malone Bertrand Paquet BogDan Vatra +Peter Faiman +Corey Richardson +Tóth Tamás diff --git a/src/http-parser/contrib/parsertrace.c b/src/http-parser/contrib/parsertrace.c new file mode 100644 index 00000000..c9bc71ec --- /dev/null +++ b/src/http-parser/contrib/parsertrace.c @@ -0,0 +1,156 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/* Dump what the parser finds to stdout as it happens */ + +#include "http_parser.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int on_message_begin(http_parser* _) { + (void)_; + printf("\n***MESSAGE BEGIN***\n\n"); + return 0; +} + +int on_headers_complete(http_parser* _) { + (void)_; + printf("\n***HEADERS COMPLETE***\n\n"); + return 0; +} + +int on_message_complete(http_parser* _) { + (void)_; + printf("\n***MESSAGE COMPLETE***\n\n"); + return 0; +} + +int on_url(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Url: %.*s\n", (int)length, at); + return 0; +} + +int on_header_field(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header field: %.*s\n", (int)length, at); + return 0; +} + +int on_header_value(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header value: %.*s\n", (int)length, at); + return 0; +} + +int on_body(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Body: %.*s\n", (int)length, at); + return 0; +} + +void usage(const char* name) { + fprintf(stderr, + "Usage: %s $type $filename\n" + " type: -x, where x is one of {r,b,q}\n" + " parses file as a Response, reQuest, or Both\n", + name); + exit(EXIT_FAILURE); +} + +int main(int argc, char* argv[]) { + enum http_parser_type file_type; + + if (argc != 3) { + usage(argv[0]); + } + + char* type = argv[1]; + if (type[0] != '-') { + usage(argv[0]); + } + + switch (type[1]) { + /* in the case of "-", type[1] will be NUL */ + case 'r': + file_type = HTTP_RESPONSE; + break; + case 'q': + file_type = HTTP_REQUEST; + break; + case 'b': + file_type = HTTP_BOTH; + break; + default: + usage(argv[0]); + } + + char* filename = argv[2]; + FILE* file = fopen(filename, "r"); + if (file == NULL) { + perror("fopen"); + return EXIT_FAILURE; + } + + fseek(file, 0, SEEK_END); + long file_length = ftell(file); + if (file_length == -1) { + perror("ftell"); + return EXIT_FAILURE; + } + fseek(file, 0, SEEK_SET); + + char* data = 
malloc(file_length); /* NOTE(review): malloc() result is not NULL-checked before fread(), and file is never fclose()d on any path */ + if (fread(data, 1, file_length, file) != (size_t)file_length) { + fprintf(stderr, "couldn't read entire file\n"); + free(data); + return EXIT_FAILURE; + } + + http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_message_begin = on_message_begin; + settings.on_url = on_url; + settings.on_header_field = on_header_field; + settings.on_header_value = on_header_value; + settings.on_headers_complete = on_headers_complete; + settings.on_body = on_body; + settings.on_message_complete = on_message_complete; + + http_parser parser; + http_parser_init(&parser, file_type); + size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); + free(data); + + if (nparsed != (size_t)file_length) { + fprintf(stderr, + "Error: %s (%s)\n", + http_errno_description(HTTP_PARSER_ERRNO(&parser)), + http_errno_name(HTTP_PARSER_ERRNO(&parser))); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/http-parser/url_parser.c b/src/http-parser/contrib/url_parser.c similarity index 100% rename from src/http-parser/url_parser.c rename to src/http-parser/contrib/url_parser.c diff --git a/src/http-parser/http_parser.c b/src/http-parser/http_parser.c index ea37406e..f6a44e1e 100644 --- a/src/http-parser/http_parser.c +++ b/src/http-parser/http_parser.c @@ -866,6 +866,7 @@ size_t http_parser_execute (http_parser *parser, case s_res_line_almost_done: STRICT_CHECK(ch != LF); parser->state = s_header_field_start; + CALLBACK_NOTIFY(status_complete); break; case s_start_req: @@ -1967,7 +1968,7 @@ http_parse_host_char(enum http_host_state s, const char ch) { /* FALLTHROUGH */ case s_http_host_v6_start: - if (IS_HEX(ch) || ch == ':') { + if (IS_HEX(ch) || ch == ':' || ch == '.') { return s_http_host_v6; } diff --git a/src/http-parser/http_parser.h b/src/http-parser/http_parser.h index 4f20396c..a992c742 100644 --- a/src/http-parser/http_parser.h +++ b/src/http-parser/http_parser.h @@ -30,6 +30,7 @@ extern "C" { #include <sys/types.h> 
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) #include <BaseTsd.h> +#include <stddef.h> typedef __int8 int8_t; typedef unsigned __int8 uint8_t; typedef __int16 int16_t; @@ -38,8 +39,6 @@ typedef __int32 int32_t; typedef unsigned __int32 uint32_t; typedef __int64 int64_t; typedef unsigned __int64 uint64_t; -typedef SIZE_T size_t; -typedef SSIZE_T ssize_t; #else #include <stdint.h> #endif @@ -142,6 +141,7 @@ enum flags \ /* Callback-related errors */ \ XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_status_complete, "the on_status_complete callback failed") \ XX(CB_url, "the on_url callback failed") \ XX(CB_header_field, "the on_header_field callback failed") \ XX(CB_header_value, "the on_header_value callback failed") \ @@ -222,6 +222,7 @@ struct http_parser { struct http_parser_settings { http_cb on_message_begin; http_data_cb on_url; + http_cb on_status_complete; http_data_cb on_header_field; http_data_cb on_header_value; http_cb on_headers_complete; diff --git a/src/http-parser/test.c b/src/http-parser/test.c index 0caea241..83723b7f 100644 --- a/src/http-parser/test.c +++ b/src/http-parser/test.c @@ -32,7 +32,7 @@ #define FALSE 0 #define MAX_HEADERS 13 -#define MAX_ELEMENT_SIZE 500 +#define MAX_ELEMENT_SIZE 2048 #define MIN(a,b) ((a) < (b) ? (a) : (b)) @@ -1413,11 +1413,88 @@ const struct message responses[] = , {.name= NULL } /* sentinel */ }; +/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so + * define it ourselves. + */ +size_t +strnlen(const char *s, size_t maxlen) +{ + const char *p; + + p = memchr(s, '\0', maxlen); + if (p == NULL) + return maxlen; + + return p - s; +} + +size_t +strlncat(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t dlen; + size_t rlen; + size_t ncpy; + + slen = strnlen(src, n); + dlen = strnlen(dst, len); + + if (dlen < len) { + rlen = len - dlen; + ncpy = slen < rlen ? 
slen : (rlen - 1); + memcpy(dst + dlen, src, ncpy); + dst[dlen + ncpy] = '\0'; + } + + assert(len > slen + dlen); + return slen + dlen; +} + +size_t +strlcat(char *dst, const char *src, size_t len) +{ + return strlncat(dst, len, src, (size_t) -1); +} + +size_t +strlncpy(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t ncpy; + + slen = strnlen(src, n); + + if (len > 0) { + ncpy = slen < len ? slen : (len - 1); + memcpy(dst, src, ncpy); + dst[ncpy] = '\0'; + } + + assert(len > slen); + return slen; +} + +size_t +strlcpy(char *dst, const char *src, size_t len) +{ + return strlncpy(dst, len, src, (size_t) -1); +} + int request_url_cb (http_parser *p, const char *buf, size_t len) { assert(p == parser); - strncat(messages[num_messages].request_url, buf, len); + strlncat(messages[num_messages].request_url, + sizeof(messages[num_messages].request_url), + buf, + len); + return 0; +} + +int +status_complete_cb (http_parser *p) { + assert(p == parser); + p->data = (char *)p->data + 1; /* count invocations portably: arithmetic on void* is a GNU extension, not ISO C */ return 0; } @@ -1430,7 +1507,10 @@ header_field_cb (http_parser *p, const char *buf, size_t len) if (m->last_header_element != FIELD) m->num_headers++; - strncat(m->headers[m->num_headers-1][0], buf, len); + strlncat(m->headers[m->num_headers-1][0], + sizeof(m->headers[m->num_headers-1][0]), + buf, + len); m->last_header_element = FIELD; @@ -1443,7 +1523,10 @@ header_value_cb (http_parser *p, const char *buf, size_t len) assert(p == parser); struct message *m = &messages[num_messages]; - strncat(m->headers[m->num_headers-1][1], buf, len); + strlncat(m->headers[m->num_headers-1][1], + sizeof(m->headers[m->num_headers-1][1]), + buf, + len); m->last_header_element = VALUE; @@ -1467,7 +1550,10 @@ int body_cb (http_parser *p, const char *buf, size_t len) { assert(p == parser); - strncat(messages[num_messages].body, buf, len); + strlncat(messages[num_messages].body, + sizeof(messages[num_messages].body), + buf, + len); messages[num_messages].body_size += len; check_body_is_final(p); // 
printf("body_cb: '%s'\n", requests[num_messages].body); @@ -2135,6 +2221,25 @@ const struct url_test url_tests[] = ,.rv=0 } +, {.name="ipv4 in ipv6 address" + ,.url="http://[2001:0000:0000:0000:0000:0000:1.9.1.1]/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 37 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 46, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + , {.name="extra ? in query string" ,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css," "fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css," @@ -2489,7 +2594,7 @@ dump_url (const char *url, const struct http_parser_url *u) continue; } - printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n", + printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\"\n", i, u->field_data[i].off, u->field_data[i].len, @@ -2879,15 +2984,15 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess parser_init(type_both ? 
HTTP_BOTH : r1->type); buf1_len = i; - strncpy(buf1, total, buf1_len); + strlncpy(buf1, sizeof(buf1), total, buf1_len); buf1[buf1_len] = 0; buf2_len = j - i; - strncpy(buf2, total+i, buf2_len); + strlncpy(buf2, sizeof(buf2), total+i, buf2_len); buf2[buf2_len] = 0; buf3_len = total_len - j; - strncpy(buf3, total+j, buf3_len); + strlncpy(buf3, sizeof(buf3), total+j, buf3_len); buf3[buf3_len] = 0; read = parse(buf1, buf1_len); @@ -2991,6 +3096,20 @@ create_large_chunked_message (int body_size_in_kb, const char* headers) return buf; } +void +test_status_complete (void) +{ + parser_init(HTTP_RESPONSE); + parser->data = 0; + http_parser_settings settings = settings_null; + settings.on_status_complete = status_complete_cb; + + char *response = "don't mind me, just a simple response"; + http_parser_execute(parser, &settings, response, strlen(response)); + assert(parser->data == (void*)0); // the status_complete callback was never called + assert(parser->http_errno == HPE_INVALID_CONSTANT); // the errno for an invalid status line +} + /* Verify that we can pause parsing at any of the bytes in the * message and still get the result that we're expecting. 
*/ void @@ -3298,6 +3417,8 @@ main (void) , &requests[CONNECT_REQUEST] ); + test_status_complete(); + puts("requests okay"); return 0; diff --git a/src/shrpx_http_downstream_connection.cc b/src/shrpx_http_downstream_connection.cc index 925db7ed..cc6626ed 100644 --- a/src/shrpx_http_downstream_connection.cc +++ b/src/shrpx_http_downstream_connection.cc @@ -417,6 +417,7 @@ namespace { http_parser_settings htp_hooks = { 0, /*http_cb on_message_begin;*/ 0, /*http_data_cb on_url;*/ + 0, /*http_cb on_status_complete;*/ htp_hdr_keycb, /*http_data_cb on_header_field;*/ htp_hdr_valcb, /*http_data_cb on_header_value;*/ htp_hdrs_completecb, /*http_cb on_headers_complete;*/ diff --git a/src/shrpx_https_upstream.cc b/src/shrpx_https_upstream.cc index f6b89d03..7a69f546 100644 --- a/src/shrpx_https_upstream.cc +++ b/src/shrpx_https_upstream.cc @@ -241,6 +241,7 @@ namespace { http_parser_settings htp_hooks = { htp_msg_begin, /*http_cb on_message_begin;*/ htp_uricb, /*http_data_cb on_url;*/ + 0, /*http_cb on_status_complete;*/ htp_hdr_keycb, /*http_data_cb on_header_field;*/ htp_hdr_valcb, /*http_data_cb on_header_value;*/ htp_hdrs_completecb, /*http_cb on_headers_complete;*/