diff --git a/src/http-parser/AUTHORS b/src/http-parser/AUTHORS index 1fdbda8c..92ee45ca 100644 --- a/src/http-parser/AUTHORS +++ b/src/http-parser/AUTHORS @@ -28,7 +28,7 @@ Andre Caron Ivo Raisr James McLaughlin David Gwynne -Thomas LE ROUX +Thomas LE ROUX Randy Rizun Andre Louis Caron Simon Zimmermann diff --git a/src/http-parser/README.md b/src/http-parser/README.md index ef1bf6e9..0bf5d359 100644 --- a/src/http-parser/README.md +++ b/src/http-parser/README.md @@ -1,6 +1,8 @@ HTTP Parser =========== +[![Build Status](https://travis-ci.org/joyent/http-parser.png?branch=master)](https://travis-ci.org/joyent/http-parser) + This is a parser for HTTP messages written in C. It parses both requests and responses. The parser is designed to be used in performance HTTP applications. It does not make any syscalls nor allocations, it does not @@ -34,38 +36,41 @@ Usage One `http_parser` object is used per TCP connection. Initialize the struct using `http_parser_init()` and set the callbacks. That might look something like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... */ - http_parser_settings settings; - settings.on_url = my_url_callback; - settings.on_header_field = my_header_field_callback; - /* ... */ - - http_parser *parser = malloc(sizeof(http_parser)); - http_parser_init(parser, HTTP_REQUEST); - parser->data = my_socket; +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` When data is received on the socket execute the parser and check for errors. - size_t len = 80*1024, nparsed; - char buf[len]; - ssize_t recved; +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; - recved = recv(fd, buf, len, 0); +recved = recv(fd, buf, len, 0); - if (recved < 0) { - /* Handle error. */ - } +if (recved < 0) { + /* Handle error. */ +} - /* Start up / continue the parser. - * Note we pass recved==0 to signal that EOF has been recieved. - */ - nparsed = http_parser_execute(parser, &settings, buf, recved); +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been recieved. + */ +nparsed = http_parser_execute(parser, &settings, buf, recved); - if (parser->upgrade) { - /* handle new protocol */ - } else if (nparsed != recved) { - /* Handle error. Usually just close the connection. */ - } +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ +} +``` HTTP needs to know where the end of the stream is. For example, sometimes servers send responses without Content-Length and expect the client to diff --git a/src/http-parser/http_parser.c b/src/http-parser/http_parser.c index ed3a9232..9695525b 100644 --- a/src/http-parser/http_parser.c +++ b/src/http-parser/http_parser.c @@ -634,7 +634,17 @@ size_t http_parser_execute (http_parser *parser, if (PARSING_HEADER(parser->state)) { ++parser->nread; - /* Buffer overflow attack */ + /* Don't allow the total size of the HTTP headers (including the status + * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. + */ if (parser->nread > HTTP_MAX_HEADER_SIZE) { SET_ERRNO(HPE_HEADER_OVERFLOW); goto error; @@ -929,6 +939,7 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->index == 2 && ch == 'P') { parser->method = HTTP_COPY; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->method == HTTP_MKCOL) { @@ -941,12 +952,14 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->index == 2 && ch == 'A') { parser->method = HTTP_MKACTIVITY; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->method == HTTP_SUBSCRIBE) { if (parser->index == 1 && ch == 'E') { parser->method = HTTP_SEARCH; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->index == 1 && parser->method == HTTP_POST) { @@ -957,13 +970,27 @@ size_t http_parser_execute (http_parser *parser, } else if (ch == 'A') { parser->method = HTTP_PATCH; } else { + SET_ERRNO(HPE_INVALID_METHOD); goto error; } } else if (parser->index == 2) { if (parser->method == HTTP_PUT) { - if (ch == 'R') parser->method = HTTP_PURGE; + if (ch == 'R') { + parser->method = HTTP_PURGE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } } else if (parser->method == HTTP_UNLOCK) { - if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE; + if (ch == 'S') { + parser->method = HTTP_UNSUBSCRIBE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; } } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { parser->method = HTTP_PROPPATCH; @@ -2173,3 +2200,10 @@ int http_body_is_final(const struct http_parser *parser) { return parser->state == s_message_done; } + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} diff --git a/src/http-parser/http_parser.h b/src/http-parser/http_parser.h index a992c742..98c0905b 100644 --- a/src/http-parser/http_parser.h +++ b/src/http-parser/http_parser.h @@ -24,8 +24,10 @@ extern "C" { #endif +/* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 -#define HTTP_PARSER_VERSION_MINOR 0 +#define HTTP_PARSER_VERSION_MINOR 1 +#define HTTP_PARSER_VERSION_PATCH 0 #include #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) @@ -261,6 +263,18 @@ struct http_parser_url { }; +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. + * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, version); + */ +unsigned long http_parser_version(void); + void http_parser_init(http_parser *parser, enum http_parser_type type); diff --git a/src/http-parser/test.c b/src/http-parser/test.c index 83723b7f..656bc9f8 100644 --- a/src/http-parser/test.c +++ b/src/http-parser/test.c @@ -1410,6 +1410,43 @@ const struct message responses[] = } #endif /* !HTTP_PARSER_STRICT */ +#define AMAZON_COM 20 +, {.name= "amazon.com" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 MovedPermanently\r\n" + "Date: Wed, 15 May 2013 17:06:33 GMT\r\n" + "Server: Server\r\n" + "x-amz-id-1: 0GPHKXSJQ826RK7GZEB2\r\n" + "p3p: policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"\r\n" + "x-amz-id-2: STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD\r\n" + "Location: http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846\r\n" + "Vary: Accept-Encoding,User-Agent\r\n" + "Content-Type: text/html; charset=ISO-8859-1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1\r\n" + "\n\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 9 + ,.headers= { { "Date", "Wed, 15 May 2013 17:06:33 GMT" } + , { "Server", "Server" } + , { "x-amz-id-1", "0GPHKXSJQ826RK7GZEB2" } + , { "p3p", "policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"" } + , { "x-amz-id-2", "STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD" } + , { "Location", "http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846" } + , { "Vary", "Accept-Encoding,User-Agent" } + , { "Content-Type", "text/html; charset=ISO-8859-1" } + , { "Transfer-Encoding", "chunked" } + } + ,.body= "\n" + } + , {.name= NULL } /* sentinel */ }; @@ -3170,6 +3207,16 @@ main (void) int i, j, k; int request_count; int response_count; + unsigned long version; + unsigned major; + unsigned minor; + unsigned patch; + + version = http_parser_version(); + major = (version >> 16) & 255; + minor = (version >> 8) & 255; + patch = version & 255; + printf("http_parser v%u.%u.%u (0x%06lx)\n", major, minor, patch, version); printf("sizeof(http_parser) = %u\n", (unsigned int)sizeof(http_parser)); @@ -3263,14 +3310,8 @@ main (void) /// REQUESTS - test_simple("hello world", HPE_INVALID_METHOD); test_simple("GET / HTP/1.1\r\n\r\n", HPE_INVALID_VERSION); - - test_simple("ASDF / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); - test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); - test_simple("GETA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD); - // Well-formed but incomplete test_simple("GET / HTTP/1.1\r\n" "Content-Type: text/plain\r\n" @@ -3313,13 +3354,23 @@ main (void) } static const char *bad_methods[] = { + "ASDF", "C******", + "COLA", + "GEM", + "GETA", "M****", + "MKCOLA", + "PROPPATCHA", + "PUN", + "PX", + "SA", + "hello world", 0 }; for (this_method = bad_methods; *this_method; this_method++) { char buf[200]; sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); - test_simple(buf, HPE_UNKNOWN); + test_simple(buf, HPE_INVALID_METHOD); } const char *dumbfuck2 =