Updated http-parser

This commit is contained in:
Tatsuhiro Tsujikawa 2012-12-23 01:13:02 +09:00
parent 92260ccc81
commit be5066c450
8 changed files with 297 additions and 13 deletions

View File

@ -28,7 +28,7 @@ Andre Caron <andre.l.caron@gmail.com>
Ivo Raisr <ivosh@ivosh.net>
James McLaughlin <jamie@lacewing-project.org>
David Gwynne <loki@animata.net>
LE ROUX Thomas <thomas@procheo.fr>
Thomas LE ROUX <thomas@procheo.fr>
Randy Rizun <rrizun@ortivawireless.com>
Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
Simon Zimmermann <simonz05@gmail.com>
@ -36,3 +36,6 @@ Erik Dubbelboer <erik@dubbelboer.com>
Martell Malone <martellmalone@gmail.com>
Bertrand Paquet <bpaquet@octo.com>
BogDan Vatra <bogdan@kde.org>
Peter Faiman <peter@thepicard.org>
Corey Richardson <corey@octayn.net>
Tóth Tamás <tomika_nospam@freemail.hu>

View File

@ -0,0 +1,156 @@
/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
*
* Additional changes are licensed under the same terms as NGINX and
* copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/* Dump what the parser finds to stdout as it happen */
#include "http_parser.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int on_message_begin(http_parser* _) {
(void)_;
printf("\n***MESSAGE BEGIN***\n\n");
return 0;
}
int on_headers_complete(http_parser* _) {
(void)_;
printf("\n***HEADERS COMPLETE***\n\n");
return 0;
}
int on_message_complete(http_parser* _) {
(void)_;
printf("\n***MESSAGE COMPLETE***\n\n");
return 0;
}
int on_url(http_parser* _, const char* at, size_t length) {
(void)_;
printf("Url: %.*s\n", (int)length, at);
return 0;
}
int on_header_field(http_parser* _, const char* at, size_t length) {
(void)_;
printf("Header field: %.*s\n", (int)length, at);
return 0;
}
int on_header_value(http_parser* _, const char* at, size_t length) {
(void)_;
printf("Header value: %.*s\n", (int)length, at);
return 0;
}
int on_body(http_parser* _, const char* at, size_t length) {
(void)_;
printf("Body: %.*s\n", (int)length, at);
return 0;
}
void usage(const char* name) {
fprintf(stderr,
"Usage: %s $type $filename\n"
" type: -x, where x is one of {r,b,q}\n"
" parses file as a Response, reQuest, or Both\n",
name);
exit(EXIT_FAILURE);
}
int main(int argc, char* argv[]) {
enum http_parser_type file_type;
if (argc != 3) {
usage(argv[0]);
}
char* type = argv[1];
if (type[0] != '-') {
usage(argv[0]);
}
switch (type[1]) {
/* in the case of "-", type[1] will be NUL */
case 'r':
file_type = HTTP_RESPONSE;
break;
case 'q':
file_type = HTTP_REQUEST;
break;
case 'b':
file_type = HTTP_BOTH;
break;
default:
usage(argv[0]);
}
char* filename = argv[2];
FILE* file = fopen(filename, "r");
if (file == NULL) {
perror("fopen");
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_END);
long file_length = ftell(file);
if (file_length == -1) {
perror("ftell");
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_SET);
char* data = malloc(file_length);
if (fread(data, 1, file_length, file) != (size_t)file_length) {
fprintf(stderr, "couldn't read entire file\n");
free(data);
return EXIT_FAILURE;
}
http_parser_settings settings;
memset(&settings, 0, sizeof(settings));
settings.on_message_begin = on_message_begin;
settings.on_url = on_url;
settings.on_header_field = on_header_field;
settings.on_header_value = on_header_value;
settings.on_headers_complete = on_headers_complete;
settings.on_body = on_body;
settings.on_message_complete = on_message_complete;
http_parser parser;
http_parser_init(&parser, file_type);
size_t nparsed = http_parser_execute(&parser, &settings, data, file_length);
free(data);
if (nparsed != (size_t)file_length) {
fprintf(stderr,
"Error: %s (%s)\n",
http_errno_description(HTTP_PARSER_ERRNO(&parser)),
http_errno_name(HTTP_PARSER_ERRNO(&parser)));
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}

View File

@ -866,6 +866,7 @@ size_t http_parser_execute (http_parser *parser,
case s_res_line_almost_done:
STRICT_CHECK(ch != LF);
parser->state = s_header_field_start;
CALLBACK_NOTIFY(status_complete);
break;
case s_start_req:
@ -1967,7 +1968,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
/* FALLTHROUGH */
case s_http_host_v6_start:
if (IS_HEX(ch) || ch == ':') {
if (IS_HEX(ch) || ch == ':' || ch == '.') {
return s_http_host_v6;
}

View File

@ -30,6 +30,7 @@ extern "C" {
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
#include <BaseTsd.h>
#include <stddef.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
@ -38,8 +39,6 @@ typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef SIZE_T size_t;
typedef SSIZE_T ssize_t;
#else
#include <stdint.h>
#endif
@ -142,6 +141,7 @@ enum flags
\
/* Callback-related errors */ \
XX(CB_message_begin, "the on_message_begin callback failed") \
XX(CB_status_complete, "the on_status_complete callback failed") \
XX(CB_url, "the on_url callback failed") \
XX(CB_header_field, "the on_header_field callback failed") \
XX(CB_header_value, "the on_header_value callback failed") \
@ -222,6 +222,7 @@ struct http_parser {
struct http_parser_settings {
http_cb on_message_begin;
http_data_cb on_url;
http_cb on_status_complete;
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;

View File

@ -32,7 +32,7 @@
#define FALSE 0
#define MAX_HEADERS 13
#define MAX_ELEMENT_SIZE 500
#define MAX_ELEMENT_SIZE 2048
#define MIN(a,b) ((a) < (b) ? (a) : (b))
@ -1413,11 +1413,88 @@ const struct message responses[] =
, {.name= NULL } /* sentinel */
};
/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so
* define it ourselves.
*/
size_t
strnlen(const char *s, size_t maxlen)
{
const char *p;
p = memchr(s, '\0', maxlen);
if (p == NULL)
return maxlen;
return p - s;
}
size_t
strlncat(char *dst, size_t len, const char *src, size_t n)
{
size_t slen;
size_t dlen;
size_t rlen;
size_t ncpy;
slen = strnlen(src, n);
dlen = strnlen(dst, len);
if (dlen < len) {
rlen = len - dlen;
ncpy = slen < rlen ? slen : (rlen - 1);
memcpy(dst + dlen, src, ncpy);
dst[dlen + ncpy] = '\0';
}
assert(len > slen + dlen);
return slen + dlen;
}
size_t
strlcat(char *dst, const char *src, size_t len)
{
return strlncat(dst, len, src, (size_t) -1);
}
size_t
strlncpy(char *dst, size_t len, const char *src, size_t n)
{
size_t slen;
size_t ncpy;
slen = strnlen(src, n);
if (len > 0) {
ncpy = slen < len ? slen : (len - 1);
memcpy(dst, src, ncpy);
dst[ncpy] = '\0';
}
assert(len > slen);
return slen;
}
size_t
strlcpy(char *dst, const char *src, size_t len)
{
return strlncpy(dst, len, src, (size_t) -1);
}
int
request_url_cb (http_parser *p, const char *buf, size_t len)
{
assert(p == parser);
strncat(messages[num_messages].request_url, buf, len);
strlncat(messages[num_messages].request_url,
sizeof(messages[num_messages].request_url),
buf,
len);
return 0;
}
int
status_complete_cb (http_parser *p) {
assert(p == parser);
p->data++;
return 0;
}
@ -1430,7 +1507,10 @@ header_field_cb (http_parser *p, const char *buf, size_t len)
if (m->last_header_element != FIELD)
m->num_headers++;
strncat(m->headers[m->num_headers-1][0], buf, len);
strlncat(m->headers[m->num_headers-1][0],
sizeof(m->headers[m->num_headers-1][0]),
buf,
len);
m->last_header_element = FIELD;
@ -1443,7 +1523,10 @@ header_value_cb (http_parser *p, const char *buf, size_t len)
assert(p == parser);
struct message *m = &messages[num_messages];
strncat(m->headers[m->num_headers-1][1], buf, len);
strlncat(m->headers[m->num_headers-1][1],
sizeof(m->headers[m->num_headers-1][1]),
buf,
len);
m->last_header_element = VALUE;
@ -1467,7 +1550,10 @@ int
body_cb (http_parser *p, const char *buf, size_t len)
{
assert(p == parser);
strncat(messages[num_messages].body, buf, len);
strlncat(messages[num_messages].body,
sizeof(messages[num_messages].body),
buf,
len);
messages[num_messages].body_size += len;
check_body_is_final(p);
// printf("body_cb: '%s'\n", requests[num_messages].body);
@ -2135,6 +2221,25 @@ const struct url_test url_tests[] =
,.rv=0
}
, {.name="ipv4 in ipv6 address"
,.url="http://[2001:0000:0000:0000:0000:0000:1.9.1.1]/"
,.is_connect=0
,.u=
{.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 8, 37 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 46, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
,{ 0, 0 } /* UF_USERINFO */
}
}
,.rv=0
}
, {.name="extra ? in query string"
,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css,"
"fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css,"
@ -2489,7 +2594,7 @@ dump_url (const char *url, const struct http_parser_url *u)
continue;
}
printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n",
printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n\"",
i,
u->field_data[i].off,
u->field_data[i].len,
@ -2879,15 +2984,15 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
parser_init(type_both ? HTTP_BOTH : r1->type);
buf1_len = i;
strncpy(buf1, total, buf1_len);
strlncpy(buf1, sizeof(buf1), total, buf1_len);
buf1[buf1_len] = 0;
buf2_len = j - i;
strncpy(buf2, total+i, buf2_len);
strlncpy(buf2, sizeof(buf1), total+i, buf2_len);
buf2[buf2_len] = 0;
buf3_len = total_len - j;
strncpy(buf3, total+j, buf3_len);
strlncpy(buf3, sizeof(buf1), total+j, buf3_len);
buf3[buf3_len] = 0;
read = parse(buf1, buf1_len);
@ -2991,6 +3096,20 @@ create_large_chunked_message (int body_size_in_kb, const char* headers)
return buf;
}
void
test_status_complete (void)
{
parser_init(HTTP_RESPONSE);
parser->data = 0;
http_parser_settings settings = settings_null;
settings.on_status_complete = status_complete_cb;
char *response = "don't mind me, just a simple response";
http_parser_execute(parser, &settings, response, strlen(response));
assert(parser->data == (void*)0); // the status_complete callback was never called
assert(parser->http_errno == HPE_INVALID_CONSTANT); // the errno for an invalid status line
}
/* Verify that we can pause parsing at any of the bytes in the
* message and still get the result that we're expecting. */
void
@ -3298,6 +3417,8 @@ main (void)
, &requests[CONNECT_REQUEST]
);
test_status_complete();
puts("requests okay");
return 0;

View File

@ -417,6 +417,7 @@ namespace {
http_parser_settings htp_hooks = {
0, /*http_cb on_message_begin;*/
0, /*http_data_cb on_url;*/
0, /*http_cb on_status_complete */
htp_hdr_keycb, /*http_data_cb on_header_field;*/
htp_hdr_valcb, /*http_data_cb on_header_value;*/
htp_hdrs_completecb, /*http_cb on_headers_complete;*/

View File

@ -241,6 +241,7 @@ namespace {
http_parser_settings htp_hooks = {
htp_msg_begin, /*http_cb on_message_begin;*/
htp_uricb, /*http_data_cb on_url;*/
0, /*http_cb on_status_complete */
htp_hdr_keycb, /*http_data_cb on_header_field;*/
htp_hdr_valcb, /*http_data_cb on_header_value;*/
htp_hdrs_completecb, /*http_cb on_headers_complete;*/