htparse: enhancements and bug fixes

Add support for the CONNECT method. Fix the _str8cmp and _str9cmp
comparisons, which compared the first four bytes twice instead of
checking bytes 4-7. Support messages without a Content-Length header.
Fix the hook_uri_run arguments. Run hook_on_hdrs_complete even when no
header field is present. Support empty header values. Match certain
header values (e.g., keep-alive) case-insensitively.
This commit is contained in:
Tatsuhiro Tsujikawa 2012-07-11 16:02:08 +09:00
parent fe5dfe533f
commit db8a62c0d7
2 changed files with 72 additions and 23 deletions

View File

@ -55,6 +55,7 @@ enum parser_flags {
parser_flag_connection_keep_alive = 1 << 1, parser_flag_connection_keep_alive = 1 << 1,
parser_flag_connection_close = 1 << 2, parser_flag_connection_close = 1 << 2,
parser_flag_trailing = 1 << 3, parser_flag_trailing = 1 << 3,
parser_flag_content_length = 1 << 4,
}; };
enum parser_state { enum parser_state {
@ -216,11 +217,11 @@ static inline uint32_t to_uint32(char *m) {
#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \ #define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
to_uint32(m) == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \ to_uint32(m) == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
&& to_uint32(m) == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4) && to_uint32(m+4) == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4)
#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \ #define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \
to_uint32(m) == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \ to_uint32(m) == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
&& to_uint32(m) == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4) \ && to_uint32(m+4) == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4) \
&& m[8] == c8 && m[8] == c8
#define __HTPARSE_GENHOOK(__n) \ #define __HTPARSE_GENHOOK(__n) \
@ -579,6 +580,10 @@ htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
p->method = htp_method_OPTIONS; p->method = htp_method_OPTIONS;
} }
if (_str7_cmp(m, 'C', 'O', 'N', 'N', 'E', 'C', 'T', '\0')) {
p->method = htp_method_CONNECT;
}
break; break;
case 8: case 8:
if (_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D')) { if (_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D')) {
@ -632,6 +637,14 @@ htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
c = (unsigned char)(ch | 0x20); c = (unsigned char)(ch | 0x20);
if (c >= 'a' && c <= 'z') { if (c >= 'a' && c <= 'z') {
if(p->method == htp_method_CONNECT) {
p->path_offset = &p->buf[p->buf_idx];
p->buf[p->buf_idx++] = ch;
p->buf[p->buf_idx] = '\0';
p->state = s_after_slash_in_uri;
break;
}
p->scheme_offset = &p->buf[p->buf_idx]; p->scheme_offset = &p->buf[p->buf_idx];
p->buf[p->buf_idx++] = ch; p->buf[p->buf_idx++] = ch;
p->buf[p->buf_idx] = '\0'; p->buf[p->buf_idx] = '\0';
@ -983,8 +996,7 @@ htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
(&p->buf[p->buf_idx] - p->path_offset)); (&p->buf[p->buf_idx] - p->path_offset));
} }
r2 = hook_uri_run(p, hooks, p->path_offset, r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
(&p->buf[p->buf_idx] - p->path_offset));
p->buf_idx = 0; p->buf_idx = 0;
p->state = s_http_09; p->state = s_http_09;
@ -1230,6 +1242,12 @@ htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
switch (ch) { switch (ch) {
case CR: case CR:
p->state = s_hdrline_almost_done; p->state = s_hdrline_almost_done;
res = hook_on_hdrs_complete_run(p, hooks);
if (res) {
p->error = htparse_error_user;
return i + 1;
}
break; break;
case LF: case LF:
return i + 1; return i + 1;
@ -1333,9 +1351,19 @@ hdrline_start:
break; break;
case CR: case CR:
case LF: case LF:
/* empty header value, is this legal? */ /* empty header value, is this legal? Don't
p->error = htparse_error_inval_hdr; know but we want to support it */
return i + 1; res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
if(ch == CR) {
p->state = s_hdrline_hdr_almost_done;
} else {
p->state = s_hdrline_hdr_done;
}
p->buf_idx = 0;
break;
/* p->error = htparse_error_inval_hdr; */
/* return i + 1; */
default: default:
p->buf[p->buf_idx++] = ch; p->buf[p->buf_idx++] = ch;
p->buf[p->buf_idx] = '\0'; p->buf[p->buf_idx] = '\0';
@ -1350,6 +1378,7 @@ hdrline_start:
switch (ch) { switch (ch) {
case CR: case CR:
case LF:
res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx); res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
switch (p->heval) { switch (p->heval) {
@ -1365,26 +1394,29 @@ hdrline_start:
p->error = htparse_error_too_big; p->error = htparse_error_too_big;
return i + 1; return i + 1;
} }
p->flags |= parser_flag_content_length;
break; break;
case eval_hdr_val_connection: case eval_hdr_val_connection:
switch (p->buf[0]) { switch (p->buf[0]) {
case 'K': case 'K':
case 'k': case 'k':
if (_str9cmp((p->buf + 1), if( !strcasecmp(p->buf, "keep-alive")) {
'e', 'e', 'p', '-', 'A', 'l', 'i', 'v', 'e')) { /* if (_str9cmp((p->buf + 1), */
/* 'e', 'e', 'p', '-', 'a', 'l', 'i', 'v', 'e')) { */
p->flags |= parser_flag_connection_keep_alive; p->flags |= parser_flag_connection_keep_alive;
} }
break; break;
case 'c': case 'c':
if (_str5cmp(p->buf, 'c', 'l', 'o', 's', 'e')) { if(!strcasecmp(p->buf, "close")) {
/* if (_str5cmp(p->buf, 'c', 'l', 'o', 's', 'e')) { */
p->flags |= parser_flag_connection_close; p->flags |= parser_flag_connection_close;
} }
break; break;
} }
break; break;
case eval_hdr_val_transfer_encoding: case eval_hdr_val_transfer_encoding:
if (_str7_cmp(p->buf, 'c', 'h', 'u', 'n', 'k', 'e', 'd', '\0')) { if(!strcasecmp(p->buf, "chunked")) {
/* if (_str7_cmp(p->buf, 'c', 'h', 'u', 'n', 'k', 'e', 'd', '\0')) { */
p->flags |= parser_flag_chunked; p->flags |= parser_flag_chunked;
} }
@ -1399,14 +1431,18 @@ hdrline_start:
default: default:
break; break;
} /* switch */ } /* switch */
if(ch == CR) {
p->state = s_hdrline_hdr_almost_done; p->state = s_hdrline_hdr_almost_done;
} else {
/* TODO Run hook_on_msg_complete_run ? */
p->state = s_hdrline_hdr_done;
}
p->buf_idx = 0; p->buf_idx = 0;
break; break;
case LF: /* case LF: */
p->state = s_hdrline_hdr_done; /* p->state = s_hdrline_hdr_done; */
break; /* break; */
default: default:
p->buf[p->buf_idx++] = ch; p->buf[p->buf_idx++] = ch;
p->buf[p->buf_idx] = '\0'; p->buf[p->buf_idx] = '\0';
@ -1450,6 +1486,7 @@ hdrline_start:
switch (ch) { switch (ch) {
case CR: case CR:
p->state = s_hdrline_almost_done; p->state = s_hdrline_almost_done;
res = hook_on_hdrs_complete_run(p, hooks); res = hook_on_hdrs_complete_run(p, hooks);
if (res) { if (res) {
@ -1487,13 +1524,18 @@ hdrline_start:
p->state = s_chunk_size_start; p->state = s_chunk_size_start;
} else if (p->content_len > 0) { } else if (p->content_len > 0) {
p->state = s_body_read; p->state = s_body_read;
} else if (!(p->flags & parser_flag_content_length) &&
((p->type == htp_type_response &&
!htparser_should_keep_alive(p)) ||
(p->type == htp_type_request &&
(p->method == htp_method_CONNECT)))) {
p->state = s_body_read;
} else if (p->content_len == 0) { } else if (p->content_len == 0) {
res = hook_on_msg_complete_run(p, hooks); res = hook_on_msg_complete_run(p, hooks);
p->state = s_start; p->state = s_start;
} } else {
p->state = s_hdrline_done; p->state = s_hdrline_done;
}
if (res) { if (res) {
p->error = htparse_error_user; p->error = htparse_error_user;
return i + 1; return i + 1;
@ -1645,8 +1687,13 @@ hdrline_start:
{ {
const char * pp = &data[i]; const char * pp = &data[i];
const char * pe = (const char *)(data + len); const char * pe = (const char *)(data + len);
size_t to_read = _MIN_READ((uint64_t)(pe - pp), size_t to_read;
if(p->flags & parser_flag_content_length) {
to_read = _MIN_READ((uint64_t)(pe - pp),
p->content_len); p->content_len);
} else {
to_read = pe-pp;
}
htparse_log_debug("[%p] s_body_read %zu", p, to_read); htparse_log_debug("[%p] s_body_read %zu", p, to_read);
@ -1656,7 +1703,8 @@ hdrline_start:
i += to_read - 1; i += to_read - 1;
p->content_len -= to_read; p->content_len -= to_read;
if (p->content_len == 0) { if ((p->flags & parser_flag_content_length) &&
p->content_len == 0) {
res = hook_on_msg_complete_run(p, hooks); res = hook_on_msg_complete_run(p, hooks);
p->state = s_start; p->state = s_start;

View File

@ -32,6 +32,7 @@ enum htp_method {
htp_method_LOCK, htp_method_LOCK,
htp_method_UNLOCK, htp_method_UNLOCK,
htp_method_TRACE, htp_method_TRACE,
htp_method_CONNECT,
htp_method_UNKNOWN htp_method_UNKNOWN
}; };