From 46e3be7b5b35aaf0bd8a36994942cee5e753e134 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Fri, 13 Mar 2015 22:40:41 +0900 Subject: [PATCH] nghttpx: Simplify backend request line construction It turns out that the cause of complication in backend request line construction is a absolute-form in HTTP/1 request. In HTTP/2, we have separated pseudo-header fields and no problem at all. In this commit, we parse request URI in HTTP/1 frontend and extract values from it to make backend logic simpler. This patch removes host header field emission in HTTP/2 backend if :authority is emitted. It also rewrites host header field with authority part in absolute-form URI as per RFC 7230. --- src/shrpx_downstream.cc | 8 ++ src/shrpx_downstream.h | 4 +- src/shrpx_http2_downstream_connection.cc | 102 ++++++----------------- src/shrpx_http_downstream_connection.cc | 26 +++--- src/shrpx_https_upstream.cc | 68 +++++++++++++-- 5 files changed, 111 insertions(+), 97 deletions(-) diff --git a/src/shrpx_downstream.cc b/src/shrpx_downstream.cc index 69394c98..a176de3e 100644 --- a/src/shrpx_downstream.cc +++ b/src/shrpx_downstream.cc @@ -390,6 +390,14 @@ void Downstream::set_last_request_header_value(const char *data, size_t len) { request_headers_, data, len); } +void Downstream::add_request_header(std::string name, std::string value, + int16_t token) { + http2::index_header(request_hdidx_, token, request_headers_.size()); + request_headers_sum_ += name.size() + value.size(); + request_headers_.emplace_back(std::move(name), std::move(value), false, + token); +} + void Downstream::add_request_header(const uint8_t *name, size_t namelen, const uint8_t *value, size_t valuelen, bool no_index, int16_t token) { diff --git a/src/shrpx_downstream.h b/src/shrpx_downstream.h index cf22162d..6febe806 100644 --- a/src/shrpx_downstream.h +++ b/src/shrpx_downstream.h @@ -116,6 +116,7 @@ public: void add_request_header(std::string name, std::string value); void set_last_request_header_value(const char *data, size_t len); + void add_request_header(std::string name, std::string value, int16_t token); void add_request_header(const uint8_t *name, size_t namelen, const uint8_t *value, size_t valuelen, bool no_index, int16_t token); @@ -152,7 +153,8 @@ public: // Returns HTTP/2 :scheme header field value. const std::string &get_request_http2_scheme() const; void set_request_http2_scheme(std::string scheme); - // Returns HTTP/2 :authority header field value. + // Returns HTTP/2 :authority header field value. We also set the + // value retrieved from absolute-form HTTP/1 request. const std::string &get_request_http2_authority() const; void set_request_http2_authority(std::string authority); void set_request_major(int major); diff --git a/src/shrpx_http2_downstream_connection.cc b/src/shrpx_http2_downstream_connection.cc index f818a136..6a66605b 100644 --- a/src/shrpx_http2_downstream_connection.cc +++ b/src/shrpx_http2_downstream_connection.cc @@ -265,8 +265,6 @@ int Http2DownstreamConnection::push_request_headers() { const char *authority = nullptr, *host = nullptr; if (!no_host_rewrite) { - // HTTP/2 backend does not support multiple address, so we always - // use index = 0. if (!downstream_->get_request_http2_authority().empty()) { authority = downstream_addr.hostport.get(); } @@ -302,104 +300,52 @@ int Http2DownstreamConnection::push_request_headers() { cookies = downstream_->crumble_request_cookie(); } - // 9 means: + // 8 means: // 1. :method // 2. :scheme // 3. :path - // 4. :authority (at least either :authority or host exists) - // 5. host - // 6. via (optional) - // 7. x-forwarded-for (optional) - // 8. x-forwarded-proto (optional) - // 9. te (optional) + // 4. :authority or host (at least either of them exists) + // 5. via (optional) + // 6. x-forwarded-for (optional) + // 7. x-forwarded-proto (optional) + // 8. te (optional) auto nva = std::vector(); - nva.reserve(nheader + 9 + cookies.size()); + nva.reserve(nheader + 8 + cookies.size()); std::string via_value; std::string xff_value; std::string scheme, uri_authority, path, query; - // To reconstruct HTTP/1 status line and headers, proxy should - // preserve host header field. See draft-09 section 8.1.3.1. + nva.push_back( + http2::make_nv_ls(":method", downstream_->get_request_method())); + if (downstream_->get_request_method() == "CONNECT") { - // The upstream may be HTTP/2 or HTTP/1 if (authority) { nva.push_back(http2::make_nv_lc(":authority", authority)); } else { nva.push_back( http2::make_nv_ls(":authority", downstream_->get_request_path())); } - } else if (!downstream_->get_request_http2_scheme().empty()) { - // Here the upstream is HTTP/2 - nva.push_back( - http2::make_nv_ls(":scheme", downstream_->get_request_http2_scheme())); - nva.push_back(http2::make_nv_ls(":path", downstream_->get_request_path())); + } else { + if (!downstream_->get_request_http2_scheme().empty()) { + nva.push_back(http2::make_nv_ls(":scheme", + downstream_->get_request_http2_scheme())); + } else if (client_handler_->get_ssl()) { + nva.push_back(http2::make_nv_ll(":scheme", "https")); + } else { + nva.push_back(http2::make_nv_ll(":scheme", "http")); + } + if (authority) { nva.push_back(http2::make_nv_lc(":authority", authority)); } - } else { - // The upstream is HTTP/1 - http_parser_url u; - const char *url = downstream_->get_request_path().c_str(); - memset(&u, 0, sizeof(u)); - rv = http_parser_parse_url(url, downstream_->get_request_path().size(), 0, - &u); - if (rv == 0) { - http2::copy_url_component(scheme, &u, UF_SCHEMA, url); - http2::copy_url_component(uri_authority, &u, UF_HOST, url); - http2::copy_url_component(path, &u, UF_PATH, url); - http2::copy_url_component(query, &u, UF_QUERY, url); - if (path.empty()) { - if (!uri_authority.empty() && - downstream_->get_request_method() == "OPTIONS") { - path = "*"; - } else { - path = "/"; - } - } - if (!query.empty()) { - path += "?"; - path += query; - } - } - if (scheme.empty()) { - if (client_handler_->get_ssl()) { - nva.push_back(http2::make_nv_ll(":scheme", "https")); - } else { - nva.push_back(http2::make_nv_ll(":scheme", "http")); - } - } else { - nva.push_back(http2::make_nv_ls(":scheme", scheme)); - } - if (path.empty()) { - nva.push_back( - http2::make_nv_ls(":path", downstream_->get_request_path())); - } else { - nva.push_back(http2::make_nv_ls(":path", path)); - } - if (!no_host_rewrite) { - if (authority) { - nva.push_back(http2::make_nv_lc(":authority", authority)); - } - } else if (!uri_authority.empty()) { - // TODO properly check IPv6 numeric address - if (uri_authority.find(":") != std::string::npos) { - uri_authority = "[" + uri_authority; - uri_authority += "]"; - } - if (u.field_set & (1 << UF_PORT)) { - uri_authority += ":"; - uri_authority += util::utos(u.port); - } - nva.push_back(http2::make_nv_ls(":authority", uri_authority)); - } + nva.push_back(http2::make_nv_ls(":path", downstream_->get_request_path())); } - nva.push_back( - http2::make_nv_ls(":method", downstream_->get_request_method())); - - if (host) { + // only emit host header field if :authority is not emitted. They + // both must be the same value. + if (!authority && host) { nva.push_back(http2::make_nv_lc("host", host)); } diff --git a/src/shrpx_http_downstream_connection.cc b/src/shrpx_http_downstream_connection.cc index df34fffc..c5c7341a 100644 --- a/src/shrpx_http_downstream_connection.cc +++ b/src/shrpx_http_downstream_connection.cc @@ -246,22 +246,29 @@ int HttpDownstreamConnection::push_request_headers() { // Assume that method and request path do not contain \r\n. std::string hdrs = downstream_->get_request_method(); - hdrs += " "; + hdrs += ' '; if (downstream_->get_request_method() == "CONNECT") { if (authority) { hdrs += authority; } else { hdrs += downstream_->get_request_path(); } - } else if (get_config()->http2_proxy && - !downstream_->get_request_http2_scheme().empty() && authority && - (downstream_->get_request_path().c_str()[0] == '/' || - downstream_->get_request_path() == "*")) { + } else if (get_config()->http2_proxy || get_config()->client_proxy) { // Construct absolute-form request target because we are going to // send a request to a HTTP/1 proxy. - hdrs += downstream_->get_request_http2_scheme(); - hdrs += "://"; - hdrs += authority; + if (downstream_->get_request_http2_scheme().empty()) { + // this comes from HTTP/1 upstream, use http scheme. We don't + // support https forward link yet. + hdrs += "http://"; + } else { + hdrs += downstream_->get_request_http2_scheme(); + hdrs += "://"; + } + if (authority) { + hdrs += authority; + } else { + hdrs += host; + } // Server-wide OPTIONS takes following form in proxy request: // @@ -273,8 +280,7 @@ int HttpDownstreamConnection::push_request_headers() { hdrs += downstream_->get_request_path(); } } else { - // No proxy case. get_request_path() may be absolute-form but we - // don't care. + // No proxy case. hdrs += downstream_->get_request_path(); } hdrs += " HTTP/1.1\r\nHost: "; diff --git a/src/shrpx_https_upstream.cc b/src/shrpx_https_upstream.cc index b14a8664..c760c5db 100644 --- a/src/shrpx_https_upstream.cc +++ b/src/shrpx_https_upstream.cc @@ -136,6 +136,51 @@ int htp_hdr_valcb(http_parser *htp, const char *data, size_t len) { } } // namespace +namespace { +void rewrite_request_host_path_from_uri(Downstream *downstream, const char *uri, + http_parser_url &u) { + assert(u.field_set & (1 << UF_HOST)); + + // As per https://tools.ietf.org/html/rfc7230#section-5.4, we + // rewrite host header field with authority component. + std::string authority; + http2::copy_url_component(authority, &u, UF_HOST, uri); + // TODO properly check IPv6 numeric address + if (authority.find(':') != std::string::npos) { + authority = '[' + authority; + authority += ']'; + } + if (u.field_set & (1 << UF_PORT)) { + authority += ':'; + authority += util::utos(u.port); + } + downstream->set_request_http2_authority(authority); + + std::string path; + if (u.field_set & (1 << UF_PATH)) { + http2::copy_url_component(path, &u, UF_PATH, uri); + } else if (downstream->get_request_method() == "OPTIONS") { + // Server-wide OPTIONS takes following form in proxy request: + // + // OPTIONS http://example.org HTTP/1.1 + // + // Notice that no slash after authority. See + // http://tools.ietf.org/html/rfc7230#section-5.3.4 + downstream->set_request_path("*"); + // we ignore query component here + return; + } else { + path = "/"; + } + if (u.field_set & (1 << UF_QUERY)) { + auto &fdata = u.field_data[UF_QUERY]; + path += '?'; + path.append(uri + fdata.off, fdata.len); + } + downstream->set_request_path(path); +} +} // namespace + namespace { int htp_hdrs_completecb(http_parser *htp) { int rv; @@ -178,18 +223,25 @@ int htp_hdrs_completecb(http_parser *htp) { downstream->inspect_http1_request(); - if (get_config()->client_proxy && - downstream->get_request_method() != "CONNECT") { - // Make sure that request path is an absolute URI. - http_parser_url u; - auto url = downstream->get_request_path().c_str(); - memset(&u, 0, sizeof(u)); - rv = http_parser_parse_url(url, downstream->get_request_path().size(), 0, + if (downstream->get_request_method() != "CONNECT") { + http_parser_url u{}; + auto uri = downstream->get_request_path().c_str(); + rv = http_parser_parse_url(uri, downstream->get_request_path().size(), 0, &u); - if (rv != 0 || !(u.field_set & (1 << UF_SCHEMA))) { + if (rv != 0) { // Expect to respond with 400 bad request return -1; } + // checking UF_HOST could be redundant, but just in case ... + if (!(u.field_set & (1 << UF_SCHEMA)) || !(u.field_set & (1 << UF_HOST))) { + if (get_config()->client_proxy) { + // Request URI should be absolute-form for client proxy mode + return -1; + } + } else { + rewrite_request_host_path_from_uri(downstream, uri, u); + // uri could be invalidated here + } } rv = downstream->attach_downstream_connection(