From 34d209b30b602c9b04e8e7ee8a748865d5fecf9d Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Sun, 13 Mar 2016 00:59:25 +0900 Subject: [PATCH] nghttpx: Add wildcard host routing This change allows host pattern in --backend to include '*' to indicate wildcard match. The wildcard match is made in suffix match only. --- src/shrpx.cc | 26 ++++++++- src/shrpx_client_handler.cc | 16 ++--- src/shrpx_config.cc | 27 ++++++++- src/shrpx_config.h | 9 +++ src/shrpx_worker.cc | 47 +++++++++++---- src/shrpx_worker.h | 3 +- src/shrpx_worker_test.cc | 113 ++++++++++++++++++++++++------------ src/template.h | 31 ++++++++++ 8 files changed, 212 insertions(+), 60 deletions(-) diff --git a/src/shrpx.cc b/src/shrpx.cc index 3beb91be..f59c2b24 100644 --- a/src/shrpx.cc +++ b/src/shrpx.cc @@ -1227,8 +1227,14 @@ Connections: Patterns with host take precedence over patterns with just path. Then, longer patterns take precedence over - shorter ones, breaking a tie by the order of the - appearance in the configuration. + shorter ones. + + Host can include "*" in the left most position to + indicate wildcard match (only suffix match is done). + For example, host pattern "*www.nghttp2.org" matches + against "www.nghttp2.org" and "1www.nghttp2.org", but + does not match against "nghttp2.org". The exact hosts + match takes precedence over the wildcard hosts match. If <PATTERN> is omitted or empty string, "/" is used as pattern, which matches all request paths (catch-all @@ -2089,11 +2095,27 @@ void process_options( } catch_all.proto = proto; std::vector<DownstreamAddrGroup>().swap(addr_groups); + std::vector<WildcardPattern>().swap(mod_config()->wildcard_patterns); // maybe not necessary?
mod_config()->router = Router(); mod_config()->router.add_route(StringRef{catch_all.pattern}, addr_groups.size()); addr_groups.push_back(std::move(catch_all)); + } else { + auto &wildcard_patterns = mod_config()->wildcard_patterns; + std::sort(std::begin(wildcard_patterns), std::end(wildcard_patterns), + [](const WildcardPattern &lhs, const WildcardPattern &rhs) { + return std::lexicographical_compare( + rhs.host.rbegin(), rhs.host.rend(), lhs.host.rbegin(), + lhs.host.rend()); + }); + if (LOG_ENABLED(INFO)) { + LOG(INFO) << "Reverse sorted wildcard hosts (compared from tail to head, " + "and sorted in reverse order):"; + for (auto &wp : mod_config()->wildcard_patterns) { + LOG(INFO) << wp.host; + } + } } if (LOG_ENABLED(INFO)) { diff --git a/src/shrpx_client_handler.cc b/src/shrpx_client_handler.cc index d7366b04..2588264f 100644 --- a/src/shrpx_client_handler.cc +++ b/src/shrpx_client_handler.cc @@ -689,7 +689,7 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { const auto &req = downstream->request(); // Fast path. If we have one group, it must be catch-all group. - // HTTP/2 and client proxy modes fall in this case. + // proxy mode falls in this case. 
if (groups.size() == 1) { group_idx = 0; } else if (req.method == HTTP_CONNECT) { @@ -699,20 +699,20 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { group_idx = catch_all; } else { auto &router = get_config()->router; + auto &wildcard_patterns = get_config()->wildcard_patterns; if (!req.authority.empty()) { group_idx = - match_downstream_addr_group(router, StringRef{req.authority}, - StringRef{req.path}, groups, catch_all); + match_downstream_addr_group(router, wildcard_patterns, req.authority, + req.path, groups, catch_all); } else { auto h = req.fs.header(http2::HD_HOST); if (h) { - group_idx = - match_downstream_addr_group(router, StringRef{h->value}, - StringRef{req.path}, groups, catch_all); + group_idx = match_downstream_addr_group( + router, wildcard_patterns, h->value, req.path, groups, catch_all); } else { group_idx = - match_downstream_addr_group(router, StringRef::from_lit(""), - StringRef{req.path}, groups, catch_all); + match_downstream_addr_group(router, wildcard_patterns, StringRef{}, + req.path, groups, catch_all); } } } diff --git a/src/shrpx_config.cc b/src/shrpx_config.cc index a2ad7c54..2a08b690 100644 --- a/src/shrpx_config.cc +++ b/src/shrpx_config.cc @@ -648,7 +648,32 @@ int parse_mapping(const DownstreamAddrConfig &addr, g.addrs.push_back(addr); g.proto = proto; - mod_config()->router.add_route(StringRef{g.pattern}, addr_groups.size()); + if (pattern[0] == '*') { + // wildcard pattern + auto path_first = + std::find(std::begin(g.pattern), std::end(g.pattern), '/'); + + auto host = StringRef{std::begin(g.pattern) + 1, path_first}; + auto path = StringRef{path_first, std::end(g.pattern)}; + + auto &wildcard_patterns = mod_config()->wildcard_patterns; + + auto it = std::find_if( + std::begin(wildcard_patterns), std::end(wildcard_patterns), + [&host](const WildcardPattern &wp) { return wp.host == host; }); + + if (it == std::end(wildcard_patterns)) { + mod_config()->wildcard_patterns.push_back( + 
{ImmutableString{std::begin(host), std::end(host)}}); + + auto &router = mod_config()->wildcard_patterns.back().router; + router.add_route(path, addr_groups.size()); + } else { + (*it).router.add_route(path, addr_groups.size()); + } + } else { + mod_config()->router.add_route(StringRef{g.pattern}, addr_groups.size()); + } addr_groups.push_back(std::move(g)); } diff --git a/src/shrpx_config.h b/src/shrpx_config.h index 72594fa8..5a31e820 100644 --- a/src/shrpx_config.h +++ b/src/shrpx_config.h @@ -577,8 +577,17 @@ struct ConnectionConfig { } downstream; }; +// Wildcard host pattern routing. We strip the leftmost '*' from the +// host field. router includes all path patterns sharing the same +// wildcard host. +struct WildcardPattern { + ImmutableString host; + Router router; +}; + struct Config { Router router; + std::vector<WildcardPattern> wildcard_patterns; HttpProxy downstream_http_proxy; HttpConfig http; Http2Config http2; diff --git a/src/shrpx_worker.cc b/src/shrpx_worker.cc index 7c8c6bb3..601c8143 100644 --- a/src/shrpx_worker.cc +++ b/src/shrpx_worker.cc @@ -290,7 +290,8 @@ ConnectBlocker *Worker::get_connect_blocker() const { namespace { size_t match_downstream_addr_group_host( - const Router &router, const StringRef &host, const StringRef &path, + const Router &router, const std::vector<WildcardPattern> &wildcard_patterns, + const StringRef &host, const StringRef &path, const std::vector<DownstreamAddrGroup> &groups, size_t catch_all) { if (path.empty() || path[0] != '/') { auto group = router.match(host, StringRef::from_lit("/")); @@ -318,6 +319,24 @@ size_t match_downstream_addr_group_host( return group; } + for (auto it = std::begin(wildcard_patterns); + it != std::end(wildcard_patterns); ++it) { + if (!util::ends_with(std::begin(host), std::end(host), + std::begin((*it).host), std::end((*it).host))) { + continue; + } + auto group = (*it).router.match(StringRef{}, path); + if (group != -1) { + // We sorted wildcard_patterns in a way that first match is the + // longest host pattern.
+ if (LOG_ENABLED(INFO)) { + LOG(INFO) << "Found wildcard pattern with query " << host << path + << ", matched pattern=" << groups[group].pattern; + } + return group; + } + } + group = router.match(StringRef::from_lit(""), path); if (group != -1) { if (LOG_ENABLED(INFO)) { @@ -335,7 +354,8 @@ size_t match_downstream_addr_group_host( } // namespace size_t match_downstream_addr_group( - const Router &router, const StringRef &hostport, const StringRef &raw_path, + const Router &router, const std::vector &wildcard_patterns, + const StringRef &hostport, const StringRef &raw_path, const std::vector &groups, size_t catch_all) { if (std::find(std::begin(hostport), std::end(hostport), '/') != std::end(hostport)) { @@ -349,11 +369,11 @@ size_t match_downstream_addr_group( auto path = StringRef{std::begin(raw_path), query}; if (hostport.empty()) { - return match_downstream_addr_group_host(router, hostport, path, groups, - catch_all); + return match_downstream_addr_group_host(router, wildcard_patterns, hostport, + path, groups, catch_all); } - std::string host; + StringRef host; if (hostport[0] == '[') { // assume this is IPv6 numeric address auto p = std::find(std::begin(hostport), std::end(hostport), ']'); @@ -363,18 +383,25 @@ size_t match_downstream_addr_group( if (p + 1 < std::end(hostport) && *(p + 1) != ':') { return catch_all; } - host.assign(std::begin(hostport), p + 1); + host = StringRef{std::begin(hostport), p + 1}; } else { auto p = std::find(std::begin(hostport), std::end(hostport), ':'); if (p == std::begin(hostport)) { return catch_all; } - host.assign(std::begin(hostport), p); + host = StringRef{std::begin(hostport), p}; } - util::inp_strlower(host); - return match_downstream_addr_group_host(router, StringRef{host}, path, groups, - catch_all); + std::string low_host; + if (std::find_if(std::begin(host), std::end(host), [](char c) { + return 'A' <= c || c <= 'Z'; + }) != std::end(host)) { + low_host = host.str(); + util::inp_strlower(low_host); + host = 
StringRef{low_host}; + } + return match_downstream_addr_group_host(router, wildcard_patterns, host, path, + groups, catch_all); } } // namespace shrpx diff --git a/src/shrpx_worker.h b/src/shrpx_worker.h index 0602e988..17a9fee9 100644 --- a/src/shrpx_worker.h +++ b/src/shrpx_worker.h @@ -206,7 +206,8 @@ private: // group. The catch-all group index is given in |catch_all|. All // patterns are given in |groups|. size_t match_downstream_addr_group( - const Router &router, const StringRef &hostport, const StringRef &path, + const Router &router, const std::vector<WildcardPattern> &wildcard_patterns, + const StringRef &hostport, const StringRef &path, const std::vector<DownstreamAddrGroup> &groups, size_t catch_all); } // namespace shrpx diff --git a/src/shrpx_worker_test.cc b/src/shrpx_worker_test.cc index 11a15e01..ae2f0e80 100644 --- a/src/shrpx_worker_test.cc +++ b/src/shrpx_worker_test.cc @@ -43,7 +43,7 @@ void test_shrpx_worker_match_downstream_addr_group(void) { "nghttp2.org/alpha/charlie", "nghttp2.org/delta%3A", "www.nghttp2.org/", "[::1]/", "nghttp2.org/alpha/bravo/delta", // Check that match is done in the single node - "example.com/alpha/bravo", "192.168.0.1/alpha/"}) { + "example.com/alpha/bravo", "192.168.0.1/alpha/", "/golf/"}) { groups.push_back(DownstreamAddrGroup{ImmutableString(s)}); } @@ -54,126 +54,163 @@ void test_shrpx_worker_match_downstream_addr_group(void) { router.add_route(StringRef{g.pattern}, i); } + std::vector<WildcardPattern> wp; + CU_ASSERT(0 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/"), groups, 255)); // port is removed CU_ASSERT(0 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org:8080"), + router, wp, StringRef::from_lit("nghttp2.org:8080"), StringRef::from_lit("/"), groups, 255)); // host is case-insensitive CU_ASSERT(4 == match_downstream_addr_group( - router, StringRef::from_lit("WWW.nghttp2.org"), + router, wp, StringRef::from_lit("WWW.nghttp2.org"), 
StringRef::from_lit("/alpha"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/"), groups, 255)); // /alpha/bravo also matches /alpha/bravo/ CU_ASSERT(1 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo"), groups, 255)); // path part is case-sensitive CU_ASSERT(0 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/Alpha/bravo"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/charlie"), groups, 255)); CU_ASSERT(2 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/charlie"), groups, 255)); // pattern which does not end with '/' must match its entirely. So // this matches to group 0, not group 2. 
CU_ASSERT(0 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/charlie/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("example.org"), + router, wp, StringRef::from_lit("example.org"), StringRef::from_lit("/"), groups, 255)); - CU_ASSERT(255 == match_downstream_addr_group(router, StringRef::from_lit(""), - StringRef::from_lit("/"), groups, - 255)); - - CU_ASSERT(255 == match_downstream_addr_group(router, StringRef::from_lit(""), - StringRef::from_lit("alpha"), - groups, 255)); - CU_ASSERT(255 == - match_downstream_addr_group(router, StringRef::from_lit("foo/bar"), + match_downstream_addr_group(router, wp, StringRef::from_lit(""), StringRef::from_lit("/"), groups, 255)); + CU_ASSERT(255 == match_downstream_addr_group( + router, wp, StringRef::from_lit(""), + StringRef::from_lit("alpha"), groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group( + router, wp, StringRef::from_lit("foo/bar"), + StringRef::from_lit("/"), groups, 255)); + // If path is StringRef::from_lit("*", only match with host + "/"). 
CU_ASSERT(0 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("*"), groups, 255)); - CU_ASSERT(5 == - match_downstream_addr_group(router, StringRef::from_lit("[::1]"), - StringRef::from_lit("/"), groups, 255)); + CU_ASSERT( + 5 == match_downstream_addr_group(router, wp, StringRef::from_lit("[::1]"), + StringRef::from_lit("/"), groups, 255)); CU_ASSERT(5 == match_downstream_addr_group( - router, StringRef::from_lit("[::1]:8080"), + router, wp, StringRef::from_lit("[::1]:8080"), StringRef::from_lit("/"), groups, 255)); CU_ASSERT(255 == - match_downstream_addr_group(router, StringRef::from_lit("[::1"), + match_downstream_addr_group(router, wp, StringRef::from_lit("[::1"), StringRef::from_lit("/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("[::1]8000"), + router, wp, StringRef::from_lit("[::1]8000"), StringRef::from_lit("/"), groups, 255)); // Check the case where adding route extends tree CU_ASSERT(6 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/delta"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, StringRef::from_lit("nghttp2.org"), + router, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/delta/"), groups, 255)); // Check the case where query is done in a single node CU_ASSERT(7 == match_downstream_addr_group( - router, StringRef::from_lit("example.com"), + router, wp, StringRef::from_lit("example.com"), StringRef::from_lit("/alpha/bravo"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("example.com"), + router, wp, StringRef::from_lit("example.com"), StringRef::from_lit("/alpha/bravo/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("example.com"), + router, wp, 
StringRef::from_lit("example.com"), StringRef::from_lit("/alpha"), groups, 255)); // Check the case where quey is done in a single node CU_ASSERT(8 == match_downstream_addr_group( - router, StringRef::from_lit("192.168.0.1"), + router, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha"), groups, 255)); CU_ASSERT(8 == match_downstream_addr_group( - router, StringRef::from_lit("192.168.0.1"), + router, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha/"), groups, 255)); CU_ASSERT(8 == match_downstream_addr_group( - router, StringRef::from_lit("192.168.0.1"), + router, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha/bravo"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("192.168.0.1"), + router, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alph"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, StringRef::from_lit("192.168.0.1"), + router, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/"), groups, 255)); - router.dump(); + // Test for wildcard hosts + groups.push_back( + DownstreamAddrGroup{ImmutableString::from_lit("git.nghttp2.org")}); + groups.push_back( + DownstreamAddrGroup{ImmutableString::from_lit(".nghttp2.org")}); + + wp.push_back({ImmutableString("git.nghttp2.org")}); + wp.back().router.add_route(StringRef::from_lit("/echo/"), 10); + + wp.push_back({ImmutableString(".nghttp2.org")}); + wp.back().router.add_route(StringRef::from_lit("/echo/"), 11); + wp.back().router.add_route(StringRef::from_lit("/echo/foxtrot"), 12); + + CU_ASSERT(10 == match_downstream_addr_group( + router, wp, StringRef::from_lit("git.nghttp2.org"), + StringRef::from_lit("/echo"), groups, 255)); + + CU_ASSERT(10 == match_downstream_addr_group( + router, wp, StringRef::from_lit("0git.nghttp2.org"), + StringRef::from_lit("/echo"), groups, 255)); + + CU_ASSERT(11 == match_downstream_addr_group( + router, wp, 
StringRef::from_lit("it.nghttp2.org"), + StringRef::from_lit("/echo"), groups, 255)); + + CU_ASSERT(12 == match_downstream_addr_group( + router, wp, StringRef::from_lit(".nghttp2.org"), + StringRef::from_lit("/echo/foxtrot"), groups, 255)); + + CU_ASSERT(9 == match_downstream_addr_group( + router, wp, StringRef::from_lit("alpha.nghttp2.org"), + StringRef::from_lit("/golf"), groups, 255)); + + CU_ASSERT(0 == match_downstream_addr_group( + router, wp, StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/echo"), groups, 255)); } } // namespace shrpx diff --git a/src/template.h b/src/template.h index 1664714a..fdbc2aac 100644 --- a/src/template.h +++ b/src/template.h @@ -250,6 +250,7 @@ public: using const_reference = const value_type &; using const_pointer = const value_type *; using const_iterator = const_pointer; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; ImmutableString() : len(0), base("") {} ImmutableString(const char *s, size_t slen) @@ -308,6 +309,16 @@ public: const_iterator end() const { return base + len; }; const_iterator cend() const { return base + len; }; + const_reverse_iterator rbegin() const { + return const_reverse_iterator{base + len}; + } + const_reverse_iterator crbegin() const { + return const_reverse_iterator{base + len}; + } + + const_reverse_iterator rend() const { return const_reverse_iterator{base}; } + const_reverse_iterator crend() const { return const_reverse_iterator{base}; } + const char *c_str() const { return base; } size_type size() const { return len; } bool empty() const { return len == 0; } @@ -395,6 +406,7 @@ public: using const_reference = const value_type &; using const_pointer = const value_type *; using const_iterator = const_pointer; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; constexpr StringRef() : base(""), len(0) {} explicit StringRef(const std::string &s) : base(s.c_str()), len(s.size()) {} @@ -430,6 +442,16 @@ public: const_iterator end() const { return base + len; }; const_iterator 
cend() const { return base + len; }; + const_reverse_iterator rbegin() const { + return const_reverse_iterator{base + len}; + } + const_reverse_iterator crbegin() const { + return const_reverse_iterator{base + len}; + } + + const_reverse_iterator rend() const { return const_reverse_iterator{base}; } + const_reverse_iterator crend() const { return const_reverse_iterator{base}; } + const char *c_str() const { return base; } size_type size() const { return len; } bool empty() const { return len == 0; } @@ -464,6 +486,15 @@ inline bool operator==(const StringRef &lhs, const char *rhs) { std::equal(std::begin(lhs), std::end(lhs), rhs); } +inline bool operator==(const StringRef &lhs, const ImmutableString &rhs) { + return lhs.size() == rhs.size() && + std::equal(std::begin(lhs), std::end(lhs), std::begin(rhs)); +} + +inline bool operator==(const ImmutableString &lhs, const StringRef &rhs) { + return rhs == lhs; +} + inline bool operator==(const char *lhs, const StringRef &rhs) { return rhs == lhs; }