diff --git a/src/shrpx_client_handler.cc b/src/shrpx_client_handler.cc index cfe95dfe..912c712a 100644 --- a/src/shrpx_client_handler.cc +++ b/src/shrpx_client_handler.cc @@ -910,20 +910,22 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { group_idx = catch_all; } else { auto &router = downstreamconf.router; + auto &rev_wildcard_router = downstreamconf.rev_wildcard_router; auto &wildcard_patterns = downstreamconf.wildcard_patterns; if (!req.authority.empty()) { - group_idx = - match_downstream_addr_group(router, wildcard_patterns, req.authority, - req.path, groups, catch_all); + group_idx = match_downstream_addr_group(router, rev_wildcard_router, + wildcard_patterns, req.authority, + req.path, groups, catch_all); } else { auto h = req.fs.header(http2::HD_HOST); if (h) { - group_idx = match_downstream_addr_group( - router, wildcard_patterns, h->value, req.path, groups, catch_all); + group_idx = match_downstream_addr_group(router, rev_wildcard_router, + wildcard_patterns, h->value, + req.path, groups, catch_all); } else { - group_idx = - match_downstream_addr_group(router, wildcard_patterns, StringRef{}, - req.path, groups, catch_all); + group_idx = match_downstream_addr_group(router, rev_wildcard_router, + wildcard_patterns, StringRef{}, + req.path, groups, catch_all); } } } diff --git a/src/shrpx_config.cc b/src/shrpx_config.cc index d1cf8a23..9f31f3f3 100644 --- a/src/shrpx_config.cc +++ b/src/shrpx_config.cc @@ -828,6 +828,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig addr, auto &router = wildcard_patterns.back().router; router.add_route(path, idx); + + auto rev_host = host.str(); + std::reverse(std::begin(rev_host), std::end(rev_host)); + + downstreamconf.rev_wildcard_router.add_route( + StringRef{rev_host}, wildcard_patterns.size() - 1); } else { (*it).router.add_route(path, idx); } @@ -2831,21 +2837,6 @@ int configure_downstream_group(Config *config, bool http2_proxy, router = Router(); 
router.add_route(StringRef{catch_all.pattern}, addr_groups.size()); addr_groups.push_back(std::move(catch_all)); - } else { - auto &wildcard_patterns = downstreamconf.wildcard_patterns; - std::sort(std::begin(wildcard_patterns), std::end(wildcard_patterns), - [](const WildcardPattern &lhs, const WildcardPattern &rhs) { - return std::lexicographical_compare( - rhs.host.rbegin(), rhs.host.rend(), lhs.host.rbegin(), - lhs.host.rend()); - }); - if (LOG_ENABLED(INFO)) { - LOG(INFO) << "Reverse sorted wildcard hosts (compared from tail to head, " - "and sorted in reverse order):"; - for (auto &wp : wildcard_patterns) { - LOG(INFO) << wp.host; - } - } } // backward compatibility: override all SNI fields with the option diff --git a/src/shrpx_config.h b/src/shrpx_config.h index 7bfe2990..ec258cbf 100644 --- a/src/shrpx_config.h +++ b/src/shrpx_config.h @@ -599,7 +599,7 @@ struct RateLimitConfig { }; // Wildcard host pattern routing. We strips left most '*' from host -// field. router includes all path pattern sharing same wildcard +// field. router includes all path patterns sharing the same wildcard // host. struct WildcardPattern { WildcardPattern(const StringRef &host) @@ -616,6 +616,12 @@ struct DownstreamConfig { ev_tstamp idle_read; } timeout; Router router; + // Router for reversed wildcard hosts. Since this router has + // wildcard hosts reversed without '*', one should call match() + // function with reversed host stripping last character. This is + // because we require at least one character must match for '*'. + // The index stored in this router is index of wildcard_patterns. + Router rev_wildcard_router; std::vector wildcard_patterns; std::vector addr_groups; // The index of catch-all group in downstream_addr_groups. 
diff --git a/src/shrpx_router.cc b/src/shrpx_router.cc
index 86d5f02f..7e875aef 100644
--- a/src/shrpx_router.cc
+++ b/src/shrpx_router.cc
@@ -279,6 +279,71 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const {
   return node->index;
 }
 
+namespace {
+// Walks the router tree from |node|, consuming [first, last), and
+// returns the deepest node visited whose index is not -1 and whose
+// key is fully matched by the input consumed so far.  Returns
+// nullptr if the input is empty or no such node is found.
+const RNode *match_prefix(const RNode *node, const char *first,
+                          const char *last) {
+  if (first == last) {
+    return nullptr;
+  }
+
+  auto p = first;
+
+  // Best match seen so far; stays nullptr until a node with an index
+  // has been fully matched.
+  const RNode *ans = nullptr;
+
+  for (;;) {
+    auto next_node = find_next_node(node, *p);
+    if (next_node == nullptr) {
+      return ans;
+    }
+
+    node = next_node;
+
+    // Compare only as many bytes as both the node label and the
+    // remaining input provide.
+    auto n = std::min(node->len, static_cast<size_t>(last - p));
+    if (memcmp(node->s, p, n) != 0) {
+      return ans;
+    }
+
+    p += n;
+
+    if (p != last) {
+      // Input remains, so the whole label matched (n == node->len).
+      if (node->index != -1) {
+        ans = node;
+      }
+      continue;
+    }
+
+    // Input is exhausted.  Take this node only if its whole label was
+    // consumed and it has an index; otherwise fall back to |ans| so
+    // that a shorter match already found is not lost.
+    if (node->len == n && node->index != -1) {
+      return node;
+    }
+
+    return ans;
+  }
+}
+} // namespace
+
+// Returns the index of the node found by the file-local
+// match_prefix() for |s|, or -1 if it finds none.
+ssize_t Router::match_prefix(const StringRef &s) const {
+  auto node = ::shrpx::match_prefix(&root_, std::begin(s), std::end(s));
+  if (node == nullptr) {
+    return -1;
+  }
+
+  return node->index;
+}
+
 namespace {
 void dump_node(const RNode *node, int depth) {
   fprintf(stderr, "%*ss='%.*s', len=%zu, index=%zd\n", depth, "",
diff --git a/src/shrpx_router.h b/src/shrpx_router.h
index 2381a96c..7d3a588e 100644
--- a/src/shrpx_router.h
+++ b/src/shrpx_router.h
@@ -67,6 +67,9 @@ public:
   bool add_route(const StringRef &pattern, size_t index);
   // Returns the matched index of pattern. -1 if there is no match.
   ssize_t match(const StringRef &host, const StringRef &path) const;
+  // Returns the matched index of the longest pattern that is a
+  // prefix of |s|, otherwise -1.
+ ssize_t match_prefix(const StringRef &s) const; void add_node(RNode *node, const char *pattern, size_t patlen, size_t index); diff --git a/src/shrpx_worker.cc b/src/shrpx_worker.cc index c0f9ee01..b5651f75 100644 --- a/src/shrpx_worker.cc +++ b/src/shrpx_worker.cc @@ -456,7 +456,8 @@ ConnectionHandler *Worker::get_connection_handler() const { namespace { size_t match_downstream_addr_group_host( - const Router &router, const std::vector &wildcard_patterns, + const Router &router, const Router &rev_wildcard_router, + const std::vector &wildcard_patterns, const StringRef &host, const StringRef &path, const std::vector> &groups, size_t catch_all) { @@ -486,23 +487,24 @@ size_t match_downstream_addr_group_host( return group; } - for (auto it = std::begin(wildcard_patterns); - it != std::end(wildcard_patterns); ++it) { - /* left most '*' must match at least one character */ - if (host.size() <= (*it).host.size() || - !util::ends_with(std::begin(host), std::end(host), - std::begin((*it).host), std::end((*it).host))) { - continue; - } - auto group = (*it).router.match(StringRef{}, path); - if (group != -1) { - // We sorted wildcard_patterns in a way that first match is the - // longest host pattern. - if (LOG_ENABLED(INFO)) { - LOG(INFO) << "Found wildcard pattern with query " << host << path - << ", matched pattern=" << groups[group]->pattern; + if (!wildcard_patterns.empty() && !host.empty()) { + auto rev_host = std::string{std::begin(host) + 1, std::end(host)}; + std::reverse(std::begin(rev_host), std::end(rev_host)); + + auto wcidx = rev_wildcard_router.match_prefix( + StringRef{std::begin(rev_host), std::end(rev_host)}); + if (wcidx != -1) { + auto &wc = wildcard_patterns[wcidx]; + auto group = wc.router.match(StringRef{}, path); + if (group != -1) { + // wc is the wildcard host that + // rev_wildcard_router.match_prefix() chose for this request.
+ if (LOG_ENABLED(INFO)) { + LOG(INFO) << "Found wildcard pattern with query " << host << path + << ", matched pattern=" << groups[group]->pattern; + } + return group; } - return group; } } @@ -523,7 +525,8 @@ size_t match_downstream_addr_group_host( } // namespace size_t match_downstream_addr_group( - const Router &router, const std::vector &wildcard_patterns, + const Router &router, const Router &rev_wildcard_router, + const std::vector &wildcard_patterns, const StringRef &hostport, const StringRef &raw_path, const std::vector> &groups, size_t catch_all) { @@ -539,8 +542,9 @@ size_t match_downstream_addr_group( auto path = StringRef{std::begin(raw_path), query}; if (hostport.empty()) { - return match_downstream_addr_group_host(router, wildcard_patterns, hostport, - path, groups, catch_all); + return match_downstream_addr_group_host(router, rev_wildcard_router, + wildcard_patterns, hostport, path, + groups, catch_all); } StringRef host; @@ -570,8 +574,9 @@ size_t match_downstream_addr_group( util::inp_strlower(low_host); host = StringRef{low_host}; } - return match_downstream_addr_group_host(router, wildcard_patterns, host, path, - groups, catch_all); + return match_downstream_addr_group_host(router, rev_wildcard_router, + wildcard_patterns, host, path, groups, + catch_all); } void downstream_failure(DownstreamAddr *addr) { diff --git a/src/shrpx_worker.h b/src/shrpx_worker.h index 2a0a2965..3dbd31e6 100644 --- a/src/shrpx_worker.h +++ b/src/shrpx_worker.h @@ -280,7 +280,8 @@ private: // group. The catch-all group index is given in |catch_all|. All // patterns are given in |groups|. 
size_t match_downstream_addr_group( - const Router &router, const std::vector &wildcard_patterns, + const Router &router, const Router &rev_wildcard_router, + const std::vector &wildcard_patterns, const StringRef &hostport, const StringRef &path, const std::vector> &groups, size_t catch_all); diff --git a/src/shrpx_worker_test.cc b/src/shrpx_worker_test.cc index c4f60b4f..7c941d77 100644 --- a/src/shrpx_worker_test.cc +++ b/src/shrpx_worker_test.cc @@ -50,6 +50,7 @@ void test_shrpx_worker_match_downstream_addr_group(void) { } Router router; + Router wcrouter; for (size_t i = 0; i < groups.size(); ++i) { auto &g = groups[i]; @@ -59,122 +60,124 @@ void test_shrpx_worker_match_downstream_addr_group(void) { std::vector wp; CU_ASSERT(0 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/"), groups, 255)); // port is removed - CU_ASSERT(0 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org:8080"), - StringRef::from_lit("/"), groups, 255)); + CU_ASSERT(0 == + match_downstream_addr_group(router, wcrouter, wp, + StringRef::from_lit("nghttp2.org:8080"), + StringRef::from_lit("/"), groups, 255)); // host is case-insensitive CU_ASSERT(4 == match_downstream_addr_group( - router, wp, StringRef::from_lit("WWW.nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit("WWW.nghttp2.org"), StringRef::from_lit("/alpha"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/"), groups, 255)); // /alpha/bravo also matches /alpha/bravo/ CU_ASSERT(1 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo"), groups, 255)); // path part is case-sensitive CU_ASSERT(0 == 
match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/Alpha/bravo"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/charlie"), groups, 255)); CU_ASSERT(2 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/charlie"), groups, 255)); // pattern which does not end with '/' must match its entirely. So // this matches to group 0, not group 2. CU_ASSERT(0 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/charlie/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("example.org"), + router, wcrouter, wp, StringRef::from_lit("example.org"), StringRef::from_lit("/"), groups, 255)); - CU_ASSERT(255 == - match_downstream_addr_group(router, wp, StringRef::from_lit(""), - StringRef::from_lit("/"), groups, 255)); + CU_ASSERT(255 == match_downstream_addr_group( + router, wcrouter, wp, StringRef::from_lit(""), + StringRef::from_lit("/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit(""), + router, wcrouter, wp, StringRef::from_lit(""), StringRef::from_lit("alpha"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("foo/bar"), + router, wcrouter, wp, StringRef::from_lit("foo/bar"), StringRef::from_lit("/"), groups, 255)); // If path is StringRef::from_lit("*", only match with host + "/"). 
CU_ASSERT(0 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("*"), groups, 255)); - CU_ASSERT( - 5 == match_downstream_addr_group(router, wp, StringRef::from_lit("[::1]"), - StringRef::from_lit("/"), groups, 255)); CU_ASSERT(5 == match_downstream_addr_group( - router, wp, StringRef::from_lit("[::1]:8080"), + router, wcrouter, wp, StringRef::from_lit("[::1]"), + StringRef::from_lit("/"), groups, 255)); + CU_ASSERT(5 == match_downstream_addr_group( + router, wcrouter, wp, StringRef::from_lit("[::1]:8080"), StringRef::from_lit("/"), groups, 255)); - CU_ASSERT(255 == - match_downstream_addr_group(router, wp, StringRef::from_lit("[::1"), - StringRef::from_lit("/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("[::1]8000"), + router, wcrouter, wp, StringRef::from_lit("[::1"), + StringRef::from_lit("/"), groups, 255)); + CU_ASSERT(255 == match_downstream_addr_group( + router, wcrouter, wp, StringRef::from_lit("[::1]8000"), StringRef::from_lit("/"), groups, 255)); // Check the case where adding route extends tree CU_ASSERT(6 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/delta"), groups, 255)); CU_ASSERT(1 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/alpha/bravo/delta/"), groups, 255)); // Check the case where query is done in a single node CU_ASSERT(7 == match_downstream_addr_group( - router, wp, StringRef::from_lit("example.com"), + router, wcrouter, wp, StringRef::from_lit("example.com"), StringRef::from_lit("/alpha/bravo"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("example.com"), + router, wcrouter, wp, 
StringRef::from_lit("example.com"), StringRef::from_lit("/alpha/bravo/"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("example.com"), + router, wcrouter, wp, StringRef::from_lit("example.com"), StringRef::from_lit("/alpha"), groups, 255)); // Check the case where quey is done in a single node CU_ASSERT(8 == match_downstream_addr_group( - router, wp, StringRef::from_lit("192.168.0.1"), + router, wcrouter, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha"), groups, 255)); CU_ASSERT(8 == match_downstream_addr_group( - router, wp, StringRef::from_lit("192.168.0.1"), + router, wcrouter, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha/"), groups, 255)); CU_ASSERT(8 == match_downstream_addr_group( - router, wp, StringRef::from_lit("192.168.0.1"), + router, wcrouter, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alpha/bravo"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("192.168.0.1"), + router, wcrouter, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/alph"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit("192.168.0.1"), + router, wcrouter, wp, StringRef::from_lit("192.168.0.1"), StringRef::from_lit("/"), groups, 255)); // Test for wildcard hosts @@ -187,34 +190,41 @@ void test_shrpx_worker_match_downstream_addr_group(void) { groups.push_back(std::move(g2)); wp.emplace_back(StringRef::from_lit("git.nghttp2.org")); + wcrouter.add_route(StringRef::from_lit("gro.2ptthgn.tig"), 0); wp.back().router.add_route(StringRef::from_lit("/echo/"), 10); wp.emplace_back(StringRef::from_lit(".nghttp2.org")); + wcrouter.add_route(StringRef::from_lit("gro.2ptthgn."), 1); wp.back().router.add_route(StringRef::from_lit("/echo/"), 11); wp.back().router.add_route(StringRef::from_lit("/echo/foxtrot"), 12); CU_ASSERT(11 == match_downstream_addr_group( - router, wp, 
StringRef::from_lit("git.nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit("git.nghttp2.org"), StringRef::from_lit("/echo"), groups, 255)); CU_ASSERT(10 == match_downstream_addr_group( - router, wp, StringRef::from_lit("0git.nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit("0git.nghttp2.org"), StringRef::from_lit("/echo"), groups, 255)); CU_ASSERT(11 == match_downstream_addr_group( - router, wp, StringRef::from_lit("it.nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit("it.nghttp2.org"), StringRef::from_lit("/echo"), groups, 255)); CU_ASSERT(255 == match_downstream_addr_group( - router, wp, StringRef::from_lit(".nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit(".nghttp2.org"), StringRef::from_lit("/echo/foxtrot"), groups, 255)); CU_ASSERT(9 == match_downstream_addr_group( - router, wp, StringRef::from_lit("alpha.nghttp2.org"), + router, wcrouter, wp, + StringRef::from_lit("alpha.nghttp2.org"), StringRef::from_lit("/golf"), groups, 255)); CU_ASSERT(0 == match_downstream_addr_group( - router, wp, StringRef::from_lit("nghttp2.org"), + router, wcrouter, wp, StringRef::from_lit("nghttp2.org"), StringRef::from_lit("/echo"), groups, 255)); }