nghttpx: Rewrite wildcard router

This commit is contained in:
Tatsuhiro Tsujikawa 2016-06-10 23:13:40 +09:00
parent 11e66510e4
commit 288449b9bc
8 changed files with 158 additions and 89 deletions

View File

@ -910,20 +910,22 @@ ClientHandler::get_downstream_connection(Downstream *downstream) {
group_idx = catch_all;
} else {
auto &router = downstreamconf.router;
auto &rev_wildcard_router = downstreamconf.rev_wildcard_router;
auto &wildcard_patterns = downstreamconf.wildcard_patterns;
if (!req.authority.empty()) {
group_idx =
match_downstream_addr_group(router, wildcard_patterns, req.authority,
req.path, groups, catch_all);
group_idx = match_downstream_addr_group(router, rev_wildcard_router,
wildcard_patterns, req.authority,
req.path, groups, catch_all);
} else {
auto h = req.fs.header(http2::HD_HOST);
if (h) {
group_idx = match_downstream_addr_group(
router, wildcard_patterns, h->value, req.path, groups, catch_all);
group_idx = match_downstream_addr_group(router, rev_wildcard_router,
wildcard_patterns, h->value,
req.path, groups, catch_all);
} else {
group_idx =
match_downstream_addr_group(router, wildcard_patterns, StringRef{},
req.path, groups, catch_all);
group_idx = match_downstream_addr_group(router, rev_wildcard_router,
wildcard_patterns, StringRef{},
req.path, groups, catch_all);
}
}
}

View File

@ -828,6 +828,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig addr,
auto &router = wildcard_patterns.back().router;
router.add_route(path, idx);
auto rev_host = host.str();
std::reverse(std::begin(rev_host), std::end(rev_host));
downstreamconf.rev_wildcard_router.add_route(
StringRef{rev_host}, wildcard_patterns.size() - 1);
} else {
(*it).router.add_route(path, idx);
}
@ -2831,21 +2837,6 @@ int configure_downstream_group(Config *config, bool http2_proxy,
router = Router();
router.add_route(StringRef{catch_all.pattern}, addr_groups.size());
addr_groups.push_back(std::move(catch_all));
} else {
auto &wildcard_patterns = downstreamconf.wildcard_patterns;
std::sort(std::begin(wildcard_patterns), std::end(wildcard_patterns),
[](const WildcardPattern &lhs, const WildcardPattern &rhs) {
return std::lexicographical_compare(
rhs.host.rbegin(), rhs.host.rend(), lhs.host.rbegin(),
lhs.host.rend());
});
if (LOG_ENABLED(INFO)) {
LOG(INFO) << "Reverse sorted wildcard hosts (compared from tail to head, "
"and sorted in reverse order):";
for (auto &wp : wildcard_patterns) {
LOG(INFO) << wp.host;
}
}
}
// backward compatibility: override all SNI fields with the option

View File

@ -599,7 +599,7 @@ struct RateLimitConfig {
};
// Wildcard host pattern routing. We strip the leftmost '*' from host
// field. router includes all path pattern sharing same wildcard
// field. router includes all path patterns sharing the same wildcard
// host.
struct WildcardPattern {
WildcardPattern(const StringRef &host)
@ -616,6 +616,12 @@ struct DownstreamConfig {
ev_tstamp idle_read;
} timeout;
Router router;
// Router for reversed wildcard hosts. Since this router has
// wildcard hosts reversed without '*', one should call
// match_prefix() with the reversed host with its last character
// stripped. This is because we require that '*' matches at least
// one character. The index stored in this router is an index into
// wildcard_patterns.
Router rev_wildcard_router;
std::vector<WildcardPattern> wildcard_patterns;
std::vector<DownstreamAddrGroupConfig> addr_groups;
// The index of catch-all group in downstream_addr_groups.

View File

@ -279,6 +279,57 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const {
return node->index;
}
namespace {
// Walks the tree below |node| following the string [first, last) and
// returns the deepest node that carries a route (index != -1) and
// whose accumulated pattern is a prefix of the string.  Returns
// nullptr if the string is empty or no such node exists.
//
// A node whose index is -1 is never returned: it only exists as a
// branching point and does not represent a registered pattern.
const RNode *match_prefix(const RNode *node, const char *first,
                          const char *last) {
  if (first == last) {
    return nullptr;
  }

  auto p = first;
  // Best (deepest) route-carrying node seen so far.
  const RNode *ans = nullptr;

  for (;;) {
    auto next_node = find_next_node(node, *p);
    if (next_node == nullptr) {
      return ans;
    }

    node = next_node;

    // Compare only as many bytes as both the node label and the
    // remaining input provide.
    auto n = std::min(node->len, static_cast<size_t>(last - p));
    if (memcmp(node->s, p, n) != 0) {
      return ans;
    }

    p += n;

    if (p != last) {
      // Input remains, so the whole node label was consumed.  Remember
      // this node if it is a registered pattern and keep descending
      // to look for a longer one.
      if (node->index != -1) {
        ans = node;
      }
      continue;
    }

    // Input is exhausted.  This node is a match only if its label was
    // consumed completely and it carries a route.  Otherwise fall
    // back to the best shorter prefix found so far, instead of
    // handing out a node whose index is -1.
    if (node->len == n && node->index != -1) {
      return node;
    }

    return ans;
  }
}
} // namespace
ssize_t Router::match_prefix(const StringRef &s) const {
auto node = ::shrpx::match_prefix(&root_, std::begin(s), std::end(s));
if (node == nullptr) {
return -1;
}
return node->index;
}
namespace {
void dump_node(const RNode *node, int depth) {
fprintf(stderr, "%*ss='%.*s', len=%zu, index=%zd\n", depth, "",

View File

@ -67,6 +67,9 @@ public:
bool add_route(const StringRef &pattern, size_t index);
// Returns the matched index of pattern. -1 if there is no match.
ssize_t match(const StringRef &host, const StringRef &path) const;
// Returns the matched index of pattern if a pattern is a prefix of
// |s|, otherwise -1.
ssize_t match_prefix(const StringRef &s) const;
void add_node(RNode *node, const char *pattern, size_t patlen, size_t index);

View File

@ -456,7 +456,8 @@ ConnectionHandler *Worker::get_connection_handler() const {
namespace {
size_t match_downstream_addr_group_host(
const Router &router, const std::vector<WildcardPattern> &wildcard_patterns,
const Router &router, const Router &rev_wildcard_router,
const std::vector<WildcardPattern> &wildcard_patterns,
const StringRef &host, const StringRef &path,
const std::vector<std::shared_ptr<DownstreamAddrGroup>> &groups,
size_t catch_all) {
@ -486,23 +487,24 @@ size_t match_downstream_addr_group_host(
return group;
}
for (auto it = std::begin(wildcard_patterns);
it != std::end(wildcard_patterns); ++it) {
/* left most '*' must match at least one character */
if (host.size() <= (*it).host.size() ||
!util::ends_with(std::begin(host), std::end(host),
std::begin((*it).host), std::end((*it).host))) {
continue;
}
auto group = (*it).router.match(StringRef{}, path);
if (group != -1) {
// We sorted wildcard_patterns in a way that first match is the
// longest host pattern.
if (LOG_ENABLED(INFO)) {
LOG(INFO) << "Found wildcard pattern with query " << host << path
<< ", matched pattern=" << groups[group]->pattern;
if (!wildcard_patterns.empty() && !host.empty()) {
auto rev_host = std::string{std::begin(host) + 1, std::end(host)};
std::reverse(std::begin(rev_host), std::end(rev_host));
auto wcidx = rev_wildcard_router.match_prefix(
StringRef{std::begin(rev_host), std::end(rev_host)});
if (wcidx != -1) {
auto &wc = wildcard_patterns[wcidx];
auto group = wc.router.match(StringRef{}, path);
if (group != -1) {
// The wildcard host pattern was selected by rev_wildcard_router;
// only the path had to be matched within that pattern.
if (LOG_ENABLED(INFO)) {
LOG(INFO) << "Found wildcard pattern with query " << host << path
<< ", matched pattern=" << groups[group]->pattern;
}
return group;
}
return group;
}
}
@ -523,7 +525,8 @@ size_t match_downstream_addr_group_host(
} // namespace
size_t match_downstream_addr_group(
const Router &router, const std::vector<WildcardPattern> &wildcard_patterns,
const Router &router, const Router &rev_wildcard_router,
const std::vector<WildcardPattern> &wildcard_patterns,
const StringRef &hostport, const StringRef &raw_path,
const std::vector<std::shared_ptr<DownstreamAddrGroup>> &groups,
size_t catch_all) {
@ -539,8 +542,9 @@ size_t match_downstream_addr_group(
auto path = StringRef{std::begin(raw_path), query};
if (hostport.empty()) {
return match_downstream_addr_group_host(router, wildcard_patterns, hostport,
path, groups, catch_all);
return match_downstream_addr_group_host(router, rev_wildcard_router,
wildcard_patterns, hostport, path,
groups, catch_all);
}
StringRef host;
@ -570,8 +574,9 @@ size_t match_downstream_addr_group(
util::inp_strlower(low_host);
host = StringRef{low_host};
}
return match_downstream_addr_group_host(router, wildcard_patterns, host, path,
groups, catch_all);
return match_downstream_addr_group_host(router, rev_wildcard_router,
wildcard_patterns, host, path, groups,
catch_all);
}
void downstream_failure(DownstreamAddr *addr) {

View File

@ -280,7 +280,8 @@ private:
// group. The catch-all group index is given in |catch_all|. All
// patterns are given in |groups|.
size_t match_downstream_addr_group(
const Router &router, const std::vector<WildcardPattern> &wildcard_patterns,
const Router &router, const Router &rev_wildcard_router,
const std::vector<WildcardPattern> &wildcard_patterns,
const StringRef &hostport, const StringRef &path,
const std::vector<std::shared_ptr<DownstreamAddrGroup>> &groups,
size_t catch_all);

View File

@ -50,6 +50,7 @@ void test_shrpx_worker_match_downstream_addr_group(void) {
}
Router router;
Router wcrouter;
for (size_t i = 0; i < groups.size(); ++i) {
auto &g = groups[i];
@ -59,122 +60,124 @@ void test_shrpx_worker_match_downstream_addr_group(void) {
std::vector<WildcardPattern> wp;
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/"), groups, 255));
// port is removed
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org:8080"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(0 ==
match_downstream_addr_group(router, wcrouter, wp,
StringRef::from_lit("nghttp2.org:8080"),
StringRef::from_lit("/"), groups, 255));
// host is case-insensitive
CU_ASSERT(4 == match_downstream_addr_group(
router, wp, StringRef::from_lit("WWW.nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit("WWW.nghttp2.org"),
StringRef::from_lit("/alpha"), groups, 255));
CU_ASSERT(1 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/bravo/"), groups, 255));
// /alpha/bravo also matches /alpha/bravo/
CU_ASSERT(1 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/bravo"), groups, 255));
// path part is case-sensitive
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/Alpha/bravo"), groups, 255));
CU_ASSERT(1 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/bravo/charlie"), groups, 255));
CU_ASSERT(2 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/charlie"), groups, 255));
// A pattern which does not end with '/' must match in its entirety.
// So this matches group 0, not group 2.
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/charlie/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("example.org"),
router, wcrouter, wp, StringRef::from_lit("example.org"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 ==
match_downstream_addr_group(router, wp, StringRef::from_lit(""),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wcrouter, wp, StringRef::from_lit(""),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit(""),
router, wcrouter, wp, StringRef::from_lit(""),
StringRef::from_lit("alpha"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("foo/bar"),
router, wcrouter, wp, StringRef::from_lit("foo/bar"),
StringRef::from_lit("/"), groups, 255));
// If path is "*", only match with host + "/".
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("*"), groups, 255));
CU_ASSERT(
5 == match_downstream_addr_group(router, wp, StringRef::from_lit("[::1]"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(5 == match_downstream_addr_group(
router, wp, StringRef::from_lit("[::1]:8080"),
router, wcrouter, wp, StringRef::from_lit("[::1]"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(5 == match_downstream_addr_group(
router, wcrouter, wp, StringRef::from_lit("[::1]:8080"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 ==
match_downstream_addr_group(router, wp, StringRef::from_lit("[::1"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("[::1]8000"),
router, wcrouter, wp, StringRef::from_lit("[::1"),
StringRef::from_lit("/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wcrouter, wp, StringRef::from_lit("[::1]8000"),
StringRef::from_lit("/"), groups, 255));
// Check the case where adding route extends tree
CU_ASSERT(6 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/bravo/delta"), groups, 255));
CU_ASSERT(1 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/bravo/delta/"), groups, 255));
// Check the case where query is done in a single node
CU_ASSERT(7 == match_downstream_addr_group(
router, wp, StringRef::from_lit("example.com"),
router, wcrouter, wp, StringRef::from_lit("example.com"),
StringRef::from_lit("/alpha/bravo"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("example.com"),
router, wcrouter, wp, StringRef::from_lit("example.com"),
StringRef::from_lit("/alpha/bravo/"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("example.com"),
router, wcrouter, wp, StringRef::from_lit("example.com"),
StringRef::from_lit("/alpha"), groups, 255));
// Check the case where query is done in a single node
CU_ASSERT(8 == match_downstream_addr_group(
router, wp, StringRef::from_lit("192.168.0.1"),
router, wcrouter, wp, StringRef::from_lit("192.168.0.1"),
StringRef::from_lit("/alpha"), groups, 255));
CU_ASSERT(8 == match_downstream_addr_group(
router, wp, StringRef::from_lit("192.168.0.1"),
router, wcrouter, wp, StringRef::from_lit("192.168.0.1"),
StringRef::from_lit("/alpha/"), groups, 255));
CU_ASSERT(8 == match_downstream_addr_group(
router, wp, StringRef::from_lit("192.168.0.1"),
router, wcrouter, wp, StringRef::from_lit("192.168.0.1"),
StringRef::from_lit("/alpha/bravo"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("192.168.0.1"),
router, wcrouter, wp, StringRef::from_lit("192.168.0.1"),
StringRef::from_lit("/alph"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit("192.168.0.1"),
router, wcrouter, wp, StringRef::from_lit("192.168.0.1"),
StringRef::from_lit("/"), groups, 255));
// Test for wildcard hosts
@ -187,34 +190,41 @@ void test_shrpx_worker_match_downstream_addr_group(void) {
groups.push_back(std::move(g2));
wp.emplace_back(StringRef::from_lit("git.nghttp2.org"));
wcrouter.add_route(StringRef::from_lit("gro.2ptthgn.tig"), 0);
wp.back().router.add_route(StringRef::from_lit("/echo/"), 10);
wp.emplace_back(StringRef::from_lit(".nghttp2.org"));
wcrouter.add_route(StringRef::from_lit("gro.2ptthgn."), 1);
wp.back().router.add_route(StringRef::from_lit("/echo/"), 11);
wp.back().router.add_route(StringRef::from_lit("/echo/foxtrot"), 12);
CU_ASSERT(11 == match_downstream_addr_group(
router, wp, StringRef::from_lit("git.nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit("git.nghttp2.org"),
StringRef::from_lit("/echo"), groups, 255));
CU_ASSERT(10 == match_downstream_addr_group(
router, wp, StringRef::from_lit("0git.nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit("0git.nghttp2.org"),
StringRef::from_lit("/echo"), groups, 255));
CU_ASSERT(11 == match_downstream_addr_group(
router, wp, StringRef::from_lit("it.nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit("it.nghttp2.org"),
StringRef::from_lit("/echo"), groups, 255));
CU_ASSERT(255 == match_downstream_addr_group(
router, wp, StringRef::from_lit(".nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit(".nghttp2.org"),
StringRef::from_lit("/echo/foxtrot"), groups, 255));
CU_ASSERT(9 == match_downstream_addr_group(
router, wp, StringRef::from_lit("alpha.nghttp2.org"),
router, wcrouter, wp,
StringRef::from_lit("alpha.nghttp2.org"),
StringRef::from_lit("/golf"), groups, 255));
CU_ASSERT(0 == match_downstream_addr_group(
router, wp, StringRef::from_lit("nghttp2.org"),
router, wcrouter, wp, StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/echo"), groups, 255));
}