From b5007d45f7d29ba25ff2489ff87dacb4ae7bcc10 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Wed, 10 May 2017 23:34:04 +0900 Subject: [PATCH] nghttpx: Wildcard path matching This commit adds wildcard path matching. If the path pattern given in the backend option ends with "*", it is treated as a wildcard path. The "*" must match at least one character. A wildcard path matches every request path that has the pattern, minus its trailing "*", as a prefix and is strictly longer than that prefix. --- src/shrpx-unittest.cc | 2 + src/shrpx.cc | 12 ++++++ src/shrpx_config.cc | 19 +++++++-- src/shrpx_router.cc | 88 ++++++++++++++++++++++++++++------------ src/shrpx_router.h | 18 ++++++-- src/shrpx_router_test.cc | 54 ++++++++++++++++++++++++ src/shrpx_router_test.h | 1 + 7 files changed, 161 insertions(+), 33 deletions(-) diff --git a/src/shrpx-unittest.cc b/src/shrpx-unittest.cc index 4d816890..da427740 100644 --- a/src/shrpx-unittest.cc +++ b/src/shrpx-unittest.cc @@ -130,6 +130,8 @@ int main(int argc, char *argv[]) { !CU_add_test(pSuite, "http_create_via_header_value", shrpx::test_shrpx_http_create_via_header_value) || !CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) || + !CU_add_test(pSuite, "router_match_wildcard", + shrpx::test_shrpx_router_match_wildcard) || !CU_add_test(pSuite, "router_match_prefix", shrpx::test_shrpx_router_match_prefix) || !CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) || diff --git a/src/shrpx.cc b/src/shrpx.cc index 385b22c0..2de63a52 100644 --- a/src/shrpx.cc +++ b/src/shrpx.cc @@ -1688,6 +1688,18 @@ Connections: match against "nghttp2.org". The exact hosts match takes precedence over the wildcard hosts match. + If path part ends with "*", it is treated as wildcard + path. The wildcard path behaves differently from the + normal path. For normal path, match is made around the + boundary of path component separator,"/". On the other + hand, the wildcard path does not take into account the + path component separator. 
All paths which include the + wildcard path without last "*" as prefix, and are + strictly longer than wildcard path without last "*" are + matched. "*" must match at least one character. For + example, the pattern "/foo*" matches "/foo/" and + "/foobar". But it does not match "/foo", or "/fo". + If is omitted or empty string, "/" is used as pattern, which matches all request paths (catch-all pattern). The catch-all backend must be given. diff --git a/src/shrpx_config.cc b/src/shrpx_config.cc index b244ab5d..c33f8c51 100644 --- a/src/shrpx_config.cc +++ b/src/shrpx_config.cc @@ -967,6 +967,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr, auto host = StringRef{std::begin(g.pattern) + 1, path_first}; auto path = StringRef{path_first, std::end(g.pattern)}; + auto path_is_wildcard = false; + if (path[path.size() - 1] == '*') { + path = StringRef{std::begin(path), std::begin(path) + path.size() - 1}; + path_is_wildcard = true; + } + auto it = std::find_if( std::begin(wildcard_patterns), std::end(wildcard_patterns), [&host](const WildcardPattern &wp) { return wp.host == host; }); @@ -975,7 +981,7 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr, wildcard_patterns.emplace_back(host); auto &router = wildcard_patterns.back().router; - router.add_route(path, idx); + router.add_route(path, idx, path_is_wildcard); auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1); auto p = iov.base; @@ -985,13 +991,20 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr, rw_router.add_route(rev_host, wildcard_patterns.size() - 1); } else { - (*it).router.add_route(path, idx); + (*it).router.add_route(path, idx, path_is_wildcard); } continue; } - router.add_route(g.pattern, idx); + auto path_is_wildcard = false; + if (pattern[pattern.size() - 1] == '*') { + pattern = StringRef{std::begin(pattern), + std::begin(pattern) + pattern.size() - 1}; + path_is_wildcard = true; + } + + router.add_route(pattern, idx, path_is_wildcard); } return 
0; } diff --git a/src/shrpx_router.cc b/src/shrpx_router.cc index 92531f87..82547c20 100644 --- a/src/shrpx_router.cc +++ b/src/shrpx_router.cc @@ -31,10 +31,10 @@ namespace shrpx { -RNode::RNode() : s(nullptr), len(0), index(-1) {} +RNode::RNode() : s(nullptr), len(0), index(-1), wildcard_index(-1) {} -RNode::RNode(const char *s, size_t len, size_t index) - : s(s), len(len), index(index) {} +RNode::RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index) + : s(s), len(len), index(index), wildcard_index(wildcard_index) {} Router::Router() : balloc_(1024, 1024), root_{} {} @@ -64,21 +64,30 @@ void add_next_node(RNode *node, std::unique_ptr new_node) { } // namespace void Router::add_node(RNode *node, const char *pattern, size_t patlen, - size_t index) { + ssize_t index, ssize_t wildcard_index) { auto pat = make_string_ref(balloc_, StringRef{pattern, patlen}); - auto new_node = make_unique(pat.c_str(), pat.size(), index); + auto new_node = + make_unique(pat.c_str(), pat.size(), index, wildcard_index); add_next_node(node, std::move(new_node)); } -size_t Router::add_route(const StringRef &pattern, size_t index) { +size_t Router::add_route(const StringRef &pattern, size_t idx, bool wildcard) { + ssize_t index = -1, wildcard_index = -1; + if (wildcard) { + wildcard_index = idx; + } else { + index = idx; + } + auto node = &root_; size_t i = 0; for (;;) { auto next_node = find_next_node(node, pattern[i]); if (next_node == nullptr) { - add_node(node, pattern.c_str() + i, pattern.size() - i, index); - return index; + add_node(node, pattern.c_str() + i, pattern.size() - i, index, + wildcard_index); + return idx; } node = next_node; @@ -93,12 +102,22 @@ size_t Router::add_route(const StringRef &pattern, size_t index) { // The common prefix was matched if (slen == node->len) { // Complete match - if (node->index != -1) { - // Return the existing index for duplicates. 
- return node->index; + if (index != -1) { + if (node->index != -1) { + // Return the existing index for duplicates. + return node->index; + } + node->index = index; + return idx; } - node->index = index; - return index; + + assert(wildcard_index != -1); + + if (node->wildcard_index != -1) { + return node->wildcard_index; + } + node->wildcard_index = wildcard_index; + return idx; } if (slen > node->len) { @@ -112,27 +131,30 @@ size_t Router::add_route(const StringRef &pattern, size_t index) { if (node->len > j) { // node must be split into 2 nodes. new_node is now the child // of node. - auto new_node = - make_unique(&node->s[j], node->len - j, node->index); + auto new_node = make_unique(&node->s[j], node->len - j, + node->index, node->wildcard_index); std::swap(node->next, new_node->next); node->len = j; node->index = -1; + node->wildcard_index = -1; add_next_node(node, std::move(new_node)); if (slen == j) { node->index = index; - return index; + node->wildcard_index = wildcard_index; + return idx; } } i += j; assert(pattern.size() > i); - add_node(node, pattern.c_str() + i, pattern.size() - i, index); + add_node(node, pattern.c_str() + i, pattern.size() - i, index, + wildcard_index); - return index; + return idx; } } @@ -169,8 +191,10 @@ const RNode *match_complete(size_t *offset, const RNode *node, } // namespace namespace { -const RNode *match_partial(const RNode *node, size_t offset, const char *first, - const char *last) { +const RNode *match_partial(bool *pattern_is_wildcard, const RNode *node, + size_t offset, const char *first, const char *last) { + *pattern_is_wildcard = false; + if (first == last) { if (node->len == offset) { return node; @@ -207,8 +231,12 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first, return nullptr; } - if (node->index != -1 && node->s[node->len - 1] == '/') { + if (node->wildcard_index != -1) { found_node = node; + *pattern_is_wildcard = true; + } else if (node->index != -1 && node->s[node->len - 1] 
== '/') { + found_node = node; + *pattern_is_wildcard = false; } assert(node->len == offset + n); @@ -233,6 +261,7 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first, if (node->len == n) { // Complete match with this node if (node->index != -1) { + *pattern_is_wildcard = false; return node; } @@ -246,16 +275,21 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first, // pattern is "/foo/" and path is "/foo", we consider they // match. if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') { + *pattern_is_wildcard = false; return node; } return found_node; } - // This is the case when pattern which ends with "/" is included - // in query. - if (node->index != -1 && node->s[node->len - 1] == '/') { + if (node->wildcard_index != -1) { found_node = node; + *pattern_is_wildcard = true; + } else if (node->index != -1 && node->s[node->len - 1] == '/') { + // This is the case when pattern which ends with "/" is included + // in query. + found_node = node; + *pattern_is_wildcard = false; } assert(node->len == n); @@ -272,12 +306,14 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const { return -1; } - node = match_partial(node, offset, std::begin(path), std::end(path)); + bool pattern_is_wildcard; + node = match_partial(&pattern_is_wildcard, node, offset, std::begin(path), + std::end(path)); if (node == nullptr || node == &root_) { return -1; } - return node->index; + return pattern_is_wildcard ? 
node->wildcard_index : node->index; } ssize_t Router::match(const StringRef &s) const { diff --git a/src/shrpx_router.h b/src/shrpx_router.h index 8762cb33..295db7e6 100644 --- a/src/shrpx_router.h +++ b/src/shrpx_router.h @@ -38,7 +38,7 @@ namespace shrpx { struct RNode { RNode(); - RNode(const char *s, size_t len, size_t index); + RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index); RNode(RNode &&) = default; RNode(const RNode &) = delete; RNode &operator=(RNode &&) = default; @@ -54,6 +54,10 @@ struct RNode { // Index of pattern if match ends in this node. Note that we don't // store duplicated pattern. ssize_t index; + // Index of wildcard pattern if query includes this node as prefix + // and it still has suffix to match. Note that we don't store + // duplicated pattern. + ssize_t wildcard_index; }; class Router { @@ -66,8 +70,13 @@ public: Router &operator=(const Router &) = delete; // Adds route |pattern| with its |index|. If same pattern has - // already been added, the existing index is returned. - size_t add_route(const StringRef &pattern, size_t index); + // already been added, the existing index is returned. If + // |wildcard| is true, |pattern| is considered as wildcard pattern, + // and all paths which have the |pattern| as prefix and are strictly + // longer than |pattern| match. The wildcard pattern only works + // with match(const StringRef&, const StringRef&). + size_t add_route(const StringRef &pattern, size_t index, + bool wildcard = false); // Returns the matched index of pattern. -1 if there is no match. ssize_t match(const StringRef &host, const StringRef &path) const; // Returns the matched index of pattern |s|. 
-1 if there is no @@ -84,7 +93,8 @@ public: ssize_t match_prefix(size_t *nread, const RNode **last_node, const StringRef &s) const; - void add_node(RNode *node, const char *pattern, size_t patlen, size_t index); + void add_node(RNode *node, const char *pattern, size_t patlen, ssize_t index, + ssize_t wildcard_index); void dump() const; diff --git a/src/shrpx_router_test.cc b/src/shrpx_router_test.cc index 0248181c..d2858c18 100644 --- a/src/shrpx_router_test.cc +++ b/src/shrpx_router_test.cc @@ -33,6 +33,7 @@ namespace shrpx { struct Pattern { StringRef pattern; size_t idx; + bool wildcard; }; void test_shrpx_router_match(void) { @@ -88,6 +89,59 @@ void test_shrpx_router_match(void) { CU_ASSERT(5 == idx); } +void test_shrpx_router_match_wildcard(void) { + constexpr auto patterns = std::array{{ + {StringRef::from_lit("nghttp2.org/"), 0}, + {StringRef::from_lit("nghttp2.org/"), 1, true}, + {StringRef::from_lit("nghttp2.org/alpha/"), 2}, + {StringRef::from_lit("nghttp2.org/alpha/"), 3, true}, + {StringRef::from_lit("nghttp2.org/bravo"), 4}, + {StringRef::from_lit("nghttp2.org/bravo"), 5, true}, + }}; + + Router router; + + for (auto &p : patterns) { + router.add_route(p.pattern, p.idx, p.wildcard); + } + + CU_ASSERT(0 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/"))); + + CU_ASSERT(1 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/a"))); + + CU_ASSERT(1 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/charlie"))); + + CU_ASSERT(2 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/alpha"))); + + CU_ASSERT(2 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/alpha/"))); + + CU_ASSERT(3 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/alpha/b"))); + + CU_ASSERT(4 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/bravo"))); + + CU_ASSERT(5 == + 
router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/bravocharlie"))); + + CU_ASSERT(5 == + router.match(StringRef::from_lit("nghttp2.org"), + StringRef::from_lit("/bravo/"))); +} + void test_shrpx_router_match_prefix(void) { auto patterns = std::vector{ {StringRef::from_lit("gro.2ptthgn."), 0}, diff --git a/src/shrpx_router_test.h b/src/shrpx_router_test.h index 9f4ba665..03b49d25 100644 --- a/src/shrpx_router_test.h +++ b/src/shrpx_router_test.h @@ -32,6 +32,7 @@ namespace shrpx { void test_shrpx_router_match(void); +void test_shrpx_router_match_wildcard(void); void test_shrpx_router_match_prefix(void); } // namespace shrpx