nghttpx: Wildcard path matching

This commit adds wildcard path matching.  If the path pattern given in
the backend option ends with "*", it is treated as a wildcard path.
The "*" must match at least one character.  A request path matches a
wildcard path when it has the wildcard path without its last "*" as a
prefix and is strictly longer than that prefix.
This commit is contained in:
Tatsuhiro Tsujikawa 2017-05-10 23:34:04 +09:00
parent a584cf5a4f
commit b5007d45f7
7 changed files with 161 additions and 33 deletions

View File

@ -130,6 +130,8 @@ int main(int argc, char *argv[]) {
!CU_add_test(pSuite, "http_create_via_header_value", !CU_add_test(pSuite, "http_create_via_header_value",
shrpx::test_shrpx_http_create_via_header_value) || shrpx::test_shrpx_http_create_via_header_value) ||
!CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) || !CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) ||
!CU_add_test(pSuite, "router_match_wildcard",
shrpx::test_shrpx_router_match_wildcard) ||
!CU_add_test(pSuite, "router_match_prefix", !CU_add_test(pSuite, "router_match_prefix",
shrpx::test_shrpx_router_match_prefix) || shrpx::test_shrpx_router_match_prefix) ||
!CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) || !CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) ||

View File

@ -1688,6 +1688,18 @@ Connections:
match against "nghttp2.org". The exact hosts match match against "nghttp2.org". The exact hosts match
takes precedence over the wildcard hosts match. takes precedence over the wildcard hosts match.
If path part ends with "*", it is treated as wildcard
path. The wildcard path behaves differently from the
normal path. For normal path, match is made around the
boundary of path component separator,"/". On the other
hand, the wildcard path does not take into account the
path component separator. All paths which include the
wildcard path without last "*" as prefix, and are
strictly longer than wildcard path without last "*" are
matched. "*" must match at least one character. For
example, the pattern "/foo*" matches "/foo/" and
"/foobar". But it does not match "/foo", or "/fo".
If <PATTERN> is omitted or empty string, "/" is used as If <PATTERN> is omitted or empty string, "/" is used as
pattern, which matches all request paths (catch-all pattern, which matches all request paths (catch-all
pattern). The catch-all backend must be given. pattern). The catch-all backend must be given.

View File

@ -967,6 +967,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
auto host = StringRef{std::begin(g.pattern) + 1, path_first}; auto host = StringRef{std::begin(g.pattern) + 1, path_first};
auto path = StringRef{path_first, std::end(g.pattern)}; auto path = StringRef{path_first, std::end(g.pattern)};
auto path_is_wildcard = false;
if (path[path.size() - 1] == '*') {
path = StringRef{std::begin(path), std::begin(path) + path.size() - 1};
path_is_wildcard = true;
}
auto it = std::find_if( auto it = std::find_if(
std::begin(wildcard_patterns), std::end(wildcard_patterns), std::begin(wildcard_patterns), std::end(wildcard_patterns),
[&host](const WildcardPattern &wp) { return wp.host == host; }); [&host](const WildcardPattern &wp) { return wp.host == host; });
@ -975,7 +981,7 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
wildcard_patterns.emplace_back(host); wildcard_patterns.emplace_back(host);
auto &router = wildcard_patterns.back().router; auto &router = wildcard_patterns.back().router;
router.add_route(path, idx); router.add_route(path, idx, path_is_wildcard);
auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1); auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1);
auto p = iov.base; auto p = iov.base;
@ -985,13 +991,20 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
rw_router.add_route(rev_host, wildcard_patterns.size() - 1); rw_router.add_route(rev_host, wildcard_patterns.size() - 1);
} else { } else {
(*it).router.add_route(path, idx); (*it).router.add_route(path, idx, path_is_wildcard);
} }
continue; continue;
} }
router.add_route(g.pattern, idx); auto path_is_wildcard = false;
if (pattern[pattern.size() - 1] == '*') {
pattern = StringRef{std::begin(pattern),
std::begin(pattern) + pattern.size() - 1};
path_is_wildcard = true;
}
router.add_route(pattern, idx, path_is_wildcard);
} }
return 0; return 0;
} }

View File

@ -31,10 +31,10 @@
namespace shrpx { namespace shrpx {
RNode::RNode() : s(nullptr), len(0), index(-1) {} RNode::RNode() : s(nullptr), len(0), index(-1), wildcard_index(-1) {}
RNode::RNode(const char *s, size_t len, size_t index) RNode::RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index)
: s(s), len(len), index(index) {} : s(s), len(len), index(index), wildcard_index(wildcard_index) {}
Router::Router() : balloc_(1024, 1024), root_{} {} Router::Router() : balloc_(1024, 1024), root_{} {}
@ -64,21 +64,30 @@ void add_next_node(RNode *node, std::unique_ptr<RNode> new_node) {
} // namespace } // namespace
void Router::add_node(RNode *node, const char *pattern, size_t patlen, void Router::add_node(RNode *node, const char *pattern, size_t patlen,
size_t index) { ssize_t index, ssize_t wildcard_index) {
auto pat = make_string_ref(balloc_, StringRef{pattern, patlen}); auto pat = make_string_ref(balloc_, StringRef{pattern, patlen});
auto new_node = make_unique<RNode>(pat.c_str(), pat.size(), index); auto new_node =
make_unique<RNode>(pat.c_str(), pat.size(), index, wildcard_index);
add_next_node(node, std::move(new_node)); add_next_node(node, std::move(new_node));
} }
size_t Router::add_route(const StringRef &pattern, size_t index) { size_t Router::add_route(const StringRef &pattern, size_t idx, bool wildcard) {
ssize_t index = -1, wildcard_index = -1;
if (wildcard) {
wildcard_index = idx;
} else {
index = idx;
}
auto node = &root_; auto node = &root_;
size_t i = 0; size_t i = 0;
for (;;) { for (;;) {
auto next_node = find_next_node(node, pattern[i]); auto next_node = find_next_node(node, pattern[i]);
if (next_node == nullptr) { if (next_node == nullptr) {
add_node(node, pattern.c_str() + i, pattern.size() - i, index); add_node(node, pattern.c_str() + i, pattern.size() - i, index,
return index; wildcard_index);
return idx;
} }
node = next_node; node = next_node;
@ -93,12 +102,22 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
// The common prefix was matched // The common prefix was matched
if (slen == node->len) { if (slen == node->len) {
// Complete match // Complete match
if (node->index != -1) { if (index != -1) {
// Return the existing index for duplicates. if (node->index != -1) {
return node->index; // Return the existing index for duplicates.
return node->index;
}
node->index = index;
return idx;
} }
node->index = index;
return index; assert(wildcard_index != -1);
if (node->wildcard_index != -1) {
return node->wildcard_index;
}
node->wildcard_index = wildcard_index;
return idx;
} }
if (slen > node->len) { if (slen > node->len) {
@ -112,27 +131,30 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
if (node->len > j) { if (node->len > j) {
// node must be split into 2 nodes. new_node is now the child // node must be split into 2 nodes. new_node is now the child
// of node. // of node.
auto new_node = auto new_node = make_unique<RNode>(&node->s[j], node->len - j,
make_unique<RNode>(&node->s[j], node->len - j, node->index); node->index, node->wildcard_index);
std::swap(node->next, new_node->next); std::swap(node->next, new_node->next);
node->len = j; node->len = j;
node->index = -1; node->index = -1;
node->wildcard_index = -1;
add_next_node(node, std::move(new_node)); add_next_node(node, std::move(new_node));
if (slen == j) { if (slen == j) {
node->index = index; node->index = index;
return index; node->wildcard_index = wildcard_index;
return idx;
} }
} }
i += j; i += j;
assert(pattern.size() > i); assert(pattern.size() > i);
add_node(node, pattern.c_str() + i, pattern.size() - i, index); add_node(node, pattern.c_str() + i, pattern.size() - i, index,
wildcard_index);
return index; return idx;
} }
} }
@ -169,8 +191,10 @@ const RNode *match_complete(size_t *offset, const RNode *node,
} // namespace } // namespace
namespace { namespace {
const RNode *match_partial(const RNode *node, size_t offset, const char *first, const RNode *match_partial(bool *pattern_is_wildcard, const RNode *node,
const char *last) { size_t offset, const char *first, const char *last) {
*pattern_is_wildcard = false;
if (first == last) { if (first == last) {
if (node->len == offset) { if (node->len == offset) {
return node; return node;
@ -207,8 +231,12 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
return nullptr; return nullptr;
} }
if (node->index != -1 && node->s[node->len - 1] == '/') { if (node->wildcard_index != -1) {
found_node = node; found_node = node;
*pattern_is_wildcard = true;
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
found_node = node;
*pattern_is_wildcard = false;
} }
assert(node->len == offset + n); assert(node->len == offset + n);
@ -233,6 +261,7 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
if (node->len == n) { if (node->len == n) {
// Complete match with this node // Complete match with this node
if (node->index != -1) { if (node->index != -1) {
*pattern_is_wildcard = false;
return node; return node;
} }
@ -246,16 +275,21 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
// pattern is "/foo/" and path is "/foo", we consider they // pattern is "/foo/" and path is "/foo", we consider they
// match. // match.
if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') { if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') {
*pattern_is_wildcard = false;
return node; return node;
} }
return found_node; return found_node;
} }
// This is the case when pattern which ends with "/" is included if (node->wildcard_index != -1) {
// in query.
if (node->index != -1 && node->s[node->len - 1] == '/') {
found_node = node; found_node = node;
*pattern_is_wildcard = true;
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
// This is the case when pattern which ends with "/" is included
// in query.
found_node = node;
*pattern_is_wildcard = false;
} }
assert(node->len == n); assert(node->len == n);
@ -272,12 +306,14 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const {
return -1; return -1;
} }
node = match_partial(node, offset, std::begin(path), std::end(path)); bool pattern_is_wildcard;
node = match_partial(&pattern_is_wildcard, node, offset, std::begin(path),
std::end(path));
if (node == nullptr || node == &root_) { if (node == nullptr || node == &root_) {
return -1; return -1;
} }
return node->index; return pattern_is_wildcard ? node->wildcard_index : node->index;
} }
ssize_t Router::match(const StringRef &s) const { ssize_t Router::match(const StringRef &s) const {

View File

@ -38,7 +38,7 @@ namespace shrpx {
struct RNode { struct RNode {
RNode(); RNode();
RNode(const char *s, size_t len, size_t index); RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index);
RNode(RNode &&) = default; RNode(RNode &&) = default;
RNode(const RNode &) = delete; RNode(const RNode &) = delete;
RNode &operator=(RNode &&) = default; RNode &operator=(RNode &&) = default;
@ -54,6 +54,10 @@ struct RNode {
// Index of pattern if match ends in this node. Note that we don't // Index of pattern if match ends in this node. Note that we don't
// store duplicated pattern. // store duplicated pattern.
ssize_t index; ssize_t index;
// Index of the wildcard pattern that ends at this node.  It applies
// when the query has this node as a prefix and still has a suffix
// left to match.  Note that we don't store duplicated patterns.
ssize_t wildcard_index;
}; };
class Router { class Router {
@ -66,8 +70,13 @@ public:
Router &operator=(const Router &) = delete; Router &operator=(const Router &) = delete;
// Adds route |pattern| with its |index|. If same pattern has // Adds route |pattern| with its |index|. If same pattern has
// already been added, the existing index is returned. // already been added, the existing index is returned. If
size_t add_route(const StringRef &pattern, size_t index); // |wildcard| is true, |pattern| is considered as wildcard pattern,
// and all paths which have the |pattern| as prefix and are strictly
// longer than |pattern| match. The wildcard pattern only works
// with match(const StringRef&, const StringRef&).
size_t add_route(const StringRef &pattern, size_t index,
bool wildcard = false);
// Returns the matched index of pattern. -1 if there is no match. // Returns the matched index of pattern. -1 if there is no match.
ssize_t match(const StringRef &host, const StringRef &path) const; ssize_t match(const StringRef &host, const StringRef &path) const;
// Returns the matched index of pattern |s|. -1 if there is no // Returns the matched index of pattern |s|. -1 if there is no
@ -84,7 +93,8 @@ public:
ssize_t match_prefix(size_t *nread, const RNode **last_node, ssize_t match_prefix(size_t *nread, const RNode **last_node,
const StringRef &s) const; const StringRef &s) const;
void add_node(RNode *node, const char *pattern, size_t patlen, size_t index); void add_node(RNode *node, const char *pattern, size_t patlen, ssize_t index,
ssize_t wildcard_index);
void dump() const; void dump() const;

View File

@ -33,6 +33,7 @@ namespace shrpx {
struct Pattern { struct Pattern {
StringRef pattern; StringRef pattern;
size_t idx; size_t idx;
bool wildcard;
}; };
void test_shrpx_router_match(void) { void test_shrpx_router_match(void) {
@ -88,6 +89,59 @@ void test_shrpx_router_match(void) {
CU_ASSERT(5 == idx); CU_ASSERT(5 == idx);
} }
void test_shrpx_router_match_wildcard(void) {
constexpr auto patterns = std::array<Pattern, 6>{{
{StringRef::from_lit("nghttp2.org/"), 0},
{StringRef::from_lit("nghttp2.org/"), 1, true},
{StringRef::from_lit("nghttp2.org/alpha/"), 2},
{StringRef::from_lit("nghttp2.org/alpha/"), 3, true},
{StringRef::from_lit("nghttp2.org/bravo"), 4},
{StringRef::from_lit("nghttp2.org/bravo"), 5, true},
}};
Router router;
for (auto &p : patterns) {
router.add_route(p.pattern, p.idx, p.wildcard);
}
CU_ASSERT(0 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/")));
CU_ASSERT(1 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/a")));
CU_ASSERT(1 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/charlie")));
CU_ASSERT(2 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha")));
CU_ASSERT(2 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/")));
CU_ASSERT(3 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/b")));
CU_ASSERT(4 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravo")));
CU_ASSERT(5 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravocharlie")));
CU_ASSERT(5 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravo/")));
}
void test_shrpx_router_match_prefix(void) { void test_shrpx_router_match_prefix(void) {
auto patterns = std::vector<Pattern>{ auto patterns = std::vector<Pattern>{
{StringRef::from_lit("gro.2ptthgn."), 0}, {StringRef::from_lit("gro.2ptthgn."), 0},

View File

@ -32,6 +32,7 @@
namespace shrpx { namespace shrpx {
void test_shrpx_router_match(void); void test_shrpx_router_match(void);
void test_shrpx_router_match_wildcard(void);
void test_shrpx_router_match_prefix(void); void test_shrpx_router_match_prefix(void);
} // namespace shrpx } // namespace shrpx