nghttpx: Wildcard path matching

This commit adds wildcard path matching.  If the path pattern given in
the backend option ends with "*", it is treated as a wildcard path.
The "*" must match at least one character.  A request path matches a
wildcard path if it has the wildcard path without its last "*" as a
prefix, and is strictly longer than that prefix.
This commit is contained in:
Tatsuhiro Tsujikawa 2017-05-10 23:34:04 +09:00
parent a584cf5a4f
commit b5007d45f7
7 changed files with 161 additions and 33 deletions

View File

@ -130,6 +130,8 @@ int main(int argc, char *argv[]) {
!CU_add_test(pSuite, "http_create_via_header_value",
shrpx::test_shrpx_http_create_via_header_value) ||
!CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) ||
!CU_add_test(pSuite, "router_match_wildcard",
shrpx::test_shrpx_router_match_wildcard) ||
!CU_add_test(pSuite, "router_match_prefix",
shrpx::test_shrpx_router_match_prefix) ||
!CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) ||

View File

@ -1688,6 +1688,18 @@ Connections:
match against "nghttp2.org". The exact hosts match
takes precedence over the wildcard hosts match.
If the path part ends with "*", it is treated as a
wildcard path. A wildcard path behaves differently from
a normal path. For a normal path, matching is done on
the boundary of the path component separator, "/". A
wildcard path, on the other hand, does not take the path
component separator into account. Every path which has
the wildcard path without its last "*" as a prefix, and
is strictly longer than that prefix, is matched; that
is, "*" must match at least one character. For example,
the pattern "/foo*" matches "/foo/" and "/foobar", but
it does not match "/foo" or "/fo".
If <PATTERN> is omitted or empty string, "/" is used as
pattern, which matches all request paths (catch-all
pattern). The catch-all backend must be given.

View File

@ -967,6 +967,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
auto host = StringRef{std::begin(g.pattern) + 1, path_first};
auto path = StringRef{path_first, std::end(g.pattern)};
auto path_is_wildcard = false;
if (path[path.size() - 1] == '*') {
path = StringRef{std::begin(path), std::begin(path) + path.size() - 1};
path_is_wildcard = true;
}
auto it = std::find_if(
std::begin(wildcard_patterns), std::end(wildcard_patterns),
[&host](const WildcardPattern &wp) { return wp.host == host; });
@ -975,7 +981,7 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
wildcard_patterns.emplace_back(host);
auto &router = wildcard_patterns.back().router;
router.add_route(path, idx);
router.add_route(path, idx, path_is_wildcard);
auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1);
auto p = iov.base;
@ -985,13 +991,20 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
rw_router.add_route(rev_host, wildcard_patterns.size() - 1);
} else {
(*it).router.add_route(path, idx);
(*it).router.add_route(path, idx, path_is_wildcard);
}
continue;
}
router.add_route(g.pattern, idx);
auto path_is_wildcard = false;
if (pattern[pattern.size() - 1] == '*') {
pattern = StringRef{std::begin(pattern),
std::begin(pattern) + pattern.size() - 1};
path_is_wildcard = true;
}
router.add_route(pattern, idx, path_is_wildcard);
}
return 0;
}

View File

@ -31,10 +31,10 @@
namespace shrpx {
RNode::RNode() : s(nullptr), len(0), index(-1) {}
RNode::RNode() : s(nullptr), len(0), index(-1), wildcard_index(-1) {}
RNode::RNode(const char *s, size_t len, size_t index)
: s(s), len(len), index(index) {}
RNode::RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index)
: s(s), len(len), index(index), wildcard_index(wildcard_index) {}
Router::Router() : balloc_(1024, 1024), root_{} {}
@ -64,21 +64,30 @@ void add_next_node(RNode *node, std::unique_ptr<RNode> new_node) {
} // namespace
void Router::add_node(RNode *node, const char *pattern, size_t patlen,
size_t index) {
ssize_t index, ssize_t wildcard_index) {
auto pat = make_string_ref(balloc_, StringRef{pattern, patlen});
auto new_node = make_unique<RNode>(pat.c_str(), pat.size(), index);
auto new_node =
make_unique<RNode>(pat.c_str(), pat.size(), index, wildcard_index);
add_next_node(node, std::move(new_node));
}
size_t Router::add_route(const StringRef &pattern, size_t index) {
size_t Router::add_route(const StringRef &pattern, size_t idx, bool wildcard) {
ssize_t index = -1, wildcard_index = -1;
if (wildcard) {
wildcard_index = idx;
} else {
index = idx;
}
auto node = &root_;
size_t i = 0;
for (;;) {
auto next_node = find_next_node(node, pattern[i]);
if (next_node == nullptr) {
add_node(node, pattern.c_str() + i, pattern.size() - i, index);
return index;
add_node(node, pattern.c_str() + i, pattern.size() - i, index,
wildcard_index);
return idx;
}
node = next_node;
@ -93,12 +102,22 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
// The common prefix was matched
if (slen == node->len) {
// Complete match
if (index != -1) {
if (node->index != -1) {
// Return the existing index for duplicates.
return node->index;
}
node->index = index;
return index;
return idx;
}
assert(wildcard_index != -1);
if (node->wildcard_index != -1) {
return node->wildcard_index;
}
node->wildcard_index = wildcard_index;
return idx;
}
if (slen > node->len) {
@ -112,27 +131,30 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
if (node->len > j) {
// node must be split into 2 nodes. new_node is now the child
// of node.
auto new_node =
make_unique<RNode>(&node->s[j], node->len - j, node->index);
auto new_node = make_unique<RNode>(&node->s[j], node->len - j,
node->index, node->wildcard_index);
std::swap(node->next, new_node->next);
node->len = j;
node->index = -1;
node->wildcard_index = -1;
add_next_node(node, std::move(new_node));
if (slen == j) {
node->index = index;
return index;
node->wildcard_index = wildcard_index;
return idx;
}
}
i += j;
assert(pattern.size() > i);
add_node(node, pattern.c_str() + i, pattern.size() - i, index);
add_node(node, pattern.c_str() + i, pattern.size() - i, index,
wildcard_index);
return index;
return idx;
}
}
@ -169,8 +191,10 @@ const RNode *match_complete(size_t *offset, const RNode *node,
} // namespace
namespace {
const RNode *match_partial(const RNode *node, size_t offset, const char *first,
const char *last) {
const RNode *match_partial(bool *pattern_is_wildcard, const RNode *node,
size_t offset, const char *first, const char *last) {
*pattern_is_wildcard = false;
if (first == last) {
if (node->len == offset) {
return node;
@ -207,8 +231,12 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
return nullptr;
}
if (node->index != -1 && node->s[node->len - 1] == '/') {
if (node->wildcard_index != -1) {
found_node = node;
*pattern_is_wildcard = true;
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
found_node = node;
*pattern_is_wildcard = false;
}
assert(node->len == offset + n);
@ -233,6 +261,7 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
if (node->len == n) {
// Complete match with this node
if (node->index != -1) {
*pattern_is_wildcard = false;
return node;
}
@ -246,16 +275,21 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
// pattern is "/foo/" and path is "/foo", we consider they
// match.
if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') {
*pattern_is_wildcard = false;
return node;
}
return found_node;
}
if (node->wildcard_index != -1) {
found_node = node;
*pattern_is_wildcard = true;
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
// This is the case when pattern which ends with "/" is included
// in query.
if (node->index != -1 && node->s[node->len - 1] == '/') {
found_node = node;
*pattern_is_wildcard = false;
}
assert(node->len == n);
@ -272,12 +306,14 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const {
return -1;
}
node = match_partial(node, offset, std::begin(path), std::end(path));
bool pattern_is_wildcard;
node = match_partial(&pattern_is_wildcard, node, offset, std::begin(path),
std::end(path));
if (node == nullptr || node == &root_) {
return -1;
}
return node->index;
return pattern_is_wildcard ? node->wildcard_index : node->index;
}
ssize_t Router::match(const StringRef &s) const {

View File

@ -38,7 +38,7 @@ namespace shrpx {
struct RNode {
RNode();
RNode(const char *s, size_t len, size_t index);
RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index);
RNode(RNode &&) = default;
RNode(const RNode &) = delete;
RNode &operator=(RNode &&) = default;
@ -54,6 +54,10 @@ struct RNode {
// Index of pattern if match ends in this node. Note that we don't
// store duplicated pattern.
ssize_t index;
// Index of wildcard pattern if query includes this node as prefix
// and it still has suffix to match. Note that we don't store
// duplicated pattern.
ssize_t wildcard_index;
};
class Router {
@ -66,8 +70,13 @@ public:
Router &operator=(const Router &) = delete;
// Adds route |pattern| with its |index|. If same pattern has
// already been added, the existing index is returned.
size_t add_route(const StringRef &pattern, size_t index);
// already been added, the existing index is returned. If
// |wildcard| is true, |pattern| is considered as wildcard pattern,
// and all paths which have the |pattern| as prefix and are strictly
// longer than |pattern| match. The wildcard pattern only works
// with match(const StringRef&, const StringRef&).
size_t add_route(const StringRef &pattern, size_t index,
bool wildcard = false);
// Returns the matched index of pattern. -1 if there is no match.
ssize_t match(const StringRef &host, const StringRef &path) const;
// Returns the matched index of pattern |s|. -1 if there is no
@ -84,7 +93,8 @@ public:
ssize_t match_prefix(size_t *nread, const RNode **last_node,
const StringRef &s) const;
void add_node(RNode *node, const char *pattern, size_t patlen, size_t index);
void add_node(RNode *node, const char *pattern, size_t patlen, ssize_t index,
ssize_t wildcard_index);
void dump() const;

View File

@ -33,6 +33,7 @@ namespace shrpx {
// Route fixture for the router tests: one entry describes a single
// call to Router::add_route.
struct Pattern {
// Pattern string passed to Router::add_route.
StringRef pattern;
// Index registered for |pattern|; the tests compare the result of
// the router's match functions against it.
size_t idx;
// Passed as the |wildcard| argument of Router::add_route.  Entries
// whose initializer omits this member are value-initialized, i.e.,
// false.
bool wildcard;
};
void test_shrpx_router_match(void) {
@ -88,6 +89,59 @@ void test_shrpx_router_match(void) {
CU_ASSERT(5 == idx);
}
// Tests Router::match(host, path) when the same pattern string is
// registered twice: once as a normal pattern (even idx) and once as a
// wildcard pattern (odd idx).  A wildcard pattern matches paths which
// have the pattern as prefix and are strictly longer than it.
void test_shrpx_router_match_wildcard(void) {
constexpr auto patterns = std::array<Pattern, 6>{{
{StringRef::from_lit("nghttp2.org/"), 0},
{StringRef::from_lit("nghttp2.org/"), 1, true},
{StringRef::from_lit("nghttp2.org/alpha/"), 2},
{StringRef::from_lit("nghttp2.org/alpha/"), 3, true},
{StringRef::from_lit("nghttp2.org/bravo"), 4},
{StringRef::from_lit("nghttp2.org/bravo"), 5, true},
}};
Router router;
for (auto &p : patterns) {
router.add_route(p.pattern, p.idx, p.wildcard);
}
// Path equals the pattern "/": the wildcard needs at least one more
// character, so the normal pattern (0) is selected.
CU_ASSERT(0 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/")));
// Paths strictly longer than "/" which match no longer pattern are
// picked up by the wildcard "/" (1).
CU_ASSERT(1 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/a")));
CU_ASSERT(1 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/charlie")));
// The normal pattern "/alpha/" also matches the path "/alpha", which
// only lacks the trailing "/".
CU_ASSERT(2 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha")));
// Path equals the pattern "/alpha/": normal pattern (2) is selected.
CU_ASSERT(2 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/")));
// One character past "/alpha/" selects the wildcard pattern (3).
CU_ASSERT(3 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/alpha/b")));
// Path equals the pattern "/bravo": normal pattern (4) is selected.
CU_ASSERT(4 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravo")));
// The wildcard pattern does not take the path component separator
// into account: both "/bravocharlie" and "/bravo/" select wildcard
// "/bravo" (5).
CU_ASSERT(5 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravocharlie")));
CU_ASSERT(5 ==
router.match(StringRef::from_lit("nghttp2.org"),
StringRef::from_lit("/bravo/")));
}
void test_shrpx_router_match_prefix(void) {
auto patterns = std::vector<Pattern>{
{StringRef::from_lit("gro.2ptthgn."), 0},

View File

@ -32,6 +32,7 @@
namespace shrpx {
void test_shrpx_router_match(void);
void test_shrpx_router_match_wildcard(void);
void test_shrpx_router_match_prefix(void);
} // namespace shrpx