nghttpx: Wildcard path matching
This commit adds wildcard path matching. If the path pattern given in the backend option ends with "*", it is treated as a wildcard path. A request path matches when it has the wildcard path without its trailing "*" as a prefix and is strictly longer than that prefix; in other words, "*" must match at least one character. For example, the pattern "/foo*" matches "/foo/" and "/foobar", but it does not match "/foo" or "/fo".
This commit is contained in:
parent
a584cf5a4f
commit
b5007d45f7
|
@ -130,6 +130,8 @@ int main(int argc, char *argv[]) {
|
||||||
!CU_add_test(pSuite, "http_create_via_header_value",
|
!CU_add_test(pSuite, "http_create_via_header_value",
|
||||||
shrpx::test_shrpx_http_create_via_header_value) ||
|
shrpx::test_shrpx_http_create_via_header_value) ||
|
||||||
!CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) ||
|
!CU_add_test(pSuite, "router_match", shrpx::test_shrpx_router_match) ||
|
||||||
|
!CU_add_test(pSuite, "router_match_wildcard",
|
||||||
|
shrpx::test_shrpx_router_match_wildcard) ||
|
||||||
!CU_add_test(pSuite, "router_match_prefix",
|
!CU_add_test(pSuite, "router_match_prefix",
|
||||||
shrpx::test_shrpx_router_match_prefix) ||
|
shrpx::test_shrpx_router_match_prefix) ||
|
||||||
!CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) ||
|
!CU_add_test(pSuite, "util_streq", shrpx::test_util_streq) ||
|
||||||
|
|
12
src/shrpx.cc
12
src/shrpx.cc
|
@ -1688,6 +1688,18 @@ Connections:
|
||||||
match against "nghttp2.org". The exact hosts match
|
match against "nghttp2.org". The exact hosts match
|
||||||
takes precedence over the wildcard hosts match.
|
takes precedence over the wildcard hosts match.
|
||||||
|
|
||||||
|
If path part ends with "*", it is treated as wildcard
|
||||||
|
path. The wildcard path behaves differently from the
|
||||||
|
normal path. For normal path, match is made around the
|
||||||
|
boundary of path component separator,"/". On the other
|
||||||
|
hand, the wildcard path does not take into account the
|
||||||
|
path component separator. All paths which include the
|
||||||
|
wildcard path without last "*" as prefix, and are
|
||||||
|
strictly longer than wildcard path without last "*" are
|
||||||
|
matched. "*" must match at least one character. For
|
||||||
|
example, the pattern "/foo*" matches "/foo/" and
|
||||||
|
"/foobar". But it does not match "/foo", or "/fo".
|
||||||
|
|
||||||
If <PATTERN> is omitted or empty string, "/" is used as
|
If <PATTERN> is omitted or empty string, "/" is used as
|
||||||
pattern, which matches all request paths (catch-all
|
pattern, which matches all request paths (catch-all
|
||||||
pattern). The catch-all backend must be given.
|
pattern). The catch-all backend must be given.
|
||||||
|
|
|
@ -967,6 +967,12 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
|
||||||
auto host = StringRef{std::begin(g.pattern) + 1, path_first};
|
auto host = StringRef{std::begin(g.pattern) + 1, path_first};
|
||||||
auto path = StringRef{path_first, std::end(g.pattern)};
|
auto path = StringRef{path_first, std::end(g.pattern)};
|
||||||
|
|
||||||
|
auto path_is_wildcard = false;
|
||||||
|
if (path[path.size() - 1] == '*') {
|
||||||
|
path = StringRef{std::begin(path), std::begin(path) + path.size() - 1};
|
||||||
|
path_is_wildcard = true;
|
||||||
|
}
|
||||||
|
|
||||||
auto it = std::find_if(
|
auto it = std::find_if(
|
||||||
std::begin(wildcard_patterns), std::end(wildcard_patterns),
|
std::begin(wildcard_patterns), std::end(wildcard_patterns),
|
||||||
[&host](const WildcardPattern &wp) { return wp.host == host; });
|
[&host](const WildcardPattern &wp) { return wp.host == host; });
|
||||||
|
@ -975,7 +981,7 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
|
||||||
wildcard_patterns.emplace_back(host);
|
wildcard_patterns.emplace_back(host);
|
||||||
|
|
||||||
auto &router = wildcard_patterns.back().router;
|
auto &router = wildcard_patterns.back().router;
|
||||||
router.add_route(path, idx);
|
router.add_route(path, idx, path_is_wildcard);
|
||||||
|
|
||||||
auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1);
|
auto iov = make_byte_ref(downstreamconf.balloc, host.size() + 1);
|
||||||
auto p = iov.base;
|
auto p = iov.base;
|
||||||
|
@ -985,13 +991,20 @@ int parse_mapping(Config *config, DownstreamAddrConfig &addr,
|
||||||
|
|
||||||
rw_router.add_route(rev_host, wildcard_patterns.size() - 1);
|
rw_router.add_route(rev_host, wildcard_patterns.size() - 1);
|
||||||
} else {
|
} else {
|
||||||
(*it).router.add_route(path, idx);
|
(*it).router.add_route(path, idx, path_is_wildcard);
|
||||||
}
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
router.add_route(g.pattern, idx);
|
auto path_is_wildcard = false;
|
||||||
|
if (pattern[pattern.size() - 1] == '*') {
|
||||||
|
pattern = StringRef{std::begin(pattern),
|
||||||
|
std::begin(pattern) + pattern.size() - 1};
|
||||||
|
path_is_wildcard = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
router.add_route(pattern, idx, path_is_wildcard);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,10 +31,10 @@
|
||||||
|
|
||||||
namespace shrpx {
|
namespace shrpx {
|
||||||
|
|
||||||
RNode::RNode() : s(nullptr), len(0), index(-1) {}
|
RNode::RNode() : s(nullptr), len(0), index(-1), wildcard_index(-1) {}
|
||||||
|
|
||||||
RNode::RNode(const char *s, size_t len, size_t index)
|
RNode::RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index)
|
||||||
: s(s), len(len), index(index) {}
|
: s(s), len(len), index(index), wildcard_index(wildcard_index) {}
|
||||||
|
|
||||||
Router::Router() : balloc_(1024, 1024), root_{} {}
|
Router::Router() : balloc_(1024, 1024), root_{} {}
|
||||||
|
|
||||||
|
@ -64,21 +64,30 @@ void add_next_node(RNode *node, std::unique_ptr<RNode> new_node) {
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void Router::add_node(RNode *node, const char *pattern, size_t patlen,
|
void Router::add_node(RNode *node, const char *pattern, size_t patlen,
|
||||||
size_t index) {
|
ssize_t index, ssize_t wildcard_index) {
|
||||||
auto pat = make_string_ref(balloc_, StringRef{pattern, patlen});
|
auto pat = make_string_ref(balloc_, StringRef{pattern, patlen});
|
||||||
auto new_node = make_unique<RNode>(pat.c_str(), pat.size(), index);
|
auto new_node =
|
||||||
|
make_unique<RNode>(pat.c_str(), pat.size(), index, wildcard_index);
|
||||||
add_next_node(node, std::move(new_node));
|
add_next_node(node, std::move(new_node));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Router::add_route(const StringRef &pattern, size_t index) {
|
size_t Router::add_route(const StringRef &pattern, size_t idx, bool wildcard) {
|
||||||
|
ssize_t index = -1, wildcard_index = -1;
|
||||||
|
if (wildcard) {
|
||||||
|
wildcard_index = idx;
|
||||||
|
} else {
|
||||||
|
index = idx;
|
||||||
|
}
|
||||||
|
|
||||||
auto node = &root_;
|
auto node = &root_;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto next_node = find_next_node(node, pattern[i]);
|
auto next_node = find_next_node(node, pattern[i]);
|
||||||
if (next_node == nullptr) {
|
if (next_node == nullptr) {
|
||||||
add_node(node, pattern.c_str() + i, pattern.size() - i, index);
|
add_node(node, pattern.c_str() + i, pattern.size() - i, index,
|
||||||
return index;
|
wildcard_index);
|
||||||
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = next_node;
|
node = next_node;
|
||||||
|
@ -93,12 +102,22 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
|
||||||
// The common prefix was matched
|
// The common prefix was matched
|
||||||
if (slen == node->len) {
|
if (slen == node->len) {
|
||||||
// Complete match
|
// Complete match
|
||||||
if (node->index != -1) {
|
if (index != -1) {
|
||||||
// Return the existing index for duplicates.
|
if (node->index != -1) {
|
||||||
return node->index;
|
// Return the existing index for duplicates.
|
||||||
|
return node->index;
|
||||||
|
}
|
||||||
|
node->index = index;
|
||||||
|
return idx;
|
||||||
}
|
}
|
||||||
node->index = index;
|
|
||||||
return index;
|
assert(wildcard_index != -1);
|
||||||
|
|
||||||
|
if (node->wildcard_index != -1) {
|
||||||
|
return node->wildcard_index;
|
||||||
|
}
|
||||||
|
node->wildcard_index = wildcard_index;
|
||||||
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slen > node->len) {
|
if (slen > node->len) {
|
||||||
|
@ -112,27 +131,30 @@ size_t Router::add_route(const StringRef &pattern, size_t index) {
|
||||||
if (node->len > j) {
|
if (node->len > j) {
|
||||||
// node must be split into 2 nodes. new_node is now the child
|
// node must be split into 2 nodes. new_node is now the child
|
||||||
// of node.
|
// of node.
|
||||||
auto new_node =
|
auto new_node = make_unique<RNode>(&node->s[j], node->len - j,
|
||||||
make_unique<RNode>(&node->s[j], node->len - j, node->index);
|
node->index, node->wildcard_index);
|
||||||
std::swap(node->next, new_node->next);
|
std::swap(node->next, new_node->next);
|
||||||
|
|
||||||
node->len = j;
|
node->len = j;
|
||||||
node->index = -1;
|
node->index = -1;
|
||||||
|
node->wildcard_index = -1;
|
||||||
|
|
||||||
add_next_node(node, std::move(new_node));
|
add_next_node(node, std::move(new_node));
|
||||||
|
|
||||||
if (slen == j) {
|
if (slen == j) {
|
||||||
node->index = index;
|
node->index = index;
|
||||||
return index;
|
node->wildcard_index = wildcard_index;
|
||||||
|
return idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
i += j;
|
i += j;
|
||||||
|
|
||||||
assert(pattern.size() > i);
|
assert(pattern.size() > i);
|
||||||
add_node(node, pattern.c_str() + i, pattern.size() - i, index);
|
add_node(node, pattern.c_str() + i, pattern.size() - i, index,
|
||||||
|
wildcard_index);
|
||||||
|
|
||||||
return index;
|
return idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,8 +191,10 @@ const RNode *match_complete(size_t *offset, const RNode *node,
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
const RNode *match_partial(const RNode *node, size_t offset, const char *first,
|
const RNode *match_partial(bool *pattern_is_wildcard, const RNode *node,
|
||||||
const char *last) {
|
size_t offset, const char *first, const char *last) {
|
||||||
|
*pattern_is_wildcard = false;
|
||||||
|
|
||||||
if (first == last) {
|
if (first == last) {
|
||||||
if (node->len == offset) {
|
if (node->len == offset) {
|
||||||
return node;
|
return node;
|
||||||
|
@ -207,8 +231,12 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->index != -1 && node->s[node->len - 1] == '/') {
|
if (node->wildcard_index != -1) {
|
||||||
found_node = node;
|
found_node = node;
|
||||||
|
*pattern_is_wildcard = true;
|
||||||
|
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
|
||||||
|
found_node = node;
|
||||||
|
*pattern_is_wildcard = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(node->len == offset + n);
|
assert(node->len == offset + n);
|
||||||
|
@ -233,6 +261,7 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
|
||||||
if (node->len == n) {
|
if (node->len == n) {
|
||||||
// Complete match with this node
|
// Complete match with this node
|
||||||
if (node->index != -1) {
|
if (node->index != -1) {
|
||||||
|
*pattern_is_wildcard = false;
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,16 +275,21 @@ const RNode *match_partial(const RNode *node, size_t offset, const char *first,
|
||||||
// pattern is "/foo/" and path is "/foo", we consider they
|
// pattern is "/foo/" and path is "/foo", we consider they
|
||||||
// match.
|
// match.
|
||||||
if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') {
|
if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') {
|
||||||
|
*pattern_is_wildcard = false;
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
return found_node;
|
return found_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is the case when pattern which ends with "/" is included
|
if (node->wildcard_index != -1) {
|
||||||
// in query.
|
|
||||||
if (node->index != -1 && node->s[node->len - 1] == '/') {
|
|
||||||
found_node = node;
|
found_node = node;
|
||||||
|
*pattern_is_wildcard = true;
|
||||||
|
} else if (node->index != -1 && node->s[node->len - 1] == '/') {
|
||||||
|
// This is the case when pattern which ends with "/" is included
|
||||||
|
// in query.
|
||||||
|
found_node = node;
|
||||||
|
*pattern_is_wildcard = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(node->len == n);
|
assert(node->len == n);
|
||||||
|
@ -272,12 +306,14 @@ ssize_t Router::match(const StringRef &host, const StringRef &path) const {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = match_partial(node, offset, std::begin(path), std::end(path));
|
bool pattern_is_wildcard;
|
||||||
|
node = match_partial(&pattern_is_wildcard, node, offset, std::begin(path),
|
||||||
|
std::end(path));
|
||||||
if (node == nullptr || node == &root_) {
|
if (node == nullptr || node == &root_) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return node->index;
|
return pattern_is_wildcard ? node->wildcard_index : node->index;
|
||||||
}
|
}
|
||||||
|
|
||||||
ssize_t Router::match(const StringRef &s) const {
|
ssize_t Router::match(const StringRef &s) const {
|
||||||
|
|
|
@ -38,7 +38,7 @@ namespace shrpx {
|
||||||
|
|
||||||
struct RNode {
|
struct RNode {
|
||||||
RNode();
|
RNode();
|
||||||
RNode(const char *s, size_t len, size_t index);
|
RNode(const char *s, size_t len, ssize_t index, ssize_t wildcard_index);
|
||||||
RNode(RNode &&) = default;
|
RNode(RNode &&) = default;
|
||||||
RNode(const RNode &) = delete;
|
RNode(const RNode &) = delete;
|
||||||
RNode &operator=(RNode &&) = default;
|
RNode &operator=(RNode &&) = default;
|
||||||
|
@ -54,6 +54,10 @@ struct RNode {
|
||||||
// Index of pattern if match ends in this node. Note that we don't
|
// Index of pattern if match ends in this node. Note that we don't
|
||||||
// store duplicated pattern.
|
// store duplicated pattern.
|
||||||
ssize_t index;
|
ssize_t index;
|
||||||
|
// Index of wildcard pattern if query includes this node as prefix
|
||||||
|
// and it still has suffix to match. Note that we don't store
|
||||||
|
// duplicated pattern.
|
||||||
|
ssize_t wildcard_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Router {
|
class Router {
|
||||||
|
@ -66,8 +70,13 @@ public:
|
||||||
Router &operator=(const Router &) = delete;
|
Router &operator=(const Router &) = delete;
|
||||||
|
|
||||||
// Adds route |pattern| with its |index|. If same pattern has
|
// Adds route |pattern| with its |index|. If same pattern has
|
||||||
// already been added, the existing index is returned.
|
// already been added, the existing index is returned. If
|
||||||
size_t add_route(const StringRef &pattern, size_t index);
|
// |wildcard| is true, |pattern| is considered as wildcard pattern,
|
||||||
|
// and all paths which have the |pattern| as prefix and are strictly
|
||||||
|
// longer than |pattern| match. The wildcard pattern only works
|
||||||
|
// with match(const StringRef&, const StringRef&).
|
||||||
|
size_t add_route(const StringRef &pattern, size_t index,
|
||||||
|
bool wildcard = false);
|
||||||
// Returns the matched index of pattern. -1 if there is no match.
|
// Returns the matched index of pattern. -1 if there is no match.
|
||||||
ssize_t match(const StringRef &host, const StringRef &path) const;
|
ssize_t match(const StringRef &host, const StringRef &path) const;
|
||||||
// Returns the matched index of pattern |s|. -1 if there is no
|
// Returns the matched index of pattern |s|. -1 if there is no
|
||||||
|
@ -84,7 +93,8 @@ public:
|
||||||
ssize_t match_prefix(size_t *nread, const RNode **last_node,
|
ssize_t match_prefix(size_t *nread, const RNode **last_node,
|
||||||
const StringRef &s) const;
|
const StringRef &s) const;
|
||||||
|
|
||||||
void add_node(RNode *node, const char *pattern, size_t patlen, size_t index);
|
void add_node(RNode *node, const char *pattern, size_t patlen, ssize_t index,
|
||||||
|
ssize_t wildcard_index);
|
||||||
|
|
||||||
void dump() const;
|
void dump() const;
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ namespace shrpx {
|
||||||
struct Pattern {
|
struct Pattern {
|
||||||
StringRef pattern;
|
StringRef pattern;
|
||||||
size_t idx;
|
size_t idx;
|
||||||
|
bool wildcard;
|
||||||
};
|
};
|
||||||
|
|
||||||
void test_shrpx_router_match(void) {
|
void test_shrpx_router_match(void) {
|
||||||
|
@ -88,6 +89,59 @@ void test_shrpx_router_match(void) {
|
||||||
CU_ASSERT(5 == idx);
|
CU_ASSERT(5 == idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_shrpx_router_match_wildcard(void) {
|
||||||
|
constexpr auto patterns = std::array<Pattern, 6>{{
|
||||||
|
{StringRef::from_lit("nghttp2.org/"), 0},
|
||||||
|
{StringRef::from_lit("nghttp2.org/"), 1, true},
|
||||||
|
{StringRef::from_lit("nghttp2.org/alpha/"), 2},
|
||||||
|
{StringRef::from_lit("nghttp2.org/alpha/"), 3, true},
|
||||||
|
{StringRef::from_lit("nghttp2.org/bravo"), 4},
|
||||||
|
{StringRef::from_lit("nghttp2.org/bravo"), 5, true},
|
||||||
|
}};
|
||||||
|
|
||||||
|
Router router;
|
||||||
|
|
||||||
|
for (auto &p : patterns) {
|
||||||
|
router.add_route(p.pattern, p.idx, p.wildcard);
|
||||||
|
}
|
||||||
|
|
||||||
|
CU_ASSERT(0 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/")));
|
||||||
|
|
||||||
|
CU_ASSERT(1 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/a")));
|
||||||
|
|
||||||
|
CU_ASSERT(1 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/charlie")));
|
||||||
|
|
||||||
|
CU_ASSERT(2 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/alpha")));
|
||||||
|
|
||||||
|
CU_ASSERT(2 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/alpha/")));
|
||||||
|
|
||||||
|
CU_ASSERT(3 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/alpha/b")));
|
||||||
|
|
||||||
|
CU_ASSERT(4 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/bravo")));
|
||||||
|
|
||||||
|
CU_ASSERT(5 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/bravocharlie")));
|
||||||
|
|
||||||
|
CU_ASSERT(5 ==
|
||||||
|
router.match(StringRef::from_lit("nghttp2.org"),
|
||||||
|
StringRef::from_lit("/bravo/")));
|
||||||
|
}
|
||||||
|
|
||||||
void test_shrpx_router_match_prefix(void) {
|
void test_shrpx_router_match_prefix(void) {
|
||||||
auto patterns = std::vector<Pattern>{
|
auto patterns = std::vector<Pattern>{
|
||||||
{StringRef::from_lit("gro.2ptthgn."), 0},
|
{StringRef::from_lit("gro.2ptthgn."), 0},
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
namespace shrpx {
|
namespace shrpx {
|
||||||
|
|
||||||
void test_shrpx_router_match(void);
|
void test_shrpx_router_match(void);
|
||||||
|
void test_shrpx_router_match_wildcard(void);
|
||||||
void test_shrpx_router_match_prefix(void);
|
void test_shrpx_router_match_prefix(void);
|
||||||
|
|
||||||
} // namespace shrpx
|
} // namespace shrpx
|
||||||
|
|
Loading…
Reference in New Issue