From 8acf9a2802989ffe328a365e4dc11948b30b0f1f Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Sat, 26 Sep 2015 02:38:45 +0900 Subject: [PATCH] nghttpx: Trie based routing --- src/Makefile.am | 1 + src/shrpx.cc | 9 +- src/shrpx_client_handler.cc | 12 +- src/shrpx_config.cc | 129 ++++++---------- src/shrpx_config.h | 31 ++-- src/shrpx_config_test.cc | 112 ++++++++++---- src/shrpx_router.cc | 291 ++++++++++++++++++++++++++++++++++++ src/shrpx_router.h | 76 ++++++++++ src/template.h | 25 ++++ 9 files changed, 550 insertions(+), 136 deletions(-) create mode 100644 src/shrpx_router.cc create mode 100644 src/shrpx_router.h diff --git a/src/Makefile.am b/src/Makefile.am index 85405cb4..627a944e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -128,6 +128,7 @@ NGHTTPX_SRCS = \ shrpx_worker_process.cc shrpx_worker_process.h \ shrpx_process.h \ shrpx_signal.cc shrpx_signal.h \ + shrpx_router.cc shrpx_router.h \ buffer.h memchunk.h template.h if HAVE_SPDYLAY diff --git a/src/shrpx.cc b/src/shrpx.cc index 5a3a48a6..a6779713 100644 --- a/src/shrpx.cc +++ b/src/shrpx.cc @@ -2370,6 +2370,8 @@ int main(int argc, char **argv) { DownstreamAddrGroup g("/"); g.addrs.push_back(std::move(addr)); + mod_config()->router.add_route(g.pattern.get(), 1, + get_config()->downstream_addr_groups.size()); mod_config()->downstream_addr_groups.push_back(std::move(g)); } else if (get_config()->http2_proxy || get_config()->client_proxy) { // We don't support host mapping in these cases. Move all @@ -2381,6 +2383,9 @@ int main(int argc, char **argv) { } std::vector().swap( mod_config()->downstream_addr_groups); + // maybe not necessary? 
+ mod_config()->router.add_route(catch_all.pattern.get(), 1, + get_config()->downstream_addr_groups.size()); mod_config()->downstream_addr_groups.push_back(std::move(catch_all)); } @@ -2391,11 +2396,11 @@ int main(int argc, char **argv) { ssize_t catch_all_group = -1; for (size_t i = 0; i < mod_config()->downstream_addr_groups.size(); ++i) { auto &g = mod_config()->downstream_addr_groups[i]; - if (g.pattern == "/") { + if (util::streq(g.pattern.get(), "/")) { catch_all_group = i; } if (LOG_ENABLED(INFO)) { - LOG(INFO) << "Host-path pattern: group " << i << ": '" << g.pattern + LOG(INFO) << "Host-path pattern: group " << i << ": '" << g.pattern.get() << "'"; for (auto &addr : g.addrs) { LOG(INFO) << "group " << i << " -> " << addr.host.get() diff --git a/src/shrpx_client_handler.cc b/src/shrpx_client_handler.cc index 0f31897b..719afa8f 100644 --- a/src/shrpx_client_handler.cc +++ b/src/shrpx_client_handler.cc @@ -635,18 +635,20 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { // have dealt with proxy case already, just use catch-all group. 
group = catch_all; } else { + auto &router = get_config()->router; if (!downstream->get_request_http2_authority().empty()) { group = match_downstream_addr_group( - downstream->get_request_http2_authority(), + router, downstream->get_request_http2_authority(), downstream->get_request_path(), groups, catch_all); } else { auto h = downstream->get_request_header(http2::HD_HOST); if (h) { - group = match_downstream_addr_group( - h->value, downstream->get_request_path(), groups, catch_all); - } else { - group = match_downstream_addr_group("", downstream->get_request_path(), + group = match_downstream_addr_group(router, h->value, + downstream->get_request_path(), groups, catch_all); + } else { + group = match_downstream_addr_group( + router, "", downstream->get_request_path(), groups, catch_all); } } } diff --git a/src/shrpx_config.cc b/src/shrpx_config.cc index affcab36..bdab013d 100644 --- a/src/shrpx_config.cc +++ b/src/shrpx_config.cc @@ -79,9 +79,9 @@ TicketKeys::~TicketKeys() { } DownstreamAddr::DownstreamAddr(const DownstreamAddr &other) - : addr(other.addr), host(other.host ? strcopy(other.host.get()) : nullptr), - hostport(other.hostport ? strcopy(other.hostport.get()) : nullptr), - port(other.port), host_unix(other.host_unix) {} + : addr(other.addr), host(strcopy(other.host)), + hostport(strcopy(other.hostport)), port(other.port), + host_unix(other.host_unix) {} DownstreamAddr &DownstreamAddr::operator=(const DownstreamAddr &other) { if (this == &other) { @@ -89,14 +89,29 @@ DownstreamAddr &DownstreamAddr::operator=(const DownstreamAddr &other) { } addr = other.addr; - host = (other.host ? strcopy(other.host.get()) : nullptr); - hostport = (other.hostport ? 
strcopy(other.hostport.get()) : nullptr); + host = strcopy(other.host); + hostport = strcopy(other.hostport); port = other.port; host_unix = other.host_unix; return *this; } +DownstreamAddrGroup::DownstreamAddrGroup(const DownstreamAddrGroup &other) + : pattern(strcopy(other.pattern)), addrs(other.addrs) {} + +DownstreamAddrGroup &DownstreamAddrGroup:: +operator=(const DownstreamAddrGroup &other) { + if (this == &other) { + return *this; + } + + pattern = strcopy(other.pattern); + addrs = other.addrs; + + return *this; +} + namespace { int split_host_port(char *host, size_t hostlen, uint16_t *port_ptr, const char *hostport, size_t hostportlen) { @@ -260,7 +275,6 @@ std::string read_passwd_from_file(const char *filename) { return line; } - std::pair parse_header(const char *optarg) { // We skip possible ":" at the start of optarg. const auto *colon = strchr(optarg + 1, ':'); @@ -576,7 +590,7 @@ void parse_mapping(const DownstreamAddr &addr, const char *src) { pattern += http2::normalize_path(slash, raw_pattern.second); } for (auto &g : mod_config()->downstream_addr_groups) { - if (g.pattern == pattern) { + if (g.pattern.get() == pattern) { g.addrs.push_back(addr); done = true; break; @@ -587,6 +601,10 @@ void parse_mapping(const DownstreamAddr &addr, const char *src) { } DownstreamAddrGroup g(pattern); g.addrs.push_back(addr); + + mod_config()->router.add_route(g.pattern.get(), strlen(g.pattern.get()), + get_config()->downstream_addr_groups.size()); + mod_config()->downstream_addr_groups.push_back(std::move(g)); } } @@ -2128,67 +2146,17 @@ int int_syslog_facility(const char *strfacility) { } namespace { -template -bool path_match(const std::string &pattern, const std::string &host, - InputIt path_first, InputIt path_last) { - if (pattern.back() != '/') { - return pattern.size() == host.size() + (path_last - path_first) && - std::equal(std::begin(host), std::end(host), std::begin(pattern)) && - std::equal(path_first, path_last, std::begin(pattern) + host.size()); - } 
- - if (pattern.size() >= host.size() && - std::equal(std::begin(host), std::end(host), std::begin(pattern)) && - util::startsWith(path_first, path_last, std::begin(pattern) + host.size(), - std::end(pattern))) { - return true; - } - - // If pattern ends with '/', and pattern and path matches without - // that slash, we consider they match to deal with request to the - // directory without trailing slash. That is if pattern is "/foo/" - // and path is "/foo", we consider they match. - - assert(!pattern.empty()); - return pattern.size() - 1 == host.size() + (path_last - path_first) && - std::equal(std::begin(host), std::end(host), std::begin(pattern)) && - std::equal(path_first, path_last, std::begin(pattern) + host.size()); -} -} // namespace - -namespace { -template -ssize_t match(const std::string &host, InputIt path_first, InputIt path_last, - const std::vector &groups) { - ssize_t res = -1; - size_t best = 0; - for (size_t i = 0; i < groups.size(); ++i) { - auto &g = groups[i]; - auto &pattern = g.pattern; - if (!path_match(pattern, host, path_first, path_last)) { - continue; - } - if (res == -1 || best < pattern.size()) { - best = pattern.size(); - res = i; - } - } - return res; -} -} // namespace - -namespace { -template -size_t match_downstream_addr_group_host( - const std::string &host, InputIt path_first, InputIt path_last, - const std::vector &groups, size_t catch_all) { - if (path_first == path_last || *path_first != '/') { - constexpr const char P[] = "/"; - auto group = match(host, P, P + 1, groups); +size_t +match_downstream_addr_group_host(const Router &router, const std::string &host, + const char *path, size_t pathlen, + const std::vector &groups, + size_t catch_all) { + if (pathlen == 0 || *path != '/') { + auto group = router.match(host, "/", 1); if (group != -1) { if (LOG_ENABLED(INFO)) { LOG(INFO) << "Found pattern with query " << host - << ", matched pattern=" << groups[group].pattern; + << ", matched pattern=" << groups[group].pattern.get(); 
} return group; } @@ -2197,25 +2165,24 @@ size_t match_downstream_addr_group_host( if (LOG_ENABLED(INFO)) { LOG(INFO) << "Perform mapping selection, using host=" << host - << ", path=" << std::string(path_first, path_last); + << ", path=" << std::string(path, pathlen); } - auto group = match(host, path_first, path_last, groups); + auto group = router.match(host, path, pathlen); if (group != -1) { if (LOG_ENABLED(INFO)) { LOG(INFO) << "Found pattern with query " << host - << std::string(path_first, path_last) - << ", matched pattern=" << groups[group].pattern; + << std::string(path, pathlen) + << ", matched pattern=" << groups[group].pattern.get(); } return group; } - group = match("", path_first, path_last, groups); + group = router.match("", path, pathlen); if (group != -1) { if (LOG_ENABLED(INFO)) { - LOG(INFO) << "Found pattern with query " - << std::string(path_first, path_last) - << ", matched pattern=" << groups[group].pattern; + LOG(INFO) << "Found pattern with query " << std::string(path, pathlen) + << ", matched pattern=" << groups[group].pattern.get(); } return group; } @@ -2227,9 +2194,11 @@ size_t match_downstream_addr_group_host( } } // namespace -size_t match_downstream_addr_group( - const std::string &hostport, const std::string &raw_path, - const std::vector &groups, size_t catch_all) { +size_t +match_downstream_addr_group(const Router &router, const std::string &hostport, + const std::string &raw_path, + const std::vector &groups, + size_t catch_all) { if (std::find(std::begin(hostport), std::end(hostport), '/') != std::end(hostport)) { // We use '/' specially, and if '/' is included in host, it breaks @@ -2239,11 +2208,11 @@ size_t match_downstream_addr_group( auto fragment = std::find(std::begin(raw_path), std::end(raw_path), '#'); auto query = std::find(std::begin(raw_path), fragment, '?'); - auto path_first = std::begin(raw_path); - auto path_last = query; + auto path = raw_path.c_str(); + auto pathlen = query - std::begin(raw_path); if 
(hostport.empty()) { - return match_downstream_addr_group_host(hostport, path_first, path_last, + return match_downstream_addr_group_host(router, hostport, path, pathlen, groups, catch_all); } @@ -2267,7 +2236,7 @@ size_t match_downstream_addr_group( } util::inp_strlower(host); - return match_downstream_addr_group_host(host, path_first, path_last, groups, + return match_downstream_addr_group_host(router, host, path, pathlen, groups, catch_all); } diff --git a/src/shrpx_config.h b/src/shrpx_config.h index f147c61f..3cf81db1 100644 --- a/src/shrpx_config.h +++ b/src/shrpx_config.h @@ -50,6 +50,7 @@ #include +#include "shrpx_router.h" #include "template.h" using namespace nghttp2; @@ -229,8 +230,13 @@ struct DownstreamAddr { }; struct DownstreamAddrGroup { - DownstreamAddrGroup(std::string pattern) : pattern(std::move(pattern)) {} - std::string pattern; + DownstreamAddrGroup(const std::string &pattern) : pattern(strcopy(pattern)) {} + DownstreamAddrGroup(const DownstreamAddrGroup &other); + DownstreamAddrGroup(DownstreamAddrGroup &&) = default; + DownstreamAddrGroup &operator=(const DownstreamAddrGroup &other); + DownstreamAddrGroup &operator=(DownstreamAddrGroup &&) = default; + + std::unique_ptr pattern; std::vector addrs; }; @@ -272,6 +278,7 @@ struct Config { Address downstream_http_proxy_addr; Address session_cache_memcached_addr; Address tls_ticket_key_memcached_addr; + Router router; std::chrono::seconds tls_session_timeout; ev_tstamp http2_upstream_read_timeout; ev_tstamp upstream_read_timeout; @@ -442,24 +449,6 @@ std::pair parse_header(const char *optarg); std::vector parse_log_format(const char *optarg); -// Returns a copy of NULL-terminated string [first, last). -template -std::unique_ptr strcopy(InputIt first, InputIt last) { - auto res = make_unique(last - first + 1); - *std::copy(first, last, res.get()) = '\0'; - return res; -} - -// Returns a copy of NULL-terminated string |val|. 
-inline std::unique_ptr strcopy(const char *val) { - return strcopy(val, val + strlen(val)); -} - -// Returns a copy of val.c_str(). -inline std::unique_ptr strcopy(const std::string &val) { - return strcopy(std::begin(val), std::end(val)); -} - // Returns string for syslog |facility|. const char *str_syslog_facility(int facility); @@ -483,7 +472,7 @@ read_tls_ticket_key_file(const std::vector &files, // group. The catch-all group index is given in |catch_all|. All // patterns are given in |groups|. size_t match_downstream_addr_group( - const std::string &hostport, const std::string &path, + const Router &router, const std::string &hostport, const std::string &path, const std::vector &groups, size_t catch_all); } // namespace shrpx diff --git a/src/shrpx_config_test.cc b/src/shrpx_config_test.cc index 4cec6615..6af69645 100644 --- a/src/shrpx_config_test.cc +++ b/src/shrpx_config_test.cc @@ -235,56 +235,112 @@ void test_shrpx_config_match_downstream_addr_group(void) { {"nghttp2.org/delta%3A"}, {"www.nghttp2.org/"}, {"[::1]/"}, + {"nghttp2.org/alpha/bravo/delta"}, + // Check that match is done in the single node + {"example.com/alpha/bravo"}, + {"192.168.0.1/alpha/"}, }; - CU_ASSERT(0 == match_downstream_addr_group("nghttp2.org", "/", groups, 255)); + Router router; + + for (size_t i = 0; i < groups.size(); ++i) { + auto &g = groups[i]; + router.add_route(g.pattern.get(), strlen(g.pattern.get()), i); + } + + CU_ASSERT(0 == match_downstream_addr_group(router, "nghttp2.org", "/", groups, + 255)); // port is removed - CU_ASSERT(0 == - match_downstream_addr_group("nghttp2.org:8080", "/", groups, 255)); + CU_ASSERT(0 == match_downstream_addr_group(router, "nghttp2.org:8080", "/", + groups, 255)); // host is case-insensitive - CU_ASSERT(4 == match_downstream_addr_group("WWW.nghttp2.org", "/alpha", - groups, 255)); + CU_ASSERT(4 == match_downstream_addr_group(router, "WWW.nghttp2.org", + "/alpha", groups, 255)); - CU_ASSERT(1 == match_downstream_addr_group("nghttp2.org", 
"/alpha/bravo/", - groups, 255)); + CU_ASSERT(1 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/bravo/", groups, 255)); // /alpha/bravo also matches /alpha/bravo/ - CU_ASSERT(1 == match_downstream_addr_group("nghttp2.org", "/alpha/bravo", - groups, 255)); + CU_ASSERT(1 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/bravo", groups, 255)); // path part is case-sensitive - CU_ASSERT(0 == match_downstream_addr_group("nghttp2.org", "/Alpha/bravo", - groups, 255)); + CU_ASSERT(0 == match_downstream_addr_group(router, "nghttp2.org", + "/Alpha/bravo", groups, 255)); - CU_ASSERT(1 == match_downstream_addr_group( - "nghttp2.org", "/alpha/bravo/charlie", groups, 255)); + CU_ASSERT(1 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/bravo/charlie", groups, + 255)); - CU_ASSERT(2 == match_downstream_addr_group("nghttp2.org", "/alpha/charlie", - groups, 255)); + CU_ASSERT(2 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/charlie", groups, 255)); // pattern which does not end with '/' must match its entirely. So // this matches to group 0, not group 2. 
- CU_ASSERT(0 == match_downstream_addr_group("nghttp2.org", "/alpha/charlie/", - groups, 255)); + CU_ASSERT(0 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/charlie/", groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "example.org", "/", + groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "", "/", groups, 255)); CU_ASSERT(255 == - match_downstream_addr_group("example.org", "/", groups, 255)); + match_downstream_addr_group(router, "", "alpha", groups, 255)); - CU_ASSERT(255 == match_downstream_addr_group("", "/", groups, 255)); - - CU_ASSERT(255 == match_downstream_addr_group("", "alpha", groups, 255)); - - CU_ASSERT(255 == match_downstream_addr_group("foo/bar", "/", groups, 255)); + CU_ASSERT(255 == + match_downstream_addr_group(router, "foo/bar", "/", groups, 255)); // If path is "*", only match with host + "/". - CU_ASSERT(0 == match_downstream_addr_group("nghttp2.org", "*", groups, 255)); + CU_ASSERT(0 == match_downstream_addr_group(router, "nghttp2.org", "*", groups, + 255)); - CU_ASSERT(5 == match_downstream_addr_group("[::1]", "/", groups, 255)); - CU_ASSERT(5 == match_downstream_addr_group("[::1]:8080", "/", groups, 255)); - CU_ASSERT(255 == match_downstream_addr_group("[::1", "/", groups, 255)); - CU_ASSERT(255 == match_downstream_addr_group("[::1]8000", "/", groups, 255)); + CU_ASSERT(5 == + match_downstream_addr_group(router, "[::1]", "/", groups, 255)); + CU_ASSERT( + 5 == match_downstream_addr_group(router, "[::1]:8080", "/", groups, 255)); + CU_ASSERT(255 == + match_downstream_addr_group(router, "[::1", "/", groups, 255)); + CU_ASSERT(255 == + match_downstream_addr_group(router, "[::1]8000", "/", groups, 255)); + + // Check the case where adding route extends tree + CU_ASSERT(6 == match_downstream_addr_group( + router, "nghttp2.org", "/alpha/bravo/delta", groups, 255)); + + CU_ASSERT(1 == match_downstream_addr_group(router, "nghttp2.org", + "/alpha/bravo/delta/", groups, + 255)); + + // Check 
the case where query is done in a single node + CU_ASSERT(7 == match_downstream_addr_group(router, "example.com", + "/alpha/bravo", groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "example.com", + "/alpha/bravo/", groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "example.com", "/alpha", + groups, 255)); + + // Check the case where query is done in a single node + CU_ASSERT(8 == match_downstream_addr_group(router, "192.168.0.1", "/alpha", + groups, 255)); + + CU_ASSERT(8 == match_downstream_addr_group(router, "192.168.0.1", "/alpha/", + groups, 255)); + + CU_ASSERT(8 == match_downstream_addr_group(router, "192.168.0.1", + "/alpha/bravo", groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "192.168.0.1", "/alph", + groups, 255)); + + CU_ASSERT(255 == match_downstream_addr_group(router, "192.168.0.1", "/", + groups, 255)); + + router.dump(); } } // namespace shrpx diff --git a/src/shrpx_router.cc b/src/shrpx_router.cc new file mode 100644 index 00000000..9b9f7b73 --- /dev/null +++ b/src/shrpx_router.cc @@ -0,0 +1,291 @@ +/* + * nghttp2 - HTTP/2 C Library + * + * Copyright (c) 2015 Tatsuhiro Tsujikawa + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "shrpx_router.h" + +#include "shrpx_config.h" + +namespace shrpx { + +RNode::RNode() : s(nullptr), len(0), index(-1) {} + +RNode::RNode(const char *s, size_t len, size_t index) + : s(s), len(len), index(index) {} + +Router::Router() : root_{} {} + +namespace { +RNode *find_next_node(const RNode *node, char c) { + auto itr = std::lower_bound(std::begin(node->next), std::end(node->next), c, + [](const std::unique_ptr &lhs, + const char c) { return lhs->s[0] < c; }); + if (itr == std::end(node->next) || (*itr)->s[0] != c) { + return nullptr; + } + + return (*itr).get(); +} +} // namespace + +namespace { +void add_next_node(RNode *node, std::unique_ptr new_node) { + auto itr = std::lower_bound(std::begin(node->next), std::end(node->next), + new_node->s[0], + [](const std::unique_ptr &lhs, + const char c) { return lhs->s[0] < c; }); + node->next.insert(itr, std::move(new_node)); +} +} // namespace + +void Router::add_node(RNode *node, const char *pattern, size_t patlen, + size_t index) { + auto new_node = make_unique(pattern, patlen, index); + add_next_node(node, std::move(new_node)); +} + +bool Router::add_route(const char *pattern, size_t patlen, size_t index) { + auto node = &root_; + size_t i = 0; + + for (;;) { + auto next_node = find_next_node(node, pattern[i]); + if (next_node == nullptr) { + add_node(node, pattern + i, patlen - i, index); + return true; + } + + node = next_node; + + auto slen = patlen - i; + auto s = pattern + i; + auto n = std::min(node->len, 
slen); + size_t j; + for (j = 0; j < n && node->s[j] == s[j]; ++j) + ; + if (j == n) { + // The common prefix was matched + if (slen == node->len) { + // Complete match + if (node->index != -1) { + // Don't allow duplicate + return false; + } + node->index = index; + return true; + } + + if (slen > node->len) { + // We still have pattern to add + i += j; + + continue; + } + } + + if (node->len > j) { + // node must be split into 2 nodes. new_node is now the child + // of node. + auto new_node = + make_unique(&node->s[j], node->len - j, node->index); + std::swap(node->next, new_node->next); + + node->len = j; + node->index = -1; + + add_next_node(node, std::move(new_node)); + + if (slen == j) { + node->index = index; + return true; + } + } + + i += j; + + assert(patlen > i); + add_node(node, pattern + i, patlen - i, index); + + return true; + } +} + +namespace { +const RNode *match_complete(size_t *offset, const RNode *node, + const char *first, const char *last) { + *offset = 0; + + if (first == last) { + return node; + } + + auto p = first; + + for (;;) { + auto next_node = find_next_node(node, *p); + if (next_node == nullptr) { + return nullptr; + } + + node = next_node; + + auto n = std::min(node->len, static_cast(last - p)); + if (memcmp(node->s, p, n) != 0) { + return nullptr; + } + p += n; + if (p == last) { + *offset = n; + return node; + } + } +} +} // namespace + +namespace { +const RNode *match_partial(const RNode *node, size_t offset, const char *first, + const char *last) { + if (first == last) { + if (node->len == offset) { + return node; + } + return nullptr; + } + + auto p = first; + + const RNode *found_node = nullptr; + + if (offset > 0) { + auto n = std::min(node->len - offset, static_cast(last - first)); + if (memcmp(node->s + offset, first, n) != 0) { + return nullptr; + } + + p += n; + + if (p == last) { + if (node->len == offset + n) { + if (node->index != -1) { + return node; + } + + return nullptr; + } + + if (node->index != -1 && offset + n 
+ 1 == node->len && + node->s[node->len - 1] == '/') { + return node; + } + + return nullptr; + } + + if (node->index != -1 && node->s[node->len - 1] == '/') { + found_node = node; + } + + assert(node->len == offset + n); + } + + for (;;) { + auto next_node = find_next_node(node, *p); + if (next_node == nullptr) { + return found_node; + } + + node = next_node; + + auto n = std::min(node->len, static_cast(last - p)); + if (memcmp(node->s, p, n) != 0) { + return found_node; + } + + p += n; + + if (p == last) { + if (node->len == n) { + // Complete match with this node + if (node->index != -1) { + return node; + } + + return found_node; + } + + // We allow match without trailing "/" at the end of pattern. + // So, if pattern ends with '/', and pattern and path matches + // without that slash, we consider they match to deal with + // request to the directory without trailing slash. That is if + // pattern is "/foo/" and path is "/foo", we consider they + // match. + if (node->index != -1 && n + 1 == node->len && node->s[n] == '/') { + return node; + } + + return found_node; + } + + // This is the case when pattern which ends with "/" is included + // in query. 
+ if (node->index != -1 && node->s[node->len - 1] == '/') { + found_node = node; + } + + assert(node->len == n); + } +} +} // namespace + +ssize_t Router::match(const std::string &host, const char *path, + size_t pathlen) const { + const RNode *node; + size_t offset; + + node = + match_complete(&offset, &root_, host.c_str(), host.c_str() + host.size()); + if (node == nullptr) { + return -1; + } + + node = match_partial(node, offset, path, path + pathlen); + if (node == nullptr || node == &root_) { + return -1; + } + + return node->index; +} + +namespace { +void dump_node(const RNode *node, int depth) { + fprintf(stderr, "%*ss='%.*s', len=%zu, index=%zd\n", depth, "", + (int)node->len, node->s, node->len, node->index); + for (auto &nd : node->next) { + dump_node(nd.get(), depth + 4); + } +} +} // namespace + +void Router::dump() const { dump_node(&root_, 0); } + +} // namespace shrpx diff --git a/src/shrpx_router.h b/src/shrpx_router.h new file mode 100644 index 00000000..07aadf69 --- /dev/null +++ b/src/shrpx_router.h @@ -0,0 +1,76 @@ +/* + * nghttp2 - HTTP/2 C Library + * + * Copyright (c) 2015 Tatsuhiro Tsujikawa + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef SHRPX_ROUTER_H +#define SHRPX_ROUTER_H + +#include "shrpx.h" + +#include +#include + +namespace shrpx { + +struct RNode { + RNode(); + RNode(const char *s, size_t len, size_t index); + RNode(RNode &&) = default; + RNode(const RNode &) = delete; + RNode &operator=(RNode &&) = default; + RNode &operator=(const RNode &) = delete; + + // Next RNode, sorted by s[0]. + std::vector> next; + // Stores pointer to the string this node represents. Not + // NULL-terminated. + const char *s; + // Length of |s| + size_t len; + // Index of pattern if match ends in this node. Note that we don't + // store duplicated pattern. + ssize_t index; +}; + +class Router { +public: + Router(); + // Adds route |pattern| of size |patlen| with its |index|. + bool add_route(const char *pattern, size_t patlen, size_t index); + // Returns the matched index of pattern. -1 if there is no match. + ssize_t match(const std::string &host, const char *path, + size_t pathlen) const; + + void add_node(RNode *node, const char *pattern, size_t patlen, size_t index); + + void dump() const; + +private: + // The root node of Patricia tree. This is special node and its s + // field is nullptr, and len field is 0. + RNode root_; +}; + +} // namespace shrpx + +#endif // SHRPX_ROUTER_H diff --git a/src/template.h b/src/template.h index 26ebc444..f842cdd2 100644 --- a/src/template.h +++ b/src/template.h @@ -170,6 +170,31 @@ constexpr double operator"" _h(unsigned long long h) { return h * 60 * 60; } constexpr double operator"" _min(unsigned long long min) { return min * 60; } +// Returns a copy of NULL-terminated string [first, last). 
+template +std::unique_ptr strcopy(InputIt first, InputIt last) { + auto res = make_unique(last - first + 1); + *std::copy(first, last, res.get()) = '\0'; + return res; +} + +// Returns a copy of NULL-terminated string |val|. +inline std::unique_ptr strcopy(const char *val) { + return strcopy(val, val + strlen(val)); +} + +// Returns a copy of val.c_str(). +inline std::unique_ptr strcopy(const std::string &val) { + return strcopy(std::begin(val), std::end(val)); +} + +inline std::unique_ptr strcopy(const std::unique_ptr &val) { + if (!val) { + return nullptr; + } + return strcopy(val.get()); +} + } // namespace nghttp2 #endif // TEMPLATE_H