diff --git a/src/HtmlParser.cc b/src/HtmlParser.cc index f6420461..1e3f52ba 100644 --- a/src/HtmlParser.cc +++ b/src/HtmlParser.cc @@ -24,8 +24,9 @@ */ #include "HtmlParser.h" +#include + #include "util.h" -#include "uri.h" namespace spdylay { @@ -69,14 +70,24 @@ void start_element_func if((util::strieq(rel_attr, "shortcut icon") || util::strieq(rel_attr, "stylesheet")) && href_attr) { - std::string uri = uri::joinUri(parser_data->base_uri, href_attr); - parser_data->links.push_back(uri); + xmlChar *u = xmlBuildURI(reinterpret_cast(href_attr), + reinterpret_cast + (parser_data->base_uri.c_str())); + if(u) { + parser_data->links.push_back(reinterpret_cast(u)); + free(u); + } } } else if(util::strieq(reinterpret_cast(name), "img")) { const char *src_attr = get_attr(attrs, "src"); if(src_attr) { - std::string uri = uri::joinUri(parser_data->base_uri, src_attr); - parser_data->links.push_back(uri); + xmlChar *u = xmlBuildURI(reinterpret_cast(src_attr), + reinterpret_cast + (parser_data->base_uri.c_str())); + if(u) { + parser_data->links.push_back(reinterpret_cast(u)); + free(u); + } } } } diff --git a/src/Makefile.am b/src/Makefile.am index 244ba240..17eef3e0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,8 +35,8 @@ if HAVE_LIBEVENT_OPENSSL bin_PROGRAMS += shrpx endif # HAVE_LIBEVENT_OPENSSL -HELPER_OBJECTS = uri.cc util.cc spdylay_ssl.cc -HELPER_HFILES = uri.h util.h spdylay_ssl.h spdylay_config.h +HELPER_OBJECTS = util.cc spdylay_ssl.cc +HELPER_HFILES = util.h spdylay_ssl.h spdylay_config.h EVENT_OBJECTS = EVENT_HFILES = EventPoll.h EventPollEvent.h @@ -62,7 +62,8 @@ SPDY_SERVER_OBJECTS = SpdyServer.cc SPDY_SERVER_HFILES = SpdyServer.h spdycat_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} spdycat.cc \ - ${HTML_PARSER_OBJECTS} ${HTML_PARSER_HFILES} + ${HTML_PARSER_OBJECTS} ${HTML_PARSER_HFILES} \ + http-parser/http_parser.c http-parser/http_parser.h spdyd_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} \ ${EVENT_OBJECTS} ${EVENT_HFILES} \ diff --git a/src/SpdyServer.cc b/src/SpdyServer.cc index 709bc66f..ba4f8dd5 100644 --- a/src/SpdyServer.cc +++ b/src/SpdyServer.cc @@ -40,7 +40,6 @@ #include #include "spdylay_ssl.h" -#include "uri.h" #include "util.h" #include "EventPoll.h" diff --git a/src/spdycat.cc b/src/spdycat.cc index b65f7599..ee87658e 100644 --- a/src/spdycat.cc +++ b/src/spdycat.cc @@ -54,8 +54,9 @@ #include #include +#include "http-parser/http_parser.h" + #include "spdylay_ssl.h" -#include "uri.h" #include "HtmlParser.h" #include "util.h" @@ -101,16 +102,112 @@ void record_time(timeval *tv) gettimeofday(tv, 0); } +bool has_uri_field(const http_parser_url &u, http_parser_url_fields field) +{ + return u.field_set & (1 << field); +} + +bool fieldeq(const char *uri1, const http_parser_url &u1, + const char *uri2, const http_parser_url &u2, + http_parser_url_fields field) +{ + if(!has_uri_field(u1, field)) { + if(!has_uri_field(u2, field)) { + return true; + } else { + return false; + } + } else if(!has_uri_field(u2, field)) { + return false; + } + if(u1.field_data[field].len != u2.field_data[field].len) { + return false; + } + return memcmp(uri1+u1.field_data[field].off, + uri2+u2.field_data[field].off, + u1.field_data[field].len) == 0; +} + +bool fieldeq(const char *uri, const http_parser_url &u, + http_parser_url_fields field, + const char *t) +{ + if(!has_uri_field(u, field)) { + if(!t[0]) { + return true; + } else { + return false; + } + } else if(!t[0]) { + return false; + } + int i, len = u.field_data[field].len; + const char *p = uri+u.field_data[field].off; + for(i = 0; i < len && t[i] && p[i] == t[i]; ++i); + return i == len && !t[i]; +} + +uint16_t get_default_port(const char *uri, const http_parser_url &u) +{ + if(fieldeq(uri, u, UF_SCHEMA, "https")) { + return 443; + } else if(fieldeq(uri, u, UF_SCHEMA, "http")) { + return 80; + } else { + return 443; + } +} + +std::string get_uri_field(const char *uri, const http_parser_url &u, + http_parser_url_fields field) +{ + if(has_uri_field(u, field)) { + return std::string(uri+u.field_data[field].off, + u.field_data[field].len); + } else { + return ""; + } +} + +bool porteq(const char *uri1, const http_parser_url &u1, + const char *uri2, const http_parser_url &u2) +{ + uint16_t port1, port2; + port1 = has_uri_field(u1, UF_PORT) ? u1.port : get_default_port(uri1, u1); + port2 = has_uri_field(u2, UF_PORT) ? u2.port : get_default_port(uri2, u2); + return port1 == port2; +} + +void write_uri_field(std::ostream& o, + const char *uri, const http_parser_url &u, + http_parser_url_fields field) +{ + if(has_uri_field(u, field)) { + o.write(uri+u.field_data[field].off, u.field_data[field].len); + } +} + +std::string strip_fragment(const char *raw_uri) +{ + const char *end; + for(end = raw_uri; *end && *end != '#'; ++end); + size_t len = end-raw_uri; + return std::string(raw_uri, len); +} + struct Request { - uri::UriStruct us; + // URI without fragment + std::string uri; + http_parser_url u; spdylay_gzip *inflater; HtmlParser *html_parser; // Recursion level: 0: first entity, 1: entity linked from first entity int level; RequestStat stat; std::string status; - Request(const uri::UriStruct& us, int level = 0) - : us(us), inflater(0), html_parser(0), level(level) + Request(const std::string& uri, const http_parser_url &u, int level = 0) + : uri(uri), u(u), + inflater(0), html_parser(0), level(level) {} ~Request() @@ -128,7 +225,7 @@ struct Request { void init_html_parser() { - html_parser = new HtmlParser(uri::construct(us)); + html_parser = new HtmlParser(uri); } int update_html_parser(const uint8_t *data, size_t len, int fin) @@ -142,6 +239,28 @@ struct Request { return rv; } + std::string make_reqpath() const + { + std::string path = has_uri_field(u, UF_PATH) ? + get_uri_field(uri.c_str(), u, UF_PATH) : "/"; + if(has_uri_field(u, UF_QUERY)) { + path += "?"; + path.append(uri.c_str()+u.field_data[UF_QUERY].off, + u.field_data[UF_QUERY].len); + } + return path; + } + + bool is_ipv6_literal_addr() const + { + if(has_uri_field(u, UF_HOST)) { + return memchr(uri.c_str()+u.field_data[UF_HOST].off, ':', + u.field_data[UF_HOST].len); + } else { + return false; + } + } + void record_syn_stream_time() { record_time(&stat.on_syn_stream_time); @@ -196,24 +315,28 @@ struct SpdySession { return; } std::stringstream ss; - if(reqvec[0]->us.ipv6LiteralAddress) { - ss << "[" << reqvec[0]->us.host << "]"; + if(reqvec[0]->is_ipv6_literal_addr()) { + ss << "["; + write_uri_field(ss, reqvec[0]->uri.c_str(), reqvec[0]->u, UF_HOST); + ss << "]"; } else { - ss << reqvec[0]->us.host; + write_uri_field(ss, reqvec[0]->uri.c_str(), reqvec[0]->u, UF_HOST); } - if(reqvec[0]->us.port != 443) { - ss << ":" << reqvec[0]->us.port; + if(has_uri_field(reqvec[0]->u, UF_PORT) && + reqvec[0]->u.port != get_default_port(reqvec[0]->uri.c_str(), + reqvec[0]->u)) { + ss << ":" << reqvec[0]->u.port; } hostport = ss.str(); } - bool add_request(const uri::UriStruct& us, int level = 0) + bool add_request(const std::string& uri, const http_parser_url& u, + int level = 0) { - std::string key = us.dir+us.file+us.query; - if(path_cache.count(key)) { + if(path_cache.count(uri)) { return false; } else { - path_cache.insert(key); - reqvec.push_back(new Request(us, level)); + path_cache.insert(uri); + reqvec.push_back(new Request(uri, u, level)); return true; } } @@ -230,9 +353,9 @@ void submit_request(Spdylay& sc, const std::string& hostport, const std::map &headers, Request* req) { - uri::UriStruct& us = req->us; - std::string path = us.dir+us.file+us.query; - int r = sc.submit_request(us.protocol, hostport, path, headers, 3, req); + std::string path = req->make_reqpath(); + int r = sc.submit_request(get_uri_field(req->uri.c_str(), req->u, UF_SCHEMA), + hostport, path, headers, 3, req); assert(r == 0); } @@ -245,12 +368,14 @@ void update_html_parser(SpdySession *spdySession, Request *req, req->update_html_parser(data, len, fin); for(size_t i = 0; i < req->html_parser->get_links().size(); ++i) { - const std::string& uri = req->html_parser->get_links()[i]; - uri::UriStruct us; - if(uri::parse(us, uri) && - req->us.protocol == us.protocol && req->us.host == us.host && - req->us.port == us.port) { - spdySession->add_request(us, req->level+1); + const std::string& raw_uri = req->html_parser->get_links()[i]; + std::string uri = strip_fragment(raw_uri.c_str()); + http_parser_url u; + if(http_parser_parse_url(uri.c_str(), uri.size(), 0, &u) == 0 && + fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_SCHEMA) && + fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_HOST) && + porteq(uri.c_str(), u, req->uri.c_str(), req->u)) { + spdySession->add_request(uri, u, req->level+1); submit_request(*spdySession->sc, spdySession->hostport, config.headers, spdySession->reqvec.back()); } @@ -403,7 +528,7 @@ void print_stats(const SpdySession& spdySession) std::cout << "***** Statistics *****" << std::endl; for(size_t i = 0; i < spdySession.reqvec.size(); ++i) { const Request *req = spdySession.reqvec[i]; - std::cout << "#" << i+1 << ": " << uri::construct(req->us) << std::endl; + std::cout << "#" << i+1 << ": " << req->uri << std::endl; std::cout << " Status: " << req->status << std::endl; std::cout << " Delta (ms) from SSL/TLS handshake(SYN_STREAM):" << std::endl; @@ -439,7 +564,8 @@ int communicate(const std::string& host, uint16_t port, int timeout = config.timeout; int fd = nonblock_connect_to(host, port, timeout); if(fd == -1) { - std::cerr << "Could not connect to the host" << std::endl; + std::cerr << "Could not connect to the host: " << spdySession.hostport + << std::endl; return -1; } else if(fd == -2) { std::cerr << "Request to " << spdySession.hostport << " timed out " @@ -614,9 +740,14 @@ int run(char **uris, int n) int failures = 0; SpdySession spdySession; for(int i = 0; i < n; ++i) { - uri::UriStruct us; - if(uri::parse(us, uris[i])) { - if(prev_host != us.host || prev_port != us.port) { + http_parser_url u; + std::string uri = strip_fragment(uris[i]); + if(http_parser_parse_url(uri.c_str(), uri.size(), 0, &u) == 0 && + has_uri_field(u, UF_SCHEMA)) { + uint16_t port = has_uri_field(u, UF_PORT) ? + u.port : get_default_port(uri.c_str(), u); + if(!fieldeq(uri.c_str(), u, UF_HOST, prev_host.c_str()) || + u.port != prev_port) { if(!spdySession.reqvec.empty()) { spdySession.update_hostport(); if (communicate(prev_host, prev_port, spdySession, &callbacks) != 0) { @@ -624,10 +755,10 @@ int run(char **uris, int n) } spdySession = SpdySession(); } - prev_host = us.host; - prev_port = us.port; + prev_host = get_uri_field(uri.c_str(), u, UF_HOST); + prev_port = port; } - spdySession.add_request(us); + spdySession.add_request(uri, u); } } if(!spdySession.reqvec.empty()) { diff --git a/src/uri.cc b/src/uri.cc deleted file mode 100644 index f1e98e6c..00000000 --- a/src/uri.cc +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Spdylay - SPDY Library - * - * Copyright (c) 2012 Tatsuhiro Tsujikawa - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include "uri.h" - -#include -#include -#include - -#include "util.h" - -namespace spdylay { - -namespace uri { - -UriStruct::UriStruct() - : port(0), hasPassword(false), ipv6LiteralAddress(false) -{} - -UriStruct::UriStruct(const UriStruct& c) - : protocol(c.protocol), - host(c.host), - port(c.port), - dir(c.dir), - file(c.file), - query(c.query), - username(c.username), - password(c.password), - hasPassword(c.hasPassword), - ipv6LiteralAddress(c.ipv6LiteralAddress) -{} - -UriStruct::~UriStruct() {} - -UriStruct& UriStruct::operator=(const UriStruct& c) -{ - if(this != &c) { - protocol = c.protocol; - host = c.host; - port = c.port; - dir = c.dir; - file = c.file; - query = c.query; - username = c.username; - password = c.password; - hasPassword = c.hasPassword; - ipv6LiteralAddress = c.ipv6LiteralAddress; - } - return *this; -} - -void UriStruct::swap(UriStruct& other) -{ - using std::swap; - if(this != &other) { - swap(protocol, other.protocol); - swap(host, other.host); - swap(port, other.port); - swap(dir, other.dir); - swap(file, other.file); - swap(query, other.query); - swap(username, other.username); - swap(password, other.password); - swap(hasPassword, other.hasPassword); - swap(ipv6LiteralAddress, other.ipv6LiteralAddress); - } -} - -void swap(UriStruct& lhs, UriStruct& rhs) -{ - lhs.swap(rhs); -} - -bool parse(UriStruct& result, const std::string& uri) -{ - // http://user:password@aria2.sourceforge.net:80/dir/file?query#fragment - // | || || | | | | - // | || hostLast| | | | | - // | || portFirst| | | | - // authorityFirst || authorityLast | | | - // || | | | | - // userInfoLast | | | | - // | | | | | - // hostPortFirst | | | | - // | | | | - // dirFirst dirLast| | - // | | - // queryFirst fragmentFirst - - // find fragment part - std::string::const_iterator fragmentFirst = uri.begin(); - for(; fragmentFirst != uri.end(); ++fragmentFirst) { - if(*fragmentFirst == '#') break; - } - // find query part - std::string::const_iterator queryFirst = uri.begin(); - for(; queryFirst != fragmentFirst; ++queryFirst) { - if(*queryFirst == '?') break; - } - result.query.assign(queryFirst, fragmentFirst); - // find protocol - std::string::size_type protocolOffset = uri.find("://"); - if(protocolOffset == std::string::npos) return false; - result.protocol.assign(uri.begin(), uri.begin()+protocolOffset); - uint16_t defPort; - if(result.protocol == "http") { - defPort = 80; - } else if(result.protocol == "https") { - defPort = 443; - } else { - return false; - } - // find authority - std::string::const_iterator authorityFirst = uri.begin()+protocolOffset+3; - std::string::const_iterator authorityLast = authorityFirst; - for(; authorityLast != queryFirst; ++authorityLast) { - if(*authorityLast == '/') break; - } - if(authorityFirst == authorityLast) { - // No authority found - return false; - } - // find userinfo(username and password) in authority if they exist - result.username = ""; - result.password = ""; - result.hasPassword = false; - std::string::const_iterator userInfoLast = authorityLast; - std::string::const_iterator hostPortFirst = authorityFirst; - for(; userInfoLast != authorityFirst-1; --userInfoLast) { - if(*userInfoLast == '@') { - hostPortFirst = userInfoLast; - ++hostPortFirst; - std::string::const_iterator userLast = authorityFirst; - for(; userLast != userInfoLast; ++userLast) { - if(*userLast == ':') { - result.password = - util::percentDecode(userLast+1,userInfoLast); - result.hasPassword = true; - break; - } - } - result.username = - util::percentDecode(authorityFirst, userLast); - break; - } - } - std::string::const_iterator hostLast = hostPortFirst; - std::string::const_iterator portFirst = authorityLast; - result.ipv6LiteralAddress = false; - if(*hostPortFirst == '[') { - // Detected IPv6 literal address in square brackets - for(; hostLast != authorityLast; ++hostLast) { - if(*hostLast == ']') { - ++hostLast; - if(hostLast == authorityLast) { - result.ipv6LiteralAddress = true; - } else { - if(*hostLast == ':') { - portFirst = hostLast; - ++portFirst; - result.ipv6LiteralAddress = true; - } - } - break; - } - } - if(!result.ipv6LiteralAddress) { - return false; - } - } else { - for(; hostLast != authorityLast; ++hostLast) { - if(*hostLast == ':') { - portFirst = hostLast; - ++portFirst; - break; - } - } - } - if(hostPortFirst == hostLast) { - // No host - return false; - } - if(portFirst == authorityLast) { - // If port is not specified, then we set it to default port of - // its protocol.. - result.port = defPort; - } else { - errno = 0; - uint32_t tempPort = strtol(std::string(portFirst, authorityLast).c_str(), - 0, 10); - if(errno != 0) { - return false; - } else if(65535 < tempPort) { - return false; - } - result.port = tempPort; - } - if(result.ipv6LiteralAddress) { - result.host.assign(hostPortFirst+1, hostLast-1); - } else { - result.host.assign(hostPortFirst, hostLast); - } - // find directory and file part - std::string::const_iterator dirLast = authorityLast; - for(std::string::const_iterator i = authorityLast; - i != queryFirst; ++i) { - if(*i == '/') { - dirLast = i+1; - } - } - if(dirLast == queryFirst) { - result.file = ""; - } else { - result.file.assign(dirLast, queryFirst); - } - // dirFirst == authorityLast - if(authorityLast == dirLast) { - result.dir = "/"; - } else { - result.dir.assign(authorityLast, dirLast); - } - return true; -} - -std::string construct(const UriStruct& us) -{ - std::string res; - res += us.protocol; - res += "://"; - if(!us.username.empty()) { - res += util::percentEncode(us.username); - if(us.hasPassword) { - res += ":"; - res += util::percentEncode(us.password); - } - res += "@"; - } - if(us.ipv6LiteralAddress) { - res += "["; - res += us.host; - res += "]"; - } else { - res += us.host; - } - uint16_t defPort; - if(us.protocol == "http") { - defPort = 80; - } else if(us.protocol == "https") { - defPort = 443; - } else { - defPort = 0; - } - if(us.port != 0 && defPort != us.port) { - char temp[10]; - snprintf(temp, sizeof(temp), ":%u", us.port); - res += temp; - } - res += us.dir; - if(us.dir.empty() || us.dir[us.dir.size()-1] != '/') { - res += "/"; - } - res += us.file; - res += us.query; - return res; -} - -std::string joinUri(const std::string& baseUri, const std::string& uri) -{ - UriStruct us; - if(parse(us, uri)) { - return uri; - } else { - UriStruct bus; - if(!parse(bus, baseUri)) { - return uri; - } - std::vector parts; - if(uri.empty() || uri[0] != '/') { - util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts), - '/'); - } - std::string::const_iterator qend; - for(qend = uri.begin(); qend != uri.end(); ++qend) { - if(*qend == '#') { - break; - } - } - std::string::const_iterator end; - for(end = uri.begin(); end != qend; ++end) { - if(*end == '?') { - break; - } - } - util::split(uri.begin(), end, std::back_inserter(parts), '/'); - bus.dir.clear(); - bus.file.clear(); - bus.query.clear(); - std::string res = construct(bus); - res += util::joinPath(parts.begin(), parts.end()); - if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') { - res += "/"; - } - res.append(end, qend); - return res; - } -} - -} // namespace uri - -} // namespace spdylay diff --git a/src/uri.h b/src/uri.h deleted file mode 100644 index 249e4acf..00000000 --- a/src/uri.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Spdylay - SPDY Library - * - * Copyright (c) 2012 Tatsuhiro Tsujikawa - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef URI_H -#define URI_H - -#include "spdylay_config.h" - -#include - -#include - -namespace spdylay { - -namespace uri { - -struct UriStruct { - std::string protocol; - std::string host; - uint16_t port; - std::string dir; - std::string file; - std::string query; - std::string username; - std::string password; - bool hasPassword; - bool ipv6LiteralAddress; - - UriStruct(); - UriStruct(const UriStruct& c); - ~UriStruct(); - - UriStruct& operator=(const UriStruct& c); - void swap(UriStruct& other); -}; - -void swap(UriStruct& lhs, UriStruct& rhs); - -// Splits URI uri into components and stores them into result. On -// success returns true. Otherwise returns false and result is -// undefined. -bool parse(UriStruct& result, const std::string& uri); - -std::string construct(const UriStruct& us); - -std::string joinUri(const std::string& baseUri, const std::string& uri); - -} // namespace uri - -} // namespace spdylay - -#endif // URI_H