Remove uri.{cc,h} and use http_parser_parse_url() instead

This commit is contained in:
Tatsuhiro Tsujikawa 2013-01-09 00:42:06 +09:00
parent 633e85246f
commit 6da492c4e8
6 changed files with 183 additions and 445 deletions

View File

@ -24,8 +24,9 @@
*/
#include "HtmlParser.h"
#include <libxml/uri.h>
#include "util.h"
#include "uri.h"
namespace spdylay {
@ -69,14 +70,24 @@ void start_element_func
if((util::strieq(rel_attr, "shortcut icon") ||
util::strieq(rel_attr, "stylesheet")) &&
href_attr) {
std::string uri = uri::joinUri(parser_data->base_uri, href_attr);
parser_data->links.push_back(uri);
xmlChar *u = xmlBuildURI(reinterpret_cast<const xmlChar*>(href_attr),
reinterpret_cast<const xmlChar*>
(parser_data->base_uri.c_str()));
if(u) {
parser_data->links.push_back(reinterpret_cast<char*>(u));
free(u);
}
}
} else if(util::strieq(reinterpret_cast<const char*>(name), "img")) {
const char *src_attr = get_attr(attrs, "src");
if(src_attr) {
std::string uri = uri::joinUri(parser_data->base_uri, src_attr);
parser_data->links.push_back(uri);
xmlChar *u = xmlBuildURI(reinterpret_cast<const xmlChar*>(src_attr),
reinterpret_cast<const xmlChar*>
(parser_data->base_uri.c_str()));
if(u) {
parser_data->links.push_back(reinterpret_cast<char*>(u));
free(u);
}
}
}
}

View File

@ -35,8 +35,8 @@ if HAVE_LIBEVENT_OPENSSL
bin_PROGRAMS += shrpx
endif # HAVE_LIBEVENT_OPENSSL
HELPER_OBJECTS = uri.cc util.cc spdylay_ssl.cc
HELPER_HFILES = uri.h util.h spdylay_ssl.h spdylay_config.h
HELPER_OBJECTS = util.cc spdylay_ssl.cc
HELPER_HFILES = util.h spdylay_ssl.h spdylay_config.h
EVENT_OBJECTS =
EVENT_HFILES = EventPoll.h EventPollEvent.h
@ -62,7 +62,8 @@ SPDY_SERVER_OBJECTS = SpdyServer.cc
SPDY_SERVER_HFILES = SpdyServer.h
spdycat_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} spdycat.cc \
${HTML_PARSER_OBJECTS} ${HTML_PARSER_HFILES}
${HTML_PARSER_OBJECTS} ${HTML_PARSER_HFILES} \
http-parser/http_parser.c http-parser/http_parser.h
spdyd_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} \
${EVENT_OBJECTS} ${EVENT_HFILES} \

View File

@ -40,7 +40,6 @@
#include <zlib.h>
#include "spdylay_ssl.h"
#include "uri.h"
#include "util.h"
#include "EventPoll.h"

View File

@ -54,8 +54,9 @@
#include <openssl/err.h>
#include <spdylay/spdylay.h>
#include "http-parser/http_parser.h"
#include "spdylay_ssl.h"
#include "uri.h"
#include "HtmlParser.h"
#include "util.h"
@ -101,16 +102,112 @@ void record_time(timeval *tv)
gettimeofday(tv, 0);
}
// Reports whether the bit for |field| is set in u.field_set, i.e.
// whether the parsed URL described by |u| contains that component.
bool has_uri_field(const http_parser_url &u, http_parser_url_fields field)
{
  const int mask = 1 << field;
  return (u.field_set & mask) != 0;
}
// Compares component |field| of two parsed URIs byte-for-byte.
// |u1| / |u2| hold the offsets and lengths into |uri1| / |uri2|.
// Two URIs that both lack the component compare equal; if only one
// of them has it, they compare unequal.
bool fieldeq(const char *uri1, const http_parser_url &u1,
             const char *uri2, const http_parser_url &u2,
             http_parser_url_fields field)
{
  const bool present1 = has_uri_field(u1, field);
  const bool present2 = has_uri_field(u2, field);
  if(!present1 || !present2) {
    // Equal only when the component is missing on both sides.
    return present1 == present2;
  }
  const size_t len = u1.field_data[field].len;
  if(len != u2.field_data[field].len) {
    return false;
  }
  const char *lhs = uri1 + u1.field_data[field].off;
  const char *rhs = uri2 + u2.field_data[field].off;
  return memcmp(lhs, rhs, len) == 0;
}
// Compares component |field| of the parsed URI against the
// NUL-terminated string |t|.  A missing component matches only the
// empty string, and a present component never matches the empty
// string.
bool fieldeq(const char *uri, const http_parser_url &u,
             http_parser_url_fields field,
             const char *t)
{
  const bool t_empty = (t[0] == '\0');
  if(!has_uri_field(u, field)) {
    return t_empty;
  }
  if(t_empty) {
    return false;
  }
  const int len = u.field_data[field].len;
  const char *p = uri + u.field_data[field].off;
  int i = 0;
  // Advance while both sides agree; stop at the end of either one.
  while(i < len && t[i] != '\0' && p[i] == t[i]) {
    ++i;
  }
  // Match requires that both the component and |t| were consumed.
  return i == len && t[i] == '\0';
}
// Returns the port implied by the URI scheme: 80 for "http", and
// 443 for "https" as well as for any other or absent scheme.
uint16_t get_default_port(const char *uri, const http_parser_url &u)
{
  return fieldeq(uri, u, UF_SCHEMA, "http") ? 80 : 443;
}
// Returns a copy of component |field| of the parsed URI, or an empty
// string when the component is not present.
std::string get_uri_field(const char *uri, const http_parser_url &u,
                          http_parser_url_fields field)
{
  if(!has_uri_field(u, field)) {
    return std::string();
  }
  const char *first = uri + u.field_data[field].off;
  return std::string(first, u.field_data[field].len);
}
// Returns true if both parsed URIs refer to the same port.  A URI
// without an explicit port is treated as using the default port of
// its scheme (see get_default_port()).
bool porteq(const char *uri1, const http_parser_url &u1,
            const char *uri2, const http_parser_url &u2)
{
  uint16_t lhs;
  if(has_uri_field(u1, UF_PORT)) {
    lhs = u1.port;
  } else {
    lhs = get_default_port(uri1, u1);
  }
  uint16_t rhs;
  if(has_uri_field(u2, UF_PORT)) {
    rhs = u2.port;
  } else {
    rhs = get_default_port(uri2, u2);
  }
  return lhs == rhs;
}
// Writes the raw bytes of component |field| of the parsed URI to
// |o|.  Nothing is written when the component is not present.
void write_uri_field(std::ostream& o,
                     const char *uri, const http_parser_url &u,
                     http_parser_url_fields field)
{
  if(!has_uri_field(u, field)) {
    return;
  }
  const char *first = uri + u.field_data[field].off;
  o.write(first, u.field_data[field].len);
}
// Returns |raw_uri| truncated just before its first '#'.  The whole
// string is returned when it contains no '#'.
std::string strip_fragment(const char *raw_uri)
{
  size_t n = 0;
  while(raw_uri[n] != '\0' && raw_uri[n] != '#') {
    ++n;
  }
  return std::string(raw_uri, n);
}
struct Request {
uri::UriStruct us;
// URI without fragment
std::string uri;
http_parser_url u;
spdylay_gzip *inflater;
HtmlParser *html_parser;
// Recursion level: 0: first entity, 1: entity linked from first entity
int level;
RequestStat stat;
std::string status;
Request(const uri::UriStruct& us, int level = 0)
: us(us), inflater(0), html_parser(0), level(level)
Request(const std::string& uri, const http_parser_url &u, int level = 0)
: uri(uri), u(u),
inflater(0), html_parser(0), level(level)
{}
~Request()
@ -128,7 +225,7 @@ struct Request {
void init_html_parser()
{
html_parser = new HtmlParser(uri::construct(us));
html_parser = new HtmlParser(uri);
}
int update_html_parser(const uint8_t *data, size_t len, int fin)
@ -142,6 +239,28 @@ struct Request {
return rv;
}
std::string make_reqpath() const
{
std::string path = has_uri_field(u, UF_PATH) ?
get_uri_field(uri.c_str(), u, UF_PATH) : "/";
if(has_uri_field(u, UF_QUERY)) {
path += "?";
path.append(uri.c_str()+u.field_data[UF_QUERY].off,
u.field_data[UF_QUERY].len);
}
return path;
}
bool is_ipv6_literal_addr() const
{
if(has_uri_field(u, UF_HOST)) {
return memchr(uri.c_str()+u.field_data[UF_HOST].off, ':',
u.field_data[UF_HOST].len);
} else {
return false;
}
}
void record_syn_stream_time()
{
record_time(&stat.on_syn_stream_time);
@ -196,24 +315,28 @@ struct SpdySession {
return;
}
std::stringstream ss;
if(reqvec[0]->us.ipv6LiteralAddress) {
ss << "[" << reqvec[0]->us.host << "]";
if(reqvec[0]->is_ipv6_literal_addr()) {
ss << "[";
write_uri_field(ss, reqvec[0]->uri.c_str(), reqvec[0]->u, UF_HOST);
ss << "]";
} else {
ss << reqvec[0]->us.host;
write_uri_field(ss, reqvec[0]->uri.c_str(), reqvec[0]->u, UF_HOST);
}
if(reqvec[0]->us.port != 443) {
ss << ":" << reqvec[0]->us.port;
if(has_uri_field(reqvec[0]->u, UF_PORT) &&
reqvec[0]->u.port != get_default_port(reqvec[0]->uri.c_str(),
reqvec[0]->u)) {
ss << ":" << reqvec[0]->u.port;
}
hostport = ss.str();
}
bool add_request(const uri::UriStruct& us, int level = 0)
bool add_request(const std::string& uri, const http_parser_url& u,
int level = 0)
{
std::string key = us.dir+us.file+us.query;
if(path_cache.count(key)) {
if(path_cache.count(uri)) {
return false;
} else {
path_cache.insert(key);
reqvec.push_back(new Request(us, level));
path_cache.insert(uri);
reqvec.push_back(new Request(uri, u, level));
return true;
}
}
@ -230,9 +353,9 @@ void submit_request(Spdylay& sc, const std::string& hostport,
const std::map<std::string,std::string> &headers,
Request* req)
{
uri::UriStruct& us = req->us;
std::string path = us.dir+us.file+us.query;
int r = sc.submit_request(us.protocol, hostport, path, headers, 3, req);
std::string path = req->make_reqpath();
int r = sc.submit_request(get_uri_field(req->uri.c_str(), req->u, UF_SCHEMA),
hostport, path, headers, 3, req);
assert(r == 0);
}
@ -245,12 +368,14 @@ void update_html_parser(SpdySession *spdySession, Request *req,
req->update_html_parser(data, len, fin);
for(size_t i = 0; i < req->html_parser->get_links().size(); ++i) {
const std::string& uri = req->html_parser->get_links()[i];
uri::UriStruct us;
if(uri::parse(us, uri) &&
req->us.protocol == us.protocol && req->us.host == us.host &&
req->us.port == us.port) {
spdySession->add_request(us, req->level+1);
const std::string& raw_uri = req->html_parser->get_links()[i];
std::string uri = strip_fragment(raw_uri.c_str());
http_parser_url u;
if(http_parser_parse_url(uri.c_str(), uri.size(), 0, &u) == 0 &&
fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_SCHEMA) &&
fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_HOST) &&
porteq(uri.c_str(), u, req->uri.c_str(), req->u)) {
spdySession->add_request(uri, u, req->level+1);
submit_request(*spdySession->sc, spdySession->hostport, config.headers,
spdySession->reqvec.back());
}
@ -403,7 +528,7 @@ void print_stats(const SpdySession& spdySession)
std::cout << "***** Statistics *****" << std::endl;
for(size_t i = 0; i < spdySession.reqvec.size(); ++i) {
const Request *req = spdySession.reqvec[i];
std::cout << "#" << i+1 << ": " << uri::construct(req->us) << std::endl;
std::cout << "#" << i+1 << ": " << req->uri << std::endl;
std::cout << " Status: " << req->status << std::endl;
std::cout << " Delta (ms) from SSL/TLS handshake(SYN_STREAM):"
<< std::endl;
@ -439,7 +564,8 @@ int communicate(const std::string& host, uint16_t port,
int timeout = config.timeout;
int fd = nonblock_connect_to(host, port, timeout);
if(fd == -1) {
std::cerr << "Could not connect to the host" << std::endl;
std::cerr << "Could not connect to the host: " << spdySession.hostport
<< std::endl;
return -1;
} else if(fd == -2) {
std::cerr << "Request to " << spdySession.hostport << " timed out "
@ -614,9 +740,14 @@ int run(char **uris, int n)
int failures = 0;
SpdySession spdySession;
for(int i = 0; i < n; ++i) {
uri::UriStruct us;
if(uri::parse(us, uris[i])) {
if(prev_host != us.host || prev_port != us.port) {
http_parser_url u;
std::string uri = strip_fragment(uris[i]);
if(http_parser_parse_url(uri.c_str(), uri.size(), 0, &u) == 0 &&
has_uri_field(u, UF_SCHEMA)) {
uint16_t port = has_uri_field(u, UF_PORT) ?
u.port : get_default_port(uri.c_str(), u);
if(!fieldeq(uri.c_str(), u, UF_HOST, prev_host.c_str()) ||
u.port != prev_port) {
if(!spdySession.reqvec.empty()) {
spdySession.update_hostport();
if (communicate(prev_host, prev_port, spdySession, &callbacks) != 0) {
@ -624,10 +755,10 @@ int run(char **uris, int n)
}
spdySession = SpdySession();
}
prev_host = us.host;
prev_port = us.port;
prev_host = get_uri_field(uri.c_str(), u, UF_HOST);
prev_port = port;
}
spdySession.add_request(us);
spdySession.add_request(uri, u);
}
}
if(!spdySession.reqvec.empty()) {

View File

@ -1,331 +0,0 @@
/*
* Spdylay - SPDY Library
*
* Copyright (c) 2012 Tatsuhiro Tsujikawa
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "uri.h"
#include <cerrno>
#include <cstdio>
#include <vector>
#include "util.h"
namespace spdylay {
namespace uri {
// Creates an empty URI description: port 0 and both flags cleared.
// The string members are default-constructed.
UriStruct::UriStruct()
  : port(0),
    hasPassword(false),
    ipv6LiteralAddress(false)
{
}
// Copy constructor: duplicates every member of |c|.
UriStruct::UriStruct(const UriStruct& c)
  : protocol(c.protocol), host(c.host), port(c.port),
    dir(c.dir), file(c.file), query(c.query),
    username(c.username), password(c.password),
    hasPassword(c.hasPassword),
    ipv6LiteralAddress(c.ipv6LiteralAddress)
{
}
// Empty destructor body; no explicit cleanup is performed here.
UriStruct::~UriStruct()
{
}
// Copy assignment: copies every member of |c| into this object.
// Self-assignment is detected and left as a no-op.
UriStruct& UriStruct::operator=(const UriStruct& c)
{
  if(this == &c) {
    return *this;
  }
  protocol = c.protocol;
  host = c.host;
  port = c.port;
  dir = c.dir;
  file = c.file;
  query = c.query;
  username = c.username;
  password = c.password;
  hasPassword = c.hasPassword;
  ipv6LiteralAddress = c.ipv6LiteralAddress;
  return *this;
}
// Exchanges every member of this object with the corresponding
// member of |other|.  Swapping an object with itself does nothing.
void UriStruct::swap(UriStruct& other)
{
  if(this == &other) {
    return;
  }
  using std::swap;
  swap(protocol, other.protocol);
  swap(host, other.host);
  swap(port, other.port);
  swap(dir, other.dir);
  swap(file, other.file);
  swap(query, other.query);
  swap(username, other.username);
  swap(password, other.password);
  swap(hasPassword, other.hasPassword);
  swap(ipv6LiteralAddress, other.ipv6LiteralAddress);
}
// Free-function form of UriStruct::swap(); forwards to the member
// so that unqualified swap(a, b) works on UriStruct objects.
void swap(UriStruct& lhs, UriStruct& rhs)
{
  lhs.swap(rhs);
}
// Splits |uri| into its components and stores them in |result|.
// Only the "http" and "https" schemes are accepted.  Returns true on
// success.  On failure returns false; |result| may already have been
// partially overwritten at that point (the query and protocol are
// assigned before later checks run).
bool parse(UriStruct& result, const std::string& uri)
{
// Iterator names used below, shown for the example
//   http://user:password@aria2.sourceforge.net:80/dir/file?query#fragment
//
//   authorityFirst -> 'u' of "user" (first character after "://")
//   userInfoLast   -> '@'
//   hostPortFirst  -> 'a' of "aria2" (first character after '@')
//   hostLast       -> ':' in front of "80"
//   portFirst      -> '8'
//   authorityLast  -> '/' in front of "dir" (the directory starts here)
//   dirLast        -> 'f' of "file" (one past the last '/')
//   queryFirst     -> '?'
//   fragmentFirst  -> '#'
// find fragment part
std::string::const_iterator fragmentFirst = uri.begin();
for(; fragmentFirst != uri.end(); ++fragmentFirst) {
if(*fragmentFirst == '#') break;
}
// find query part; the stored query keeps its leading '?' and stops
// before the fragment
std::string::const_iterator queryFirst = uri.begin();
for(; queryFirst != fragmentFirst; ++queryFirst) {
if(*queryFirst == '?') break;
}
result.query.assign(queryFirst, fragmentFirst);
// find protocol
std::string::size_type protocolOffset = uri.find("://");
if(protocolOffset == std::string::npos) return false;
result.protocol.assign(uri.begin(), uri.begin()+protocolOffset);
// The default port depends on the scheme; any scheme other than
// http/https is rejected.
uint16_t defPort;
if(result.protocol == "http") {
defPort = 80;
} else if(result.protocol == "https") {
defPort = 443;
} else {
return false;
}
// find authority: it runs from just after "://" up to the first '/'
// or, failing that, up to the start of the query
std::string::const_iterator authorityFirst = uri.begin()+protocolOffset+3;
std::string::const_iterator authorityLast = authorityFirst;
for(; authorityLast != queryFirst; ++authorityLast) {
if(*authorityLast == '/') break;
}
if(authorityFirst == authorityLast) {
// No authority found
return false;
}
// find userinfo(username and password) in authority if they exist
result.username = "";
result.password = "";
result.hasPassword = false;
std::string::const_iterator userInfoLast = authorityLast;
std::string::const_iterator hostPortFirst = authorityFirst;
// Scan backwards so that the last '@' inside the authority is the
// one that separates userinfo from host.
// NOTE(review): the scan starts at authorityLast itself, which can
// equal uri.end() (e.g. "http://host"); the first iteration then
// dereferences the end iterator -- confirm this is acceptable.
for(; userInfoLast != authorityFirst-1; --userInfoLast) {
if(*userInfoLast == '@') {
hostPortFirst = userInfoLast;
++hostPortFirst;
// The first ':' in the userinfo separates user name and password.
std::string::const_iterator userLast = authorityFirst;
for(; userLast != userInfoLast; ++userLast) {
if(*userLast == ':') {
result.password =
util::percentDecode(userLast+1,userInfoLast);
result.hasPassword = true;
break;
}
}
result.username =
util::percentDecode(authorityFirst, userLast);
break;
}
}
std::string::const_iterator hostLast = hostPortFirst;
std::string::const_iterator portFirst = authorityLast;
result.ipv6LiteralAddress = false;
// NOTE(review): hostPortFirst may equal authorityLast here (userinfo
// with nothing after '@'); the emptiness check only happens further
// down, after this dereference -- confirm.
if(*hostPortFirst == '[') {
// Detected IPv6 literal address in square brackets
for(; hostLast != authorityLast; ++hostLast) {
if(*hostLast == ']') {
++hostLast;
// After ']' only the end of the authority or ":port" is valid;
// anything else leaves ipv6LiteralAddress false and fails below.
if(hostLast == authorityLast) {
result.ipv6LiteralAddress = true;
} else {
if(*hostLast == ':') {
portFirst = hostLast;
++portFirst;
result.ipv6LiteralAddress = true;
}
}
break;
}
}
if(!result.ipv6LiteralAddress) {
return false;
}
} else {
// Plain host name or IPv4 address: the first ':' starts the port.
for(; hostLast != authorityLast; ++hostLast) {
if(*hostLast == ':') {
portFirst = hostLast;
++portFirst;
break;
}
}
}
if(hostPortFirst == hostLast) {
// No host
return false;
}
if(portFirst == authorityLast) {
// If port is not specified, then we set it to default port of
// its protocol..
result.port = defPort;
} else {
errno = 0;
// A negative value wraps to a large uint32_t and is rejected by the
// range check below.
// NOTE(review): strtol is called without an end pointer, so text
// that is not a number presumably yields port 0 instead of a
// failure -- confirm whether that is intended.
uint32_t tempPort = strtol(std::string(portFirst, authorityLast).c_str(),
0, 10);
if(errno != 0) {
return false;
} else if(65535 < tempPort) {
return false;
}
result.port = tempPort;
}
// For an IPv6 literal the surrounding '[' and ']' are not stored.
if(result.ipv6LiteralAddress) {
result.host.assign(hostPortFirst+1, hostLast-1);
} else {
result.host.assign(hostPortFirst, hostLast);
}
// find directory and file part; dirLast ends up one past the last
// '/' located between the authority and the query
std::string::const_iterator dirLast = authorityLast;
for(std::string::const_iterator i = authorityLast;
i != queryFirst; ++i) {
if(*i == '/') {
dirLast = i+1;
}
}
if(dirLast == queryFirst) {
result.file = "";
} else {
result.file.assign(dirLast, queryFirst);
}
// dirFirst == authorityLast
if(authorityLast == dirLast) {
// No '/' after the authority: use the root directory.
result.dir = "/";
} else {
result.dir.assign(authorityLast, dirLast);
}
return true;
}
// Builds a URI string from the components in |us|:
//   protocol://[user[:password]@]host[:port]dir[/]file query
// User name and password are percent-encoded, an IPv6 literal host is
// wrapped in square brackets, and the port is emitted only when it is
// non-zero and differs from the default of the protocol (80 for
// http, 443 for https).  A '/' is inserted after dir when dir is
// empty or does not already end with one.
std::string construct(const UriStruct& us)
{
  std::string out = us.protocol;
  out += "://";

  if(!us.username.empty()) {
    out += util::percentEncode(us.username);
    if(us.hasPassword) {
      out += ":";
      out += util::percentEncode(us.password);
    }
    out += "@";
  }

  if(us.ipv6LiteralAddress) {
    out += "[";
  }
  out += us.host;
  if(us.ipv6LiteralAddress) {
    out += "]";
  }

  // 0 means "no known default", so any non-zero port is printed.
  uint16_t defPort = 0;
  if(us.protocol == "http") {
    defPort = 80;
  } else if(us.protocol == "https") {
    defPort = 443;
  }
  if(us.port != 0 && us.port != defPort) {
    char portbuf[10];
    snprintf(portbuf, sizeof(portbuf), ":%u", us.port);
    out += portbuf;
  }

  out += us.dir;
  const bool dirEndsWithSlash =
    !us.dir.empty() && us.dir[us.dir.size()-1] == '/';
  if(!dirEndsWithSlash) {
    out += "/";
  }
  out += us.file;
  out += us.query;
  return out;
}
std::string joinUri(const std::string& baseUri, const std::string& uri)
{
UriStruct us;
if(parse(us, uri)) {
return uri;
} else {
UriStruct bus;
if(!parse(bus, baseUri)) {
return uri;
}
std::vector<std::string> parts;
if(uri.empty() || uri[0] != '/') {
util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts),
'/');
}
std::string::const_iterator qend;
for(qend = uri.begin(); qend != uri.end(); ++qend) {
if(*qend == '#') {
break;
}
}
std::string::const_iterator end;
for(end = uri.begin(); end != qend; ++end) {
if(*end == '?') {
break;
}
}
util::split(uri.begin(), end, std::back_inserter(parts), '/');
bus.dir.clear();
bus.file.clear();
bus.query.clear();
std::string res = construct(bus);
res += util::joinPath(parts.begin(), parts.end());
if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') {
res += "/";
}
res.append(end, qend);
return res;
}
}
} // namespace uri
} // namespace spdylay

View File

@ -1,73 +0,0 @@
/*
* Spdylay - SPDY Library
*
* Copyright (c) 2012 Tatsuhiro Tsujikawa
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef URI_H
#define URI_H
#include "spdylay_config.h"
#include <stdint.h>
#include <string>
namespace spdylay {
namespace uri {
// Holds the components of an http/https URI as produced by parse()
// and consumed by construct().
struct UriStruct {
// Scheme, i.e. the text in front of "://".
std::string protocol;
// Host name or address; an IPv6 literal is stored without the
// surrounding square brackets.
std::string host;
// Port number; parse() stores the default port of the protocol when
// the URI does not specify one.
uint16_t port;
// Directory part of the path, from the '/' after the authority up to
// and including the last '/'.  parse() stores "/" when there is none.
std::string dir;
// Part of the path after the last '/'; may be empty.
std::string file;
// Query string including its leading '?'; empty when absent.
std::string query;
// User name and password taken from the userinfo part; parse()
// passes both through util::percentDecode().
std::string username;
std::string password;
// True when the userinfo contained a ':' separator, which allows an
// empty password to be told apart from no password at all.
bool hasPassword;
// True when the host was written as an IPv6 literal in brackets.
bool ipv6LiteralAddress;
UriStruct();
UriStruct(const UriStruct& c);
~UriStruct();
UriStruct& operator=(const UriStruct& c);
// Exchanges all members with |other|.
void swap(UriStruct& other);
};
// Exchanges the contents of lhs and rhs; equivalent to lhs.swap(rhs).
void swap(UriStruct& lhs, UriStruct& rhs);
// Splits URI uri into components and stores them into result. On
// success returns true. Otherwise returns false and result is
// undefined.
bool parse(UriStruct& result, const std::string& uri);
// Builds a URI string from the components stored in us.
std::string construct(const UriStruct& us);
// Resolves uri against baseUri and returns the result. uri is
// returned unchanged when it can be parsed on its own, or when
// baseUri cannot be parsed.
std::string joinUri(const std::string& baseUri, const std::string& uri);
} // namespace uri
} // namespace spdylay
#endif // URI_H