diff --git a/configure.ac b/configure.ac index 07dccfee..72a5462e 100644 --- a/configure.ac +++ b/configure.ac @@ -95,6 +95,13 @@ if test "x${have_openssl}" = "xno"; then AC_MSG_NOTICE([The example programs will not be built.]) fi +# libxml2 (for examples/spdycat) +AM_PATH_XML2(2.7.7, [have_libxml2=yes]) +if test "x${have_libxml2}" = "xyes"; then + AC_DEFINE([HAVE_LIBXML2], [1], [Define to 1 if you have `libxml2` library.]) +fi +AM_CONDITIONAL([HAVE_LIBXML2], [ test "x${have_libxml2}" = "xyes" ]) + # The example programs depend on OpenSSL enable_examples=$have_openssl AM_CONDITIONAL([ENABLE_EXAMPLES], [ test "x${enable_examples}" = "xyes" ]) diff --git a/examples/Makefile.am b/examples/Makefile.am index e89c4693..392c8193 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -25,8 +25,8 @@ if ENABLE_EXAMPLES AM_CFLAGS = -Wall AM_CPPFLAGS = -Wall -I$(srcdir)/../lib/includes -I$(builddir)/../lib/includes \ - @OPENSSL_CFLAGS@ @DEFS@ -AM_LDFLAGS = @OPENSSL_LIBS@ + @OPENSSL_CFLAGS@ @XML_CPPFLAGS@ @DEFS@ +AM_LDFLAGS = @OPENSSL_LIBS@ @XML_LIBS@ LDADD = $(top_builddir)/lib/libspdylay.la bin_PROGRAMS = spdycat spdyd @@ -47,10 +47,18 @@ EVENT_OBJECTS += EventPoll_kqueue.cc EVENT_HFILES += EventPoll_kqueue.h endif # HAVE_KQUEUE +HTML_PARSER_OBJECTS = +HTML_PARSER_HFILES = HtmlParser.h + +if HAVE_LIBXML2 +HTML_PARSER_OBJECTS += HtmlParser.cc +endif # HAVE_LIBXML2 + SPDY_SERVER_OBJECTS = SpdyServer.cc SPDY_SERVER_HFILES = SpdyServer.h -spdycat_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} spdycat.cc +spdycat_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} spdycat.cc \ + ${HTML_PARSER_OBJECTS} ${HTML_PARSER_HFILES} spdyd_SOURCES = ${HELPER_OBJECTS} ${HELPER_HFILES} \ ${EVENT_OBJECTS} ${EVENT_HFILES} \ diff --git a/examples/spdycat.cc b/examples/spdycat.cc index 28e60d51..01349a7a 100644 --- a/examples/spdycat.cc +++ b/examples/spdycat.cc @@ -22,6 +22,8 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS IN THE SOFTWARE. */ +#include + #include #include #include @@ -54,6 +56,8 @@ #include "spdylay_ssl.h" #include "uri.h" +#include "HtmlParser.h" +#include "util.h" namespace spdylay { @@ -61,22 +65,31 @@ struct Config { bool null_out; bool remote_name; bool verbose; + bool get_assets; int spdy_version; int timeout; std::string certfile; std::string keyfile; int window_bits; Config():null_out(false), remote_name(false), verbose(false), - spdy_version(-1), timeout(-1), window_bits(-1) {} + get_assets(false), spdy_version(-1), timeout(-1), window_bits(-1) + {} }; struct Request { uri::UriStruct us; spdylay_gzip *inflater; - Request(const uri::UriStruct& us):us(us), inflater(0) {} + HtmlParser *html_parser; + // Recursion level: 0: first entity, 1: entity linked from first entity + int level; + Request(const uri::UriStruct& us, int level = 0) + : us(us), inflater(0), html_parser(0), level(level) + {} + ~Request() { spdylay_gzip_inflate_del(inflater); + delete html_parser; } void init_inflater() @@ -85,20 +98,122 @@ struct Request { rv = spdylay_gzip_inflate_new(&inflater); assert(rv == 0); } + + void init_html_parser() + { + html_parser = new HtmlParser(uri::construct(us)); + } + + int update_html_parser(const uint8_t *data, size_t len, int fin) + { + if(!html_parser) { + return 0; + } + int rv; + rv = html_parser->parse_chunk(reinterpret_cast(data), len, + fin); + return rv; + } }; -std::map stream2req; -size_t numreq, complete; +struct SpdySession { + std::vector reqvec; + // Map from stream ID to Request object. + std::map streams; + // Insert path already added in reqvec to prevent multiple request + // for 1 resource. + std::set path_cache; + // The number of completed requests, including failed ones. 
+ size_t complete; + std::string hostport; + Spdylay *sc; + SpdySession():complete(0) {} + ~SpdySession() + { + for(size_t i = 0; i < reqvec.size(); ++i) { + delete reqvec[i]; + } + } + bool all_requests_processed() const + { + return complete == reqvec.size(); + } + void update_hostport() + { + if(reqvec.empty()) { + return; + } + std::stringstream ss; + if(reqvec[0]->us.ipv6LiteralAddress) { + ss << "[" << reqvec[0]->us.host << "]"; + } else { + ss << reqvec[0]->us.host; + } + if(reqvec[0]->us.port != 443) { + ss << ":" << reqvec[0]->us.port; + } + hostport = ss.str(); + } + bool add_request(const uri::UriStruct& us, int level = 0) + { + std::string key = us.dir+us.file+us.query; + if(path_cache.count(key)) { + return false; + } else { + path_cache.insert(key); + reqvec.push_back(new Request(us, level)); + return true; + } + } +}; Config config; extern bool ssl_debug; +void submit_request(Spdylay& sc, const std::string& hostport, Request* req) +{ + uri::UriStruct& us = req->us; + std::string path = us.dir+us.file+us.query; + int r = sc.submit_request(hostport, path, 3, req); + assert(r == 0); +} + +void update_html_parser(SpdySession *spdySession, Request *req, + const uint8_t *data, size_t len, int fin) +{ + if(!req->html_parser) { + return; + } + req->update_html_parser(data, len, fin); + // Fetch each same-origin link once; add_request() returns false for a path that is already queued. + for(size_t i = 0; i < req->html_parser->get_links().size(); ++i) { + const std::string& uri = req->html_parser->get_links()[i]; + uri::UriStruct us; + if(uri::parse(us, uri) && + req->us.protocol == us.protocol && req->us.host == us.host && + req->us.port == us.port && + spdySession->add_request(us, req->level+1)) { + submit_request(*spdySession->sc, spdySession->hostport, + spdySession->reqvec.back()); + } + } + req->html_parser->clear_links(); +} + +SpdySession* get_session(void *user_data) +{ + return reinterpret_cast + (reinterpret_cast(user_data)->user_data()); +} + void on_data_chunk_recv_callback (spdylay_session *session, uint8_t flags, int32_t stream_id, const uint8_t 
*data, size_t len, void *user_data) { - std::map::iterator itr = stream2req.find(stream_id); - if(itr != stream2req.end()) { + SpdySession *spdySession = get_session(user_data); + std::map::iterator itr = + spdySession->streams.find(stream_id); + if(itr != spdySession->streams.end()) { Request *req = (*itr).second; if(req->inflater) { while(len > 0) { @@ -111,23 +226,31 @@ void on_data_chunk_recv_callback spdylay_submit_rst_stream(session, stream_id, SPDYLAY_INTERNAL_ERROR); break; } - std::cout.write(reinterpret_cast(out), outlen); + if(!config.null_out) { + std::cout.write(reinterpret_cast(out), outlen); + } + update_html_parser(spdySession, req, out, outlen, 0); data += tlen; len -= tlen; } } else { - std::cout.write(reinterpret_cast(data), len); + if(!config.null_out) { + std::cout.write(reinterpret_cast(data), len); + } + update_html_parser(spdySession, req, data, len, 0); } } } void check_stream_id(spdylay_session *session, - spdylay_frame_type type, spdylay_frame *frame) + spdylay_frame_type type, spdylay_frame *frame, + void *user_data) { + SpdySession *spdySession = get_session(user_data); int32_t stream_id = frame->syn_stream.stream_id; Request *req = (Request*)spdylay_session_get_stream_user_data(session, stream_id); - stream2req[stream_id] = req; + spdySession->streams[stream_id] = req; } void on_ctrl_send_callback2 @@ -135,7 +258,7 @@ void on_ctrl_send_callback2 void *user_data) { if(type == SPDYLAY_SYN_STREAM) { - check_stream_id(session, type, frame); + check_stream_id(session, type, frame, user_data); } } @@ -144,12 +267,12 @@ void on_ctrl_send_callback3 void *user_data) { if(type == SPDYLAY_SYN_STREAM) { - check_stream_id(session, type, frame); + check_stream_id(session, type, frame, user_data); } on_ctrl_send_callback(session, type, frame, user_data); } -void check_gzip +void check_response_header (spdylay_session *session, spdylay_frame_type type, spdylay_frame *frame, void *user_data) { @@ -164,21 +287,24 @@ void check_gzip } else { return; } + 
Request *req = (Request*)spdylay_session_get_stream_user_data(session, + stream_id); + assert(req); bool gzip = false; for(size_t i = 0; nv[i]; i += 2) { if(strcmp("content-encoding", nv[i]) == 0) { - gzip = strcmp("gzip", nv[i+1]) == 0; - break; + gzip = util::strieq("gzip", nv[i+1]) || util::strieq("deflate", nv[i+1]); } } if(gzip) { - Request *req = (Request*)spdylay_session_get_stream_user_data(session, - stream_id); - assert(req); - if(req->inflater) { - return; + if(!req->inflater) { + req->init_inflater(); + } + } + if(config.get_assets && req->level == 0) { + if(!req->html_parser) { + req->init_html_parser(); } - req->init_inflater(); } } @@ -186,14 +312,14 @@ void on_ctrl_recv_callback2 (spdylay_session *session, spdylay_frame_type type, spdylay_frame *frame, void *user_data) { - check_gzip(session, type, frame, user_data); + check_response_header(session, type, frame, user_data); } void on_ctrl_recv_callback3 (spdylay_session *session, spdylay_frame_type type, spdylay_frame *frame, void *user_data) { - check_gzip(session, type, frame, user_data); + check_response_header(session, type, frame, user_data); on_ctrl_recv_callback(session, type, frame, user_data); } @@ -201,23 +327,22 @@ void on_stream_close_callback (spdylay_session *session, int32_t stream_id, spdylay_status_code status_code, void *user_data) { - std::map::iterator itr = stream2req.find(stream_id); - if(itr != stream2req.end()) { - ++complete; - if(complete == numreq) { + SpdySession *spdySession = get_session(user_data); + std::map::iterator itr = + spdySession->streams.find(stream_id); + if(itr != spdySession->streams.end()) { + update_html_parser(spdySession, (*itr).second, 0, 0, 1); + ++spdySession->complete; + if(spdySession->all_requests_processed()) { spdylay_submit_goaway(session, SPDYLAY_GOAWAY_OK); } - stream2req.erase(itr); } } int communicate(const std::string& host, uint16_t port, - std::vector& reqvec, + SpdySession& spdySession, const spdylay_session_callbacks *callbacks) { - 
numreq = reqvec.size(); - complete = 0; - stream2req.clear(); int fd = connect_to(host, port); if(fd == -1) { std::cerr << "Could not connect to the host" << std::endl; @@ -269,23 +394,12 @@ int communicate(const std::string& host, uint16_t port, if (spdy_version <= 0) { return -1; } - Spdylay sc(fd, ssl, spdy_version, callbacks); + Spdylay sc(fd, ssl, spdy_version, callbacks, &spdySession); + spdySession.sc = &sc; nfds_t npollfds = 1; pollfd pollfds[1]; - std::stringstream ss; - if(reqvec[0].us.ipv6LiteralAddress) { - ss << "["; - } - ss << host; - if(reqvec[0].us.ipv6LiteralAddress) { - ss << "]"; - } - if(port != 443) { - ss << ":" << port; - } - std::string hostport = ss.str(); if(spdy_version >= SPDYLAY_PROTO_SPDY3 && config.window_bits != -1) { spdylay_settings_entry iv[1]; iv[0].settings_id = SPDYLAY_SETTINGS_INITIAL_WINDOW_SIZE; @@ -294,11 +408,8 @@ int communicate(const std::string& host, uint16_t port, int rv = sc.submit_settings(SPDYLAY_FLAG_SETTINGS_NONE, iv, 1); assert(rv == 0); } - for(int i = 0, n = reqvec.size(); i < n; ++i) { - uri::UriStruct& us = reqvec[i].us; - std::string path = us.dir+us.file+us.query; - int r = sc.submit_request(hostport, path, 3, &reqvec[i]); - assert(r == 0); + for(int i = 0, n = spdySession.reqvec.size(); i < n; ++i) { + submit_request(sc, spdySession.hostport, spdySession.reqvec[i]); } pollfds[0].fd = fd; ctl_poll(pollfds, &sc); @@ -315,8 +426,10 @@ int communicate(const std::string& host, uint16_t port, if(pollfds[0].revents & (POLLIN | POLLOUT)) { int rv; if((rv = sc.recv()) != 0 || (rv = sc.send()) != 0) { - if(rv != SPDYLAY_ERR_EOF || complete != numreq) { + if(rv != SPDYLAY_ERR_EOF || !spdySession.all_requests_processed()) { std::cout << "Fatal: " << spdylay_strerror(rv) << std::endl; + std::cout << "reqnum=" << spdySession.reqvec.size() + << ", completed=" << spdySession.complete << std::endl; } ok = false; break; @@ -329,14 +442,18 @@ int communicate(const std::string& host, uint16_t port, } timeout = timeout == -1 ? 
timeout : end_time - time(NULL); if (config.timeout != -1 && timeout <= 0) { - std::cout << "Requests to " << hostport << "timed out."; + std::cout << "Requests to " << spdySession.hostport << " timed out." << std::endl; ok = false; break; } assert(ok); ctl_poll(pollfds, &sc); } - + if(!spdySession.all_requests_processed()) { + std::cout << "Some requests were not processed. total=" + << spdySession.reqvec.size() + << ", processed=" << spdySession.complete << std::endl; + } SSL_shutdown(ssl); SSL_free(ssl); SSL_CTX_free(ssl_ctx); @@ -364,32 +481,32 @@ int run(char **uris, int n) callbacks.on_ctrl_recv_callback = on_ctrl_recv_callback2; callbacks.on_ctrl_send_callback = on_ctrl_send_callback2; } - if(!config.null_out) { - callbacks.on_data_chunk_recv_callback = on_data_chunk_recv_callback; - } + callbacks.on_data_chunk_recv_callback = on_data_chunk_recv_callback; ssl_debug = config.verbose; - std::vector reqvec; std::string prev_host; uint16_t prev_port = 0; int failures = 0; + SpdySession spdySession; for(int i = 0; i < n; ++i) { uri::UriStruct us; if(uri::parse(us, uris[i])) { if(prev_host != us.host || prev_port != us.port) { - if(!reqvec.empty()) { - if (communicate(prev_host, prev_port, reqvec, &callbacks) != 0) { + if(!spdySession.reqvec.empty()) { + spdySession.update_hostport(); + if (communicate(prev_host, prev_port, spdySession, &callbacks) != 0) { ++failures; } - reqvec.clear(); + spdySession = SpdySession(); } prev_host = us.host; prev_port = us.port; } - reqvec.push_back(Request(us)); + spdySession.add_request(us); } } - if(!reqvec.empty()) { - if (communicate(prev_host, prev_port, reqvec, &callbacks) != 0) { + if(!spdySession.reqvec.empty()) { + spdySession.update_hostport(); + if (communicate(prev_host, prev_port, spdySession, &callbacks) != 0) { ++failures; } } @@ -398,7 +515,7 @@ int run(char **uris, int n) void print_usage(std::ostream& out) { - out << "Usage: spdycat [-Onv23] [-t ] [-w ] [--cert=]\n" + out << "Usage: spdycat [-Oanv23] [-t ] [-w ] [--cert=]\n" 
<< " [--key=] ..." << std::endl; } @@ -420,6 +537,11 @@ void print_help(std::ostream& out) << " -t, --timeout= Timeout each request after seconds.\n" << " -w, --window-bits=\n" << " Sets the initial window size to 2**.\n" + << " -a, --get-assets Download assets such as stylesheets, images\n" + << " and script files linked from the downloaded\n" + << " resource. Only links whose origins are the\n" + << " same with the linking resource will be\n" + << " downloaded.\n" << " --cert= Use the specified client certificate file.\n" << " The file must be in PEM format.\n" << " --key= Use the client private key file. The file\n" @@ -439,13 +561,14 @@ int main(int argc, char **argv) {"spdy3", no_argument, 0, '3' }, {"timeout", required_argument, 0, 't' }, {"window-bits", required_argument, 0, 'w' }, + {"get-assets", no_argument, 0, 'a' }, {"cert", required_argument, &flag, 1 }, {"key", required_argument, &flag, 2 }, {"help", no_argument, 0, 'h' }, {0, 0, 0, 0 } }; int option_index = 0; - int c = getopt_long(argc, argv, "Onhv23t:w:", long_options, &option_index); + int c = getopt_long(argc, argv, "Oanhv23t:w:", long_options, &option_index); if(c == -1) { break; } @@ -483,6 +606,15 @@ int main(int argc, char **argv) } break; } + case 'a': +#ifdef HAVE_LIBXML2 + config.get_assets = true; +#else // !HAVE_LIBXML2 + std::cerr << "Warning: -a, --get-assets option cannot be used because\n" + << "the binary was not compiled with libxml2." 
+ << std::endl; +#endif // !HAVE_LIBXML2 + break; case '?': exit(EXIT_FAILURE); case 0: diff --git a/examples/spdylay_ssl.cc b/examples/spdylay_ssl.cc index 0014103d..38958234 100644 --- a/examples/spdylay_ssl.cc +++ b/examples/spdylay_ssl.cc @@ -49,8 +49,10 @@ namespace spdylay { bool ssl_debug = false; Spdylay::Spdylay(int fd, SSL *ssl, uint16_t version, - const spdylay_session_callbacks *callbacks) - : fd_(fd), ssl_(ssl), version_(version), want_write_(false) + const spdylay_session_callbacks *callbacks, + void *user_data) + : fd_(fd), ssl_(ssl), version_(version), user_data_(user_data), + want_write_(false) { int r = spdylay_session_client_new(&session_, version_, callbacks, this); assert(r == 0); @@ -114,6 +116,11 @@ int Spdylay::fd() const return fd_; } +void* Spdylay::user_data() +{ + return user_data_; +} + int Spdylay::submit_request(const std::string& hostport, const std::string& path, uint8_t pri, void *stream_user_data) diff --git a/examples/spdylay_ssl.h b/examples/spdylay_ssl.h index b7fc288a..03e3f5ad 100644 --- a/examples/spdylay_ssl.h +++ b/examples/spdylay_ssl.h @@ -41,7 +41,8 @@ extern bool ssl_debug; class Spdylay { public: Spdylay(int fd, SSL *ssl, uint16_t version, - const spdylay_session_callbacks *callbacks); + const spdylay_session_callbacks *callbacks, + void *user_data); ~Spdylay(); int recv(); int send(); @@ -55,11 +56,13 @@ public: uint8_t pri, void *stream_user_data); int submit_settings(int flags, spdylay_settings_entry *iv, size_t niv); bool would_block(int r); + void* user_data(); private: int fd_; SSL *ssl_; uint16_t version_; spdylay_session *session_; + void *user_data_; bool want_write_; bool debug_; }; diff --git a/examples/util.cc b/examples/util.cc index a2cda706..ec22dc8f 100644 --- a/examples/util.cc +++ b/examples/util.cc @@ -119,10 +119,55 @@ time_t parse_http_date(const std::string& s) return timegm(&tm); } +namespace { +char lowcase(char c) +{ + if('A' <= c && c <= 'Z') { + return c+('a'-'A'); + } else { + return c; + } 
+} +} // namespace + +bool startsWith(const std::string& a, const std::string& b) +{ + return startsWith(a.begin(), a.end(), b.begin(), b.end()); +} + +bool istartsWith(const std::string& a, const std::string& b) +{ + return istartsWith(a.begin(), a.end(), b.begin(), b.end()); +} + +namespace { +void streq_advance(const char **ap, const char **bp) +{ + for(; **ap && **bp && lowcase(**ap) == lowcase(**bp); ++*ap, ++*bp); +} +} // namespace + +bool istartsWith(const char *a, const char* b) +{ + if(!a || !b) { + return false; + } + streq_advance(&a, &b); + return !*b; +} + bool endsWith(const std::string& a, const std::string& b) { return endsWith(a.begin(), a.end(), b.begin(), b.end()); } +bool strieq(const char *a, const char *b) +{ + if(!a || !b) { + return false; + } + for(; *a && *b && lowcase(*a) == lowcase(*b); ++a, ++b); + return !*a && !*b; +} } // namespace util diff --git a/examples/util.h b/examples/util.h index f2f549ab..d9979955 100644 --- a/examples/util.h +++ b/examples/util.h @@ -174,6 +174,50 @@ std::string to_str(T value) return ss.str(); } +template +bool startsWith +(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2) +{ + if(last1-first1 < last2-first2) { + return false; + } + return std::equal(first2, last2, first1); +} + +bool startsWith(const std::string& a, const std::string& b); + +struct CaseCmp { + bool operator()(char lhs, char rhs) const + { + if('A' <= lhs && lhs <= 'Z') { + lhs += 'a'-'A'; + } + if('A' <= rhs && rhs <= 'Z') { + rhs += 'a'-'A'; + } + return lhs == rhs; + } +}; + +template +bool istartsWith +(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2) +{ + if(last1-first1 < last2-first2) { + return false; + } + return std::equal(first2, last2, first1, CaseCmp()); +} + +bool istartsWith(const std::string& a, const std::string& b); +bool istartsWith(const char *a, const char* b); + template bool endsWith (InputIterator1 first1, @@ -189,6 +233,8 
@@ bool endsWith bool endsWith(const std::string& a, const std::string& b); +bool strieq(const char *a, const char *b); + } // namespace util } // namespace spdylay