diff --git a/src/HtmlParser.cc b/src/HtmlParser.cc index 5451e700..263981d2 100644 --- a/src/HtmlParser.cc +++ b/src/HtmlParser.cc @@ -60,6 +60,20 @@ const char* get_attr(const xmlChar **attrs, const char *name) } } // namespace +namespace { +void add_link(ParserData *parser_data, const char *uri, RequestPriority pri) +{ + auto u = xmlBuildURI(reinterpret_cast(uri), + reinterpret_cast + (parser_data->base_uri.c_str())); + if(u) { + parser_data->links.push_back(std::make_pair(reinterpret_cast(u), + pri)); + free(u); + } +} +} // namespace + namespace { void start_element_func (void* user_data, @@ -70,29 +84,26 @@ void start_element_func if(util::strieq(reinterpret_cast(name), "link")) { const char *rel_attr = get_attr(attrs, "rel"); const char *href_attr = get_attr(attrs, "href"); - if((util::strieq(rel_attr, "shortcut icon") || - util::strieq(rel_attr, "stylesheet")) && - href_attr) { - xmlChar *u = xmlBuildURI(reinterpret_cast(href_attr), - reinterpret_cast - (parser_data->base_uri.c_str())); - if(u) { - parser_data->links.push_back(reinterpret_cast(u)); - free(u); - } + if(!href_attr) { + return; } - } else if(util::strieq(reinterpret_cast(name), "img") || - util::strieq(reinterpret_cast(name), "script")) { + if(util::strieq(rel_attr, "shortcut icon")) { + add_link(parser_data, href_attr, REQ_PRI_LOWEST); + } else if(util::strieq(rel_attr, "stylesheet")) { + add_link(parser_data, href_attr, REQ_PRI_MEDIUM); + } + } else if(util::strieq(reinterpret_cast(name), "img")) { const char *src_attr = get_attr(attrs, "src"); - if(src_attr) { - xmlChar *u = xmlBuildURI(reinterpret_cast(src_attr), - reinterpret_cast - (parser_data->base_uri.c_str())); - if(u) { - parser_data->links.push_back(reinterpret_cast(u)); - free(u); - } + if(!src_attr) { + return; } + add_link(parser_data, src_attr, REQ_PRI_LOWEST); + } else if(util::strieq(reinterpret_cast(name), "script")) { + const char *src_attr = get_attr(attrs, "src"); + if(!src_attr) { + return; + } + add_link(parser_data, src_attr, REQ_PRI_MEDIUM); } } } // namespace @@ -168,7 +179,8 @@ int HtmlParser::parse_chunk_internal(const char *chunk, size_t size, } } -const std::vector& HtmlParser::get_links() const +const std::vector>& +HtmlParser::get_links() const { return parser_data_.links; } diff --git a/src/HtmlParser.h b/src/HtmlParser.h index e0f3798a..75a621a7 100644 --- a/src/HtmlParser.h +++ b/src/HtmlParser.h @@ -36,9 +36,16 @@ namespace nghttp2 { +enum RequestPriority { + REQ_PRI_HIGH = 0, + REQ_PRI_MEDIUM = 1, + REQ_PRI_LOW = 2, + REQ_PRI_LOWEST = 3 +}; + struct ParserData { std::string base_uri; - std::vector links; + std::vector> links; ParserData(const std::string& base_uri); }; @@ -47,7 +54,8 @@ public: HtmlParser(const std::string& base_uri); ~HtmlParser(); int parse_chunk(const char *chunk, size_t size, int fin); - const std::vector& get_links() const; + const std::vector>& + get_links() const; void clear_links(); private: int parse_chunk_internal(const char *chunk, size_t size, int fin); diff --git a/src/nghttp.cc b/src/nghttp.cc index 1a0d9a56..393e0668 100644 --- a/src/nghttp.cc +++ b/src/nghttp.cc @@ -248,16 +248,17 @@ struct Request { const nghttp2_data_provider *data_prd; int64_t data_length; int64_t data_offset; + int32_t pri; // Recursion level: 0: first entity, 1: entity linked from first entity int level; RequestStat stat; std::string status; Request(const std::string& uri, const http_parser_url &u, const nghttp2_data_provider *data_prd, int64_t data_length, - int level = 0) + int32_t pri, int level = 0) : uri(uri), u(u), inflater(nullptr), html_parser(nullptr), data_prd(data_prd), - data_length(data_length), data_offset(0), + data_length(data_length), data_offset(0), pri(pri), level(level) {} @@ -794,6 +795,7 @@ struct HttpClient { bool add_request(const std::string& uri, const nghttp2_data_provider *data_prd, int64_t data_length, + int32_t pri, int level = 0) { http_parser_url u; @@ -807,7 +809,7 @@ struct HttpClient { path_cache.insert(uri); } reqvec.push_back(util::make_unique(uri, u, data_prd, - data_length, level)); + data_length, pri, level)); return true; } } @@ -930,12 +932,23 @@ void submit_request(HttpClient *client, } nv[pos] = nullptr; - int r = nghttp2_submit_request(client->session, config.pri, + int r = nghttp2_submit_request(client->session, req->pri, nv.get(), req->data_prd, req); assert(r == 0); } } // namespace +namespace { +int32_t adjust_pri(int32_t base_pri, int32_t rel_pri) +{ + if((int32_t)NGHTTP2_PRI_LOWEST - rel_pri < base_pri) { + return NGHTTP2_PRI_LOWEST; + } else { + return base_pri + rel_pri; + } +} +} // namespace + namespace { void update_html_parser(HttpClient *client, Request *req, const uint8_t *data, size_t len, int fin) @@ -945,16 +958,16 @@ void update_html_parser(HttpClient *client, Request *req, } req->update_html_parser(data, len, fin); - for(size_t i = 0; i < req->html_parser->get_links().size(); ++i) { - const auto& raw_uri = req->html_parser->get_links()[i]; - auto uri = strip_fragment(raw_uri.c_str()); + for(auto& p : req->html_parser->get_links()) { + auto uri = strip_fragment(p.first.c_str()); http_parser_url u; if(http_parser_parse_url(uri.c_str(), uri.size(), 0, &u) == 0 && fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_SCHEMA) && fieldeq(uri.c_str(), u, req->uri.c_str(), req->u, UF_HOST) && porteq(uri.c_str(), u, req->uri.c_str(), req->u)) { + int32_t pri = adjust_pri(req->pri, p.second); // No POST data for assets - if ( client->add_request(uri, nullptr, 0, req->level+1) ) { + if ( client->add_request(uri, nullptr, 0, pri, req->level+1) ) { submit_request(client, config.headers, client->reqvec.back().get()); } @@ -1344,7 +1357,7 @@ int communicate(const std::string& scheme, const std::string& host, for(auto req : requests) { for(int i = 0; i < config.multiply; ++i) { client.add_request(std::get<0>(req), std::get<1>(req), - std::get<2>(req)); + std::get<2>(req), config.pri); } } client.update_hostport();