diff --git a/src/shrpx.cc b/src/shrpx.cc index 65d41635..959c15bf 100644 --- a/src/shrpx.cc +++ b/src/shrpx.cc @@ -1284,12 +1284,13 @@ Connections: Several parameters are accepted after . The parameters are delimited by ";". The available parameters are: "proto=", "tls", - "sni=", "fall=", and "rise=". The - parameter consists of keyword, and optionally followed - by "=" and value. For example, the parameter "proto=h2" - consists of the keyword "proto" and value "h2". The - parameter "tls" consists of the keyword "tls" without - value. Each parameter is described as follows. + "sni=", "fall=", "rise=", and + "affinity=". The parameter consists of keyword, + and optionally followed by "=" and value. For example, + the parameter "proto=h2" consists of the keyword "proto" + and value "h2". The parameter "tls" consists of the + keyword "tls" without value. Each parameter is + described as follows. The backend application protocol can be specified using optional "proto" parameter, and in the form of @@ -1324,6 +1325,20 @@ Connections: backend is permanently offline, once it goes in that state, and this is the default behaviour. + The session affinity is enabled using + "affinity=" parameter. If "ip" is given in + , client IP based session affinity is enabled. + If "none" is given in , session affinity is + disabled, and this is the default. The session affinity + is enabled per . If at least one backend has + "affinity" parameter, and its is not "none", + session affinity is enabled for all backend servers + sharing the same . It is advised to set + "affinity" parameter to all backend explicitly if + session affinity is desired. The session affinity may + break if one of the backend gets unreachable, or backend + settings are reload or replaced by API. + Since ";" and ":" are used as delimiter, must not contain these characters. Since ";" has special meaning in shell, the option value must be quoted. diff --git a/src/shrpx_api_downstream_connection.cc b/src/shrpx_api_downstream_connection.cc index 5e13971d..55b3e01c 100644 --- a/src/shrpx_api_downstream_connection.cc +++ b/src/shrpx_api_downstream_connection.cc @@ -302,4 +302,6 @@ APIDownstreamConnection::get_downstream_addr_group() const { return nullptr; } +DownstreamAddr *APIDownstreamConnection::get_addr() const { return nullptr; } + } // namespace shrpx diff --git a/src/shrpx_api_downstream_connection.h b/src/shrpx_api_downstream_connection.h index fdf49526..f80341c7 100644 --- a/src/shrpx_api_downstream_connection.h +++ b/src/shrpx_api_downstream_connection.h @@ -55,6 +55,7 @@ public: virtual bool poolable() const; virtual DownstreamAddrGroup *get_downstream_addr_group() const; + virtual DownstreamAddr *get_addr() const; int send_reply(unsigned int http_status, int api_status); diff --git a/src/shrpx_client_handler.cc b/src/shrpx_client_handler.cc index 5f4eb6e5..cfe95dfe 100644 --- a/src/shrpx_client_handler.cc +++ b/src/shrpx_client_handler.cc @@ -407,6 +407,7 @@ ClientHandler::ClientHandler(Worker *worker, int fd, SSL *ssl, faddr_(faddr), worker_(worker), left_connhd_len_(NGHTTP2_CLIENT_MAGIC_LEN), + affinity_hash_(-1), should_close_after_write_(false), reset_conn_rtimer_required_(false) { @@ -668,8 +669,18 @@ void ClientHandler::pool_downstream_connection( << " in group " << group; } - auto &dconn_pool = group->shared_addr->dconn_pool; - dconn_pool.add_downstream_connection(std::move(dconn)); + auto &shared_addr = group->shared_addr; + + if (shared_addr->affinity == AFFINITY_NONE) { + auto &dconn_pool = group->shared_addr->dconn_pool; + dconn_pool.add_downstream_connection(std::move(dconn)); + + return; + } + + auto addr = dconn->get_addr(); + auto &dconn_pool = addr->dconn_pool; + dconn_pool->add_downstream_connection(std::move(dconn)); } void ClientHandler::remove_downstream_connection(DownstreamConnection *dconn) { @@ -682,6 +693,67 @@ void ClientHandler::remove_downstream_connection(DownstreamConnection *dconn) { dconn_pool.remove_downstream_connection(dconn); } +namespace { +uint32_t hash32(const StringRef &s) { + /* 32 bit FNV-1a: http://isthe.com/chongo/tech/comp/fnv/ */ + uint32_t h = 2166136261u; + size_t i; + + for (i = 0; i < s.size(); ++i) { + h ^= s[i]; + h += (h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24); + } + + return h; +} +} // namespace + +namespace { +int32_t calculate_affinity_from_ip(const StringRef &ip) { + return hash32(ip) & 0x7fffffff; +} +} // namespace + +Http2Session *ClientHandler::select_http2_session_with_affinity( + const std::shared_ptr &group, DownstreamAddr *addr) { + auto &shared_addr = group->shared_addr; + + if (LOG_ENABLED(INFO)) { + CLOG(INFO, this) << "Selected DownstreamAddr=" << addr + << ", index=" << (addr - shared_addr->addrs.data()); + } + + if (addr->http2_extra_freelist.size()) { + auto session = addr->http2_extra_freelist.head; + + if (LOG_ENABLED(INFO)) { + CLOG(INFO, this) << "Use Http2Session " << session + << " from http2_extra_freelist"; + } + + if (session->max_concurrency_reached(1)) { + if (LOG_ENABLED(INFO)) { + CLOG(INFO, this) << "Maximum streams are reached for Http2Session(" + << session << ")."; + } + + session->remove_from_freelist(); + } + return session; + } + + auto session = new Http2Session(conn_.loop, worker_->get_cl_ssl_ctx(), + worker_, group, addr); + + if (LOG_ENABLED(INFO)) { + CLOG(INFO, this) << "Create new Http2Session " << session; + } + + session->add_to_extra_freelist(); + + return session; +} + namespace { // Returns true if load of |lhs| is lighter than that of |rhs|. // Currently, we assume that lesser streams means lesser load. @@ -863,11 +935,42 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { auto &group = groups[group_idx]; auto &shared_addr = group->shared_addr; - auto proto = PROTO_NONE; + if (shared_addr->affinity == AFFINITY_IP) { + if (affinity_hash_ == -1) { + affinity_hash_ = calculate_affinity_from_ip(StringRef{ipaddr_}); + } + + auto idx = affinity_hash_ % shared_addr->addrs.size(); + + auto &addr = shared_addr->addrs[idx]; + if (addr.proto == PROTO_HTTP2) { + auto http2session = select_http2_session_with_affinity(group, &addr); + + auto dconn = make_unique(http2session); + + dconn->set_client_handler(this); + + return std::move(dconn); + } + + auto &dconn_pool = addr.dconn_pool; + auto dconn = dconn_pool->pop_downstream_connection(); + + if (!dconn) { + dconn = make_unique(group, idx, conn_.loop, + worker_); + } + + dconn->set_client_handler(this); + + return dconn; + } auto http1_weight = shared_addr->http1_pri.weight; auto http2_weight = shared_addr->http2_pri.weight; + auto proto = PROTO_NONE; + if (http1_weight > 0 && http2_weight > 0) { // We only advance cycle if both weight has nonzero to keep its // distance under WEIGHT_MAX. @@ -927,7 +1030,8 @@ ClientHandler::get_downstream_connection(Downstream *downstream) { << " Create new one"; } - dconn = make_unique(group, conn_.loop, worker_); + dconn = + make_unique(group, -1, conn_.loop, worker_); } dconn->set_client_handler(this); diff --git a/src/shrpx_client_handler.h b/src/shrpx_client_handler.h index 93f7b845..a207956d 100644 --- a/src/shrpx_client_handler.h +++ b/src/shrpx_client_handler.h @@ -50,6 +50,7 @@ class DownstreamConnectionPool; class Worker; struct WorkerStat; struct DownstreamAddrGroup; +struct DownstreamAddr; class ClientHandler { public: @@ -145,6 +146,9 @@ public: Http2Session * select_http2_session(const std::shared_ptr &group); + Http2Session *select_http2_session_with_affinity( + const std::shared_ptr &group, DownstreamAddr *addr); + const UpstreamAddr *get_upstream_addr() const; private: @@ -167,6 +171,7 @@ private: Worker *worker_; // The number of bytes of HTTP/2 client connection header to read size_t left_connhd_len_; + int32_t affinity_hash_; bool should_close_after_write_; bool reset_conn_rtimer_required_; ReadBuf rb_; diff --git a/src/shrpx_config.cc b/src/shrpx_config.cc index 0fa7082f..d0e4c9b5 100644 --- a/src/shrpx_config.cc +++ b/src/shrpx_config.cc @@ -651,6 +651,7 @@ struct DownstreamParams { size_t fall; size_t rise; shrpx_proto proto; + shrpx_session_affinity affinity; bool tls; }; @@ -715,6 +716,16 @@ int parse_downstream_params(DownstreamParams &out, out.tls = false; } else if (util::istarts_with_l(param, "sni=")) { out.sni = StringRef{first + str_size("sni="), end}; + } else if (util::istarts_with_l(param, "affinity=")) { + auto valstr = StringRef{first + str_size("affinity="), end}; + if (util::strieq_l("none", valstr)) { + out.affinity = AFFINITY_NONE; + } else if (util::strieq_l("ip", valstr)) { + out.affinity = AFFINITY_IP; + } else { + LOG(ERROR) << "backend: affinity: value must be either none or ip"; + return -1; + } } else if (!param.empty()) { LOG(ERROR) << "backend: " << param << ": unknown keyword"; return -1; @@ -778,6 +789,11 @@ int parse_mapping(Config *config, DownstreamAddrConfig addr, } for (auto &g : addr_groups) { if (g.pattern == pattern) { + // Last value wins if we have multiple different affinity + // value under one group. + if (params.affinity != AFFINITY_NONE) { + g.affinity = params.affinity; + } g.addrs.push_back(addr); done = true; break; @@ -791,6 +807,7 @@ int parse_mapping(Config *config, DownstreamAddrConfig addr, addr_groups.emplace_back(StringRef{pattern}); auto &g = addr_groups.back(); g.addrs.push_back(addr); + g.affinity = params.affinity; if (pattern[0] == '*') { // wildcard pattern diff --git a/src/shrpx_config.h b/src/shrpx_config.h index c22835cb..7bfe2990 100644 --- a/src/shrpx_config.h +++ b/src/shrpx_config.h @@ -290,6 +290,13 @@ constexpr int16_t DEFAULT_DOWNSTREAM_PORT = 80; enum shrpx_proto { PROTO_NONE, PROTO_HTTP1, PROTO_HTTP2, PROTO_MEMCACHED }; +enum shrpx_session_affinity { + // No session affinity + AFFINITY_NONE, + // Client IP affinity + AFFINITY_IP, +}; + enum shrpx_forwarded_param { FORWARDED_NONE = 0, FORWARDED_BY = 0x1, @@ -354,10 +361,12 @@ struct DownstreamAddrConfig { struct DownstreamAddrGroupConfig { DownstreamAddrGroupConfig(const StringRef &pattern) - : pattern(pattern.c_str(), pattern.size()) {} + : pattern(pattern.c_str(), pattern.size()), affinity(AFFINITY_NONE) {} ImmutableString pattern; std::vector addrs; + // Session affinity + shrpx_session_affinity affinity; }; struct TicketKey { diff --git a/src/shrpx_downstream_connection.h b/src/shrpx_downstream_connection.h index 7935099a..0372496d 100644 --- a/src/shrpx_downstream_connection.h +++ b/src/shrpx_downstream_connection.h @@ -35,6 +35,7 @@ class ClientHandler; class Upstream; class Downstream; struct DownstreamAddrGroup; +struct DownstreamAddr; class DownstreamConnection { public: @@ -61,6 +62,7 @@ public: virtual bool poolable() const = 0; virtual DownstreamAddrGroup *get_downstream_addr_group() const = 0; + virtual DownstreamAddr *get_addr() const = 0; void set_client_handler(ClientHandler *client_handler); ClientHandler *get_client_handler(); diff --git a/src/shrpx_http2_downstream_connection.cc b/src/shrpx_http2_downstream_connection.cc index bb7e60dc..c6213a11 100644 --- a/src/shrpx_http2_downstream_connection.cc +++ b/src/shrpx_http2_downstream_connection.cc @@ -543,4 +543,6 @@ Http2DownstreamConnection::get_downstream_addr_group() const { return http2session_->get_downstream_addr_group(); } +DownstreamAddr *Http2DownstreamConnection::get_addr() const { return nullptr; } + } // namespace shrpx diff --git a/src/shrpx_http2_downstream_connection.h b/src/shrpx_http2_downstream_connection.h index 6f72a884..f8195560 100644 --- a/src/shrpx_http2_downstream_connection.h +++ b/src/shrpx_http2_downstream_connection.h @@ -65,6 +65,7 @@ public: virtual bool poolable() const { return false; } virtual DownstreamAddrGroup *get_downstream_addr_group() const; + virtual DownstreamAddr *get_addr() const; int send(); diff --git a/src/shrpx_http_downstream_connection.cc b/src/shrpx_http_downstream_connection.cc index 14d1e000..495147d3 100644 --- a/src/shrpx_http_downstream_connection.cc +++ b/src/shrpx_http_downstream_connection.cc @@ -148,8 +148,8 @@ void connectcb(struct ev_loop *loop, ev_io *w, int revents) { } // namespace HttpDownstreamConnection::HttpDownstreamConnection( - const std::shared_ptr &group, struct ev_loop *loop, - Worker *worker) + const std::shared_ptr &group, ssize_t initial_addr_idx, + struct ev_loop *loop, Worker *worker) : conn_(loop, -1, nullptr, worker->get_mcpool(), worker->get_downstream_config()->timeout.write, worker->get_downstream_config()->timeout.read, {}, {}, connectcb, @@ -164,7 +164,8 @@ HttpDownstreamConnection::HttpDownstreamConnection( group_(group), addr_(nullptr), ioctrl_(&conn_.rlimit), - response_htp_{0} {} + response_htp_{0}, + initial_addr_idx_(initial_addr_idx) {} HttpDownstreamConnection::~HttpDownstreamConnection() { if (LOG_ENABLED(INFO)) { @@ -191,7 +192,13 @@ int HttpDownstreamConnection::attach_downstream(Downstream *downstream) { if (conn_.fd == -1) { auto &shared_addr = group_->shared_addr; auto &addrs = shared_addr->addrs; - auto &next_downstream = shared_addr->next; + + // If session affinity is enabled, we always start with address at + // initial_addr_idx_. + size_t temp_idx = initial_addr_idx_; + + auto &next_downstream = + shared_addr->affinity == AFFINITY_NONE ? shared_addr->next : temp_idx; auto end = next_downstream; for (;;) { auto &addr = addrs[next_downstream]; @@ -555,6 +562,24 @@ int HttpDownstreamConnection::end_upload_data() { return 0; } +namespace { +void remove_from_pool(HttpDownstreamConnection *dconn) { + auto group = dconn->get_downstream_addr_group(); + auto &shared_addr = group->shared_addr; + + if (shared_addr->affinity == AFFINITY_NONE) { + auto &dconn_pool = + dconn->get_downstream_addr_group()->shared_addr->dconn_pool; + dconn_pool.remove_downstream_connection(dconn); + return; + } + + auto addr = dconn->get_addr(); + auto &dconn_pool = addr->dconn_pool; + dconn_pool->remove_downstream_connection(dconn); +} +} // namespace + namespace { void idle_readcb(struct ev_loop *loop, ev_io *w, int revents) { auto conn = static_cast(w->data); @@ -562,9 +587,8 @@ void idle_readcb(struct ev_loop *loop, ev_io *w, int revents) { if (LOG_ENABLED(INFO)) { DCLOG(INFO, dconn) << "Idle connection EOF"; } - auto &dconn_pool = - dconn->get_downstream_addr_group()->shared_addr->dconn_pool; - dconn_pool.remove_downstream_connection(dconn); + + remove_from_pool(dconn); // dconn was deleted } } // namespace @@ -576,9 +600,8 @@ void idle_timeoutcb(struct ev_loop *loop, ev_timer *w, int revents) { if (LOG_ENABLED(INFO)) { DCLOG(INFO, dconn) << "Idle connection timeout"; } - auto &dconn_pool = - dconn->get_downstream_addr_group()->shared_addr->dconn_pool; - dconn_pool.remove_downstream_connection(dconn); + + remove_from_pool(dconn); // dconn was deleted } } // namespace diff --git a/src/shrpx_http_downstream_connection.h b/src/shrpx_http_downstream_connection.h index 40b53f44..8fad5353 100644 --- a/src/shrpx_http_downstream_connection.h +++ b/src/shrpx_http_downstream_connection.h @@ -43,7 +43,8 @@ struct DownstreamAddr; class HttpDownstreamConnection : public DownstreamConnection { public: HttpDownstreamConnection(const std::shared_ptr &group, - struct ev_loop *loop, Worker *worker); + ssize_t initial_addr_idx, struct ev_loop *loop, + Worker *worker); virtual ~HttpDownstreamConnection(); virtual int attach_downstream(Downstream *downstream); virtual void detach_downstream(Downstream *downstream); @@ -64,6 +65,7 @@ public: virtual bool poolable() const; virtual DownstreamAddrGroup *get_downstream_addr_group() const; + virtual DownstreamAddr *get_addr() const; int read_clear(); int write_clear(); @@ -79,8 +81,6 @@ public: int noop(); - DownstreamAddr *get_addr() const; - private: Connection conn_; std::function do_read_, do_write_, @@ -93,6 +93,7 @@ private: DownstreamAddr *addr_; IOControl ioctrl_; http_parser response_htp_; + ssize_t initial_addr_idx_; }; } // namespace shrpx diff --git a/src/shrpx_worker.cc b/src/shrpx_worker.cc index 745de073..c0f9ee01 100644 --- a/src/shrpx_worker.cc +++ b/src/shrpx_worker.cc @@ -71,6 +71,10 @@ bool match_shared_downstream_addr( return false; } + if (lhs->affinity != rhs->affinity) { + return false; + } + auto used = std::vector(lhs->addrs.size()); for (auto &a : lhs->addrs) { @@ -143,7 +147,17 @@ void Worker::replace_downstream_config( std::shared_ptr downstreamconf) { for (auto &g : downstream_addr_groups_) { g->retired = true; - g->shared_addr->dconn_pool.remove_all(); + + auto &shared_addr = g->shared_addr; + + if (shared_addr->affinity == AFFINITY_NONE) { + shared_addr->dconn_pool.remove_all(); + continue; + } + + for (auto &addr : shared_addr->addrs) { + addr.dconn_pool->remove_all(); + } } downstreamconf_ = downstreamconf; @@ -163,6 +177,7 @@ void Worker::replace_downstream_config( auto shared_addr = std::make_shared(); shared_addr->addrs.resize(src.addrs.size()); + shared_addr->affinity = src.affinity; size_t num_http1 = 0; size_t num_http2 = 0; @@ -240,6 +255,12 @@ void Worker::replace_downstream_config( shared_addr->http1_pri.weight = num_http1; shared_addr->http2_pri.weight = num_http2; + if (shared_addr->affinity != AFFINITY_NONE) { + for (auto &addr : shared_addr->addrs) { + addr.dconn_pool = make_unique(); + } + } + dst->shared_addr = shared_addr; } else { if (LOG_ENABLED(INFO)) { diff --git a/src/shrpx_worker.h b/src/shrpx_worker.h index 1a334723..2a0a2965 100644 --- a/src/shrpx_worker.h +++ b/src/shrpx_worker.h @@ -86,6 +86,9 @@ struct DownstreamAddr { std::unique_ptr connect_blocker; std::unique_ptr live_check; + // Connection pool for this particular address if session affinity + // is enabled + std::unique_ptr dconn_pool; size_t fall; size_t rise; // Client side TLS session cache @@ -93,6 +96,9 @@ struct DownstreamAddr { // Http2Session object created for this address. This list chains // all Http2Session objects that is not in group scope // http2_avail_freelist, and is not reached in maximum concurrency. + // + // If session affinity is enabled, http2_avail_freelist is not used, + // and this list is solely used. DList http2_extra_freelist; // true if Http2Session for this address is in group scope // SharedDownstreamAddr.http2_avail_freelist @@ -126,6 +132,9 @@ struct SharedDownstreamAddr { // coalesce as much stream as possible in one Http2Session to fully // utilize TCP connection. // + // If session affinity is enabled, this list is not used. Per + // address http2_extra_freelist is used instead. + // // TODO Verify that this approach performs better in performance // wise. DList http2_avail_freelist; @@ -139,6 +148,8 @@ struct SharedDownstreamAddr { // HTTP/1.1. Otherwise, choose HTTP/2. WeightedPri http1_pri; WeightedPri http2_pri; + // Session affinity + shrpx_session_affinity affinity; }; struct DownstreamAddrGroup {