diff --git a/doc/configuration.txt b/doc/configuration.txt index 408829143..f6269d75b 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -2352,6 +2352,7 @@ reqrep - X X X -- keyword -------------------------- defaults - frontend - listen -- backend - reqtarpit - X X X retries X - X X +retry-on X - X X rspadd - X X X rspdel - X X X rspdeny - X X X @@ -8004,6 +8005,70 @@ retries See also : "option redispatch" +retry-on [list of keywords] + Specify when to attempt to automatically retry a failed request + May be used in sections: defaults | frontend | listen | backend + yes | no | yes | yes + Arguments : + <keywords> is a list of keywords or HTTP status codes, each representing a + type of failure event on which an attempt to retry the request + is desired. Please read the notes at the bottom before changing + this setting. The following keywords are supported : + + none never retry + + conn-failure retry when the connection or the SSL handshake failed + and the request could not be sent. This is the default. + + empty-response retry when the server connection was closed after part + of the request was sent, and nothing was received from + the server. This type of failure may be caused by the + request timeout on the server side, poor network + condition, or a server crash or restart while + processing the request. + + response-timeout the server timeout struck while waiting for the server + to respond to the request. This may be caused by poor + network condition, the reuse of an idle connection + which has expired on the path, or by the request being + extremely expensive to process. It generally is a bad + idea to retry on such events on servers dealing with + heavy database processing (full scans, etc) as it may + amplify denial of service attacks. 
+ + <status> any HTTP status code among "404" (Not Found), "408" + (Request Timeout), "425" (Too Early), "500" (Server + Error), "501" (Not Implemented), "502" (Bad Gateway), + "503" (Service Unavailable), "504" (Gateway Timeout). + + Using this directive replaces any previous settings with the new ones; it is + not cumulative. + + Please note that using anything other than "none" and "conn-failure" requires + allocating a buffer and copying the whole request into it, so it has memory and + performance impacts. Requests not fitting in a single buffer will never be + retried (see the global tune.bufsize setting). + + You have to make sure the application has a replay protection mechanism built + in such as unique transaction IDs passed in requests, or that replaying the + same request has no consequence, or it is very dangerous to use any retry-on + value besides "conn-failure" and "none". Static file servers and caches are + generally considered safe against any type of retry. Using a status code can + be useful to quickly leave a server showing an abnormal behavior (out of + memory, file system issues, etc), but in this case it may be a good idea to + immediately redispatch the connection to another server (please see "option + redispatch" for this). Last, it is important to understand that most causes + of failures are the requests themselves and that retrying a request causing a + server to misbehave will often make the situation even worse for this server, + or for the whole service in case of redispatch. + + Unless you know exactly how the application deals with replayed requests, you + should not use this directive. + + The default is "conn-failure". 
+ + See also: "retries", "option redispatch", "tune.bufsize" + rspadd [{if | unless} ] Add a header at the end of the HTTP response May be used in sections : defaults | frontend | listen | backend diff --git a/doc/internals/filters.txt b/doc/internals/filters.txt index 09090e556..2cb0eed7b 100644 --- a/doc/internals/filters.txt +++ b/doc/internals/filters.txt @@ -1170,9 +1170,12 @@ In addition to these callbacks, there are three others: Then, to finish, there are 2 informational callbacks: * 'flt_ops.http_reset': This callback is called when a HTTP message is - reset. This only happens when a '100-continue' response is received. It + reset. This happens either when a '100-continue' response is received, or + if we're retrying to send the request to the server after it failed. It could be useful to reset the filter context before receiving the true response. + You can know why the callback is called by checking s->txn->status. If it's + 10X, we're called because of a '100-continue', if not, it's a L7 retry. 
* 'flt_ops.http_reply': This callback is called when, at any time, HAProxy decides to stop the processing on a HTTP message and to send an internal diff --git a/include/proto/proxy.h b/include/proto/proxy.h index 172c3d561..c75c0da21 100644 --- a/include/proto/proxy.h +++ b/include/proto/proxy.h @@ -155,6 +155,35 @@ static inline void proxy_inc_fe_req_ctr(struct proxy *fe) update_freq_ctr(&fe->fe_req_per_sec, 1)); } +/* Returns non-zero if the proxy is configured to retry a request if we got that status, 0 otherwise. The non-zero value is the matching PR_RE_* bit of p->retry_type, not necessarily 1. */ +static inline int l7_status_match(struct proxy *p, int status) +{ + /* Just return 0 if no retry was configured for any status */ + if (!(p->retry_type & PR_RE_STATUS_MASK)) + return 0; + + switch (status) { + case 404: + return (p->retry_type & PR_RE_404); + case 408: + return (p->retry_type & PR_RE_408); + case 425: + return (p->retry_type & PR_RE_425); + case 500: + return (p->retry_type & PR_RE_500); + case 501: + return (p->retry_type & PR_RE_501); + case 502: + return (p->retry_type & PR_RE_502); + case 503: + return (p->retry_type & PR_RE_503); + case 504: + return (p->retry_type & PR_RE_504); + default: + break; + } + return 0; +} #endif /* _PROTO_PROXY_H */ /* diff --git a/include/types/filters.h b/include/types/filters.h index f52592d85..1dd33988b 100644 --- a/include/types/filters.h +++ b/include/types/filters.h @@ -137,7 +137,9 @@ struct flt_kw_list { * it needs to wait for some reason, any other value * otherwise. * - http_reset : Called when the HTTP message is reseted. It happens - * when a 100-continue response is received. + * either when a 100-continue response is received, + * which can be detected if s->txn->status is 10X, or + * if we're attempting a L7 retry. * Returns nothing. 
* - http_reply : Called when, at any time, HA proxy decides to stop * the HTTP message's processing and to send a message diff --git a/include/types/proxy.h b/include/types/proxy.h index 765e81d8e..5f194140c 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -153,6 +153,7 @@ enum PR_SRV_STATE_FILE { #define PR_O2_FAKE_KA 0x00200000 /* pretend we do keep-alive with server eventhough we close */ #define PR_O2_USE_HTX 0x00400000 /* use the HTX representation for the HTTP protocol */ + #define PR_O2_EXP_NONE 0x00000000 /* http-check : no expect rule */ #define PR_O2_EXP_STS 0x00800000 /* http-check expect status */ #define PR_O2_EXP_RSTS 0x01000000 /* http-check expect rstatus */ @@ -202,6 +203,21 @@ enum PR_SRV_STATE_FILE { #define PR_FBM_MISMATCH_NAME 0x02 #define PR_FBM_MISMATCH_PROXYTYPE 0x04 +/* Bits for the different retry causes, stored in proxy->retry_type. PR_RE_STATUS_MASK is the OR of all the HTTP status bits. */ +#define PR_RE_CONN_FAILED 0x00000001 /* Retry if we failed to connect */ +#define PR_RE_DISCONNECTED 0x00000002 /* Retry if we got disconnected with no answer */ +#define PR_RE_TIMEOUT 0x00000004 /* Retry if we got a server timeout before we got any data */ +#define PR_RE_404 0x00000008 /* Retry if we got a 404 */ +#define PR_RE_408 0x00000010 /* Retry if we got a 408 */ +#define PR_RE_425 0x00000020 /* Retry if we got a 425 */ +#define PR_RE_500 0x00000040 /* Retry if we got a 500 */ +#define PR_RE_501 0x00000080 /* Retry if we got a 501 */ +#define PR_RE_502 0x00000100 /* Retry if we got a 502 */ +#define PR_RE_503 0x00000200 /* Retry if we got a 503 */ +#define PR_RE_504 0x00000400 /* Retry if we got a 504 */ +#define PR_RE_STATUS_MASK (PR_RE_404 | PR_RE_408 | PR_RE_425 | \ + PR_RE_500 | PR_RE_501 | PR_RE_502 | \ + PR_RE_503 | PR_RE_504) struct stream; struct http_snapshot { @@ -364,6 +380,7 @@ struct proxy { char *server_id_hdr_name; /* the header to use to send the server id (name) */ int server_id_hdr_len; /* the length of the id (name) header... 
name */ int conn_retries; /* maximum number of connect retries */ + unsigned int retry_type; /* Type of retry allowed */ int redispatch_after; /* number of retries before redispatch */ unsigned down_trans; /* up-down transitions */ unsigned down_time; /* total time the proxy was down */ diff --git a/include/types/stream_interface.h b/include/types/stream_interface.h index 61937e073..6b30de58a 100644 --- a/include/types/stream_interface.h +++ b/include/types/stream_interface.h @@ -83,6 +83,7 @@ enum { SI_FL_RXBLK_CONN = 0x00100000, /* other side is not connected */ SI_FL_RXBLK_ANY = 0x001F0000, /* any of the RXBLK flags above */ SI_FL_RX_WAIT_EP = 0x00200000, /* stream-int waits for more data from the end point */ + SI_FL_L7_RETRY = 0x01000000, /* The stream interface may attempt L7 retries */ }; /* A stream interface has 3 parts : @@ -111,6 +112,7 @@ struct stream_interface { int conn_retries; /* number of connect retries left */ unsigned int hcto; /* half-closed timeout (0 = unset) */ struct wait_event wait_event; /* We're in a wait list */ + struct buffer l7_buffer; /* To store the data, in case we have to retry */ }; /* operations available on a stream-interface */ diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c index 5f44cfdf0..7c64be102 100644 --- a/src/cfgparse-listen.c +++ b/src/cfgparse-listen.c @@ -395,6 +395,7 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm) curproxy->except_mask = defproxy.except_mask; curproxy->except_to = defproxy.except_to; curproxy->except_mask_to = defproxy.except_mask_to; + curproxy->retry_type = defproxy.retry_type; if (defproxy.fwdfor_hdr_len) { curproxy->fwdfor_hdr_len = defproxy.fwdfor_hdr_len; diff --git a/src/cfgparse.c b/src/cfgparse.c index 48d53e9dd..dd99bbbf1 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -2446,6 +2446,12 @@ int check_config_validity() } } + if ((curproxy->retry_type &~ PR_RE_CONN_FAILED) && + !(curproxy->options2 & PR_O2_USE_HTX)) { + ha_warning("Proxy '%s' : 
retry-on with any other keywords than 'conn-failure' will be ignored, requires 'option http-use-htx'.\n", curproxy->id); + err_code |= ERR_WARN; + curproxy->retry_type &= PR_RE_CONN_FAILED; + } if (curproxy->email_alert.set) { if (!(curproxy->email_alert.mailers.name && curproxy->email_alert.from && curproxy->email_alert.to)) { ha_warning("config : 'email-alert' will be ignored for %s '%s' (the presence any of " diff --git a/src/proto_htx.c b/src/proto_htx.c index ee9a2716f..d8363a23d 100644 --- a/src/proto_htx.c +++ b/src/proto_htx.c @@ -1386,6 +1386,45 @@ int htx_request_forward_body(struct stream *s, struct channel *req, int an_bit) return 0; } +/* Reset the stream and the backend stream_interface to a situation suitable for attempting a new connection: error, timeout and shutdown flags are cleared, the server endpoint is released and the request saved in si->l7_buffer is moved back into the request channel. Consumes one of si->conn_retries. */ +/* Returns 0 if we can attempt to retry, -1 otherwise (no retry left) */ +static __inline int do_l7_retry(struct stream *s, struct stream_interface *si) +{ + struct channel *req, *res; + int co_data; + + si->conn_retries--; + if (si->conn_retries < 0) + return -1; + + req = &s->req; + res = &s->res; + /* Remove any write error from the request, and read error from the response */ + req->flags &= ~(CF_WRITE_ERROR | CF_WRITE_TIMEOUT | CF_SHUTW | CF_SHUTW_NOW); + res->flags &= ~(CF_READ_ERROR | CF_READ_TIMEOUT | CF_SHUTR | CF_EOI | CF_READ_NULL | CF_SHUTR_NOW); + res->analysers = 0; + si->flags &= ~(SI_FL_ERR | SI_FL_EXP | SI_FL_RXBLK_SHUT); + si->state = SI_ST_REQ; + si->exp = TICK_ETERNITY; + res->rex = TICK_ETERNITY; + res->to_forward = 0; + res->analyse_exp = TICK_ETERNITY; + res->total = 0; + s->flags &= ~(SF_ASSIGNED | SF_ADDR_SET | SF_ERR_SRVTO | SF_ERR_SRVCL); + si_release_endpoint(&s->si[1]); + b_free(&req->buf); + /* Swap the L7 buffer with the channel buffer */ + /* We know we stored the co_data as b_data, so get it there */ + co_data = b_data(&si->l7_buffer); + b_set_data(&si->l7_buffer, b_size(&si->l7_buffer)); + b_xfer(&req->buf, &si->l7_buffer, b_data(&si->l7_buffer)); + + co_set_data(req, co_data); + 
b_reset(&res->buf); + co_set_data(res, 0); + return 0; +} + /* This stream analyser waits for a complete HTTP response. It returns 1 if the * processing can continue on next analysers, or zero if it either needs more * data or wants to immediately abort the response (eg: timeout, error, ...). It @@ -1406,6 +1445,7 @@ int htx_wait_for_response(struct stream *s, struct channel *rep, int an_bit) struct http_txn *txn = s->txn; struct http_msg *msg = &txn->rsp; struct htx *htx; + struct stream_interface *si_b = &s->si[1]; struct connection *srv_conn; struct htx_sl *sl; int n; @@ -1453,6 +1493,17 @@ int htx_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (txn->flags & TX_NOT_FIRST) goto abort_keep_alive; + if (si_b->flags & SI_FL_L7_RETRY) { + /* If we arrive here, then CF_READ_ERROR was + * set by si_cs_recv() because we matched a + * status, otherwise it would have removed + * the SI_FL_L7_RETRY flag, so it's ok not + * to check s->be->retry_type. + */ + if (co_data(rep) || do_l7_retry(s, si_b) == 0) + return 0; + } + _HA_ATOMIC_ADD(&s->be->be_counters.failed_resp, 1); if (objt_server(s->target)) { _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_resp, 1); @@ -1484,6 +1535,11 @@ int htx_wait_for_response(struct stream *s, struct channel *rep, int an_bit) /* 2: read timeout : return a 504 to the client. 
*/ else if (rep->flags & CF_READ_TIMEOUT) { + if ((si_b->flags & SI_FL_L7_RETRY) && + (s->be->retry_type & PR_RE_TIMEOUT)) { + if (co_data(rep) || do_l7_retry(s, si_b) == 0) + return 0; + } _HA_ATOMIC_ADD(&s->be->be_counters.failed_resp, 1); if (objt_server(s->target)) { _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_resp, 1); @@ -1527,6 +1583,12 @@ int htx_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (txn->flags & TX_NOT_FIRST) goto abort_keep_alive; + if ((si_b->flags & SI_FL_L7_RETRY) && + (s->be->retry_type & PR_RE_DISCONNECTED)) { + if (co_data(rep) || do_l7_retry(s, si_b) == 0) + return 0; + } + _HA_ATOMIC_ADD(&s->be->be_counters.failed_resp, 1); if (objt_server(s->target)) { _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_resp, 1); diff --git a/src/proxy.c b/src/proxy.c index a3f355f64..6e804a91e 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -501,6 +501,62 @@ static int proxy_parse_declare(char **args, int section, struct proxy *curpx, } } +/* This function parses a "retry-on" statement. It replaces curpx->retry_type with the OR of the PR_RE_* bits matching the listed keywords; "none" must be used alone and leaves it at 0. Returns 0 on success, or -1 after reporting the problem through memprintf(err, ...). */ +static int +proxy_parse_retry_on(char **args, int section, struct proxy *curpx, + struct proxy *defpx, const char *file, int line, + char **err) +{ + int i; + + if (!(*args[1])) { + memprintf(err, "'%s' needs at least one keyword to specify when to retry", args[0]); + return -1; + } + if (!(curpx->cap & PR_CAP_BE)) { + memprintf(err, "'%s' only available in backend or listen section", args[0]); + return -1; + } + curpx->retry_type = 0; + for (i = 1; *(args[i]); i++) { + if (!strcmp(args[i], "conn-failure")) + curpx->retry_type |= PR_RE_CONN_FAILED; + else if (!strcmp(args[i], "empty-response")) + curpx->retry_type |= PR_RE_DISCONNECTED; + else if (!strcmp(args[i], "response-timeout")) + curpx->retry_type |= PR_RE_TIMEOUT; + else if (!strcmp(args[i], "404")) + curpx->retry_type |= PR_RE_404; + else if (!strcmp(args[i], "408")) + curpx->retry_type |= PR_RE_408; + else if (!strcmp(args[i], "425")) + curpx->retry_type |= 
PR_RE_425; + else if (!strcmp(args[i], "500")) + curpx->retry_type |= PR_RE_500; + else if (!strcmp(args[i], "501")) + curpx->retry_type |= PR_RE_501; + else if (!strcmp(args[i], "502")) + curpx->retry_type |= PR_RE_502; + else if (!strcmp(args[i], "503")) + curpx->retry_type |= PR_RE_503; + else if (!strcmp(args[i], "504")) + curpx->retry_type |= PR_RE_504; + else if (!strcmp(args[i], "none")) { + if (i != 1 || *args[i + 1]) { + memprintf(err, "'%s' 'none' keyword only usable alone", args[0]); + return -1; + } + } else { + memprintf(err, "'%s': unknown keyword '%s'", args[0], args[i]); + return -1; + } + + } + + + return 0; +} + /* This function inserts proxy into the tree of known proxies. The proxy's * name is used as the storing key so it must already have been initialized. */ @@ -823,6 +879,9 @@ void init_new_proxy(struct proxy *p) /* HTX is the default mode, for HTTP and TCP */ p->options2 |= PR_O2_USE_HTX; + /* Default to only allow L4 retries */ + p->retry_type = PR_RE_CONN_FAILED; + HA_SPIN_INIT(&p->lock); } @@ -1590,6 +1649,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_LISTEN, "rate-limit", proxy_parse_rate_limit }, { CFG_LISTEN, "max-keep-alive-queue", proxy_parse_max_ka_queue }, { CFG_LISTEN, "declare", proxy_parse_declare }, + { CFG_LISTEN, "retry-on", proxy_parse_retry_on }, { 0, NULL, NULL }, }}; diff --git a/src/stream.c b/src/stream.c index b3573c8d7..8c2ea5561 100644 --- a/src/stream.c +++ b/src/stream.c @@ -323,6 +323,7 @@ struct stream *stream_new(struct session *sess, enum obj_type *origin) if (flt_stream_init(s) < 0 || flt_stream_start(s) < 0) goto out_fail_accept; + s->si[1].l7_buffer = BUF_NULL; /* finish initialization of the accepted file descriptor */ if (appctx) si_want_get(&s->si[0]); @@ -475,6 +476,7 @@ static void stream_free(struct stream *s) tasklet_free(s->si[0].wait_event.task); tasklet_free(s->si[1].wait_event.task); + b_free(&s->si[1].l7_buffer); if (must_free_sess) { sess->origin = NULL; session_free(sess); @@ -769,7 
+771,7 @@ static int sess_update_st_cer(struct stream *s) /* ensure that we have enough retries left */ si->conn_retries--; - if (si->conn_retries < 0) { + if (si->conn_retries < 0 || !(s->be->retry_type & PR_RE_CONN_FAILED)) { if (!si->err_type) { si->err_type = SI_ET_CONN_ERR; } @@ -2322,6 +2324,8 @@ redo: */ si_b->state = SI_ST_REQ; /* new connection requested */ si_b->conn_retries = s->be->conn_retries; + if (s->be->retry_type &~ PR_RE_CONN_FAILED) + si_b->flags |= SI_FL_L7_RETRY; } } else { diff --git a/src/stream_interface.c b/src/stream_interface.c index 1e50c1fc9..731df38a5 100644 --- a/src/stream_interface.c +++ b/src/stream_interface.c @@ -30,8 +30,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -685,6 +687,34 @@ int si_cs_send(struct conn_stream *cs) if (oc->flags & CF_STREAMER) send_flag |= CO_SFL_STREAMER; + if ((si->flags & SI_FL_L7_RETRY) && !b_data(&si->l7_buffer)) { + /* If we want to be able to do L7 retries, copy + * the data we're about to send, so that we are able + * to resend them if needed + */ + /* Try to allocate a buffer if we had none. + * If it fails, the next test will just + * disable the l7 retries by clearing + * the SI_FL_L7_RETRY flag. 
+ */ + if (!(oc->flags & CF_EOI)) + si->flags &= ~SI_FL_L7_RETRY; + else { + if (b_is_null(&si->l7_buffer)) + b_alloc(&si->l7_buffer); + if (b_is_null(&si->l7_buffer)) + si->flags &= ~SI_FL_L7_RETRY; + else { + memcpy(b_orig(&si->l7_buffer), + b_orig(&oc->buf), + b_size(&oc->buf)); + si->l7_buffer.head = co_data(oc); + b_add(&si->l7_buffer, co_data(oc)); + } + + } + } + ret = cs->conn->mux->snd_buf(cs, &oc->buf, co_data(oc), send_flag); if (ret > 0) { did_send = 1; @@ -1268,6 +1298,27 @@ int si_cs_recv(struct conn_stream *cs) break; } + if (si->flags & SI_FL_L7_RETRY) { + struct htx *htx; + struct htx_sl *sl; + + htx = htxbuf(&ic->buf); + if (htx) { + sl = http_find_stline(htx); + if (sl && l7_status_match(si_strm(si)->be, + sl->info.res.status)) { + /* If we got a status for which we would + * like to retry the request, empty + * the buffer and pretend there's an + * error on the channel. + */ + ic->flags |= CF_READ_ERROR; + htx_reset(htx); + return 1; + } + } + si->flags &= ~SI_FL_L7_RETRY; + } cur_read += ret; /* if we're allowed to directly forward data, we must update ->o */