From 4f906a9c3824dd424a36f53fc3479f276333d566 Mon Sep 17 00:00:00 2001 From: Aurelien DARRAGON Date: Mon, 3 Jun 2024 16:18:27 +0200 Subject: [PATCH] BUG/MINOR: hlua: use CertCache.set() from various hlua contexts Using CertCache.set() from init context wasn't explicitly supported and caused the process to crash: crash.lua: core.register_init(function() CertCache.set{filename="reg-tests/ssl/set_cafile_client.pem", ocsp=""} end) crash.conf: global lua-load crash.lua listen front bind localhost:9090 ssl crt reg-tests/ssl/set_cafile_client.pem ca-file reg-tests/ssl/set_cafile_interCA1.crt verify none ./haproxy -f crash.conf [NOTICE] (267993) : haproxy version is 3.0-dev2-640ff6-910 [NOTICE] (267993) : path to executable is ./haproxy [WARNING] (267993) : config : missing timeouts for proxy 'front'. | While not properly invalid, you will certainly encounter various problems | with such a configuration. To fix this, please ensure that all following | timeouts are set to a non-zero value: 'client', 'connect', 'server'. [1] 267993 segmentation fault (core dumped) ./haproxy -f crash.conf This is because in hlua_ckch_set/hlua_ckch_commit_yield, we always consider that we're being called from a yield-capable runtime context. As such, hlua_gethlua() is never checked for NULL and we systematically try to wake hlua->task and yield every 10 instances. In fact, if we're called from the body or init context (that is, during haproxy startup), hlua_gethlua() will return NULL, and in this case we shouldn't care about yielding because it is ok to commit all instances at once since haproxy is still starting up. Also, when calling CertCache.set() from a non-yield capable runtime context (such as hlua fetch context), we kept doing as if the yield succeeded, resulting in unexpected function termination (operation would be aborted and the CertCache lock wouldn't be released). Instead, now we explicitly state in the doc that CertCache.set() cannot be used from a non-yield capable runtime context, and we raise a runtime error if it is used that way. These bugs were discovered by reading the code when trying to address Svace report documented by @Bbulatov GH #2586. It should be backported up to 2.6 with 30fcca18 ("MINOR: ssl/lua: CertCache.set() allows to update an SSL certificate file") --- doc/lua-api/index.rst | 4 ++++ src/hlua.c | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/lua-api/index.rst b/doc/lua-api/index.rst index 0d69a2f81..cfef26c18 100644 --- a/doc/lua-api/index.rst +++ b/doc/lua-api/index.rst @@ -4463,6 +4463,10 @@ CertCache class :param string certificate.issuer: The certificate of the OCSP issuer. :param string certificate.sctl: An SCTL file. + .. Note:: + This function may be slow. As such, it may only be used during startup + (main or init context) or from a yield-capable runtime context. + .. code-block:: lua CertCache.set{filename="certs/localhost9994.pem.rsa", crt=crt} diff --git a/src/hlua.c b/src/hlua.c index 098107f7a..33eccfc64 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -12989,8 +12989,10 @@ __LJMP static int hlua_ckch_commit_yield(lua_State *L, int status, lua_KContext list_for_each_entry_from(ckchi, &old_ckchs->ckch_inst, by_ckchs) { struct ckch_inst *new_inst; - /* it takes a lot of CPU to creates SSL_CTXs, so we yield every 10 CKCH instances */ - if (y % 10 == 0) { + /* it takes a lot of CPU to creates SSL_CTXs, so we yield every 10 CKCH instances + * during runtime + */ + if (hlua && (y % 10) == 0) { *lua_ckchi = ckchi; @@ -13053,6 +13055,14 @@ __LJMP static int hlua_ckch_set(lua_State *L) WILL_LJMP(luaL_error(L, "'CertCache.set' needs a table as argument")); hlua = hlua_gethlua(L); + if (hlua && HLUA_CANT_YIELD(hlua)) { + /* using hlua_ckch_set() during runtime from a context that + * doesn't allow yielding (e.g.: fetches) is not supported + * as it may cause contention. + */ + WILL_LJMP(luaL_error(L, "Cannot use CertCache.set from a " + "non-yield capable runtime context")); + } /* FIXME: this should not return an error but should come back later */ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock)) @@ -13135,8 +13145,19 @@ __LJMP static int hlua_ckch_set(lua_State *L) lua_ckchi = lua_newuserdata(L, sizeof(struct ckch_inst *)); *lua_ckchi = NULL; - task_wakeup(hlua->task, TASK_WOKEN_MSG); - MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0)); + if (hlua) { + /* yield right away to let hlua_ckch_commit_yield() benefit from + * a fresh task cycle on next wakeup + */ + task_wakeup(hlua->task, TASK_WOKEN_MSG); + MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0)); + } else { + /* body/init context: yielding not available, perform the commit as a + * 1-shot operation (may be slow, but haproxy process is starting so + * it is acceptable) + */ + hlua_ckch_commit_yield(L, LUA_OK, 0); + } end: HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);