From a3839461cff09a4297997041f066e6e14996b47e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Gonz=C3=A1lez?= <7822554+AlexTMjugador@users.noreply.github.com> Date: Tue, 10 Jun 2025 00:19:58 +0200 Subject: [PATCH] perf(labrinth/random_projects_get): speed up through spatial queries according to profiling results (#3762) --- ...c1760d2e89af99dd71849eda35d6c6820aa43.json | 23 +++++++++++++++++++ ...061c79f4e6d2f1701e727a7474fd5029a5492.json | 23 ------------------- ...609134334_spatial-random-project-index.sql | 12 ++++++++++ apps/labrinth/src/models/v3/projects.rs | 3 +++ apps/labrinth/src/routes/v3/projects.rs | 16 ++++++++----- 5 files changed, 48 insertions(+), 29 deletions(-) create mode 100644 apps/labrinth/.sqlx/query-0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43.json delete mode 100644 apps/labrinth/.sqlx/query-1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492.json create mode 100644 apps/labrinth/migrations/20250609134334_spatial-random-project-index.sql diff --git a/apps/labrinth/.sqlx/query-0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43.json b/apps/labrinth/.sqlx/query-0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43.json new file mode 100644 index 000000000..4a8e5af6a --- /dev/null +++ b/apps/labrinth/.sqlx/query-0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "WITH random_id_point AS (\n SELECT POINT(RANDOM() * ((SELECT MAX(id) FROM mods) - (SELECT MIN(id) FROM mods) + 1) + (SELECT MIN(id) FROM mods), 0) AS point\n )\n SELECT id FROM mods\n WHERE status = ANY($1)\n ORDER BY POINT(id, 0) <-> (SELECT point FROM random_id_point)\n LIMIT $2", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "TextArray", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43" +} diff --git a/apps/labrinth/.sqlx/query-1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492.json b/apps/labrinth/.sqlx/query-1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492.json deleted file mode 100644 index 8de44b292..000000000 --- a/apps/labrinth/.sqlx/query-1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT id FROM mods WHERE status = ANY($1)\n ORDER BY id\n LIMIT $2\n OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [ - "TextArray", - "Int8" - ] - }, - "nullable": [ - false - ] - }, - "hash": "1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492" -} diff --git a/apps/labrinth/migrations/20250609134334_spatial-random-project-index.sql b/apps/labrinth/migrations/20250609134334_spatial-random-project-index.sql new file mode 100644 index 000000000..fb471b286 --- /dev/null +++ b/apps/labrinth/migrations/20250609134334_spatial-random-project-index.sql @@ -0,0 +1,12 @@ +-- The spatial query for retrieving random searchable projects is greatly sped +-- up by this index on a fixture of 1M mods, bringing down the total cost of +-- the query plan and runtime to be comparable to primary key lookups. See the +-- `labrinth::routes::v3::projects::random_projects_get` function and the +-- previous 20250608183828_random-project-index.sql migration for more details. +-- +-- That previous migration created a non-spatial index for the status column which +-- does not get used in the new spatial query, but may still be useful for other +-- queries that filter mods by status. + +CREATE INDEX mods_searchable_ids_gist ON mods USING gist (POINT(id, 0)) + WHERE status = ANY(ARRAY['approved', 'archived']); diff --git a/apps/labrinth/src/models/v3/projects.rs b/apps/labrinth/src/models/v3/projects.rs index 6e9f17cf2..e509f0b89 100644 --- a/apps/labrinth/src/models/v3/projects.rs +++ b/apps/labrinth/src/models/v3/projects.rs @@ -519,6 +519,9 @@ impl ProjectStatus { } // Project can be displayed in search + // IMPORTANT: if this is changed, make sure to update the `mods_searchable_ids_gist` + // index in the DB to keep random project queries fast (see the + // `20250609134334_spatial-random-project-index.sql` migration) pub fn is_searchable(&self) -> bool { matches!(self, ProjectStatus::Approved | ProjectStatus::Archived) } diff --git a/apps/labrinth/src/routes/v3/projects.rs b/apps/labrinth/src/routes/v3/projects.rs index 242fd3979..766f4fc5a 100644 --- a/apps/labrinth/src/routes/v3/projects.rs +++ b/apps/labrinth/src/routes/v3/projects.rs @@ -94,12 +94,16 @@ pub async fn random_projects_get( })?; let project_ids = sqlx::query!( - // IDs are randomly generated (see the `generate_ids` macro), so ID order is - // equivalent to a random order - "SELECT id FROM mods WHERE status = ANY($1) - ORDER BY id - LIMIT $2 - OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)", + // IDs are randomly generated (see the `generate_ids` macro), so fetching a + // number of mods nearest to a random point in the ID space is equivalent to + // random sampling + "WITH random_id_point AS ( + SELECT POINT(RANDOM() * ((SELECT MAX(id) FROM mods) - (SELECT MIN(id) FROM mods) + 1) + (SELECT MIN(id) FROM mods), 0) AS point + ) + SELECT id FROM mods + WHERE status = ANY($1) + ORDER BY POINT(id, 0) <-> (SELECT point FROM random_id_point) + LIMIT $2", &*crate::models::projects::ProjectStatus::iterator() .filter(|x| x.is_searchable()) .map(|x| x.to_string())