From 0f56b719d29a10ccd2230a26d41eff9acbdb7c46 Mon Sep 17 00:00:00 2001
From: Ralf Kistner <ralf@journeyapps.com>
Date: Tue, 15 Oct 2024 16:56:58 +0200
Subject: [PATCH] Fix performance issue with many duplicate ids.

---
 crates/core/src/sync_local.rs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/crates/core/src/sync_local.rs b/crates/core/src/sync_local.rs
index e1061ed..0e6602e 100644
--- a/crates/core/src/sync_local.rs
+++ b/crates/core/src/sync_local.rs
@@ -55,21 +55,17 @@ pub fn sync_local(db: *mut sqlite::sqlite3, _data: &str) -> Result<i64, SQLiteEr
 
     // Query for updated objects
 
-    // QUERY PLAN
-    // |--SCAN buckets
-    // |--SEARCH b USING INDEX ps_oplog_by_opid (bucket=? AND op_id>?)
-    // |--SEARCH r USING INDEX ps_oplog_by_row (row_type=? AND row_id=?)
-    // `--USE TEMP B-TREE FOR GROUP BY
     // language=SQLite
     let statement = db
         .prepare_v2(
             "\
 -- 1. Filter oplog by the ops added but not applied yet (oplog b).
+--    SELECT DISTINCT / UNION is important for cases with many duplicate ids.
 WITH updated_rows AS (
-  SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
+  SELECT DISTINCT b.row_type, b.row_id FROM ps_buckets AS buckets
     CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
   AND (b.op_id > buckets.last_applied_op)
-  UNION ALL SELECT row_type, row_id FROM ps_updated_rows
+  UNION SELECT row_type, row_id FROM ps_updated_rows
 )
 
 -- 3. Group the objects from different buckets together into a single one (ops).