Skip to content

Commit e58c238

Browse files
committed
Do deletion in batches
1 parent 0aa5a39 commit e58c238

File tree

2 files changed

+43
-31
lines changed

2 files changed

+43
-31
lines changed

synapse/handlers/e2e_keys.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,24 +1590,35 @@ async def _delete_old_one_time_keys_task(
15901590
that it could still have old OTKs that the client has dropped. This task is scheduled exactly once
15911591
by a database schema delta file, and it clears out old one-time-keys that look like they came from libolm.
15921592
"""
1593-
user = task.result.get("from_user", "") if task.result else ""
1593+
last_user = task.result.get("from_user", "") if task.result else ""
15941594
while True:
1595-
user, rowcount = await self.store.delete_old_otks_for_one_user(user)
1596-
if user is None:
1595+
# We process users in batches of 100
1596+
users, rowcount = await self.store.delete_old_otks_for_next_user_batch(
1597+
last_user, 100
1598+
)
1599+
if len(users) == 0:
15971600
# We're done!
15981601
return TaskStatus.COMPLETE, None, None
15991602

1600-
logger.debug("Deleted %i old one-time-keys for user '%s'", rowcount, user)
1603+
logger.debug(
1604+
"Deleted %i old one-time-keys for users '%s'..'%s'",
1605+
rowcount,
1606+
users[0],
1607+
users[-1],
1608+
)
1609+
last_user = users[-1]
16011610

16021611
# Store our progress
1603-
await self._task_scheduler.update_task(task.id, result={"from_user": user})
1612+
await self._task_scheduler.update_task(
1613+
task.id, result={"from_user": last_user}
1614+
)
16041615

16051616
# Sleep a little before doing the next batch of users.
16061617
#
16071618
# matrix.org has about 15M users in the e2e_one_time_keys_json table
16081619
# (comprising 20M devices). We want this to take about a week, so we need
1609-
# to do 25 per second.
1610-
await self.clock.sleep(0.04)
1620+
# to do about one batch of 100 users every 4 seconds.
1621+
await self.clock.sleep(4)
16111622

16121623

16131624
def _check_cross_signing_key(

synapse/storage/databases/main/end_to_end_keys.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,48 +1453,49 @@ def impl(txn: LoggingTransaction) -> Tuple[bool, Optional[int]]:
14531453
impl,
14541454
)
14551455

1456-
async def delete_old_otks_for_one_user(
1457-
self, after_user_id: str
1458-
) -> Tuple[Optional[str], int]:
1459-
"""Deletes old OTKs belonging to one user.
1456+
async def delete_old_otks_for_next_user_batch(
1457+
self, after_user_id: str, number_of_users: int
1458+
) -> Tuple[List[str], int]:
1459+
"""Deletes old OTKs belonging to the next batch of users
14601460
14611461
Returns:
1462-
`(user, rows)`, where:
1463-
* `user` is the user ID of the updated user, or None if we are done
1462+
`(users, rows)`, where:
1463+
* `users` is the user IDs of the updated users. An empty list if we are done.
14641464
* `rows` is the number of deleted rows
14651465
"""
14661466

1467-
def impl(txn: LoggingTransaction) -> Tuple[Optional[str], int]:
1468-
# Find the next user
1467+
def impl(txn: LoggingTransaction) -> Tuple[List[str], int]:
1468+
# Find a batch of users
14691469
txn.execute(
14701470
"""
1471-
SELECT user_id FROM e2e_one_time_keys_json WHERE user_id > ? LIMIT 1
1471+
SELECT DISTINCT(user_id) FROM e2e_one_time_keys_json
1472+
WHERE user_id > ?
1473+
ORDER BY user_id
1474+
LIMIT ?
14721475
""",
1473-
(after_user_id,),
1476+
(after_user_id, number_of_users),
14741477
)
1475-
row = txn.fetchone()
1476-
if not row:
1477-
# We're done!
1478-
return None, 0
1479-
(user_id,) = row
1478+
users = [row[0] for row in txn.fetchall()]
1479+
if len(users) == 0:
1480+
return users, 0
14801481

1481-
# Delete any old OTKs belonging to that user.
1482+
# Delete any old OTKs belonging to those users.
14821483
#
14831484
# We only actually consider OTKs whose key ID is 6 characters long. These
14841485
# keys were likely made by libolm rather than Vodozemac; libolm only kept
14851486
# 100 private OTKs, so was far more vulnerable than Vodozemac to throwing
14861487
# away keys prematurely.
1487-
txn.execute(
1488-
"""
1488+
clause, args = make_in_list_sql_clause(txn.database_engine, 'user_id', users)
1489+
sql = f"""
14891490
DELETE FROM e2e_one_time_keys_json
1490-
WHERE user_id = ? AND ts_added_ms < ? AND length(key_id) = 6
1491-
""",
1492-
(user_id, self._clock.time_msec() - (7 * 24 * 3600 * 1000)),
1493-
)
1491+
WHERE {clause} AND ts_added_ms < ? AND length(key_id) = 6
1492+
"""
1493+
args.append(self._clock.time_msec() - (7 * 24 * 3600 * 1000))
1494+
txn.execute(sql, args)
14941495

1495-
return user_id, txn.rowcount
1496+
return users, txn.rowcount
14961497

1497-
return await self.db_pool.runInteraction("delete_old_otks_for_one_user", impl)
1498+
return await self.db_pool.runInteraction("delete_old_otks_for_next_user_batch", impl)
14981499

14991500

15001501
class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):

0 commit comments

Comments
 (0)