Skip to content

Commit c5e89f5

Browse files
authored
Create one-off scheduled task to delete old OTKs (#17934)
To work around the fact that, pre-#17903, our database may have old one-time-keys that the clients have long thrown away the private keys for, we want to delete OTKs that look like they came from libolm. To spread the load a bit, without holding up other background database updates, we use a scheduled task to do the work.
1 parent e918f68 commit c5e89f5

File tree

6 files changed

+203
-0
lines changed

6 files changed

+203
-0
lines changed

changelog.d/17934.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a one-off task to delete old one-time-keys, to guard against us having old OTKs in the database that the client has long forgotten about.

synapse/handlers/e2e_keys.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
from synapse.types import (
4040
JsonDict,
4141
JsonMapping,
42+
ScheduledTask,
43+
TaskStatus,
4244
UserID,
4345
get_domain_from_id,
4446
get_verify_key_from_cross_signing_key,
@@ -70,6 +72,7 @@ def __init__(self, hs: "HomeServer"):
7072
self.is_mine = hs.is_mine
7173
self.clock = hs.get_clock()
7274
self._worker_lock_handler = hs.get_worker_locks_handler()
75+
self._task_scheduler = hs.get_task_scheduler()
7376

7477
federation_registry = hs.get_federation_registry()
7578

@@ -116,6 +119,10 @@ def __init__(self, hs: "HomeServer"):
116119
hs.config.experimental.msc3984_appservice_key_query
117120
)
118121

122+
self._task_scheduler.register_action(
123+
self._delete_old_one_time_keys_task, "delete_old_otks"
124+
)
125+
119126
@trace
120127
@cancellable
121128
async def query_devices(
@@ -1574,6 +1581,45 @@ async def has_different_keys(self, user_id: str, body: JsonDict) -> bool:
15741581
return True
15751582
return False
15761583

1584+
async def _delete_old_one_time_keys_task(
1585+
self, task: ScheduledTask
1586+
) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
1587+
"""Scheduler task to delete old one time keys.
1588+
1589+
Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
1590+
that it could still have old OTKs that the client has dropped. This task is scheduled exactly once
1591+
by a database schema delta file, and it clears out old one-time-keys that look like they came from libolm.
1592+
"""
1593+
last_user = task.result.get("from_user", "") if task.result else ""
1594+
while True:
1595+
# We process users in batches of 100
1596+
users, rowcount = await self.store.delete_old_otks_for_next_user_batch(
1597+
last_user, 100
1598+
)
1599+
if len(users) == 0:
1600+
# We're done!
1601+
return TaskStatus.COMPLETE, None, None
1602+
1603+
logger.debug(
1604+
"Deleted %i old one-time-keys for users '%s'..'%s'",
1605+
rowcount,
1606+
users[0],
1607+
users[-1],
1608+
)
1609+
last_user = users[-1]
1610+
1611+
# Store our progress
1612+
await self._task_scheduler.update_task(
1613+
task.id, result={"from_user": last_user}
1614+
)
1615+
1616+
# Sleep a little before doing the next user.
1617+
#
1618+
# matrix.org has about 15M users in the e2e_one_time_keys_json table
1619+
# (comprising 20M devices). We want this to take about a week, so we need
1620+
# to do about one batch of 100 users every 4 seconds.
1621+
await self.clock.sleep(4)
1622+
15771623

15781624
def _check_cross_signing_key(
15791625
key: JsonDict, user_id: str, key_type: str, signing_key: Optional[VerifyKey] = None

synapse/storage/databases/main/end_to_end_keys.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,54 @@ def impl(txn: LoggingTransaction) -> Tuple[bool, Optional[int]]:
14531453
impl,
14541454
)
14551455

1456+
async def delete_old_otks_for_next_user_batch(
1457+
self, after_user_id: str, number_of_users: int
1458+
) -> Tuple[List[str], int]:
1459+
"""Deletes old OTKs belonging to the next batch of users
1460+
1461+
Returns:
1462+
`(users, rows)`, where:
1463+
* `users` is the user IDs of the updated users. An empty list if we are done.
1464+
* `rows` is the number of deleted rows
1465+
"""
1466+
1467+
def impl(txn: LoggingTransaction) -> Tuple[List[str], int]:
1468+
# Find a batch of users
1469+
txn.execute(
1470+
"""
1471+
SELECT DISTINCT(user_id) FROM e2e_one_time_keys_json
1472+
WHERE user_id > ?
1473+
ORDER BY user_id
1474+
LIMIT ?
1475+
""",
1476+
(after_user_id, number_of_users),
1477+
)
1478+
users = [row[0] for row in txn.fetchall()]
1479+
if len(users) == 0:
1480+
return users, 0
1481+
1482+
# Delete any old OTKs belonging to those users.
1483+
#
1484+
# We only actually consider OTKs whose key ID is 6 characters long. These
1485+
# keys were likely made by libolm rather than Vodozemac; libolm only kept
1486+
# 100 private OTKs, so was far more vulnerable than Vodozemac to throwing
1487+
# away keys prematurely.
1488+
clause, args = make_in_list_sql_clause(
1489+
txn.database_engine, "user_id", users
1490+
)
1491+
sql = f"""
1492+
DELETE FROM e2e_one_time_keys_json
1493+
WHERE {clause} AND ts_added_ms < ? AND length(key_id) = 6
1494+
"""
1495+
args.append(self._clock.time_msec() - (7 * 24 * 3600 * 1000))
1496+
txn.execute(sql, args)
1497+
1498+
return users, txn.rowcount
1499+
1500+
return await self.db_pool.runInteraction(
1501+
"delete_old_otks_for_next_user_batch", impl
1502+
)
1503+
14561504

14571505
class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
14581506
def __init__(
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
--
2+
-- This file is licensed under the Affero General Public License (AGPL) version 3.
3+
--
4+
-- Copyright (C) 2024 New Vector, Ltd
5+
--
6+
-- This program is free software: you can redistribute it and/or modify
7+
-- it under the terms of the GNU Affero General Public License as
8+
-- published by the Free Software Foundation, either version 3 of the
9+
-- License, or (at your option) any later version.
10+
--
11+
-- See the GNU Affero General Public License for more details:
12+
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
13+
14+
-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
15+
-- that it could still have old OTKs that the client has dropped.
16+
--
17+
-- We create a scheduled task which will drop old OTKs, to flush them out.
18+
INSERT INTO scheduled_tasks(id, action, status, timestamp)
19+
VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', extract(epoch from current_timestamp) * 1000);
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
--
2+
-- This file is licensed under the Affero General Public License (AGPL) version 3.
3+
--
4+
-- Copyright (C) 2024 New Vector, Ltd
5+
--
6+
-- This program is free software: you can redistribute it and/or modify
7+
-- it under the terms of the GNU Affero General Public License as
8+
-- published by the Free Software Foundation, either version 3 of the
9+
-- License, or (at your option) any later version.
10+
--
11+
-- See the GNU Affero General Public License for more details:
12+
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
13+
14+
-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility
15+
-- that it could still have old OTKs that the client has dropped.
16+
--
17+
-- We create a scheduled task which will drop old OTKs, to flush them out.
18+
INSERT INTO scheduled_tasks(id, action, status, timestamp)
19+
VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', strftime('%s', 'now') * 1000);

tests/handlers/test_e2e_keys.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# [This file includes modifications made by New Vector Limited]
2020
#
2121
#
22+
import time
2223
from typing import Dict, Iterable
2324
from unittest import mock
2425

@@ -1826,3 +1827,72 @@ def test_check_cross_signing_setup(self) -> None:
18261827
)
18271828
self.assertIs(exists, True)
18281829
self.assertIs(replaceable_without_uia, False)
1830+
1831+
def test_delete_old_one_time_keys(self) -> None:
1832+
"""Test the db migration that clears out old OTKs"""
1833+
1834+
# We upload two sets of keys, one just over a week ago, and one just less than
1835+
# a week ago. Each batch contains some keys that match the deletion pattern
1836+
# (key IDs of 6 chars), and some that do not.
1837+
#
1838+
# Finally, set the scheduled task going, and check what gets deleted.
1839+
1840+
user_id = "@user000:" + self.hs.hostname
1841+
device_id = "xyz"
1842+
1843+
# The scheduled task should be for "now" in real, wallclock time, so
1844+
# set the test reactor to just over a week ago.
1845+
self.reactor.advance(time.time() - 7.5 * 24 * 3600)
1846+
1847+
# Upload some keys
1848+
self.get_success(
1849+
self.handler.upload_keys_for_user(
1850+
user_id,
1851+
device_id,
1852+
{
1853+
"one_time_keys": {
1854+
# some keys to delete
1855+
"alg1:AAAAAA": "key1",
1856+
"alg2:AAAAAB": {"key": "key2", "signatures": {"k1": "sig1"}},
1857+
# A key to *not* delete
1858+
"alg2:AAAAAAAAAA": {"key": "key3"},
1859+
}
1860+
},
1861+
)
1862+
)
1863+
1864+
# A day passes
1865+
self.reactor.advance(24 * 3600)
1866+
1867+
# Upload some more keys
1868+
self.get_success(
1869+
self.handler.upload_keys_for_user(
1870+
user_id,
1871+
device_id,
1872+
{
1873+
"one_time_keys": {
1874+
# some keys which match the pattern
1875+
"alg1:BAAAAA": "key1",
1876+
"alg2:BAAAAB": {"key": "key2", "signatures": {"k1": "sig1"}},
1877+
# A key to *not* delete
1878+
"alg2:BAAAAAAAAA": {"key": "key3"},
1879+
}
1880+
},
1881+
)
1882+
)
1883+
1884+
# The rest of the week passes, which should set the scheduled task going.
1885+
self.reactor.advance(6.5 * 24 * 3600)
1886+
1887+
# Check what we're left with in the database
1888+
remaining_key_ids = {
1889+
row[0]
1890+
for row in self.get_success(
1891+
self.handler.store.db_pool.simple_select_list(
1892+
"e2e_one_time_keys_json", None, ["key_id"]
1893+
)
1894+
)
1895+
}
1896+
self.assertEqual(
1897+
remaining_key_ids, {"AAAAAAAAAA", "BAAAAA", "BAAAAB", "BAAAAAAAAA"}
1898+
)

0 commit comments

Comments
 (0)