Skip to content

Commit

Permalink
Some more changes to reduce adjacent server flakes
Browse files Browse the repository at this point in the history
1. Make worker threads (like net) not attempt new connections
   once the server has started shutting down.
2. Don't respond to messages that prompt updating database
   metadata for databases we haven't introspected anyway.
3. Mark a lot of worker tasks as being interruptable. When they
   weren't interruptable, server exit had to wait for them to
   exit, which they didn't always do. This caused connections
   to get held open.

*Most* of the locally observable flakes I've been seeing were fixed by
these changes, though I am still getting
test_server_adjacent_database_propagation failures where it fails to
connect to the newly created database. Still working on that.
  • Loading branch information
msullivan committed Oct 23, 2024
1 parent 584795d commit a591454
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 7 deletions.
4 changes: 4 additions & 0 deletions edb/server/dbview/dbview.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ class Database:
def lookup_config(self, name: str) -> Any:
...

def is_introspected(self) -> bool:
...


class DatabaseConnectionView:
def in_tx(self) -> bool:
...
Expand Down
3 changes: 3 additions & 0 deletions edb/server/dbview/dbview.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,9 @@ cdef class Database:
if self.user_schema_pickle is None:
await self.tenant.introspect_db(self.name)

# Report whether this database has a user schema pickle loaded.
# Returns True when `user_schema_pickle` is set, False while it is still None
# (the same None condition that the preceding code uses to trigger
# `tenant.introspect_db`).
def is_introspected(self):
return self.user_schema_pickle is not None

def lookup_config(self, name: str):
spec = self._index._sys_config_spec
if self.user_config_spec is not None:
Expand Down
4 changes: 2 additions & 2 deletions edb/server/net_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def http(server: edbserver.BaseServer) -> None:
tasks.append(
tenant.create_task(
_http_task(tenant, tenant_http[tenant]),
interruptable=False,
interruptable=True,
)
)
# Remove unused tenant_http entries
Expand Down Expand Up @@ -310,7 +310,7 @@ async def gc(server: edbserver.BaseServer) -> None:
while True:
tasks = [
tenant.create_task(
_gc(tenant, NET_HTTP_REQUEST_TTL), interruptable=False
_gc(tenant, NET_HTTP_REQUEST_TTL), interruptable=True
)
for tenant in server.iter_tenants()
if tenant.accept_new_tasks
Expand Down
2 changes: 1 addition & 1 deletion edb/server/protocol/auth_ext/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ async def noop_coroutine() -> None:
async def _protected_send(
coro: Coroutine[Any, Any, None], tenant: tenant.Tenant
) -> None:
task = tenant.create_task(coro, interruptable=False)
task = tenant.create_task(coro, interruptable=True)
# Prevent timing attack
await asyncio.sleep(random.random() * 0.5)
# Expose e.g. configuration errors
Expand Down
2 changes: 1 addition & 1 deletion edb/server/protocol/auth_ext/pkce.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ async def gc(server: edbserver.BaseServer) -> None:
while True:
try:
tasks = [
tenant.create_task(_gc(tenant), interruptable=False)
tenant.create_task(_gc(tenant), interruptable=True)
for tenant in server.iter_tenants()
if tenant.accept_new_tasks
]
Expand Down
19 changes: 16 additions & 3 deletions edb/server/tenant.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,18 @@ def stop_accepting_connections(self) -> None:
def accept_new_tasks(self):
return self._accept_new_tasks

# Return True only when this tenant still accepts new tasks AND the named
# database is known to the tenant and reports itself as introspected.
# Used by the remote-event handlers in this commit to skip work for
# databases that are not ready.
def is_db_ready(self, dbname: str) -> bool:
# The tenant has stopped accepting new tasks: nothing is "ready".
if not self._accept_new_tasks:
return False

# `maybe_get_db` yields a falsy value when no database object is
# available for `dbname`; a database that has not been introspected is
# likewise treated as not ready.
if (
not (db := self.maybe_get_db(dbname=dbname))
or not db.is_introspected()
):
return False

return True

def create_task(
self,
coro: Coroutine,
Expand Down Expand Up @@ -962,7 +974,8 @@ def allow_database_connections(self, dbname: str) -> None:

def is_database_connectable(self, dbname: str) -> bool:
return (
dbname != defines.EDGEDB_TEMPLATE_DB
self._running
and dbname != defines.EDGEDB_TEMPLATE_DB
and dbname not in self._block_new_connections
)

Expand Down Expand Up @@ -1662,7 +1675,7 @@ async def task():
self.create_task(task(), interruptable=True)

def on_remote_ddl(self, dbname: str) -> None:
if not self._accept_new_tasks:
if not self.is_db_ready(dbname):
return

# Triggered by a postgres notification event 'schema-changes'
Expand Down Expand Up @@ -1826,7 +1839,7 @@ def on_remote_query_cache_change(
dbname: str,
keys: Optional[list[str]],
) -> None:
if not self._accept_new_tasks:
if not self.is_db_ready(dbname):
return

async def task():
Expand Down

0 comments on commit a591454

Please sign in to comment.