Skip to content

Commit

Permalink
Avoid spurious backtrace / log message on critical error (#291)
Browse files Browse the repository at this point in the history
## Problem

On a critical error (e.g. existing index and --overwrite not specified),
VSB prints a large amount of spurious / unnecessary output - e.g:

```
vsb --database=pinecone --pinecone_api_key=XXX
    --workload=synthetic-proportional \
    --synthetic_records=1000 --synthetic_requests=100 \
    --synthetic_insert_ratio=0.3 --synthetic_query_ratio=0.5 \
    --synthetic_delete_ratio=0.1 --synthetic_update_ratio=0.1 \
    --synthetic_dimensions=768 --synthetic_query_distribution=zipfian \
    --synthetic_metadata=id:10n --synthetic_metadata=tags:5s10l \
    --pinecone_index_name=daver-synthetic-test
2025-01-10T14:44:11 INFO     Vector Search Bench: Starting experiment with backend='pinecone', workload='synthetic-proportional', users=1, requests_per_sec=unlimited
...
2025-01-10T14:44:12 CRITICAL PineconeDB: Index 'daver-synthetic-test' already exists - cowardly refusing to overwrite existing data. Specify --overwrite to delete it, or specify --skip-populate to skip population phase.
Traceback (most recent call last):
  File "/Users/dave/repos/pinecone-io/VSB/vsb/users.py", line 64, in setup
    self.database.initialize_population()
  File "/Users/dave/repos/pinecone-io/VSB/vsb/databases/pinecone/pinecone.py", line 155, in initialize_population
    raise StopUser()
locust.exception.StopUser
Performing Setup phase                         0/? ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━  0:00:00   %
```

This causes the "CRITICAL" message to get somewhat lost.

## Solution

Modify the handling of critical errors to use an explicit DoneFailed
state in the state machine, which handles the shutdown without adding
backtraces, also also updating messages to be more applicable to a
failed run:

```
2025-01-10T14:47:20 INFO     Vector Search Bench: Starting experiment with backend='pinecone', workload='synthetic-proportional', users=1, requests_per_sec=unlimited
...
2025-01-10T14:47:22 CRITICAL PineconeDB: Index 'daver-synthetic-test' already exists - cowardly refusing to overwrite existing data. Specify --overwrite to delete it, or specify --skip-populate to skip population phase.
✘ Setup cancelled                              1/1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 00:00 ~
```

## Type of Change

- [x] Bug fix (non-breaking change which fixes an issue)
  • Loading branch information
daverigby authored Jan 13, 2025
1 parent fc6e538 commit 8766e51
Showing 1 changed file with 36 additions and 8 deletions.
44 changes: 36 additions & 8 deletions vsb/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ def setup(self):
# Nothing more to do, but sleep briefly here to prevent
# us busy-looping in this state.
gevent.sleep(0.1)
except StopUser:
# Explicit stop, assume callee already logged the reason; exit with
# non-zero:
logger.debug("Stopping user due to StopUser exception")
self.environment.runner.send_message(
"update_progress",
{
"user": 0,
"phase": "setup",
"error": "Stopping user due to StopUser exception",
},
)
self.state = self.State.Done

except Exception as e:
traceback.print_exception(e)
locust.log.unhandled_greenlet_exception = True
Expand Down Expand Up @@ -375,6 +389,9 @@ class Phase(Enum):
"""Issue requests (queries) to the database and recording the results."""
Done = auto()
"""Final phase when all Run Users have completed"""
DoneFailed = auto()
"""Final phase when all a fatal error occurred and we want to stop down,
signalling an unsuccessful run"""

def __init__(self):
super().__init__()
Expand Down Expand Up @@ -469,6 +486,10 @@ def tick(self):
return self.num_users, self.num_users, [RunUser]
case LoadShape.Phase.Done:
return None
case LoadShape.Phase.DoneFailed:
# Set exit status to 2 to indicate a failure
locust.log.unhandled_greenlet_exception = True
return None
case _:
raise ValueError(f"Invalid phase:{self.phase}")

Expand All @@ -486,9 +507,11 @@ def _transition_phase(self, new: Phase):
]
if vsb.progress is not None:
self._update_progress_bar(mark_completed=True)
vsb.progress.update(
self.progress_task_id, description=f"✔ {self.phase.name} complete"
)
if new == LoadShape.Phase.DoneFailed:
msg = f"✘ {self.phase.name} cancelled"
else:
msg = f"✔ {self.phase.name} complete"
vsb.progress.update(self.progress_task_id, description=msg)
vsb.progress.stop()
vsb.progress = None
if hasattr(self.runner.environment, "workload_sequence"):
Expand All @@ -514,7 +537,10 @@ def _transition_phase(self, new: Phase):
else:
phase_display_name = self.phase.name
if self.phase in tracked_phases:
metrics_tracker.record_phase_end(phase_display_name)
# Skip reporting phase end on a fatal error; it adds noise after
# the important fatal error message.
if new != LoadShape.Phase.DoneFailed:
metrics_tracker.record_phase_end(phase_display_name)
self.phase = new
if hasattr(self.runner.environment, "workload_sequence"):
workload = self.runner.environment.workload_sequence[
Expand All @@ -539,9 +565,7 @@ def _transition_phase(self, new: Phase):
def on_update_progress(self, msg, **kwargs):
# Fired when VSBLoadShape (running on the master) receives an
# "update_progress" message.
logger.debug(
f"VSBLoadShape.update_progress() - user:{msg.data['user']}, phase:{msg.data['phase']}"
)
logger.debug(f"VSBLoadShape.on_update_progress() - {msg.data}")
match self.phase:
case LoadShape.Phase.Setup:
assert msg.data["phase"] == "setup"
Expand All @@ -557,7 +581,11 @@ def on_update_progress(self, msg, **kwargs):
f"{self.request_count} - "
f"moving to TransitionFromSetup phase"
)
self._transition_phase(LoadShape.Phase.TransitionFromSetup)
if "error" in msg.data:
# Setup signalled an error; cancel the benchmark
self._transition_phase(LoadShape.Phase.DoneFailed)
else:
self._transition_phase(LoadShape.Phase.TransitionFromSetup)
case LoadShape.Phase.TransitionFromSetup:
logger.error(
f"VSBLoadShape.update_progress() - Unexpected progress update in "
Expand Down

0 comments on commit 8766e51

Please sign in to comment.