Skip to content

Commit

Permalink
EPD-968: update cli for new test case endpoints (#96)
Browse files Browse the repository at this point in the history
This means we will not surface errors that occur when sending in the test
case fields. I went back and forth on the best approach here.

We could also add a --debug flag or something similar? We should be able to
see these errors in our logs, since they will be errors coming back from our
API.

Let me know what you think.
  • Loading branch information
adamnolte authored Jun 12, 2024
1 parent 66326fb commit dc620b9
Showing 1 changed file with 70 additions and 48 deletions.
118 changes: 70 additions & 48 deletions src/handlers/testing/exec/util/run-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,68 +398,90 @@ export class RunManager {
testCaseHumanReviewInputFields?: { name: string; value: string }[] | null;
testCaseHumanReviewOutputFields?: { name: string; value: string }[] | null;
}) {
const runId = this.currentRunId({
testExternalId: args.testExternalId,
});

const { id: resultId } = await this.post<{ id: string }>(
`/runs/${runId}/results`,
{
testCaseHash: args.testCaseHash,
testCaseDurationMs: args.testCaseDurationMs,
testCaseRevisionUsage: args.testCaseRevisionUsage,
},
);

if (!this.testCaseHashToResultId[args.testExternalId]) {
this.testCaseHashToResultId[args.testExternalId] = {};
}
this.testCaseHashToResultId[args.testExternalId][args.testCaseHash] =
resultId;

const events = this.testCaseEvents
.filter(
(e) =>
e.testExternalId === args.testExternalId &&
e.testCaseHash === args.testCaseHash,
)
.map((e) => e.event);
const runId = this.currentRunId({
testExternalId: args.testExternalId,

// We still want to try the other endpoints if 1 fails
const results = await Promise.allSettled([
this.post(`/runs/${runId}/results/${resultId}/body`, {
testCaseBody: args.testCaseBody,
}),
this.post(`/runs/${runId}/results/${resultId}/output`, {
testCaseOutput: args.testCaseOutput,
}),
this.post(`/runs/${runId}/results/${resultId}/events`, {
testCaseEvents: events,
}),
]);

results.forEach((result) => {
if (result.status === 'rejected') {
emitter.emit(EventName.CONSOLE_LOG, {
ctx: 'cli',
level: 'warn',
message: `Failed to send part of the test case results to Autoblocks for test case hash ${args.testCaseHash}: ${result.reason}`,
});
}
});
const payload = {
testCaseHash: args.testCaseHash,
testCaseBody: args.testCaseBody,
testCaseOutput: args.testCaseOutput,
testCaseEvents: events,
testCaseDurationMs: args.testCaseDurationMs,
testCaseRevisionUsage: args.testCaseRevisionUsage,
testCaseHumanReviewInputFields: args.testCaseHumanReviewInputFields,
testCaseHumanReviewOutputFields: args.testCaseHumanReviewOutputFields,
} as const;

let resultId: string;

try {
const { id } = await this.post<{ id: string }>(
`/runs/${runId}/results`,
payload,
// Important that this is after we send in the body and output
// that way we can infer human review fields from the body and output
// if they weren't set by the user
await this.post(
`/runs/${runId}/results/${resultId}/human-review-fields`,
{
testCaseHumanReviewInputFields: args.testCaseHumanReviewInputFields,
testCaseHumanReviewOutputFields: args.testCaseHumanReviewOutputFields,
},
);
resultId = id;
} catch (err) {
try {
const parsedError = zHttpError.parse(
JSON.parse((err as Error).message),
);
if (parsedError.status === 413) {
// If the /results request fails with a content too large error, retry with an empty output
const { id } = await this.post<{ id: string }>(
`/runs/${runId}/results`,
{
...payload,
testCaseOutput: '',
},
);
resultId = id;
} else {
throw err;
}
} catch {
throw err;
}
emitter.emit(EventName.CONSOLE_LOG, {
ctx: 'cli',
level: 'warn',
message: `Failed to send human review fields to Autoblocks for test case hash ${args.testCaseHash}: ${err}`,
});
}

if (!this.testCaseHashToResultId[args.testExternalId]) {
this.testCaseHashToResultId[args.testExternalId] = {};
try {
// Important that this is after human review fields since it uses them
// If human review fields fails, we don't want to run this
await this.runUIBasedEvaluators({
testExternalId: args.testExternalId,
testCaseId: resultId,
testCaseHash: args.testCaseHash,
});
} catch (err) {
emitter.emit(EventName.CONSOLE_LOG, {
ctx: 'cli',
level: 'warn',
message: `Failed to run AI evaluators created on the Autoblocks UI for test case hash ${args.testCaseHash}: ${err}`,
});
}
this.testCaseHashToResultId[args.testExternalId][args.testCaseHash] =
resultId;

await this.runUIBasedEvaluators({
testExternalId: args.testExternalId,
testCaseId: resultId,
testCaseHash: args.testCaseHash,
});
}

private determineIfEvaluationPassed(args: {
Expand Down

2 comments on commit dc620b9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FAILED  •  🕐 7s  •  🏗️ E2E / ts (#698)  •  ➡️ View in Autoblocks


🔴  typescript-e2e-test-suite-1

Evaluators            Test Cases
-------------------------------------------
has-all-substrings    3 PASSED     1 FAILED
is-friendly           4 PASSED     0 FAILED

🔴  typescript-e2e-test-suite-2

Evaluators            Test Cases
--------------------------------------------
has-all-substrings     8 PASSED     2 FAILED
is-friendly           10 PASSED     0 FAILED

Generated by Autoblocks against dc620b9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FAILED  •  🕐 9s  •  🏗️ E2E / py (#698)  •  ➡️ View in Autoblocks


🟢  python-e2e-test-suite-1

Evaluators            Test Cases
-------------------------------------------
has-all-substrings    4 PASSED     0 FAILED
is-friendly           4 PASSED     0 FAILED

🔴  python-e2e-test-suite-2

Evaluators            Test Cases
--------------------------------------------
has-all-substrings     7 PASSED     3 FAILED
is-friendly           10 PASSED     0 FAILED

Generated by Autoblocks against dc620b9

Please sign in to comment.