Skip to content

Commit

Permalink
EPD-969 Show # of skipped test cases in Slack + GitHub (#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicole White authored Jun 13, 2024
1 parent dc620b9 commit dea5e50
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 12 deletions.
2 changes: 2 additions & 0 deletions e2e/python/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ async def get_score(self, output: str) -> float:

# Evaluate one test case: score the output via self.get_score and wrap the
# score in an Evaluation.
async def evaluate_test_case(self, test_case: BaseTestCase, output: str) -> Evaluation:
score = await self.get_score(output)
# random.random() > 0.8 holds for roughly 20% of calls, so about one in five
# calls produces no evaluation. Presumably this exercises the "skipped test
# cases" reporting — TODO confirm.
if random.random() > 0.8:
# NOTE(review): returns None although the signature is annotated -> Evaluation.
return None
return Evaluation(
score=score,
)
Expand Down
50 changes: 38 additions & 12 deletions src/handlers/testing/exec/util/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,9 @@ function makeSectionsForTestSuite(args: {
* Make a table of the evaluator stats. For example:
*
* Evaluators Test Cases
* -----------------------------------------------
* has-all-substrings 979 PASSED | 121 FAILED
* is-friendly 1,000 PASSED | 0 FAILED
* -------------------------------------------------------------
* has-all-substrings 979 PASSED 121 FAILED 0 SKIPPED
* is-friendly 1,000 PASSED 0 FAILED 0 SKIPPED
*/
function makeEvaluatorStatsTable(args: { evaluations: Evaluation[] }): string {
// Get the evaluator IDs sorted alphabetically
Expand All @@ -458,10 +458,18 @@ function makeEvaluatorStatsTable(args: { evaluations: Evaluation[] }): string {
...evaluatorIds.map((evaluatorId) => evaluatorId.length),
);

// Get the number of passed / failed test cases per evaluator
const uniqTestCaseHashes = [
...new Set(args.evaluations.map((e) => e.testCaseHash)),
];

// Get the number of passed / failed / skipped test cases per evaluator
const evaluatorStats: Record<
string,
{ numPassedString: string; numFailedString: string }
{
numPassedString: string;
numFailedString: string;
numSkippedString: string;
}
> = {};
for (const evaluatorId of evaluatorIds) {
const evaluations = args.evaluations.filter(
Expand All @@ -478,21 +486,30 @@ function makeEvaluatorStatsTable(args: { evaluations: Evaluation[] }): string {
(e) => e.passed === EvaluationPassed.NOT_APPLICABLE,
).length;

// Find the # of test cases that don't have an evaluation for this evaluator
const skippedCount = uniqTestCaseHashes.filter(
(hash) => !evaluations.some((e) => e.testCaseHash === hash),
).length;

evaluatorStats[evaluatorId] = {
// Consider N/A as passed to simplify
numPassedString: (passedCount + naCount).toLocaleString(),
numFailedString: failedCount.toLocaleString(),
numSkippedString: skippedCount.toLocaleString(),
};
}

// Get the max length of each of the numPassed and numFailed strings
// This is used to right-align the numbers in each column (passed / failed)
// Get the max length of each of the numPassed, numFailed, and numSkipped strings.
// This is used to right-align the numbers in each column (passed / failed / skipped).
const maxNumPassedLength = Math.max(
...Object.values(evaluatorStats).map((s) => s.numPassedString.length),
);
const maxNumFailedLength = Math.max(
...Object.values(evaluatorStats).map((s) => s.numFailedString.length),
);
const maxNumSkippedLength = Math.max(
...Object.values(evaluatorStats).map((s) => s.numSkippedString.length),
);

// Add the header row
const paddedEvaluatorHeader =
Expand All @@ -507,8 +524,10 @@ function makeEvaluatorStatsTable(args: { evaluations: Evaluation[] }): string {
const statsAsString = makeEvaluatorStatsRow({
numPassedString: stats.numPassedString,
numFailedString: stats.numFailedString,
numSkippedString: stats.numSkippedString,
maxNumPassedLength,
maxNumFailedLength,
maxNumSkippedLength,
});

const paddedEvaluatorId = evaluatorId.padEnd(maxEvaluatorIdLength);
Expand All @@ -528,23 +547,30 @@ function makeEvaluatorStatsTable(args: { evaluations: Evaluation[] }): string {
* Numbers should be right-justified for their column.
* For example:
*
* 3 PASSED 56 FAILED
* 1,000 PASSED 6 FAILED
* 3 PASSED 56 FAILED 0 SKIPPED
* 1,000 PASSED 6 FAILED 0 SKIPPED
*/
function makeEvaluatorStatsRow(args: {
  numPassedString: string;
  numFailedString: string;
  numSkippedString: string;
  maxNumPassedLength: number;
  maxNumFailedLength: number;
  maxNumSkippedLength: number;
}): string {
  // Left-pad each count to its column's width so the numbers are
  // right-justified when rows are stacked on top of each other.
  const paddedNumPassed = args.numPassedString.padStart(
    args.maxNumPassedLength,
  );
  const paddedNumFailed = args.numFailedString.padStart(
    args.maxNumFailedLength,
  );
  const paddedNumSkipped = args.numSkippedString.padStart(
    args.maxNumSkippedLength,
  );

  // One cell per status (passed / failed / skipped), separated by COLUMN_GAP.
  return [
    `${paddedNumPassed} PASSED`,
    `${paddedNumFailed} FAILED`,
    `${paddedNumSkipped} SKIPPED`,
  ].join(COLUMN_GAP);
}

2 comments on commit dea5e50

@github-actions
Copy link

@github-actions github-actions bot commented on dea5e50 Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FAILED  •  🕐 8s  •  🏗️ E2E / py (#728)  •  ➡️ View in Autoblocks


🔴  python-e2e-test-suite-1

Evaluators            Test Cases
-------------------------------------------------------
has-all-substrings    3 PASSED    1 FAILED    0 SKIPPED
is-friendly           4 PASSED    0 FAILED    0 SKIPPED

🔴  python-e2e-test-suite-2

Evaluators            Test Cases
-------------------------------------------------------
has-all-substrings    8 PASSED    2 FAILED    0 SKIPPED
is-friendly           7 PASSED    0 FAILED    3 SKIPPED

Generated by Autoblocks against dea5e50

@github-actions
Copy link

@github-actions github-actions bot commented on dea5e50 Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FAILED  •  🕐 7s  •  🏗️ E2E / ts (#728)  •  ➡️ View in Autoblocks


🟢  typescript-e2e-test-suite-1

Evaluators            Test Cases
-------------------------------------------------------
has-all-substrings    4 PASSED    0 FAILED    0 SKIPPED
is-friendly           4 PASSED    0 FAILED    0 SKIPPED

🔴  typescript-e2e-test-suite-2

Evaluators            Test Cases
--------------------------------------------------------
has-all-substrings     9 PASSED    1 FAILED    0 SKIPPED
is-friendly           10 PASSED    0 FAILED    0 SKIPPED

Generated by Autoblocks against dea5e50

Please sign in to comment.