Skip to content

Commit

Permalink
Add isSameAuthor check when classifying flaky tests with Dr.CI (#4688)
Browse files Browse the repository at this point in the history
I strengthen the `isSameHeadBranch` check to become `isSameAuthor` to
avoid a false positive in flaky detection when the same author submits
multiple PRs of the same change. For example,

* Cherry picking pytorch/pytorch#111845
* Draft PR and then submit a real PR (I remember seeing an example of
this, but couldn't find the PR number now)

I choose to use the author email here because it is readily available in
the commit info of the workflow run. The commit info, however, is not
available in the `torch-workflow-job` DynamoDB table which powers the
search. So, an additional Rockset query is needed for each flaky match.

### Testing

```
curl --request POST \
    --url 'http://localhost:3000/api/drci/drci?prNumber=111845' \
    --data 'repo=pytorch'
```
  • Loading branch information
huydhn authored Oct 31, 2023
1 parent 54ee5e7 commit a4c39aa
Show file tree
Hide file tree
Showing 10 changed files with 98 additions and 48 deletions.
14 changes: 10 additions & 4 deletions torchci/lib/drciUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
MAX_SIZE,
} from "lib/searchUtils";
import { RecentWorkflowsData, JobData } from "lib/types";
import { isSameHeadBranch, isSameFailure } from "lib/jobUtils";
import { isSameAuthor, isSameFailure } from "lib/jobUtils";

export const NUM_MINUTES = 30;
export const REPO: string = "pytorch";
Expand Down Expand Up @@ -317,13 +317,19 @@ export async function hasSimilarFailures(
head_branch: record.branch as string,
failure_captures: record.failureCaptures as string[],
failure_lines: record.failureLines,
authorEmail: record.authorEmail,
};

// Only count different jobs with the same failure
// Only count different jobs with the same failure. To avoid FP, PRs from the
// same author are treated as the same till we could figure out a better way
// to separate them
if (
!isSameHeadBranch(job.head_branch, record.branch) &&
job.id !== failure.id &&
isSameFailure(job, failure)
job.head_sha !== failure.head_sha &&
isSameFailure(job, failure) &&
// Run this check last because it costs one query to query for the commit
// author of the failure
!(await isSameAuthor(job, failure))
) {
return true;
}
Expand Down
62 changes: 45 additions & 17 deletions torchci/lib/jobUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import { RecentWorkflowsData, JobData, BasicJobData } from "lib/types";
export const REMOVE_JOB_NAME_SUFFIX_REGEX = new RegExp(
", [0-9]+, [0-9]+, .+\\)"
);
export const GHSTACK_SUFFIX_REGEX = new RegExp("/[0-9]+/head");

export function isFailedJob(job: JobData) {
return (
Expand Down Expand Up @@ -105,23 +104,52 @@ export function removeJobNameSuffix(
return jobName.replace(REMOVE_JOB_NAME_SUFFIX_REGEX, replaceWith);
}

export function isSameHeadBranch(
branchA: string | null | undefined,
branchB: string | null | undefined
): boolean {
if (!branchA || !branchB) {
return false;
}

const replaceWith = "";
// This function exists because we want to treat all ghstack head branches
// as one branch when it comes to finding similar failures. A legit failure
// coming from the same job but different commits in the stack shouldn't be
// treated as a flaky similar failure
const branchANoGhstack = branchA.replace(GHSTACK_SUFFIX_REGEX, replaceWith);
const branchBNoGhstack = branchB.replace(GHSTACK_SUFFIX_REGEX, replaceWith);
async function getAuthor(job: RecentWorkflowsData): Promise<string> {
const query = `
SELECT
w.head_commit.author.email
FROM
commons.workflow_run w
WHERE
w.head_commit.id = :sha
LIMIT
1
`;
const rocksetClient = getRocksetClient();
const results = (
await rocksetClient.queries.query({
sql: {
query: query,
parameters: [
{
name: "sha",
type: "string",
value: job.head_sha,
},
],
},
})
).results;
return results !== undefined && results.length === 1 ? results[0].email : "";
}

return branchANoGhstack === branchBNoGhstack;
export async function isSameAuthor(
job: RecentWorkflowsData,
failure: RecentWorkflowsData
): Promise<boolean> {
const jobAuthor = job.authorEmail ? job.authorEmail : await getAuthor(job);
const failureAuthor = failure.authorEmail
? failure.authorEmail
: await getAuthor(failure);

// This function exists because we don't want to wrongly count similar failures
// from commits of the same author as flaky. Some common cases include:
// * ghstack
// * Draft commit
// * Cherry picking
return (
jobAuthor !== "" && failureAuthor !== "" && jobAuthor === failureAuthor
);
}

export function isSameFailure(
Expand Down
4 changes: 4 additions & 0 deletions torchci/lib/searchUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ export async function searchSimilarFailures(
failureLines: [data.torchci_classification.line],
failureLineNumbers: [data.torchci_classification.line_num],
failureCaptures: data.torchci_classification.captures,
// NB: The author information, unfortunately, is not available atm in
// torchci-workflow-job DynamoDB table. We might be able to update the
// lambda to add it in the future though, but that's not a guarantee
authorEmail: "",
});
});

Expand Down
1 change: 1 addition & 0 deletions torchci/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export interface BasicJobData {
time?: string;
conclusion?: string;
runnerName?: string;
authorEmail?: string;
}

// Used by HUD
Expand Down
1 change: 1 addition & 0 deletions torchci/rockset/commons/__sql/commit_failed_jobs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ SELECT
j.name AS jobName,
CONCAT(w.name, ' / ', j.name) AS name,
j.runner_name AS runnerName,
w.head_commit.author.email as authorEmail,
j.conclusion,
j.completed_at,
j.html_url,
Expand Down
3 changes: 3 additions & 0 deletions torchci/rockset/commons/__sql/commit_jobs_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ WITH
job.torchci_classification.captures,
job.torchci_classification.line_num,
job.runner_name AS runner_name,
workflow.head_commit.author.email AS authorEmail,
FROM
workflow_job job
INNER JOIN workflow_run workflow ON workflow.id = job.run_id
Expand Down Expand Up @@ -93,6 +94,7 @@ WITH
null,
-- Don't care about runner name from CircleCI
null AS runner_name,
null AS authorEmail,
FROM
circleci.job job
WHERE
Expand All @@ -119,6 +121,7 @@ SELECT
ARRAY_CREATE(line_num) AS failureLineNumbers,
captures AS failureCaptures,
runner_name AS runnerName,
authorEmail,
time,
FROM
job
Expand Down
2 changes: 2 additions & 0 deletions torchci/rockset/commons/__sql/recent_pr_workflows_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ SELECT
w.id AS workflowId,
j.id,
j.runner_name AS runnerName,
w.head_commit.author.email as authorEmail,
CONCAT(w.name, ' / ', j.name) AS name,
j.name AS jobName,
j.conclusion,
Expand All @@ -43,6 +44,7 @@ SELECT
null AS workflowId,
w.id,
null AS runnerName,
w.head_commit.author.email as authorEmail,
w.name AS name,
w.name AS jobName,
w.conclusion,
Expand Down
8 changes: 4 additions & 4 deletions torchci/rockset/prodVersions.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"commons": {
"annotated_flaky_jobs": "bd991c8c9782f339",
"hud_query": "69f0bc9a618c82b1",
"commit_jobs_query": "8457359bb5183034",
"commit_jobs_query": "f69bca0b65bd7919",
"disabled_non_flaky_tests": "f909abf9eec15b56",
"commit_failed_jobs": "985d62570a63388d",
"commit_failed_jobs": "250f069bcb911363",
"filter_forced_merge_pr": "a28350c863e36239",
"flaky_tests": "9f7a1abcebb7f027",
"flaky_tests_across_jobs": "474e5454bda0c5bb",
Expand All @@ -23,7 +23,7 @@
"issue_query": "e4d338de89980044",
"failure_samples_query": "7940a636284d0752",
"num_commits_master": "e4a864147cf3bf44",
"recent_pr_workflows_query": "0a22b6a523f96bd7",
"recent_pr_workflows_query": "c82822b2ea0ad920",
"reverted_prs_with_reason": "751f01cba16364f0",
"unclassified": "1b31a2d8f4ab7230",
"test_insights_overview": "42dbd5232f45fd53",
Expand Down Expand Up @@ -99,4 +99,4 @@
"validation_jobs_red": "ac8dee6e6b76916d",
"validation_jobs_red_past_day": "aecb798a574ba2ff"
}
}
}
5 changes: 5 additions & 0 deletions torchci/test/drciUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import {
isExcludedFromFlakiness,
} from "../lib/drciUtils";
import * as searchUtils from "../lib/searchUtils";
import * as jobUtils from "../lib/jobUtils";
import { JobData, RecentWorkflowsData } from "lib/types";
import nock from "nock";
import dayjs from "dayjs";
import { Client } from "@opensearch-project/opensearch";
import * as utils from "./utils";

nock.disableNetConnect();

Expand Down Expand Up @@ -204,6 +206,9 @@ describe("Test various utils used by Dr.CI", () => {

const mock = jest.spyOn(searchUtils, "searchSimilarFailures");
mock.mockImplementation(() => Promise.resolve({ jobs: [] }));
const mockJobUtils = jest.spyOn(jobUtils, "isSameAuthor");
mockJobUtils.mockImplementation(() => Promise.resolve(false));

// Found no similar job
expect(
await hasSimilarFailures(
Expand Down
46 changes: 23 additions & 23 deletions torchci/test/jobUtils.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
removeJobNameSuffix,
isSameFailure,
isSameHeadBranch,
isSameAuthor,
removeCancelledJobAfterRetry,
} from "../lib/jobUtils";
import { JobData, RecentWorkflowsData, BasicJobData } from "lib/types";
Expand Down Expand Up @@ -55,30 +55,30 @@ describe("Test various job utils", () => {
).toStrictEqual("Test `run_test.py` is usable without boto3/rockset");
});

test("test isSameHeadBranch", () => {
expect(isSameHeadBranch("", "")).toEqual(false);

expect(isSameHeadBranch("mock-branch", "")).toEqual(false);

expect(isSameHeadBranch("", "mock-branch")).toEqual(false);

expect(isSameHeadBranch("mock-branch", "mock-branch")).toEqual(true);

expect(isSameHeadBranch("ciflow/trunk/1", "ciflow/trunk/2")).toEqual(false);

expect(isSameHeadBranch("ciflow/trunk/1", "ciflow/trunk/1")).toEqual(true);

expect(isSameHeadBranch("gh/user/1/head", "gh/user/2/head")).toEqual(true);

expect(isSameHeadBranch("gh/user/1/head", "gh/user/1/head")).toEqual(true);
test("test isSameAuthor", async () => {
const job: RecentWorkflowsData = {
head_sha: "123",
authorEmail: "[email protected]",
// The rest doesn't matter
id: "",
completed_at: "",
html_url: "",
failure_captures: [],
};
const failure: RecentWorkflowsData = {
head_sha: "456",
authorEmail: "[email protected]",
// The rest doesn't matter
id: "",
completed_at: "",
html_url: "",
failure_captures: [],
};

expect(
isSameHeadBranch("gh/user/1/head", "gh/another-user/2/head")
).toEqual(false);
expect(await isSameAuthor(job, failure)).toEqual(true);

expect(
isSameHeadBranch("gh/user/1/head", "gh/another-user/1/head")
).toEqual(false);
failure.authorEmail = "different.author";
expect(await isSameAuthor(job, failure)).toEqual(false);
});

test("test isSameFailure", () => {
Expand Down

1 comment on commit a4c39aa

@vercel
Copy link

@vercel vercel bot commented on a4c39aa Oct 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.