Skip to content

Commit

Permalink
[flakybot] Add retry for fetching file (#1387)
Browse files Browse the repository at this point in the history
Add retry function for retrieving the test file and add info on the
issue to track better, since we've had some issues with labeling.

Reduce amount of logs b/c vercel cuts off logs that are too long
vercel/community#1012

I spent a long time trying to figure out how to get retries using some
other library but kept failing. If someone figures out how to do it with
another library, would be much appreciated.
  • Loading branch information
clee2000 authored Jan 13, 2023
1 parent 813fdc0 commit 4b9687c
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 44 deletions.
17 changes: 16 additions & 1 deletion torchci/lib/bot/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,24 @@ export class CachedIssueTracker extends CachedConfigTracker {

// returns undefined if the request fails
export async function fetchJSON(path: string): Promise<any> {
const result = await urllib.request(path);
const result = await retryRequest(path);
if (result.res.statusCode !== 200) {
return;
}
return JSON.parse(result.data.toString());
}

export async function retryRequest(
path: string,
numRetries: number = 3,
delay: number = 500
): Promise<urllib.HttpClientResponse<any>> {
for (let i = 0; i < numRetries; i++) {
const result = await urllib.request(path);
if (result.res.statusCode == 200) {
return result;
}
await new Promise((f) => setTimeout(f, delay));
}
return await urllib.request(path);
}
65 changes: 32 additions & 33 deletions torchci/pages/api/flaky-tests/disable.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import type { NextApiRequest, NextApiResponse } from "next";
import * as urllib from "urllib";

import { getOctokit } from "lib/github";
import fetchFlakyTests, { fetchFlakyTestsAcrossJobs } from "lib/fetchFlakyTests";
import fetchFlakyTests, {
fetchFlakyTestsAcrossJobs,
} from "lib/fetchFlakyTests";
import fetchDisabledNonFlakyTests from "lib/fetchDisabledNonFlakyTests";
import { FlakyTestData, IssueData, DisabledNonFlakyTestData } from "lib/types";
import { supportedPlatforms } from "lib/bot/verifyDisableTestIssueBot";
import fetchIssuesByLabel from "lib/fetchIssuesByLabel";
import { retryRequest } from "lib/bot/utils";
import { Octokit } from "octokit";
import dayjs from "dayjs";

const NUM_HOURS = 3;
const NUM_HOURS_ACROSS_JOBS = 72;
export const NUM_HOURS_NOT_UPDATED_BEFORE_CLOSING = 24 * 14 // 2 weeks
export const NUM_HOURS_NOT_UPDATED_BEFORE_CLOSING = 24 * 14; // 2 weeks
const owner: string = "pytorch";
const repo: string = "pytorch";

Expand Down Expand Up @@ -139,7 +140,6 @@ export async function handleNonFlakyTest(
const matchingIssues = issues.filter((issue) => issue.title === issueTitle);

if (matchingIssues.length === 0) {
console.log(`Found no matching issue for ${issueTitle}`);
return;
}

Expand All @@ -151,22 +151,15 @@ export async function handleNonFlakyTest(

// Only close the issue if the issue is not flaky and hasn't been updated in
// NUM_HOURS_NOT_UPDATED_BEFORE_CLOSING hours, defaults to 2 weeks
if (updatedAt.isBefore(dayjs().subtract(NUM_HOURS_NOT_UPDATED_BEFORE_CLOSING, "hour"))) {
console.log(
`Resolving issue ${matchingIssue.title} (${matchingIssue.html_url}) because it's not flaky ` +
`anymore after ${test.num_green} reruns and hasn't been updated in ${daysSinceLastUpdate} ` +
`days from the current date ${dayjs()}`
);
}
else {
console.log(
`According to PyTorch flaky bot, issue ${matchingIssue.title} (${matchingIssue.html_url}) ` +
`is not flaky anymore after ${test.num_green} reruns. But the issue was last updated at ` +
`${updatedAt}, which was less than ${daysSinceLastUpdate} days from the current date ${dayjs()}. ` +
`So, the issue won't be closed yet`
);
if (
updatedAt.isAfter(
dayjs().subtract(NUM_HOURS_NOT_UPDATED_BEFORE_CLOSING, "hour")
)
) {
console.log(`${matchingIssue.number} is not flaky but is too recent.`);
return;
}
console.log(`${matchingIssue.number} is not longer flaky`);

const body = `Resolving the issue because the test is not flaky anymore after ${test.num_green} reruns without ` +
`any failures and the issue hasn't been updated in ${daysSinceLastUpdate} days. Please reopen the ` +
Expand Down Expand Up @@ -268,16 +261,17 @@ ${numRedGreen}
${debuggingSteps}`;
}

export async function getTestOwnerLabels(testFile: string): Promise<string[]> {
export async function getTestOwnerLabels(
testFile: string
): Promise<{ labels: string[]; additionalErrMessage?: string }> {
const urlkey =
"https://raw.githubusercontent.com/pytorch/pytorch/master/test/";

try {
const result = await urllib.request(`${urlkey}${testFile}`);
const result = await retryRequest(`${urlkey}${testFile}`);
const statusCode = result.res.statusCode;
if (statusCode !== 200) {
console.warn(`Error retrieving test file of flaky test: ${statusCode}`);
return ["module: unknown"];
throw new Error(`Statuscode ${statusCode}`);
}
const fileContents = result.data.toString(); // data is a Buffer
const lines = fileContents.split(/[\r\n]+/);
Expand All @@ -286,7 +280,7 @@ export async function getTestOwnerLabels(testFile: string): Promise<string[]> {
if (line.startsWith(prefix)) {
const labels: string[] = JSON.parse(line.substring(prefix.length));
if (labels.length === 0) {
return ["module: unknown"];
return { labels: ["module: unknown"] };
}
if (
labels.some(
Expand All @@ -295,13 +289,17 @@ export async function getTestOwnerLabels(testFile: string): Promise<string[]> {
) {
labels.push("triaged");
}
return labels;
return { labels: labels };
}
}
return ["module: unknown"];
return { labels: ["module: unknown"] };
} catch (err) {
console.warn(`Error retrieving test file of flaky test: ${err}`);
return ["module: unknown"];
const errMessage = `Error retrieving ${testFile}: ${err}`;
console.warn(errMessage);
return {
labels: ["module: unknown"],
additionalErrMessage: errMessage,
};
}
}

Expand All @@ -321,15 +319,16 @@ export async function createIssueFromFlakyTest(
octokit: Octokit
): Promise<void> {
const title = getIssueTitle(test.name, test.suite);
const body = getIssueBodyForFlakyTest(test);
const labels = ["skipped", "module: flaky-tests"].concat(
await getTestOwnerLabels(test.file)
);
let body = getIssueBodyForFlakyTest(test);
const { labels, additionalErrMessage } = await getTestOwnerLabels(test.file);
if (additionalErrMessage) {
body += `\n\n${additionalErrMessage}`;
}
await octokit.rest.issues.create({
owner,
repo,
title,
body,
labels,
labels: ["skipped", "module: flaky-tests"].concat(labels),
});
}
64 changes: 54 additions & 10 deletions torchci/test/disableFlakyBot.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -541,9 +541,9 @@ describe("Disable Flaky Test Bot Unit Tests", () => {
Buffer.from(`# Owner(s): ["module: fft"]\nimport blah;\nrest of file`)
);

const labels = await disableFlakyTestBot.getTestOwnerLabels(
flakyTestA.file
);
const { labels, additionalErrMessage } =
await disableFlakyTestBot.getTestOwnerLabels(flakyTestA.file);
expect(additionalErrMessage).toEqual(undefined);
expect(labels).toEqual(["module: fft", "triaged"]);

if (!scope.isDone()) {
Expand All @@ -562,7 +562,7 @@ describe("Disable Flaky Test Bot Unit Tests", () => {
)
);

const labels = await disableFlakyTestBot.getTestOwnerLabels(
const { labels } = await disableFlakyTestBot.getTestOwnerLabels(
flakyTestA.file
);
expect(labels).toEqual(["oncall: distributed"]);
Expand All @@ -583,10 +583,10 @@ describe("Disable Flaky Test Bot Unit Tests", () => {
)
);

const labels = await disableFlakyTestBot.getTestOwnerLabels(
flakyTestA.file
);
const { labels, additionalErrMessage } =
await disableFlakyTestBot.getTestOwnerLabels(flakyTestA.file);
expect(labels).toEqual(["module: unknown"]);
expect(additionalErrMessage).toEqual(undefined);

if (!scope.isDone()) {
console.error("pending mocks: %j", scope.pendingMocks());
Expand All @@ -602,10 +602,54 @@ describe("Disable Flaky Test Bot Unit Tests", () => {
Buffer.from("line1\nline2\nline3\nstill no owners\nline4\nlastline\n")
);

const labels = await disableFlakyTestBot.getTestOwnerLabels(
flakyTestA.file
);
const { labels, additionalErrMessage } =
await disableFlakyTestBot.getTestOwnerLabels(flakyTestA.file);
expect(labels).toEqual(["module: unknown"]);
expect(additionalErrMessage).toEqual(undefined);

if (!scope.isDone()) {
console.error("pending mocks: %j", scope.pendingMocks());
}
scope.done();
});

test("getTestOwnerLabels: retry getting file fails all times", async () => {
const scope = nock("https://raw.githubusercontent.com/")
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(404)
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(404)
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(404)
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(404);

const { labels, additionalErrMessage } =
await disableFlakyTestBot.getTestOwnerLabels(flakyTestA.file);
expect(labels).toEqual(["module: unknown"]);
expect(additionalErrMessage).toEqual(
"Error retrieving file_a.py: Error: Statuscode 404"
);

if (!scope.isDone()) {
console.error("pending mocks: %j", scope.pendingMocks());
}
scope.done();
});

test("getTestOwnerLabels: retry getting file fails all times", async () => {
const scope = nock("https://raw.githubusercontent.com/")
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(404)
.get(`/pytorch/pytorch/master/test/${flakyTestA.file}`)
.reply(
200,
Buffer.from(`# Owner(s): ["module: fft"]\nimport blah;\nrest of file`)
);
const { labels, additionalErrMessage } =
await disableFlakyTestBot.getTestOwnerLabels(flakyTestA.file);
expect(labels).toEqual(["module: fft", "triaged"]);
expect(additionalErrMessage).toEqual(undefined);

if (!scope.isDone()) {
console.error("pending mocks: %j", scope.pendingMocks());
Expand Down

1 comment on commit 4b9687c

@vercel
Copy link

@vercel vercel bot commented on 4b9687c Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.