Add log classification rule to evaluate by line number #4639

Open · wants to merge 2 commits into base: main
12 changes: 11 additions & 1 deletion aws/lambda/log-classifier/src/engine.rs
@@ -37,10 +37,20 @@ pub fn evaluate_rule(rule: &Rule, log: &Log) -> Option<Match> {

/// Evaluate the ruleset against `log`. Returns the highest-priority match, or
/// None if no rule matched.
pub fn evaluate_ruleset(ruleset: &RuleSet, log: &Log) -> Option<Match> {
pub fn evaluate_ruleset_by_priority(ruleset: &RuleSet, log: &Log) -> Option<Match> {
ruleset
.rules
.par_iter()
.flat_map(|rule| evaluate_rule(rule, log))
.max_by(|a, b| a.rule.priority.cmp(&b.rule.priority))
}

/// Evaluate the ruleset against `log`. Returns the match with the highest line number, or
/// None if no rule matched.
pub fn evaluate_ruleset_by_position(ruleset: &RuleSet, log: &Log) -> Option<Match> {
ruleset
.rules
.par_iter()
.flat_map(|rule| evaluate_rule(rule, log))
.max_by(|a, b| a.line_number.cmp(&b.line_number))
}
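
For reference, here is a minimal sketch of how the two entry points could be exercised side by side. It mirrors the tests in this PR; the `Rule`/`RuleSet` construction, the `Log::new` constructor, and the `Match` fields are taken from the test code in this diff, while the `log_classifier::rule` module path, the example patterns, priorities, and the `main` wrapper are assumptions for illustration only.

```rust
// Sketch only: assumed module paths and example rules, mirroring the tests in this PR.
use log_classifier::engine::{evaluate_ruleset_by_position, evaluate_ruleset_by_priority};
use log_classifier::log::Log;
use log_classifier::rule::{Rule, RuleSet}; // module path assumed

fn main() {
    // Two rules: the higher-priority one matches earlier in the log,
    // the lower-priority one matches later.
    let ruleset = RuleSet {
        rules: vec![
            Rule {
                name: "error".into(),
                pattern: r"^error".parse().unwrap(),
                priority: 1000,
            },
            Rule {
                name: "warning".into(),
                pattern: r"^warning".parse().unwrap(),
                priority: 100,
            },
        ],
    };
    let log = Log::new("error: build failed\nwarning: flaky network\n".into());

    // Picks the match whose rule has the highest priority, regardless of position.
    if let Some(m) = evaluate_ruleset_by_priority(&ruleset, &log) {
        println!("by priority: {} at line {}", m.rule.name, m.line_number);
    }

    // Picks the match that sits furthest down the log, regardless of priority.
    if let Some(m) = evaluate_ruleset_by_position(&ruleset, &log) {
        println!("by position: {} at line {}", m.rule.name, m.line_number);
    }
}
```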
96 changes: 71 additions & 25 deletions aws/lambda/log-classifier/src/main.rs
@@ -4,7 +4,8 @@ use anyhow::Result;
use std::time::Instant;
use tracing::info;

use log_classifier::engine::evaluate_ruleset;
use log_classifier::engine::evaluate_ruleset_by_position;
use log_classifier::engine::evaluate_ruleset_by_priority;
use log_classifier::log::Log;
use log_classifier::network::{
download_log, get_dynamo_client, get_s3_client, upload_classification_dynamo,
@@ -37,7 +38,7 @@ async fn handle(
// Run the matching
let start = Instant::now();
let ruleset = RuleSet::new_from_config();
let maybe_match = evaluate_ruleset(&ruleset, &log);
let maybe_match = evaluate_ruleset_by_priority(&ruleset, &log);
info!("evaluate: {:?}", start.elapsed());

match maybe_match {
@@ -126,9 +127,12 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
assert_eq!(match_.rule.name, "foo");
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 1);
assert_eq!(match_position_.rule.name, "foo");
}

#[test]
@@ -142,9 +146,12 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
assert_eq!(match_.rule.name, "foo");
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 1);
assert_eq!(match_position_.rule.name, "foo");
}

#[test]
@@ -159,7 +166,7 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 2);
assert_eq!(match_.rule.name, "higher priority");
}
@@ -176,24 +183,10 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log);
let match_ = evaluate_ruleset_by_priority(&ruleset, &log);
assert!(match_.is_none());
}

#[test]
fn match_before_ignore() {
let mut ruleset = RuleSet::new();
ruleset.add("test", r"^test");
let log = Log::new(
"\
testt\n\
=================== sccache compilation log ===================\n\
=========== If your build fails, please take a look at the log above for possible reasons ===========\n\
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log);
assert!(match_position_.is_none());
}

#[test]
@@ -208,8 +201,10 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 3);
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 3);
}

#[test]
@@ -223,7 +218,7 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 2);
}

@@ -249,7 +244,7 @@
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 4);

let match_json = SerializedMatch::new(&match_, &log, 12);
@@ -265,4 +260,55 @@
// let foo = handle(12421522599, "pytorch/vision", ShouldWriteDynamo(false)).await;
// panic!("{:#?}", foo);
// }
#[test]
fn test_evaluate_ruleset_by_position_smoke_test() {
let ruleset = RuleSet {
rules: vec![Rule {
name: "test".into(),
pattern: r"^test".parse().unwrap(),
priority: 100,
}],
};
let log = Log::new(
"\
testt\n\
test foo\n\
"
.into(),
);

let match_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();

assert_eq!(match_.line_number, 2);
}

#[test]
fn test_evaluate_ruleset_by_position_later_line_wins() {
let ruleset = RuleSet {
rules: vec![
Rule {
name: "test".into(),
pattern: r"^test".parse().unwrap(),
priority: 100,
},
Rule {
name: "foo".into(),
pattern: r"^foo".parse().unwrap(),
priority: 1000,
},
],
};
let log = Log::new(
"\
test foo\n\
testt\n\
"
.into(),
);

let match_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();

assert_eq!(match_.line_number, 2);
assert_eq!(match_.rule.name, "test");
}
}
4 changes: 2 additions & 2 deletions torchci/components/FilteredJobList.tsx
@@ -50,10 +50,9 @@ export default function FilteredJobList({
pred: (job: JobData) => boolean;
showClassification?: boolean;
}) {
const filteredJobs = jobs.filter(pred);

const router = useRouter();
const { repoOwner, repoName } = router.query;
const filteredJobs = jobs.filter(pred);
const { data } = useSWR(
showClassification
? `/api/job_annotation/${repoOwner}/${repoName}/annotations/${encodeURIComponent(
@@ -69,6 +68,7 @@
if (filteredJobs.length === 0) {
return null;
}

return (
<div>
<h2>{filterName}</h2>
12 changes: 8 additions & 4 deletions torchci/components/JobLinks.tsx
@@ -61,11 +61,11 @@ export default function JobLinks({ job }: { job: JobData }) {
<TestInsightsLink job={job} separator={" | "} />
<DisableTest job={job} label={"skipped"} />
{authenticated && <UnstableJob job={job} label={"unstable"} />}
{authenticated && job.failureLine && (
{authenticated && job.failureLines && (
<ReproductionCommand
job={job}
separator={" | "}
testName={getTestName(job.failureLine)}
testName={getTestName(job.failureLines[0])}
/>
)}
</span>
@@ -98,7 +98,8 @@ This test was disabled because it is failing on main branch ([recent examples]($
}

function DisableTest({ job, label }: { job: JobData; label: string }) {
const hasFailureClassification = job.failureLine != null;
const hasFailureClassification =
job.failureLines != null && job.failureLines.every((line) => line !== null);
const swrKey = hasFailureClassification ? `/api/issue/${label}` : null;
const { data } = useSWR(swrKey, fetcher, {
// Set a 60s cache for the request, so that lots of tooltip hovers don't
@@ -114,7 +115,10 @@ function DisableTest({ job, label }: { job: JobData; label: string }) {
return null;
}

const testName = getTestName(job.failureLine!);
const testName =
job.failureLines && job.failureLines[0]
? getTestName(job.failureLines[0])
: null;
// - The failure classification is not a python unittest or pytest failure.
if (testName === null) {
return null;
53 changes: 38 additions & 15 deletions torchci/components/LogViewer.tsx
@@ -194,13 +194,22 @@ function Log({ url, line }: { url: string; line: number | null }) {
export default function LogViewer({
job,
logRating = LogAnnotation.NULL,
showAnnotationToggle = false,
showAnnotationToggle = process.env.DEBUG_LOG_CLASSIFIER === "true",
maxNumFailureLines = process.env.DEBUG_LOG_CLASSIFIER === "true" ? 2 : 1,
}: {
job: JobData;
logRating?: LogAnnotation;
showAnnotationToggle?: boolean;
maxNumFailureLines?: number;
}) {
const [showLogViewer, setShowLogViewer] = useState(false);
// @TODO: PaliC
// const numFailureLines =
// Math.min(job.failureLines?.length || 0, maxNumFailureLines);
// We will replace this with the code above once we support multiple failure lines in Rockset.
const numFailureLines = maxNumFailureLines;
const [showLogViewer, setShowLogViewer] = useState<boolean[]>(
Array.from({ length: numFailureLines }, () => false)
);

useEffect(() => {
document.addEventListener("copy", (e) => {
@@ -212,29 +221,43 @@
e.preventDefault();
});
});

if (!job.failureLine && !isFailure(job.conclusion)) {
if (!job.failureLines && !isFailure(job.conclusion)) {
return null;
}

function handleClick() {
setShowLogViewer(!showLogViewer);
}
const toggleLogViewer = (index: number) => {
// Make a copy of the current array state
const updatedShowLogViewer = [...showLogViewer];

// Toggle the boolean value at the given index
updatedShowLogViewer[index] = !updatedShowLogViewer[index];

// Update the state
setShowLogViewer(updatedShowLogViewer);
};
return (
<div>
<button
style={{ background: "none", cursor: "pointer" }}
onClick={handleClick}
>
{showLogViewer ? "▼ " : "▶ "}
<code>{job.failureLine ?? "Show log"}</code>
</button>
{showLogViewer && <Log url={job.logUrl!} line={job.failureLineNumber!} />}
{showLogViewer.map((show, index) => (
<div key={index}>
<button
style={{ background: "none", cursor: "pointer" }}
onClick={() => toggleLogViewer(index)}
>
{show ? "▼ " : "▶ "}
<code>
{(job.failureLines && job.failureLines[index]) ?? "Show log"}
</code>
</button>
{show && (
<Log url={job.logUrl!} line={job.failureLineNumbers![index]} />
)}
</div>
))}
{showAnnotationToggle && (
<div>
<LogAnnotationToggle
job={job}
// send in real metadata later
log_metadata={{ job_id: "1" }}
annotation={logRating}
/>
8 changes: 4 additions & 4 deletions torchci/lib/drciUtils.ts
@@ -311,7 +311,7 @@ export async function hasSimilarFailures(
head_sha: record.sha as string,
head_branch: record.branch as string,
failure_captures: record.failureCaptures as string[],
failure_line: record.failureLine,
failure_lines: record.failureLines,
};

// Only count different jobs with the same failure
@@ -333,9 +333,9 @@ export function isInfraFlakyJob(job: RecentWorkflowsData): boolean {
// the workflow summary tab
return (
job.conclusion === "failure" &&
(job.failure_line === null ||
job.failure_line === undefined ||
job.failure_line === "") &&
(job.failure_lines === null ||
job.failure_lines === undefined ||
job.failure_lines.join("") === "") &&
(job.runnerName === null ||
job.runnerName === undefined ||
job.runnerName === "")
5 changes: 3 additions & 2 deletions torchci/lib/searchUtils.ts
@@ -95,10 +95,11 @@ export async function searchSimilarFailures(
time: data.completed_at,
conclusion: data.conclusion,
htmlUrl: data.html_url,
failureLine: data.torchci_classification.line,
failureLineNumber: data.torchci_classification.line_num,
failureLines: [data.torchci_classification.line],
failureLineNumbers: [data.torchci_classification.line_num],
failureCaptures: data.torchci_classification.captures,
});
});

return { jobs: jobs };
}
6 changes: 3 additions & 3 deletions torchci/lib/types.ts
@@ -21,8 +21,8 @@ export interface JobData extends BasicJobData {
logUrl?: string;
durationS?: number;
queueTimeS?: number;
failureLine?: string;
failureLineNumber?: number;
failureLines?: string[];
failureLineNumbers?: number[];
failureCaptures?: string[];
repo?: string;
failureAnnotation?: string;
@@ -41,7 +41,7 @@ export interface RecentWorkflowsData extends BasicJobData {
head_branch?: string | null;
pr_number?: number;
failure_captures: string[];
failure_line?: string | null;
failure_lines?: string[] | null;
}

export interface Artifact {
5 changes: 3 additions & 2 deletions torchci/pages/api/drci/drci.ts
@@ -537,8 +537,9 @@ function isFlaky(job: RecentWorkflowsData, flakyRules: FlakyRule[]): boolean {
failureCapture.match(captureRegex)
);
const matchFailureLine: boolean =
job.failure_line != null &&
job.failure_line.match(captureRegex) != null;
job.failure_lines != null &&
job.failure_lines[0] != null &&
job.failure_lines[0].match(captureRegex) != null;

// Accept both failure captures array and failure line string to make sure
// that nothing is missing