ubiquity-os-marketplace · 0x4007 · Sep 22, 2024 · Aug 8, 2024 · Aug 9, 2024 · Aug 9, 2024
diff --git a/src/configuration/content-evaluator-config.ts b/src/configuration/content-evaluator-config.ts
@@ -31,22 +31,6 @@ export const contentEvaluatorConfigurationType = Type.Object({
           role: ["ISSUE_SPECIFICATION"],
           relevance: 1,
         },
-        {
-          role: ["PULL_AUTHOR"],
-          relevance: 1,
-        },
-        {
-          role: ["PULL_ASSIGNEE"],
-          relevance: 1,
-        },
-        {
-          role: ["PULL_COLLABORATOR"],
-          relevance: 1,
-        },
-        {
-          role: ["PULL_CONTRIBUTOR"],
-          relevance: 1,
-        },
       ],
     }
   ),

diff --git a/src/parser/content-evaluator-module.ts b/src/parser/content-evaluator-module.ts
@@ -1,18 +1,23 @@
-import { Value } from "@sinclair/typebox/value";
 import Decimal from "decimal.js";
 import { encodingForModel, Tiktoken } from "js-tiktoken";
 import OpenAI from "openai";
-import { commentEnum, CommentType } from "../configuration/comment-types";
 import configuration from "../configuration/config-reader";
 import { OPENAI_API_KEY } from "../configuration/constants";
 import {
   ContentEvaluatorConfiguration,
   contentEvaluatorConfigurationType,
 } from "../configuration/content-evaluator-config";
-import logger from "../helpers/logger";
 import { IssueActivity } from "../issue-activity";
-import openAiRelevanceResponseSchema, { RelevancesByOpenAi } from "../types/openai-type";
 import { GithubCommentScore, Module, Result } from "./processor";
+import { Value } from "@sinclair/typebox/value";
+import { commentEnum, CommentKind, CommentType } from "../configuration/comment-types";
+import logger from "../helpers/logger";
+import {
+  openAiRelevanceResponseSchema,
+  CommentToEvaluate,
+  Relevances,
+  PrCommentToEvaluate,
+} from "../types/content-evaluator-module-type";
 
 /**
  * Evaluates and rates comments.
@@ -76,22 +81,9 @@ export class ContentEvaluatorModule implements Module {
 
   async _processComment(comments: Readonly<GithubCommentScore>[], specificationBody: string) {
     const commentsWithScore: GithubCommentScore[] = [...comments];
+    const { commentsToEvaluate, prCommentsToEvaluate } = this._splitCommentsByPrompt(commentsWithScore);
 
-    // exclude comments that have fixed relevance multiplier. e.g. review comments = 1
-    const commentsToEvaluate: { id: number; comment: string }[] = [];
-    for (let i = 0; i < commentsWithScore.length; i++) {
-      const currentComment = commentsWithScore[i];
-      if (!this._fixedRelevances[currentComment.type]) {
-        commentsToEvaluate.push({
-          id: currentComment.id,
-          comment: currentComment.content,
-        });
-      }
-    }
-
-    const relevancesByAI = commentsToEvaluate.length
-      ? await this._evaluateComments(specificationBody, commentsToEvaluate)
-      : {};
+    const relevancesByAI = await this._evaluateComments(specificationBody, commentsToEvaluate, prCommentsToEvaluate);
-
-    if (Object.keys(relevancesByAI).length !== commentsToEvaluate.length) {
+    // relevancesByAI merges both batches, so the expected count is their combined size.
+    if (Object.keys(relevancesByAI).length !== commentsToEvaluate.length + prCommentsToEvaluate.length) {
       console.error("Relevance / Comment length mismatch! \nWill use 1 as relevance for missing comments.");
@@ -133,14 +125,60 @@ export class ContentEvaluatorModule implements Module {
     }, {});
   }
 
+  /**
+   * Partitions the comments that have no fixed relevance into two prompt batches:
+   * pull-request comments (with their optional diff hunk) and all other comments.
+   * @returns Both batches, each preserving the input order.
+   */
+  _splitCommentsByPrompt(commentsWithScore: Readonly<GithubCommentScore>[]): {
+    commentsToEvaluate: CommentToEvaluate[];
+    prCommentsToEvaluate: PrCommentToEvaluate[];
+  } {
+    const commentsToEvaluate: CommentToEvaluate[] = [];
+    const prCommentsToEvaluate: PrCommentToEvaluate[] = [];
+    for (const { id, type, content, diffHunk } of commentsWithScore) {
+      // A truthy fixed relevance means the comment is left out of both batches.
+      if (this._fixedRelevances[type]) {
+        continue;
+      }
+      // The PULL bit in the type routes the comment to the pull-request batch.
+      if (type & CommentKind.PULL) {
+        prCommentsToEvaluate.push({ id, comment: content, diffHunk });
+      } else {
+        commentsToEvaluate.push({ id, comment: content });
+      }
+    }
+    return { commentsToEvaluate, prCommentsToEvaluate };
+  }
+
   async _evaluateComments(
     specification: string,
-    comments: { id: number; comment: string }[]
-  ): Promise<RelevancesByOpenAi> {
-    const prompt = this._generatePrompt(specification, comments);
-    const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
-    const maxTokens = this._calculateMaxTokens(dummyResponse);
+    comments: CommentToEvaluate[],
+    prComments: PrCommentToEvaluate[]
+  ): Promise<Relevances> {
+    let commentRelevances: Relevances = {};
+    let prCommentRelevances: Relevances = {};
+    // Each non-empty batch is submitted with its own prompt; an empty batch is skipped.
+    if (comments.length) {
+      const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
+      const maxTokens = this._calculateMaxTokens(dummyResponse);
+
+      const promptForComments = this._generatePromptForComments(specification, comments);
+      commentRelevances = await this._submitPrompt(promptForComments, maxTokens);
+    }
+    // Pull-request comments use a dedicated prompt built by _generatePromptForPrComments.
+    if (prComments.length) {
+      const dummyResponse = JSON.stringify(this._generateDummyResponse(prComments), null, 2);
+      const maxTokens = this._calculateMaxTokens(dummyResponse);
+
+      const promptForPrComments = this._generatePromptForPrComments(specification, prComments);
+      prCommentRelevances = await this._submitPrompt(promptForPrComments, maxTokens);
+    }
+    // Merge both maps; on a duplicate key the pull-request value overrides the other.
+    return { ...commentRelevances, ...prCommentRelevances };
+  }
 
+  async _submitPrompt(prompt: string, maxTokens: number): Promise<Relevances> {
     const response: OpenAI.Chat.ChatCompletion = await this._openAi.chat.completions.create({
       model: this._configuration?.openAi.model || "gpt-4o-2024-08-06",
       response_format: { type: "json_object" },
@@ -172,7 +210,7 @@ export class ContentEvaluatorModule implements Module {
     }
   }
 
-  _generatePrompt(issue: string, comments: { id: number; comment: string }[]) {
+  _generatePromptForComments(issue: string, comments: CommentToEvaluate[]) {
     if (!issue?.length) {
       throw new Error("Issue specification comment is missing or empty");
     }
@@ -182,4 +220,15 @@ export class ContentEvaluatorModule implements Module {
       comments.length
     }.`;
   }
+
+  /** Builds the prompt asking for a 0-1 value per pull-request comment; throws when the issue specification is empty. */
+  _generatePromptForPrComments(issue: string, comments: PrCommentToEvaluate[]) {
+    if (!issue?.length) {
+      throw new Error("Issue specification comment is missing or empty");
+    }
+    const payload = JSON.stringify({ specification: issue, comments });
+    const intro = `I need to evaluate the value of a GitHub contributor's comments in a pull request. Some of these comments are code review comments, and some are general suggestions or a part of the discussion. I'm interested in how much each comment helps to solve the GitHub issue and improve code quality. Please provide a float between 0 and 1 to represent the value of each comment. A score of 1 indicates that the comment is very valuable and significantly improves the submitted solution and code quality, whereas a score of 0 indicates a negative or zero impact. A stringified JSON is given below that contains the specification of the GitHub issue, and comments by different contributors. The property "diffHunk" presents the chunk of code being addressed for a possible change in a code review comment. `;
+    const question = `To what degree are each of the comments valuable? Please reply with ONLY a JSON where each key is the comment ID given in JSON above, and the value is a float number between 0 and 1 corresponding to the comment. The float number should represent the value of the comment for improving the issue solution and code quality. The total number of properties in your JSON response should equal exactly ${comments.length}.`;
+    return `${intro}\n\n\`\`\`\n${payload}\n\`\`\`\n\n\n${question}`;
+  }
 }
diff --git a/src/parser/data-purge-module.ts b/src/parser/data-purge-module.ts
@@ -3,6 +3,7 @@ import configuration from "../configuration/config-reader";
 import { DataPurgeConfiguration, dataPurgeConfigurationType } from "../configuration/data-purge-config";
 import { IssueActivity } from "../issue-activity";
 import { Module, Result } from "./processor";
+import { GitHubPullRequestReviewComment } from "../github-types";
 
 /**
  * Removes the data in the comments that we do not want to be processed.
@@ -29,6 +30,9 @@ export class DataPurgeModule implements Module {
           // Keep only one new line needed by markdown-it package to convert to html
           .replace(/\n\s*\n/g, "\n")
           .trim();
+
+        const reviewComment = comment as GitHubPullRequestReviewComment;
+
         if (newContent.length) {
           result[comment.user.login].comments = [
             ...(result[comment.user.login].comments ?? []),
@@ -37,6 +41,7 @@ export class DataPurgeModule implements Module {
               content: newContent,
               url: comment.html_url,
               type: comment.type,
+              diffHunk: reviewComment?.pull_request_review_id ? reviewComment?.diff_hunk : undefined,
             },
           ];
         }

diff --git a/src/parser/processor.ts b/src/parser/processor.ts
@@ -94,6 +94,7 @@ export interface GithubCommentScore {
   content: string;
   url: string;
   type: CommentKind | CommentAssociation;
+  diffHunk?: string;
   score?: {
     formatting?: {
       content: Record<string, { symbols: { [p: string]: { count: number; multiplier: number } }; score: number }>;

diff --git a/src/types/content-evaluator-module-type.ts b/src/types/content-evaluator-module-type.ts
@@ -0,0 +1,9 @@
+import { Type, Static } from "@sinclair/typebox";
+/** Minimal shape of a comment submitted for evaluation: its numeric id and its text. */
+export type CommentToEvaluate = { id: number; comment: string };
+/** Same as a regular comment to evaluate, plus an optional diff hunk for pull-request comments. */
+export type PrCommentToEvaluate = { id: number; comment: string; diffHunk?: string };
+/** Schema of a relevance response: a record of string keys to numbers between 0 and 1 inclusive. */
+export const openAiRelevanceResponseSchema = Type.Record(Type.String(), Type.Number({ minimum: 0, maximum: 1 }));
+/** Static TypeScript type derived from openAiRelevanceResponseSchema. */
+export type Relevances = Static<typeof openAiRelevanceResponseSchema>;
diff --git a/src/types/openai-type.ts b/src/types/openai-type.ts
diff --git a/tests/__mocks__/handlers.ts b/tests/__mocks__/handlers.ts
@@ -1,17 +1,25 @@
 import { http, HttpResponse } from "msw";
 import { db } from "./db";
+import issue5Get from "./routes/issue-5-conversation-rewards/issue-5-get.json";
 import issue22CommentsGet from "./routes/issue-22-comments-get.json";
+import issue5EventsGet from "./routes/issue-5-conversation-rewards/issue-5-events-get.json";
 import issue22Get from "./routes/issue-22-get.json";
 import issue25CommentsGet from "./routes/issue-25-comments-get.json";
 import issue69EventsGet from "./routes/issue-69-events-get.json";
 import issue69CommentsGet from "./routes/issue-69-comments-get.json";
 import issue69Get from "./routes/issue-69-get.json";
 import issueEvents2Get from "./routes/issue-events-2-get.json";
 import issueEventsGet from "./routes/issue-events-get.json";
+import issue5CommentsGet from "./routes/issue-5-conversation-rewards/issue-5-comments-get.json";
+import issue12CommentsGet from "./routes/pull-12-conversation-rewards/issue-12-comments-get.json";
+import pull12Get from "./routes/pull-12-conversation-rewards/pull-12-get.json";
+import pull12ReviewsGet from "./routes/pull-12-conversation-rewards/pull-12-reviews-get.json";
+import pull12CommentsGet from "./routes/pull-12-conversation-rewards/pull-12-comments-get.json";
 import issueTimelineGet from "./routes/issue-timeline-get.json";
 import issue69TimelineGet from "./routes/issue-69-timeline-get.json";
 import issue70CommentsGet from "./routes/issue-70-comments-get.json";
 import pullsCommentsGet from "./routes/pulls-comments-get.json";
+import issue5TimelineGet from "./routes/issue-5-conversation-rewards/issue-5-timeline-get.json";
 import pullsGet from "./routes/pulls-get.json";
 import pulls70Get from "./routes/issue-70-get.json";
 import pullsReviewsGet from "./routes/pulls-reviews-get.json";
@@ -20,6 +28,31 @@ import pullsReviewsGet from "./routes/pulls-reviews-get.json";
  * Intercepts the routes and returns a custom payload
  */
 export const handlers = [
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/issues/5", () => {
+    return HttpResponse.json(issue5Get);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/issues/5/events", () => {
+    return HttpResponse.json(issue5EventsGet);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/issues/5/comments", () => {
+    return HttpResponse.json(issue5CommentsGet);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/issues/12/comments", () => {
+    return HttpResponse.json(issue12CommentsGet);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/issues/5/timeline", () => {
+    return HttpResponse.json(issue5TimelineGet);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/pulls/12", () => {
+    return HttpResponse.json(pull12Get);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/pulls/12/reviews", () => {
+    return HttpResponse.json(pull12ReviewsGet);
+  }),
+  http.get("https://api.github.com/repos/ubiquibot/conversation-rewards/pulls/12/comments", () => {
+    return HttpResponse.json(pull12CommentsGet);
+  }),
+
   http.get("https://api.github.com/repos/ubiquibot/comment-incentives/issues/22", () => {
     return HttpResponse.json(issue22Get);
   }),