Skip to content

Commit

Permalink
Implement issue summary generation
Browse files Browse the repository at this point in the history
  • Loading branch information
arkid15r committed Sep 13, 2024
1 parent c4461a8 commit d6d85de
Show file tree
Hide file tree
Showing 16 changed files with 559 additions and 31 deletions.
23 changes: 14 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ build:
# Run Django's collectstatic without interactive prompts (--noinput). The
# command string is handed over in CMD to the exec-backend-command target,
# which is defined outside this view.
collect-static:
@CMD="poetry run python manage.py collectstatic --noinput" $(MAKE) exec-backend-command

# Open an interactive Django shell (manage.py shell). The command string is
# handed over in CMD to the exec-backend-command target.
django-shell:
@CMD="poetry run python manage.py shell" $(MAKE) exec-backend-command

# Dump the "github" and "owasp" app data with Django's dumpdata, indented by
# two spaces, into data/nest.json. Runs through exec-backend-command via CMD.
dump-data:
@CMD="poetry run python manage.py dumpdata github owasp --indent=2 --output=data/nest.json" $(MAKE) exec-backend-command

Expand All @@ -16,6 +19,9 @@ github-sync-owasp-organization:
# Run the github_sync_related_repositories management command through
# exec-backend-command (command string passed in CMD).
github-sync-related-repositories:
@CMD="poetry run python manage.py github_sync_related_repositories" $(MAKE) exec-backend-command

# Run the github_summarize_issues management command through
# exec-backend-command (command string passed in CMD). Note the target name
# uses hyphens while the management command uses underscores.
github-summarize-issues:
@CMD="poetry run python manage.py github_summarize_issues" $(MAKE) exec-backend-command

# Run the algolia_reindex management command through exec-backend-command
# (command string passed in CMD).
index:
@CMD="poetry run python manage.py algolia_reindex" $(MAKE) exec-backend-command

Expand Down Expand Up @@ -44,7 +50,7 @@ purge-data:
@CMD="poetry run python manage.py purge_data" $(MAKE) exec-backend-command

run:
@$(MAKE) build
@docker compose build
@docker compose up

setup:
Expand All @@ -53,16 +59,15 @@ setup:
# Start /bin/bash through exec-backend-command (command string passed in CMD).
# NOTE(review): presumably this opens a shell inside the backend container;
# exec-backend-command is defined outside this view -- confirm.
shell:
@CMD="/bin/bash" $(MAKE) exec-backend-command

sync:
@$(MAKE) github-sync-owasp-organization
@$(MAKE) owasp-scrape-site-data
@$(MAKE) github-sync-related-repositories
@$(MAKE) owasp-update-projects
# Refresh all synced data by building each step as a prerequisite.
# Prerequisites are listed in the intended pipeline order (serial make
# processes them left to right). Each name must match a target defined in
# this Makefile exactly: targets here are hyphenated, even where the
# underlying management command uses underscores.
sync: \
	github-sync-owasp-organization \
	owasp-scrape-site-data \
	github-sync-related-repositories \
	github-summarize-issues \
	owasp-update-projects

# Build the backend test image from backend/Dockerfile.test (tagged
# nest-backend-test), then run pytest inside it with DJANGO_CONFIGURATION=Test.
# stderr of both docker commands is discarded (2>/dev/null), so anything they
# print to stderr -- including failure diagnostics -- is not shown.
test:
@docker build -f backend/Dockerfile.test backend -t nest-backend-test 2>/dev/null
@docker run -e DJANGO_CONFIGURATION=Test nest-backend-test poetry run pytest 2>/dev/null

update:
@$(MAKE) sync
@$(MAKE) index
update: sync index
1 change: 1 addition & 0 deletions backend/.env/template
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ DJANGO_DB_NAME="None"
DJANGO_DB_PASSWORD="None"
DJANGO_DB_PORT="None"
DJANGO_DB_USER="None"
DJANGO_OPEN_AI_SECRET_KEY="None"
DJANGO_SECRET_KEY="None"
GITHUB_TOKEN="None"
57 changes: 57 additions & 0 deletions backend/apps/common/open_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Open AI API module."""

import logging

import openai
from django.conf import settings

logger = logging.getLogger(__name__)


class OpenAi:
    """Small wrapper around the OpenAI chat completions API.

    Stores the model parameters together with a system prompt and a user
    input, and sends them as a two-message chat request. The setters return
    ``self`` so calls can be chained, e.g.
    ``open_ai.set_prompt(p).set_input(i).complete()``.
    """

    def __init__(self, model="gpt-4o-mini", max_tokens=1000, temperature=0.7, prompt=None):
        """Create the API client and store the request parameters.

        Args:
            model: Model name sent with every request.
            max_tokens: ``max_tokens`` value sent with every request.
            temperature: ``temperature`` value sent with every request.
            prompt: Optional system role content; can also be set later
                with ``set_prompt()``.
        """
        self.client = openai.OpenAI(
            api_key=settings.OPEN_AI_SECRET_KEY,
            timeout=30,  # In seconds.
        )

        self.max_tokens = max_tokens
        self.model = model
        self.temperature = temperature

        # Define both attributes up front so complete() can detect a missing
        # prompt/input instead of failing on an undefined attribute.
        self.input = None
        self.prompt = None

        if prompt:
            self.set_prompt(prompt)

    def set_prompt(self, content):
        """Set system role content.

        Returns:
            This instance, to allow call chaining.
        """
        self.prompt = content

        return self

    def set_input(self, content):
        """Set user role content.

        Returns:
            This instance, to allow call chaining.
        """
        self.input = content

        return self

    def complete(self):
        """Request a chat completion for the current prompt and input.

        Returns:
            The content of the first choice's message, or ``None`` when the
            prompt or input has not been set or when the request fails.
            Failures are logged, never raised.
        """
        if self.prompt is None or self.input is None:
            # Nothing sensible to send; report the real cause rather than
            # letting it surface as a generic API request error.
            logger.error("OpenAI prompt and input must be set before requesting a completion.")
            return None

        try:
            response = self.client.chat.completions.create(
                max_tokens=self.max_tokens,
                messages=[
                    {"role": "system", "content": self.prompt},
                    {"role": "user", "content": self.input},
                ],
                model=self.model,
                temperature=self.temperature,
            )

            return response.choices[0].message.content
        except openai.APIConnectionError:
            logger.exception("A connection error occurred during OpenAI API request.")
        except Exception:
            # Deliberately broad: this is a best-effort call and callers only
            # distinguish "got content" from "got nothing".
            logger.exception("An error occurred during OpenAI API request.")

        return None
2 changes: 1 addition & 1 deletion backend/apps/github/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class IssueAdmin(admin.ModelAdmin):
"state",
"is_locked",
)
search_fields = ("title", "description")
search_fields = ("title", "body", "summary")

def custom_field_github_url(self, obj):
"""Issue GitHub URL."""
Expand Down
4 changes: 2 additions & 2 deletions backend/apps/github/index/issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class IssueIndex(AlgoliaIndex):
fields = (
"idx_author_login",
"idx_author_name",
"idx_body",
"idx_comments_count",
"idx_created_at",
"idx_labels",
Expand All @@ -34,6 +33,7 @@ class IssueIndex(AlgoliaIndex):
"idx_repository_name",
"idx_repository_stars_count",
"idx_repository_topics",
"idx_summary",
"idx_title",
"idx_updated_at",
"idx_url",
Expand Down Expand Up @@ -65,7 +65,7 @@ class IssueIndex(AlgoliaIndex):
"unordered(idx_project_description, idx_repository_description)",
"unordered(idx_project_tags, idx_repository_topics)",
"unordered(idx_author_login, idx_author_name)",
"unordered(idx_body)",
"unordered(idx_summary)",
],
}

Expand Down
48 changes: 48 additions & 0 deletions backend/apps/github/management/commands/github_summarize_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""A command to generate summaries for open GitHub issues."""

import logging

from django.core.management.base import BaseCommand

from apps.common.open_ai import OpenAi
from apps.github.models import Issue

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Generate summaries for open GitHub issues that do not have one yet."""

    help = "Populated GitHub issue summary."

    def add_arguments(self, parser):
        """Register the optional integer ``--offset`` argument (default 0)."""
        parser.add_argument("--offset", default=0, required=False, type=int)

    def handle(self, *args, **options):
        """Summarize up to 10 open issues without a summary, newest first.

        Starts at ``--offset`` within the queryset, asks OpenAI for a summary
        of each issue's title and body, and bulk-saves the issues that
        received one.
        """
        open_ai = OpenAi()
        open_issues = Issue.open_issues.without_summary.order_by("-created_at")
        open_issues_count = open_issues.count()

        issues = []
        offset = options["offset"]
        for idx, issue in enumerate(open_issues[offset : offset + 10]):
            # Position and total both refer to the full (un-offset) queryset.
            prefix = f"{idx + offset + 1} of {open_issues_count}"
            print(f"{prefix:<10} {issue.title}")

            # Each literal ends with a space so the implicitly concatenated
            # sentences do not run together in the prompt.
            open_ai.set_prompt(
                (
                    "Summarize the following GitHub issue using imperative mood. "
                    "Add a good amount of technical details. "
                    "Include possible first steps of tackling the problem."
                )
                if issue.project.is_code_type or issue.project.is_tool_type
                else (
                    "Summarize the following GitHub issue. "
                    "Avoid mentioning author's name or issue creation date. "
                    "Add a hint of what needs to be done if possible."
                )
            )

            summary = open_ai.set_input(f"{issue.title}\r\n{issue.body}").complete()
            if not summary:
                # complete() yields nothing when the request fails. Leave the
                # summary empty so the issue is picked up again on a later
                # run, instead of saving None into a non-nullable field.
                continue

            issue.summary = summary
            issues.append(issue)

        # Bulk save data.
        Issue.bulk_save(issues)
17 changes: 17 additions & 0 deletions backend/apps/github/migrations/0002_issue_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 5.1.1 on 2024-09-12 19:39

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``summary`` text field to the ``issue`` model."""

    # Must run after the app's initial migration.
    dependencies = [("github", "0001_initial")]

    operations = [
        migrations.AddField(
            model_name="issue",
            name="summary",
            field=models.TextField(
                default="",
                max_length=3000,
                verbose_name="Summary",
            ),
        ),
    ]
7 changes: 6 additions & 1 deletion backend/apps/github/models/issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@

from apps.common.models import BulkSaveModel, TimestampedModel
from apps.github.models.common import NodeModel
from apps.github.models.managers.issue import OpenIssueManager
from apps.github.models.mixins import IssueIndexMixin


class Issue(BulkSaveModel, IssueIndexMixin, NodeModel, TimestampedModel):
"""Issue model."""

objects = models.Manager()
open_issues = OpenIssueManager()

class Meta:
db_table = "github_issues"
ordering = ("-updated_at", "-state")
Expand All @@ -21,6 +25,7 @@ class State(models.TextChoices):

title = models.CharField(verbose_name="Title", max_length=500)
body = models.TextField(verbose_name="Body", default="")
summary = models.TextField(verbose_name="Summary", max_length=3000, default="")
state = models.CharField(
verbose_name="State", max_length=20, choices=State, default=State.OPEN
)
Expand Down Expand Up @@ -83,7 +88,7 @@ def is_indexable(self):
@property
def project(self):
"""Return project."""
return self.repository.project_set.first()
return self.repository.project

@property
def repository_id(self):
Expand Down
Empty file.
26 changes: 26 additions & 0 deletions backend/apps/github/models/managers/issue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""GitHub app issue managers."""

from django.db import models


class OpenIssueManager(models.Manager):
    """Manager limited to open issues whose repository has a project."""

    def get_queryset(self):
        """Return open, project-linked issues with ``repository`` pre-selected."""
        base_queryset = super().get_queryset()
        with_repository = base_queryset.select_related("repository")

        return with_repository.filter(
            repository__project__isnull=False,
            state="open",
        )

    @property
    def without_summary(self):
        """Return the managed issues whose summary is an empty string."""
        open_issues = self.get_queryset()

        return open_issues.filter(summary="")
12 changes: 5 additions & 7 deletions backend/apps/github/models/mixins/issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,6 @@ def idx_author_name(self):
else None
)

@property
def idx_body(self):
"""Return body for indexing."""
# TODO(arkid15r): reduce noise by adding only up to 100 most popular words.
# Skip exceptions, logs, short words, etc.
return self.body[:4500] if self.body else None

@property
def idx_comments_count(self):
"""Return comments count at for indexing."""
Expand Down Expand Up @@ -101,6 +94,11 @@ def idx_repository_stars_count(self):
"""Return repository stars count for indexing."""
return self.repository.idx_stars_count

@property
def idx_summary(self):
    """Summary value for indexing; ``None`` when the summary is empty."""
    return self.summary or None

@property
def idx_title(self):
"""Return title for indexing."""
Expand Down
5 changes: 5 additions & 0 deletions backend/apps/github/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def latest_release(self):
"""Repository latest release."""
return self.releases.order_by("-created_at").first()

@property
def project(self):
    """First entry of this repository's ``project_set`` (``None`` if it has none)."""
    related_projects = self.project_set
    return related_projects.first()

@property
def top_languages(self):
"""Return a list of top used languages."""
Expand Down
15 changes: 15 additions & 0 deletions backend/apps/owasp/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ def __str__(self):
"""Project human readable representation."""
return f"{self.name or self.key}"

@property
def is_code_type(self):
    """Whether this project's type equals ``ProjectType.CODE``."""
    code_type = self.ProjectType.CODE
    return self.type == code_type

@property
def is_documentation_type(self):
    """Whether this project's type equals ``ProjectType.DOCUMENTATION``."""
    documentation_type = self.ProjectType.DOCUMENTATION
    return self.type == documentation_type

@property
def is_tool_type(self):
    """Whether this project's type equals ``ProjectType.TOOL``."""
    tool_type = self.ProjectType.TOOL
    return self.type == tool_type

@property
def is_indexable(self):
"""Projects to index."""
Expand Down
Loading

0 comments on commit d6d85de

Please sign in to comment.