Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: refactor LLM model selection and attack surface analysis #233

Open
wants to merge 21 commits into
base: release/2.2.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
284d83b
refactor: replace GPT with LLM for vulnerability reporting
psyray Nov 10, 2024
3ad4ea6
feat: enhance LLM model selection and attack surface analysis
psyray Nov 11, 2024
82fccad
refactor(logging): improve vulnerability logging details
psyray Nov 11, 2024
b170a31
feat(ui): enhance LLM toolkit UI and refactor model management
psyray Nov 11, 2024
d6a1b4b
feat(llm): convert LLM markdown response to HTML sanitize it
psyray Nov 11, 2024
678e317
feat: enhance attack surface analysis with model selection and deleti…
psyray Nov 11, 2024
916ed10
feat: enhance markdown rendering and update UI settings
psyray Nov 11, 2024
7e87530
refactor: update LLM vulnerability report generation and storage
psyray Nov 12, 2024
e7e56c1
fix: remove unused imports
psyray Nov 12, 2024
5390b37
fix: enable section response generation
psyray Nov 12, 2024
20823c0
refactor: update fixtures and permissions, remove unused data
psyray Nov 13, 2024
0d3f6ba
feat: enhance reference handling
psyray Nov 13, 2024
f5f37f4
fix: task reference conversion
psyray Nov 13, 2024
ac29d7f
fix: update model selection logic
psyray Nov 14, 2024
c5d0496
feat: enhance model management and download functionality in LLM Toolkit
psyray Nov 14, 2024
a8bd492
feat: integrate WebSocket support for model operations
psyray Nov 17, 2024
638ea94
test: enhance OllamaManager and LLM tests with additional mock setups
psyray Nov 17, 2024
79b52e5
refactor: remove cancel download feature
psyray Nov 17, 2024
d9d7af4
feat: add model name to progress bar popup
psyray Nov 17, 2024
5d489f7
fix: update model selection API endpoint
psyray Nov 17, 2024
3809741
feat: enhance model URL handling
psyray Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/celery/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/r
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=50 --loglevel=$CELERY_LOGLEVEL -Q run_command_queue -n run_command_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q query_reverse_whois_queue -n query_reverse_whois_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q query_ip_history_queue -n query_ip_history_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q gpt_queue -n gpt_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=30 --loglevel=$CELERY_LOGLEVEL -Q llm_queue -n llm_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q dorking_queue -n dorking_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q osint_discovery_queue -n osint_discovery_worker &
watchmedo auto-restart --recursive --pattern="*.py" --directory="/home/rengine/rengine/" -- poetry run -C $HOME/ celery -A reNgine.tasks worker --pool=gevent --concurrency=10 --loglevel=$CELERY_LOGLEVEL -Q h8mail_queue -n h8mail_worker &
Expand Down
23 changes: 0 additions & 23 deletions web/api/tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
This file contains the test cases for the API views.
"""

from unittest.mock import patch
from django.utils import timezone
from django.urls import reverse
from rest_framework import status
Expand All @@ -12,7 +11,6 @@
'TestCreateProjectApi',
'TestAddReconNote',
'TestListTodoNotes',
'TestGPTAttackSuggestion'
]

class TestCreateProjectApi(BaseTestCase):
Expand Down Expand Up @@ -108,24 +106,3 @@ def test_list_todo_notes(self):
self.data_generator.todo_note.scan_history.id,
)

class TestGPTAttackSuggestion(BaseTestCase):
"""Tests for the GPT Attack Suggestion API."""

def setUp(self):
super().setUp()
self.data_generator.create_project_base()

@patch("reNgine.gpt.GPTAttackSuggestionGenerator.get_attack_suggestion")
def test_get_attack_suggestion(self, mock_get_suggestion):
"""Test getting an attack suggestion for a subdomain."""
mock_get_suggestion.return_value = {
"status": True,
"description": "Test attack suggestion",
}
api_url = reverse("api:gpt_get_possible_attacks")
response = self.client.get(
api_url, {"subdomain_id": self.data_generator.subdomain.id}
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertTrue(response.data["status"])
self.assertEqual(response.data["description"], "Test attack suggestion")
10 changes: 5 additions & 5 deletions web/api/tests/test_vulnerability.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

__all__ = [
'TestVulnerabilityViewSet',
'TestGPTVulnerabilityReportGenerator',
'TestLLMVulnerabilityReportGenerator',
'TestDeleteVulnerability',
'TestVulnerabilityReport',
'TestFetchMostCommonVulnerability',
Expand Down Expand Up @@ -79,16 +79,16 @@ def test_list_vulnerabilities_by_severity(self):
self.data_generator.vulnerabilities[0].name,
)

class TestGPTVulnerabilityReportGenerator(BaseTestCase):
"""Tests for the GPT Vulnerability Report Generator API."""
class TestLLMVulnerabilityReportGenerator(BaseTestCase):
"""Tests for the LLM Vulnerability Report Generator API."""

def setUp(self):
super().setUp()
self.data_generator.create_project_base()
self.data_generator.create_endpoint()
self.data_generator.create_vulnerability()

@patch("reNgine.tasks.gpt_vulnerability_description.apply_async")
@patch("reNgine.tasks.llm_vulnerability_description.apply_async")
def test_get_vulnerability_report(self, mock_apply_async):
"""Test generating a vulnerability report."""
mock_task = MagicMock()
Expand All @@ -97,7 +97,7 @@ def test_get_vulnerability_report(self, mock_apply_async):
"description": "Test vulnerability report",
}
mock_apply_async.return_value = mock_task
api_url = reverse("api:gpt_vulnerability_report_generator")
api_url = reverse("api:llm_vulnerability_report_generator")
response = self.client.get(
api_url, {"id": self.data_generator.vulnerabilities[0].id}
)
Expand Down
16 changes: 10 additions & 6 deletions web/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,17 @@
GfList.as_view(),
name='gf_list'),
path(
'tools/gpt_vulnerability_report/',
GPTVulnerabilityReportGenerator.as_view(),
name='gpt_vulnerability_report_generator'),
'tools/llm_vulnerability_report/',
LLMVulnerabilityReportGenerator.as_view(),
name='llm_vulnerability_report_generator'),
path(
'tools/gpt_get_possible_attacks/',
GPTAttackSuggestion.as_view(),
name='gpt_get_possible_attacks'),
'tools/llm_get_possible_attacks/',
LLMAttackSuggestion.as_view(),
name='llm_get_possible_attacks'),
path(
'tools/llm_models/',
LLMModelsManager.as_view(),
name='llm_models_manager'),
path(
'github/tool/get_latest_releases/',
GithubToolCheckGetLatestRelease.as_view(),
Expand Down
80 changes: 68 additions & 12 deletions web/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import socket
from ipaddress import IPv4Network
from collections import defaultdict
from datetime import datetime

import requests
import validators
Expand All @@ -30,23 +31,27 @@
get_interesting_endpoints,
get_interesting_subdomains,
get_lookup_keywords,
safe_int_cast
safe_int_cast,
get_open_ai_key,
)
from reNgine.definitions import (
ABORTED_TASK,
OLLAMA_INSTANCE,
NUCLEI_SEVERITY_MAP,
DEFAULT_GPT_MODELS,
RUNNING_TASK,
SUCCESS_TASK
)
from reNgine.llm.config import (
OLLAMA_INSTANCE,
DEFAULT_GPT_MODELS,
MODEL_REQUIREMENTS
)
from reNgine.settings import (
RENGINE_CURRENT_VERSION,
RENGINE_TOOL_GITHUB_PATH
)
from reNgine.tasks import (
create_scan_activity,
gpt_vulnerability_description,
llm_vulnerability_description,
initiate_subscan,
query_ip_history,
query_reverse_whois,
Expand All @@ -57,7 +62,7 @@
run_wafw00f,
send_hackerone_report
)
from reNgine.gpt import GPTAttackSuggestionGenerator
from reNgine.llm.llm import LLMAttackSuggestionGenerator
from reNgine.utilities import is_safe_path, remove_lead_and_trail_slash
from scanEngine.models import EngineType, InstalledExternalTool
from startScan.models import (
Expand Down Expand Up @@ -169,17 +174,16 @@
defaults={
'selected_model': model_name,
'use_ollama': use_ollama,
'selected': True
}
)
return Response({'status': True})
except Exception as e:
logger.error(f"Error in OllamaManager PUT: {str(e)}")
return Response({'status': False, 'message': 'An error occurred while updating Ollama settings.'}, status=500)

class GPTAttackSuggestion(APIView):
class LLMAttackSuggestion(APIView):
def get(self, request):
req = self.request
req = request
subdomain_id = safe_int_cast(req.query_params.get('subdomain_id'))
if not subdomain_id:
return Response({
Expand All @@ -195,11 +199,11 @@
})

if subdomain.attack_surface:
return Response({
'status': True,
'subdomain_name': subdomain.name,
'description': subdomain.attack_surface
})

Check warning

Code scanning / CodeQL

Information exposure through an exception Medium

Stack trace information
flows to this location and may be exposed to an external user.

ip_addrs = subdomain.ip_addresses.all()
open_ports = ', '.join(f'{port.number}/{port.service_name}' for ip in ip_addrs for port in ip.ports.all())
Expand All @@ -216,18 +220,18 @@
Page Content Length: {subdomain.content_length}
'''

gpt = GPTAttackSuggestionGenerator()
response = gpt.get_attack_suggestion(input_data)
llm = LLMAttackSuggestionGenerator()
response = llm.get_attack_suggestion(input_data)
response['subdomain_name'] = subdomain.name

if response.get('status'):
subdomain.attack_surface = response.get('description')
subdomain.save()

return Response(response)

Check warning

Code scanning / CodeQL

Information exposure through an exception Medium

Stack trace information
flows to this location and may be exposed to an external user.


class GPTVulnerabilityReportGenerator(APIView):
class LLMVulnerabilityReportGenerator(APIView):
def get(self, request):
req = self.request
vulnerability_id = safe_int_cast(req.query_params.get('id'))
Expand All @@ -236,7 +240,7 @@
'status': False,
'error': 'Missing GET param Vulnerability `id`'
})
task = gpt_vulnerability_description.apply_async(args=(vulnerability_id,))
task = llm_vulnerability_description.apply_async(args=(vulnerability_id,))
response = task.wait()
return Response(response)

Expand Down Expand Up @@ -2892,3 +2896,55 @@
print(e)

return qs

class LLMModelsManager(APIView):
    """Return every LLM model known to reNgine (default GPT models plus
    locally installed Ollama models) and the currently selected model."""

    def get(self, request):
        """Get all available LLM models (GPT and Ollama) and currently selected model.

        Response payload:
        - models: list of model dicts; Ollama models carry ``is_local: True``,
          the active model carries ``selected: True``, and known models carry
          a ``capabilities`` entry from MODEL_REQUIREMENTS.
        - selected_model: name of the model configured in OllamaSettings
          (falls back to 'gpt-3.5-turbo' when no settings row exists).
        - openai_key_error: True when a GPT model is selected but no OpenAI
          API key is configured.
        """
        try:
            # Copy the static default list so the module constant is never mutated.
            all_models = DEFAULT_GPT_MODELS.copy()

            # Merge in locally installed Ollama models. Best effort: an
            # unreachable Ollama instance must not break the endpoint.
            try:
                # Timeout keeps this API call from hanging indefinitely when
                # the Ollama instance is down or unroutable.
                response = requests.get(f'{OLLAMA_INSTANCE}/api/tags', timeout=10)
                if response.status_code == 200:
                    ollama_models = response.json().get('models', [])
                    date_format = "%Y-%m-%dT%H:%M:%S"
                    all_models.extend([{
                        **model,
                        # Ollama reports sub-second precision (and sometimes an
                        # offset) after the dot; parse only the seconds prefix.
                        'modified_at': datetime.strptime(model['modified_at'].split('.')[0], date_format),
                        'is_local': True,
                    } for model in ollama_models])
            except Exception as e:
                logger.error(f"Error fetching Ollama models: {str(e)}")

            # Resolve the currently selected model from settings, defaulting
            # to gpt-3.5-turbo when nothing has been configured yet.
            selected_model = OllamaSettings.objects.first()
            selected_model_name = selected_model.selected_model if selected_model else 'gpt-3.5-turbo'

            # Single pass: mark the selected model and attach capabilities.
            for model in all_models:
                if model['name'] == selected_model_name:
                    model['selected'] = True
                # Strip tags from model name (e.g., "llama2:latest" -> "llama2")
                base_model_name = model['name'].split(':')[0]
                if base_model_name in MODEL_REQUIREMENTS:
                    model['capabilities'] = MODEL_REQUIREMENTS[base_model_name]

            return Response({
                'status': True,
                'models': all_models,
                'selected_model': selected_model_name,
                'openai_key_error': not get_open_ai_key() and 'gpt' in selected_model_name
            })

        except Exception as e:
            # Log full details server-side only; do NOT echo str(e) back to
            # the client (information exposure through an exception — flagged
            # by code scanning).
            logger.error(f"Error in LLMModelsManager GET: {str(e)}")
            return Response({
                'status': False,
                'error': 'Failed to fetch LLM models',
            }, status=500)
Fixed Show fixed Hide fixed
2 changes: 1 addition & 1 deletion web/config/default_yaml_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ vulnerability_scan: {
'rate_limit': 150,
'retries': 1,
'timeout': 5,
'fetch_gpt_report': true,
'fetch_llm_report': true,
'nuclei': {
'use_nuclei_config': false,
'severities': ['unknown', 'info', 'low', 'medium', 'high', 'critical'],
Expand Down
2 changes: 1 addition & 1 deletion web/dashboard/templates/dashboard/onboarding.html
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ <h4 class="mt-3 mt-lg-0">Default API Keys</h4>
<p class="text-muted mb-4">If you have API keys for these services, please enter them here.</p>
<div class="mb-3">
<label for="key_openai" class="form-label">OpenAI <span class="ms-1 badge bg-soft-danger text-danger">🔥 Recommended</span><span class="ms-1 badge bg-soft-primary text-primary">Experimental</span></label>
<p class="text-muted">OpenAI keys will be used to generate vulnerability description, remediation, impact and vulnerability report writing using ChatGPT.</p>
<p class="text-muted">OpenAI keys will be used to generate vulnerability description, remediation, impact and vulnerability report writing using LLM.</p>
{% if openai_key %}
<input class="form-control" type="text" id="key_openai" name="key_openai" placeholder="Enter OpenAI Key" value="{{openai_key}}">
{% else %}
Expand Down
16 changes: 8 additions & 8 deletions web/fixtures/auth.json
Original file line number Diff line number Diff line change
Expand Up @@ -1875,36 +1875,36 @@
"model": "auth.permission",
"pk": 209,
"fields": {
"name": "Can add gpt vulnerability report",
"name": "Can add llm vulnerability report",
"content_type": 53,
"codename": "add_gptvulnerabilityreport"
"codename": "add_llmvulnerabilityreport"
}
},
{
"model": "auth.permission",
"pk": 210,
"fields": {
"name": "Can change gpt vulnerability report",
"name": "Can change llm vulnerability report",
"content_type": 53,
"codename": "change_gptvulnerabilityreport"
"codename": "change_llmvulnerabilityreport"
}
},
{
"model": "auth.permission",
"pk": 211,
"fields": {
"name": "Can delete gpt vulnerability report",
"name": "Can delete llm vulnerability report",
"content_type": 53,
"codename": "delete_gptvulnerabilityreport"
"codename": "delete_llmvulnerabilityreport"
}
},
{
"model": "auth.permission",
"pk": 212,
"fields": {
"name": "Can view gpt vulnerability report",
"name": "Can view llm vulnerability report",
"content_type": 53,
"codename": "view_gptvulnerabilityreport"
"codename": "view_llmvulnerabilityreport"
}
},
{
Expand Down
6 changes: 3 additions & 3 deletions web/fixtures/default_scan_engines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
'jpg', 'jpeg', 'gif', 'mp4', 'mpeg', 'mp3'],\r\n 'threads': 30\r\n}\r\nvulnerability_scan: {\r\n
\ 'run_nuclei': true,\r\n 'run_dalfox': true,\r\n 'run_crlfuzz': true,\r\n
\ 'enable_http_crawl': true,\r\n 'concurrency': 50,\r\n 'intensity': 'normal',\r\n
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_gpt_report':
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_llm_report':
true,\r\n 'nuclei': {\r\n 'use_nuclei_config': false,\r\n 'severities': ['unknown',
'info', 'low', 'medium', 'high', 'critical']\r\n }\r\n}\r\nwaf_detection: {\r\n\r\n}\r\nscreenshot:
{\r\n 'enable_http_crawl': true,\r\n 'intensity': 'normal',\r\n 'timeout':
Expand Down Expand Up @@ -70,7 +70,7 @@
\ ],\r\n 'intensity': 'normal',\r\n 'documents_limit': 50\r\n}\r\nvulnerability_scan:
{\r\n 'run_nuclei': true,\r\n 'run_dalfox': true,\r\n 'run_crlfuzz': true,\r\n
\ 'enable_http_crawl': true,\r\n 'concurrency': 50,\r\n 'intensity': 'normal',\r\n
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_gpt_report':
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_llm_report':
true,\r\n 'nuclei': {\r\n 'use_nuclei_config': false,\r\n 'severities': ['unknown',
'info', 'low', 'medium', 'high', 'critical']\r\n }\r\n}"
default_engine: true
Expand All @@ -97,7 +97,7 @@
\ ],\r\n 'intensity': 'normal',\r\n 'documents_limit': 50\r\n}\r\nvulnerability_scan:
{\r\n 'run_nuclei': true,\r\n 'run_dalfox': true,\r\n 'run_crlfuzz': true,\r\n
\ 'enable_http_crawl': false,\r\n 'concurrency': 50,\r\n 'intensity': 'normal',\r\n
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_gpt_report':
\ 'rate_limit': 150,\r\n 'retries': 1,\r\n 'timeout': 5,\r\n 'fetch_llm_report':
true,\r\n 'nuclei': {\r\n 'use_nuclei_config': false,\r\n 'severities': ['low',
'medium', 'high', 'critical']\r\n }\r\n}"
default_engine: true
Loading
Loading