Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
brett-fitz authored Sep 19, 2024
1 parent 42f5528 commit 0c327b7
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 18 deletions.
13 changes: 7 additions & 6 deletions external-import/urlscan/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ If you are using it independently, remember that the connector will try to conne
| `connector_create_indicators` | `CONNECTOR_CREATE_INDICATORS` | No | Create indicators for each observable processed. |
| `connector_tlp` | `CONNECTOR_TLP` | No | The TLP to apply to any indicators and observables. One of `white`, `green`, `amber` or `red`. |
| `connector_labels` | `CONNECTOR_LABELS` | No | Comma delimited list of labels to apply to each observable. |
| `connector_interval` | `CONNECTOR_INTERVAL` | No | An interval (in minutes) for data gathering from Urlscan. |
| `urlscan_url` | `URLSCAN_URL` | Yes | The Urlscan URL. |
| `urlscan_api_key` | `URLSCAN_API_KEY` | Yes | The Urlscan client secret. |
| `urlscan_default_x_opencti_score`| `URLSCAN_DEFAULT_X_OPENCTI_SCORE`| No | The default x_opencti_score to use across observable/indicator types. Default is 50.
| `urlscan_x_opencti_score_domain` | `URLSCAN_X_OPENCTI_SCORE_DOMAIN` | No | The x_opencti_score to use across Domain-Name observable and indicators. Defaults to default score.
| `urlscan_x_opencti_score_url` | `URLSCAN_X_OPENCTI_URL` | No | The x_opencti_score to use across Url observable and indicators. Defaults to default score.
| `connector_interval` | `CONNECTOR_INTERVAL` | No | An interval (in seconds) for data gathering from Urlscan. |
| `connector_lookback` | `CONNECTOR_LOOKBACK` | No | How far to look back (in days) when the connector has never run or when the last run is older than this value. Default is 3. Values above 7 are not recommended: the connector logs a warning and the query may fail. |
| `urlscan_url` | `URLSCAN_URL` | Yes | The Urlscan URL. |
| `urlscan_api_key` | `URLSCAN_API_KEY` | Yes | The Urlscan API key. |
| `urlscan_default_x_opencti_score`| `URLSCAN_DEFAULT_X_OPENCTI_SCORE`| No | The default x_opencti_score to use across observable/indicator types. Default is 50. |
| `urlscan_x_opencti_score_domain` | `URLSCAN_X_OPENCTI_SCORE_DOMAIN` | No | The x_opencti_score to use for Domain-Name observables and indicators. Defaults to the default score. |
| `urlscan_x_opencti_score_url` | `URLSCAN_X_OPENCTI_SCORE_URL` | No | The x_opencti_score to use for Url observables and indicators. Defaults to the default score. |
3 changes: 2 additions & 1 deletion external-import/urlscan/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ services:
- CONNECTOR_TLP=white
- CONNECTOR_LABELS=Phishing,Phishfeed
- CONNECTOR_INTERVAL=86400 # seconds, 1d
- URLSCAN_URL=https://urlscan.io/api/v1/pro/phishfeed?format=json&q=date:>now-1d
- CONNECTOR_LOOKBACK=3 # days
- URLSCAN_URL=https://urlscan.io/api/v1/pro/phishfeed?format=json
- URLSCAN_API_KEY=
- URLSCAN_DEFAULT_X_OPENCTI_SCORE=50
restart: always
3 changes: 2 additions & 1 deletion external-import/urlscan/src/config.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ connector:
tlp: "white"
labels: "Phishing,Phishfeed"
interval: 86400 # seconds, 1d
lookback: 3 # days

urlscan:
url: "https://urlscan.io/api/v1/pro/phishfeed?format=json&q=date:>now-1d"
url: "https://urlscan.io/api/v1/pro/phishfeed?format=json"
api_key: ""
default_x_opencti_score: 50
23 changes: 21 additions & 2 deletions external-import/urlscan/src/urlscan/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Urlscan client"""

from typing import Iterator, List
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

import requests
from pydantic.v1 import BaseModel, parse_raw_as
Expand All @@ -27,12 +28,30 @@ def __init__(self, url: str, api_key: str):
if not api_key:
raise ValueError("Urlscan API key must be set")

def query(self) -> Iterator[str]:
def query(self, date_math: str) -> Iterator[str]:
"""Process the feed URL and return any indicators.
:param date_math: Date math string for the feed.
:return: Feed results.
"""
# if date_math already in url, remove it
parsed_url = urlparse(self._url)
query_params = parse_qs(parsed_url.query)

# Update the date_math in the query parameters
if "q" in query_params:
query_params["q"] = [f"date:>{date_math}"] + [
param for param in query_params["q"] if not param.startswith("date:")
]
else:
query_params["q"] = [f"date:>{date_math}"]

# Reconstruct the URL with the updated query parameters
updated_url = urlunparse(
parsed_url._replace(query=urlencode(query_params, doseq=True))
)

resp = requests.get(
self._url,
updated_url,
headers={"API-key": self._api_key},
)
resp.raise_for_status()
Expand Down
28 changes: 25 additions & 3 deletions external-import/urlscan/src/urlscan/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ def __init__(self):
)

self._helper = OpenCTIConnectorHelper(config)
interval = get_config_variable(
"CONNECTOR_INTERVAL",
["connector", "interval"],
config,
default=86400,
isNumber=True,
)
lookback = get_config_variable(
"CONNECTOR_LOOKBACK",
["connector", "lookback"],
config,
default=3,
isNumber=True,
)

urlscan_url = get_config_variable(
"URLSCAN_URL",
Expand Down Expand Up @@ -117,7 +131,14 @@ def __init__(self):

self._default_labels = ["Phishing", "phishfeed"]
self._client = UrlscanClient(urlscan_url, urlscan_api_key)
self._loop = ConnectorLoop(self._helper, 86_400, 60, self._process_feed, False)
self._loop = ConnectorLoop(
helper=self._helper,
interval=interval,
lookback=lookback,
loop_interval=60,
callback=self._process_feed,
stop_on_error=False,
)

def start(self) -> None:
"""Start the connector
Expand All @@ -126,14 +147,15 @@ def start(self) -> None:
self._loop.start()
self._loop.join()

def _process_feed(self, work_id: str) -> None:
def _process_feed(self, work_id: str, date_math: str) -> None:
"""Process the external connector feed
:param work_id: Work ID
:param date_math: Date math string
:return: None
"""
bundle_objects = []

results = self._client.query()
results = self._client.query(date_math=date_math)
for url in results:
obs1 = self._create_url_observable(url, "Urlscan.io URL")
bundle_objects.extend(filter(None, [*obs1]))
Expand Down
24 changes: 19 additions & 5 deletions external-import/urlscan/src/urlscan/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import threading
import time
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import Callable

from pycti import OpenCTIConnectorHelper
Expand All @@ -24,6 +24,7 @@ def __init__(
self,
helper: OpenCTIConnectorHelper,
interval: int,
lookback: int,
loop_interval: int,
callback: Callable[[str], None],
stop_on_error: bool = False,
Expand All @@ -32,13 +33,16 @@ def __init__(
Create a new ListenQueue object
:param helper: Connector helper
:param interval: Interval between runs in seconds
:param lookback: How far to look back in days if the connector has never run
or the last run is older than this value.
:param loop_interval: Interval between loops between runs in seconds
:param callback: callback(work_id), executed after the interval has elapsed
:param stop_on_error: Stop looping when an unhandled exception is thrown
"""
super().__init__()
self._helper = helper
self._interval = interval
self._lookback = lookback
self._loop_interval = loop_interval
self._callback = callback
self._stop_on_error = stop_on_error
Expand Down Expand Up @@ -86,18 +90,28 @@ def _run_loop(self) -> None:
# Get the current timestamp and check
state = self._helper.get_state() or {}

now = datetime.utcnow().replace(microsecond=0)
now = datetime.now(timezone.utc).replace(microsecond=0)
last_run = state.get("last_run", 0)
last_run = datetime.utcfromtimestamp(last_run).replace(microsecond=0)
last_run = datetime.fromtimestamp(last_run, timezone.utc).replace(microsecond=0)

if last_run.year == 1970:
log.info("Connector has never run")
else:
log.info(f"Connector last run: {last_run}")

time_since_last_run = (now - last_run).total_seconds()
# Check the difference between now and the last run to the interval
if (now - last_run).total_seconds() > self._interval:
if time_since_last_run > self._interval:
log.info("Connector will now run")

# Compute date math string to get all data since last run
if time_since_last_run > (self._lookback * 86400):
if self._lookback > 7:
log.warning("Lookback is greater than 7 days, this could fail...")
date_math = f"now-{self._lookback}d"
else:
date_math = f"now-{int(time_since_last_run)}s"

last_run = now

name = self._helper.connect_name or "Connector"
Expand All @@ -107,7 +121,7 @@ def _run_loop(self) -> None:
)

try:
self._callback(work_id)
self._callback(work_id, date_math)
except Exception as ex:
log.exception(f"Unhandled exception processing connector feed: {ex}")
self._helper.api.work.to_processed(work_id, f"Failed: {ex}", True)
Expand Down

0 comments on commit 0c327b7

Please sign in to comment.