Skip to content

Commit

Permalink
Merge pull request #2375 from gethvi/fix-2167
Browse files Browse the repository at this point in the history
FIX: Updates Turris Greylist parser and feed URL
  • Loading branch information
sebix committed Jun 21, 2023
2 parents 6d4d889 + 79bb3d0 commit 61c45ac
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 63 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ CHANGELOG
- Removed duplicate mappings from the 'Spam-URL' report. (PR#2348)
- `intelmq.bots.parsers.generic.parser_csv`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
- `intelmq.bots.parsers.html_table.parser`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
- `intelmq.bots.parsers.turris.parser`: Updated to the latest data format (issue #2167). (PR#2373 by Filip Pokorný).

#### Experts
- `intelmq.bots.experts.sieve`:
Expand Down
61 changes: 29 additions & 32 deletions intelmq/bots/parsers/turris/parser.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,47 @@
# SPDX-FileCopyrightText: 2015 robcza
# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import csv
import io

from intelmq.lib import utils
from intelmq.lib.bot import ParserBot

# Destination port looked up for a feed tag (via PORTS.get(tag) in parse_line).
# Tags without an entry yield None from that lookup.
PORTS = {
    "ftp": 21,
    "telnet": 23,
    "http": 80
    # smtp uses both 25 and 587, therefore we can't say for certain
}


class TurrisGreylistParserBot(ParserBot):
"""Parse the Turris Greylist feed"""

def process(self):
report = self.receive_message()

columns = [
"source.ip",
"source.geolocation.cc",
"event_description.text",
"source.asn"
]

headers = True
raw_report = utils.base64_decode(report.get("raw"))
raw_report = raw_report.translate({0: None})
for row in csv.reader(io.StringIO(raw_report)):
# ignore headers
if headers:
headers = False
continue
parse = ParserBot.parse_csv_dict
recover_line = ParserBot.recover_line_csv_dict
_ignore_lines_starting = ["#"]

def parse_line(self, line, report):

for tag in line.get("Tags", "").split(","):

event = self.new_event(report)

for key, value in zip(columns, row):
if key == "__IGNORE__":
continue
if tag in ["smtp", "http", "ftp", "telnet"]:
event.add("protocol.transport", "tcp")
event.add("protocol.application", tag)
event.add("classification.type", "brute-force")
event.add("destination.port", PORTS.get(tag))

event.add(key, value)
elif tag == "port_scan":
event.add("classification.type", "scanner")

event.add('classification.type', 'scanner')
event.add("raw", ",".join(row))
else:
# Tags such as "haas", "haas_logged" and "haas_not_logged" come from the CZ.NIC HaaS feed (available in IntelMQ).
# It is better to use that feed for this data (it comes from an SSH honeypot).
continue

self.send_message(event)
self.acknowledge_message()
event.add("raw", self.recover_line(line))
event.add("source.ip", line.get("Address"))
yield event


BOT = TurrisGreylistParserBot
4 changes: 2 additions & 2 deletions intelmq/etc/feeds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -651,14 +651,14 @@ providers:
collector:
module: intelmq.bots.collectors.http.collector_http
parameters:
http_url: https://www.turris.cz/greylist-data/greylist-latest.csv
http_url: https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv
rate_limit: 43200
name: __FEED__
provider: __PROVIDER__
parser:
module: intelmq.bots.parsers.turris.parser
parameters:
revision: 2018-01-20
revision: 2023-06-13
documentation: https://project.turris.cz/en/greylist
public: true
Greylist with PGP signature verification:
Expand Down
18 changes: 18 additions & 0 deletions intelmq/lib/upgrades.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
'v301_deprecations',
'v310_feed_changes',
'v310_shadowserver_feednames',
'v320_update_turris_greylist_url',
]


Expand Down Expand Up @@ -862,6 +863,22 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
return messages + ' Remove affected bots yourself.' if messages else None, configuration, harmonization


def v320_update_turris_greylist_url(configuration, harmonization, dry_run, **kwargs):
    """
    Updates Turris Greylist feed URL.

    Every HTTP collector whose ``http_url`` starts with one of the retired
    Turris Greylist locations is re-pointed to the current location on
    ``view.sentinel.turris.cz``.

    Parameters:
        configuration: mapping of bot ID to bot configuration, modified in place
        harmonization: returned unchanged
        dry_run: not used by this function
        **kwargs: ignored

    Returns:
        tuple of (message or None, configuration, harmonization); the message
        contains one sentence per updated bot, None if nothing was changed
    """
    # The feed definition uses the "collector_http" module; the shorter name
    # is kept so that anything matched by the previous check still matches.
    collector_modules = (
        "intelmq.bots.collectors.http.collector_http",
        "intelmq.bots.collectors.http.collector",
    )
    # Both retired locations of the feed. str.startswith accepts a tuple.
    old_urls = (
        "https://www.turris.cz/greylist-data/greylist-latest.csv",
        "https://project.turris.cz/greylist-data/greylist-latest.csv",
    )
    new_url = "https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv"

    messages = []

    for bot in configuration.values():
        # Skip entries which are not bot definitions or not HTTP collectors.
        if not isinstance(bot, dict) or bot.get("module") not in collector_modules:
            continue
        # "parameters" may be absent or explicitly null in the configuration.
        parameters = bot.get("parameters") or {}
        http_url = parameters.get("http_url")
        if isinstance(http_url, str) and http_url.startswith(old_urls):
            parameters["http_url"] = new_url
            messages.append("Turris Greylist feed URL updated.")

    return ' '.join(messages) if messages else None, configuration, harmonization


UPGRADES = OrderedDict([
((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax,)),
((1, 1, 0), (v110_shadowserver_feednames, v110_deprecations)),
Expand All @@ -887,6 +904,7 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
((3, 0, 1), (v301_deprecations,)),
((3, 0, 2), ()),
((3, 1, 0), (v310_feed_changes, v310_shadowserver_feednames)),
((3, 2, 0), (v320_update_turris_greylist_url,)),
])

ALWAYS = (harmonization,)
3 changes: 0 additions & 3 deletions intelmq/tests/bots/parsers/turris/greylist-latest.csv

This file was deleted.

2 changes: 0 additions & 2 deletions intelmq/tests/bots/parsers/turris/greylist-latest.csv.license

This file was deleted.

147 changes: 123 additions & 24 deletions intelmq/tests/bots/parsers/turris/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,121 @@
# SPDX-FileCopyrightText: 2015 Sebastian Wagner
# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import base64
import os
import unittest

import intelmq.lib.test as test
from intelmq.bots.parsers.turris.parser import TurrisGreylistParserBot
from intelmq.lib import utils

with open(os.path.join(os.path.dirname(__file__), 'greylist-latest.csv'), 'rb') as fh:
RAW = base64.b64encode(fh.read()).decode()

OUTPUT1 = {'__type': 'Event',
'classification.type': 'scanner',
'event_description.text': 'dns',
'source.geolocation.cc': 'AU',
'source.asn': 15169,
'raw': 'MS4xLjEuMixBVSxkbnMsMTUxNjk=',
'source.ip': '1.1.1.2'}
OUTPUT2 = {'__type': 'Event',
'classification.type': 'scanner',
'event_description.text': 'telnet',
'raw': 'MS4yMC45Ni4xNDIsVEgsdGVsbmV0LDU2MTIw',
'source.geolocation.cc': 'TH',
'source.asn': 56120,
'source.ip': '1.20.96.142'}
INPUT = """\
# For the terms of use see https://view.sentinel.turris.cz/greylist-data/LICENSE.txt
Address,Tags
159.203.8.168,http
103.155.105.100,"ftp,http"
117.247.161.208,telnet
103.185.234.2,telnet
152.32.236.101,"ftp,http,port_scan,smtp,telnet"
61.219.175.42,telnet
"""

OUTPUT = [
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjE1OS4yMDMuOC4xNjgsaHR0cA==",
"source.ip": "159.203.8.168",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "ftp",
"classification.type": "brute-force",
"destination.port": 21,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
"source.ip": "103.155.105.100",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
"source.ip": "103.155.105.100",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjExNy4yNDcuMTYxLjIwOCx0ZWxuZXQ=",
"source.ip": "117.247.161.208",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xODUuMjM0LjIsdGVsbmV0",
"source.ip": "103.185.234.2",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "ftp",
"classification.type": "brute-force",
"destination.port": 21,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "http",
"classification.type": "brute-force",
"destination.port": 80,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"classification.type": "scanner",
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "smtp",
"classification.type": "brute-force",
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
"source.ip": "152.32.236.101",
"__type": "Event"
},
{
"protocol.transport": "tcp",
"protocol.application": "telnet",
"classification.type": "brute-force",
"destination.port": 23,
"raw": "QWRkcmVzcyxUYWdzCjYxLjIxOS4xNzUuNDIsdGVsbmV0",
"source.ip": "61.219.175.42",
"__type": "Event"
}
]


class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
Expand All @@ -37,12 +126,22 @@ class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
@classmethod
def set_bot(cls):
cls.bot_reference = TurrisGreylistParserBot
cls.default_input_message = {'__type': 'Report', 'raw': RAW}
cls.default_input_message = {'__type': 'Report', 'raw': utils.base64_encode(INPUT)}

def test_event(self):
self.run_bot()
self.assertMessageEqual(0, OUTPUT1)
self.assertMessageEqual(1, OUTPUT2)
self.assertMessageEqual(0, OUTPUT[0])
self.assertMessageEqual(1, OUTPUT[1])
self.assertMessageEqual(2, OUTPUT[2])
self.assertMessageEqual(3, OUTPUT[3])
self.assertMessageEqual(4, OUTPUT[4])
self.assertMessageEqual(5, OUTPUT[5])
self.assertMessageEqual(6, OUTPUT[6])
self.assertMessageEqual(7, OUTPUT[7])
self.assertMessageEqual(8, OUTPUT[8])
self.assertMessageEqual(9, OUTPUT[9])
self.assertMessageEqual(10, OUTPUT[10])


if __name__ == '__main__': # pragma: no cover
unittest.main()

0 comments on commit 61c45ac

Please sign in to comment.