# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

from intelmq.lib.bot import ParserBot

# Destination port reported for each brute-forced service.
PORTS = {
    "ftp": 21,
    "telnet": 23,
    "http": 80,
    # smtp uses both 25 and 587, therefore we can't say for certain
}

# Tags that are reported as a brute-force attempt against a TCP service.
_BRUTE_FORCE_TAGS = frozenset({"smtp", "http", "ftp", "telnet"})


class TurrisGreylistParserBot(ParserBot):
    """Parse the Turris Greylist feed"""

    # Reuse the CSV-with-header helpers of ParserBot and drop "#" comment lines.
    parse = ParserBot.parse_csv_dict
    recover_line = ParserBot.recover_line_csv_dict
    _ignore_lines_starting = ["#"]

    def parse_line(self, line, report):
        """
        Yield one event per supported tag of a greylist row.

        Parameters:
            line: mapping of one CSV row; the "Address" and "Tags" columns are read
            report: the report the row belongs to, handed to ``new_event``

        Yields:
            A "brute-force" event for each of the smtp/http/ftp/telnet tags
            and a "scanner" event for the port_scan tag. Any other tag
            produces no event.
        """
        # `or ""` also covers a column that is present but None, which
        # `line.get("Tags", "")` would not. Presumably the rows come from
        # csv.DictReader, which fills missing columns with None - TODO confirm.
        tags = line.get("Tags") or ""

        for tag in tags.split(","):
            if tag in _BRUTE_FORCE_TAGS:
                event = self.new_event(report)
                event.add("protocol.transport", "tcp")
                event.add("protocol.application", tag)
                event.add("classification.type", "brute-force")
                # smtp has no entry in PORTS, so no port is reported for it
                port = PORTS.get(tag)
                if port is not None:
                    event.add("destination.port", port)

            elif tag == "port_scan":
                event = self.new_event(report)
                event.add("classification.type", "scanner")

            else:
                # cases such as "haas", "hass_logged" and "hass_not_logged" come from CZ.NIC HaaS Feed (available in IntelMQ)
                # it's better to use that feed for this data (it's data from SSH honeypot)
                continue

            event.add("raw", self.recover_line(line))
            event.add("source.ip", line.get("Address"))
            yield event


BOT = TurrisGreylistParserBot
def v320_update_turris_greylist_url(configuration, harmonization, dry_run, **kwargs):
    """
    Updates Turris Greylist feed URL.

    Every HTTP collector whose ``http_url`` starts with one of the retired
    Turris Greylist locations is pointed at the new Sentinel location.

    Parameters:
        configuration: mapping of bot id to bot definition; matching
            entries are modified in place
        harmonization: returned unchanged
        dry_run: not consulted by this function
        **kwargs: ignored

    Returns:
        Tuple ``(message, configuration, harmonization)``. ``message`` holds
        one sentence per updated bot, or is None when nothing was changed.
    """
    # The HTTP collector module is named "collector_http"; the shorter spelling
    # is still accepted so that anything matched before keeps matching.
    collector_modules = (
        "intelmq.bots.collectors.http.collector_http",
        "intelmq.bots.collectors.http.collector",
    )
    # Both retired host names of the feed.
    old_urls = (
        "https://www.turris.cz/greylist-data/greylist-latest.csv",
        "https://project.turris.cz/greylist-data/greylist-latest.csv",
    )
    new_url = "https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv"

    messages = []

    for bot in configuration.values():
        if bot.get("module") not in collector_modules:
            continue

        # Tolerate a missing or empty "parameters"/"http_url" entry.
        parameters = bot.get("parameters") or {}
        http_url = parameters.get("http_url") or ""

        # NOTE(review): only http_url is rewritten; confirm whether other
        # URL-valued parameters of the collector need the same treatment.
        if isinstance(http_url, str) and http_url.startswith(old_urls):
            parameters["http_url"] = new_url
            messages.append("Turris Greylist feed URL updated.")

    return ' '.join(messages) if messages else None, configuration, harmonization
# SPDX-FileCopyrightText: 2023 Filip Pokorný
#
# SPDX-License-Identifier: AGPL-3.0-or-later

import unittest

import intelmq.lib.test as test
from intelmq.bots.parsers.turris.parser import TurrisGreylistParserBot
from intelmq.lib import utils

INPUT = """\
# For the terms of use see https://view.sentinel.turris.cz/greylist-data/LICENSE.txt
Address,Tags
159.203.8.168,http
103.155.105.100,"ftp,http"
117.247.161.208,telnet
103.185.234.2,telnet
152.32.236.101,"ftp,http,port_scan,smtp,telnet"
61.219.175.42,telnet
"""

# Expected base64 "raw" value for each data row of INPUT, in file order.
RAW_159 = "QWRkcmVzcyxUYWdzCjE1OS4yMDMuOC4xNjgsaHR0cA=="
RAW_103_155 = "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi"
RAW_117 = "QWRkcmVzcyxUYWdzCjExNy4yNDcuMTYxLjIwOCx0ZWxuZXQ="
RAW_103_185 = "QWRkcmVzcyxUYWdzCjEwMy4xODUuMjM0LjIsdGVsbmV0"
RAW_152 = "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi"
RAW_61 = "QWRkcmVzcyxUYWdzCjYxLjIxOS4xNzUuNDIsdGVsbmV0"


def _brute_force(source_ip, raw, application, port=None):
    """Build the expected brute-force event; the port is left out when None."""
    expected = {
        "__type": "Event",
        "classification.type": "brute-force",
        "protocol.transport": "tcp",
        "protocol.application": application,
        "raw": raw,
        "source.ip": source_ip,
    }
    if port is not None:
        expected["destination.port"] = port
    return expected


def _scanner(source_ip, raw):
    """Build the expected scanner event, which carries no protocol fields."""
    return {
        "__type": "Event",
        "classification.type": "scanner",
        "raw": raw,
        "source.ip": source_ip,
    }


# One expected event per supported tag, in the order the tags appear in INPUT.
OUTPUT = [
    _brute_force("159.203.8.168", RAW_159, "http", 80),
    _brute_force("103.155.105.100", RAW_103_155, "ftp", 21),
    _brute_force("103.155.105.100", RAW_103_155, "http", 80),
    _brute_force("117.247.161.208", RAW_117, "telnet", 23),
    _brute_force("103.185.234.2", RAW_103_185, "telnet", 23),
    _brute_force("152.32.236.101", RAW_152, "ftp", 21),
    _brute_force("152.32.236.101", RAW_152, "http", 80),
    _scanner("152.32.236.101", RAW_152),
    _brute_force("152.32.236.101", RAW_152, "smtp"),
    _brute_force("152.32.236.101", RAW_152, "telnet", 23),
    _brute_force("61.219.175.42", RAW_61, "telnet", 23),
]


class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
    """
    Run TurrisGreylistParserBot on INPUT and compare its messages with OUTPUT.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = TurrisGreylistParserBot
        cls.default_input_message = {'__type': 'Report', 'raw': utils.base64_encode(INPUT)}

    def test_event(self):
        self.run_bot()
        # Message i of the bot must equal OUTPUT[i].
        for position, expected in enumerate(OUTPUT):
            self.assertMessageEqual(position, expected)


if __name__ == '__main__':  # pragma: no cover
    unittest.main()