diff --git a/CHANGELOG.md b/CHANGELOG.md index ebac8ecf5e..99c43214ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Core - `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll). +- `intelmq.lib.utils.unzip`: Ignore directories themselves when extracting data to prevent the extraction of empty data for directory entries (PR#2512 by Kamil Mankowski). - `intelmq.lib.mixins.cache.CacheMixin` was extended to support temporary storing messages in a cache queue (PR#2509 by Kamil Mankowski). @@ -34,6 +35,8 @@ - `intelmq.bots.parsers.shadowserver._config`: - Fetch schema before first run (PR#2482 by elsif2, fixes #2480). - `intelmq.bots.parsers.dataplane.parser`: Use ` | ` as field delimiter, fix parsing of AS names including `|` (PR#2488 by DigitalTrustCenter). +- all parsers: add `copy_collector_provided_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. + (PR#2513 by Kamil Mankowski). 
#### Experts - `intelmq.bots.experts.sieve.expert`: diff --git a/docs/admin/installation/linux-packages.md b/docs/admin/installation/linux-packages.md index 23d97c3d57..eccc4ab158 100644 --- a/docs/admin/installation/linux-packages.md +++ b/docs/admin/installation/linux-packages.md @@ -1,5 +1,5 @@ @@ -18,7 +18,6 @@ Native packages are currently provided for the following Linux distributions: - **Debian 11** (bullseye) - **Debian 12** (bookworm) - **openSUSE Tumbleweed** -- **openSUSE Leap 15.5** - **Ubuntu 20.04** (focal fossa) - **Ubuntu 22.04** (jammy jellyfish) @@ -27,7 +26,7 @@ Native packages are currently provided for the following Linux distributions: Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): ```bash -echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/Debian_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq +echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/Debian_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq.list curl -fsSL "https://download.opensuse.org/repositories/home:sebix:intelmq/Debian_$(lsb_release -rs)/Release.key" | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/intelmq.gpg > /dev/null sudo apt update sudo apt install intelmq intelmq-api intelmq-manager @@ -50,13 +49,19 @@ For Ubuntu you must enable the Universe repository which provides community-main Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): 1. Open the file `/etc/apt/sources.list` in an editor of your choice. Use `sudo` or the `root` user. - + 2. Append `universe` to this line: ``` deb http://[...].archive.ubuntu.com/ubuntu/ focal main universe ``` -3. Update the list of available packages and install IntelMQ: +3. 
Next, add the IntelMQ APT Repository for Ubuntu: +```bash +echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/xUbuntu_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq.list +curl -fsSL "https://download.opensuse.org/repositories/home:sebix:intelmq/xUbuntu_$(lsb_release -rs)/Release.key" | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/intelmq.gpg > /dev/null +``` + +4. Now update the list of available packages and install the IntelMQ packages: ```bash sudo apt update sudo apt install intelmq intelmq-api intelmq-manager diff --git a/docs/user/bots.md b/docs/user/bots.md index a36fc05051..c39b322376 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1331,6 +1331,17 @@ tweet text is sent separately and if allowed, links to pastebin are followed and ## Parser Bots +If not set differently during parsing, all parser bots copy the following fields from the report to an event: + + - `feed.accuracy` + - `feed.code` + - `feed.documentation` + - `feed.name` + - `feed.provider` + - `feed.url` + - `rtir_id` + - `time.observation` + ### Common parameters #### `default_fields` @@ -1346,6 +1357,18 @@ defaults_fields: protocol.transport: tcp ``` +#### `copy_collector_provided_fields` + +(optional, list) List of additional fields to be copied from the report (only applied if parsing the +event doesn't set the value). + +Example usage: + +```yaml +copy_collector_provided_fields: + - extra.file_name +``` + --- ### Abuse.ch Feodo Tracker
diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index ef09f51a3f..e4afe5268e 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1086,6 +1086,7 @@ class ParserBot(Bot): _default_message_type = 'Report' default_fields: Optional[dict] = {} + copy_collector_provided_fields: Optional[list] = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1131,6 +1132,11 @@ def _get_io_and_save_line_ending(self, raw: str) -> io.StringIO: self._line_ending = '\r\n' return data_io + def new_event(self, *args, **kwargs): + if self.copy_collector_provided_fields: + kwargs['copy_collector_provided_fields'] = self.copy_collector_provided_fields + return super().new_event(*args, **kwargs) + def parse_csv(self, report: libmessage.Report): """ A basic CSV parser. diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 4353dd5682..71186d2592 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -99,7 +99,7 @@ class Message(dict): _default_value_set = False def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: dict = None) -> None: + harmonization: dict = None, **_) -> None: try: classname = message['__type'].lower() del message['__type'] @@ -523,9 +523,13 @@ def __contains__(self, item: str) -> bool: class Event(Message): - - def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: Optional[dict] = None) -> None: + def __init__( + self, + message: Union[dict, tuple] = (), + auto: bool = False, + harmonization: Optional[dict] = None, + copy_collector_provided_fields: Optional[dict] = None, + ) -> None: """ Parameters: message: Give a report and feed.name, feed.url and @@ -552,6 +556,12 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, template['rtir_id'] = message['rtir_id'] if 'time.observation' in message: template['time.observation'] = message['time.observation'] + + if copy_collector_provided_fields: + for key in 
copy_collector_provided_fields: + if key not in message: + continue + template[key] = message.get(key) else: template = message super().__init__(template, auto, harmonization) @@ -560,7 +570,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, class Report(Message): def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: Optional[dict] = None) -> None: + harmonization: Optional[dict] = None, **_) -> None: """ Parameters: message: Passed along to Message's and dict's init. diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py index 42d551ad98..de59a223a6 100644 --- a/intelmq/lib/utils.py +++ b/intelmq/lib/utils.py @@ -538,7 +538,7 @@ def extract_tar(file): def extract(filename): return tar.extractfile(filename).read() - return tuple(file.name for file in tar.getmembers()), tar, extract + return tuple(file.name for file in tar.getmembers() if file.isfile()), tar, extract def extract_gzip(file): @@ -547,7 +547,7 @@ def extract_gzip(file): def extract_zip(file): zfp = zipfile.ZipFile(io.BytesIO(file), "r") - return zfp.namelist(), zfp, zfp.read + return [member.filename for member in zfp.infolist() if not member.is_dir()], zfp, zfp.read def unzip(file: bytes, extract_files: Union[bool, list], logger=None, diff --git a/intelmq/tests/assets/subdir.tar.gz b/intelmq/tests/assets/subdir.tar.gz new file mode 100644 index 0000000000..03daf10d51 Binary files /dev/null and b/intelmq/tests/assets/subdir.tar.gz differ diff --git a/intelmq/tests/assets/subdir.tar.gz.license b/intelmq/tests/assets/subdir.tar.gz.license new file mode 100644 index 0000000000..056d32ec61 --- /dev/null +++ b/intelmq/tests/assets/subdir.tar.gz.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2024 CERT.at GmbH + +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/assets/subdir.zip b/intelmq/tests/assets/subdir.zip new file mode 100644 index 0000000000..5fba87a856 Binary files /dev/null and b/intelmq/tests/assets/subdir.zip 
differ diff --git a/intelmq/tests/assets/subdir.zip.license b/intelmq/tests/assets/subdir.zip.license new file mode 100644 index 0000000000..056d32ec61 --- /dev/null +++ b/intelmq/tests/assets/subdir.zip.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2024 CERT.at GmbH + +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/collectors/http/test_collector.py b/intelmq/tests/bots/collectors/http/test_collector.py index fa315c6931..cefdacc866 100644 --- a/intelmq/tests/bots/collectors/http/test_collector.py +++ b/intelmq/tests/bots/collectors/http/test_collector.py @@ -143,6 +143,25 @@ def test_zip(self, mocker): self.assertMessageEqual(0, output0) self.assertMessageEqual(1, output1) + def test_zip_subdirs(self, mocker): + """ + Test unzipping when the zip has subdirectories + """ + prepare_mocker(mocker) + self.run_bot(parameters={'http_url': 'http://localhost/subdir.zip', + 'name': 'Example feed', + }, + iterations=1) + + output0 = OUTPUT[0].copy() + output0['feed.url'] = 'http://localhost/subdir.zip' + output0['extra.file_name'] = 'subdir/bar' + output1 = OUTPUT[1].copy() + output1['feed.url'] = 'http://localhost/subdir.zip' + output1['extra.file_name'] = 'subdir/foo' + self.assertMessageEqual(0, output0) + self.assertMessageEqual(1, output1) + @test.skip_exotic() def test_pgp(self, mocker): """ diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index 0ccf05813c..43b83f71d5 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -167,6 +167,19 @@ def test_bad_default_fields_parameter_2(self): self.assertAnyLoglineEqual(message="Invalid value of key 'source.port' in default_fields parameter.", levelname="ERROR") + def test_copy_collector_provided_fields_from_report(self): + """Allow copying custom fields from the report message to support more context from reports""" + report = {**EXAMPLE_SHORT, "extra.file_name": "file.txt", "extra.field2": "value2"} + 
self.input_message = report + + self.run_bot(parameters={"copy_collector_provided_fields": + ["extra.file_name", "extra.not_exists"]}) + + output_message = EXAMPLE_EVENT.copy() + output_message["extra.file_name"] = "file.txt" + self.assertMessageEqual(0, output_message) + + def test_missing_raw(self): """ Test DummyParserBot with missing raw. """ self.input_message = EXAMPLE_EMPTY_REPORT diff --git a/intelmq/tests/lib/test_utils.py b/intelmq/tests/lib/test_utils.py index b99a501382..ddb34408a3 100644 --- a/intelmq/tests/lib/test_utils.py +++ b/intelmq/tests/lib/test_utils.py @@ -260,6 +260,14 @@ def test_unzip_tar_gz_return_names(self): self.assertEqual(tuple(result), (('bar', b'bar text\n'), ('foo', b'foo text\n'))) + def test_unzip_tar_gz_with_subdir(self): + """ Test the unzip function with a tar gz file containing a subdirectory and return_names. Test that the directories themselves are ignored. """ + filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.tar.gz') + with open(filename, 'rb') as fh: + result = utils.unzip(fh.read(), extract_files=True, return_names=True) + self.assertEqual(tuple(result), (('subdir/foo', b'foo text\n'), + ('subdir/bar', b'bar text\n'))) + def test_unzip_gz(self): """ Test the unzip function with a gz file. """ filename = os.path.join(os.path.dirname(__file__), '../assets/foobar.gz') @@ -289,6 +297,14 @@ def test_unzip_zip_return_names(self): self.assertEqual(tuple(result), (('bar', b'bar text\n'), ('foo', b'foo text\n'))) + def test_unzip_zip_with_subdir(self): + """ Test the unzip function with a zip containing a subdirectory and returning names. 
Test that the directories themselves are ignored.""" + filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.zip') + with open(filename, 'rb') as fh: + result = utils.unzip(fh.read(), extract_files=True, return_names=True) + self.assertEqual(tuple(result), (('subdir/bar', b'bar text\n'), + ('subdir/foo', b'foo text\n'))) + def test_file_name_from_response(self): """ test file_name_from_response """ response = requests.Response()