From 8620d9fda40ea242ba024990c4d6f4cf4e36415d Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 9 Jul 2024 11:49:29 +0200 Subject: [PATCH 1/8] FIX: Support for extracting data from archives with dirs When zip or tar archive contains directories, they appear in the default listings in addition to files they contain. It causes exceptions or extracting empty data, what eventually causes issues on creating a report message. --- CHANGELOG.md | 2 ++ intelmq/lib/utils.py | 4 ++-- intelmq/tests/assets/subdir.tar.gz | Bin 0 -> 183 bytes intelmq/tests/assets/subdir.tar.gz.license | 3 +++ intelmq/tests/assets/subdir.zip | Bin 0 -> 430 bytes intelmq/tests/assets/subdir.zip.license | 3 +++ .../bots/collectors/http/test_collector.py | 19 ++++++++++++++++++ intelmq/tests/lib/test_utils.py | 16 +++++++++++++++ 8 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 intelmq/tests/assets/subdir.tar.gz create mode 100644 intelmq/tests/assets/subdir.tar.gz.license create mode 100644 intelmq/tests/assets/subdir.zip create mode 100644 intelmq/tests/assets/subdir.zip.license diff --git a/CHANGELOG.md b/CHANGELOG.md index f6fb896a73..8de5546380 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ ### Core - `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll). +- `intelmq.lib.utils.unzip`: Filter out directory entries when extracting data fixing the issue that + archives with directories causes extracting empty data for a directory entry (PR# by Kamil Mankowski). 
### Development diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py index 42d551ad98..de59a223a6 100644 --- a/intelmq/lib/utils.py +++ b/intelmq/lib/utils.py @@ -538,7 +538,7 @@ def extract_tar(file): def extract(filename): return tar.extractfile(filename).read() - return tuple(file.name for file in tar.getmembers()), tar, extract + return tuple(file.name for file in tar.getmembers() if file.isfile()), tar, extract def extract_gzip(file): @@ -547,7 +547,7 @@ def extract_gzip(file): def extract_zip(file): zfp = zipfile.ZipFile(io.BytesIO(file), "r") - return zfp.namelist(), zfp, zfp.read + return [member.filename for member in zfp.infolist() if not member.is_dir()], zfp, zfp.read def unzip(file: bytes, extract_files: Union[bool, list], logger=None, diff --git a/intelmq/tests/assets/subdir.tar.gz b/intelmq/tests/assets/subdir.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..03daf10d51d353d2180e5c9cddb1d997e625abcd GIT binary patch literal 183 zcmV;o07(BIiwFS6_l#x$1MQSS3c@fDMYHx4a|36RNphYR6^crsNx|bg(x9#?E@F|) z+x$fa^26|w=Ke5D`_nj@Y9L}@21$w@-?KmjoE_VnVqG13I000N?S_%LF literal 0 HcmV?d00001 diff --git a/intelmq/tests/assets/subdir.tar.gz.license b/intelmq/tests/assets/subdir.tar.gz.license new file mode 100644 index 0000000000..056d32ec61 --- /dev/null +++ b/intelmq/tests/assets/subdir.tar.gz.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2024 CERT.at GmbH + +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/assets/subdir.zip b/intelmq/tests/assets/subdir.zip new file mode 100644 index 0000000000000000000000000000000000000000..5fba87a8563e7317c513e6f8284e59156355245b GIT binary patch literal 430 zcmWIWW@Zs#W&nZUml0qZO0Waz;?ks)%p&~&pdv1?qNlY^-pjvuuyO(=L6{4nC@HZB zh!jdvD@wQ!8Xf>OO!rdv#-brDKOcx-8W@@EnQ=Kp1!xNh2sFHP1kq4uaWN=>cnpjT z5 Date: Wed, 10 Jul 2024 09:09:45 +0200 Subject: [PATCH 2/8] Improve descriptions Co-authored-by: Sebastian --- CHANGELOG.md | 3 +-- intelmq/tests/lib/test_utils.py | 4 ++-- 2 files changed, 3 
insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8de5546380..ff3d2ee5a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,8 +13,7 @@ ### Core - `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll). -- `intelmq.lib.utils.unzip`: Filter out directory entries when extracting data fixing the issue that - archives with directories causes extracting empty data for a directory entry (PR# by Kamil Mankowski). +- `intelmq.lib.utils.unzip`: Ignore directories themselves when extracting data to prevent the extraction of empty data for directory entries (PR#2512 by Kamil Mankowski). ### Development diff --git a/intelmq/tests/lib/test_utils.py b/intelmq/tests/lib/test_utils.py index daba629960..ddb34408a3 100644 --- a/intelmq/tests/lib/test_utils.py +++ b/intelmq/tests/lib/test_utils.py @@ -261,7 +261,7 @@ def test_unzip_tar_gz_return_names(self): ('foo', b'foo text\n'))) def test_unzip_tar_gz_with_subdir(self): - """ Test the unzip function with a tar gz file and return_names. """ + """ Test the unzip function with a tar gz file containing a subdirectory and return_names. Test that the directories themselves are ignored. """ filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.tar.gz') with open(filename, 'rb') as fh: result = utils.unzip(fh.read(), extract_files=True, return_names=True) @@ -298,7 +298,7 @@ def test_unzip_zip_return_names(self): ('foo', b'foo text\n'))) def test_unzip_zip_with_subdir(self): - """ Test the unzip function with a zip containing a subdirectory and returning names.""" + """ Test the unzip function with a zip containing a subdirectory and returning names.
Test that the directories themselves are ignored.""" filename = os.path.join(os.path.dirname(__file__), '../assets/subdir.zip') with open(filename, 'rb') as fh: result = utils.unzip(fh.read(), extract_files=True, return_names=True) From 93206adc1c372ecbf2e4f85d289a44fb85c1915a Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 9 Jul 2024 14:28:22 +0200 Subject: [PATCH 3/8] ENH: Add ability to copy more fields from the report Sometimes collectors set more context information, e.g. the source file name. Currently, parsers usually remove that non-standard information. With this change, we let the user decide to copy more information to the event. In addition, the currently copied fields were documented. --- CHANGELOG.md | 2 ++ docs/user/bots.md | 23 +++++++++++++++++++++++ intelmq/lib/bot.py | 8 ++++++++ intelmq/tests/lib/test_parser_bot.py | 13 +++++++++++++ 4 files changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6fb896a73..c6bcde9321 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ - `intelmq.bots.parsers.shadowserver._config`: - Fetch schema before first run (PR#2482 by elsif2, fixes #2480). - `intelmq.bots.parsers.dataplane.parser`: Use ` | ` as field delimiter, fix parsing of AS names including `|` (PR#2488 by DigitalTrustCenter). +- all parsers: add `copy_custom_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. + (PR# by Kamil Mankowski).
#### Experts - `intelmq.bots.experts.sieve.expert`: diff --git a/docs/user/bots.md b/docs/user/bots.md index 5b826843db..351cd0da86 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1331,6 +1331,17 @@ tweet text is sent separately and if allowed, links to pastebin are followed and ## Parser Bots +If not set differently during parsing, all parser bots copy the following fields from the report to an event: + + - `feed.accuracy` + - `feed.code` + - `feed.documentation` + - `feed.name` + - `feed.provider` + - `feed.url` + - `rtir_id` + - `time.observation` + ### Common parameters #### `default_fields` @@ -1346,6 +1357,18 @@ defaults_fields: protocol.transport: tcp ``` +#### `copy_custom_fields` + +(optional, list) List of additional fields to be copy from the report (only applied if parsing the +event doesn't set the value). + +Example usage: + +```yaml +copy_custom_fields: + - extra.file_name +``` + --- ### Abuse.ch Feodo Tracker
diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index f1b0ed3335..dd0696810b 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1082,6 +1082,7 @@ class ParserBot(Bot): _default_message_type = 'Report' default_fields: Optional[dict] = {} + copy_custom_fields: Optional[list] = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1245,6 +1246,13 @@ def process(self): for key, value in self.default_fields.items(): event.add(key, value, overwrite=False) + if self.copy_custom_fields: + for key in self.copy_custom_fields: + if key not in report: + continue + for event in events: + event.add(key, report.get(key), overwrite=False) + except Exception: self.logger.exception('Failed to parse line.') self.__failed.append((traceback.format_exc(), self._current_line)) diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index 0ccf05813c..c1d9faa6d0 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -167,6 +167,19 @@ def test_bad_default_fields_parameter_2(self): self.assertAnyLoglineEqual(message="Invalid value of key 'source.port' in default_fields parameter.", levelname="ERROR") + def test_copy_custom_fields_from_report(self): + """Allow copying custom fields from the report message to support more context from reports""" + report = {**EXAMPLE_SHORT, "extra.file_name": "file.txt", "extra.field2": "value2"} + self.input_message = report + + self.run_bot(parameters={"copy_custom_fields": + ["extra.file_name", "extra.not_exists"]}) + + output_message = EXAMPLE_EVENT.copy() + output_message["extra.file_name"] = "file.txt" + self.assertMessageEqual(0, output_message) + + def test_missing_raw(self): """ Test DummyParserBot with missing raw. 
""" self.input_message = EXAMPLE_EMPTY_REPORT From 472bf47581536656b919af64e12b8bc7927c8f32 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 24 Jul 2024 16:24:13 +0200 Subject: [PATCH 4/8] Rename and move to the library --- CHANGELOG.md | 4 ++-- docs/user/bots.md | 4 ++-- intelmq/lib/bot.py | 14 ++++++-------- intelmq/lib/message.py | 20 +++++++++++++++----- intelmq/tests/lib/test_parser_bot.py | 4 ++-- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6bcde9321..df7ca001c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,8 +32,8 @@ - `intelmq.bots.parsers.shadowserver._config`: - Fetch schema before first run (PR#2482 by elsif2, fixes #2480). - `intelmq.bots.parsers.dataplane.parser`: Use ` | ` as field delimiter, fix parsing of AS names including `|` (PR#2488 by DigitalTrustCenter). -- all parsers: add `copy_custom_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. - (PR# by Kamil Mankowski). +- all parsers: add `copy_collector_provided_fields` parameter allowing copying additional fields from the report, e.g. `extra.file_name`. + (PR#2513 by Kamil Mankowski). #### Experts - `intelmq.bots.experts.sieve.expert`: diff --git a/docs/user/bots.md b/docs/user/bots.md index 351cd0da86..2c8ec6e9d9 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -1357,7 +1357,7 @@ defaults_fields: protocol.transport: tcp ``` -#### `copy_custom_fields` +#### `copy_collector_provided_fields` (optional, list) List of additional fields to be copy from the report (only applied if parsing the event doesn't set the value). @@ -1365,7 +1365,7 @@ event doesn't set the value). 
Example usage: ```yaml -copy_custom_fields: +copy_collector_provided_fields: - extra.file_name ``` diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index dd0696810b..49591f9a12 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1082,7 +1082,7 @@ class ParserBot(Bot): _default_message_type = 'Report' default_fields: Optional[dict] = {} - copy_custom_fields: Optional[list] = [] + copy_collector_provided_fields: Optional[list] = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1127,6 +1127,11 @@ def _get_io_and_save_line_ending(self, raw: str) -> io.StringIO: if not self._line_ending or isinstance(self._line_ending, tuple): self._line_ending = '\r\n' return data_io + + def new_event(self, *args, **kwargs): + if self.copy_collector_provided_fields: + kwargs['copy_collector_provided_fields'] = self.copy_collector_provided_fields + return super().new_event(*args, **kwargs) def parse_csv(self, report: libmessage.Report): """ @@ -1246,13 +1251,6 @@ def process(self): for key, value in self.default_fields.items(): event.add(key, value, overwrite=False) - if self.copy_custom_fields: - for key in self.copy_custom_fields: - if key not in report: - continue - for event in events: - event.add(key, report.get(key), overwrite=False) - except Exception: self.logger.exception('Failed to parse line.') self.__failed.append((traceback.format_exc(), self._current_line)) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index e99e227313..603bcd2814 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -98,7 +98,7 @@ class Message(dict): _default_value_set = False def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: dict = None) -> None: + harmonization: dict = None, **_) -> None: try: classname = message['__type'].lower() del message['__type'] @@ -522,9 +522,13 @@ def __contains__(self, item: str) -> bool: class Event(Message): - - def __init__(self, message: Union[dict, tuple] = (), 
auto: bool = False, - harmonization: Optional[dict] = None) -> None: + def __init__( + self, + message: Union[dict, tuple] = (), + auto: bool = False, + harmonization: Optional[dict] = None, + copy_collector_provided_fields: Optional[dict] = None, + ) -> None: """ Parameters: message: Give a report and feed.name, feed.url and @@ -551,6 +555,12 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, template['rtir_id'] = message['rtir_id'] if 'time.observation' in message: template['time.observation'] = message['time.observation'] + + if copy_collector_provided_fields: + for key in copy_collector_provided_fields: + if key not in message: + continue + template[key] = message.get(key) else: template = message super().__init__(template, auto, harmonization) @@ -559,7 +569,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, class Report(Message): def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, - harmonization: Optional[dict] = None) -> None: + harmonization: Optional[dict] = None, **_) -> None: """ Parameters: message: Passed along to Message's and dict's init. 
diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index c1d9faa6d0..43b83f71d5 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -167,12 +167,12 @@ def test_bad_default_fields_parameter_2(self): self.assertAnyLoglineEqual(message="Invalid value of key 'source.port' in default_fields parameter.", levelname="ERROR") - def test_copy_custom_fields_from_report(self): + def test_copy_collector_provided_fields_from_report(self): """Allow copying custom fields from the report message to support more context from reports""" report = {**EXAMPLE_SHORT, "extra.file_name": "file.txt", "extra.field2": "value2"} self.input_message = report - self.run_bot(parameters={"copy_custom_fields": + self.run_bot(parameters={"copy_collector_provided_fields": ["extra.file_name", "extra.not_exists"]}) output_message = EXAMPLE_EVENT.copy() From 903708824ecaf38b53096f78af58ae7e44c64725 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 25 Jul 2024 10:16:43 +0200 Subject: [PATCH 5/8] Fix whitespaces --- intelmq/lib/bot.py | 2 +- intelmq/lib/message.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index 49591f9a12..4325ecbb96 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1127,7 +1127,7 @@ def _get_io_and_save_line_ending(self, raw: str) -> io.StringIO: if not self._line_ending or isinstance(self._line_ending, tuple): self._line_ending = '\r\n' return data_io - + def new_event(self, *args, **kwargs): if self.copy_collector_provided_fields: kwargs['copy_collector_provided_fields'] = self.copy_collector_provided_fields diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 603bcd2814..bdbf921d26 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -555,7 +555,7 @@ def __init__( template['rtir_id'] = message['rtir_id'] if 'time.observation' in message: template['time.observation'] = 
message['time.observation'] - + if copy_collector_provided_fields: for key in copy_collector_provided_fields: if key not in message: From 9d58beb56b23e7fcdded120b96dc2ad25f3cb024 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 13 Aug 2024 20:57:50 +0200 Subject: [PATCH 6/8] docs: installation: name apt source file .list fixes https://github.com/certtools/intelmq/issues/2496 --- docs/admin/installation/linux-packages.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/admin/installation/linux-packages.md b/docs/admin/installation/linux-packages.md index 23d97c3d57..d32a594c2d 100644 --- a/docs/admin/installation/linux-packages.md +++ b/docs/admin/installation/linux-packages.md @@ -1,5 +1,5 @@ @@ -27,7 +27,7 @@ Native packages are currently provided for the following Linux distributions: Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): ```bash -echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/Debian_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq +echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/Debian_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq.list curl -fsSL "https://download.opensuse.org/repositories/home:sebix:intelmq/Debian_$(lsb_release -rs)/Release.key" | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/intelmq.gpg > /dev/null sudo apt update sudo apt install intelmq intelmq-api intelmq-manager @@ -50,7 +50,7 @@ For Ubuntu you must enable the Universe repository which provides community-main Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): 1. Open the file `/etc/apt/sources.list` in an editor of your choice. Use `sudo` or the `root` user. - + 2. 
Append `universe` to this line: ``` deb http://[...].archive.ubuntu.com/ubuntu/ focal main universe From 5da874215abae6352f824a6f8b0315d884c3b943 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 13 Aug 2024 21:05:59 +0200 Subject: [PATCH 7/8] docs: remove leap 15.5 from installation docs --- docs/admin/installation/linux-packages.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/admin/installation/linux-packages.md b/docs/admin/installation/linux-packages.md index d32a594c2d..8e60276731 100644 --- a/docs/admin/installation/linux-packages.md +++ b/docs/admin/installation/linux-packages.md @@ -18,7 +18,6 @@ Native packages are currently provided for the following Linux distributions: - **Debian 11** (bullseye) - **Debian 12** (bookworm) - **openSUSE Tumbleweed** -- **openSUSE Leap 15.5** - **Ubuntu 20.04** (focal fossa) - **Ubuntu 22.04** (jammy jellyfish) From 6aa11474fb4d770ccb5cbe24b3a209a2325c2221 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 13 Aug 2024 21:06:15 +0200 Subject: [PATCH 8/8] docs: add missing apt configuration to ubuntu installation fixes https://github.com/certtools/intelmq/issues/2517 --- docs/admin/installation/linux-packages.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/admin/installation/linux-packages.md b/docs/admin/installation/linux-packages.md index 8e60276731..eccc4ab158 100644 --- a/docs/admin/installation/linux-packages.md +++ b/docs/admin/installation/linux-packages.md @@ -55,7 +55,13 @@ Add the repository to the package manager and install IntelMQ (packages `intelmq deb http://[...].archive.ubuntu.com/ubuntu/ focal main universe ``` -3. Update the list of available packages and install IntelMQ: +3. 
Next, add the IntelMQ APT Repository for Ubuntu: +```bash +echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/xUbuntu_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq.list +curl -fsSL "https://download.opensuse.org/repositories/home:sebix:intelmq/xUbuntu_$(lsb_release -rs)/Release.key" | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/intelmq.gpg > /dev/null +``` + +4. Now update the list of available packages and install the IntelMQ packages: ```bash sudo apt update sudo apt install intelmq intelmq-api intelmq-manager