From 28f591702fe6997b8519282c975f61e5b6b2d787 Mon Sep 17 00:00:00 2001 From: Ramo Date: Thu, 14 Nov 2024 14:06:05 +1100 Subject: [PATCH] Small cleanup in Dataframe -> FS exporter (#932) * Small cleanup in Dataframe -> FS exporter * Small bugfix in gcp_logging_timesketch --- dftimewolf/lib/exporters/df_to_filesystem.py | 34 ++++++++----------- .../lib/processors/gcp_logging_timesketch.py | 2 +- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/dftimewolf/lib/exporters/df_to_filesystem.py b/dftimewolf/lib/exporters/df_to_filesystem.py index 42237e043..325c3365c 100644 --- a/dftimewolf/lib/exporters/df_to_filesystem.py +++ b/dftimewolf/lib/exporters/df_to_filesystem.py @@ -77,10 +77,7 @@ def SetUp(self, output_formats: str, output_directory: str) -> None: s.strip().lower() for s in output_formats.split(',') if s] self._formats = list(filter(None, self._formats)) - invalid_formats = [] - for f in self._formats: - if f not in _VALID_FORMATS: - invalid_formats.append(f) + invalid_formats = [f for f in self._formats if f not in _VALID_FORMATS] if invalid_formats: self.ModuleError( f'Invalid format(s) specified: {", ".join(invalid_formats)}', @@ -125,25 +122,24 @@ def _ExportSingleContainer(self, container: containers.DataFrame) -> None: Args: container: The dataframe container to export. """ - for f in _VALID_FORMATS: - if f in self._formats: - output_path = os.path.join( - self._output_dir, - f'{_ConvertToValidFilename(container.name)}{_EXTENSION_MAP[f]}') + for f in self._formats: + output_path = os.path.join( + self._output_dir, + f'{_ConvertToValidFilename(container.name)}{_EXTENSION_MAP[f]}') - self.logger.debug(f'Exporting {container.name} to {output_path}') + self.logger.debug(f'Exporting {container.name} to {output_path}') - self._ExportSingleDataframe(df=container.data_frame, - output_format=f, - output_path=output_path) + self._ExportSingleDataframe(df=container.data_frame, + output_format=f, + output_path=output_path) - self.state.StoreContainer(container=containers.File( - name=os.path.basename(output_path), - path=output_path, - description=container.description)) + self.state.StoreContainer(container=containers.File( + name=os.path.basename(output_path), + path=output_path, + description=container.description)) - self.logger.debug( - f'Export of {container.name} to {output_path} complete') + self.logger.debug( + f'Export of {container.name} to {output_path} complete') def _ExportSingleDataframe(self, df: pd.DataFrame, diff --git a/dftimewolf/lib/processors/gcp_logging_timesketch.py b/dftimewolf/lib/processors/gcp_logging_timesketch.py index 0059417d3..fabc108e4 100644 --- a/dftimewolf/lib/processors/gcp_logging_timesketch.py +++ b/dftimewolf/lib/processors/gcp_logging_timesketch.py @@ -187,7 +187,7 @@ def _ParseRequestMetadata( timesketch_record['gcloud_command_partial'] = command_string if 'invocation-id/' in user_agent: - invocation_regex = re.search(r'invocation-id/()', user_agent) + invocation_regex = re.search(r'invocation-id/([^\s]+)', user_agent) if invocation_regex: invocation_id = str(invocation_regex.group(1)) timesketch_record['gcloud_command_id'] = invocation_id