Skip to content

Commit

Permalink
Small cleanup in Dataframe -> FS exporter (#932)
Browse files: browse the repository at this point in the history.
* Small cleanup in Dataframe -> FS exporter

* Small bugfix in gcp_logging_timesketch
  • Loading branch information
ramo-j authored Nov 14, 2024
1 parent 1043cd1 commit 28f5917
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 20 deletions.
34 changes: 15 additions & 19 deletions dftimewolf/lib/exporters/df_to_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ def SetUp(self, output_formats: str, output_directory: str) -> None:
s.strip().lower() for s in output_formats.split(',') if s]
self._formats = list(filter(None, self._formats))

invalid_formats = []
for f in self._formats:
if f not in _VALID_FORMATS:
invalid_formats.append(f)
invalid_formats = [f for f in self._formats if f not in _VALID_FORMATS]
if invalid_formats:
self.ModuleError(
f'Invalid format(s) specified: {", ".join(invalid_formats)}',
Expand Down Expand Up @@ -125,25 +122,24 @@ def _ExportSingleContainer(self, container: containers.DataFrame) -> None:
Args:
container: The dataframe container to export.
"""
for f in _VALID_FORMATS:
if f in self._formats:
output_path = os.path.join(
self._output_dir,
f'{_ConvertToValidFilename(container.name)}{_EXTENSION_MAP[f]}')
for f in self._formats:
output_path = os.path.join(
self._output_dir,
f'{_ConvertToValidFilename(container.name)}{_EXTENSION_MAP[f]}')

self.logger.debug(f'Exporting {container.name} to {output_path}')
self.logger.debug(f'Exporting {container.name} to {output_path}')

self._ExportSingleDataframe(df=container.data_frame,
output_format=f,
output_path=output_path)
self._ExportSingleDataframe(df=container.data_frame,
output_format=f,
output_path=output_path)

self.state.StoreContainer(container=containers.File(
name=os.path.basename(output_path),
path=output_path,
description=container.description))
self.state.StoreContainer(container=containers.File(
name=os.path.basename(output_path),
path=output_path,
description=container.description))

self.logger.debug(
f'Export of {container.name} to {output_path} complete')
self.logger.debug(
f'Export of {container.name} to {output_path} complete')

def _ExportSingleDataframe(self,
df: pd.DataFrame,
Expand Down
2 changes: 1 addition & 1 deletion dftimewolf/lib/processors/gcp_logging_timesketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def _ParseRequestMetadata(
timesketch_record['gcloud_command_partial'] = command_string

if 'invocation-id/' in user_agent:
invocation_regex = re.search(r'invocation-id/()', user_agent)
invocation_regex = re.search(r'invocation-id/([^\s]+)', user_agent)
if invocation_regex:
invocation_id = str(invocation_regex.group(1))
timesketch_record['gcloud_command_id'] = invocation_id
Expand Down

0 comments on commit 28f5917

Please sign in to comment.