diff --git a/data/winevt_features.yaml b/data/winevt_features.yaml new file mode 100644 index 0000000000..ea898f926f --- /dev/null +++ b/data/winevt_features.yaml @@ -0,0 +1,650 @@ +# Config file for the windows event (winevt) plugin of the feature extraction +# analyzer. +# A winevt feature extraction definition looks like this: +# +# name: +# +# source_name: Type: list[str] | REQUIRED | case-insensitive +# A list of source names to match against. Multiple +# entries will be checked with OR. +# +# provider_identifier: Type: list[str] | OPTIONAL | case-insensitive +# A list of provider identifiers to match against. +# Multiple entries will be checked with OR. +# +# event_version: Type: int | REQUIRED +# The event version to match against. +# +# event_identifier: Type: int | REQUIRED +# The event identifier to match against. +# +# references: Type: list[str] | OPTIONAL +# A list of references to provide as context and +# source for the event mapping. E.g. a URL to the +# official Microsoft documentation on the event. +# +# mapping: Type: list[dict] | REQUIRED +# A list of dicts that define the new attribute name +# and the string index of the event to extract the +# value from. Additionally, it can also contain an +# alias list to add multiple attributes with +# the same value but different names. +# +# name: Type: str | REQUIRED +# The name of the new attribute to create. +# +# string_index: Type: int | REQUIRED | Starting at index 0 +# The string index of the event to extract the +# value from. Based on the plaso extracted "strings" +# attribute with Windows eventlog entries. +# +# aliases: Type: list[str] | OPTIONAL +# A list of aliases to add additionally to the +# official name of the attribute. This can be used +# to add different field names matching individual +# field name ontologies. E.g. srcIP, domain, etc. 
+# +# For more details and examples of such an extraction check the Timesketch +# documentation: +# +# TODO(Add documentation link) +# +# ------------------------------------------------------------------------ +# 4624: An account was successfully logged on. +## Windows Server 2008, Windows Vista +security_4624_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-a5ba-3e3b0328c30d}' + event_identifier: 4624 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4624 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: target_user_sid + string_index: 4 + - name: target_user_name + string_index: 5 + aliases: + - username + - name: target_domain_name + string_index: 6 + aliases: + - domain + - name: target_logon_id + string_index: 7 + aliases: + - logon_id + - name: logon_type + string_index: 8 + - name: logon_process_name + string_index: 9 + - name: authentication_package_name + string_index: 10 + - name: workstation_name + string_index: 11 + - name: logon_guid + string_index: 12 + - name: transmitted_services + string_index: 13 + - name: lm_package_name + string_index: 14 + - name: key_length + string_index: 15 + - name: process_id + string_index: 16 + - name: process_name + string_index: 17 + - name: ip_address + string_index: 18 + - name: ip_port + string_index: 19 + aliases: + - port + +## Windows Server 2012, Windows 8 +security_4624_v1: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-a5ba-3e3b0328c30d}' + event_identifier: 4624 + event_version: 1 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4624 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + 
string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: target_user_sid + string_index: 4 + - name: target_user_name + string_index: 5 + aliases: + - username + - name: target_domain_name + string_index: 6 + aliases: + - domain + - name: target_logon_id + string_index: 7 + aliases: + - logon_id + - name: logon_type + string_index: 8 + - name: logon_process_name + string_index: 9 + - name: authentication_package_name + string_index: 10 + - name: workstation_name + string_index: 11 + - name: logon_guid + string_index: 12 + - name: transmitted_services + string_index: 13 + - name: lm_package_name + string_index: 14 + - name: key_length + string_index: 15 + - name: process_id + string_index: 16 + - name: process_name + string_index: 17 + - name: ip_address + string_index: 18 + - name: ip_port + string_index: 19 + aliases: + - port + - name: impersonation_level + string_index: 20 + +## Windows 10 +security_4624_v2: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-a5ba-3e3b0328c30d}' + event_identifier: 4624 + event_version: 2 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4624 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: target_user_sid + string_index: 4 + - name: target_user_name + string_index: 5 + aliases: + - username + - name: target_domain_name + string_index: 6 + aliases: + - domain + - name: target_logon_id + string_index: 7 + aliases: + - logon_id + - name: logon_type + string_index: 8 + - name: logon_process_name + string_index: 9 + - name: authentication_package_name + string_index: 10 + - name: workstation_name + string_index: 11 + - name: logon_guid + string_index: 12 + - name: transmitted_services + string_index: 13 + - name: 
lm_package_name + string_index: 14 + - name: key_length + string_index: 15 + - name: process_id + string_index: 16 + - name: process_name + string_index: 17 + - name: ip_address + string_index: 18 + - name: ip_port + string_index: 19 + aliases: + - port + - name: impersonation_level + string_index: 20 + - name: restricted_admin_mode + string_index: 21 + - name: target_outbound_user_name + string_index: 22 + - name: target_outbound_domain_name + string_index: 23 + - name: virtual_account + string_index: 24 + - name: target_linked_logon_id + string_index: 25 + - name: elevated_token + string_index: 26 + +# 4625: An account failed to log on. +security_4625_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-a5ba-3e3b0328c30d}' + event_identifier: 4625 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4625 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: target_user_sid + string_index: 4 + - name: target_user_name + string_index: 5 + aliases: + - username + - name: target_domain_name + string_index: 6 + aliases: + - domain + - name: status + string_index: 7 + - name: failure_reason + string_index: 8 + - name: sub_status + string_index: 9 + - name: logon_type + string_index: 10 + - name: logon_process_name + string_index: 11 + - name: authentication_package_name + string_index: 12 + - name: workstation_name + string_index: 13 + - name: transmitted_services + string_index: 14 + - name: lm_package_name + string_index: 15 + - name: key_length + string_index: 16 + - name: process_id + string_index: 17 + - name: process_name + string_index: 18 + - name: ip_address + string_index: 19 + - name: ip_port + string_index: 20 + aliases: + - port + +# 4634: An account was logged off. 
+security_4634_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4634 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4634 + mapping: + - name: target_user_sid + string_index: 0 + - name: target_user_name + string_index: 1 + aliases: + - username + - name: target_domain_name + string_index: 2 + aliases: + - domain + - name: target_logon_id + string_index: 3 + aliases: + - logon_id + - name: logon_type + string_index: 4 + +# 4648: A logon was attempted using explicit credentials. +security_4648_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4648 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4648 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: logon_guid + string_index: 4 + - name: target_user_name + string_index: 5 + aliases: + - username + - name: target_domain_name + string_index: 6 + aliases: + - domain + - name: target_logon_guid + string_index: 7 + - name: target_server_name + string_index: 8 + - name: target_info + string_index: 9 + - name: process_id + string_index: 10 + - name: process_name + string_index: 11 + - name: ip_address + string_index: 12 + - name: ip_port + string_index: 13 + aliases: + - port + +# 4688: A new process has been created. +## Windows Server 2008, Windows Vista. 
+security_4688_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4688 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4688 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_logon_id + string_index: 2 + - name: new_process_id + string_index: 3 + - name: new_process_name + string_index: 4 + - name: token_elevation_type + string_index: 5 + - name: process_id + string_index: 6 + +## Windows Server 2012 R2, Windows 8.1. +security_4688_v1: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4688 + event_version: 1 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4688 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_logon_id + string_index: 2 + - name: new_process_id + string_index: 3 + - name: new_process_name + string_index: 4 + - name: token_elevation_type + string_index: 5 + - name: process_id + string_index: 6 + - name: command_line + string_index: 7 + +## Windows 10 +security_4688_v2: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4688 + event_version: 2 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4688 + mapping: + - name: subject_user_sid + string_index: 0 + - name: subject_user_name + string_index: 1 + - name: subject_domain_name + string_index: 2 + - name: subject_logon_id + string_index: 3 + - name: new_process_id + string_index: 4 + # No "process_id" alias here: this mapping also defines a process_id + # attribute (string_index 7, the creator process), and an alias with the + # same name would make two different values compete for one attribute. + - name: new_process_name + string_index: 5 + aliases: + - process_name + - name: token_elevation_type + 
string_index: 6 + - name: process_id + string_index: 7 + - name: command_line + string_index: 8 + - name: target_user_sid + string_index: 9 + - name: target_user_name + string_index: 10 + aliases: + - username + - name: target_domain_name + string_index: 11 + aliases: + - domain + - name: target_logon_id + string_index: 12 + aliases: + - logon_id + - name: parent_process_name + string_index: 13 + - name: mandatory_level + string_index: 14 + +# 4720: A user account was created. +security_4720_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4720 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4720 + mapping: + - name: target_user_name + string_index: 0 + - name: target_domain_name + string_index: 1 + # TargetSid is the third EventData string of event 4720; all following + # fields are therefore one position later than without it. + - name: target_sid + string_index: 2 + - name: subject_user_sid + string_index: 3 + - name: subject_user_name + string_index: 4 + - name: subject_domain_name + string_index: 5 + - name: subject_logon_id + string_index: 6 + - name: privilege_list + string_index: 7 + - name: sam_account_name + string_index: 8 + aliases: + - username + - name: display_name + string_index: 9 + - name: user_principal_name + string_index: 10 + - name: home_directory + string_index: 11 + - name: home_path + string_index: 12 + - name: script_path + string_index: 13 + - name: profile_path + string_index: 14 + - name: user_workstations + string_index: 15 + - name: password_last_set + string_index: 16 + - name: account_expires + string_index: 17 + - name: primary_group_id + string_index: 18 + - name: allowed_to_delegate_to + string_index: 19 + - name: old_uac_value + string_index: 20 + - name: new_uac_value + string_index: 21 + - name: user_account_control + string_index: 22 + - name: user_parameters + string_index: 23 + - name: sid_history + string_index: 24 + - name: logon_hours + string_index: 25 + +# 4728: A member was added to a security-enabled global group 
+security_4728_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4728 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/audit-security-group-management + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4732 + mapping: + - name: member_name + string_index: 0 + - name: member_sid + string_index: 1 + - name: target_user_name + string_index: 2 + aliases: + - username + - name: target_domain_name + string_index: 3 + aliases: + - domain + - name: target_sid + string_index: 4 + - name: subject_user_sid + string_index: 5 + - name: subject_user_name + string_index: 6 + - name: subject_domain_name + string_index: 7 + - name: subject_logon_id + string_index: 8 + - name: privilege_list + string_index: 9 + +# 4732: A member was added to a security-enabled local group +security_4732_v0: + source_name: + - Microsoft-Windows-Security-Auditing + provider_identifier: + - '{54849625-5478-4994-A5BA-3E3B0328C30D}' + event_identifier: 4732 + event_version: 0 + references: + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4732 + mapping: + - name: member_name + string_index: 0 + - name: member_sid + string_index: 1 + - name: target_user_name + string_index: 2 + aliases: + - username + - name: target_domain_name + string_index: 3 + aliases: + - domain + - name: target_sid + string_index: 4 + - name: subject_user_sid + string_index: 5 + - name: subject_user_name + string_index: 6 + - name: subject_domain_name + string_index: 7 + - name: subject_logon_id + string_index: 8 + - name: privilege_list + string_index: 9 + +# 7045: A new service was installed in the system. 
+system_7045_v0: + source_name: + - 'Service Control Manager' + provider_identifier: + - '{555908d1-a6d7-4695-8e1e-26931d2012f4}' + event_identifier: 7045 + event_version: 0 + references: + - https://windows-event-explorer.app.elstc.co/publisher/Service%20Control%20Manager/event/7045/v0 + - https://learn.microsoft.com/en-us/windows/security/threat-protection/auditing/event-4697 + mapping: + - name: service_name + string_index: 0 + - name: image_path + string_index: 1 + - name: service_type + string_index: 2 + - name: start_type + string_index: 3 + - name: account_name + string_index: 4 + diff --git a/timesketch/lib/analyzers/__init__.py b/timesketch/lib/analyzers/__init__.py index dd63913dbd..2f24932576 100644 --- a/timesketch/lib/analyzers/__init__.py +++ b/timesketch/lib/analyzers/__init__.py @@ -20,6 +20,7 @@ from timesketch.lib.analyzers import chain from timesketch.lib.analyzers import domain from timesketch.lib.analyzers import expert_sessionizers +from timesketch.lib.analyzers import feature_extraction_plugin from timesketch.lib.analyzers import feature_extraction from timesketch.lib.analyzers import gcp_logging from timesketch.lib.analyzers import geoip diff --git a/timesketch/lib/analyzers/feature_extraction_plugin.py b/timesketch/lib/analyzers/feature_extraction_plugin.py new file mode 100644 index 0000000000..21bb1df84f --- /dev/null +++ b/timesketch/lib/analyzers/feature_extraction_plugin.py @@ -0,0 +1,140 @@ +# Copyright 2023 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Main sketch analyzer for feature extraction.""" + +import logging +from typing import List, Optional, Dict + +from timesketch.lib.analyzers import interface +from timesketch.lib.analyzers import manager +from timesketch.lib.analyzers.feature_extraction_plugins import ( + manager as feature_manager, +) + +logger = logging.getLogger("timesketch.analyzers.feature_extraction") + + +class FeatureExtractionSketchPlugin(interface.BaseAnalyzer): + """Main sketch analyzer for feature extraction. + + This analyzer runs the feature extraction plugins registered with the plugin + manager of the feature_extraction_plugins package. One analyzer instance + handles one (plugin, feature) pair; get_kwargs() enumerates all pairs. + """ + + NAME = "feature_extraction_plugin" + DISPLAY_NAME = "Feature Extractions" + DESCRIPTION = ( + "Runs all feature extraction plugins on selected timelines. " + "Currently implemented extractions: * regex features * winevt features." + ) + + DEPENDENCIES = frozenset() + + def __init__( + self, + index_name: str, + sketch_id: int, + timeline_id: Optional[int] = None, + **kwargs, + ) -> None: + """Initializes the sketch analyzer. + + Args: + index_name (str): OpenSearch index name. + sketch_id (int): TimeSketch's sketch ID. + timeline_id (int): The ID of the timeline. + **kwargs: Optional settings read by this analyzer: + plugin_name (str): Name of the feature extraction plugin to run. + feature_name (str): Name of the feature passed to the plugin. + feature_config (dict): Configuration passed to the plugin. 
+ """ + # Each value is None when the corresponding kwarg is not passed; all three + # can also be assigned later through the properties below. + self._plugin_name: Optional[str] = kwargs.get("plugin_name") + self._feature_name: Optional[str] = kwargs.get("feature_name") + self._feature_config: Optional[Dict] = kwargs.get("feature_config") + + super().__init__( + index_name=index_name, sketch_id=sketch_id, timeline_id=timeline_id + ) + + @property + def plugin_name(self) -> Optional[str]: + """Name of the feature extraction plugin that run() looks up.""" + return self._plugin_name + + @plugin_name.setter + def plugin_name(self, value: str) -> None: + self._plugin_name = value + + @property + def feature_name(self) -> Optional[str]: + """Feature name that run() passes to the plugin's run_plugin().""" + return self._feature_name + + @feature_name.setter + def feature_name(self, value: str) -> None: + self._feature_name = value + + @property + def feature_config(self) -> Optional[Dict]: + """Feature configuration that run() passes to the plugin's run_plugin().""" + return self._feature_config + + @feature_config.setter + def feature_config(self, value: Dict) -> None: + self._feature_config = value + + def run(self) -> str: + """Entry point for the sketch analyzer. + + Looks up the plugin named by plugin_name through the plugin manager and + runs it with feature_name and feature_config. + + Returns: + str: The value returned by the plugin's run_plugin(), or a message + explaining why no plugin was run. + """ + # Handling unset self._plugin_name + if not self._plugin_name: + logger.debug("Feature extraction plugin name is empty") + return "Feature extraction plugin name is empty" + + try: + # NOTE(review): run_plugin() is called below with (name, config) only, + # so get_plugin() is expected to return a plugin instance despite the + # variable name - confirm against the plugin manager. + plugin_class = feature_manager.PluginManager.get_plugin( + self._plugin_name, self + ) + if not plugin_class: + # Raised inside the try on purpose: the handler below turns it into + # the "Error: ..." summary string instead of propagating. + raise ValueError( + f"Feature extraction plugin {self._plugin_name} is not " + "registered. Check if the feature is registered in " + "feature_plugins." + ) + + return plugin_class.run_plugin(self._feature_name, self._feature_config) + except ValueError as exception: + logger.error(str(exception)) + return f"Error: {str(exception)}" + + @staticmethod + def get_kwargs() -> List[Dict]: + """Get kwargs for the analyzer. + + Returns: + List[dict]: A list of dict containing plugin name, feature name and feature + config. 
+ """ + feature_kwargs_list = [] + + plugin_classes = feature_manager.PluginManager.get_plugins(None) + for plugin in plugin_classes: + feature_list = plugin.get_kwargs() + if not feature_list: + logger.debug("No configuration for %s", plugin.NAME) + continue + + for feature_config in feature_list: + # Tags the dict returned by the plugin in place with the lower-cased + # plugin name, which __init__ reads back as the plugin_name kwarg. + feature_config["plugin_name"] = plugin.NAME.lower() + feature_kwargs_list.append(feature_config) + + return feature_kwargs_list + + +manager.AnalysisManager.register_analyzer(FeatureExtractionSketchPlugin) diff --git a/timesketch/lib/analyzers/feature_extraction_plugin_test.py b/timesketch/lib/analyzers/feature_extraction_plugin_test.py new file mode 100644 index 0000000000..47188d5afb --- /dev/null +++ b/timesketch/lib/analyzers/feature_extraction_plugin_test.py @@ -0,0 +1,313 @@ +# Copyright 2023 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Unit tests for feature extraction.""" + +import os +import re +import textwrap +from typing import List, Dict + +import yaml +import mock + +from timesketch.lib import emojis +from timesketch.lib.analyzers.feature_extraction_plugin import ( + FeatureExtractionSketchPlugin, +) +from timesketch.lib.analyzers.feature_extraction_plugins import regex_features +from timesketch.lib.analyzers.sequence_sessionizer_test import _create_eventObj +from timesketch.lib.testlib import BaseTest +from timesketch.lib.testlib import MockDataStore + + +class TestFeatureExtractionSketchPlugin(BaseTest): + """A class to test FeatureExtractionSketchPlugin class methods.""" + + EXPECTED_RESULT = textwrap.dedent( + """winevt feature extraction: [security_4624_v2] extracted 1 features.""" + ) + + def test_winevt_config(self): + """Tests Windows event log feature extraction config.""" + config_file = os.path.join("data", "winevt_features.yaml") + self.assertTrue(os.path.isfile(config_file)) + + with open(config_file, "r", encoding="utf-8") as fh: + config = yaml.safe_load(fh) + + self.assertIsInstance(config, dict) + + for key, value in config.items(): + self.assertIsInstance(key, str) + self.assertIsInstance(value, dict) + + @mock.patch("timesketch.lib.analyzers.interface.OpenSearchDataStore", MockDataStore) + def test_run(self) -> None: + """Tests run method.""" + plugin_object = FeatureExtractionSketchPlugin( + index_name="test", sketch_id=1, timeline_id=1 + ) + + plugin_object.datastore.client = mock.Mock() + datastore = plugin_object.datastore + + self._create_mock_events(datastore) + + plugin_object.plugin_name = "winevt_extraction_plugin" + plugin_object.feature_name = "security_4624_v2" + plugin_object.feature_config = self._get_feature_config( + "winevt_features.yaml", plugin_object.feature_name + ) + + result = plugin_object.run() + self.assertEqual(self.EXPECTED_RESULT, result) + + def _get_feature_config(self, file_name: str, feature_name: str) -> Dict: + """Returns the 
feature configuration. + + Args: + file_name (str): Feature configuration file name. + feature_name (str): Feature name in the configuration file. + + Returns: + Dict: Configuration parameter for the feature. + """ + path = os.path.join("data", file_name) + + with open(path, "r", encoding="utf-8") as fh: + config = yaml.safe_load(fh) + + for name, config in config.items(): + if name == feature_name: + return config + + return None # Return None if no match. + + def _create_mock_events(self, datastore) -> None: + """Creates mock events.""" + events: List[Dict] = [] + events.extend(self._create_mock_winevt_events()) + + # Adding new events + # Use the following example to extend the events add add mock events. + # Example: events.extend(self._create_mock_xyz_events()) + + event_id = 0 + timestamp = 1672097149681987 + + for event in events: + _create_eventObj(datastore, event_id, timestamp, event) + event_id += 1 + timestamp += 1000000 + + def _create_mock_winevt_events(self) -> List[Dict]: + """Creates mock Windows event log events. + + Returns: + List[Dict]: A list of dictionary containing Windows event logs. 
+ """ + events = [] + + security_4624_v2_event = { + "source_name": "Microsoft-Windows-Security-Auditing", + "event_identifier": 4624, + "event_version": 2, + "strings": [ + "S-1-5-18", + "WIN-MDLVGLNGOM0$", + "WORKGROUP", + "0x00000000000003e7", + "S-1-5-18", + "SYSTEM", + "NT AUTHORITY", + "0x00000000000003e7", + "5", + "Advapi ", + "Negotiate", + "-", + "{00000000-0000-0000-0000-000000000000}", + "-", + "-", + "0", + "0x000000000000026c", + "C:\\Windows\\System32\\services.exe", + "-", + "-", + "%%1833", + "-", + "-", + "-", + "%%1843", + "0x0000000000000000", + "%%1842", + ], + } + events.append(security_4624_v2_event) + + return events + + # Copied from feature_extraction_test + + def _config_validation(self, config): + """Validate that all items of a config are valid.""" + query = config.get("query_string", config.get("query_dsl")) + self.assertIsNotNone(query) + self.assertIsInstance(query, str) + + attribute = config.get("attribute") + self.assertIsNotNone(attribute) + + store_as = config.get("store_as") + self.assertIsNotNone(store_as) + + expression = config.get("re") + self.assertIsNotNone(expression) + try: + _ = re.compile(expression) + except re.error as exception: + self.assertIsNone(exception) + + emojis_to_add = config.get("emojis") + if emojis_to_add: + self.assertIsInstance(emojis_to_add, (list, tuple)) + for emoji_name in emojis_to_add: + emoji_code = emojis.get_emoji(emoji_name) + self.assertNotEqual(emoji_code, "") + + tags = config.get("tags") + if tags: + self.assertIsInstance(tags, (list, tuple)) + + create_view = config.get("create_view") + if create_view: + self.assertIsInstance(create_view, bool) + + aggregate = config.get("aggregate") + if aggregate: + self.assertIsInstance(aggregate, bool) + + # TODO: Add tests for the feature extraction. 
+ def test_config(self): + """Tests that the config file is valid.""" + config_file = os.path.join("data", "features.yaml") + self.assertTrue(os.path.isfile(config_file)) + + with open(config_file) as fh: + config = yaml.safe_load(fh) + + self.assertIsInstance(config, dict) + + for key, value in iter(config.items()): + self.assertIsInstance(key, str) + self.assertIsInstance(value, dict) + self._config_validation(value) + + # Mock the OpenSearch datastore. + @mock.patch("timesketch.lib.analyzers.interface.OpenSearchDataStore", MockDataStore) + def test_get_attribute_value(self): + """Test function _get_attribute_value().""" + analyzer = FeatureExtractionSketchPlugin( + index_name="test_index", sketch_id=1, timeline_id=1 + ) + plugin = regex_features.RegexFeatureExtractionPlugin(analyzer) + + current_val = ["hello"] + extracted_value = ["hello"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val=current_val, + extracted_value=extracted_value, + keep_multi=True, + merge_values=True, + type_list=True, + ) + new_val.sort() + + self.assertEqual(new_val, ["hello"]) + + current_val = ["hello"] + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, True, True, True + ) + new_val.sort() + + self.assertEqual(new_val, ["hello", "hello2", "hello3"]) + + current_val = ["hello"] + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, False, True, True + ) + new_val.sort() + + self.assertEqual(new_val, ["hello", "hello2"]) + + current_val = ["hello"] + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, False, False, True + ) + new_val.sort() + + self.assertEqual(new_val, ["hello2"]) + + current_val = ["hello"] + extracted_value = ["hello2", "hello3"] + # 
pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, True, False, True + ) + new_val.sort() + + self.assertEqual(new_val, ["hello2", "hello3"]) + + current_val = "hello" + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, True, True, False + ) + + self.assertEqual(new_val, "hello,hello2,hello3") + + current_val = "hello" + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, False, True, False + ) + + self.assertEqual(new_val, "hello,hello2") + + current_val = "hello" + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, True, False, False + ) + + self.assertEqual(new_val, "hello2,hello3") + + current_val = "hello" + extracted_value = ["hello2", "hello3"] + # pylint: disable=protected-access + new_val = plugin._get_attribute_value( + current_val, extracted_value, False, False, False + ) + + self.assertEqual(new_val, "hello2") diff --git a/timesketch/lib/analyzers/feature_extraction_plugins/__init__.py b/timesketch/lib/analyzers/feature_extraction_plugins/__init__.py new file mode 100644 index 0000000000..7022ecfb80 --- /dev/null +++ b/timesketch/lib/analyzers/feature_extraction_plugins/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2023 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Imports for the feature extraction plugins.""" + +from timesketch.lib.analyzers.feature_extraction_plugins import winevt_features +from timesketch.lib.analyzers.feature_extraction_plugins import regex_features diff --git a/timesketch/lib/analyzers/feature_extraction_plugins/interface.py b/timesketch/lib/analyzers/feature_extraction_plugins/interface.py new file mode 100644 index 0000000000..4dd7e3b8ca --- /dev/null +++ b/timesketch/lib/analyzers/feature_extraction_plugins/interface.py @@ -0,0 +1,53 @@ +# Copyright 2023 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an interface to feature extraction plugins.""" + +import abc +from typing import Optional + + +class BaseFeatureExtractionPlugin(object): + """A base plugin for feature extraction. + + This class serves as an interface for feature extraction plugins. + """ + + NAME = "base_feature_extraction" + DISPLAY_NAME = "Base Feature Extraction" + DESCRIPTION = "" + + def __init__(self, analyzer_object: Optional["FeatureSketchPlugin"] = None) -> None: + """Initializes the base plugin. + + Args: + analyzer_object (FeatureSketchPlugin): An object of class + FeatureSketchPlugin. 
class PluginManager(object):
    """A class that implements the feature extraction plugins manager.

    Plugin classes are kept in a class-level registry keyed by the lowercased
    ``NAME`` attribute of each plugin class. Because the registry lives on the
    class, it is shared by every caller of these classmethods.
    """

    # Maps lowercased plugin NAME -> plugin class (shared, class-level state).
    _plugin_classes: dict = {}
    logger = logging.getLogger(__name__)

    @classmethod
    def register_plugin(cls, plugin_class: Type["BaseFeatureExtractionPlugin"]) -> None:
        """Registers a plugin class.

        Args:
            plugin_class (Type[BaseFeatureExtractionPlugin]): A class object of a
                plugin. Its ``NAME`` attribute (lowercased) is the registry key.

        Raises:
            KeyError: If the plugin_class is already registered.
        """
        plugin_name = plugin_class.NAME.lower()
        if plugin_name in cls._plugin_classes:
            raise KeyError(f"Plugin class {plugin_class.NAME} is already registered.")

        cls.logger.info("Registering plugin class %s", plugin_class.NAME)
        cls._plugin_classes[plugin_name] = plugin_class

    @classmethod
    def register_plugins(
        cls, plugin_classes: List[Type["BaseFeatureExtractionPlugin"]]
    ) -> None:
        """Registers multiple plugin classes.

        Registration stops at the first class that is already registered;
        classes registered before the failure stay registered.

        Args:
            plugin_classes (List[Type[BaseFeatureExtractionPlugin]]): A list of
                plugin class objects.

        Raises:
            KeyError: If plugin classes are already registered.
        """
        for plugin_class in plugin_classes:
            cls.register_plugin(plugin_class=plugin_class)

    @classmethod
    def deregister_plugin(
        cls, plugin_class: Type["BaseFeatureExtractionPlugin"]
    ) -> None:
        """Deregisters a plugin class.

        Args:
            plugin_class (Type[BaseFeatureExtractionPlugin]): A plugin class to be
                deregistered.

        Raises:
            KeyError: If the plugin class is not registered.
        """
        plugin_name = plugin_class.NAME.lower()
        if plugin_name not in cls._plugin_classes:
            raise KeyError(f"Plugin class {plugin_class.NAME} is not registered.")

        cls.logger.info("Deregistering plugin class: %s", plugin_class.NAME)
        del cls._plugin_classes[plugin_name]

    @classmethod
    def get_plugin(
        cls, plugin_name: str, analyzer_object: "FeatureSketchPlugin"
    ) -> "BaseFeatureExtractionPlugin":
        """Returns an instance of the plugin registered under plugin_name.

        The lookup is case-insensitive, matching how names are normalized on
        registration.

        Args:
            plugin_name (str): The name of the plugin to retrieve.
            analyzer_object (FeatureSketchPlugin): An instance of
                FeatureSketchPlugin that is passed to the plugin constructor.

        Returns:
            BaseFeatureExtractionPlugin: A new plugin instance, or None if no
                plugin is registered under that name.
        """
        if not isinstance(plugin_name, str):
            return None

        # The registry is keyed by the lowercased NAME, so normalize the
        # requested name the same way and look it up directly.
        plugin_class = cls._plugin_classes.get(plugin_name.lower())
        if plugin_class is None:
            return None  # Plugin not found.

        return plugin_class(analyzer_object)

    @classmethod
    def get_plugins(
        cls, analyzer_object: "FeatureSketchPlugin"
    ) -> List["BaseFeatureExtractionPlugin"]:
        """Retrieves an instance of every registered plugin class.

        Args:
            analyzer_object (FeatureSketchPlugin): An instance of
                FeatureSketchPlugin that is passed to each plugin constructor.

        Returns:
            List[BaseFeatureExtractionPlugin]: A list of plugin instances.
        """
        return [
            plugin_class(analyzer_object)
            for plugin_class in cls._plugin_classes.values()
        ]
+ ) + + FORM_FIELDS = [ + { + "name": "query_string", + "type": "ts-dynamic-form-text-input", + "label": "The filter query to narrow down the result set", + "placeholder": "Query", + "default_value": "", + }, + { + "name": "query_dsl", + "type": "ts-dynamic-form-text-input", + "label": "The filter query DSL to narrow down the result", + "placeholder": "Query DSL", + "default_value": "", + }, + { + "name": "attribute", + "type": "ts-dynamic-form-text-input", + "label": "Name of the field to apply regular expression against", + "placeholder": "Field Name", + "default_value": "", + }, + { + "name": "store_as", + "type": "ts-dynamic-form-text-input", + "label": "Name of the field to store the extracted results in", + "placeholder": "Store results as field name", + "default_value": "", + }, + { + "name": "re", + "type": "ts-dynamic-form-text-input", + "label": "The regular expression to extract data from field", + "placeholder": "Regular Expression", + "default_value": "", + }, + { + "name": "re_flags", + "type": "ts-dynamic-form-multi-select-input", + "label": "List of flags to pass to the regular expression", + "placeholder": "Regular Expression flags", + "default_value": [], + "options": RE_FLAGS, + "optional": True, + }, + { + "name": "emojis", + "type": "ts-dynamic-form-multi-select-input", + "label": "List of emojis to add to events with matches", + "placeholder": "Emojis to add to events", + "default_value": [], + "options": [x.code for x in emojis.EMOJI_MAP.values()], + "options-label": [ + "{0:s} - {1:s}".format(x, y.help) for x, y in emojis.EMOJI_MAP.items() + ], + "optional": True, + }, + { + "name": "tags", + "type": "ts-dynamic-form-text-input", + "label": "Tag to add to events with matches", + "placeholder": "Tag to add to events", + "default_value": "", + "optional": True, + }, + { + "name": "create_view", + "type": "ts-dynamic-form-boolean", + "label": "Should a view be created if there is a match", + "placeholder": "Create a view", + "default_value": 
False, + "optional": True, + }, + { + "name": "store_type_list", + "type": "ts-dynamic-form-boolean", + "label": "Store extracted result in type List", + "placeholder": "Store results as field type list", + "default_value": False, + "optional": True, + }, + { + "name": "overwrite_store_as", + "type": "ts-dynamic-form-boolean", + "label": "Overwrite the field to store if already exist", + "placeholder": "Overwrite the field to store", + "default_value": True, + "optional": True, + }, + { + "name": "overwrite_and_merge_store_as", + "type": "ts-dynamic-form-boolean", + "label": "Overwrite the field to store and merge value if exist", + "placeholder": "Overwrite the field to store and merge value", + "default_value": False, + "optional": True, + }, + { + "name": "keep_multimatch", + "type": "ts-dynamic-form-boolean", + "label": "Keep multi match datas", + "placeholder": "Keep multi match", + "default_value": False, + "optional": True, + }, + { + "name": "aggregate", + "type": "ts-dynamic-form-boolean", + "label": "Should results be aggregated if there is a match", + "placeholder": "Aggregate results", + "default_value": False, + "optional": True, + }, + ] + + def run_plugin(self, name: str, config: dict) -> str: + """Entry point for the analyzer. + + Args: + name (str): Feature extraction name. + config (dict): A dict that contains the configuration for the feature + extraction. + + Returns: + str: String with summary of the analyzer result. + """ + return self.extract_feature(name, config) + + @staticmethod + def _get_attribute_value( + current_val, extracted_value, keep_multi, merge_values, type_list + ): + """Returns the attribute value as it should be stored. + + Args: + current_val: current value of store_as. + extracted_value: values matched from regexp (type list). + keep_multi: choice if you keep all match from regex (type boolean). + merge_values: choice if you merge value from extracted + and current (type boolean). 
def extract_feature(self, name, config):
    """Extract features from events using a configured regular expression.

    For every event returned by the analyzer's event stream, the regular
    expression is applied to the configured source attribute and the unique
    matches are stored in the `store_as` attribute. Optionally emojis and
    tags are added, and a view plus a "top 20" aggregation are created.

    Args:
        name: String with the name describing the feature to be extracted.
        config: A dict that contains the configuration for the feature
            extraction. See data/regex_features.yaml for fields and further
            documentation of what needs to be defined.

    Returns:
        String with summary of the analyzer result. An empty string is
        returned when `attribute`, `store_as` or `re` is missing.
    """
    query = config.get("query_string")
    query_dsl = config.get("query_dsl")
    attribute = config.get("attribute")
    default_store_type_list = config.get("store_type_list", False)
    keep_multimatch = config.get("keep_multimatch", False)
    overwrite_store_as = config.get("overwrite_store_as", True)
    overwrite_and_merge_store_as = config.get("overwrite_and_merge_store_as", False)

    if not attribute:
        logger.warning("No attribute defined.")
        return ""

    store_as = config.get("store_as")
    if not store_as:
        logger.warning("No attribute defined to store results in.")
        return ""

    tags = config.get("tags", [])

    expression_string = config.get("re")
    if not expression_string:
        logger.warning("No regular expression defined.")
        return ""

    expression = utils.compile_regular_expression(
        expression_string=expression_string, expression_flags=config.get("re_flags")
    )

    emoji_names = config.get("emojis", [])
    emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

    return_fields = [attribute, store_as]

    events = self.analyzer_object.event_stream(
        query_string=query, query_dsl=query_dsl, return_fields=return_fields
    )

    event_counter = 0
    for event in events:
        attribute_field = event.source.get(attribute)
        if isinstance(attribute_field, six.text_type):
            attribute_value = attribute_field
        elif isinstance(attribute_field, (list, tuple)):
            # Items are not guaranteed to be strings; join() needs strings.
            attribute_value = ",".join(str(item) for item in attribute_field)
        elif isinstance(attribute_field, (int, float)):
            # The regular expression can only be applied to text.
            attribute_value = str(attribute_field)
        else:
            attribute_value = None

        if not attribute_value:
            continue

        result = expression.findall(attribute_value)
        if not result:
            continue
        result = list(set(result))

        event_counter += 1
        store_as_current_val = event.source.get(store_as)
        if store_as_current_val and not overwrite_store_as:
            continue

        # Decide the storage type per event. The type of an existing value
        # wins over the configured default, and must not leak into the
        # handling of the following events.
        store_type_list = default_store_type_list
        if isinstance(store_as_current_val, six.text_type):
            store_type_list = False
        elif isinstance(store_as_current_val, (list, tuple)):
            store_type_list = True
            # Hand over a copy: the merge helper appends to the list, which
            # would otherwise modify the event source in place (and fail on
            # a tuple).
            store_as_current_val = list(store_as_current_val)

        new_value = self._get_attribute_value(
            store_as_current_val,
            result,
            keep_multimatch,
            overwrite_and_merge_store_as,
            store_type_list,
        )
        if not new_value:
            continue
        event.add_attributes({store_as: new_value})
        event.add_emojis(emojis_to_add)
        event.add_tags(tags)

        # Commit the event to the datastore.
        event.commit()

    aggregate_results = config.get("aggregate", False)
    create_view = config.get("create_view", False)

    # If aggregation is turned on, we automatically create an aggregation.
    if aggregate_results:
        create_view = True

    if create_view and event_counter:
        # The plugin itself has no sketch or timeline; both belong to the
        # analyzer object this plugin was created with.
        sketch = self.analyzer_object.sketch
        view = sketch.add_view(
            name, self.NAME, query_string=query, query_dsl=query_dsl
        )

        if aggregate_results:
            params = {
                "field": store_as,
                "limit": 20,
                "index": [self.analyzer_object.timeline_id],
            }
            sketch.add_aggregation(
                name=f"Top 20 for: {store_as} [{name}]",
                agg_name="field_bucket",
                agg_params=params,
                description="Created by the feature extraction analyzer",
                view_id=view.id,
                chart_type="hbarchart",
            )

    return f"Feature extraction [{name}] extracted {event_counter} features."
class WindowsEventFeatureExtractionPlugin(interface.BaseFeatureExtractionPlugin):
    """A plugin for Windows event log feature extraction.

    Each feature definition selects Windows event log events by source name
    and/or provider identifier, event identifier and event version, and maps
    entries of the event's `strings` list to named attributes.
    """

    NAME = "winevt_extraction_plugin"
    DISPLAY_NAME = "Windows Event Log Feature Extraction"
    DESCRIPTION = (
        "This plugin extracts Windows event logs attributes from the plaso "
        "output attribute `strings` based on the 'winevt_features.yaml' config."
    )

    EVENT_FIELDS = ["strings"]

    def run_plugin(self, name: str, config: dict) -> str:
        """Extracts features from events.

        Args:
            name (str): Feature extraction name.
            config (dict): A dict that contains the configuration for the feature
                extraction.

        Returns:
            str: Returns summary of the feature extraction.

        Raises:
            ValueError: If the feature configuration is invalid.
        """
        return self.extract_features(name, config)

    @staticmethod
    def _selector_values(config: dict, field: str) -> List[str]:
        """Returns the non-empty string entries of a list-typed config field."""
        values = config.get(field)
        if not isinstance(values, list):
            return []
        return [value for value in values if isinstance(value, str) and value]

    @staticmethod
    def _build_field_clause(field: str, values: List[str]) -> str:
        """Builds a query clause matching `field` against any of `values`."""
        quoted = [f'"{value}"' for value in values]
        if len(quoted) == 1:
            return f"{field}:{quoted[0]}"
        alternatives = " OR ".join(quoted)
        return f"{field}:({alternatives})"

    def validate_feature_config(self, name: str, config: dict) -> None:
        """Validates the name and configuration.

        If any of the required properties does not exist or is of the wrong
        type, a ValueError is raised and the feature definition is not extracted.
        This does not affect other feature extractions since each feature is
        executed in their own celery task.

        Args:
            name (str): Name of the feature.
            config (dict): Configuration parameter.

        Raises:
            ValueError: Raises ValueError for value type errors.
        """
        if not name:
            raise ValueError(
                "Feature name is empty, please check your 'winevt_features.yaml' "
                "config!"
            )

        if not config or not isinstance(config, dict):
            raise ValueError(
                f"Feature configuration for [{name}] value is empty, please check your "
                "'winevt_features.yaml' config!"
            )

        # At least one selector needs a usable entry, otherwise no query can
        # be built for this feature.
        if not (
            self._selector_values(config, "source_name")
            or self._selector_values(config, "provider_identifier")
        ):
            raise ValueError(
                f"[{name}] Either 'source_name' or 'provider_identifier' are required "
                "and have to be a list!"
            )

        if not isinstance(config.get("event_identifier"), int):
            raise ValueError(
                f"[{name}] Missing 'event_identifier' or it is not an integer!"
            )

        if not isinstance(config.get("event_version"), int):
            raise ValueError(
                f"[{name}] Missing 'event_version' or it is not an integer!"
            )

        mappings = config.get("mapping")
        if not isinstance(mappings, list):
            raise ValueError(f"[{name}] Missing 'mapping' or it is not a list!")

        if not mappings:
            raise ValueError(f"[{name}] 'mapping' list cannot be empty!")

        for entry in mappings:
            if not isinstance(entry, dict) or not isinstance(entry.get("name"), str):
                raise ValueError(
                    f"[{name}] 'mapping.name' is required and must be a string!"
                )

            if not isinstance(entry.get("string_index"), int):
                raise ValueError(
                    f"[{name}] 'mapping.string_index' is required and must be an"
                    " integer!"
                )

            # A plain string here would later be iterated character by
            # character and create one bogus attribute per character.
            aliases = entry.get("aliases")
            if aliases is not None and not (
                isinstance(aliases, list)
                and all(isinstance(alias, str) for alias in aliases)
            ):
                raise ValueError(
                    f"[{name}] 'mapping.aliases' must be a list of strings!"
                )

    def extract_features(self, name: str, config: dict) -> str:
        """Extracts features from events.

        Args:
            name (str): Features extraction name.
            config (dict): A dict that contains the configuration for the
                feature extraction.

        Returns:
            str: Returns summary of the feature extraction for `name`.

        Raises:
            ValueError: If the feature configuration is invalid.
        """
        self.validate_feature_config(name, config)

        event_identifier = config.get("event_identifier")
        event_version = config.get("event_version")
        mappings = config.get("mapping")

        # Building search query. Multiple entries of one selector are combined
        # with OR, the selectors themselves with AND.
        clauses = []
        for field in ("source_name", "provider_identifier"):
            values = self._selector_values(config, field)
            if values:
                clauses.append(self._build_field_clause(field, values))
        clauses.append(f"event_identifier: {event_identifier}")
        clauses.append(f"event_version: {event_version}")
        query = " AND ".join(clauses)

        events = self.analyzer_object.event_stream(
            query_string=query, return_fields=self.EVENT_FIELDS
        )
        event_counter = 0

        for event in events:
            strings = event.source.get("strings", None)

            if not strings or not isinstance(strings, list):
                logger.debug(
                    "[%s] Missing or invalid strings field in the event. Skipping the"
                    " event %s.",
                    name,
                    event.event_id,
                )
                continue

            attributes = {}
            for mapping in mappings:
                attribute_name = mapping.get("name")
                string_index = mapping.get("string_index")

                try:
                    attribute_value = strings[string_index]
                except IndexError:
                    # Only this attribute is skipped; the remaining mappings
                    # of the event are still extracted.
                    logger.warning(
                        "[%s] The index '%d' for field '%s' does not exist in strings. "
                        "Skipping this attribute for event '%s'",
                        name,
                        string_index,
                        attribute_name,
                        event.event_id,
                    )
                    event.add_comment(
                        f"Analyzer[{self.NAME}]: [{name}] The index '{string_index}' "
                        f"for field '{attribute_name}' does not exist in strings!"
                    )
                    continue

                attributes[attribute_name] = attribute_value
                for alias in mapping.get("aliases") or []:
                    attributes[alias] = attribute_value

            event.add_attributes(attributes)
            event.commit()
            event_counter += 1

        logger.debug("%d features extracted using feature %s", event_counter, name)
        return (
            f"winevt feature extraction: [{name}] extracted {event_counter} features."
        )

    @staticmethod
    def get_kwargs() -> List[Dict]:
        """Get keywords arguments.

        Returns:
            List[dict]: A list of dict containing features name and configuration.
                The list is empty if the config file has no content.
        """
        # The config file is looked up through the analyzer interface's YAML
        # config loader.
        config_file = "winevt_features.yaml"

        features_config = base_interface.get_yaml_config(config_file)
        if not features_config:
            logger.warning("No feature configuration data in %s", config_file)
            return []

        return [
            {"feature_name": feature, "feature_config": feature_config}
            for feature, feature_config in features_config.items()
        ]