From 0d3256415956c45abc8f0eb4440b684bdf4db4f2 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 10:39:20 +0000 Subject: [PATCH 1/7] Add detection-rules submodule --- .gitmodules | 3 +++ detection-rules | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 detection-rules diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..7f0b5265 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "detection-rules"] + path = detection-rules + url = https://github.com/elastic/detection-rules diff --git a/detection-rules b/detection-rules new file mode 160000 index 00000000..c3ca01eb --- /dev/null +++ b/detection-rules @@ -0,0 +1 @@ +Subproject commit c3ca01ebcc40ed2806d236177e6657238d2c18a1 From 9e6453176d6cd681e841a0f83cc6b06ae178d589 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 10:47:15 +0000 Subject: [PATCH 2/7] Borrow code from detection-rules repository --- eland/dataframe.py | 5 +++++ kql | 1 + requirements.txt | 20 ++++++++++++++++++++ setup.py | 2 +- 4 files changed, 27 insertions(+), 1 deletion(-) create mode 120000 kql diff --git a/eland/dataframe.py b/eland/dataframe.py index 045166ba..af1966de 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -41,6 +41,8 @@ from eland.series import Series from eland.utils import is_valid_attr_name +from kql import to_dsl + if TYPE_CHECKING: from elasticsearch import Elasticsearch @@ -789,6 +791,9 @@ def es_query(self, query) -> "DataFrame": if tuple(query) == ("query",): query = query["query"] return DataFrame(_query_compiler=self._query_compiler.es_query(query)) + + def kql_query(self, query): + return self.es_query(to_dsl(query)) def _index_summary(self): # Print index summary e.g. 
diff --git a/kql b/kql new file mode 120000 index 00000000..9f6d7878 --- /dev/null +++ b/kql @@ -0,0 +1 @@ +./detection-rules/kql/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1f885009..042df9fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,23 @@ elasticsearch>=8.3,<9 pandas>=1.5,<2 matplotlib>=3.6 numpy>=1.2.0,<2 +Click~=8.1.0 +eql==0.9.19 +jsl==0.2.4 +jsonschema>=3.2.0 +marko==2.0.1 +marshmallow-dataclass[union]~=8.5.12 +marshmallow-jsonschema~=0.12.0 +marshmallow-union~=0.1.15 +marshmallow~=3.13.0 +pywin32 ; platform_system=='Windows' +pytoml==0.1.21 +PyYAML~=5.3 ; python_version<='3.9' +PyYAML~=6.0.1 ; python_version>='3.10' +requests~=2.27 +toml==0.10.0 +typing-inspect==0.8.0 +typing-extensions==4.5.0 ; python_version<='3.11' +typing-extensions==4.8.0 ; python_version>='3.12' +XlsxWriter~=1.3.6 +semver==3.0.0-dev.4 \ No newline at end of file diff --git a/setup.py b/setup.py index 4a3334e0..63072c83 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ license="Apache-2.0", classifiers=CLASSIFIERS, keywords="elastic eland pandas python", - packages=find_packages(include=["eland", "eland.*"]), + packages=find_packages(include=["eland", "eland.*", "kql"]), install_requires=[ "elasticsearch>=8.3,<9", "pandas>=1.5,<2", From 2d1c72ef33084147c09230ca8622f96caa9fb6fe Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 14:22:18 +0000 Subject: [PATCH 3/7] Fix packaging --- setup.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 63072c83..a068778e 100644 --- a/setup.py +++ b/setup.py @@ -86,12 +86,32 @@ "matplotlib>=3.6", "numpy>=1.2.0,<2", "packaging", + "Click~=8.1.0", + "eql==0.9.19", + "jsl==0.2.4", + "jsonschema>=3.2.0", + "marko==2.0.1", + "marshmallow-dataclass[union]~=8.5.12", + "marshmallow-jsonschema~=0.12.0", + "marshmallow-union~=0.1.15", + "marshmallow~=3.13.0", + "pywin32 ; platform_system=='Windows'", + 
"pytoml==0.1.21", + "PyYAML~=5.3 ; python_version<='3.9'", + "PyYAML~=6.0.1 ; python_version>='3.10'", + "requests~=2.27", + "toml==0.10.0", + "typing-inspect==0.8.0", + "typing-extensions==4.5.0 ; python_version<='3.11'", + "typing-extensions==4.8.0 ; python_version>='3.12'", + "XlsxWriter~=1.3.6", + "semver==3.0.0-dev.4" ], entry_points={ "console_scripts": "eland_import_hub_model=eland.cli.eland_import_hub_model:main" }, python_requires=">=3.8", - package_data={"eland": ["py.typed"]}, + package_data={"eland": ["py.typed"], "kql": ["*.g"]}, include_package_data=True, zip_safe=False, extras_require=extras, From fd686c0272d14918f7f3d8e73f5dff040dd5ac3f Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 15:32:37 +0000 Subject: [PATCH 4/7] Write a docstring for kql_query --- eland/dataframe.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/eland/dataframe.py b/eland/dataframe.py index af1966de..264957e5 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -793,6 +793,30 @@ def es_query(self, query) -> "DataFrame": return DataFrame(_query_compiler=self._query_compiler.es_query(query)) def kql_query(self, query): + """Applies a Kibana Query Language query (KQL / Kuery) to the DataFrame. + The query is converted to Elasticsearch DSL. + Note that KQL can only be used to filter data. + + Parameters + ---------- + query: + KQL query as a string + + Returns + ------- + eland.DataFrame: + eland DataFrame with the query applied + + Examples + -------- + + Apply a filtering to the flights dataset, flights from Milan that take longer than 12 hours. 
+ + >>> columns = ["OriginCityName", "DestCityName", "FlightTimeHour", "AvgTicketPrice"] + >>> df = ed.DataFrame('http://localhost:9200', 'flights', columns=columns) + >>> df.kql_query('OriginCityName:Milan and FlightTimeHour > 12').head(10) + # TODO output + """ return self.es_query(to_dsl(query)) def _index_summary(self): From 8feb09fe6eebb57f351fdba9552f889e40f48d37 Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 21:01:53 +0100 Subject: [PATCH 5/7] Add example output --- eland/dataframe.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/eland/dataframe.py b/eland/dataframe.py index 264957e5..ec8586ea 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -815,7 +815,19 @@ def kql_query(self, query): >>> columns = ["OriginCityName", "DestCityName", "FlightTimeHour", "AvgTicketPrice"] >>> df = ed.DataFrame('http://localhost:9200', 'flights', columns=columns) >>> df.kql_query('OriginCityName:Milan and FlightTimeHour > 12').head(10) - # TODO output + OriginCityName DestCityName FlightTimeHour AvgTicketPrice + 468 Milan Chitose / Tomakomai 15.499956 646.162588 + 471 Milan Tokyo 14.805640 933.586896 + 726 Milan Sydney 14.552814 574.534422 + 886 Milan Buenos Aires 16.938412 748.639741 + 1097 Milan Sydney 25.469282 913.483049 + 1708 Milan Buenos Aires 17.718905 975.483549 + 1865 Milan Sydney 20.014910 956.411751 + 2410 Milan Melbourne 15.988900 650.720199 + 2463 Milan Sydney 13.166832 344.815508 + 2487 Milan Buenos Aires 14.916122 684.506066 + + [10 rows x 4 columns] """ return self.es_query(to_dsl(query)) From 4e55612af2f76dabd3096bc03e1668adba45d47c Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 20:18:20 +0000 Subject: [PATCH 6/7] Reformat --- eland/dataframe.py | 5 ++--- setup.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/eland/dataframe.py b/eland/dataframe.py index ec8586ea..e22c5156 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -23,6 +23,7 @@ import 
numpy as np import pandas as pd # type: ignore +from kql import to_dsl from pandas.core.common import apply_if_callable, is_bool_indexer # type: ignore from pandas.core.computation.eval import eval # type: ignore from pandas.core.dtypes.common import is_list_like # type: ignore @@ -41,8 +42,6 @@ from eland.series import Series from eland.utils import is_valid_attr_name -from kql import to_dsl - if TYPE_CHECKING: from elasticsearch import Elasticsearch @@ -791,7 +790,7 @@ def es_query(self, query) -> "DataFrame": if tuple(query) == ("query",): query = query["query"] return DataFrame(_query_compiler=self._query_compiler.es_query(query)) - + def kql_query(self, query): """Applies a Kibana Query Language query (KQL / Kuery) to the DataFrame. The query is converted to Elasticsearch DSL. diff --git a/setup.py b/setup.py index a068778e..ddf96a7a 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ "typing-extensions==4.5.0 ; python_version<='3.11'", "typing-extensions==4.8.0 ; python_version>='3.12'", "XlsxWriter~=1.3.6", - "semver==3.0.0-dev.4" + "semver==3.0.0-dev.4", ], entry_points={ "console_scripts": "eland_import_hub_model=eland.cli.eland_import_hub_model:main" From ab055cc13b99f3e75ab4a25f3f98cd0d63de21ff Mon Sep 17 00:00:00 2001 From: Bart Broere Date: Tue, 13 Feb 2024 21:14:57 +0000 Subject: [PATCH 7/7] Add scaffolding for tests --- eland/dataframe.py | 26 ++++++++--------- noxfile.py | 1 + requirements-dev.txt | 4 ++- tests/dataframe/test_kql_query_pytest.py | 36 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 14 deletions(-) create mode 100644 tests/dataframe/test_kql_query_pytest.py diff --git a/eland/dataframe.py b/eland/dataframe.py index e22c5156..cc255af9 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -814,19 +814,19 @@ def kql_query(self, query): >>> columns = ["OriginCityName", "DestCityName", "FlightTimeHour", "AvgTicketPrice"] >>> df = ed.DataFrame('http://localhost:9200', 'flights', columns=columns) >>> 
df.kql_query('OriginCityName:Milan and FlightTimeHour > 12').head(10) - OriginCityName DestCityName FlightTimeHour AvgTicketPrice - 468 Milan Chitose / Tomakomai 15.499956 646.162588 - 471 Milan Tokyo 14.805640 933.586896 - 726 Milan Sydney 14.552814 574.534422 - 886 Milan Buenos Aires 16.938412 748.639741 - 1097 Milan Sydney 25.469282 913.483049 - 1708 Milan Buenos Aires 17.718905 975.483549 - 1865 Milan Sydney 20.014910 956.411751 - 2410 Milan Melbourne 15.988900 650.720199 - 2463 Milan Sydney 13.166832 344.815508 - 2487 Milan Buenos Aires 14.916122 684.506066 - - [10 rows x 4 columns] + OriginCityName DestCityName FlightTimeHour AvgTicketPrice + 468 Milan Chitose / Tomakomai 15.499956 646.162588 + 471 Milan Tokyo 14.805640 933.586896 + 726 Milan Sydney 14.552814 574.534422 + 886 Milan Buenos Aires 16.938412 748.639741 + 1097 Milan Sydney 25.469282 913.483049 + 1708 Milan Buenos Aires 17.718905 975.483549 + 1865 Milan Sydney 20.014910 956.411751 + 2410 Milan Melbourne 15.988900 650.720199 + 2463 Milan Sydney 13.166832 344.815508 + 2487 Milan Buenos Aires 14.916122 684.506066 + + [10 rows x 4 columns] """ return self.es_query(to_dsl(query)) diff --git a/noxfile.py b/noxfile.py index 19698d3a..be6f8865 100644 --- a/noxfile.py +++ b/noxfile.py @@ -72,6 +72,7 @@ def lint(session): # https://numpy.org/devdocs/reference/typing.html#mypy-plugin session.install("black", "flake8", "mypy", "isort", "numpy") session.install("--pre", "elasticsearch>=8.3,<9") + session.install("git+https://github.com/elastic/detection-rules") session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) session.run("black", "--check", "--target-version=py38", *SOURCE_FILES) session.run("isort", "--check", "--profile=black", *SOURCE_FILES) diff --git a/requirements-dev.txt b/requirements-dev.txt index 9a1308f0..65495f61 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -38,4 +38,6 @@ nox numpydoc>=0.9.0 mypy build -twine \ No newline at end of file +twine + 
+git+https://github.com/elastic/detection-rules diff --git a/tests/dataframe/test_kql_query_pytest.py b/tests/dataframe/test_kql_query_pytest.py new file mode 100644 index 00000000..d26f53c4 --- /dev/null +++ b/tests/dataframe/test_kql_query_pytest.py @@ -0,0 +1,36 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# File called _pytest for PyCharm compatibility + +from tests.common import TestData, assert_eland_frame_equal + + +class TestDataKQLQuery(TestData): + def test_flights_match_query(self): + ed_flights = self.ed_flights() + + left = ed_flights.kql_query("OriginCityName:Rome")[ + ed_flights["Carrier"] == "Kibana Airlines" + ] + + right = ed_flights[ed_flights["Carrier"] == "Kibana Airlines"].kql_query( + "OriginCityName:Rome" + ) + + assert len(left) > 0 + assert_eland_frame_equal(left, right)