diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..7f0b5265
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "detection-rules"]
+	path = detection-rules
+	url = https://github.com/elastic/detection-rules
diff --git a/detection-rules b/detection-rules
new file mode 160000
index 00000000..c3ca01eb
--- /dev/null
+++ b/detection-rules
@@ -0,0 +1 @@
+Subproject commit c3ca01ebcc40ed2806d236177e6657238d2c18a1
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 7c5f9540..bceda78f 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -23,6 +23,7 @@
 
 import numpy as np
 import pandas as pd  # type: ignore
+from kql import to_dsl
 from pandas.core.common import apply_if_callable, is_bool_indexer  # type: ignore
 from pandas.core.computation.eval import eval  # type: ignore
 from pandas.core.dtypes.common import is_list_like  # type: ignore
@@ -790,6 +791,46 @@ def es_query(self, query) -> "DataFrame":
             query = query["query"]
         return DataFrame(_query_compiler=self._query_compiler.es_query(query))
 
+    def kql_query(self, query: str) -> "DataFrame":
+        """Applies a Kibana Query Language query (KQL / Kuery) to the DataFrame.
+
+        The query is converted to Elasticsearch DSL with ``kql.to_dsl`` and then
+        applied through ``es_query``. Note that KQL can only be used to filter data.
+
+        Parameters
+        ----------
+        query:
+            KQL query as a string
+
+        Returns
+        -------
+        eland.DataFrame:
+            eland DataFrame with the query applied
+
+        Examples
+        --------
+
+        Filter the flights dataset down to flights from Milan that take longer than 12 hours.
+
+        >>> columns = ["OriginCityName", "DestCityName", "FlightTimeHour", "AvgTicketPrice"]
+        >>> df = ed.DataFrame('http://localhost:9200', 'flights', columns=columns)
+        >>> df.kql_query('OriginCityName:Milan and FlightTimeHour > 12').head(10)
+              OriginCityName         DestCityName  FlightTimeHour  AvgTicketPrice
+        468            Milan  Chitose / Tomakomai       15.499956      646.162588
+        471            Milan                Tokyo       14.805640      933.586896
+        726            Milan               Sydney       14.552814      574.534422
+        886            Milan         Buenos Aires       16.938412      748.639741
+        1097           Milan               Sydney       25.469282      913.483049
+        1708           Milan         Buenos Aires       17.718905      975.483549
+        1865           Milan               Sydney       20.014910      956.411751
+        2410           Milan            Melbourne       15.988900      650.720199
+        2463           Milan               Sydney       13.166832      344.815508
+        2487           Milan         Buenos Aires       14.916122      684.506066
+        <BLANKLINE>
+        [10 rows x 4 columns]
+        """
+        return self.es_query(to_dsl(query))
+
     def _index_summary(self):
         # Print index summary e.g.
         # Index: 103 entries, 0 to 102
diff --git a/kql b/kql
new file mode 120000
index 00000000..9f6d7878
--- /dev/null
+++ b/kql
@@ -0,0 +1 @@
+./detection-rules/kql/
\ No newline at end of file
diff --git a/noxfile.py b/noxfile.py
index e8a57191..7552cf2a 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -72,6 +72,7 @@ def lint(session):
     # https://numpy.org/devdocs/reference/typing.html#mypy-plugin
     session.install("black", "flake8", "mypy", "isort", "numpy")
     session.install(".")
+    session.install("git+https://github.com/elastic/detection-rules")
     session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
     session.run("black", "--check", "--target-version=py38", *SOURCE_FILES)
     session.run("isort", "--check", "--profile=black", *SOURCE_FILES)
diff --git a/requirements-dev.txt b/requirements-dev.txt
index e4af9c28..fd405b11 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -19,4 +19,6 @@ nox
 numpydoc>=0.9.0
 mypy
 build
-twine
\ No newline at end of file
+twine
+
+git+https://github.com/elastic/detection-rules
diff --git a/setup.py b/setup.py
index 1befe7d0..e1aba047 100644
--- a/setup.py
+++ b/setup.py
@@ -84,19 +84,41 @@
     license="Apache-2.0",
     classifiers=CLASSIFIERS,
     keywords="elastic eland pandas python",
-    packages=find_packages(include=["eland", "eland.*"]),
+    packages=find_packages(include=["eland", "eland.*", "kql"]),
     install_requires=[
         "elasticsearch>=8.3,<9",
         "pandas>=1.5,<2",
         "matplotlib>=3.6",
         "numpy>=1.2.0,<2",
         "packaging",
+        # NOTE(review): the pins below are added together with the bundled "kql"
+        # package; presumably they are its runtime requirements - confirm.
+        "Click~=8.1.0",
+        "eql==0.9.19",
+        "jsl==0.2.4",
+        "jsonschema>=3.2.0",
+        "marko==2.0.1",
+        "marshmallow-dataclass[union]~=8.5.12",
+        "marshmallow-jsonschema~=0.12.0",
+        "marshmallow-union~=0.1.15",
+        "marshmallow~=3.13.0",
+        "pywin32 ; platform_system=='Windows'",
+        "pytoml==0.1.21",
+        "PyYAML~=5.3 ; python_version<='3.9'",
+        "PyYAML~=6.0.1 ; python_version>='3.10'",
+        "requests~=2.27",
+        "toml==0.10.0",
+        "typing-inspect==0.8.0",
+        "typing-extensions==4.5.0 ; python_version<='3.11'",
+        "typing-extensions==4.8.0 ; python_version>='3.12'",
+        "XlsxWriter~=1.3.6",
+        "semver==3.0.0-dev.4",
     ],
     entry_points={
         "console_scripts": "eland_import_hub_model=eland.cli.eland_import_hub_model:main"
     },
     python_requires=">=3.8,<3.12",
-    package_data={"eland": ["py.typed"]},
+    package_data={"eland": ["py.typed"], "kql": ["*.g"]},
     include_package_data=True,
     zip_safe=False,
     extras_require=extras,
diff --git a/tests/dataframe/test_kql_query_pytest.py b/tests/dataframe/test_kql_query_pytest.py
new file mode 100644
index 00000000..d26f53c4
--- /dev/null
+++ b/tests/dataframe/test_kql_query_pytest.py
@@ -0,0 +1,36 @@
+#  Licensed to Elasticsearch B.V. under one or more contributor
+#  license agreements. See the NOTICE file distributed with
+#  this work for additional information regarding copyright
+#  ownership. Elasticsearch B.V. licenses this file to you under
+#  the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+# 	http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+# File called _pytest for PyCharm compatibility
+
+from tests.common import TestData, assert_eland_frame_equal
+
+
+class TestDataKQLQuery(TestData):
+    def test_flights_match_query(self):
+        flights = self.ed_flights()
+        # Apply the KQL filter first, then the boolean mask on Carrier.
+        kql_then_mask = flights.kql_query("OriginCityName:Rome")[
+            flights["Carrier"] == "Kibana Airlines"
+        ]
+        # Same two filters in the opposite order: mask first, KQL second.
+        mask_then_kql = flights[flights["Carrier"] == "Kibana Airlines"].kql_query(
+            "OriginCityName:Rome"
+        )
+        # Require a non-empty result so the comparison cannot pass vacuously.
+        assert len(kql_then_mask) > 0
+        assert_eland_frame_equal(kql_then_mask, mask_then_kql)