-
-
Notifications
You must be signed in to change notification settings - Fork 62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(mapping-optimizer): Support in operator for mapping optimizer #5691
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import logging | ||
from enum import Enum | ||
from typing import Optional, Tuple | ||
from typing import Optional, Tuple, cast | ||
|
||
from snuba import environment | ||
from snuba.clickhouse.query import Query | ||
|
@@ -13,6 +13,7 @@ | |
) | ||
from snuba.query.conditions import ( | ||
BooleanFunctions, | ||
ConditionFunctions, | ||
combine_and_conditions, | ||
combine_or_conditions, | ||
get_first_level_and_conditions, | ||
|
@@ -90,9 +91,8 @@ def __init__( | |
self.__killswitch = killswitch | ||
self.__value_subcolumn_name = value_subcolumn_name | ||
|
||
# TODO: Add the support for IN conditions. | ||
self.__optimizable_pattern = FunctionCall( | ||
function_name=String("equals"), | ||
self.__equals_condition_pattern = FunctionCall( | ||
function_name=String(ConditionFunctions.EQ), | ||
parameters=( | ||
Or( | ||
[ | ||
|
@@ -106,6 +106,19 @@ def __init__( | |
Param("right_hand_side", Literal(Any(str))), | ||
), | ||
) | ||
self.__in_condition_pattern = FunctionCall( | ||
function_name=String(ConditionFunctions.IN), | ||
parameters=( | ||
mapping_pattern, | ||
Param( | ||
"right_hand_side", | ||
FunctionCall( | ||
Or([String("array"), String("tuple")]), | ||
all_parameters=Literal(), | ||
), | ||
), | ||
), | ||
) | ||
self.__tag_exists_patterns = [ | ||
FunctionCall( | ||
function_name=String("notEquals"), | ||
|
@@ -155,20 +168,43 @@ def __classify_combined_conditions(self, condition: Expression) -> ConditionClas | |
|
||
def __classify_condition(self, condition: Expression) -> ConditionClass: | ||
# Expects this to be an individual condition | ||
match = self.__optimizable_pattern.match(condition) | ||
equals_condition_match = self.__equals_condition_pattern.match(condition) | ||
in_condition_match = ( | ||
self.__in_condition_pattern.match(condition) | ||
if equals_condition_match is None | ||
else None | ||
) | ||
if ( | ||
match is not None | ||
and match.string(KEY_COL_MAPPING_PARAM) == f"{self.__column_name}.key" | ||
equals_condition_match is not None | ||
and equals_condition_match.string(KEY_COL_MAPPING_PARAM) | ||
== f"{self.__column_name}.key" | ||
): | ||
rhs = match.expression("right_hand_side") | ||
rhs = equals_condition_match.expression("right_hand_side") | ||
assert isinstance(rhs, LiteralExpr) | ||
return ( | ||
ConditionClass.NOT_OPTIMIZABLE | ||
# ifNull(tags[asd], '') = '' is not optimizable. | ||
if rhs.value == "" | ||
else ConditionClass.OPTIMIZABLE | ||
) | ||
elif match is None: | ||
if ( | ||
in_condition_match is not None | ||
and in_condition_match.string(KEY_COL_MAPPING_PARAM) | ||
== f"{self.__column_name}.key" | ||
): | ||
rhs = in_condition_match.expression("right_hand_side") | ||
assert isinstance(rhs, FunctionExpr) | ||
|
||
params = rhs.parameters | ||
for param in params: | ||
assert isinstance(param, LiteralExpr) | ||
|
||
# tags[foo] IN array('') is not optimizable | ||
if param.value == "": | ||
return ConditionClass.NOT_OPTIMIZABLE | ||
Comment on lines
+202
to
+204
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was the cause of the test failure. Due to the way we query for tags, even if To handle this, we just mark these cases are not optimizable. and run the query as is. |
||
|
||
return ConditionClass.OPTIMIZABLE | ||
elif equals_condition_match is None and in_condition_match is None: | ||
# If this condition is not matching an optimizable condition, | ||
# check that it does not reference the optimizable column. | ||
# If it does, it means we should not optimize this query. | ||
|
@@ -183,35 +219,89 @@ def __classify_condition(self, condition: Expression) -> ConditionClass: | |
return ConditionClass.IRRELEVANT | ||
|
||
def __replace_with_hash(self, condition: Expression) -> Expression: | ||
match = self.__optimizable_pattern.match(condition) | ||
equals_condition_match = self.__equals_condition_pattern.match(condition) | ||
in_condition_match = ( | ||
self.__in_condition_pattern.match(condition) | ||
if equals_condition_match is None | ||
else None | ||
) | ||
if ( | ||
match is None | ||
or match.string(KEY_COL_MAPPING_PARAM) != f"{self.__column_name}.key" | ||
equals_condition_match is not None | ||
and equals_condition_match.string(KEY_COL_MAPPING_PARAM) | ||
== f"{self.__column_name}.key" | ||
): | ||
return condition | ||
rhs = match.expression("right_hand_side") | ||
assert isinstance(rhs, LiteralExpr) | ||
key = match.scalar(KEY_MAPPING_PARAM) | ||
assert isinstance(key, (str, int)) | ||
if isinstance(key, str): | ||
key = key.translate(ESCAPE_TRANSLATION) | ||
|
||
return FunctionExpr( | ||
alias=condition.alias, | ||
function_name="has", | ||
parameters=( | ||
Column( | ||
alias=None, | ||
table_name=match.optional_string(TABLE_MAPPING_PARAM), | ||
column_name=self.__hash_map_name, | ||
rhs = equals_condition_match.expression("right_hand_side") | ||
assert isinstance(rhs, LiteralExpr) | ||
key = equals_condition_match.scalar(KEY_MAPPING_PARAM) | ||
assert isinstance(key, (str, int)) | ||
if isinstance(key, str): | ||
key = key.translate(ESCAPE_TRANSLATION) | ||
|
||
return FunctionExpr( | ||
alias=condition.alias, | ||
function_name="has", | ||
parameters=( | ||
Column( | ||
alias=None, | ||
table_name=equals_condition_match.optional_string( | ||
TABLE_MAPPING_PARAM | ||
), | ||
column_name=self.__hash_map_name, | ||
), | ||
FunctionExpr( | ||
alias=None, | ||
function_name="cityHash64", | ||
parameters=(LiteralExpr(None, f"{key}={rhs.value}"),), | ||
), | ||
), | ||
FunctionExpr( | ||
alias=None, | ||
function_name="cityHash64", | ||
parameters=(LiteralExpr(None, f"{key}={rhs.value}"),), | ||
) | ||
elif ( | ||
in_condition_match is not None | ||
and in_condition_match.string(KEY_COL_MAPPING_PARAM) | ||
== f"{self.__column_name}.key" | ||
): | ||
key = in_condition_match.scalar(KEY_MAPPING_PARAM) | ||
assert isinstance(key, (str, int)) | ||
if isinstance(key, str): | ||
key = key.translate(ESCAPE_TRANSLATION) | ||
|
||
rhs = in_condition_match.expression("right_hand_side") | ||
assert isinstance(rhs, FunctionExpr) | ||
for param in rhs.parameters: | ||
assert isinstance(param, LiteralExpr) | ||
params = cast(list[LiteralExpr], rhs.parameters) | ||
|
||
return FunctionExpr( | ||
alias=condition.alias, | ||
function_name="hasAny", | ||
parameters=( | ||
Column( | ||
alias=None, | ||
table_name=in_condition_match.optional_string( | ||
TABLE_MAPPING_PARAM | ||
), | ||
column_name=self.__hash_map_name, | ||
), | ||
FunctionExpr( | ||
alias=rhs.alias, | ||
function_name="array", | ||
parameters=tuple( | ||
[ | ||
FunctionExpr( | ||
alias=None, | ||
function_name="cityHash64", | ||
parameters=( | ||
LiteralExpr(None, f"{key}={lit.value}"), | ||
), | ||
) | ||
for lit in params | ||
] | ||
), | ||
), | ||
), | ||
), | ||
) | ||
) | ||
|
||
return condition | ||
|
||
def _get_condition_without_redundant_checks( | ||
self, condition: Expression, query: Query | ||
|
@@ -265,7 +355,7 @@ def _get_condition_without_redundant_checks( | |
if tag_exist_match: | ||
matched_tag_exists_conditions[condition_id] = tag_exist_match | ||
if not tag_exist_match: | ||
eq_match = self.__optimizable_pattern.match(cond) | ||
eq_match = self.__equals_condition_pattern.match(cond) | ||
if eq_match: | ||
tag_eq_match_keys.add(eq_match.scalar(KEY_MAPPING_PARAM)) | ||
useful_conditions = [] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,6 +278,108 @@ | |
nested_condition("tags", "my_tag", ConditionFunctions.EQ, "a"), | ||
id="Non optimizable having", | ||
), | ||
pytest.param( | ||
build_query( | ||
selected_columns=[column("event_id")], | ||
condition=binary_condition( | ||
ConditionFunctions.IN, | ||
nested_expression("tags", "my_tag"), | ||
FunctionCall( | ||
None, | ||
"tuple", | ||
( | ||
Literal(None, "a"), | ||
Literal(None, "b"), | ||
Literal(None, "c"), | ||
), | ||
), | ||
), | ||
), | ||
FunctionCall( | ||
None, | ||
"hasAny", | ||
( | ||
column("_tags_hash_map", True), | ||
FunctionCall( | ||
None, | ||
"array", | ||
( | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=a"),)), | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=b"),)), | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=c"),)), | ||
), | ||
), | ||
), | ||
), | ||
id="Optimizable IN condition using tuple", | ||
), | ||
pytest.param( | ||
build_query( | ||
selected_columns=[column("event_id")], | ||
condition=binary_condition( | ||
ConditionFunctions.IN, | ||
nested_expression("tags", "my_tag"), | ||
FunctionCall( | ||
None, | ||
"array", | ||
( | ||
Literal(None, "a"), | ||
Literal(None, "b"), | ||
Literal(None, "c"), | ||
), | ||
), | ||
), | ||
), | ||
FunctionCall( | ||
None, | ||
"hasAny", | ||
( | ||
column("_tags_hash_map", True), | ||
FunctionCall( | ||
None, | ||
"array", | ||
( | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=a"),)), | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=b"),)), | ||
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=c"),)), | ||
), | ||
), | ||
), | ||
), | ||
id="Optimizable IN condition using array", | ||
), | ||
pytest.param( | ||
build_query( | ||
selected_columns=[column("event_id")], | ||
condition=binary_condition( | ||
ConditionFunctions.IN, | ||
nested_expression("tags", "my_tag"), | ||
FunctionCall( | ||
None, | ||
"array", | ||
( | ||
Literal(None, "a"), | ||
Literal(None, "b"), | ||
Literal(None, ""), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. new test with an empty value in the list to capture this failure case |
||
), | ||
), | ||
), | ||
), | ||
binary_condition( | ||
ConditionFunctions.IN, | ||
nested_expression("tags", "my_tag"), | ||
FunctionCall( | ||
None, | ||
"array", | ||
( | ||
Literal(None, "a"), | ||
Literal(None, "b"), | ||
Literal(None, ""), | ||
), | ||
), | ||
), | ||
id="Non optimizable IN condition with empty value", | ||
), | ||
] | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A test in here was failing in the last attempt