From 09aa233b71ddf8e809d9fd28f2fc1dc08858c067 Mon Sep 17 00:00:00 2001 From: Lucas Guillermou Date: Tue, 15 Oct 2024 12:41:31 +0200 Subject: [PATCH 1/3] Improve benchmarks of NodeUniqueAttributeConstraintQuery --- backend/infrahub/config.py | 1 + .../core/validators/uniqueness/model.py | 14 ++ backend/tests/conftest.py | 12 +- backend/tests/helpers/constants.py | 1 + .../query_benchmark/car_person_generators.py | 70 ++++++- .../helpers/query_benchmark/data_generator.py | 10 +- .../query_benchmark/db_query_profiler.py | 27 ++- backend/tests/helpers/utils.py | 3 +- backend/tests/query_benchmark/conftest.py | 46 +++-- .../test_node_manager_query.py | 172 ------------------ .../test_node_unique_attribute_constraint.py | 171 ++++++++++++----- backend/tests/query_benchmark/utils.py | 19 +- 12 files changed, 297 insertions(+), 249 deletions(-) delete mode 100644 backend/tests/query_benchmark/test_node_manager_query.py diff --git a/backend/infrahub/config.py b/backend/infrahub/config.py index 64b67802ef..f1fb8ca9c9 100644 --- a/backend/infrahub/config.py +++ b/backend/infrahub/config.py @@ -192,6 +192,7 @@ class DatabaseSettings(BaseSettings): password: str = "admin" address: str = "localhost" port: int = 7687 + neo4j_http_port: int = 7474 database: Optional[str] = Field(default=None, pattern=VALID_DATABASE_NAME_REGEX, description="Name of the database") tls_enabled: bool = Field(default=False, description="Indicates if TLS is enabled for the connection") tls_insecure: bool = Field(default=False, description="Indicates if TLS certificates are verified") diff --git a/backend/infrahub/core/validators/uniqueness/model.py b/backend/infrahub/core/validators/uniqueness/model.py index e2cb7a4100..0be3e0072e 100644 --- a/backend/infrahub/core/validators/uniqueness/model.py +++ b/backend/infrahub/core/validators/uniqueness/model.py @@ -44,6 +44,20 @@ def __bool__(self) -> bool: return True return False + def __str__(self) -> str: + return ( + "ATTRS: " + + "; ".join( + q.attribute_name + " " + str(q.property_name) + " " + (str(q.value) if q.value is not None else "") + for q in self.unique_attribute_paths + ) + + " RELS: " + + "; ".join( + q.identifier + " " + str(q.attribute_name) + " " + (str(q.value) if q.value is not None else "") + for q in self.relationship_attribute_paths + ) + ) + class NonUniqueRelatedAttribute(BaseModel): relationship: RelationshipSchema diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 9ebdaa3681..478b882a39 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -48,6 +48,7 @@ NEO4J_IMAGE, PORT_BOLT_NEO4J, PORT_CLIENT_RABBITMQ, + PORT_HTTP_NEO4J, PORT_HTTP_RABBITMQ, PORT_MEMGRAPH, PORT_NATS, @@ -97,6 +98,7 @@ async def db( config.SETTINGS.database.address = "localhost" if neo4j is not None: config.SETTINGS.database.port = neo4j[PORT_BOLT_NEO4J] + config.SETTINGS.database.neo4j_http_port = neo4j[PORT_HTTP_NEO4J] else: assert memgraph is not None config.SETTINGS.database.port = memgraph[PORT_MEMGRAPH] @@ -120,7 +122,8 @@ async def reset_registry(db: InfrahubDatabase) -> None: @pytest.fixture -async def default_branch(reset_registry, local_storage_dir, empty_database, db: InfrahubDatabase) -> Branch: +async def default_branch(db: InfrahubDatabase) -> Branch: + await create_root_node(db=db) branch = await create_default_branch(db=db) await create_global_branch(db=db) registry.schema = SchemaManager() @@ -183,7 +186,10 @@ def neo4j(request: pytest.FixtureRequest, load_settings_before_session) -> Optio container = start_neo4j_container(NEO4J_IMAGE) request.addfinalizer(container.stop) - return {PORT_BOLT_NEO4J: get_exposed_port(container, PORT_BOLT_NEO4J)} + return { + PORT_BOLT_NEO4J: get_exposed_port(container, PORT_BOLT_NEO4J), + PORT_HTTP_NEO4J: get_exposed_port(container, PORT_HTTP_NEO4J), + } @pytest.fixture(scope="session") @@ -475,8 +481,6 @@ async def car_person_schema_unregistered(db: InfrahubDatabase, node_group_schema "name": "owner", "label": "Commander of Car", "peer": "TestPerson", - "optional": False, - "kind": "Parent", "cardinality": "one", "direction": "outbound", }, diff --git a/backend/tests/helpers/constants.py b/backend/tests/helpers/constants.py index 1b96bec7c1..5f61e866db 100644 --- a/backend/tests/helpers/constants.py +++ b/backend/tests/helpers/constants.py @@ -7,6 +7,7 @@ PORT_REDIS = 6379 PORT_CLIENT_RABBITMQ = 5672 PORT_HTTP_RABBITMQ = 15672 +PORT_HTTP_NEO4J = 7474 PORT_BOLT_NEO4J = 7687 PORT_MEMGRAPH = 7687 PORT_PREFECT = 4200 diff --git a/backend/tests/helpers/query_benchmark/car_person_generators.py b/backend/tests/helpers/query_benchmark/car_person_generators.py index b094498a36..7719b8e42b 100644 --- a/backend/tests/helpers/query_benchmark/car_person_generators.py +++ b/backend/tests/helpers/query_benchmark/car_person_generators.py @@ -1,6 +1,6 @@ import random import uuid -from typing import Optional +from typing import Optional, Tuple from infrahub.core import registry from infrahub.core.node import Node @@ -44,7 +44,11 @@ class PersonGenerator(DataGenerator): async def load_data(self, nb_elements: int) -> None: await self.load_persons(nb_persons=nb_elements) - async def load_persons(self, nb_persons: int, cars: Optional[dict[str, Node]] = None) -> dict[str, Node]: + async def load_persons( + self, + nb_persons: int, + cars: Optional[dict[str, Node]] = None, + ) -> dict[str, Node]: """ Load persons and return a mapping person_name -> person_node. If 'cars' is specified, each person created is linked to a few random cars. @@ -58,6 +62,7 @@ async def load_persons(self, nb_persons: int, cars: Optional[dict[str, Node]] = short_id = str(uuid.uuid4())[:8] person_name = f"person-{short_id}" person_node = await Node.init(db=self.db, schema=person_schema, branch=default_branch) + if cars is not None: random_cars = [cars[car_name] for car_name in random.choices(list(cars.keys()), k=5)] await person_node.new(db=self.db, name=person_name, cars=random_cars) @@ -90,6 +95,67 @@ async def load_data(self, nb_elements: int) -> None: await self.load_persons(nb_persons=nb_elements, cars=self.cars) +class CarFromExistingPersonGenerator(CarGenerator): + persons: Optional[dict[str, Node]] # mapping of existing cars names -> node + nb_persons: int + + def __init__(self, db: InfrahubDatabaseProfiler, nb_persons: int) -> None: + super().__init__(db) + self.nb_persons = nb_persons + self.persons = None + + async def init(self) -> None: + """Load persons, that will be later connected to generated cars.""" + self.persons = await PersonGenerator(self.db).load_persons(nb_persons=self.nb_persons) + + async def load_data(self, nb_elements: int) -> None: + assert self.persons is not None, "'init' method should be called before 'load_data'" + await self.load_cars(nb_cars=nb_elements, persons=self.persons) + + +class CarGeneratorWithOwnerHavingUniqueCar(CarGenerator): + persons: list[Tuple[str, Node]] # mapping of existing cars names -> node + nb_persons: int + nb_cars_loaded: int + + def __init__(self, db: InfrahubDatabaseProfiler, nb_persons: int) -> None: + super().__init__(db) + self.nb_persons = nb_persons + self.persons = [] + self.nb_cars_loaded = 0 + + async def init(self) -> None: + """Load persons, that will be later connected to generated cars.""" + persons = await PersonGenerator(self.db).load_persons(nb_persons=self.nb_persons) + self.persons = list(persons.items()) + + async def load_data(self, nb_elements: int) -> None: + """ + Generate cars with an owner, in a way that an owner can't have multiple cars. + Also generate distinct nb_seats per car. + """ + + default_branch = await registry.get_branch(db=self.db) + car_schema = registry.schema.get_node_schema(name="TestCar", branch=default_branch) + + for i in range(nb_elements): + short_id = str(uuid.uuid4())[:8] + car_name = f"car-{short_id}" + car_node = await Node.init(db=self.db, schema=car_schema, branch=default_branch) + + await car_node.new( + db=self.db, + name=car_name, + nbr_seats=self.nb_cars_loaded + i, + owner=self.persons[self.nb_cars_loaded + i][1], + ) + + async with self.db.start_session(): + await car_node.save(db=self.db) + + self.nb_cars_loaded += nb_elements + + class CarAndPersonIsolatedGenerator(DataGenerator): def __init__(self, db: InfrahubDatabaseProfiler) -> None: super().__init__(db) diff --git a/backend/tests/helpers/query_benchmark/data_generator.py b/backend/tests/helpers/query_benchmark/data_generator.py index df8bb003c6..eb35f42d4a 100644 --- a/backend/tests/helpers/query_benchmark/data_generator.py +++ b/backend/tests/helpers/query_benchmark/data_generator.py @@ -5,7 +5,10 @@ from rich.console import Console from rich.progress import Progress -from tests.helpers.query_benchmark.db_query_profiler import InfrahubDatabaseProfiler, ProfilerEnabler, QueryAnalyzer +from tests.helpers.query_benchmark.db_query_profiler import ( + InfrahubDatabaseProfiler, + ProfilerEnabler, +) class DataGenerator: @@ -33,7 +36,6 @@ async def load_data_and_profile( profile_frequency: int, graphs_output_location: Path, test_label: str, - query_analyzer: QueryAnalyzer, memory_profiling_rate: int = 25, ) -> None: """ @@ -54,11 +56,11 @@ async def load_data_and_profile( await data_generator.init() - query_analyzer.reset() - q, r = divmod(nb_elements, profile_frequency) nb_elem_per_batch = [profile_frequency] * q + ([r] if r else []) + query_analyzer = data_generator.db.query_analyzer + with Progress(console=Console(force_terminal=True)) as progress: # Need force_terminal to display with pytest task = progress.add_task( f"Loading elements from {data_generator.__class__.__name__}", total=len(nb_elem_per_batch) diff --git a/backend/tests/helpers/query_benchmark/db_query_profiler.py b/backend/tests/helpers/query_benchmark/db_query_profiler.py index dea1776459..13e1817118 100644 --- a/backend/tests/helpers/query_benchmark/db_query_profiler.py +++ b/backend/tests/helpers/query_benchmark/db_query_profiler.py @@ -9,14 +9,27 @@ from infrahub_sdk import Timestamp from neo4j import Record +from infrahub.config import SETTINGS + # pylint: skip-file from infrahub.database import InfrahubDatabase from infrahub.database.constants import Neo4jRuntime from infrahub.log import get_logger +from tests.helpers.constants import NEO4J_ENTERPRISE_IMAGE log = get_logger() +@dataclass +class BenchmarkConfig: + neo4j_image: str = NEO4J_ENTERPRISE_IMAGE + neo4j_runtime: Neo4jRuntime = Neo4jRuntime.DEFAULT + load_db_indexes: bool = False + + def __str__(self) -> str: + return f"{self.neo4j_image=} ; runtime: {self.neo4j_runtime} ; indexes: {self.load_db_indexes}" + + @dataclass class QueryMeasurement: duration: float @@ -97,7 +110,7 @@ def create_graphs(self, output_location: Path, label: str) -> None: for query_name in query_names: self.create_duration_graph(query_name=query_name, label=label, output_dir=output_location) - self.create_memory_graph(query_name=query_name, label=label, output_dir=output_location) + # self.create_memory_graph(query_name=query_name, label=label, output_dir=output_location) def create_duration_graph(self, query_name: str, label: str, output_dir: Path) -> None: metric = "duration" @@ -113,14 +126,14 @@ def create_duration_graph(self, query_name: str, label: str, output_dir: Path) - y = df_query[metric].values * 1000 plt.plot(x, y, label=label) - plt.legend() + plt.legend(bbox_to_anchor=(1.04, 1), borderaxespad=0) plt.ylabel("msec", fontsize=15) plt.title(f"Query - {query_name} | {metric}", fontsize=20) plt.grid() file_name = f"{name}.png" - plt.savefig(str(output_dir / file_name)) + plt.savefig(str(output_dir / file_name), bbox_inches="tight") def create_memory_graph(self, query_name: str, label: str, output_dir: Path) -> None: metric = "memory" @@ -168,9 +181,9 @@ def __exit__( class InfrahubDatabaseProfiler(InfrahubDatabase): - def __init__(self, query_analyzer: QueryAnalyzer, **kwargs: Any) -> None: + def __init__(self, **kwargs: Any) -> None: super().__init__(**kwargs) - self.query_analyzer = query_analyzer + self.query_analyzer = QueryAnalyzer() # Note that any attribute added here should be added to get_context method. def get_context(self) -> dict[str, Any]: @@ -193,11 +206,15 @@ async def execute_query_with_metadata( else: profile_memory = False + assert profile_memory is False, "Do not profile memory for now" + # Do the query and measure duration time_start = time.time() response, metadata = await super().execute_query_with_metadata(query, params, name) duration_time = time.time() - time_start + assert len(response) < SETTINGS.database.query_size_limit // 2, "make sure data return is small" + measurement = QueryMeasurement( duration=duration_time, memory=metadata["profile"]["args"]["GlobalMemory"] if profile_memory else None, diff --git a/backend/tests/helpers/utils.py b/backend/tests/helpers/utils.py index 05f6bca9e8..c5ba03e6af 100644 --- a/backend/tests/helpers/utils.py +++ b/backend/tests/helpers/utils.py @@ -1,7 +1,7 @@ from testcontainers.core.container import DockerContainer from testcontainers.core.waiting_utils import wait_for_logs -from tests.helpers.constants import PORT_BOLT_NEO4J +from tests.helpers.constants import PORT_BOLT_NEO4J, PORT_HTTP_NEO4J def get_exposed_port(container: DockerContainer, port: int) -> int: @@ -23,6 +23,7 @@ def start_neo4j_container(neo4j_image: str) -> DockerContainer: .with_env("NEO4J_dbms_security_procedures_unrestricted", "apoc.*") .with_env("NEO4J_dbms_security_auth__minimum__password__length", "4") .with_exposed_ports(PORT_BOLT_NEO4J) + .with_exposed_ports(PORT_HTTP_NEO4J) ) container.start() diff --git a/backend/tests/query_benchmark/conftest.py b/backend/tests/query_benchmark/conftest.py index a403d897a1..fd5894f8f9 100644 --- a/backend/tests/query_benchmark/conftest.py +++ b/backend/tests/query_benchmark/conftest.py @@ -1,29 +1,45 @@ +from pathlib import Path +from typing import Any + import pytest +from infrahub.core.constants import BranchSupportType from infrahub.core.schema import SchemaRoot -from tests.helpers.query_benchmark.db_query_profiler import QueryAnalyzer - -@pytest.fixture(scope="session") -def query_analyzer() -> QueryAnalyzer: - return QueryAnalyzer() +RESULTS_FOLDER = Path(__file__).resolve().parent / "query_performance_results" @pytest.fixture -def car_person_schema_root() -> SchemaRoot: - schema = { +async def car_person_schema_root() -> SchemaRoot: + schema: dict[str, Any] = { "nodes": [ { "name": "Car", "namespace": "Test", "default_filter": "name__value", - "display_labels": ["name__value", "nbr_seats__value"], + "display_labels": ["name__value", "color__value"], + "uniqueness_constraints": [["name__value"]], + "branch": BranchSupportType.AWARE.value, "attributes": [ {"name": "name", "kind": "Text", "unique": True}, - {"name": "nbr_seats", "kind": "Number"}, + {"name": "nbr_seats", "kind": "Number", "optional": True}, + {"name": "color", "kind": "Text", "default_value": "#444444", "max_length": 7, "optional": True}, + {"name": "is_electric", "kind": "Boolean", "optional": True}, + { + "name": "transmission", + "kind": "Text", + "optional": True, + "enum": ["manual", "automatic", "flintstone-feet"], + }, ], "relationships": [ - {"name": "owner", "peer": "TestPerson", "optional": True, "cardinality": "one"}, + { + "name": "owner", + "label": "Commander of Car", + "peer": "TestPerson", + "cardinality": "one", + "direction": "outbound", + }, ], }, { @@ -31,14 +47,18 @@ def car_person_schema_root() -> SchemaRoot: "namespace": "Test", "default_filter": "name__value", "display_labels": ["name__value"], - "order_by": ["height__value"], + "branch": BranchSupportType.AWARE.value, + "uniqueness_constraints": [["name__value"]], "attributes": [ {"name": "name", "kind": "Text", "unique": True}, {"name": "height", "kind": "Number", "optional": True}, ], - "relationships": [{"name": "cars", "peer": "TestCar", "cardinality": "many"}], + "relationships": [ + {"name": "cars", "peer": "TestCar", "cardinality": "many", "direction": "inbound"}, + {"name": "animal", "peer": "TestAnimal", "cardinality": "one", "direction": "inbound"}, + ], }, ], } - return SchemaRoot(**schema) # type: ignore[arg-type] + return SchemaRoot(**schema) diff --git a/backend/tests/query_benchmark/test_node_manager_query.py b/backend/tests/query_benchmark/test_node_manager_query.py deleted file mode 100644 index bd0d80ea92..0000000000 --- a/backend/tests/query_benchmark/test_node_manager_query.py +++ /dev/null @@ -1,172 +0,0 @@ -import inspect -from functools import partial -from pathlib import Path - -import pytest - -from infrahub.core import registry -from infrahub.core.manager import NodeManager -from infrahub.core.query.node import NodeGetListQuery, NodeListGetAttributeQuery, NodeListGetInfoQuery -from infrahub.database import QueryConfig -from infrahub.database.constants import Neo4jRuntime -from infrahub.log import get_logger -from tests.helpers.constants import NEO4J_COMMUNITY_IMAGE, NEO4J_ENTERPRISE_IMAGE -from tests.helpers.query_benchmark.car_person_generators import ( - CarGenerator, - PersonFromExistingCarGenerator, - PersonGenerator, -) -from tests.helpers.query_benchmark.data_generator import load_data_and_profile -from tests.query_benchmark.utils import start_db_and_create_default_branch - -RESULTS_FOLDER = Path(__file__).resolve().parent / "query_performance_results" - -log = get_logger() - - -@pytest.mark.parametrize( - "neo4j_image, neo4j_runtime", - [ - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.PARALLEL, - ), - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ( - NEO4J_COMMUNITY_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ], -) -async def test_query_persons(query_analyzer, neo4j_image: str, neo4j_runtime: Neo4jRuntime, car_person_schema_root): - queries_names_to_config = { - NodeGetListQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetAttributeQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetInfoQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - } - db_profiling_queries, default_branch = await start_db_and_create_default_branch( - neo4j_image=neo4j_image, queries_names_to_config=queries_names_to_config, query_analyzer=query_analyzer - ) - - registry.schema.register_schema(schema=car_person_schema_root, branch=default_branch.name) - person_schema = registry.schema.get_node_schema(name="TestPerson", branch=default_branch) - func_call = partial(NodeManager.query, db=db_profiling_queries, limit=50, schema=person_schema) - - person_generator = PersonGenerator(db=db_profiling_queries) - - graph_output_location = RESULTS_FOLDER / inspect.currentframe().f_code.co_name - - await load_data_and_profile( - data_generator=person_generator, - func_call=func_call, - profile_frequency=50, - nb_elements=1000, - graphs_output_location=graph_output_location, - query_analyzer=query_analyzer, - test_label=f" data: {neo4j_image}" + f" runtime: {neo4j_runtime}", - ) - - -@pytest.mark.parametrize( - "neo4j_image, neo4j_runtime", - [ - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.PARALLEL, - ), - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ( - NEO4J_COMMUNITY_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ], -) -async def test_query_persons_with_isolated_cars( - query_analyzer, neo4j_image: str, neo4j_runtime: Neo4jRuntime, car_person_schema_root -): - queries_names_to_config = { - NodeGetListQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetAttributeQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetInfoQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - } - db_profiling_queries, default_branch = await start_db_and_create_default_branch( - neo4j_image=neo4j_image, queries_names_to_config=queries_names_to_config, query_analyzer=query_analyzer - ) - - registry.schema.register_schema(schema=car_person_schema_root, branch=default_branch.name) - person_schema = registry.schema.get_node_schema(name="TestPerson", branch=default_branch) - func_call = partial(NodeManager.query, db=db_profiling_queries, limit=50, schema=person_schema) - - graph_output_location = RESULTS_FOLDER / inspect.currentframe().f_code.co_name - - # Load cars in database, that are not connected to persons being queried. - cars_generator = CarGenerator(db=db_profiling_queries) - await cars_generator.load_cars(nb_cars=1000) - - person_generator = PersonGenerator(db=db_profiling_queries) - - # Load persons, and run/profile NodeManager.query at a given frequency - await load_data_and_profile( - data_generator=person_generator, - func_call=func_call, - profile_frequency=50, - nb_elements=1000, - graphs_output_location=graph_output_location, - query_analyzer=query_analyzer, - test_label=f" data: {neo4j_image}" + f" runtime: {neo4j_runtime}", - ) - - -@pytest.mark.parametrize( - "neo4j_image, neo4j_runtime", - [ - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.PARALLEL, - ), - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ( - NEO4J_COMMUNITY_IMAGE, - Neo4jRuntime.DEFAULT, - ), - ], -) -async def test_query_persons_with_connected_cars( - query_analyzer, neo4j_image: str, neo4j_runtime: Neo4jRuntime, car_person_schema_root -): - queries_names_to_config = { - NodeGetListQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetAttributeQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - NodeListGetInfoQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime), - } - db_profiling_queries, default_branch = await start_db_and_create_default_branch( - neo4j_image=neo4j_image, queries_names_to_config=queries_names_to_config, query_analyzer=query_analyzer - ) - - registry.schema.register_schema(schema=car_person_schema_root, branch=default_branch.name) - person_schema = registry.schema.get_node_schema(name="TestPerson", branch=default_branch) - func_call = partial(NodeManager.query, db=db_profiling_queries, limit=50, schema=person_schema) - - graph_output_location = RESULTS_FOLDER / inspect.currentframe().f_code.co_name - - person_generator = PersonFromExistingCarGenerator(db=db_profiling_queries, nb_cars=1000) - - # Load persons, and run/profile NodeManager.query at a given frequency - await load_data_and_profile( - data_generator=person_generator, - func_call=func_call, - profile_frequency=50, - nb_elements=1000, - graphs_output_location=graph_output_location, - query_analyzer=query_analyzer, - test_label=f" data: {neo4j_image}" + f" runtime: {neo4j_runtime}", - ) diff --git a/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py b/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py index db88653a86..2c04879eda 100644 --- a/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py +++ b/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py @@ -1,74 +1,163 @@ import inspect -from functools import partial from pathlib import Path import pytest from infrahub.core import registry -from infrahub.core.validators.uniqueness.model import NodeUniquenessQueryRequest, QueryAttributePath +from infrahub.core.validators.uniqueness.model import ( + NodeUniquenessQueryRequest, + QueryAttributePath, + QueryRelationshipAttributePath, +) from infrahub.core.validators.uniqueness.query import NodeUniqueAttributeConstraintQuery from infrahub.database import QueryConfig from infrahub.database.constants import Neo4jRuntime from infrahub.log import get_logger from tests.helpers.constants import NEO4J_COMMUNITY_IMAGE, NEO4J_ENTERPRISE_IMAGE from tests.helpers.query_benchmark.car_person_generators import ( - CarGenerator, + CarGeneratorWithOwnerHavingUniqueCar, ) -from tests.helpers.query_benchmark.data_generator import load_data_and_profile +from tests.helpers.query_benchmark.data_generator import BenchmarkConfig, load_data_and_profile +from tests.query_benchmark.conftest import RESULTS_FOLDER +from tests.query_benchmark.utils import start_db_and_create_default_branch -from .utils import start_db_and_create_default_branch +log = get_logger() -RESULTS_FOLDER = Path(__file__).resolve().parent / "query_performance_results" +# pytestmark = pytest.mark.skip("Not relevant to test this currently.") -log = get_logger() + +async def benchmark_uniqueness_query( + query_request, car_person_schema_root, benchmark_config: BenchmarkConfig, test_params_label: str, test_name: str +): + """ + Profile NodeUniqueAttributeConstraintQuery with a given query_request / configuration, using a Car generator. + """ + + # Initialization + queries_names_to_config = { + NodeUniqueAttributeConstraintQuery.name: QueryConfig(neo4j_runtime=benchmark_config.neo4j_runtime) + } + db_profiling_queries, default_branch = await start_db_and_create_default_branch( + neo4j_image=benchmark_config.neo4j_image, + load_indexes=benchmark_config.load_db_indexes, + queries_names_to_config=queries_names_to_config, + ) + registry.schema.register_schema(schema=car_person_schema_root, branch=default_branch.name) + + # Build function to profile + async def init_and_execute(): + # Need this function to avoid loading data between `init` and `execute` methods. + query = await NodeUniqueAttributeConstraintQuery.init( + db=db_profiling_queries, + branch=default_branch, + query_request=query_request, + ) + await query.execute(db=db_profiling_queries) + assert len(query.results) == 0 # supposed to have no violation with CarGeneratorWithOwnerHavingUniqueCar + + nb_cars = 10_000 + cars_generator = CarGeneratorWithOwnerHavingUniqueCar(db=db_profiling_queries, nb_persons=nb_cars) + module_name = Path(__file__).stem + graph_output_location = RESULTS_FOLDER / module_name / test_name + + await load_data_and_profile( + data_generator=cars_generator, + func_call=init_and_execute, + profile_frequency=100, + nb_elements=nb_cars, + graphs_output_location=graph_output_location, + test_label=test_params_label, + ) @pytest.mark.parametrize( - "neo4j_image, neo4j_runtime", + "query_request", [ - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.PARALLEL, + NodeUniquenessQueryRequest( + kind="TestCar", unique_attribute_paths={QueryAttributePath(attribute_name="name", property_name="value")} ), - ( - NEO4J_ENTERPRISE_IMAGE, - Neo4jRuntime.DEFAULT, + NodeUniquenessQueryRequest( + kind="TestCar", + unique_attribute_paths={ + QueryAttributePath(attribute_name="name", property_name="value"), + QueryAttributePath(attribute_name="nbr_seats", property_name="value"), + }, ), - ( - NEO4J_COMMUNITY_IMAGE, - Neo4jRuntime.DEFAULT, + NodeUniquenessQueryRequest( + kind="TestCar", + unique_attribute_paths={ + QueryAttributePath(attribute_name="name", property_name="value"), + QueryAttributePath(attribute_name="nbr_seats", property_name="value"), + }, + relationship_attribute_paths={ + QueryRelationshipAttributePath(identifier="testcar__testperson", attribute_name="name") + }, ), ], ) -async def test_query_unique_cars_single_attribute( - query_analyzer, neo4j_image: str, neo4j_runtime: Neo4jRuntime, car_person_schema_root -): - queries_names_to_config = {NodeUniqueAttributeConstraintQuery.name: QueryConfig(neo4j_runtime=neo4j_runtime)} - db_profiling_queries, default_branch = await start_db_and_create_default_branch( - neo4j_image=neo4j_image, queries_names_to_config=queries_names_to_config, query_analyzer=query_analyzer +async def test_multiple_constraints(query_request, car_person_schema_root): + benchmark_config = BenchmarkConfig(neo4j_runtime=Neo4jRuntime.DEFAULT, neo4j_image=NEO4J_ENTERPRISE_IMAGE) + await benchmark_uniqueness_query( + query_request=query_request, + car_person_schema_root=car_person_schema_root, + benchmark_config=benchmark_config, + test_params_label=str(query_request), + test_name=inspect.currentframe().f_code.co_name, ) - # Register schema - registry.schema.register_schema(schema=car_person_schema_root, branch=default_branch.name) - query_unique_cars_name = await NodeUniqueAttributeConstraintQuery.init( - db=db_profiling_queries, - branch=default_branch, - query_request=NodeUniquenessQueryRequest( - kind="TestCar", unique_attribute_paths={QueryAttributePath(attribute_name="name", property_name="value")} - ), +@pytest.mark.parametrize( + "benchmark_config", + [ + BenchmarkConfig(neo4j_runtime=Neo4jRuntime.DEFAULT, neo4j_image=NEO4J_COMMUNITY_IMAGE), + BenchmarkConfig(neo4j_runtime=Neo4jRuntime.DEFAULT, neo4j_image=NEO4J_ENTERPRISE_IMAGE), + BenchmarkConfig(neo4j_runtime=Neo4jRuntime.PARALLEL, neo4j_image=NEO4J_ENTERPRISE_IMAGE), + ], +) +async def test_multiple_runtimes(benchmark_config, car_person_schema_root): + query_request = NodeUniquenessQueryRequest( + kind="TestCar", + unique_attribute_paths={ + QueryAttributePath(attribute_name="name", property_name="value"), + QueryAttributePath(attribute_name="nbr_seats", property_name="value"), + }, + relationship_attribute_paths={ + QueryRelationshipAttributePath(identifier="testcar__testperson", attribute_name="name") + }, + ) + + await benchmark_uniqueness_query( + query_request=query_request, + car_person_schema_root=car_person_schema_root, + benchmark_config=benchmark_config, + test_params_label=str(benchmark_config), + test_name=inspect.currentframe().f_code.co_name, ) - cars_generator = CarGenerator(db=db_profiling_queries) - graph_output_location = RESULTS_FOLDER / inspect.currentframe().f_code.co_name +@pytest.mark.parametrize( + "benchmark_config", + [ + BenchmarkConfig(neo4j_runtime=Neo4jRuntime.PARALLEL, neo4j_image=NEO4J_ENTERPRISE_IMAGE, load_db_indexes=False), + BenchmarkConfig(neo4j_runtime=Neo4jRuntime.PARALLEL, neo4j_image=NEO4J_ENTERPRISE_IMAGE, load_db_indexes=True), + ], +) +async def test_indexes(benchmark_config, car_person_schema_root): + query_request = NodeUniquenessQueryRequest( + kind="TestCar", + unique_attribute_paths={ + QueryAttributePath(attribute_name="name", property_name="value"), + QueryAttributePath(attribute_name="nbr_seats", property_name="value"), + }, + relationship_attribute_paths={ + QueryRelationshipAttributePath(identifier="testcar__testperson", attribute_name="name") + }, + ) - await load_data_and_profile( - data_generator=cars_generator, - func_call=partial(query_unique_cars_name.execute, db=db_profiling_queries), - profile_frequency=50, - nb_elements=1000, - graphs_output_location=graph_output_location, - query_analyzer=query_analyzer, - test_label=f" data: {neo4j_image}" + f" runtime: {neo4j_runtime}", + await benchmark_uniqueness_query( + query_request=query_request, + car_person_schema_root=car_person_schema_root, + benchmark_config=benchmark_config, + test_params_label=str(benchmark_config), + test_name=inspect.currentframe().f_code.co_name, ) diff --git a/backend/tests/query_benchmark/utils.py b/backend/tests/query_benchmark/utils.py index a94036f178..86058e2029 100644 --- a/backend/tests/query_benchmark/utils.py +++ b/backend/tests/query_benchmark/utils.py @@ -3,24 +3,24 @@ from infrahub import config from infrahub.core import registry from infrahub.core.branch import Branch +from infrahub.core.graph.index import node_indexes, rel_indexes from infrahub.core.initialization import create_default_branch, create_global_branch, create_root_node from infrahub.core.schema.manager import SchemaManager from infrahub.database import InfrahubDatabaseMode, QueryConfig, get_db -from tests.helpers.constants import PORT_BOLT_NEO4J -from tests.helpers.query_benchmark.db_query_profiler import InfrahubDatabaseProfiler, QueryAnalyzer +from tests.helpers.constants import PORT_BOLT_NEO4J, PORT_HTTP_NEO4J +from tests.helpers.query_benchmark.db_query_profiler import InfrahubDatabaseProfiler from tests.helpers.utils import start_neo4j_container async def start_db_and_create_default_branch( - neo4j_image: str, query_analyzer: QueryAnalyzer, queries_names_to_config: Optional[dict[str, QueryConfig]] = None + neo4j_image: str, load_indexes: bool, queries_names_to_config: Optional[dict[str, QueryConfig]] = None ) -> Tuple[InfrahubDatabaseProfiler, Branch]: + # Start database and create db profiler neo4j_container = start_neo4j_container(neo4j_image) config.SETTINGS.database.port = int(neo4j_container.get_exposed_port(PORT_BOLT_NEO4J)) + config.SETTINGS.database.neo4j_http_port = int(neo4j_container.get_exposed_port(PORT_HTTP_NEO4J)) db = InfrahubDatabaseProfiler( - mode=InfrahubDatabaseMode.DRIVER, - query_analyzer=query_analyzer, - driver=await get_db(), - queries_names_to_config=queries_names_to_config, + mode=InfrahubDatabaseMode.DRIVER, driver=await get_db(), queries_names_to_config=queries_names_to_config ) # Create default branch @@ -29,4 +29,9 @@ async def start_db_and_create_default_branch( await create_global_branch(db=db) registry.schema = SchemaManager() + # Initialize indexes if needed + if load_indexes: + db.manager.index.init(nodes=node_indexes, rels=rel_indexes) + await db.manager.index.add() + return db, default_branch From aa58d5effcc16bed0101e202ac0b8b41d4a221eb Mon Sep 17 00:00:00 2001 From: Lucas Guillermou Date: Thu, 17 Oct 2024 10:42:00 +0200 Subject: [PATCH 2/3] cleaning --- backend/infrahub/config.py | 1 - backend/tests/conftest.py | 6 ++-- .../query_benchmark/db_query_profiler.py | 30 ------------------- backend/tests/query_benchmark/conftest.py | 5 ++-- .../test_node_unique_attribute_constraint.py | 3 +- backend/tests/query_benchmark/utils.py | 3 +- 6 files changed, 8 insertions(+), 40 deletions(-) diff --git a/backend/infrahub/config.py b/backend/infrahub/config.py index f1fb8ca9c9..64b67802ef 100644 --- a/backend/infrahub/config.py +++ b/backend/infrahub/config.py @@ -192,7 +192,6 @@ class DatabaseSettings(BaseSettings): password: str = "admin" address: str = "localhost" port: int = 7687 - neo4j_http_port: int = 7474 database: Optional[str] = Field(default=None, pattern=VALID_DATABASE_NAME_REGEX, description="Name of the database") tls_enabled: bool = Field(default=False, description="Indicates if TLS is enabled for the connection") tls_insecure: bool = Field(default=False, description="Indicates if TLS certificates are verified") diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 478b882a39..354032868c 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -98,7 +98,6 @@ async def db( config.SETTINGS.database.address = "localhost" if neo4j is not None: config.SETTINGS.database.port = neo4j[PORT_BOLT_NEO4J] - config.SETTINGS.database.neo4j_http_port = neo4j[PORT_HTTP_NEO4J] else: assert memgraph is not None config.SETTINGS.database.port = memgraph[PORT_MEMGRAPH] @@ -122,8 +121,7 @@ async def reset_registry(db: InfrahubDatabase) -> None: @pytest.fixture -async def default_branch(db: InfrahubDatabase) -> Branch: - await create_root_node(db=db) +async def default_branch(reset_registry, local_storage_dir, empty_database, db: InfrahubDatabase) -> Branch: branch = await create_default_branch(db=db) await create_global_branch(db=db) registry.schema = SchemaManager() @@ -481,6 +479,8 @@ async def car_person_schema_unregistered(db: InfrahubDatabase, node_group_schema "name": "owner", "label": "Commander of Car", "peer": "TestPerson", + "optional": False, + "kind": "Parent", "cardinality": "one", "direction": "outbound", }, diff --git a/backend/tests/helpers/query_benchmark/db_query_profiler.py b/backend/tests/helpers/query_benchmark/db_query_profiler.py index 13e1817118..c16ac00a71 100644 --- a/backend/tests/helpers/query_benchmark/db_query_profiler.py +++ b/backend/tests/helpers/query_benchmark/db_query_profiler.py @@ -6,7 +6,6 @@ import matplotlib.pyplot as plt import pandas as pd -from infrahub_sdk import Timestamp from neo4j import Record from infrahub.config import SETTINGS @@ -40,18 +39,10 @@ class QueryMeasurement: class QueryAnalyzer: - _start_time: Optional[Timestamp] name: Optional[str] - count: int measurements: list[QueryMeasurement] - count_per_query: dict[str, int] - _df: Optional[pd.DataFrame] - measure_memory_usage: bool - sampling_memory_usage: int output_location: Path - neo4j_runtime: Neo4jRuntime nb_elements_loaded: int - query_to_nb_elts_loaded_to_measurements: dict[str, dict[int, QueryMeasurement]] profile_memory: bool profile_duration: bool @@ -59,14 +50,9 @@ def __init__(self) -> None: self.reset() def reset(self) -> None: - self._start_time = Timestamp() self.name = None - self.count = 0 self.measurements = [] - self.query_to_nb_elts_loaded_to_measurements = {} - self._df = None self.output_location = Path.cwd() - self.neo4j_runtime = Neo4jRuntime.DEFAULT self.nb_elements_loaded = 0 self.profile_duration = False self.profile_memory = False @@ -74,22 +60,6 @@ def reset(self) -> None: def increase_nb_elements_loaded(self, increment: int) -> None: self.nb_elements_loaded += increment - @property - def start_time(self) -> Timestamp: - if self._start_time: - return self._start_time - raise ValueError("start_time hasnt't been initialized yet") - - def create_directory(self, prefix: str, output_location: Path) -> Path: - time_str = self.start_time.to_string() - for char in [":", "-", "."]: - time_str = time_str.replace(char, "_") - directory_name = f"{time_str}_{prefix}" - full_directory = output_location / directory_name - if not full_directory.exists(): - full_directory.mkdir(parents=True) - return full_directory - def get_df(self) -> pd.DataFrame: data = {} for item in QueryMeasurement.__dataclass_fields__.keys(): diff --git a/backend/tests/query_benchmark/conftest.py b/backend/tests/query_benchmark/conftest.py index fd5894f8f9..f601360561 100644 --- a/backend/tests/query_benchmark/conftest.py +++ b/backend/tests/query_benchmark/conftest.py @@ -38,7 +38,6 @@ async def car_person_schema_root() -> SchemaRoot: "label": "Commander of Car", "peer": "TestPerson", "cardinality": "one", - "direction": "outbound", }, ], }, @@ -54,8 +53,8 @@ async def car_person_schema_root() -> SchemaRoot: {"name": "height", "kind": "Number", "optional": True}, ], "relationships": [ - {"name": "cars", "peer": "TestCar", "cardinality": "many", "direction": "inbound"}, - {"name": "animal", "peer": "TestAnimal", "cardinality": "one", "direction": "inbound"}, + {"name": "cars", "peer": "TestCar", "cardinality": "many"}, + {"name": "animal", "peer": "TestAnimal", "cardinality": "one"}, ], }, ], diff --git a/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py b/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py index 2c04879eda..c5391aebfa 100644 --- a/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py +++ b/backend/tests/query_benchmark/test_node_unique_attribute_constraint.py @@ -17,7 +17,8 @@ from tests.helpers.query_benchmark.car_person_generators import ( CarGeneratorWithOwnerHavingUniqueCar, ) -from tests.helpers.query_benchmark.data_generator import BenchmarkConfig, load_data_and_profile +from tests.helpers.query_benchmark.data_generator import load_data_and_profile +from tests.helpers.query_benchmark.db_query_profiler import BenchmarkConfig from tests.query_benchmark.conftest import RESULTS_FOLDER from tests.query_benchmark.utils import start_db_and_create_default_branch diff --git a/backend/tests/query_benchmark/utils.py b/backend/tests/query_benchmark/utils.py index 86058e2029..c13d5951f4 100644 --- a/backend/tests/query_benchmark/utils.py +++ b/backend/tests/query_benchmark/utils.py @@ -7,7 +7,7 @@ from infrahub.core.initialization import create_default_branch, create_global_branch, create_root_node from infrahub.core.schema.manager import SchemaManager from infrahub.database import InfrahubDatabaseMode, QueryConfig, get_db -from tests.helpers.constants import PORT_BOLT_NEO4J, PORT_HTTP_NEO4J +from tests.helpers.constants import PORT_BOLT_NEO4J from tests.helpers.query_benchmark.db_query_profiler import InfrahubDatabaseProfiler from tests.helpers.utils import start_neo4j_container @@ -18,7 +18,6 @@ async def start_db_and_create_default_branch( # Start database and create db profiler neo4j_container = start_neo4j_container(neo4j_image) config.SETTINGS.database.port = int(neo4j_container.get_exposed_port(PORT_BOLT_NEO4J)) - config.SETTINGS.database.neo4j_http_port = int(neo4j_container.get_exposed_port(PORT_HTTP_NEO4J)) db = InfrahubDatabaseProfiler( mode=InfrahubDatabaseMode.DRIVER, driver=await get_db(), queries_names_to_config=queries_names_to_config ) From b5a28476da815b5ac8c07d31f80d1c42da93804f Mon Sep 17 00:00:00 2001 From: Brett Lykins Date: Thu, 17 Oct 2024 13:47:54 -0400 Subject: [PATCH 3/3] Add reo.dev analytics to documentation (#4638) * adding reo.dev analytics * towncrier for reo.dev addition --- changelog/+reo_dot_dev.added.md | 1 + docs/docusaurus.config.ts | 2 ++ docs/static/js/custom-reo.js | 1 + 3 files changed, 4 insertions(+) create mode 100644 changelog/+reo_dot_dev.added.md create mode 100644 docs/static/js/custom-reo.js diff --git a/changelog/+reo_dot_dev.added.md b/changelog/+reo_dot_dev.added.md new file mode 100644 index 0000000000..5f679fca05 --- /dev/null +++ b/changelog/+reo_dot_dev.added.md @@ -0,0 +1 @@ +Added .js for Reo.dev analytics integration. diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 833fb8f191..4d5a2d1ef6 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -11,6 +11,8 @@ const config: Config = { src: 'https://plausible.io/js/script.js', defer: true, 'data-domain': 'docs.infrahub.app' + }, { + src: '/js/custom-reo.js' } ] : [], diff --git a/docs/static/js/custom-reo.js b/docs/static/js/custom-reo.js new file mode 100644 index 0000000000..5d44928c2f --- /dev/null +++ b/docs/static/js/custom-reo.js @@ -0,0 +1 @@ +!function () { var e, t, n; e = "cecb9ced66c4e45", t = function () { Reo.init({ clientID: "cecb9ced66c4e45" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();