From 01fd4a79580afa828a09e7ee14c3b01c4ec2a1e7 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Wed, 21 Aug 2024 10:05:47 -0400 Subject: [PATCH 1/2] chore: Remove docker support --- CHANGELOG.md | 3 + dotmotif/executors/Neo4jExecutor.py | 170 ++++++---------------------- poetry.lock | 99 ++-------------- pyproject.toml | 7 +- requirements.txt | 2 - setup.py | 5 +- 6 files changed, 51 insertions(+), 235 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d04c08..27f07e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +- **0.15.0** (August 21 2024) + - Backwards-incompatible changes: + - Removed the Docker container provisioning functionality from the `Neo4jExecutor` class. To continue using this functionality, please use the `tamarind` package directly, or install `dotmotif==0.14.0` - **0.14.0** (May 16 2023) - Housekeeping: - Upgraded the package manager to Poetry, with version-pinning now handled by the `pyproject.toml` file. diff --git a/dotmotif/executors/Neo4jExecutor.py b/dotmotif/executors/Neo4jExecutor.py index 5c6eb0c..4f03f55 100644 --- a/dotmotif/executors/Neo4jExecutor.py +++ b/dotmotif/executors/Neo4jExecutor.py @@ -1,5 +1,5 @@ """ -Copyright 2022 The Johns Hopkins University Applied Physics Laboratory. +Copyright 2024 The Johns Hopkins University Applied Physics Laboratory. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,22 +15,22 @@ """ from itertools import product -import os -import time -from uuid import uuid4 -from py2neo import Graph -import tamarind +try: + from py2neo import Graph +except ImportError: + raise ImportError( + "The Neo4jExecutor requires the `py2neo` package. " + "You can use `dotmotif[neo4j] or install it with `pip install py2neo`." + ) # Types only: -import networkx as nx from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from .. import dotmotif from .Executor import Executor -from ..ingest import NetworkXIngester def _remapped_operator(op): @@ -110,22 +110,28 @@ class Neo4jExecutor(Executor): """ A Neo4j executor that runs Cypher queries against a running Neo4j database. - . + If you have edit/admin privileges on the database, you can create + indices on the nodes by calling `create_index` with the attribute name. + Note that it is not a good idea to let your query executor log in with an + account that has write access to the database in a production environment. + """ def __init__(self, **kwargs) -> None: """ Create a new executor. - If there is an already-running Neo4j database, you can pass in - authentication information and it will connect. - - If there is no existing database, you can pass in a graph to ingest - and the executor will connect to it automatically. + Pass in authentication for a Neo4j database (username/pass) along with + a db_bolt_uri to connect to an existing database. Alternatively, pass + a py2neo.Graph object that has already been authenticated. - If there is no existing database and you do not pass in a graph, you - must pass an `import_directory`, which the container will mount as an - importable CSV resource. + Optionally, you can pass in `entity_labels` that specify the names of + the node and edge labels to use in the database. You must pass in a + dictionary with the keys "node" and "edge", mapping entity types to + labels. For an example, see `_DEFAULT_ENTITY_LABELS` in + `Neo4jExecutor.py`, and for an example of how to make this compatible + with, say, NeuPrint, see the modified `_DEFAULT_ENTITY_LABELS` used in + the `NeuPrintExecutor.py` file. Arguments: db_bolt_uri (str): If connecting to an existing server, the URI @@ -133,145 +139,35 @@ def __init__(self, **kwargs) -> None: username (str: "neo4j"): The username to use to attach to an existing server. password (str): The password to use to attach to an existing server. - graph (nx.Graph): If provisioning a new database, the networkx - graph to import into the database. - import_directory (str): If provisioning a new database, the local - directory to crawl for CSVs to import into the Neo4j database. - Commonly used when you want to quickly and easily start a new - Executor that uses the export from a previous graph. - autoremove_container (bool: True): Whether to delete the container - when the executor is deconstructed. Set to False if you'd like - to be able to connect with other executors after the first one - has closed. - max_memory (str: "4G"): The maximum amount of memory to provision. - initial_memory (str: "2G"): The starting heap-size for the Neo4j - container's JVM. - max_retries (int: 20): The number of times DotMotif should try to - connect to the neo4j container before giving up. - wait_for_boot (bool: True): Whether the process should pause to - wait for a provisioned Docker container to come online. entity_labels (dict: _DEFAULT_ENTITY_LABELS): The set of labels to - use for nodes and edges. + expect for nodes and edges. """ - db_bolt_uri: Optional[str] = kwargs.get("db_bolt_uri", None) + db_bolt_uri: str = kwargs.get("db_bolt_uri", None) username: str = kwargs.get("username", "neo4j") password: Optional[str] = kwargs.get("password", None) - self._autoremove_container: bool = kwargs.get("autoremove_container", True) - self._wait_for_boot: bool = kwargs.get("wait_for_boot", True) - self._max_memory_size: str = kwargs.get("max_memory", "4G") - self._initial_heap_size: str = kwargs.get("initial_memory", "2G") - self.max_retries: int = kwargs.get("max_retries", 20) + graph: Graph = kwargs.get("graph", None) self._entity_labels = kwargs.get("entity_labels", _DEFAULT_ENTITY_LABELS) - graph: nx.Graph = kwargs.get("graph", None) - import_directory: Optional[str] = kwargs.get("import_directory", None) - - self._created_container = False - self._tamarind_provisioner = None - - if ( - (db_bolt_uri and graph) - or (db_bolt_uri and import_directory) - or (import_directory and graph) - ): - raise ValueError( - "Specify EXACTLY ONE of db_bolt_uri/graph/import_directory." - ) - - if db_bolt_uri: + if db_bolt_uri and username and password: # Authentication information was provided. Use this to log in and # connect to the existing database. self._connect_to_existing_graph(db_bolt_uri, username, password) - - elif graph: - export_dir = "export-custom-graph" - # A networkx graph was provided. - # We must export this to a set of CSV files, drop them to disk, - # and then we can use the same strategy as `import_directory` to - # run a container. - nxi = NetworkXIngester(graph, export_dir) - try: - nxi.ingest() - except Exception as e: - raise ValueError(f"Could not export graph: {e}") - - self._tamarind_provisioner = tamarind.Neo4jDockerProvisioner( - autoremove_containers=self._autoremove_container, - max_memory_size=self._max_memory_size, - initial_heap_size=self._initial_heap_size, - ) - self._create_container(export_dir) - - elif import_directory: - self._tamarind_provisioner = tamarind.Neo4jDockerProvisioner( - autoremove_containers=self._autoremove_container, - max_memory_size=self._max_memory_size, - initial_heap_size=self._initial_heap_size, - ) - self._create_container(import_directory) - + elif graph and isinstance(graph, Graph): + self.G = graph else: raise ValueError( - "You must supply either an existing db or a graph to load." + "You must provide either (db_bolt_uri and username and password) " + "or `graph` (a py2neo.Graph object)." ) - def __del__(self): - """ - Destroy the docker container from the running processes. - - Also will handle (TODO) other teardown actions. - """ - if self._created_container: - self._teardown_container() - def _connect_to_existing_graph( self, db_bolt_uri: str, username: str, password: str ) -> None: try: self.G = Graph(db_bolt_uri, username=username, password=password) - except: - raise ValueError(f"Could not connect to graph {db_bolt_uri}.") - - def _create_container(self, import_dir: str): - # Create a docker container: - - _run_before = ( - f"""./bin/neo4j-admin import --id-type STRING --nodes={self._entity_labels['node']}=""" - + f""""/import/export-neurons-.*.csv" --relationships={self._entity_labels['edge']['DEFAULT']}=""" - + """"/import/export-synapses-.*.csv" """ - ) - - self._tamarind_container_id = str(uuid4()) - ( - self._running_container, - self._container_port, - ) = self._tamarind_provisioner.start( - self._tamarind_container_id, - import_path=f"{os.getcwd()}/{import_dir}", - run_before=_run_before, - wait=self._wait_for_boot, - wait_attempt_limit=self.max_retries, - ) - self._created_container = True - container_is_ready = False - tries = 0 - while not container_is_ready: - try: - self.G = self._tamarind_provisioner[self._tamarind_container_id] - container_is_ready = True - except Exception as e: - tries += 1 - if tries > self.max_retries: - raise IOError( - f"Could not connect to neo4j container {self._running_container}. " - "For more information, see https://github.com/aplbrain/dotmotif/wiki/Troubleshooting-Neo4jExecutor." - ) - time.sleep(3) - self.G = self._tamarind_provisioner[self._tamarind_container_id] - - def _teardown_container(self): - self._tamarind_provisioner.stop(self._tamarind_container_id) + except Exception as e: + raise ValueError(f"Could not connect to graph {db_bolt_uri}.") from e def create_index(self, attribute_name: str): """ diff --git a/poetry.lock b/poetry.lock index c310eab..d4d3f93 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "asciitree" @@ -63,7 +63,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, @@ -179,27 +179,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "docker" -version = "6.1.2" -description = "A Python library for the Docker Engine API." -optional = true -python-versions = ">=3.7" -files = [ - {file = "docker-6.1.2-py3-none-any.whl", hash = "sha256:134cd828f84543cbf8e594ff81ca90c38288df3c0a559794c12f2e4b634ea19e"}, - {file = "docker-6.1.2.tar.gz", hash = "sha256:dcc088adc2ec4e7cfc594e275d8bd2c9738c56c808de97476939ef67db5af8c2"}, -] - -[package.dependencies] -packaging = ">=14.0" -pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} -requests = ">=2.26.0" -urllib3 = ">=1.26.0" -websocket-client = ">=0.32.0" - -[package.extras] -ssh = ["paramiko (>=2.4.3)"] - [[package]] name = "grandiso" version = "2.1.1" @@ -225,7 +204,7 @@ files = [ name = "interchange" version = "2021.0.4" description = "Data types and interchange formats" -optional = true +optional = false python-versions = "*" files = [ {file = "interchange-2021.0.4-py2.py3-none-any.whl", hash = "sha256:3a791b4df765c7136c318e53c388380ac5b003767808876c4fb4fef393917768"}, @@ -256,7 +235,7 @@ regex = ["regex"] name = "monotonic" version = "1.6" description = "An implementation of time.monotonic() for Python 2 & < 3.3" -optional = true +optional = false python-versions = "*" files = [ {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, @@ -431,7 +410,7 @@ xml = ["lxml (>=4.6.3)"] name = "pansi" version = "2020.7.3" description = "ANSI escape code library for Python" -optional = true +optional = false python-versions = "*" files = [ {file = "pansi-2020.7.3-py2.py3-none-any.whl", hash = "sha256:ce2b8acaf06dc59dcc711f61efbe53c836877f127d73f11fdd898b994e5c4234"}, @@ -469,13 +448,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- [[package]] name = "py2neo" -version = "2021.2.3" +version = "2021.2.4" description = "Python client library and toolkit for Neo4j" -optional = true +optional = false python-versions = "*" files = [ - {file = "py2neo-2021.2.3-py2.py3-none-any.whl", hash = "sha256:39c55614acdc00cbd4b2c9a43a2c7d1af81b92684da390f5501d844162a52173"}, - {file = "py2neo-2021.2.3.tar.gz", hash = "sha256:92bed6c03ed9c1068085c18e1f5af9313ddb25e9e441525cd856839a34c1e31a"}, + {file = "py2neo-2021.2.4-py2.py3-none-any.whl", hash = "sha256:2ddbe818354a6fa16d47dfd0fe5cb0287fa42ff109e87aa7b3e43636060d85a1"}, + {file = "py2neo-2021.2.4.tar.gz", hash = "sha256:4b2737fcd9fd8d82b57e856de4eda005281c9cf0741c989e5252678f0503f77e"}, ] [package.dependencies] @@ -492,7 +471,7 @@ urllib3 = "*" name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, @@ -527,29 +506,6 @@ files = [ {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] -[[package]] -name = "pywin32" -version = "306" -description = "Python for Window Extensions" -optional = true -python-versions = "*" -files = [ - {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, - {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, - {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, - {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, - {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, - {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, - {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, - {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, - {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, - {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, - {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, - {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, - {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, - {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, -] - [[package]] name = "requests" version = "2.31.0" @@ -646,23 +602,6 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -[[package]] -name = "tamarind" -version = "0.2.1" -description = "More Neo4j than you can shake a cat at" -optional = true -python-versions = ">=3.9.0" -files = [ - {file = "tamarind-0.2.1.tar.gz", hash = "sha256:f4a0085ddf1454f274e7e0a5665ae77b98ce98319df03589fc6153de191ed786"}, -] - -[package.dependencies] -docker = "*" -py2neo = "*" - -[package.extras] -dev = ["pylint", "pytest"] - [[package]] name = "tomli" version = "2.0.1" @@ -807,22 +746,6 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17. socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] -[[package]] -name = "websocket-client" -version = "1.5.1" -description = "WebSocket client for Python with low level API options" -optional = true -python-versions = ">=3.7" -files = [ - {file = "websocket-client-1.5.1.tar.gz", hash = "sha256:3f09e6d8230892547132177f575a4e3e73cfdf06526e20cc02aa1c3b47184d40"}, - {file = "websocket_client-1.5.1-py3-none-any.whl", hash = "sha256:cdf5877568b7e83aa7cf2244ab56a3213de587bbe0ce9d8b9600fc77b455d89e"}, -] - -[package.extras] -docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"] -optional = ["python-socks", "wsaccel"] -test = ["websockets"] - [extras] neo4j = ["py2neo", "tamarind"] neuprint = ["neuprint-python", "py2neo"] @@ -830,4 +753,4 @@ neuprint = ["neuprint-python", "py2neo"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3844a18f704442dc8e21db0def70a825c9ecf001c808574d1471165daedcea01" +content-hash = "f3ec5952ceed5c46b76c745700b68cff95d470ec5e5732e248274d0416a5c1b6" diff --git a/pyproject.toml b/pyproject.toml index 6066ebc..607d03e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,8 @@ lark-parser = "^0.12.0" pandas = "^2.0.1" grandiso = "^2.1.1" networkx = "^3.1" -tamarind = {version = "^0.2.1", optional = true} -py2neo = {version = "^2021.2.3", optional = true} neuprint-python = {version = "^0.4.25", optional = true} +py2neo = { version = "^2021.2.4", optional = true } [tool.poetry.group.dev.dependencies] ruff = "^0.0.267" @@ -28,8 +27,6 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry.extras] -neo4j = ["tamarind", "py2neo"] +neo4j = ["py2neo"] neuprint = ["neuprint-python", "py2neo"] - - diff --git a/requirements.txt b/requirements.txt index be2243b..e99920b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,7 @@ networkx>=2.4 numpy lark-parser -docker pandas py2neo -tamarind>=0.2.0 neuprint-python grandiso>=2.1.0 diff --git a/setup.py b/setup.py index 0b010a0..77d4f5e 100644 --- a/setup.py +++ b/setup.py @@ -35,15 +35,14 @@ "numpy", "lark-parser", "pandas", - "grandiso>=2.0.0", + "grandiso>=2.1.0", ], extras_require={ "neo4j": [ - "docker", - "tamarind>=0.1.5", "py2neo", ], "neuprint": [ + "py2neo", "neuprint-python", ], }, From 2469069dab85702be49086944c5b5d8585acb8c3 Mon Sep 17 00:00:00 2001 From: Jordan Matelsky Date: Wed, 21 Aug 2024 10:15:22 -0400 Subject: [PATCH 2/2] tests: Remove deprecated test --- dotmotif/executors/test_neo4jexecutor.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dotmotif/executors/test_neo4jexecutor.py b/dotmotif/executors/test_neo4jexecutor.py index b552ba8..51e98a6 100644 --- a/dotmotif/executors/test_neo4jexecutor.py +++ b/dotmotif/executors/test_neo4jexecutor.py @@ -4,14 +4,6 @@ import networkx as nx -class TestNeo4jExecutor(unittest.TestCase): - def test_numerical_ids_fail_with_message_github_26(self): - g = nx.DiGraph() - g.add_edge(1, 2) - with self.assertRaisesRegex(Exception, "numerical IDs"): - Neo4jExecutor(graph=g) - - class TestNeo4jExecutor_Automorphisms(unittest.TestCase): def test_basic_node_attr(self): exp = """\