From 22713342d5e3f09c4c0e279a9dbfd6f134ccb186 Mon Sep 17 00:00:00 2001 From: Vicente Calisto <37163344+VicenteVicente@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:52:30 -0300 Subject: [PATCH] Feat/walker querying (#6) * feat: get node ids by label * feat: updated example + get edge ids --- examples/graph_walker.ipynb | 115 ++++++++++++++++++++++----- src/pymilldb/graph.py | 151 +++++++++++++++++++----------------- src/pymilldb/protocol.py | 2 + 3 files changed, 176 insertions(+), 92 deletions(-) diff --git a/examples/graph_walker.ipynb b/examples/graph_walker.ipynb index 6cb9e93..6362db6 100644 --- a/examples/graph_walker.ipynb +++ b/examples/graph_walker.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 30, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -22,14 +22,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 129, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "I will use the node id: \"2328697699386406400\"\n" + "I will use the node id: \"2328705391622959104\"\n" ] } ], @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 130, "metadata": {}, "outputs": [ { @@ -51,11 +51,11 @@ "output_type": "stream", "text": [ "Node:\n", - "WalkerNode(node_id=2328697699386406400, name=Q28926, num_labels=3, num_properties=5)\n", + "WalkerNode(node_id=2328705391622959104, name=Q97690, num_labels=2, num_properties=3)\n", "Labels:\n", - "['buns', 'guava', 'soup']\n", + "['arisen', 'senatorial']\n", "Properties:\n", - "{'Tunney': False, 'antipodess': 8799.3271484375, 'snowplowing': 'stickups', 'departmentalizing': 8259.69921875, 'submergence': 479}\n" + "{'monosyllable': 7546.28076171875, 'pleasantry': 5804.23095703125, 'rubberized': 1730}\n" ] } ], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 131, "metadata": {}, "outputs": [ { @@ -80,16 +80,16 @@ "output_type": "stream", "text": [ "Outgoing edges:\n", - " ->2328697677811036416 :tentacles {'kabob': True}\n", - " ->2328698773095055360 :orbital {'passing': 182.10508728027344, 'emblazoning': 6269, 'penguins': 'Asmara'}\n", - " ->2328699876884428544 :extrication {'whereon': True, 'jiujitsu': 'scanners'}\n", - " ->2328699876884558080 :uniforms {'hungers': 9209.4384765625, 'persona': False, 'parallelogram': 9294.974609375}\n", - " ->2328699885441069312 :ghostwritten {'prayer': 4890, 'hampered': False}\n", - " ->2328702101727739904 :transformation {'cloudier': 'debacles', 'pleasantry': True, 'victimized': 2692}\n", + " ->2328696565465035008 :buttocks {'Mithra': 8458, 'gauntness': 2347}\n", + " ->2328702093121238784 :immigrated {'ode': True, 'burrowed': 1800.509033203125, 'submergence': 1984.509521484375}\n", "Incoming edges:\n", - " <-2328698803059110144 :raccoons {'ranched': 6923.93310546875, 'embryonic': 'thundershowers', 'Midwests': 8196, 'correctnesss': 6617, 'Davidson': 6676.39697265625, 'Tippecanoe': 'framework'}\n", - " <-2328704279125766912 :infrareds {'disjointedly': 1743.763916015625, 'practicable': 7463, 'hampered': 7928, 'juxtaposes': 3367, 'Zworykins': 7019, 'Rosicrucian': 6589.841796875}\n", - " <-2328705365903161344 :ghostwritten {'barricades': 4384.06884765625, 'parliamentarians': 6381.96044921875}\n" + " <-2328700980691415296 :arisen {'grey': 'Troys', 'policy': 'impairment', 'jiujitsu': 4926, 'scientists': 5640}\n", + " <-2328702093054456832 :daubed {'anxiously': 'wreath', 'vagrancys': 432, 'aftermath': 'outdoorss'}\n", + " <-2328705361574835200 :jaded {'racisms': 7671, 'sarong': 6811, 'hampered': 'coronet'}\n", + " <-2328704270653076224 :movers {'carpels': True, 'cortege': 7145.32666015625, 'gayness': 5296, 'trickerys': 2071.525390625, 'prohibitions': True}\n", + " <-2328699885373764608 :shlep {'deicers': True, 'douched': False, 'during': True, 'brooding': 'shifty'}\n", + " <-2328700963561682432 :hourglasss {'Arthur': 'Kristines', 'thousandths': 8442.953125, 'trifecta': 941.1663208007812, 'inamoratas': False}\n", + " <-2328696574071551744 :complexion {'Roche': False, 'don': 'smoothies', 'gushier': 3158, 'xenons': True, 'confounds': 'payable', 'juicinesss': 2487.49658203125}\n" ] } ], @@ -106,7 +106,84 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 1973 node_ids with label \"arisen\"\n", + "A few of them:\n", + " node_id=2328696565363978752\n", + " node_id=2328696565364111104\n", + " node_id=2328696565364240384\n", + " node_id=2328696565364307456\n", + " node_id=2328696565377531904\n" + ] + } + ], + "source": [ + "node_label = node.labels[0]\n", + "node_ids_with_label = gw.get_node_ids_by_label(label=node_label)\n", + "print(f\"Found {len(node_ids_with_label)} node_ids with label \\\"{node_label}\\\"\")\n", + "print(\"A few of them:\")\n", + "for i in range(min(5, len(node_ids_with_label))):\n", + " print(f\" node_id={node_ids_with_label[i]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 4821 edge_ids with type \"buttocks\"\n", + "A few of them:\n", + " edge_id=9223372036855016781\n", + " edge_id=9223372036855180955\n", + " edge_id=9223372036854787005\n", + " edge_id=9223372036855037123\n", + " edge_id=9223372036855251952\n", + "Found 1 edge_ids with type \"buttocks\" outgoing from \"2328705391622959104\"\n", + "A few of them:\n", + " edge_id=9223372036855269909\n", + "Found 2 edge_ids with type \"buttocks\" incoming from \"2328696565465035008\"\n", + "A few of them:\n", + " edge_id=9223372036855183349\n", + " edge_id=9223372036855269909\n" + ] + } + ], + "source": [ + "edge_type = edges_out[0].edge_type\n", + "edge_ids_with_type = gw.get_edge_ids_by_type(edge_type)\n", + "print(f'Found {len(edge_ids_with_type)} edge_ids with type \"{edge_type}\"')\n", + "print(\"A few of them:\")\n", + "for i in range(min(5, len(edge_ids_with_type))):\n", + " print(f\" edge_id={edge_ids_with_type[i]}\")\n", + "\n", + "source_id = edges_out[0].source\n", + "edge_ids_with_type_and_source = gw.get_edge_ids_by_type(edge_type=edge_type, node_id=source_id, direction=\"outgoing\")\n", + "print(f'Found {len(edge_ids_with_type_and_source)} edge_ids with type \"{edge_type}\" outgoing from \"{source_id}\"')\n", + "print(\"A few of them:\")\n", + "for i in range(min(5, len(edge_ids_with_type_and_source))):\n", + " print(f\" edge_id={edge_ids_with_type_and_source[i]}\")\n", + "\n", + "target_id = edges_out[0].target\n", + "edge_ids_with_type_and_target = gw.get_edge_ids_by_type(edge_type=edge_type, node_id=target_id, direction=\"incoming\")\n", + "print(f'Found {len(edge_ids_with_type_and_target)} edge_ids with type \"{edge_type}\" incoming from \"{target_id}\"')\n", + "print(\"A few of them:\")\n", + "for i in range(min(5, len(edge_ids_with_type_and_target))):\n", + " print(f\" edge_id={edge_ids_with_type_and_target[i]}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ diff --git a/src/pymilldb/graph.py b/src/pymilldb/graph.py index dad8ee9..8429dee 100644 --- a/src/pymilldb/graph.py +++ b/src/pymilldb/graph.py @@ -22,9 +22,7 @@ def dump_properties_milldb(properties: PropertiesDict) -> str: elif value_type == bool: ret += f" {k}:{str(v).lower()}" else: - print( - f'Skipping property with type "{value_type}". Only str, int, float and bool are supported.' - ) + print(f'Skipping property with type "{value_type}". Only str, int, float and bool are supported.') return ret @@ -176,36 +174,7 @@ def __init__(self, client: "MDBClient"): ## Client instance self.client = client - ## Describe a node by its identifier or name - def get_node(self, node_id: int | str) -> WalkerNode: - # Send request - msg = b"" - if isinstance(node_id, int): - msg += packer.pack_bool(True) - msg += packer.pack_uint64(node_id) - elif isinstance(node_id, str): - msg += packer.pack_bool(False) - msg += packer.pack_string(node_id) - else: - raise TypeError(f"node_id must be int or str, got {type(node_id)}") - self.client._send(RequestType.GRAPH_WALKER_GET_NODE, msg) - - # Handle response - data, _ = self.client._recv() - # Name - lo, hi = 0, data.index(b"\x00") - name = packer.unpack_string(data, lo, hi) - hi += 1 - # Labels - lo, hi = hi, hi + 8 - num_labels = packer.unpack_uint64(data, lo, hi) - labels = list() - for _ in range(num_labels): - lo, hi = hi, data.index(b"\x00", hi) - label = packer.unpack_string(data, lo, hi) - hi += 1 - labels.append(label) - # Properties + def _unpack_properties(self, data: bytes, hi: int) -> PropertiesDict: lo, hi = hi, hi + 8 num_properties = packer.unpack_uint64(data, lo, hi) properties = dict() @@ -237,17 +206,84 @@ def get_node(self, node_id: int | str) -> WalkerNode: else: raise ValueError(f"Invalid property value type code: {value_type_code}") properties[key] = value - return WalkerNode( - node_id=node_id, name=name, labels=labels, properties=properties - ) + return properties, lo, hi + + ## Describe a node by its identifier or name + def get_node(self, node_id: int | str) -> WalkerNode: + # Send request + msg = b"" + if isinstance(node_id, int): + msg += packer.pack_bool(True) + msg += packer.pack_uint64(node_id) + elif isinstance(node_id, str): + msg += packer.pack_bool(False) + msg += packer.pack_string(node_id) + else: + raise TypeError(f"node_id must be int or str, got {type(node_id)}") + self.client._send(RequestType.GRAPH_WALKER_GET_NODE, msg) + + # Handle response + data, _ = self.client._recv() + # Name + lo, hi = 0, data.index(b"\x00") + name = packer.unpack_string(data, lo, hi) + hi += 1 + # Labels + lo, hi = hi, hi + 8 + num_labels = packer.unpack_uint64(data, lo, hi) + labels = list() + for _ in range(num_labels): + lo, hi = hi, data.index(b"\x00", hi) + label = packer.unpack_string(data, lo, hi) + hi += 1 + labels.append(label) + # Properties + properties, _, _ = self._unpack_properties(data, hi) + return WalkerNode(node_id=node_id, name=name, labels=labels, properties=properties) + + ## Get all node_ids with a given label + def get_node_ids_by_label(self, label: str) -> List[int]: + # Send request + msg = b"" + msg += packer.pack_string(label) + self.client._send(RequestType.GRAPH_WALKER_GET_NODE_IDS_BY_LABEL, msg) + + # Handle response + data, _ = self.client._recv() + return packer.unpack_uint64_vector(data, 0, len(data)) + + ## Get all edge_ids with a given type. Optionally filter it by a node_id and its direction + def get_edge_ids_by_type( + self, edge_type: str, node_id: int = None, direction: Literal["outgoing", "incoming"] = None + ) -> List[int]: + # Send request + msg = b"" + msg += packer.pack_string(edge_type) + if node_id is None: + msg += packer.pack_bool(False) + else: + if direction not in ["outgoing", "incoming"]: + raise ValueError('Direction must be either "outgoing" or "incoming".') + msg += packer.pack_bool(True) + if isinstance(node_id, int): + msg += packer.pack_bool(True) + msg += packer.pack_uint64(node_id) + elif isinstance(node_id, str): + msg += packer.pack_bool(False) + msg += packer.pack_string(node_id) + else: + raise TypeError(f"node_id must be int or str, got {type(node_id)}") + msg += packer.pack_bool(direction == "outgoing") + self.client._send(RequestType.GRAPH_WALKER_GET_EDGE_IDS_BY_TYPE, msg) + + # Handle response + data, _ = self.client._recv() + return packer.unpack_uint64_vector(data, 0, len(data)) ## Get all outgoing or incoming edges from a node by its identifier or name - def get_edges( - self, node_id: int, direction: Literal["outgoing", "incoming"] = "outgoing" - ) -> List[WalkerNode]: + def get_edges(self, node_id: int | str, direction: Literal["outgoing", "incoming"]) -> List[WalkerNode]: if direction not in ["outgoing", "incoming"]: raise ValueError('Direction must be either "outgoing" or "incoming".') - # Send request msg = b"" if isinstance(node_id, int): @@ -258,6 +294,7 @@ def get_edges( msg += packer.pack_string(node_id) else: raise TypeError(f"node_id must be int or str, got {type(node_id)}") + msg += packer.pack_bool(direction == "outgoing") self.client._send(RequestType.GRAPH_WALKER_GET_EDGES, msg) # Handle response @@ -275,39 +312,7 @@ def get_edges( lo, hi = hi, data.index(b"\x00", hi) edge_type = packer.unpack_string(data, lo, hi) hi += 1 - lo, hi = hi, hi + 8 - num_properties = packer.unpack_uint64(data, lo, hi) - properties = dict() - for _ in range(num_properties): - # Key - lo, hi = hi, data.index(b"\x00", hi) - key = packer.unpack_string(data, lo, hi) - hi += 1 - # Value - value_type_code = data[hi] - hi += 1 - if value_type_code == 1: - # bool - value = packer.unpack_bool(data, hi) - hi += 1 - elif value_type_code == 2: - # int64 - lo, hi = hi, hi + 8 - value = packer.unpack_int64(data, lo, hi) - elif value_type_code == 3: - # float - lo, hi = hi, hi + 4 - value = packer.unpack_float(data, lo, hi) - elif value_type_code == 4: - # string - lo, hi = hi, data.index(b"\x00", hi) - value = packer.unpack_string(data, lo, hi) - hi += 1 - else: - raise ValueError( - f"Invalid property value type code: {value_type_code}" - ) - properties[key] = value + properties, lo, hi = self._unpack_properties(data, hi) edges.append( WalkerEdge( source=source, diff --git a/src/pymilldb/protocol.py b/src/pymilldb/protocol.py index 11c5c7a..860a866 100644 --- a/src/pymilldb/protocol.py +++ b/src/pymilldb/protocol.py @@ -33,6 +33,8 @@ class RequestType(IntEnum): # GRAPH EXPLORER GRAPH_WALKER_GET_EDGES = 0b0001_0001 GRAPH_WALKER_GET_NODE = 0b0001_0010 + GRAPH_WALKER_GET_NODE_IDS_BY_LABEL = 0b0001_0011 + GRAPH_WALKER_GET_EDGE_IDS_BY_TYPE = 0b0001_0100 ## Server response status codes.