diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49e0b734..ea4aa471 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,16 @@
 # Changelog
 
-## [1.0](https://github.com/rapidsai/gQuant/tree/1.0) (2020-12-17)
+## [v1.0.1](https://github.com/rapidsai/gQuant/tree/v1.0.1) (2021-01-20)
 
-[Full Changelog](https://github.com/rapidsai/gQuant/compare/0.5...1.0)
+[Full Changelog](https://github.com/rapidsai/gQuant/compare/v1.0.0...v1.0.1)
+
+**Merged pull requests:**
+
+- \[REVIEW\] Simple external plugin example [\#113](https://github.com/rapidsai/gQuant/pull/113) ([yidong72](https://github.com/yidong72))
+
+## [v1.0.0](https://github.com/rapidsai/gQuant/tree/v1.0.0) (2020-12-30)
+
+[Full Changelog](https://github.com/rapidsai/gQuant/compare/0.5...v1.0.0)
 
 **Closed issues:**
 
@@ -15,6 +23,7 @@
 
 - \[REVIEW\]gQuant plugin implementation [\#112](https://github.com/rapidsai/gQuant/pull/112) ([yidong72](https://github.com/yidong72))
 - Gpuciscripts clean and update [\#111](https://github.com/rapidsai/gQuant/pull/111) ([msadang](https://github.com/msadang))
+- \[REVIEW\] gQuant 1.0 [\#110](https://github.com/rapidsai/gQuant/pull/110) ([yidong72](https://github.com/yidong72))
 - Streamz gQuant example 2 [\#109](https://github.com/rapidsai/gQuant/pull/109) ([yidong72](https://github.com/yidong72))
 - Revert "Streamz gQuant example" [\#108](https://github.com/rapidsai/gQuant/pull/108) ([yidong72](https://github.com/yidong72))
 - Streamz gQuant example [\#107](https://github.com/rapidsai/gQuant/pull/107) ([yidong72](https://github.com/yidong72))
@@ -22,6 +31,7 @@
 - Nemo and xgboost integration [\#103](https://github.com/rapidsai/gQuant/pull/103) ([yidong72](https://github.com/yidong72))
 - FIX Update change log check [\#102](https://github.com/rapidsai/gQuant/pull/102) ([mike-wendt](https://github.com/mike-wendt))
 - \[REVIEW\] Update CI scripts to remove references to master \[skip ci\] [\#99](https://github.com/rapidsai/gQuant/pull/99) ([dillon-cullinan](https://github.com/dillon-cullinan))
+- \[skip ci\] Update master references for main branch [\#98](https://github.com/rapidsai/gQuant/pull/98) ([ajschmidt8](https://github.com/ajschmidt8))
 - \[REVIEW\]gQuant UI, first version [\#89](https://github.com/rapidsai/gQuant/pull/89) ([yidong72](https://github.com/yidong72))
 
 ## [0.5](https://github.com/rapidsai/gQuant/tree/0.5) (2020-07-10)
diff --git a/README.md b/README.md
index 37cac9ed..09566519 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,10 @@ To install JupyterLab plugin, install the following dependence libraries:
 ```bash
 conda install nodejs ipywidgets
 ```
+Build the ipywidgets JupyterLab plugin:
+```bash
+jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0
+```
 Then install the gquantlab lib:
 ```bash
 pip install gquantlab==0.1.1
diff --git a/docker/build.sh b/docker/build.sh
index 941779e2..39980f60 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -39,7 +39,7 @@ echo -e "\nPlease, select your CUDA version:\n" \
 
 read -p "Enter your option and hit return [1]-3: " CUDA_VERSION
 
-RAPIDS_VERSION="0.14.1"
+RAPIDS_VERSION="0.17.0"
 
 CUDA_VERSION=${CUDA_VERSION:-1}
 case $CUDA_VERSION in
@@ -158,7 +158,7 @@ RUN conda install -y -c conda-forge jupyterlab'<3.0.0'
 RUN conda install -y -c conda-forge python-graphviz bqplot nodejs ipywidgets \
     pytables mkl numexpr pydot flask pylint flake8 autopep8
 
-RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager --no-build
+RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0 --no-build
 RUN jupyter labextension install bqplot --no-build
 #RUN jupyter labextension install jupyterlab-nvdashboard --no-build
 RUN jupyter lab build && jupyter lab clean
@@ -169,7 +169,7 @@ RUN pip install jupyterlab-nvdashboard
 RUN jupyter labextension install jupyterlab-nvdashboard
 
 ## install the dask extension
-RUN pip install dask_labextension
+RUN pip install "dask_labextension<5.0.0"
 RUN jupyter labextension install dask-labextension
 RUN jupyter serverextension enable dask_labextension
 
@@ -289,9 +289,10 @@ index 901a79af..4eb76f95 100644
 @@ -14,4 +14,4 @@ unidecode
  webdataset
  kaldi-python-io
- librosa<=0.7.2
+-librosa<=0.7.2
++librosa<=0.8.0
 -numba<=0.48
-+numba==0.49.1
++numba==0.52.0
 diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt
 index 885adf3e..0e4e44e2 100644
 --- a/requirements/requirements_nlp.txt
diff --git a/external/README.md b/external/README.md
new file mode 100644
index 00000000..50784a72
--- /dev/null
+++ b/external/README.md
@@ -0,0 +1,51 @@
+## Simple External Plugin Example
+
+This is a simple example that shows how to write an external gQuant plugin. gQuant takes advantage of the `entry point` mechanism in the `setup.py` file to register the plugin. gQuant can discover all the plugins that have the entry point group name `gquant.plugin`. Check the `setup.py` file for details.
+
+### Create a new Python environment
+```bash
+conda create -n test python=3.8
+```
+
+### Install the gQuant lib
+To install the gQuant graph computation library, first install the dependency libraries:
+```bash
+pip install dask[dataframe] distributed networkx
+conda install python-graphviz ruamel.yaml numpy pandas
+```
+Then install the gquant lib:
+```bash
+pip install gquant
+```
+
+### Install the gQuantlab plugin
+To install the JupyterLab plugin, install the following dependency libraries:
+```bash
+conda install nodejs ipywidgets
+```
+Build the ipywidgets JupyterLab plugin:
+```bash
+jupyter labextension install @jupyter-widgets/jupyterlab-manager@2.0
+```
+Then install the gquantlab lib:
+```bash
+pip install gquantlab
+```
+If you launch JupyterLab, it will prompt you to build the new plugin. You can
+also build it explicitly with:
+```bash
+jupyter lab build
+```
+
+### Install the external example plugin
+To install the external plugin, run the following command in the plugin directory:
+```bash
+pip install .
+```
+
+### Launch JupyterLab
+After launching JupyterLab with
+```bash
+jupyter-lab --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token=''
+```
+you can see the `DistanceNode` and `PointNode` under the name `custom_nodes` in the menu.
diff --git a/external/example/__init__.py b/external/example/__init__.py
new file mode 100644
index 00000000..532b5555
--- /dev/null
+++ b/external/example/__init__.py
@@ -0,0 +1,77 @@
+from .distanceNode import DistanceNode
+from .pointNode import PointNode
+import pandas as pd
+import numpy as np
+from .client import validation, display  # noqa: F40
+from gquant.dataframe_flow._node_flow import register_validator
+from gquant.dataframe_flow._node_flow import register_copy_function
+
+
+def _validate_df(df_to_val, ref_cols, obj):
+    '''Validate a pandas DataFrame.
+
+    :param df_to_val: A dataframe typically of type pd.DataFrame
+    :param ref_cols: Dictionary of column names and their expected types.
+    :returns: True or False based on matching all columns in the df_to_val
+        and columns spec in ref_cols.
+    :raises: Exception - Raised when invalid dataframe length or unexpected
+        number of columns. TODO: Create a ValidationError subclass.
+
+    '''
+    if (isinstance(df_to_val, pd.DataFrame) and len(df_to_val) == 0):
+        err_msg = 'Node "{}" produced empty output'.format(obj.uid)
+        raise Exception(err_msg)
+
+    if not isinstance(df_to_val, pd.DataFrame):
+        return True
+
+    i_cols = df_to_val.columns
+    if len(i_cols) != len(ref_cols):
+        print("expect %d columns, only see %d columns"
+              % (len(ref_cols), len(i_cols)))
+        print("ref:", ref_cols)
+        print("columns", i_cols)
+        raise Exception("not valid for node %s" % (obj.uid))
+
+    for col in ref_cols.keys():
+        if col not in i_cols:
+            print("error for node %s, column %s is not in the required "
+                  "output df" % (obj.uid, col))
+            return False
+
+        if ref_cols[col] is None:
+            continue
+
+        err_msg = "for node {} type {}, column {} type {} "\
+            "does not match expected type {}".format(
+                obj.uid, type(obj), col, df_to_val[col].dtype,
+                ref_cols[col])
+
+        if ref_cols[col] == 'category':
+            # comparing pandas.core.dtypes.dtypes.CategoricalDtype to
+            # numpy.dtype causes TypeError. Instead, let's compare
+            # after converting all types to their string representation
+            # d_type_tuple = (pd.core.dtypes.dtypes.CategoricalDtype(),)
+            d_type_tuple = (str(pd.CategoricalDtype()),)
+        elif ref_cols[col] == 'date':
+            # cuDF read_csv doesn't understand 'datetime64[ms]' even
+            # though it reads the data in as 'datetime64[ms]', but
+            # expects 'date' as the dtype passed to read_csv.
+            d_type_tuple = ('datetime64[ms]', 'date', 'datetime64[ns]')
+        else:
+            d_type_tuple = (str(np.dtype(ref_cols[col])),)
+
+        if (str(df_to_val[col].dtype) not in d_type_tuple):
+            print("ERROR: {}".format(err_msg))
+            # Maybe raise an exception here and have the caller
+            # try/except the validation routine.
+            return False
+    return True
+
+
+def copy_df(df_obj):
+    return df_obj.copy(deep=False)
+
+
+register_validator(pd.DataFrame, _validate_df)
+register_copy_function(pd.DataFrame, copy_df)
\ No newline at end of file
diff --git a/external/example/client.py b/external/example/client.py
new file mode 100644
index 00000000..c24d98b1
--- /dev/null
+++ b/external/example/client.py
@@ -0,0 +1,26 @@
+
+display_fun = """
+    const columnKeys = Object.keys(metaObj);
+    let header = '';
+    if (columnKeys.length > 0) {
+        header += '<table>';
+        header += '<tr>';
+        header += '<th>Column Name</th>';
+        for (let i = 0; i < columnKeys.length; i++) {
+            header += `<th>${columnKeys[i]}</th>`;
+        }
+        header += '</tr>';
+        header += '<tr><th>Type</th>';
+        for (let i = 0; i < columnKeys.length; i++) {
+            header += `<td>${metaObj[columnKeys[i]]}</td>`;
+        }
+        header += '</tr>';
+        header += '</table>';
+    }
+    return header;
+"""
+
+
+validation = {}
+display = {}
+display['pandas.core.frame.DataFrame'] = display_fun
diff --git a/external/example/distanceNode.py b/external/example/distanceNode.py
new file mode 100644
index 00000000..a9e6f5b1
--- /dev/null
+++ b/external/example/distanceNode.py
@@ -0,0 +1,82 @@
+import pandas as pd
+import numpy as np
+from gquant.dataframe_flow import Node, MetaData
+from gquant.dataframe_flow import NodePorts, PortsSpecSchema
+from gquant.dataframe_flow import ConfSchema
+
+
+class DistanceNode(Node):
+
+    def ports_setup(self):
+        port_type = PortsSpecSchema.port_type
+        input_ports = {
+            'points_df_in': {
+                port_type: [pd.DataFrame]
+            }
+        }
+
+        output_ports = {
+            'distance_df': {
+                port_type: [pd.DataFrame]
+            },
+            'distance_abs_df': {
+                PortsSpecSchema.port_type: [pd.DataFrame]
+            }
+        }
+        input_connections = self.get_connected_inports()
+        if 'points_df_in' in input_connections:
+            types = input_connections['points_df_in']
+            # connected, use the types passed in from parent
+            return NodePorts(inports={'points_df_in': {port_type: types}},
+                             outports={'distance_df': {port_type: types},
+                                       'distance_abs_df': {port_type: types},
+                                       })
+        else:
+            return NodePorts(inports=input_ports, outports=output_ports)
+
+    def conf_schema(self):
+        return ConfSchema()
+
+    def init(self):
+        self.delayed_process = True
+
+    def meta_setup(self):
+        req_cols = {
+            'x': 'float64',
+            'y': 'float64'
+        }
+        required = {
+            'points_df_in': req_cols,
+        }
+        input_meta = self.get_input_meta()
+        output_cols = ({
+            'distance_df': {
+                'distance_cudf': 'float64',
+                'x': 'float64',
+                'y': 'float64'
+            },
+            'distance_abs_df': {
+                'distance_abs_cudf': 'float64',
+                'x': 'float64',
+                'y': 'float64'
+            }
+        })
+        if 'points_df_in' in input_meta:
+            col_from_inport = input_meta['points_df_in']
+            # additional ports
+            output_cols['distance_df'].update(col_from_inport)
+            output_cols['distance_abs_df'].update(col_from_inport)
+        return MetaData(inports=required, outports=output_cols)
+
+    def process(self, inputs):
+        df = inputs['points_df_in']
+        output = {}
+        if self.outport_connected('distance_df'):
+            copy_df = df.copy()
+            copy_df['distance_cudf'] = np.sqrt((df['x'] ** 2 + df['y'] ** 2))
+            output.update({'distance_df': copy_df})
+        if self.outport_connected('distance_abs_df'):
+            copy_df = df.copy()
+            copy_df['distance_abs_cudf'] = np.abs(df['x']) + np.abs(df['y'])
+            output.update({'distance_abs_df': copy_df})
+        return output
diff --git a/external/example/pointNode.py b/external/example/pointNode.py
new file mode 100644
index 00000000..692b1166
--- /dev/null
+++ b/external/example/pointNode.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pandas as pd
+from gquant.dataframe_flow import Node, MetaData
+from gquant.dataframe_flow import NodePorts, PortsSpecSchema
+from gquant.dataframe_flow import ConfSchema
+
+
+class PointNode(Node):
+
+    def ports_setup(self):
+        input_ports = {}
+        output_ports = {
+            'points_df_out': {
+                PortsSpecSchema.port_type: pd.DataFrame
+            }
+        }
+        return NodePorts(inports=input_ports, outports=output_ports)
+
+    def conf_schema(self):
+        json = {
+            "title": "PointNode configure",
+            "type": "object",
+            "properties": {
+                "npts": {
+                    "type": "number",
+                    "description": "number of data points",
+                    "minimum": 10
+                }
+            },
+            "required": ["npts"],
+        }
+
+        ui = {
+            "npts": {"ui:widget": "updown"}
+        }
+        return ConfSchema(json=json, ui=ui)
+
+    def init(self):
+        pass
+
+    def meta_setup(self):
+        columns_out = {
+            'points_df_out': {
+                'x': 'float64',
+                'y': 'float64'
+            },
+        }
+        return MetaData(inports={},
+                        outports=columns_out)
+
+    def process(self, inputs):
+        npts = self.conf['npts']
+        df = pd.DataFrame()
+        df['x'] = np.random.rand(npts)
+        df['y'] = np.random.rand(npts)
+        output = {}
+        if self.outport_connected('points_df_out'):
+            output.update({'points_df_out': df})
+        return output
diff --git a/external/setup.py b/external/setup.py
new file mode 100644
index 00000000..5ab267f7
--- /dev/null
+++ b/external/setup.py
@@ -0,0 +1,11 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='example_plugin',
+    packages=find_packages(include=['example']),
+    entry_points={
+        'gquant.plugin': [
+            'custom_nodes = example',
+        ],
+    }
+)
diff --git a/gquant/dataframe_flow/__init__.py b/gquant/dataframe_flow/__init__.py
index 473fc62f..2afab34b 100644
--- a/gquant/dataframe_flow/__init__.py
+++ b/gquant/dataframe_flow/__init__.py
@@ -2,3 +2,16 @@
 from .taskSpecSchema import *  # noqa: F401,F403
 from .taskGraph import *  # noqa: F401,F403
 from .portsSpecSchema import *  # noqa: F401,F403
+import sys
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # prior to python 3.8 need to install importlib-metadata
+    import importlib_metadata
+
+# load all the plugins from entry points
+for entry_point in importlib_metadata.entry_points().get('gquant.plugin', ()):
+    mod = entry_point.load()
+    name = entry_point.name
+    sys.modules[name] = mod
diff --git a/gquant/dataframe_flow/task.py b/gquant/dataframe_flow/task.py
index e1f44b4c..a4717629 100644
--- a/gquant/dataframe_flow/task.py
+++ b/gquant/dataframe_flow/task.py
@@ -40,7 +40,7 @@ def get_gquant_config_modules():
     if Path(gquant_cfg).is_file():
         config.read(gquant_cfg)
         if 'ModuleFiles' not in config:
-            return []
+            return {}
         modules_names = config.options('ModuleFiles')
         modules_list = {imod: config['ModuleFiles'][imod]
                         for imod in modules_names}
diff --git a/gquantlab/gquantlab/handlers.py b/gquantlab/gquantlab/handlers.py
index 4002ad78..4aa73bbf 100644
--- a/gquantlab/gquantlab/handlers.py
+++ b/gquantlab/gquantlab/handlers.py
@@ -8,6 +8,12 @@ import os
 from gquant.dataframe_flow.taskGraph import add_module_from_base64
 from gquant.dataframe_flow.task import get_gquant_config_modules, load_modules
 
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # prior to python 3.8 need to install importlib-metadata
+    import importlib_metadata
 
 
 class RouteHandlerLoadGraph(APIHandler):
@@ -63,6 +69,21 @@ def get(self):
                 print(client_mod, 'no display')
         # else:
         #     print(key, mod.mod, 'no client')
+
+        # load all the plugins from entry points
+        for entry_point in importlib_metadata.entry_points().get(
+                'gquant.plugin', ()):
+            client_mod = entry_point.load()
+            if hasattr(client_mod, 'validation'):
+                val_dict = getattr(client_mod, 'validation')
+                client_info['validation'].update(val_dict)
+            else:
+                print(client_mod, 'no validation')
+            if hasattr(client_mod, 'display'):
+                val_dict = getattr(client_mod, 'display')
+                client_info['display'].update(val_dict)
+            else:
+                print(client_mod, 'no display')
 
         self.finish(json.dumps(client_info))
 
diff --git a/gquantlab/gquantlab/server_utils.py b/gquantlab/gquantlab/server_utils.py
index f083168d..72858a72 100644
--- a/gquantlab/gquantlab/server_utils.py
+++ b/gquantlab/gquantlab/server_utils.py
@@ -7,6 +7,12 @@
 import gquant.plugin_nodes as plugin_nodes
 import inspect
 import uuid
+try:
+    # For python 3.8 and later
+    import importlib.metadata as importlib_metadata
+except ImportError:
+    # prior to python 3.8 need to install importlib-metadata
+    import importlib_metadata
 from pathlib import Path
 
 dynamic_modules = {}
@@ -286,4 +292,36 @@ def add_nodes():
             n = classObj(t)
             nodeObj = get_node_obj(n, False)
             node_lists.append(nodeObj)
+
+    # load all the plugins from entry points
+    for entry_point in importlib_metadata.entry_points().get('gquant.plugin',
+                                                              ()):
+        mod = entry_point.load()
+        modulename = entry_point.name
+
+        for node in inspect.getmembers(mod):
+            nodecls = node[1]
+            if not inspect.isclass(nodecls):
+                continue
+            if nodecls == Node:
+                continue
+
+            if not issubclass(nodecls, Node):
+                continue
+
+            if nodecls in loaded_node_classes:
+                continue
+
+            task = {'id': 'node_'+str(uuid.uuid4()),
+                    'type': node[0],
+                    'conf': {},
+                    'inputs': [],
+                    'module': modulename
+                    }
+            t = Task(task)
+            n = nodecls(t)
+            nodeObj = get_node_obj(n, False)
+            all_nodes.setdefault(modulename, []).append(nodeObj)
+            loaded_node_classes.append(nodecls)
+
     return all_nodes
diff --git a/setup.py b/setup.py
index 525823c5..13a95c71 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
 
 setup(
     name='gquant',
-    version='1.0.0',
+    version='1.0.1',
     description='gquant - RAPIDS Financial Services Algorithms',
     long_description=long_description,
     long_description_content_type='text/markdown',
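Usage sketch (not part of the diff above): once the `example_plugin` package is installed with `pip install .`, its nodes are discovered through the `gquant.plugin` entry point and can also be driven programmatically. The snippet below assumes gQuant's list-of-task-spec `TaskGraph` API as used in the main repository examples; the task types and port names follow `pointNode.py` and `distanceNode.py` in this PR, and the `module` value follows the `custom_nodes` entry-point name registered in `external/setup.py`. Treat it as an illustrative sketch rather than code shipped in this PR.

```python
# Illustrative sketch (assumes gQuant's TaskGraph task-spec API and an
# installed example plugin; not part of this PR).
from gquant.dataframe_flow import TaskGraph

task_list = [
    {'id': 'points',                 # generate a random point cloud
     'type': 'PointNode',
     'conf': {'npts': 1000},         # 'npts' is required by PointNode.conf_schema
     'inputs': {},
     'module': 'custom_nodes'},
    {'id': 'distance',               # compute sqrt(x**2 + y**2) per point
     'type': 'DistanceNode',
     'conf': {},
     'inputs': {'points_df_in': 'points.points_df_out'},
     'module': 'custom_nodes'},
]

task_graph = TaskGraph(task_list)
# Request only the 'distance_df' output port of the 'distance' task.
(distance_df,) = task_graph.run(outputs=['distance.distance_df'])
print(distance_df.head())
```

Because `DistanceNode.ports_setup` forwards whatever DataFrame type is connected upstream, the same two task specs should work unchanged if `PointNode` were swapped for a cuDF-producing node.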