diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 039920e272..ba9e0c6f96 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -92,6 +92,7 @@ outputs: - pandas 1.3.* - pluggy 1.0.* - pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF + - pyarrow-hotfix # CVE-2023-47248 - python - python-confluent-kafka 1.9.2 - pytorch 2.0.1 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 884973ebf0..dd9060236f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -1,14 +1,13 @@ # This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +# To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. channels: -- conda-forge -- huggingface - rapidsai - nvidia/label/cuda-11.8.0 - nvidia -- rapidsai-nightly - nvidia/label/dev +- huggingface - pytorch +- conda-forge - dglteam/label/cu118 dependencies: - appdirs @@ -17,6 +16,8 @@ dependencies: - boost-cpp=1.82 - boto3 - breathe=4.34.0 +- cachetools=5.0.0 +- ccache>=3.7 - clangdev=16 - click >=8 - click>=8 @@ -34,7 +35,7 @@ dependencies: - cython=0.29.24 - dask>=2023.1.1 - datacompy=0.8 -- dgl=1.0.2 +- dgl=1.1.1 - dill - dill=0.3.6 - distributed>=2023.1.1 @@ -52,6 +53,7 @@ dependencies: - include-what-you-use=0.20 - ipython - isort +- kafka-python=2.0 - langchain=0.0.190 - librdkafka=1.9.2 - libwebp>=1.3.2 @@ -65,14 +67,20 @@ dependencies: - ninja=1.10 - nlohmann_json=3.9 - nodejs=18.* +- numpy>=1.21,<1.25 - numpydoc=1.4 - nvtabular=23.06 - openai=0.28 +- pandas=1.3 - papermill=2.3.4 - pip - pluggy=1.0 +- port-for=0.7 - pre-commit - protobuf=4.21.* +- py4j=0.10 +- pyarrow * *_cuda +- pyarrow-hotfix - pybind11-stubgen=0.10 - pylint>=2.17.4,<2.18 - pypdf=3.16 @@ -94,6 +102,7 @@ dependencies: - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 +- sysroot_linux-64=2.17 - tqdm=4 - transformers=4.30.2 - tritonclient=2.26 @@ -109,7 +118,8 @@ dependencies: - grpcio-status==1.58 - milvus==2.3.2 - nemollm - - pyarrow_hotfix - pymilvus==2.3.2 - pytest-kafka==0.6.0 -name: all_cuda-118_arch-x86_64 + - rapids-dependency-file-generator +name: all_cuda-118_arch-x86_64_py-310 + diff --git a/conda/environments/dev_cuda-118_arch-x86_64.yaml b/conda/environments/dev_cuda-118_arch-x86_64.yaml index b13d2e8d64..9377ff1b48 100644 --- a/conda/environments/dev_cuda-118_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-118_arch-x86_64.yaml @@ -1,20 +1,21 @@ # This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +# To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
channels: -- conda-forge -- huggingface - rapidsai - nvidia/label/cuda-11.8.0 - nvidia -- rapidsai-nightly - nvidia/label/dev +- huggingface - pytorch +- conda-forge - dglteam/label/cu118 dependencies: - appdirs - benchmark=1.6.0 - boost-cpp=1.82 - breathe=4.34.0 +- cachetools=5.0.0 +- ccache>=3.7 - clangdev=16 - click >=8 - click>=8 @@ -43,6 +44,7 @@ dependencies: - include-what-you-use=0.20 - ipython - isort +- kafka-python=2.0 - librdkafka=1.9.2 - mlflow>=2.2.1,<3 - mrc=24.03 @@ -52,12 +54,18 @@ dependencies: - ninja=1.10 - nlohmann_json=3.9 - nodejs=18.* +- numpy>=1.21,<1.25 - numpydoc=1.4 - nvtabular=23.06 +- pandas=1.3 - pip - pluggy=1.0 +- port-for=0.7 - pre-commit - protobuf=4.21.* +- py4j=0.10 +- pyarrow * *_cuda +- pyarrow-hotfix - pybind11-stubgen=0.10 - pylint>=2.17.4,<2.18 - pytest @@ -75,6 +83,7 @@ dependencies: - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 +- sysroot_linux-64=2.17 - tqdm=4 - tritonclient=2.26 - typing_utils=0.1 @@ -85,7 +94,8 @@ dependencies: - pip: - databricks-connect - milvus==2.3.2 - - pyarrow_hotfix - pymilvus==2.3.2 - pytest-kafka==0.6.0 -name: dev_cuda-118_arch-x86_64 + - rapids-dependency-file-generator +name: dev_cuda-118_arch-x86_64_py-310 + diff --git a/dependencies.yaml b/dependencies.yaml index 6b31678d78..172be0ded1 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -107,14 +107,13 @@ files: channels: - - conda-forge - - huggingface - rapidsai - nvidia/label/cuda-11.8.0 - nvidia - - rapidsai-nightly - nvidia/label/dev + - huggingface - pytorch + - conda-forge - dglteam/label/cu118 dependencies: @@ -126,6 +125,8 @@ dependencies: - mrc=24.03 # should this be in build, or somewhere else? - gcc_linux-64=11.2 - gxx_linux-64=11.2 + - cachetools=5.0.0 + - ccache>=3.7 - cmake=3.25 - boost-cpp=1.82 - cuda-nvcc @@ -135,10 +136,14 @@ - librdkafka=1.9.2 - ninja=1.10 - nlohmann_json=3.9 + - pandas=1.3 - protobuf=4.21.* + - pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF + - pyarrow-hotfix - pybind11-stubgen=0.10 - rapidjson=1.1.0 - scikit-build=0.17.1 + - sysroot_linux-64=2.17 - tritonclient=2.26 # Required by NvTabular, force the version, so we get protobufs compatible with 4.21 - ucx=1.14 @@ -164,6 +169,9 @@ dependencies: - isort - pylint>=2.17.4,<2.18 # 2.17.4 contains a fix for toml support - yapf=0.40.1 + - pip + - pip: + - rapids-dependency-file-generator docs: common: @@ -196,7 +204,7 @@ dependencies: - boto3 - cuml=23.06 - dask>=2023.1.1 - - dgl=1.0.2 + - dgl=1.1.1 - dill=0.3.6 - distributed>=2023.1.1 - huggingface_hub=0.10.1 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 @@ -238,8 +246,12 @@ dependencies: - grpcio - mlflow>=2.2.1,<3 - nb_conda_kernels + # Avoids numerous warnings triggered by cudf; recent versions of cudf impose this version requirement + - numpy>=1.21,<1.25 - numpydoc=1.4 - nvtabular=23.06 + - pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF + - pyarrow-hotfix - python-confluent-kafka=1.9.2 - python-graphviz - pytorch-cuda @@ -251,11 +263,17 @@ dependencies: - typing_utils=0.1 - watchdog=2.1 - websockets + + ####### Pip Transitive Dependencies (keep sorted!) ####### # These are dependencies that are available on conda, but are required by the pip packages listed below. It's much + better to install them with conda than pip to allow for better dependency resolution.
+ - kafka-python=2.0 + - port-for=0.7 + - py4j=0.10 - pip - pip: - databricks-connect - milvus==2.3.2 - - pyarrow_hotfix - pymilvus==2.3.2 test_python_morpheus: diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 5ee09141b0..3d6b029bbd 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -74,6 +74,8 @@ dependencies: - ninja=1.10 - nodejs=18.* - numba>=0.56.2 + # Avoids numerous warnings triggered by cudf; recent versions of cudf impose this version requirement + - numpy>=1.21,<1.25 - numpydoc=1.4 - nvtabular=23.06 - pandas=1.3 @@ -82,6 +84,7 @@ dependencies: - pluggy=1.0 - protobuf=4.21.* - pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF + - pyarrow-hotfix # CVE-2023-47248. See morpheus/__init__.py for more details - pybind11-stubgen=0.10.5 - pydot - pylint>=2.17.4,<2.18 # 2.17.4 contains a fix for toml support @@ -123,6 +126,5 @@ dependencies: # Add additional dev dependencies here - databricks-connect - milvus==2.3.2 - - pyarrow_hotfix # CVE-2023-47248. See morpheus/__init__.py for more details - pymilvus==2.3.2 - pytest-kafka==0.6.0 diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index 1d49130bcc..3b549e1f17 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ dependencies: - boto3 - cuml=23.06 - dask>=2023.1.1 - - dgl=1.0.2 + - dgl=1.1.1 - dill=0.3.6 - distributed>=2023.1.1 - huggingface_hub=0.10.1 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 diff --git a/examples/gnn_fraud_detection_pipeline/requirements.yml b/examples/gnn_fraud_detection_pipeline/requirements.yml index e0f37be2cd..f15ba3847c 100644 --- a/examples/gnn_fraud_detection_pipeline/requirements.yml +++ b/examples/gnn_fraud_detection_pipeline/requirements.yml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,4 +21,4 @@ channels: - defaults dependencies: - cuml=23.06 - - dgl=1.0.2 + - dgl=1.1.1 diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py index fb127f4fac..74b24e52c7 100644 --- a/examples/llm/vdb_upload/run.py +++ b/examples/llm/vdb_upload/run.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -46,7 +46,7 @@ def run(): ) @click.option( "--model_max_batch_size", - default=64, + default=256, type=click.IntRange(min=1), help="Max batch size to use for the model", ) diff --git a/morpheus/_lib/src/objects/table_info.cpp b/morpheus/_lib/src/objects/table_info.cpp index ab25ebc213..dd89427a69 100644 --- a/morpheus/_lib/src/objects/table_info.cpp +++ b/morpheus/_lib/src/objects/table_info.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -300,7 +300,8 @@ std::optional<std::string> MutableTableInfo::ensure_sliceable_index() auto df_index = py_df.attr("index"); // Check to see if we actually need the change - if (df_index.attr("is_unique").cast<bool>() && df_index.attr("is_monotonic").cast<bool>()) + if (df_index.attr("is_unique").cast<bool>() && (df_index.attr("is_monotonic_increasing").cast<bool>() || + df_index.attr("is_monotonic_decreasing").cast<bool>())) { // Set the outputname to nullopt old_index_col_name = std::nullopt; diff --git a/morpheus/stages/general/monitor_stage.py b/morpheus/stages/general/monitor_stage.py index 5d2c4a665e..8d709d7d92 100644 --- a/morpheus/stages/general/monitor_stage.py +++ b/morpheus/stages/general/monitor_stage.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -99,7 +99,7 @@ def accepted_types(self) -> typing.Tuple: def supports_cpp_node(self): return False - def on_start(self): + async def start_async(self): """ Starts the pipeline stage's progress bar. """ diff --git a/morpheus/stages/output/http_server_sink_stage.py b/morpheus/stages/output/http_server_sink_stage.py index 458f821c67..2a0be0a298 100644 --- a/morpheus/stages/output/http_server_sink_stage.py +++ b/morpheus/stages/output/http_server_sink_stage.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -142,7 +142,7 @@ def supports_cpp_node(self): """Indicates whether or not this stage supports a C++ node.""" return False - def on_start(self): + async def start_async(self): """Starts the HTTP server.""" from morpheus.common import HttpServer self._server = HttpServer(parse_fn=self._request_handler, diff --git a/morpheus/stages/preprocess/preprocess_nlp_stage.py b/morpheus/stages/preprocess/preprocess_nlp_stage.py index a06ee5a30c..cebafa6d65 100644 --- a/morpheus/stages/preprocess/preprocess_nlp_stage.py +++ b/morpheus/stages/preprocess/preprocess_nlp_stage.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ # limitations under the License.
import typing +import warnings from functools import partial import mrc @@ -148,13 +149,18 @@ def pre_process_batch(x: MultiMessage, """ text_ser = cudf.Series(x.get_meta(column)) - tokenized = tokenize_text_series(vocab_hash_file=vocab_hash_file, - do_lower_case=do_lower_case, - text_ser=text_ser, - seq_len=seq_len, - stride=stride, - truncation=truncation, - add_special_tokens=add_special_tokens) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="When truncation is not True, the behavior currently differs from HuggingFace.*", + category=UserWarning) + tokenized = tokenize_text_series(vocab_hash_file=vocab_hash_file, + do_lower_case=do_lower_case, + text_ser=text_ser, + seq_len=seq_len, + stride=stride, + truncation=truncation, + add_special_tokens=add_special_tokens) del text_ser seg_ids = tokenized.segment_ids diff --git a/morpheus/utils/column_info.py b/morpheus/utils/column_info.py index 783bbb88c6..59ce19a6ba 100644 --- a/morpheus/utils/column_info.py +++ b/morpheus/utils/column_info.py @@ -387,7 +387,14 @@ def _process_column(self, df: pd.DataFrame) -> pd.Series: The processed column as a datetime Series. """ - return pd.to_datetime(df[self.input_name], infer_datetime_format=True, utc=True).astype(self.get_pandas_dtype()) + dt_series = pd.to_datetime(df[self.input_name], infer_datetime_format=True, utc=True) + + dtype = self.get_pandas_dtype() + if dtype == 'datetime64[ns]': + # avoid deprecation warning about using .astype to convert from a tz-aware type to a tz-naive type + return dt_series.dt.tz_localize(None) + + return dt_series.astype(dtype) @dataclasses.dataclass diff --git a/morpheus/utils/downloader.py b/morpheus/utils/downloader.py index d2882afa93..0a68ae6e14 100644 --- a/morpheus/utils/downloader.py +++ b/morpheus/utils/downloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ import os import threading import typing +import warnings from enum import Enum import fsspec @@ -131,7 +132,13 @@ def get_dask_client(self): dask.config.set({"distributed.client.heartbeat": self._dask_heartbeat_interval}) if (self._merlin_distributed is None): - self._merlin_distributed = Distributed(client=dask.distributed.Client(self.get_dask_cluster())) + with warnings.catch_warnings(): + # Merlin.Distributed will warn if a client already exists, but the client in question is the one + # we create and explicitly pass to it in the constructor. + warnings.filterwarnings("ignore", + message="Existing Dask-client object detected in the current context.*", + category=UserWarning) + self._merlin_distributed = Distributed(client=dask.distributed.Client(self.get_dask_cluster())) return self._merlin_distributed diff --git a/pyproject.toml b/pyproject.toml index c33d4e0e88..91056c2b2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,12 +29,16 @@ filterwarnings = [ 'ignore:`np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself.
Doing this will not modify any behavior and is safe', 'ignore:Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe context manager to modify the DataFrame in-place instead.', 'ignore:`np.MachAr` is deprecated \(NumPy 1.22\):DeprecationWarning', - 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', + 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', # Deprecation warning from any project using distutils, currently known sources of this are: # GPUtils https://github.com/anderskm/gputil/issues/48 # PySpark https://issues.apache.org/jira/browse/SPARK-45390 'ignore:The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives', + + # Ignore cudf warnings about Pandas being used under the hood for processing json + 'ignore:Using CPU via Pandas to write JSON dataset', + 'ignore:Using CPU via Pandas to read JSON dataset', ] testpaths = ["tests"] diff --git a/tests/_utils/dataset_manager.py b/tests/_utils/dataset_manager.py index eeb1e9bb27..72a277cf21 100644 --- a/tests/_utils/dataset_manager.py +++ b/tests/_utils/dataset_manager.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,7 @@ import os import random import typing +import warnings import cupy as cp import pandas as pd @@ -235,7 +236,10 @@ def compare_df(cls, dfb: typing.Union[pd.DataFrame, cdf.DataFrame], **compare_args): """Wrapper for `morpheus.utils.compare_df.compare_df`.""" - return compare_df.compare_df(cls._value_as_pandas(dfa), cls._value_as_pandas(dfb), **compare_args) + with warnings.catch_warnings(): + # Ignore performance warnings from pandas triggered by the comparison + warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) + return compare_df.compare_df(cls._value_as_pandas(dfa), cls._value_as_pandas(dfb), **compare_args) @classmethod def assert_compare_df(cls, diff --git a/tests/_utils/kafka.py b/tests/_utils/kafka.py index 1921e9e289..21e8dee721 100644 --- a/tests/_utils/kafka.py +++ b/tests/_utils/kafka.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,7 @@ import subprocess import time import typing +import warnings from collections import namedtuple from functools import partial @@ -73,10 +74,15 @@ def seek_to_beginning(kafka_consumer: "KafkaConsumer", timeout: int = PARTITION_ @pytest.fixture(name='kafka_consumer', scope='function') def kafka_consumer_fixture(kafka_topics: KafkaTopics, _kafka_consumer: "KafkaConsumer"): - _kafka_consumer.subscribe([kafka_topics.output_topic]) - seek_to_beginning(_kafka_consumer) - - yield _kafka_consumer + with warnings.catch_warnings(): + # Ignore warnings specific to the test fixture and not the actual morpheus code + warnings.filterwarnings("ignore", + message=r"Exception ignored in:.*ConsumerCoordinator\.__del__", + category=pytest.PytestUnraisableExceptionWarning) + _kafka_consumer.subscribe([kafka_topics.output_topic]) + seek_to_beginning(_kafka_consumer) + + yield _kafka_consumer def _init_pytest_kafka() -> (bool, Exception): diff --git a/tests/dfencoder/test_scalers.py b/tests/dfencoder/test_scalers.py index caa02fb472..7166a88e67 100644 --- a/tests/dfencoder/test_scalers.py +++ b/tests/dfencoder/test_scalers.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import numpy as np import pytest import torch @@ -21,35 +23,41 @@ from morpheus.models.dfencoder import scalers -@pytest.fixture(scope="function") -def fit_tensor(): +@pytest.fixture(name="fit_tensor", scope="function") +def fit_tensor_fixture(): yield torch.tensor([4.4, 5.3, 6.5], dtype=torch.float32) -@pytest.fixture(scope="function") -def tensor(): +@pytest.fixture(name="tensor", scope="function") +def tensor_fixture(): yield torch.tensor([7.4, 8.3, 9.5], dtype=torch.float32) -@pytest.fixture(scope="function") -def standard_scaler(fit_tensor): +@pytest.fixture(name="standard_scaler", scope="function") +def standard_scaler_fixture(fit_tensor): scaler = scalers.StandardScaler() scaler.fit(fit_tensor) yield scaler -@pytest.fixture(scope="function") -def modified_scaler(fit_tensor): +@pytest.fixture(name="modified_scaler", scope="function") +def modified_scaler_fixture(fit_tensor): scaler = scalers.ModifiedScaler() scaler.fit(fit_tensor) yield scaler -@pytest.fixture(scope="function") -def gauss_rank_scaler(fit_tensor): +@pytest.fixture(name="gauss_rank_scaler", scope="function") +def gauss_rank_scaler_fixture(fit_tensor): scaler = scalers.GaussRankScaler() - scaler.fit(fit_tensor) - yield scaler + + with warnings.catch_warnings(): + # This warning is triggered by the abnormally small tensor size used in this test + warnings.filterwarnings("ignore", + message=r"n_quantiles \(1000\) is greater than the total number of samples \(3\).*", + category=UserWarning) + scaler.fit(fit_tensor) + yield scaler def test_ensure_float_type(): @@ -107,8 +115,7 @@ def test_modified_scaler_transform(modified_scaler, tensor): assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" # Test alternate path where median absolute deviation is 1 - t = torch.tensor([3.0, 4.0, 
4.0, 5.0]) - modified_scaler.fit(t) + modified_scaler.fit(torch.tensor([3.0, 4.0, 4.0, 5.0])) results = modified_scaler.transform(tensor) expected = torch.tensor([5.43, 6.86, 8.78]) assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" @@ -120,8 +127,7 @@ def test_modified_scaler_inverse_transform(modified_scaler, tensor): assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" # Test alternate path where median absolute deviation is 1 - t = torch.tensor([3.0, 4.0, 4.0, 5.0]) - modified_scaler.fit(t) + modified_scaler.fit(torch.tensor([3.0, 4.0, 4.0, 5.0])) results = modified_scaler.inverse_transform(tensor) expected = torch.tensor([8.64, 9.2, 9.95]) assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" @@ -153,13 +159,13 @@ def test_gauss_rank_scaler_fit_transform(gauss_rank_scaler, tensor): def test_null_scaler(tensor): orig = tensor.to(dtype=torch.float32, copy=True) - ns = scalers.NullScaler() - ns.fit(tensor) + scaler = scalers.NullScaler() + scaler.fit(tensor) # Verify it does nothing - assert ns.transform(tensor) is tensor - assert ns.inverse_transform(tensor) is tensor - assert ns.fit_transform(tensor) is tensor + assert scaler.transform(tensor) is tensor + assert scaler.inverse_transform(tensor) is tensor + assert scaler.fit_transform(tensor) is tensor # After all that the values should be the same assert torch.equal(tensor, orig), f"{tensor} != {orig}" diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index 0ae8bca2b5..4b13bacde5 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,7 +51,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, # post-process should replace nans, lets add a nan to the DF with dfp_multi_ae_message.meta.mutable_dataframe() as df: - df['v2'][10] = np.nan + df.loc[10, 'v2'] = np.nan df['event_time'] = '' set_log_level(morpheus_log_level) diff --git a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py index bfc5c9366a..8189df73fe 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +16,7 @@ import json import os import typing +import warnings import pytest @@ -99,7 +100,12 @@ def test_extract_users(config: Config, skip_users=skip_users, only_users=only_users) - results = stage.extract_users(df) + with warnings.catch_warnings(): + # Ignore warning about the log message not being set.
This happens whenever there aren't any output_messages + warnings.filterwarnings("ignore", + message="Must set log msg before end of context! Skipping log", + category=UserWarning) + results = stage.extract_users(df) if not include_generic and not include_individual: # Extra check for weird combination diff --git a/tests/examples/llm/common/conftest.py b/tests/examples/llm/common/conftest.py index 11ef4bad0c..fa9a6bc25a 100644 --- a/tests/examples/llm/common/conftest.py +++ b/tests/examples/llm/common/conftest.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,10 +18,10 @@ from _utils import import_or_skip -@pytest.fixture(name="nemollm", autouse=True, scope='session') -def nemollm_fixture(fail_missing: bool): +@pytest.fixture(name="langchain", autouse=True, scope='session') +def langchain_fixture(fail_missing: bool): """ - All the tests in this subdir require nemollm + All the tests in this subdir require langchain """ skip_reason = ("Tests for the WebScraperStage require the langchain package to be installed, to install this run:\n" "`mamba install -n base -c conda-forge conda-merge`\n" diff --git a/tests/examples/ransomware_detection/test_preprocessing.py b/tests/examples/ransomware_detection/test_preprocessing.py index 36874b4e2f..a72225edbf 100644 --- a/tests/examples/ransomware_detection/test_preprocessing.py +++ b/tests/examples/ransomware_detection/test_preprocessing.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -139,10 +139,10 @@ def test_merge_curr_and_prev_snapshots(self, config: Config, rwd_conf: dict, dat } expected_df = dataset_pandas['examples/ransomware_detection/dask_results.csv'].fillna('') - expected_df['pid_process'][1] = 'test_val1' - expected_df['pid_process'][3] = 'test_val2' + expected_df.loc[1, 'pid_process'] = 'test_val1' + expected_df.loc[3, 'pid_process'] = 'test_val2' - expected_df['snapshot_id'] = snapshot_ids + expected_df.loc[:, 'snapshot_id'] = snapshot_ids expected_df.index = expected_df.snapshot_id stage._merge_curr_and_prev_snapshots(df, source_pid_process) diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py index 226fee96d9..f92a16d148 100644 --- a/tests/llm/conftest.py +++ b/tests/llm/conftest.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio +import typing +from unittest import mock + import pytest from _utils import require_env_variable @@ -94,3 +98,22 @@ def serpapi_api_key_fixture(): yield require_env_variable( varname="SERPAPI_API_KEY", reason="serpapi integration tests require the `SERPAPI_API_KEY` environment variable to be defined.") + + +@pytest.mark.usefixtures("nemollm") +@pytest.fixture(name="mock_nemollm") +def mock_nemollm_fixture(mock_nemollm: mock.MagicMock): + + # The generate function is a blocking call that returns a future when return_type="async" + async def mock_task(fut: asyncio.Future, value: typing.Any = mock.DEFAULT): + fut.set_result(value) + + def create_future(*args, **kwargs) -> asyncio.Future: # pylint: disable=unused-argument + event_loop = asyncio.get_event_loop() + fut = event_loop.create_future() + event_loop.create_task(mock_task(fut, mock.DEFAULT)) + return fut + + mock_nemollm.generate.side_effect = create_future + + yield mock_nemollm diff --git a/tests/llm/test_completion_pipe.py b/tests/llm/test_completion_pipe.py index 615e7954e0..65940bb6d4 100644 --- a/tests/llm/test_completion_pipe.py +++ b/tests/llm/test_completion_pipe.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -82,16 +82,10 @@ def _run_pipeline(config: Config, @pytest.mark.usefixtures("nemollm") @pytest.mark.use_python -@mock.patch("asyncio.wrap_future") -@mock.patch("asyncio.gather", new_callable=mock.AsyncMock) -def test_completion_pipe_nemo( - mock_asyncio_gather: mock.AsyncMock, - mock_asyncio_wrap_future: mock.MagicMock, # pylint: disable=unused-argument - config: Config, - mock_nemollm: mock.MagicMock, - countries: list[str], - capital_responses: list[str]): - mock_asyncio_gather.return_value = [mock.MagicMock() for _ in range(len(countries))] +def test_completion_pipe_nemo(config: Config, + mock_nemollm: mock.MagicMock, + countries: list[str], + capital_responses: list[str]): mock_nemollm.post_process_generate_response.side_effect = [{"text": response} for response in capital_responses] results = _run_pipeline(config, NeMoLLMService, countries=countries, capital_responses=capital_responses) assert_results(results) diff --git a/tests/llm/test_rag_standalone_pipe.py b/tests/llm/test_rag_standalone_pipe.py index a98c9e1c1a..583f84944a 100644 --- a/tests/llm/test_rag_standalone_pipe.py +++ b/tests/llm/test_rag_standalone_pipe.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -131,24 +131,18 @@ def _run_pipeline(config: Config, @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) -@mock.patch("asyncio.wrap_future") -@mock.patch("asyncio.gather", new_callable=mock.AsyncMock) -def test_rag_standalone_pipe_nemo( - mock_asyncio_gather: mock.AsyncMock, - mock_asyncio_wrap_future: mock.MagicMock, # pylint: disable=unused-argument - config: Config, - mock_nemollm: mock.MagicMock, - dataset: DatasetManager, - milvus_server_uri: str, - repeat_count: int, - import_mod: types.ModuleType): +def test_rag_standalone_pipe_nemo(config: Config, + mock_nemollm: mock.MagicMock, + dataset: DatasetManager, + milvus_server_uri: str, + repeat_count: int, + import_mod: types.ModuleType): collection_name = "test_rag_standalone_pipe_nemo" populate_milvus(milvus_server_uri=milvus_server_uri, collection_name=collection_name, resource_kwargs=import_mod.build_milvus_config(embedding_size=EMBEDDING_SIZE), df=dataset["service/milvus_rss_data.json"], overwrite=True) - mock_asyncio_gather.return_value = [mock.MagicMock() for _ in range(repeat_count)] mock_nemollm.post_process_generate_response.side_effect = [{"text": EXPECTED_RESPONSE} for _ in range(repeat_count)] results = _run_pipeline( config=config, diff --git a/tests/llm/test_vdb_upload_pipe.py b/tests/llm/test_vdb_upload_pipe.py index fb0599f938..c1213a70c3 100644 --- a/tests/llm/test_vdb_upload_pipe.py +++ b/tests/llm/test_vdb_upload_pipe.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,7 @@ from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage EMBEDDING_SIZE = 384 -MODEL_MAX_BATCH_SIZE = 64 +MODEL_MAX_BATCH_SIZE = 256 MODEL_FEA_LENGTH = 512 @@ -116,7 +116,7 @@ def test_vdb_upload_pipe(mock_triton_client: mock.MagicMock, "name": "output", "datatype": "FP32", "shape": [-1, EMBEDDING_SIZE] }] } - mock_model_config = {"config": {"max_batch_size": 256}} + mock_model_config = {"config": {"max_batch_size": MODEL_MAX_BATCH_SIZE}} mock_triton_client.return_value = mock_triton_client mock_triton_client.is_server_live.return_value = True diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 40ca0b9612..014fb4ca40 100755 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -93,7 +93,10 @@ def _run_pipeline(filter_probs_df: DataFrameType, pipe = LinearPipeline(config) pipe.set_source(SourceTestStage(config, [filter_probs_df], **source_callbacks)) pipe.add_stage(SinkTestStage(config, **sink_callbacks)) - pipe.run() + + with pytest.deprecated_call(match="The on_start method is deprecated and may be removed in the future.*"): + # The sink stage ensures that the on_start callback method still works, even though it is deprecated. + pipe.run() @pytest.mark.use_cudf diff --git a/tests/test_column_info.py b/tests/test_column_info.py index 3e0c713773..4cd71a9804 100644 --- a/tests/test_column_info.py +++ b/tests/test_column_info.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +27,7 @@ import cudf from _utils import TEST_DIRS +from morpheus.io.deserializers import read_file_to_df from morpheus.utils.column_info import ColumnInfo from morpheus.utils.column_info import CustomColumn from morpheus.utils.column_info import DataFrameInputSchema @@ -38,12 +39,29 @@ from morpheus.utils.schema_transforms import process_dataframe -@pytest.mark.use_python -def test_dataframe_input_schema_with_json_cols(): +@pytest.fixture(name="_azure_ad_logs_pdf", scope="module") +def _azure_ad_logs_pdf_fixture(): + # Explicitly reading this in to ensure that lines=False. + # Using pandas since the C++ impl for read_file_to_df doesn't support parser_kwargs; this also avoids a warning + # that cudf.read_json uses pandas.read_json under the hood.
src_file = os.path.join(TEST_DIRS.tests_data_dir, "azure_ad_logs.json") + yield read_file_to_df(src_file, df_type='pandas', parser_kwargs={'lines': False}) + + +@pytest.fixture(name="azure_ad_logs_pdf", scope="function") +def azure_ad_logs_pdf_fixture(_azure_ad_logs_pdf: pd.DataFrame): + yield _azure_ad_logs_pdf.copy(deep=True) + + +@pytest.fixture(name="azure_ad_logs_cdf", scope="function") +def azure_ad_logs_cdf_fixture(_azure_ad_logs_pdf: pd.DataFrame): + # cudf.from_pandas essentially does a deep copy, so we can use this to ensure that the source pandas df is not + # modified + yield cudf.from_pandas(_azure_ad_logs_pdf) - input_df = cudf.read_json(src_file) +@pytest.mark.use_python +def test_dataframe_input_schema_with_json_cols(azure_ad_logs_cdf: cudf.DataFrame): raw_data_columns = [ 'time', 'resourceId', @@ -63,8 +81,8 @@ def test_dataframe_input_schema_with_json_cols(): 'properties' ] - assert len(input_df.columns) == 16 - assert list(input_df.columns) == raw_data_columns + assert len(azure_ad_logs_cdf.columns) == 16 + assert list(azure_ad_logs_cdf.columns) == raw_data_columns column_info = [ DateTimeColumn(name="timestamp", dtype='datetime64[ns]', input_name="time"), @@ -89,10 +107,10 @@ def test_dataframe_input_schema_with_json_cols(): schema = DataFrameInputSchema(json_columns=["properties"], column_info=column_info) - df_processed_schema = process_dataframe(input_df, schema) + df_processed_schema = process_dataframe(azure_ad_logs_cdf, schema) processed_df_cols = df_processed_schema.columns - assert len(input_df) == len(df_processed_schema) + assert len(azure_ad_logs_cdf) == len(df_processed_schema) assert len(processed_df_cols) == len(column_info) assert "timestamp" in processed_df_cols assert "userId" in processed_df_cols @@ -100,17 +118,13 @@ def test_dataframe_input_schema_with_json_cols(): assert "properties.userPrincipalName" not in processed_df_cols nvt_workflow = create_and_attach_nvt_workflow(schema) - df_processed_workflow = process_dataframe(input_df, nvt_workflow) + df_processed_workflow = process_dataframe(azure_ad_logs_cdf, nvt_workflow) assert df_processed_schema.equals(df_processed_workflow) @pytest.mark.use_python -def test_dataframe_input_schema_without_json_cols(): - src_file = os.path.join(TEST_DIRS.tests_data_dir, "azure_ad_logs.json") - - input_df = pd.read_json(src_file) - - assert len(input_df.columns) == 16 +def test_dataframe_input_schema_without_json_cols(azure_ad_logs_pdf: pd.DataFrame): + assert len(azure_ad_logs_pdf.columns) == 16 column_info = [ DateTimeColumn(name="timestamp", dtype='datetime64[ns]', input_name="time"), @@ -119,10 +133,10 @@ def test_dataframe_input_schema_without_json_cols(): schema = DataFrameInputSchema(column_info=column_info) - df_processed = process_dataframe(input_df, schema) + df_processed = process_dataframe(azure_ad_logs_pdf, schema) processed_df_cols = df_processed.columns - assert len(input_df) == len(df_processed) + assert len(azure_ad_logs_pdf) == len(df_processed) assert len(processed_df_cols) == len(column_info) assert "timestamp" in processed_df_cols assert "time" not in processed_df_cols @@ -152,7 +166,7 @@ def test_dataframe_input_schema_without_json_cols(): # When trying to concat columns that don't exist in the dataframe, an exception is raised. 
with pytest.raises(Exception): - process_dataframe(input_df, schema2) + process_dataframe(azure_ad_logs_pdf, schema2) @pytest.mark.use_python diff --git a/tests/test_monitor_stage.py b/tests/test_monitor_stage.py index 0db399f749..91bc936878 100755 --- a/tests/test_monitor_stage.py +++ b/tests/test_monitor_stage.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import inspect import logging import os @@ -59,13 +60,13 @@ def two_x(x): @mock.patch('morpheus.controllers.monitor_controller.MorpheusTqdm') -def test_on_start(mock_morph_tqdm: mock.MagicMock, config: Config): +def test_start_async(mock_morph_tqdm: mock.MagicMock, config: Config): mock_morph_tqdm.return_value = mock_morph_tqdm stage = MonitorStage(config, log_level=logging.WARNING) assert stage._mc._progress is None - stage.on_start() + asyncio.run(stage.start_async()) mock_morph_tqdm.assert_called_once() mock_morph_tqdm.reset.assert_called_once() assert stage._mc._progress is mock_morph_tqdm @@ -82,7 +83,7 @@ def test_stop(mock_morph_tqdm: mock.MagicMock, config: Config): stage.stop() mock_morph_tqdm.assert_not_called() - stage.on_start() + asyncio.run(stage.start_async()) stage.stop() mock_morph_tqdm.close.assert_called_once() @@ -94,7 +95,7 @@ def test_refresh(mock_morph_tqdm: mock.MagicMock, config: Config): stage = MonitorStage(config, log_level=logging.WARNING) assert stage._mc._progress is None - stage.on_start() + asyncio.run(stage.start_async()) stage._mc.refresh_progress(None) mock_morph_tqdm.refresh.assert_called_once() @@ -138,7 +139,7 @@ def test_progress_sink(mock_morph_tqdm: mock.MagicMock, config: Config): mock_morph_tqdm.return_value = mock_morph_tqdm stage = MonitorStage(config, log_level=logging.WARNING) - stage.on_start() + asyncio.run(stage.start_async()) stage._mc.progress_sink(None) assert stage._mc._determine_count_fn is None diff --git a/tests/utils/nvt/test_json_flatten_transform.py b/tests/utils/nvt/test_json_flatten_transform.py index faf998e4ff..e0657925f5 100644 --- a/tests/utils/nvt/test_json_flatten_transform.py +++ b/tests/utils/nvt/test_json_flatten_transform.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import pandas as pd import pytest -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from nvtabular.ops.operator import ColumnSelector import cudf diff --git a/tests/utils/nvt/test_mutate_op.py b/tests/utils/nvt/test_mutate_op.py index 034e4f9049..3023d9701e 100644 --- a/tests/utils/nvt/test_mutate_op.py +++ b/tests/utils/nvt/test_mutate_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import numpy as np import pandas as pd import pytest -from merlin.core.dispatch import DataFrameType -from merlin.schema import ColumnSchema -from merlin.schema import Schema -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from merlin.core.dispatch import DataFrameType + from merlin.schema import ColumnSchema + from merlin.schema import Schema + from nvtabular.ops.operator import ColumnSelector from morpheus.utils.nvt.mutate import MutateOp diff --git a/tests/utils/nvt/test_schema_converters.py b/tests/utils/nvt/test_schema_converters.py index 03270a6da5..9b00440d1a 100644 --- a/tests/utils/nvt/test_schema_converters.py +++ b/tests/utils/nvt/test_schema_converters.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,13 @@ # limitations under the License. import json +import warnings + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + import nvtabular as nvt -import nvtabular as nvt import pandas as pd import pytest diff --git a/tests/utils/nvt/test_transforms.py b/tests/utils/nvt/test_transforms.py index c390f37627..96df15447c 100644 --- a/tests/utils/nvt/test_transforms.py +++ b/tests/utils/nvt/test_transforms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import pandas as pd import pytest -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from nvtabular.ops.operator import ColumnSelector from _utils.dataset_manager import DatasetManager from morpheus.utils.nvt.transforms import json_flatten
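
A note on the `pyarrow-hotfix` dependency added across the environment files above: the package mitigates CVE-2023-47248 (arbitrary code execution via deserialization of `pyarrow.PyExtensionType`) and takes effect as an import side effect. A minimal sketch of how it is typically wired up, assuming the package's documented import-time behavior; see `morpheus/__init__.py` for the actual integration:

```python
# Importing the module is enough: pyarrow_hotfix calls its own install() on
# import, disabling unsafe deserialization of pyarrow.PyExtensionType payloads.
import pyarrow_hotfix  # noqa: F401

# The patch can also be toggled explicitly; install() is idempotent.
pyarrow_hotfix.install()
pyarrow_hotfix.uninstall()  # restores stock behavior; only sensible for fully trusted data
```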
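The `table_info.cpp` change replaces a pybind11 read of the pandas `Index.is_monotonic` property, which pandas 2.x removed, with the two explicit properties that exist in both 1.x and 2.x. For reference, a sketch of the equivalent check in pure pandas (illustrative index values only):

```python
import pandas as pd

idx = pd.Index([3, 1, 2])

# `Index.is_monotonic` is gone in pandas 2.x; the explicit increasing/decreasing
# properties express the same "already sliceable" test the C++ code performs
# through pybind11 attribute access.
sliceable = idx.is_unique and (idx.is_monotonic_increasing or idx.is_monotonic_decreasing)
print(sliceable)  # False: unique, but neither sorted ascending nor descending
```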
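The `column_info.py` change avoids the pandas deprecation triggered by calling `.astype('datetime64[ns]')` on a tz-aware series. A small standalone reproduction of the pattern, using a made-up timestamp:

```python
import pandas as pd

ser = pd.to_datetime(pd.Series(["2024-01-02T03:04:05Z"]), utc=True)

# Deprecated: ser.astype("datetime64[ns]") on a tz-aware series.
# Dropping the timezone explicitly yields the same naive UTC wall-clock values.
naive = ser.dt.tz_localize(None)
assert str(naive.dtype) == "datetime64[ns]"
```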
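Several test updates (`test_dfp_postprocessing_stage.py`, `test_preprocessing.py`) replace chained indexing such as `df['v2'][10] = np.nan` with a single `.loc` call. A brief illustration of why, on a hypothetical frame:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"v2": np.arange(20.0)})

# Chained assignment (df["v2"][10] = ...) routes the write through an intermediate
# Series and may modify a copy instead of df (SettingWithCopyWarning today, an
# error under pandas copy-on-write). A single .loc call always writes to df itself.
df.loc[10, "v2"] = np.nan
assert np.isnan(df.loc[10, "v2"])
```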
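Finally, the recurring `warnings.catch_warnings()` blocks in this patch, including the import-time variant used in the nvt tests, all rely on the same property: the filter is restored when the `with` block exits, so the suppressed warning still surfaces anywhere it might indicate a real problem. The general shape, mirroring the nvt test changes:

```python
import warnings

with warnings.catch_warnings():
    # Scoped filter: undone on exit from the block, unlike a module-level
    # warnings.filterwarnings() call, which would hide the warning globally.
    warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning)
    from nvtabular.ops.operator import ColumnSelector  # noqa: F401
```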