Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/refactor-tests-with-pytest' into…
Browse files Browse the repository at this point in the history
… release/541

# Conflicts:
#	nlu/__init__.py
#	tests/nlu_hc_tests/component_tests/de_identification/de_identification_tests.py
#	tests/nlu_hc_tests/component_tests/generic_classifier/generic_classifier_tests.py
  • Loading branch information
C-K-Loan committed Sep 27, 2024
2 parents 9cbec12 + 4ae2153 commit 86186ca
Show file tree
Hide file tree
Showing 145 changed files with 784 additions and 4,644 deletions.
180 changes: 12 additions & 168 deletions .github/workflows/nlu_test_flow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ jobs:
build:
runs-on: ubuntu-latest
env:
JSL_LICENSE: ${{ secrets.JSL_LICENSE }}
JOHNSNOWLABS_LICENSE_JSON: ${{ secrets.JOHNSNOWLABS_LICENSE_JSON }}
strategy:
matrix:
python-version: [3.7]
steps:
- uses: actions/setup-java@v1
with:
java-version: '1.8.0' # The JDK version to make available on the path.
java-package: jdk # (jre, jdk, or jdk+fx) - defaults to jd
java-version: '1.8.0'
java-package: jdk
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
Expand All @@ -27,173 +27,17 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pypandoc scikit-learn
pip install wheel dataclasses pandas numpy pytest modin[ray] pyspark==3.2.0 spark-nlp
java -version
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
! echo 2 | update-alternatives --config java
- name: NLU Chunker Component tests
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/chunker_tests' -p '*tests.py'
- name: NLU Classifier Cyber tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/cyber_tests.py'
- name: NLU Classifier E2E tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/e2e_tests.py'
- name: NLU Classifier Emotion tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/emotion_tests.py'
- name: NLU Classifier Language tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/language_tests.py'
- name: NLU Classifier NER tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/ner_tests.py'
- name: NLU Classifier POS tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/pos_tests.py'
- name: NLU Classifier Question tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/question_tests.py'
- name: NLU Classifier Sarcasm tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sarcasm_tests.py'
- name: NLU Classifier Sentiment tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_tests.py'
- name: NLU Classifier Sentiment test Imdb -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_test_imdb.py'
- name: NLU Classifier Sentiment test Twitter -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_test_twitter.py'
- name: NLU Classifier Spam tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/spam_tests.py'
- name: NLU Classifier Toxic tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/toxic_tests.py'
- name: NLU Classifier YAKE tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/yake_tests.py'
# - name: NLU Embed Component tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/component_tests/embed_tests' -p '*tests.py'
- name: NLU Text Cleaner Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/pre_processing_tests' -p '*tests.py'
# - name: NLU Matcher Component tests # not yet converted in Spark 3
# if: always()
# run: |
# python -m unittest discover -s 'tests/nlu_core_tests/component_tests/matcher_tests' -p '*tests.py'
- name: NLU Typed Dependency Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/typed_dependency_tests' -p '*tests.py'
- name: NLU Untyped Dependency Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/untyped_dependency_tests' -p '*tests.py'
- name: NLU Pipe tests
if: always()
pip install wheel dataclasses pandas numpy pytest modin[ray] pytest-xdist pytest-xdist pytest-forked nbformat librosa johnsnowlabs==5.3.4rc1
pip uninstall nlu -y
- name: Install Licensed Libs
if:
run: |
python -m unittest discover -s './tests/nlu_core_tests/pipeline_tests' -p '*tests.py'
- name: NLU Training sentiment tests
python -c 'from johnsnowlabs import nlp;nlp.install(browser_login = False, force_browser=False,visual=True)'
- name: Run one test per lib
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*sentiment_dl*.py'
# Too heavy for GitHub actions
# - name: NLU Training classifier tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*classifier_dl*.py'
# - name: NLU Training multi classifier tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*multi*.py'
# - name: NLU Training NER tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*ner*.py'
- name: NLU Training POS tests
python tests/run_tests.py one_per_lib
- name: Run all tests
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*pos*.py'
# - name: NLU Healthcare Verification tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests -p 'verification_tests.py'
# - name: NLU OCR tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_ocr_tests -p '*tests.py'
# - name: NLU Healthcare Assertion DL tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/assertion_dl -p '*tests.py'
## - name: NLU Healthcare Contextual Parser tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/contextual_parser -p '*tests.py'
# - name: NLU Healthcare De Identification tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/de_identification -p '*tests.py'
# - name: NLU Healthcare Drug Normalizer tests
# if: always()
# run: |
# python -m unittest 'tests/nlu_hc_tests/component_tests/drug_normalizer/drug_normalizer_test.py'
# - name: NLU Healthcare Generic Classifier tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/generic_classifier -p '*tests.py'
# - name: NLU Healthcare Licensed Classifier tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/licensed_classifier -p '*tests.py'
# - name: NLU Healthcare Relation Extraction tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/relation_extraction -p '*tests.py'
# - name: NLU Healthcare Sentence Entity Resolver tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/sentence_entity_resolver -p '*tests.py'
## - name: NLU Healthcare Pipe tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/pipe_tests -p '*tests.py'
## - name: NLU Healthcare Training Chunk Resolution tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/training_tests/chunk_resolution -p '*tests.py'
# - name: NLU Healthcare Training Sentence Resolution tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/training_tests/sentence_resolution -p '*tests.py'
## - name: NLU Saving and Loading tests
## if: always()
## run: |
## python -m unittest discover -s './tests/nlu_core_tests/training_tests/trained_pipe_tests' -p '*tests.py'
## - name: NLU Modin tests
## if: always()
## run: |
## python -m unittest discover -s './tests/modin' -p '*tests.py'
python tests/run_tests.py all
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include VERSION
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

# NLU: The Power of Spark NLP, the Simplicity of Python
John Snow Labs' NLU is a Python library for applying state-of-the-art text mining, directly on any dataframe, with a single line of code.
As a facade of the award-winning Spark NLP library, it comes with **1000+** pretrained models in **100+** languages, all production-grade, scalable, and trainable, with **everything in 1 line of code.**
Expand Down
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.1.12
34 changes: 34 additions & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package:
name: nlu
version: {{ environ.get('CODE_VERSION', '') }}

app:
entry: nlu
summary: The power of 15000+ State-of-the-art pre-trained NLP models in 300 languages with 1 line of Python code.

source:
path: ../conda_src

build:
noarch: generic
number: 0
script: "python3 -m pip install . --no-deps -vv"

requirements:
build:
- python
run:
- python
- pyspark==3.0.1
- spark-nlp >=5.2.0
- numpy
- pyarrow >=0.16.0
- pandas >=1.3.5
- dataclasses
about:
home: https://nlu.johnsnowlabs.com/
license: Apache License 2.0
license_family: APACHE
license_url: https://github.com/JohnSnowLabs/nlu/blob/master/LICENSE
description: John Snow Labs' NLU is a Python library for applying state-of-the-art text mining, directly on any dataframe, with a single line of code. As a facade of the award-winning Spark NLP library, it comes with hundreds of pretrained models in tens of languages - all production-grade, scalable, and trainable.
summary: The power of 15000+ State-of-the-art pre-trained NLP models in 300 languages with 1 line of Python code.
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[pytest]
;markers =
; db_cloud_node_params: marker for parameterizing databricks tests with cloud credentials and node types (azure,aws, gcp)
; db_cloud_params: marker for parameterizing databricks tests over all cloud credentials (azure,aws, gcp)
addopts = -s --capture=no
8 changes: 5 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import nlu

from codecs import open
from os import path

Expand All @@ -10,6 +8,10 @@
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()

# Get a version from file
with open(path.join(here, 'VERSION')) as version_file:
version = f"{version_file.read().strip()}"

REQUIRED_PKGS = [
'spark-nlp>=5.0.2',
'numpy',
Expand All @@ -22,7 +24,7 @@

name='nlu',

version=nlu.version(),
version=version,

description='John Snow Labs NLU provides state of the art algorithms for NLP&NLU with 20000+ of pretrained models in 200+ languages. It enables swift and simple development and research with its powerful Pythonic and Keras inspired API. It is powerd by John Snow Labs powerful Spark NLP library.',

Expand Down
43 changes: 43 additions & 0 deletions tests/base_model_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest

from tests.utils import all_tests, one_per_lib, NluTest, model_and_output_levels_test


def model_id(model_to_test: NluTest) -> str:
    """Build a human-readable pytest ID of the form '<test_group>_<nlu_ref>'."""
    group = model_to_test.test_group
    ref = model_to_test.nlu_ref
    return "{}_{}".format(group, ref)


def all_annotator_tests():
    """Expose the full annotator test-case collection for parametrization."""
    return all_tests


def one_test_per_lib():
    """Expose the reduced one-case-per-library test collection for parametrization."""
    return one_per_lib


@pytest.mark.skip(reason="Use run_tests.py instead until pytest-xdist issue is fixed")
@pytest.mark.parametrize("model_to_test", all_annotator_tests(), ids=model_id)
def test_model_all_annotators(model_to_test: NluTest):
    """Run the model + output-level check for a single annotator test case."""
    # Collect the case's fields once, then forward them as keyword arguments.
    case_kwargs = dict(
        nlu_ref=model_to_test.nlu_ref,
        lang=model_to_test.lang,
        test_group=model_to_test.test_group,
        output_levels=model_to_test.output_levels,
        input_data_type=model_to_test.input_data_type,
        library=model_to_test.library,
        pipe_params=model_to_test.pipe_params,
    )
    model_and_output_levels_test(**case_kwargs)


@pytest.mark.skip(reason="Local testing")
@pytest.mark.parametrize("model_to_test", one_test_per_lib(), ids=model_id)
def test_one_per_lib(model_to_test: NluTest):
    """Run the model + output-level check for one representative case per library."""
    # Collect the case's fields once, then forward them as keyword arguments.
    case_kwargs = dict(
        nlu_ref=model_to_test.nlu_ref,
        lang=model_to_test.lang,
        test_group=model_to_test.test_group,
        output_levels=model_to_test.output_levels,
        input_data_type=model_to_test.input_data_type,
        library=model_to_test.library,
        pipe_params=model_to_test.pipe_params,
    )
    model_and_output_levels_test(**case_kwargs)
File renamed without changes.
1 change: 0 additions & 1 deletion tests/nlu_core_tests/component_info_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class TestComponentInfo(unittest.TestCase):
def test_list_all_names(self):
a = nlu.AllComponentsInfo()
a.list_all_components()
a.DEBUG_list_all_components()

def test_print_all_default_components_as_markdown(self):
d = nlu.Spellbook.component_alias_references
Expand Down
15 changes: 0 additions & 15 deletions tests/nlu_core_tests/component_parameterization_tests.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import unittest
import nlu



def test_albert_for_question_answering():
    """Smoke-test ALBERT QA: load the pipe, predict on one question, print every column."""
    # Question and context are joined with the '|||' separator expected by the QA pipe.
    question_and_context = "What is my name?|||My name is CKL"
    pipe = nlu.load("en.answer_question.squadv2.albert.xxl.by_sultan", verbose=True)
    df = pipe.predict(question_and_context)
    for column in df.columns:
        print(df[column])


Empty file.
29 changes: 0 additions & 29 deletions tests/nlu_core_tests/component_tests/chunker_tests/chunk_tests.py

This file was deleted.

Loading

0 comments on commit 86186ca

Please sign in to comment.