Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/refactor-tests-with-pytest' into…
Browse files Browse the repository at this point in the history
… release/541

# Conflicts:
#	nlu/__init__.py
#	tests/nlu_hc_tests/component_tests/de_identification/de_identification_tests.py
#	tests/nlu_hc_tests/component_tests/generic_classifier/generic_classifier_tests.py
  • Loading branch information
C-K-Loan committed Sep 27, 2024
2 parents 9cbec12 + 4ae2153 commit 86186ca
Show file tree
Hide file tree
Showing 145 changed files with 784 additions and 4,644 deletions.
180 changes: 12 additions & 168 deletions .github/workflows/nlu_test_flow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ jobs:
build:
runs-on: ubuntu-latest
env:
JSL_LICENSE: ${{ secrets.JSL_LICENSE }}
JOHNSNOWLABS_LICENSE_JSON: ${{ secrets.JOHNSNOWLABS_LICENSE_JSON }}
strategy:
matrix:
python-version: [3.7]
steps:
- uses: actions/setup-java@v1
with:
java-version: '1.8.0' # The JDK version to make available on the path.
java-package: jdk # (jre, jdk, or jdk+fx) - defaults to jd
java-version: '1.8.0'
java-package: jdk
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
Expand All @@ -27,173 +27,17 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pypandoc scikit-learn
pip install wheel dataclasses pandas numpy pytest modin[ray] pyspark==3.2.0 spark-nlp
java -version
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
! echo 2 | update-alternatives --config java
- name: NLU Chunker Component tests
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/chunker_tests' -p '*tests.py'
- name: NLU Classifier Cyber tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/cyber_tests.py'
- name: NLU Classifier E2E tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/e2e_tests.py'
- name: NLU Classifier Emotion tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/emotion_tests.py'
- name: NLU Classifier Language tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/language_tests.py'
- name: NLU Classifier NER tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/ner_tests.py'
- name: NLU Classifier POS tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/pos_tests.py'
- name: NLU Classifier Question tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/question_tests.py'
- name: NLU Classifier Sarcasm tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sarcasm_tests.py'
- name: NLU Classifier Sentiment tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_tests.py'
- name: NLU Classifier Sentiment test Imdb -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_test_imdb.py'
- name: NLU Classifier Sentiment test Twitter -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/sentiment_test_twitter.py'
- name: NLU Classifier Spam tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/spam_tests.py'
- name: NLU Classifier Toxic tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/toxic_tests.py'
- name: NLU Classifier YAKE tests -
if: always()
run: |
python -m unittest 'tests/nlu_core_tests/component_tests/classifier_tests/yake_tests.py'
# - name: NLU Embed Component tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/component_tests/embed_tests' -p '*tests.py'
- name: NLU Text Cleaner Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/pre_processing_tests' -p '*tests.py'
# - name: NLU Matcher Component tests # not yet converted in Spark 3
# if: always()
# run: |
# python -m unittest discover -s 'tests/nlu_core_tests/component_tests/matcher_tests' -p '*tests.py'
- name: NLU Typed Dependency Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/typed_dependency_tests' -p '*tests.py'
- name: NLU Untyped Dependency Component tests -
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/component_tests/untyped_dependency_tests' -p '*tests.py'
- name: NLU Pipe tests
if: always()
pip install wheel dataclasses pandas numpy pytest modin[ray] pytest-xdist pytest-xdist pytest-forked nbformat librosa johnsnowlabs==5.3.4rc1
pip uninstall nlu -y
- name: Install Licensed Libs
if:
run: |
python -m unittest discover -s './tests/nlu_core_tests/pipeline_tests' -p '*tests.py'
- name: NLU Training sentiment tests
python -c 'from johnsnowlabs import nlp;nlp.install(browser_login = False, force_browser=False,visual=True)'
- name: Run one test per lib
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*sentiment_dl*.py'
# Too heavy for GitHub actions
# - name: NLU Training classifier tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*classifier_dl*.py'
# - name: NLU Training multi classifier tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*multi*.py'
# - name: NLU Training NER tests
# if: always()
# run: |
# python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*ner*.py'
- name: NLU Training POS tests
python tests/run_tests.py one_per_lib
- name: Run all tests
if: always()
run: |
python -m unittest discover -s './tests/nlu_core_tests/training_tests/classifiers' -p '*pos*.py'
# - name: NLU Healthcare Verification tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests -p 'verification_tests.py'
# - name: NLU OCR tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_ocr_tests -p '*tests.py'
# - name: NLU Healthcare Assertion DL tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/assertion_dl -p '*tests.py'
## - name: NLU Healthcare Contextual Parser tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/contextual_parser -p '*tests.py'
# - name: NLU Healthcare De Identification tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/de_identification -p '*tests.py'
# - name: NLU Healthcare Drug Normalizer tests
# if: always()
# run: |
# python -m unittest 'tests/nlu_hc_tests/component_tests/drug_normalizer/drug_normalizer_test.py'
# - name: NLU Healthcare Generic Classifier tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/generic_classifier -p '*tests.py'
# - name: NLU Healthcare Licensed Classifier tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/licensed_classifier -p '*tests.py'
# - name: NLU Healthcare Relation Extraction tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/relation_extraction -p '*tests.py'
# - name: NLU Healthcare Sentence Entity Resolver tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/component_tests/sentence_entity_resolver -p '*tests.py'
## - name: NLU Healthcare Pipe tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/pipe_tests -p '*tests.py'
## - name: NLU Healthcare Training Chunk Resolution tests
## if: always()
## run: |
## python -m unittest discover -s ./tests/nlu_hc_tests/training_tests/chunk_resolution -p '*tests.py'
# - name: NLU Healthcare Training Sentence Resolution tests
# if: always()
# run: |
# python -m unittest discover -s ./tests/nlu_hc_tests/training_tests/sentence_resolution -p '*tests.py'
## - name: NLU Saving and Loading tests
## if: always()
## run: |
## python -m unittest discover -s './tests/nlu_core_tests/training_tests/trained_pipe_tests' -p '*tests.py'
## - name: NLU Modin tests
## if: always()
## run: |
## python -m unittest discover -s './tests/modin' -p '*tests.py'
python tests/run_tests.py all
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include VERSION
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

# NLU: The Power of Spark NLP, the Simplicity of Python
John Snow Labs' NLU is a Python library for applying state-of-the-art text mining, directly on any dataframe, with a single line of code.
As a facade of the award-winning Spark NLP library, it comes with **1000+** pretrained models in **100+** languages, all production-grade, scalable, and trainable, with **everything in 1 line of code.**
Expand Down
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.1.12
34 changes: 34 additions & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package:
name: nlu
version: {{ environ.get('CODE_VERSION', '') }}

app:
entry: nlu
summary: The power of 15000+ State-of-the-art pre-trained NLP models in 300 languages with 1 line of Python code.

source:
path: ../conda_src

build:
noarch: generic
number: 0
script: "python3 -m pip install . --no-deps -vv"

requirements:
build:
- python
run:
- python
- pyspark==3.0.1
- spark-nlp >=5.2.0
- numpy
- pyarrow >=0.16.0
- pandas >=1.3.5
- dataclasses
about:
home: https://nlu.johnsnowlabs.com/
license: Apache License 2.0
license_family: APACHE
license_url: https://github.com/JohnSnowLabs/nlu/blob/master/LICENSE
description: John Snow Labs' NLU is a Python library for applying state-of-the-art text mining, directly on any dataframe, with a single line of code. As a facade of the award-winning Spark NLP library, it comes with hundreds of pretrained models in tens of languages - all production-grade, scalable, and trainable.
summary: The power of 15000+ State-of-the-art pre-trained NLP models in 300 languages with 1 line of Python code.
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[pytest]
;markers =
; db_cloud_node_params: marker for parameterizing databricks tests with cloud credentials and node types (azure,aws, gcp)
; db_cloud_params: marker for parameterizing databricks tests over all cloud credentials (azure,aws, gcp)
addopts = -s --capture=no
8 changes: 5 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import nlu

from codecs import open
from os import path

Expand All @@ -10,6 +8,10 @@
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()

# Get a version from file
with open(path.join(here, 'VERSION')) as version_file:
version = f"{version_file.read().strip()}"

REQUIRED_PKGS = [
'spark-nlp>=5.0.2',
'numpy',
Expand All @@ -22,7 +24,7 @@

name='nlu',

version=nlu.version(),
version=version,

description='John Snow Labs NLU provides state of the art algorithms for NLP&NLU with 20000+ of pretrained models in 200+ languages. It enables swift and simple development and research with its powerful Pythonic and Keras inspired API. It is powerd by John Snow Labs powerful Spark NLP library.',

Expand Down
43 changes: 43 additions & 0 deletions tests/base_model_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest

from tests.utils import all_tests, one_per_lib, NluTest, model_and_output_levels_test


def model_id(model_to_test: NluTest) -> str:
    """Build a human-readable pytest ID of the form '<test_group>_<nlu_ref>'."""
    group = model_to_test.test_group
    ref = model_to_test.nlu_ref
    return "{}_{}".format(group, ref)


def all_annotator_tests():
    """Expose the full annotator test-case collection for parametrization."""
    return all_tests


def one_test_per_lib():
    """Expose the reduced one-case-per-library test collection for parametrization."""
    return one_per_lib


@pytest.mark.skip(reason="Use run_tests.py instead until pytest-xdist issue is fixed")
@pytest.mark.parametrize("model_to_test", all_annotator_tests(), ids=model_id)
def test_model_all_annotators(model_to_test: NluTest):
    """Run the model + output-level check for a single annotator test case."""
    # Collect the case's fields once, then forward them as keyword arguments.
    case_kwargs = dict(
        nlu_ref=model_to_test.nlu_ref,
        lang=model_to_test.lang,
        test_group=model_to_test.test_group,
        output_levels=model_to_test.output_levels,
        input_data_type=model_to_test.input_data_type,
        library=model_to_test.library,
        pipe_params=model_to_test.pipe_params,
    )
    model_and_output_levels_test(**case_kwargs)


@pytest.mark.skip(reason="Local testing")
@pytest.mark.parametrize("model_to_test", one_test_per_lib(), ids=model_id)
def test_one_per_lib(model_to_test: NluTest):
    """Run the model + output-level check for one representative case per library."""
    # Collect the case's fields once, then forward them as keyword arguments.
    case_kwargs = dict(
        nlu_ref=model_to_test.nlu_ref,
        lang=model_to_test.lang,
        test_group=model_to_test.test_group,
        output_levels=model_to_test.output_levels,
        input_data_type=model_to_test.input_data_type,
        library=model_to_test.library,
        pipe_params=model_to_test.pipe_params,
    )
    model_and_output_levels_test(**case_kwargs)
File renamed without changes.
1 change: 0 additions & 1 deletion tests/nlu_core_tests/component_info_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class TestComponentInfo(unittest.TestCase):
def test_list_all_names(self):
a = nlu.AllComponentsInfo()
a.list_all_components()
a.DEBUG_list_all_components()

def test_print_all_default_components_as_markdown(self):
d = nlu.Spellbook.component_alias_references
Expand Down
15 changes: 0 additions & 15 deletions tests/nlu_core_tests/component_parameterization_tests.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import unittest
import nlu



def test_albert_for_question_answering():
    """Smoke-test ALBERT QA: load the pipe, predict on one question, print every column."""
    # Question and context are joined with the '|||' separator expected by the QA pipe.
    question_and_context = "What is my name?|||My name is CKL"
    pipe = nlu.load("en.answer_question.squadv2.albert.xxl.by_sultan", verbose=True)
    df = pipe.predict(question_and_context)
    for column in df.columns:
        print(df[column])


Empty file.
29 changes: 0 additions & 29 deletions tests/nlu_core_tests/component_tests/chunker_tests/chunk_tests.py

This file was deleted.

Loading

0 comments on commit 86186ca

Please sign in to comment.