Patch summary
  * e2e/preview/pipelines/test_preprocessing_pipeline.py:
      - import DocumentCleaner in place of TextDocumentCleaner;
      - import DocumentLanguageClassifier from the preprocessors package
        instead of the routers package;
      - register DocumentCleaner() as the "cleaner" pipeline component.
  * test/preview/components/preprocessors/test_document_language_classifier.py:
      - add test_init, asserting the default languages value is ["en"];
      - make test_to_dict cover the default init parameters;
      - keep the former custom-languages assertions under the new name
        test_to_dict_with_custom_init_parameters.

diff --git a/e2e/preview/pipelines/test_preprocessing_pipeline.py b/e2e/preview/pipelines/test_preprocessing_pipeline.py
index b99ec89a56..9dac07b8d1 100644
--- a/e2e/preview/pipelines/test_preprocessing_pipeline.py
+++ b/e2e/preview/pipelines/test_preprocessing_pipeline.py
@@ -3,8 +3,8 @@
 from haystack.preview import Pipeline
 from haystack.preview.components.embedders import SentenceTransformersDocumentEmbedder
 from haystack.preview.components.file_converters import TextFileToDocument
-from haystack.preview.components.preprocessors import TextDocumentSplitter, TextDocumentCleaner
-from haystack.preview.components.routers import FileTypeRouter, DocumentLanguageClassifier
+from haystack.preview.components.preprocessors import TextDocumentSplitter, DocumentCleaner, DocumentLanguageClassifier
+from haystack.preview.components.routers import FileTypeRouter
 from haystack.preview.components.writers import DocumentWriter
 from haystack.preview.document_stores import MemoryDocumentStore
 
@@ -16,7 +16,7 @@ def test_preprocessing_pipeline(tmp_path):
     preprocessing_pipeline.add_component(instance=FileTypeRouter(mime_types=["text/plain"]), name="file_type_router")
     preprocessing_pipeline.add_component(instance=TextFileToDocument(), name="text_file_converter")
     preprocessing_pipeline.add_component(instance=DocumentLanguageClassifier(), name="language_classifier")
-    preprocessing_pipeline.add_component(instance=TextDocumentCleaner(), name="cleaner")
+    preprocessing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
     preprocessing_pipeline.add_component(
         instance=TextDocumentSplitter(split_by="sentence", split_length=1), name="splitter"
     )
diff --git a/test/preview/components/preprocessors/test_document_language_classifier.py b/test/preview/components/preprocessors/test_document_language_classifier.py
index 838d437c12..a7ab826ac8 100644
--- a/test/preview/components/preprocessors/test_document_language_classifier.py
+++ b/test/preview/components/preprocessors/test_document_language_classifier.py
@@ -6,8 +6,19 @@
 
 
 class TestDocumentLanguageClassifier:
+    @pytest.mark.unit
+    def test_init(self):
+        component = DocumentLanguageClassifier()
+        assert component.languages == ["en"]
+
     @pytest.mark.unit
     def test_to_dict(self):
+        component = DocumentLanguageClassifier()
+        data = component.to_dict()
+        assert data == {"type": "DocumentLanguageClassifier", "init_parameters": {"languages": ["en"]}}
+
+    @pytest.mark.unit
+    def test_to_dict_with_custom_init_parameters(self):
         component = DocumentLanguageClassifier(languages=["en", "de"])
         data = component.to_dict()
         assert data == {"type": "DocumentLanguageClassifier", "init_parameters": {"languages": ["en", "de"]}}