Skip to content

Commit

Permalink
fix import, rename DocumentCleaner
Browse files — browse the repository at this point in the history
  • Loading branch information
julian-risch committed Oct 13, 2023
1 parent 6b0408e commit 2837d69
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
6 changes: 3 additions & 3 deletions e2e/preview/pipelines/test_preprocessing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from haystack.preview import Pipeline
from haystack.preview.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.preview.components.file_converters import TextFileToDocument
from haystack.preview.components.preprocessors import TextDocumentSplitter, TextDocumentCleaner
from haystack.preview.components.routers import FileTypeRouter, DocumentLanguageClassifier
from haystack.preview.components.preprocessors import TextDocumentSplitter, DocumentCleaner, DocumentLanguageClassifier
from haystack.preview.components.routers import FileTypeRouter
from haystack.preview.components.writers import DocumentWriter
from haystack.preview.document_stores import MemoryDocumentStore

Expand All @@ -16,7 +16,7 @@ def test_preprocessing_pipeline(tmp_path):
preprocessing_pipeline.add_component(instance=FileTypeRouter(mime_types=["text/plain"]), name="file_type_router")
preprocessing_pipeline.add_component(instance=TextFileToDocument(), name="text_file_converter")
preprocessing_pipeline.add_component(instance=DocumentLanguageClassifier(), name="language_classifier")
preprocessing_pipeline.add_component(instance=TextDocumentCleaner(), name="cleaner")
preprocessing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
preprocessing_pipeline.add_component(
instance=TextDocumentSplitter(split_by="sentence", split_length=1), name="splitter"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,19 @@


class TestDocumentLanguageClassifier:
@pytest.mark.unit
def test_init(self):
# Build the component with no arguments and check that its `languages`
# attribute is exactly ["en"], i.e. the value this test expects as the default.
component = DocumentLanguageClassifier()
assert component.languages == ["en"]

@pytest.mark.unit
def test_to_dict(self):
# Serialize a component built with no arguments and check the full dict:
# the "type" entry is the class name and "init_parameters" carries
# languages == ["en"].
component = DocumentLanguageClassifier()
data = component.to_dict()
assert data == {"type": "DocumentLanguageClassifier", "init_parameters": {"languages": ["en"]}}

@pytest.mark.unit
def test_to_dict_with_custom_init_parameters(self):
# Same serialization check as the no-argument case, but with an explicit
# `languages=["en", "de"]`: the list passed to the constructor must appear
# unchanged under "init_parameters".
component = DocumentLanguageClassifier(languages=["en", "de"])
data = component.to_dict()
assert data == {"type": "DocumentLanguageClassifier", "init_parameters": {"languages": ["en", "de"]}}
Expand Down

0 comments on commit 2837d69

Please sign in to comment.