diff --git a/setup.py b/setup.py index fee1fd8..4f01330 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def read_git_requirements(file): setup( name='thepipe_api', - version='1.2.7', + version='1.2.8', author='Emmett McFarlane', author_email='emmett@thepi.pe', description='AI-native extractor, powered by multimodal LLMs.', diff --git a/thepipe/scraper.py b/thepipe/scraper.py index b9c25a2..00f35da 100644 --- a/thepipe/scraper.py +++ b/thepipe/scraper.py @@ -126,7 +126,7 @@ def scrape_file(filepath: str, ai_extraction: bool = False, text_only: bool = Fa return scraped_chunks def scrape_plaintext(file_path: str) -> List[Chunk]: - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: text = file.read() return [Chunk(path=file_path, texts=[text])]