Skip to content

Commit

Permalink
ignoring utf8 read errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
emcf committed Sep 5, 2024
1 parent 4fc33ec commit 1c851da
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read_git_requirements(file):

setup(
name='thepipe_api',
- version='1.2.7',
+ version='1.2.8',
author='Emmett McFarlane',
author_email='[email protected]',
description='AI-native extractor, powered by multimodal LLMs.',
Expand Down
2 changes: 1 addition & 1 deletion thepipe/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def scrape_file(filepath: str, ai_extraction: bool = False, text_only: bool = Fa
return scraped_chunks

def scrape_plaintext(file_path: str) -> List[Chunk]:
- with open(file_path, 'r', encoding='utf-8') as file:
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
text = file.read()
return [Chunk(path=file_path, texts=[text])]

Expand Down

0 comments on commit 1c851da

Please sign in to comment.