Skip to content

Commit

Permalink
Fix example notebook if playwright is not installed yet
Browse files (browse the repository at this point in the history)
  • Loading branch information
leopiney committed Nov 3, 2024
1 parent 2e69143 commit bc8d3d7
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 7 deletions.
3 changes: 2 additions & 1 deletion examples/01_basics_notebook.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,7 +34,8 @@
},
"outputs": [],
"source": [
"!pip install --upgrade neuralnoise"
"!pip install --upgrade neuralnoise\n",
"!playwright install --with-deps"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[project]
name = "neuralnoise"
version = "1.3.0"
version = "1.3.1"
description = "An AI-powered podcast studio that uses multiple AI agents working together."
authors = [
{ name = "Leonardo Piñeyro", email = "[email protected]" }
Expand Down
9 changes: 5 additions & 4 deletions src/neuralnoise/extract.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,10 +4,9 @@
from pathlib import Path
from tempfile import NamedTemporaryFile
from textwrap import dedent
from typing import AsyncIterator, Iterator
from typing import AsyncIterator

import requests # type: ignore
from crawl4ai import AsyncWebCrawler, CrawlResult
from langchain_community.document_loaders import (
BSHTMLLoader,
PyMuPDFLoader,
Expand All @@ -30,6 +29,8 @@ def __init__(
self.css_selector = css_selector

async def crawl(self, url: str, css_selector: str | None = None):
from crawl4ai import AsyncWebCrawler

async with AsyncWebCrawler(verbose=True) as crawler:
result = await crawler.arun(
url,
Expand All @@ -38,11 +39,11 @@ async def crawl(self, url: str, css_selector: str | None = None):

return result

def _process_result(self, result: CrawlResult):
def _process_result(self, result):
if result.markdown is None:
raise ValueError(f"No valid content found at {self.url}")

metadata: dict[str, str | None] = {
metadata: dict[str, str | None] = { # type: ignore
**(result.metadata or {}),
"source": self.url,
}
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bc8d3d7

Please sign in to comment.