Skip to content

Commit

Permalink
support js rendering in crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
yujonglee committed Oct 14, 2024
1 parent 784e7d1 commit 70fe2db
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
12 changes: 12 additions & 0 deletions core/config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,15 @@ if config_env() == :prod do
otlp_headers: [{"Authorization", "Bearer #{System.fetch_env!("OTEL_COLLECTOR_URL_AUTH")}"}]
end
end

# Optional JS-rendering proxy (Puppeteer) for the crawler; never configured
# when SELF_HOST is "true" or "1".
if System.get_env("SELF_HOST") not in ~w(true 1) do
  puppeteer_base_url = System.get_env("PUPPETEER_BASE_URL")

  # Gate on the base URL alone: the crawler passes `base_url` to `URI.parse/1`,
  # which raises on nil, so an API key without a base URL must not enable the
  # proxy. The API key is forwarded as-is and may be nil.
  if puppeteer_base_url do
    config :canary, :puppeteer,
      base_url: puppeteer_base_url,
      api_key: System.get_env("PUPPETEER_API_KEY")
  end
end
24 changes: 22 additions & 2 deletions core/lib/canary/crawler.ex
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,28 @@ defmodule Canary.Crawler.Visitor do
end

# Fetches `url` and returns `{:ok, response, state}` on success.
#
# When `:puppeteer` is configured for `:canary`, the request is sent to the
# render proxy's `/api/render` endpoint with the target page passed as the
# `url` query parameter; otherwise the page is requested directly.
# Any result from `Req.get/2` that is not `{:ok, response}` is returned as-is.
defp fetch(url, state, _opts) do
  puppeteer = Application.get_env(:canary, :puppeteer)

  request_opts =
    if puppeteer do
      # Replace whatever path the configured base URL carries with the
      # render endpoint.
      render_uri = %{URI.parse(puppeteer[:base_url]) | path: "/api/render"}

      [
        url: URI.to_string(render_uri),
        headers: [{"Authorization", "Bearer #{puppeteer[:api_key]}"}],
        params: [url: url],
        # The proxied request gets a longer timeout than the direct one.
        receive_timeout: 10_000
      ]
    else
      [url: url, receive_timeout: 5_000]
    end

  with {:ok, response} <- Req.get(Crawler.req(), request_opts) do
    {:ok, response, state}
  end
end

Expand Down

0 comments on commit 70fe2db

Please sign in to comment.