Skip to content

Commit

Permalink
Fixed restrict crawl
Browse files: browse the repository at this point in the history
  • Loading branch information
berrysauce committed Aug 6, 2024
1 parent 63aba92 commit b430806
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion indexbot/spiders/indexbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class IndexbotSpider(CrawlSpider):

# Restrict crawling to the domains of the start URLs if RESTRICT_CRAWL is set
if os.getenv("RESTRICT_CRAWL"):
allowed_domains = start_urls
allowed_domains = []
for url in start_urls:
allowed_domains.append(url.replace("https://", "").replace("http://", "").split("/")[0])

#allowed_domains = ["producthunt.com"] # Replace with the target domain(s)
#start_urls = ["http://producthunt.com"] # Replace with the initial URL(s)
Expand Down

0 comments on commit b430806

Please sign in to comment.