From 60ebebd17e40124b4edd19fec87b56a0cc0651ac Mon Sep 17 00:00:00 2001
From: Melroy van den Berg
Date: Tue, 14 May 2024 20:14:35 +0200
Subject: [PATCH] Extend AI user agent bot ban list (#779)

---
 public/robots.txt | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/public/robots.txt b/public/robots.txt
index 2d1e03d5a..65037c88a 100644
--- a/public/robots.txt
+++ b/public/robots.txt
@@ -1,10 +1,40 @@
-# Ban ChatGPT from indexing Mbin instances at all, in order to prevent training their [the OpenAI] models on users' data.
+# Ban several AI bots from indexing Mbin instances at all, in order to prevent training their models on users' data.
+
+# OpenAI, ChatGPT
 User-agent: GPTBot
 Disallow: /
 
 User-agent: ChatGPT-User
 Disallow: /
 
+# Google AI (Gemini, etc)
+User-agent: Google-Extended
+Disallow: /
+
+# Block common crawl
+User-agent: CCBot
+Disallow: /
+
+# Facebook
+User-agent: FacebookBot
+Disallow: /
+
+# Cohere.ai
+User-agent: cohere-ai
+Disallow: /
+
+# Perplexity
+User-agent: PerplexityBot
+Disallow: /
+
+# Anthropic
+User-agent: anthropic-ai
+Disallow: /
+
+# ...also anthropic
+User-agent: ClaudeBot
+Disallow: /
+
 # Rest of indexing robots
 User-agent: *
 Request-rate: 1/1s