From abd436e5a720d530a9c8381b144d754f350723dd Mon Sep 17 00:00:00 2001 From: pghorpade Date: Mon, 3 Jun 2024 15:07:48 -0700 Subject: [PATCH 1/2] feat: update robots.txt to block AI bots --- public/robots_allow.txt | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/public/robots_allow.txt b/public/robots_allow.txt index 18050662..5d734a68 100644 --- a/public/robots_allow.txt +++ b/public/robots_allow.txt @@ -1,4 +1,37 @@ Sitemap: https://digital.library.ucla.edu/sitemap.xml -User-agent: * -Disallow: +Sitemap: https://digital.library.ucla.edu/sitemap.xml + +User-agent: AdsBot-Google +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: AwarioRssBot +User-agent: AwarioSmartBot +User-agent: Bytespider +User-agent: CCBot +User-agent: ChatGPT-User +User-agent: ClaudeBot +User-agent: Claude-Web +User-agent: cohere-ai +User-agent: DataForSeoBot +User-agent: Diffbot +User-agent: FacebookBot +User-agent: FriendlyCrawler +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GPTBot +User-agent: img2dataset +User-agent: ImagesiftBot +User-agent: magpie-crawler +User-agent: Meltwater +User-agent: omgili +User-agent: omgilibot +User-agent: peer39_crawler +User-agent: peer39_crawler/1.0 +User-agent: PerplexityBot +User-agent: PiplBot +User-agent: scoop.it +User-agent: Seekr +User-agent: YouBot +Disallow: / From 20cbbd17bce3c91c95f022c5b2a6d04dc32eb2ce Mon Sep 17 00:00:00 2001 From: pghorpade Date: Thu, 13 Jun 2024 16:14:39 -0700 Subject: [PATCH 2/2] fix: syntax and format of robots.txt --- public/robots_allow.txt | 68 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/public/robots_allow.txt b/public/robots_allow.txt index 5d734a68..1702133f 100644 --- a/public/robots_allow.txt +++ b/public/robots_allow.txt @@ -1,37 +1,95 @@ -Sitemap: https://digital.library.ucla.edu/sitemap.xml - -Sitemap: 
https://digital.library.ucla.edu/sitemap.xml - User-agent: AdsBot-Google +Disallow: / + User-agent: Amazonbot +Disallow: / + User-agent: anthropic-ai +Disallow: / + User-agent: Applebot +Disallow: / + User-agent: AwarioRssBot +Disallow: / + User-agent: AwarioSmartBot +Disallow: / + User-agent: Bytespider +Disallow: / + User-agent: CCBot +Disallow: / + User-agent: ChatGPT-User +Disallow: / + User-agent: ClaudeBot +Disallow: / + User-agent: Claude-Web +Disallow: / + User-agent: cohere-ai +Disallow: / + User-agent: DataForSeoBot +Disallow: / + User-agent: Diffbot +Disallow: / + User-agent: FacebookBot +Disallow: / + User-agent: FriendlyCrawler +Disallow: / + User-agent: Google-Extended +Disallow: / + User-agent: GoogleOther +Disallow: / + User-agent: GPTBot +Disallow: / + User-agent: img2dataset +Disallow: / + User-agent: ImagesiftBot +Disallow: / + User-agent: magpie-crawler +Disallow: / + User-agent: Meltwater +Disallow: / + User-agent: omgili +Disallow: / + User-agent: omgilibot +Disallow: / + User-agent: peer39_crawler +Disallow: / + User-agent: peer39_crawler/1.0 +Disallow: / + User-agent: PerplexityBot +Disallow: / + User-agent: PiplBot +Disallow: / + User-agent: scoop.it +Disallow: / + User-agent: Seekr -User-agent: YouBot Disallow: / + +User-agent: YouBot +Disallow: / \ No newline at end of file