From 969bbfcf2a2d3198661cde7107bf74eae72ddbe2 Mon Sep 17 00:00:00 2001 From: Stephen Smoogen Date: Mon, 8 Jul 2024 12:16:52 -0400 Subject: [PATCH] Add blockers to dl.fedoraproject.org Looked at logs of servers being hit by the 'non-responsive' bots and the following were hit heavily every day multiple times a day: 100006 nagios.fedoraproject.org-access.log 102150 koschei.fedoraproject.org-access.log 162296 lists.fedoraproject.org-access.log 495776 fedoraproject.org-access.log 850471 dl.fedoraproject.org-access.log Added blocks to dl.fedoraproject.org to try and lower its hit rate. Others need review from people who know their internals more. Signed-off-by: Stephen Smoogen --- .../files/httpd/dl.fedoraproject.org/rewrite.conf | 7 ++++++- roles/mailman3/templates/mailmanweb.conf.j2 | 2 +- roles/pagure/templates/0_pagure.conf | 9 ++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf b/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf index 581012ed97..6ca112f2f6 100644 --- a/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf +++ b/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf @@ -3,8 +3,13 @@ RewriteEngine On RewriteCond %{HTTP_USER_AGENT} "lftp" RewriteRule ^.*$ https://fedoraproject.org/wiki/Infrastructure/Mirroring#Tools_to_avoid [R,L] -RewriteRule ^/$ /pub [R=302,L] +# Spiders-gone-wild +# These spiders may not follow robots.txt and will +# hit admin sections which consume large amounts of CPU +RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] +RewriteRule .* - [F,L] +RewriteRule ^/$ /pub [R=302,L] RedirectMatch 302 ^/pub/fedora/linux/atomic/(.*$) https://kojipkgs.fedoraproject.org/atomic/$1 RedirectMatch 302 ^/pub/fedora/linux/atomic https://kojipkgs.fedoraproject.org/atomic/ diff --git a/roles/mailman3/templates/mailmanweb.conf.j2 
b/roles/mailman3/templates/mailmanweb.conf.j2 index 749dc9a444..87d3e99eff 100644 --- a/roles/mailman3/templates/mailmanweb.conf.j2 +++ b/roles/mailman3/templates/mailmanweb.conf.j2 @@ -37,7 +37,7 @@ RewriteRule ^/$ /archives [R,L] # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU -RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|claudebot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] +RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] RewriteRule .* - [F,L] # Old static archives diff --git a/roles/pagure/templates/0_pagure.conf b/roles/pagure/templates/0_pagure.conf index 5fdd9cba72..8ed899f6de 100644 --- a/roles/pagure/templates/0_pagure.conf +++ b/roles/pagure/templates/0_pagure.conf @@ -138,10 +138,13 @@ MaxConnectionsPerChild 1000 # RewriteEngine On # RewriteCond %{REQUEST_URI} ^/fedora-web/websites$ # RewriteRule .* - [F] - # Reject Bytespider spider + RewriteEngine On - RewriteCond %{HTTP_USER_AGENT} .*Bytespider.* - RewriteRule .* - [F] +# Spiders-gone-wild +# These spiders may not follow robots.txt and will +# hit admin sections which consume large amounts of CPU + RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] + RewriteRule .* - [F,L] SetHandler server-status