Block bots
CI / scan_ruby (push) Successful in 18s
CI / scan_js (push) Successful in 14s
CI / lint (push) Failing after 21s
CI / test (push) Successful in 36s

This commit is contained in:
2026-01-30 08:52:17 +01:00
parent f31a25fb03
commit 7118f1ea45
3 changed files with 112 additions and 0 deletions
@@ -1,4 +1,6 @@
class ApplicationController < ActionController::Base
include BotBlocker
# Changes to the importmap will invalidate the etag for HTML responses
stale_when_importmap_changes
+56
View File
@@ -0,0 +1,56 @@
# Controller concern that rejects requests from known bots and crawlers
# with a 403 Forbidden before any action runs.
module BotBlocker
  extend ActiveSupport::Concern

  # Known bot user-agent substrings, matched case-insensitively against the
  # downcased User-Agent header. Frozen at load time so the list is built
  # once instead of being re-allocated on every request.
  BOT_PATTERNS = [
    'gptbot',              # OpenAI GPTBot
    'chatgpt',             # ChatGPT
    'claude-web',          # Anthropic Claude
    'bingbot',             # Microsoft Bing
    'googlebot',           # Google
    'baiduspider',         # Baidu
    'yandexbot',           # Yandex
    'duckduckbot',         # DuckDuckGo
    'slurp',               # Yahoo
    'facebookexternalhit', # Facebook
    'twitterbot',          # Twitter
    'linkedinbot',         # LinkedIn
    'whatsapp',            # WhatsApp
    'telegrambot',         # Telegram
    'slackbot',            # Slack
    'discordbot',          # Discord
    'applebot',            # Apple
    'ia_archiver',         # Alexa/Internet Archive
    'petalbot',            # Huawei
    'seznambot',           # Seznam
    'ahrefsbot',           # Ahrefs
    'semrushbot',          # SEMrush
    'mj12bot',             # Majestic
    'dotbot',              # OpenSiteExplorer
    'rogerbot',            # Moz
    'exabot',              # Exalead
    'facebot',             # Facebook
    'spider',              # Generic spiders
    'crawler',             # Generic crawlers
    'scraper',             # Generic scrapers
    'bot',                 # Generic bots (last resort)
  ].freeze

  # One precompiled alternation regex: a single scan of the UA string per
  # request instead of up to BOT_PATTERNS.size String#include? calls.
  # Regexp.union escapes each literal, so matching is identical to the
  # original "any pattern is a substring" check.
  BOT_REGEX = Regexp.union(BOT_PATTERNS).freeze

  included do
    before_action :block_bots
  end

  private

  # Halts the filter chain with a plain-text 403 when the request looks
  # like it comes from a bot.
  def block_bots
    return unless bot_request?

    render plain: "Bot access is not allowed", status: :forbidden
  end

  # True when the request's User-Agent contains any known bot pattern.
  # A missing User-Agent header (nil) downcases to "" and is not blocked.
  def bot_request?
    BOT_REGEX.match?(request.user_agent.to_s.downcase)
  end
end
+54
View File
@@ -0,0 +1,54 @@
require "test_helper"

# Integration coverage for the BotBlocker concern: bot user agents must
# receive 403 Forbidden, while real browser user agents are served normally.
class BotBlockingTest < ActionDispatch::IntegrationTest
  test "should block GPTBot" do
    visit_root_as "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.3; +https://openai.com/gptbot)"
    assert_response :forbidden
    assert_match(/bot access is not allowed/i, response.body)
  end

  test "should block ChatGPT bot" do
    visit_root_as "Mozilla/5.0 (compatible; ChatGPT-User/1.0; +https://openai.com/bot)"
    assert_response :forbidden
  end

  test "should block Googlebot" do
    visit_root_as "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    assert_response :forbidden
  end

  test "should block Bingbot" do
    visit_root_as "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
    assert_response :forbidden
  end

  test "should block generic bot user agent" do
    visit_root_as "SomeBot/1.0"
    assert_response :forbidden
  end

  test "should allow normal browsers" do
    visit_root_as "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    assert_response :success
  end

  test "should allow Firefox" do
    visit_root_as "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
    assert_response :success
  end

  test "should allow Safari" do
    visit_root_as "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15"
    assert_response :success
  end

  test "should block crawler" do
    visit_root_as "SomeCrawler/1.0"
    assert_response :forbidden
  end

  test "should block scraper" do
    visit_root_as "WebScraper/2.0"
    assert_response :forbidden
  end

  private

  # Issues a GET to the root path carrying the given User-Agent header.
  def visit_root_as(user_agent)
    get root_path, headers: { "User-Agent" => user_agent }
  end
end