diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb
index 7ddd11d..60e5832 100644
--- a/app/controllers/application_controller.rb
+++ b/app/controllers/application_controller.rb
@@ -1,4 +1,6 @@
 class ApplicationController < ActionController::Base
+  include BotBlocker
+
   # Changes to the importmap will invalidate the etag for HTML responses
   stale_when_importmap_changes
 
diff --git a/app/controllers/concerns/bot_blocker.rb b/app/controllers/concerns/bot_blocker.rb
new file mode 100644
--- /dev/null
+++ b/app/controllers/concerns/bot_blocker.rb
@@ -0,0 +1,68 @@
+# Rejects requests whose User-Agent identifies a bot, crawler or scraper.
+#
+# Including this concern registers a before_action that answers matching
+# requests with a plain-text 403 Forbidden instead of running the action.
+module BotBlocker
+  extend ActiveSupport::Concern
+
+  # Lowercase substrings that mark a User-Agent as automated traffic.
+  # Built once and frozen so the list is not reallocated on every request.
+  #
+  # The generic "bot" and "spider" entries already match every more specific
+  # "...bot" / "...spider" name; those names are kept so the list documents
+  # what is targeted and still works if the catch-alls are ever removed.
+  BOT_PATTERNS = [
+    "gptbot",              # OpenAI GPTBot
+    "chatgpt",             # ChatGPT
+    "claude-web",          # Anthropic Claude
+    "bingbot",             # Microsoft Bing
+    "googlebot",           # Google
+    "baiduspider",         # Baidu
+    "yandexbot",           # Yandex
+    "duckduckbot",         # DuckDuckGo
+    "slurp",               # Yahoo
+    "facebookexternalhit", # Facebook
+    "twitterbot",          # Twitter
+    "linkedinbot",         # LinkedIn
+    "whatsapp",            # WhatsApp
+    "telegrambot",         # Telegram
+    "slackbot",            # Slack
+    "discordbot",          # Discord
+    "applebot",            # Apple
+    "ia_archiver",         # Alexa/Internet Archive
+    "petalbot",            # Huawei
+    "seznambot",           # Seznam
+    "ahrefsbot",           # Ahrefs
+    "semrushbot",          # SEMrush
+    "mj12bot",             # Majestic
+    "dotbot",              # OpenSiteExplorer
+    "rogerbot",            # Moz
+    "exabot",              # Exalead
+    "facebot",             # Facebook
+    "spider",              # Generic spiders
+    "crawler",             # Generic crawlers
+    "scraper",             # Generic scrapers
+    "bot"                  # Generic bots (last resort)
+  ].freeze
+
+  included do
+    before_action :block_bots
+  end
+
+  private
+
+  # Halts the request with 403 Forbidden when the client looks like a bot.
+  def block_bots
+    return unless bot_request?
+
+    render plain: "Bot access is not allowed", status: :forbidden
+  end
+
+  # Returns true when the request's User-Agent contains any BOT_PATTERNS
+  # entry. A missing header becomes "" via to_s and therefore never matches.
+  def bot_request?
+    user_agent = request.user_agent.to_s.downcase
+
+    BOT_PATTERNS.any? { |pattern| user_agent.include?(pattern) }
+  end
+end
diff --git a/test/controllers/bot_blocking_test.rb b/test/controllers/bot_blocking_test.rb
new file mode 100644
--- /dev/null
+++ b/test/controllers/bot_blocking_test.rb
@@ -0,0 +1,73 @@
+require "test_helper"
+
+# Integration coverage for the BotBlocker concern: bot-like User-Agents are
+# answered with 403 Forbidden, while ordinary browsers reach the root page.
+class BotBlockingTest < ActionDispatch::IntegrationTest
+  test "should block GPTBot" do
+    get_root_as "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.3; +https://openai.com/gptbot)"
+    assert_response :forbidden
+    assert_match(/bot access is not allowed/i, response.body)
+  end
+
+  test "should block ChatGPT bot" do
+    get_root_as "Mozilla/5.0 (compatible; ChatGPT-User/1.0; +https://openai.com/bot)"
+    assert_response :forbidden
+  end
+
+  test "should block Googlebot" do
+    get_root_as "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+    assert_response :forbidden
+  end
+
+  test "should block Bingbot" do
+    get_root_as "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
+    assert_response :forbidden
+  end
+
+  test "should block generic bot user agent" do
+    get_root_as "SomeBot/1.0"
+    assert_response :forbidden
+  end
+
+  test "should block crawler" do
+    get_root_as "SomeCrawler/1.0"
+    assert_response :forbidden
+  end
+
+  test "should block scraper" do
+    get_root_as "WebScraper/2.0"
+    assert_response :forbidden
+  end
+
+  test "should block bots regardless of user agent letter case" do
+    get_root_as "GOOGLEBOT/2.1"
+    assert_response :forbidden
+  end
+
+  test "should allow normal browsers" do
+    get_root_as "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    assert_response :success
+  end
+
+  test "should allow Firefox" do
+    get_root_as "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
+    assert_response :success
+  end
+
+  test "should allow Safari" do
+    get_root_as "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15"
+    assert_response :success
+  end
+
+  test "should allow requests with an empty user agent" do
+    get_root_as ""
+    assert_response :success
+  end
+
+  private
+
+  # Requests the root page while presenting user_agent as the User-Agent.
+  def get_root_as(user_agent)
+    get root_path, headers: { "User-Agent" => user_agent }
+  end
+end