block bots
This commit is contained in:
@@ -1,4 +1,6 @@
|
|||||||
class ApplicationController < ActionController::Base
|
class ApplicationController < ActionController::Base
|
||||||
|
include BotBlocker
|
||||||
|
|
||||||
# Changes to the importmap will invalidate the etag for HTML responses
|
# Changes to the importmap will invalidate the etag for HTML responses
|
||||||
stale_when_importmap_changes
|
stale_when_importmap_changes
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
# Blocks requests from known bots and crawlers by inspecting the
# User-Agent header. Include this concern in a controller (e.g.
# ApplicationController) to reject bot traffic with 403 Forbidden
# before any action runs.
module BotBlocker
  extend ActiveSupport::Concern

  # Lowercase substrings that identify bot/crawler User-Agents.
  # Hoisted to a frozen constant so the array (and its 31 string
  # literals) is built once at load time instead of on every request.
  # Matching is substring-based on the downcased User-Agent, so the
  # generic entries ('spider', 'crawler', 'scraper', 'bot') act as
  # catch-alls for anything the specific entries miss.
  BOT_PATTERNS = [
    'gptbot',              # OpenAI GPTBot
    'chatgpt',             # ChatGPT
    'claude-web',          # Anthropic Claude
    'bingbot',             # Microsoft Bing
    'googlebot',           # Google
    'baiduspider',         # Baidu
    'yandexbot',           # Yandex
    'duckduckbot',         # DuckDuckGo
    'slurp',               # Yahoo
    'facebookexternalhit', # Facebook
    'twitterbot',          # Twitter
    'linkedinbot',         # LinkedIn
    'whatsapp',            # WhatsApp
    'telegrambot',         # Telegram
    'slackbot',            # Slack
    'discordbot',          # Discord
    'applebot',            # Apple
    'ia_archiver',         # Alexa/Internet Archive
    'petalbot',            # Huawei
    'seznambot',           # Seznam
    'ahrefsbot',           # Ahrefs
    'semrushbot',          # SEMrush
    'mj12bot',             # Majestic
    'dotbot',              # OpenSiteExplorer
    'rogerbot',            # Moz
    'exabot',              # Exalead
    'facebot',             # Facebook
    'spider',              # Generic spiders
    'crawler',             # Generic crawlers
    'scraper',             # Generic scrapers
    'bot',                 # Generic bots (last resort)
  ].freeze

  included do
    before_action :block_bots
  end

  private

  # Halts the filter chain with a plain-text 403 response when the
  # request looks like a bot; otherwise lets the action proceed.
  def block_bots
    return unless bot_request?

    render plain: "Bot access is not allowed", status: :forbidden
  end

  # True when the (downcased) User-Agent contains any known bot pattern.
  # A missing User-Agent header becomes "" via to_s and matches nothing,
  # so header-less requests are allowed through.
  def bot_request?
    user_agent = request.user_agent.to_s.downcase
    BOT_PATTERNS.any? { |pattern| user_agent.include?(pattern) }
  end
end
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
# Integration coverage for the BotBlocker concern: known bot User-Agents
# must receive 403 Forbidden, while ordinary browser User-Agents must
# reach the action normally.
class BotBlockingTest < ActionDispatch::IntegrationTest
  test "should block GPTBot" do
    assert_blocked "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.3; +https://openai.com/gptbot)"
    assert_match(/bot access is not allowed/i, response.body)
  end

  test "should block ChatGPT bot" do
    assert_blocked "Mozilla/5.0 (compatible; ChatGPT-User/1.0; +https://openai.com/bot)"
  end

  test "should block Googlebot" do
    assert_blocked "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
  end

  test "should block Bingbot" do
    assert_blocked "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
  end

  test "should block generic bot user agent" do
    assert_blocked "SomeBot/1.0"
  end

  test "should allow normal browsers" do
    assert_allowed "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
  end

  test "should allow Firefox" do
    assert_allowed "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
  end

  test "should allow Safari" do
    assert_allowed "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15"
  end

  test "should block crawler" do
    assert_blocked "SomeCrawler/1.0"
  end

  test "should block scraper" do
    assert_blocked "WebScraper/2.0"
  end

  private

  # Issues a GET to the root path with the given User-Agent and asserts
  # the request was rejected with 403 Forbidden.
  def assert_blocked(user_agent)
    get root_path, headers: { "User-Agent" => user_agent }
    assert_response :forbidden
  end

  # Issues a GET to the root path with the given User-Agent and asserts
  # the request succeeded (the bot filter let it through).
  def assert_allowed(user_agent)
    get root_path, headers: { "User-Agent" => user_agent }
    assert_response :success
  end
end
|
||||||
Reference in New Issue
Block a user