geoip_shop_manager.py aktualisiert
This commit is contained in:
@@ -10,7 +10,7 @@ Supports three modes:
|
||||
- php-only: GeoIP blocking without CrowdSec
|
||||
- bot-only: Rate-limit bots, shop remains globally accessible
|
||||
|
||||
v3.4.5: Fix regex delimiter escape für curl pattern
|
||||
v3.5.0: Erweiterte Bot-Erkennung mit 300+ Bots und generischen Fallback-Patterns
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -94,27 +94,394 @@ GEO_REGIONS = {
|
||||
# BOT DETECTION
|
||||
# =============================================================================
|
||||
# Maps a bot's display name to a regular expression that is searched
# (case-insensitively, see detect_bot) in the User-Agent string.
#
# ORDER MATTERS: detect_bot() returns the first entry whose pattern matches,
# and dicts preserve insertion order. A more specific pattern must therefore
# be listed BEFORE any general pattern that is a substring of it
# (e.g. 'PageGrabber' before 'Grabber', 'Googlebot-Image' before 'Googlebot').
#
# NOTE(review): many short patterns ('yak', 'devin', 'grub', 'leech',
# 'slack', ...) are unanchored substrings and will also match unrelated
# User-Agents that merely contain them - confirm this is acceptable.
# NOTE(review): the v3.4.5 changelog mentions a regex-delimiter escape fix for
# the curl pattern; several patterns below contain '/' - confirm how the
# consumers of these patterns delimit them.
BOT_PATTERNS = {
    # =========================================================================
    # AI/LLM SERVICES
    # =========================================================================
    'ChatGPT-User': r'chatgpt-user',
    'ChatGPT-Operator': r'chatgpt-operator',
    'ChatGPT-Agent': r'chatgpt agent',
    'ChatGPT': r'chatgpt',
    'GPTBot (OpenAI)': r'gptbot',
    'OAI-SearchBot (OpenAI)': r'oai-searchbot',
    'OpenAI': r'openai',
    'ClaudeBot (Anthropic)': r'claudebot',
    'Claude-User': r'claude-user',
    'Claude-Web': r'claude-web',
    'Claude-SearchBot': r'claude-searchbot',
    'Anthropic-AI': r'anthropic-ai',
    'Anthropic': r'anthropic',
    'Gemini-Deep-Research': r'gemini-deep-research',
    'Google-NotebookLM': r'google-notebooklm',
    'NotebookLM': r'notebooklm',
    'GoogleAgent-Mariner': r'googleagent-mariner',
    'PerplexityBot': r'perplexitybot',
    'Perplexity-User': r'perplexity-user',
    'Perplexity': r'perplexity',
    'Cohere-AI': r'cohere-ai',
    'Cohere-Training-Crawler': r'cohere-training-data-crawler',
    'Cohere': r'cohere',
    'MistralAI-User': r'mistralai-user',
    'MistralAI': r'mistralai',
    'Mistral': r'mistral',
    'DeepSeekBot': r'deepseekbot',
    'DeepSeek': r'deepseek',
    'Bytespider (TikTok/ByteDance)': r'bytespider',
    'TikTokSpider': r'tiktokspider',
    'ByteDance': r'bytedance',
    'AI2Bot-DeepResearchEval': r'ai2bot-deepresearcheval',
    'AI2Bot-Dolma': r'ai2bot-dolma',
    'AI2Bot (Allen Institute)': r'ai2bot',
    'CCBot (Common Crawl)': r'ccbot',
    'Diffbot': r'diffbot',
    'img2dataset': r'img2dataset',
    'LAIONDownloader': r'laiondownloader',
    'LAION-HuggingFace': r'laion-huggingface',
    'LAION': r'laion',
    'HuggingFace': r'huggingface',
    'BedrockBot (AWS)': r'bedrockbot',
    'DuckAssistBot': r'duckassistbot',
    'PhindBot': r'phindbot',
    'YouBot': r'youbot',
    'iAskSpider': r'iaskspider',
    'iAskBot': r'iaskbot',
    'ChatGLM-Spider': r'chatglm-spider',
    'Panscient': r'panscient',
    'Devin (Cognition)': r'devin',
    'Manus-User': r'manus-user',
    'TwinAgent': r'twinagent',
    'NovaAct': r'novaact',
    'FirecrawlAgent': r'firecrawlagent',
    'Firecrawl': r'firecrawl',
    'Crawl4AI': r'crawl4ai',
    'Crawlspace': r'crawlspace',
    'Cloudflare-AutoRAG': r'cloudflare-autorag',
    'TerraCotta': r'terracotta',
    'Thinkbot': r'thinkbot',
    # =========================================================================
    # SEARCH ENGINES
    # =========================================================================
    'Googlebot-Image': r'googlebot-image',
    'Googlebot-Video': r'googlebot-video',
    'Googlebot-News': r'googlebot-news',
    'Googlebot-Discovery': r'googlebot-discovery',
    'Googlebot': r'googlebot',
    'Google-Extended': r'google-extended',
    'Google-CloudVertexBot': r'google-cloudvertexbot',
    'Google-Firebase': r'google-firebase',
    'Google-InspectionTool': r'google-inspectiontool',
    'GoogleOther-Image': r'googleother-image',
    'GoogleOther-Video': r'googleother-video',
    'GoogleOther': r'googleother',
    'Storebot-Google': r'storebot-google',
    'AdsBot-Google': r'adsbot-google',
    'Bingbot (Microsoft)': r'bingbot',
    'BingPreview': r'bingpreview',
    'MSNBot': r'msnbot',
    'Baiduspider': r'baiduspider',
    'Baidu': r'baidu',
    'YandexBot': r'yandexbot',
    'YandexAdditionalBot': r'yandexadditionalbot',
    'YandexAdditional': r'yandexadditional',
    'Yandex': r'yandex',
    'DuckDuckBot': r'duckduckbot',
    'DuckDuckGo': r'duckduckgo',
    'Applebot-Extended': r'applebot-extended',
    'Applebot': r'applebot',
    'Yahoo Slurp': r'slurp',
    'Sogou': r'sogou',
    'Sosospider': r'sosospider',
    'NaverBot': r'naverbot',
    'Naver': r'naver',
    'SeznamBot': r'seznambot',
    'MojeekBot': r'mojeekbot',
    'QwantBot': r'qwantbot',
    'PetalBot (Huawei)': r'petalbot',
    'CocCocBot': r'coccocbot',
    'Exabot': r'exabot',
    'BraveBot': r'bravebot',
    'Bravest': r'bravest',
    'SeekportBot': r'seekportbot',
    # =========================================================================
    # SEO & MARKETING TOOLS
    # =========================================================================
    'AhrefsBot': r'ahrefsbot',
    'Ahrefs': r'ahrefs',
    'SemrushBot-OCOB': r'semrushbot-ocob',
    'SemrushBot-SWA': r'semrushbot-swa',
    'SemrushBot': r'semrushbot',
    'Semrush': r'semrush',
    'MJ12Bot (Majestic)': r'mj12bot',
    'Majestic': r'majestic',
    'DotBot (Moz)': r'dotbot',
    'RogerBot (Moz)': r'rogerbot',
    'Screaming Frog': r'screaming frog',
    'BLEXBot': r'blexbot',
    'DataForSEOBot': r'dataforseobot',
    'Linkdex': r'linkdex',
    'SearchmetricsBot': r'searchmetricsbot',
    # =========================================================================
    # SOCIAL MEDIA
    # =========================================================================
    'Facebook External Hit': r'facebookexternalhit',
    'FacebookBot': r'facebookbot',
    'Facebot': r'facebot',
    'Meta-ExternalAgent': r'meta-externalagent',
    'Meta-ExternalFetcher': r'meta-externalfetcher',
    'Meta-WebIndexer': r'meta-webindexer',
    'Facebook': r'facebook',
    'Twitterbot': r'twitterbot',
    'Twitter': r'twitter',
    'Instagram': r'instagram',
    'LinkedInBot': r'linkedinbot',
    'LinkedIn': r'linkedin',
    'Pinterestbot': r'pinterestbot',
    'Pinterest': r'pinterest',
    'WhatsApp': r'whatsapp',
    'TelegramBot': r'telegrambot',
    'Telegram': r'telegram',
    'DiscordBot': r'discordbot',
    'Discord': r'discord',
    'Slackbot': r'slackbot',
    'Slack': r'slack',
    'Quora-Bot': r'quora-bot',
    'Snapchat': r'snapchat',
    'RedditBot': r'redditbot',
    # =========================================================================
    # E-COMMERCE & PRICE COMPARISON
    # =========================================================================
    'Amazonbot': r'amazonbot',
    'Amazon-Kendra': r'amazon-kendra',
    'AmazonBuyForMe': r'amazonbuyforme',
    'AMZNKAssocBot': r'amznkassocbot',
    'GeedoShopProductFinder': r'geedoshopproductfinder',
    'Geedo': r'geedo',
    'ShopWiki': r'shopwiki',
    'PriceGrabber': r'pricegrabber',
    'Shopify': r'shopify',
    'Idealo': r'idealo',
    'Guenstiger.de': r'guenstiger',
    'Billiger.de': r'billiger',
    'Ladenzeile': r'ladenzeile',
    'Kelkoo': r'kelkoo',
    'PriceRunner': r'pricerunner',
    # =========================================================================
    # ARCHIVE & RESEARCH
    # =========================================================================
    'Archive.org Bot': r'archive\.org_bot|archive-org-bot',
    'Internet Archive': r'ia_archiver|ia-archiver',
    'Wayback Machine': r'wayback',
    'Heritrix': r'heritrix',
    'Apache Nutch': r'nutch',
    'Common Crawl': r'commoncrawl',
    # =========================================================================
    # MONITORING & UPTIME
    # =========================================================================
    'UptimeRobot': r'uptimerobot',
    'Pingdom': r'pingdom',
    'StatusCake': r'statuscake',
    'Site24x7': r'site24x7',
    'NewRelic': r'newrelic',
    'Datadog': r'datadog',
    'GTmetrix': r'gtmetrix',
    'PageSpeed Insights': r'pagespeed',
    'Chrome Lighthouse': r'chrome-lighthouse',
    # =========================================================================
    # DOWNLOAD & SCRAPER TOOLS
    # =========================================================================
    'HTTrack': r'httrack',
    'Teleport Pro': r'teleportpro|teleport pro',
    'Teleport': r'teleport',
    'GetRight': r'getright',
    'FlashGet': r'flashget',
    'LeechFTP': r'leechftp',
    'LeechGet': r'leechget',
    'Leech': r'leech',
    'Offline Explorer': r'offline explorer',
    'Offline Navigator': r'offline navigator',
    'Offline Tool': r'offline',
    'WebCopier': r'webcopier',
    'WebCopy': r'webcopy',
    'WebRipper': r'webripper',
    'WebReaper': r'webreaper',
    'WebStripper': r'webstripper',
    'WebSauger': r'websauger',
    'WebZIP': r'webzip',
    'WebWhacker': r'webwhacker',
    'WebBandit': r'webbandit',
    'SiteSucker': r'sitesucker',
    'SiteSnagger': r'sitesnagger',
    'BlackWidow': r'blackwidow',
    'Mass Downloader': r'mass downloader',
    'Download Demon': r'download demon',
    'Download Ninja': r'download ninja',
    'Download Master': r'download master',
    'FreshDownload': r'freshdownload',
    'SmartDownload': r'smartdownload',
    'RealDownload': r'realdownload',
    'StarDownloader': r'stardownloader',
    'Net Vampire': r'net vampire',
    'NetAnts': r'netants',
    'NetZIP': r'netzip',
    'Go!Zilla': r'go!zilla|gozilla',
    # The specific *Grabber entries must precede the general 'Grabber',
    # otherwise 'grabber' matches first and they can never be returned.
    'PageGrabber': r'pagegrabber',
    'EirGrabber': r'eirgrabber',
    'Grabber': r'grabber',
    'EmailSiphon': r'emailsiphon',
    'EmailCollector': r'emailcollector',
    'EmailWolf': r'emailwolf',
    'Email Extractor': r'email extractor',
    'ExtractorPro': r'extractorpro',
    'HarvestMan': r'harvestman',
    'Harvest': r'harvest',
    'Collector': r'collector',
    'Vacuum': r'vacuum',
    'WebVac': r'webvac',
    'Zeus': r'zeus',
    'ScrapeBox': r'scrapebox',
    'Xenu Link Sleuth': r'xenu',
    'Larbin': r'larbin',
    'Grub': r'grub',
    # =========================================================================
    # HTTP LIBRARIES & FRAMEWORKS
    # =========================================================================
    'Python-Requests': r'python-requests',
    'Python-urllib': r'python-urllib',
    'Python-HTTPX': r'python-httpx',
    'Python HTTP': r'python/',
    'aiohttp': r'aiohttp',
    'HTTPX': r'httpx/',
    'cURL': r'curl/|^curl',
    'Wget': r'wget/|^wget',
    'Go-HTTP-Client': r'go-http-client',
    'Go HTTP': r'go http|go-http',
    'Java HTTP Client': r'java/|java ',
    'Apache-HttpClient': r'apache-httpclient',
    'Jakarta Commons': r'jakarta',
    'Axios': r'axios/|axios',
    'Node-Fetch': r'node-fetch',
    'Got (Node.js)': r'got/',
    'libwww-perl': r'libwww-perl',
    'LWP (Perl)': r'lwp::|lwp/',
    'WWW-Mechanize': r'www-mechanize',
    'Mechanize': r'mechanize',
    'Scrapy': r'scrapy/|scrapy',
    'HTTP.rb': r'http\.rb',
    'Typhoeus': r'typhoeus',
    'OkHttp': r'okhttp/|okhttp',
    'CFNetwork': r'cfnetwork',
    'WinHTTP': r'winhttp',
    'Indy Library': r'indy library',
    'Chilkat': r'chilkat',
    'httplib': r'httplib',
    'ApacheBench': r'apachebench',
    'Guzzle (PHP)': r'guzzle',
    'Requests': r'requests/',
    # =========================================================================
    # SECURITY SCANNER
    # =========================================================================
    'Nessus': r'nessus',
    'SQLMap': r'sqlmap',
    'Netsparker': r'netsparker',
    'Nikto': r'nikto',
    'Acunetix': r'acunetix',
    'Burp Suite': r'burpsuite|burp',
    'OWASP ZAP': r'owasp zap',
    'OpenVAS': r'openvas',
    'Nmap': r'nmap',
    'Masscan': r'masscan',
    'WPScan': r'wpscan',
    # =========================================================================
    # HEADLESS BROWSERS & AUTOMATION
    # =========================================================================
    'PhantomJS': r'phantomjs',
    'Headless Chrome': r'headlesschrome',
    'Headless Browser': r'headless',
    'Selenium': r'selenium',
    'Puppeteer': r'puppeteer',
    'Playwright': r'playwright',
    'Cypress': r'cypress',
    # =========================================================================
    # FEED READER & RSS
    # =========================================================================
    'FeedFetcher': r'feedfetcher',
    'FeedParser': r'feedparser',
    'Feedly': r'feedly',
    'Inoreader': r'inoreader',
    'NewsBlur': r'newsblur',
    # =========================================================================
    # OTHER KNOWN BOTS
    # =========================================================================
    'OmgiliBot': r'omgilibot',
    'Omgili': r'omgili',
    'Webzio-Extended': r'webzio-extended',
    'Webzio': r'webzio',
    'Timpibot': r'timpibot',
    'PanguBot': r'pangubot',
    'ImagesiftBot': r'imagesiftbot',
    'Kangaroo Bot': r'kangaroo bot',
    'QualifiedBot': r'qualifiedbot',
    'VelenPublicWebCrawler': r'velenpublicwebcrawler',
    'Linguee Bot': r'linguee bot',
    'Linguee': r'linguee',
    'QuillBot': r'quillbot',
    'TurnitinBot': r'turnitinbot',
    'Turnitin': r'turnitin',
    'ZanistaBot': r'zanistabot',
    'WRTNBot': r'wrtnbot',
    'WARDBot': r'wardbot',
    'ShapBot': r'shapbot',
    'LinerBot': r'linerbot',
    'LinkupBot': r'linkupbot',
    'KlaviyoAIBot': r'klaviyoaibot',
    'KunatoCrawler': r'kunatocrawler',
    'IbouBot': r'iboubot',
    'BuddyBot': r'buddybot',
    'BrightBot': r'brightbot',
    'Channel3Bot': r'channel3bot',
    'Andibot': r'andibot',
    'Anomura': r'anomura',
    'Awario': r'awario',
    'BigSur.ai': r'bigsur',
    'Cotoyogi': r'cotoyogi',
    'AddSearchBot': r'addsearchbot',
    'aiHitBot': r'aihitbot',
    'Atlassian-Bot': r'atlassian-bot',
    'RainBot': r'rainbot',
    'TinyTestBot': r'tinytestbot',
    'Brandwatch': r'brandwatch',
    'Meltwater': r'meltwater',
    'Netvibes': r'netvibes',
    'BitlyBot': r'bitlybot',
    'Mail.ru Bot': r'mail\.ru',
    'YaK': r'yak',
}
|
||||
|
||||
# Generic lowercase keywords used as a fallback for bots that have no
# dedicated BOT_PATTERNS entry. detect_bot() tests them as plain substrings of
# the lowercased User-Agent, in list order, and reports the first hit.
# 'robot' is listed before 'bot' because 'bot' is a substring of 'robot';
# in the opposite order the 'robot' entry could never be reported.
GENERIC_BOT_PATTERNS = [
    'robot', 'bot', 'crawler', 'spider', 'scraper', 'fetch', 'scan', 'check',
    'monitor', 'probe', 'index', 'archive', 'capture', 'reader',
    'download', 'mirror', 'ripper', 'collector', 'extractor', 'siphon',
    'copier', 'sucker', 'bandit', 'stripper', 'whacker', 'reaper',
    'agent', 'seeker', 'finder', 'walker', 'roam', 'snagger',
]
|
||||
|
||||
|
||||
def detect_bot(user_agent):
    """Identify a bot from its User-Agent string and return a display name.

    Args:
        user_agent: The raw User-Agent value. A falsy value or the literal
            string 'Unknown' is treated as "no information".

    Returns:
        - the key of the first BOT_PATTERNS entry whose regex is found in
          ``user_agent`` (case-insensitive search, dict order decides ties),
        - otherwise ``'Bot (<keyword>)'`` for the first GENERIC_BOT_PATTERNS
          keyword contained in the lowercased User-Agent,
        - otherwise ``'Unbekannt'``.
    """
    no_information = not user_agent or user_agent == 'Unknown'
    if no_information:
        return 'Unbekannt'

    # Pass 1: dedicated patterns; the first hit in dict order wins.
    known_name = next(
        (
            display_name
            for display_name, regex in BOT_PATTERNS.items()
            if re.search(regex, user_agent, re.IGNORECASE)
        ),
        None,
    )
    if known_name is not None:
        return known_name

    # Pass 2: generic keyword fallback via plain substring containment.
    lowered = user_agent.lower()
    generic_hit = next(
        (keyword for keyword in GENERIC_BOT_PATTERNS if keyword in lowered),
        None,
    )
    if generic_hit is None:
        return 'Unbekannt'
    return f'Bot ({generic_hit})'
|
||||
|
||||
|
||||
@@ -1985,7 +2352,7 @@ def show_all_logs():
|
||||
|
||||
def main():
|
||||
print("\n" + "=" * 60)
|
||||
print(" GeoIP Shop Blocker Manager v3.4.4")
|
||||
print(" GeoIP Shop Blocker Manager v3.5.0")
|
||||
print(" 🇩🇪🇦🇹🇨🇭 DACH | 🇪🇺 Eurozone+GB | 🤖 Bot-Rate-Limiting")
|
||||
print(" 🛡️ Mit Cache-Validierung und Fail-Open")
|
||||
print(" 🚦 Bots unter Rate-Limit werden durchgelassen")
|
||||
|
||||
Reference in New Issue
Block a user