geoip_shop_manager.py aktualisiert

This commit is contained in:
2025-12-10 18:03:54 +01:00
parent 3ebd1123a6
commit e721f86d34

View File

@@ -10,7 +10,7 @@ Supports three modes:
- php-only: GeoIP blocking without CrowdSec
- bot-only: Rate-limit bots, shop remains globally accessible
v3.4.5: Fix regex delimiter escape für curl pattern
v3.5.0: Erweiterte Bot-Erkennung mit 300+ Bots und generischen Fallback-Patterns
"""
import os
@@ -94,27 +94,394 @@ GEO_REGIONS = {
# BOT DETECTION
# =============================================================================
# Ordered mapping: display name -> regex searched in the User-Agent.
# detect_bot() applies each regex with re.IGNORECASE and returns the name of
# the FIRST match in iteration order, so a more specific pattern must always
# be listed before any broader pattern that would also match it
# (e.g. 'Googlebot-Image' before 'Googlebot').
# Every display name must appear exactly once: re-assigning an existing key
# keeps its original position and silently breaks that ordering.
# NOTE(review): many patterns are unanchored substrings (e.g. r'yak',
# r'devin', r'offline', r'instagram') and match any User-Agent containing
# them - confirm that this breadth is intended.
BOT_PATTERNS = {
    # =========================================================================
    # AI/LLM SERVICES
    # =========================================================================
    'ChatGPT-User': r'chatgpt-user',
    'ChatGPT-Operator': r'chatgpt-operator',
    'ChatGPT-Agent': r'chatgpt agent',
    'ChatGPT': r'chatgpt',
    'GPTBot (OpenAI)': r'gptbot',
    'OAI-SearchBot (OpenAI)': r'oai-searchbot',
    'OpenAI': r'openai',
    'ClaudeBot (Anthropic)': r'claudebot',
    'Claude-User': r'claude-user',
    'Claude-Web': r'claude-web',
    'Claude-SearchBot': r'claude-searchbot',
    'Anthropic-AI': r'anthropic-ai',
    'Anthropic': r'anthropic',
    'Gemini-Deep-Research': r'gemini-deep-research',
    'Google-NotebookLM': r'google-notebooklm',
    'NotebookLM': r'notebooklm',
    'GoogleAgent-Mariner': r'googleagent-mariner',
    'PerplexityBot': r'perplexitybot',
    'Perplexity-User': r'perplexity-user',
    'Perplexity': r'perplexity',
    'Cohere-AI': r'cohere-ai',
    'Cohere-Training-Crawler': r'cohere-training-data-crawler',
    'Cohere': r'cohere',
    'MistralAI-User': r'mistralai-user',
    'MistralAI': r'mistralai',
    'Mistral': r'mistral',
    'DeepSeekBot': r'deepseekbot',
    'DeepSeek': r'deepseek',
    'Bytespider (TikTok/ByteDance)': r'bytespider',
    'TikTokSpider': r'tiktokspider',
    'ByteDance': r'bytedance',
    'AI2Bot-DeepResearchEval': r'ai2bot-deepresearcheval',
    'AI2Bot-Dolma': r'ai2bot-dolma',
    'AI2Bot (Allen Institute)': r'ai2bot',
    'CCBot (Common Crawl)': r'ccbot',
    'Diffbot': r'diffbot',
    'img2dataset': r'img2dataset',
    'LAIONDownloader': r'laiondownloader',
    'LAION-HuggingFace': r'laion-huggingface',
    'LAION': r'laion',
    'HuggingFace': r'huggingface',
    'BedrockBot (AWS)': r'bedrockbot',
    'DuckAssistBot': r'duckassistbot',
    'PhindBot': r'phindbot',
    'YouBot': r'youbot',
    'iAskSpider': r'iaskspider',
    'iAskBot': r'iaskbot',
    'ChatGLM-Spider': r'chatglm-spider',
    'Panscient': r'panscient',
    'Devin (Cognition)': r'devin',
    'Manus-User': r'manus-user',
    'TwinAgent': r'twinagent',
    'NovaAct': r'novaact',
    'FirecrawlAgent': r'firecrawlagent',
    'Firecrawl': r'firecrawl',
    'Crawl4AI': r'crawl4ai',
    'Crawlspace': r'crawlspace',
    'Cloudflare-AutoRAG': r'cloudflare-autorag',
    'TerraCotta': r'terracotta',
    'Thinkbot': r'thinkbot',
    # =========================================================================
    # SEARCH ENGINES
    # =========================================================================
    'Googlebot-Image': r'googlebot-image',
    'Googlebot-Video': r'googlebot-video',
    'Googlebot-News': r'googlebot-news',
    'Googlebot-Discovery': r'googlebot-discovery',
    'Googlebot': r'googlebot',
    'Google-Extended': r'google-extended',
    'Google-CloudVertexBot': r'google-cloudvertexbot',
    'Google-Firebase': r'google-firebase',
    'Google-InspectionTool': r'google-inspectiontool',
    'GoogleOther-Image': r'googleother-image',
    'GoogleOther-Video': r'googleother-video',
    'GoogleOther': r'googleother',
    'Storebot-Google': r'storebot-google',
    'AdsBot-Google': r'adsbot-google',
    'Bingbot (Microsoft)': r'bingbot',
    'BingPreview': r'bingpreview',
    'MSNBot': r'msnbot',
    'Baiduspider': r'baiduspider',
    'Baidu': r'baidu',
    'YandexBot': r'yandexbot',
    'YandexAdditionalBot': r'yandexadditionalbot',
    'YandexAdditional': r'yandexadditional',
    'Yandex': r'yandex',
    'DuckDuckBot': r'duckduckbot',
    'DuckDuckGo': r'duckduckgo',
    'Applebot-Extended': r'applebot-extended',
    'Applebot': r'applebot',
    'Yahoo Slurp': r'slurp',
    'Sogou': r'sogou',
    'Sosospider': r'sosospider',
    'NaverBot': r'naverbot',
    'Naver': r'naver',
    'SeznamBot': r'seznambot',
    'MojeekBot': r'mojeekbot',
    'QwantBot': r'qwantbot',
    'PetalBot (Huawei)': r'petalbot',
    'CocCocBot': r'coccocbot',
    'Exabot': r'exabot',
    'BraveBot': r'bravebot',
    'Bravest': r'bravest',
    'SeekportBot': r'seekportbot',
    # =========================================================================
    # SEO & MARKETING TOOLS
    # =========================================================================
    'AhrefsBot': r'ahrefsbot',
    'Ahrefs': r'ahrefs',
    'SemrushBot-OCOB': r'semrushbot-ocob',
    'SemrushBot-SWA': r'semrushbot-swa',
    'SemrushBot': r'semrushbot',
    'Semrush': r'semrush',
    'MJ12Bot (Majestic)': r'mj12bot',
    'Majestic': r'majestic',
    'DotBot (Moz)': r'dotbot',
    'RogerBot (Moz)': r'rogerbot',
    'Screaming Frog': r'screaming frog',
    'BLEXBot': r'blexbot',
    'DataForSEOBot': r'dataforseobot',
    'Linkdex': r'linkdex',
    'SearchmetricsBot': r'searchmetricsbot',
    # =========================================================================
    # SOCIAL MEDIA
    # =========================================================================
    'Facebook External Hit': r'facebookexternalhit',
    'FacebookBot': r'facebookbot',
    'Facebot': r'facebot',
    'Meta-ExternalAgent': r'meta-externalagent',
    'Meta-ExternalFetcher': r'meta-externalfetcher',
    'Meta-WebIndexer': r'meta-webindexer',
    'Facebook': r'facebook',
    'Twitterbot': r'twitterbot',
    'Twitter': r'twitter',
    'Instagram': r'instagram',
    'LinkedInBot': r'linkedinbot',
    'LinkedIn': r'linkedin',
    'Pinterestbot': r'pinterestbot',
    'Pinterest': r'pinterest',
    'WhatsApp': r'whatsapp',
    'TelegramBot': r'telegrambot',
    'Telegram': r'telegram',
    'DiscordBot': r'discordbot',
    'Discord': r'discord',
    'Slackbot': r'slackbot',
    'Slack': r'slack',
    'Quora-Bot': r'quora-bot',
    'Snapchat': r'snapchat',
    'RedditBot': r'redditbot',
    # =========================================================================
    # E-COMMERCE & PRICE COMPARISON
    # =========================================================================
    'Amazonbot': r'amazonbot',
    'Amazon-Kendra': r'amazon-kendra',
    'AmazonBuyForMe': r'amazonbuyforme',
    'AMZNKAssocBot': r'amznkassocbot',
    'GeedoShopProductFinder': r'geedoshopproductfinder',
    'Geedo': r'geedo',
    'ShopWiki': r'shopwiki',
    'PriceGrabber': r'pricegrabber',
    'Shopify': r'shopify',
    'Idealo': r'idealo',
    'Guenstiger.de': r'guenstiger',
    'Billiger.de': r'billiger',
    'Ladenzeile': r'ladenzeile',
    'Kelkoo': r'kelkoo',
    'PriceRunner': r'pricerunner',
    # =========================================================================
    # ARCHIVE & RESEARCH
    # =========================================================================
    'Archive.org Bot': r'archive\.org_bot|archive-org-bot',
    'Internet Archive': r'ia_archiver|ia-archiver',
    'Wayback Machine': r'wayback',
    'Heritrix': r'heritrix',
    'Apache Nutch': r'nutch',
    'Common Crawl': r'commoncrawl',
    # =========================================================================
    # MONITORING & UPTIME
    # =========================================================================
    'UptimeRobot': r'uptimerobot',
    'Pingdom': r'pingdom',
    'StatusCake': r'statuscake',
    'Site24x7': r'site24x7',
    'NewRelic': r'newrelic',
    'Datadog': r'datadog',
    'GTmetrix': r'gtmetrix',
    'PageSpeed Insights': r'pagespeed',
    'Chrome Lighthouse': r'chrome-lighthouse',
    # =========================================================================
    # DOWNLOAD & SCRAPER TOOLS
    # =========================================================================
    'HTTrack': r'httrack',
    'Teleport Pro': r'teleportpro|teleport pro',
    'Teleport': r'teleport',
    'GetRight': r'getright',
    'FlashGet': r'flashget',
    'LeechFTP': r'leechftp',
    'LeechGet': r'leechget',
    'Leech': r'leech',
    'Offline Explorer': r'offline explorer',
    'Offline Navigator': r'offline navigator',
    'Offline Tool': r'offline',
    'WebCopier': r'webcopier',
    'WebCopy': r'webcopy',
    'WebRipper': r'webripper',
    'WebReaper': r'webreaper',
    'WebStripper': r'webstripper',
    'WebSauger': r'websauger',
    'WebZIP': r'webzip',
    'WebWhacker': r'webwhacker',
    'WebBandit': r'webbandit',
    'SiteSucker': r'sitesucker',
    'SiteSnagger': r'sitesnagger',
    'BlackWidow': r'blackwidow',
    'Mass Downloader': r'mass downloader',
    'Download Demon': r'download demon',
    'Download Ninja': r'download ninja',
    'Download Master': r'download master',
    'FreshDownload': r'freshdownload',
    'SmartDownload': r'smartdownload',
    'RealDownload': r'realdownload',
    'StarDownloader': r'stardownloader',
    'Net Vampire': r'net vampire',
    'NetAnts': r'netants',
    'NetZIP': r'netzip',
    'Go!Zilla': r'go!zilla|gozilla',
    'PageGrabber': r'pagegrabber',
    'EirGrabber': r'eirgrabber',
    # The broad 'grabber' pattern must follow the specific *Grabber entries,
    # otherwise it matches first and they can never be returned.
    'Grabber': r'grabber',
    'EmailSiphon': r'emailsiphon',
    'EmailCollector': r'emailcollector',
    'EmailWolf': r'emailwolf',
    'Email Extractor': r'email extractor',
    'ExtractorPro': r'extractorpro',
    'HarvestMan': r'harvestman',
    'Harvest': r'harvest',
    'Collector': r'collector',
    'Vacuum': r'vacuum',
    'WebVac': r'webvac',
    'Zeus': r'zeus',
    'ScrapeBox': r'scrapebox',
    'Xenu Link Sleuth': r'xenu',
    'Larbin': r'larbin',
    'Grub': r'grub',
    # =========================================================================
    # HTTP LIBRARIES & FRAMEWORKS
    # =========================================================================
    'Python-Requests': r'python-requests',
    'Python-urllib': r'python-urllib',
    'Python-HTTPX': r'python-httpx',
    'Python HTTP': r'python/',
    'aiohttp': r'aiohttp',
    'HTTPX': r'httpx/',
    'cURL': r'curl/|^curl',
    'Wget': r'wget/|^wget',
    'Go-HTTP-Client': r'go-http-client',
    'Go HTTP': r'go http|go-http',
    'Java HTTP Client': r'java/|java ',
    'Apache-HttpClient': r'apache-httpclient',
    'Jakarta Commons': r'jakarta',
    'Axios': r'axios/|axios',
    'Node-Fetch': r'node-fetch',
    'Got (Node.js)': r'got/',
    'libwww-perl': r'libwww-perl',
    'LWP (Perl)': r'lwp::|lwp/',
    'WWW-Mechanize': r'www-mechanize',
    'Mechanize': r'mechanize',
    'Scrapy': r'scrapy/|scrapy',
    'HTTP.rb': r'http\.rb',
    'Typhoeus': r'typhoeus',
    'OkHttp': r'okhttp/|okhttp',
    'CFNetwork': r'cfnetwork',
    'WinHTTP': r'winhttp',
    'Indy Library': r'indy library',
    'Chilkat': r'chilkat',
    'httplib': r'httplib',
    'ApacheBench': r'apachebench',
    'Guzzle (PHP)': r'guzzle',
    'Requests': r'requests/',
    # =========================================================================
    # SECURITY SCANNER
    # =========================================================================
    'Nessus': r'nessus',
    'SQLMap': r'sqlmap',
    'Netsparker': r'netsparker',
    'Nikto': r'nikto',
    'Acunetix': r'acunetix',
    'Burp Suite': r'burpsuite|burp',
    'OWASP ZAP': r'owasp zap',
    'OpenVAS': r'openvas',
    'Nmap': r'nmap',
    'Masscan': r'masscan',
    'WPScan': r'wpscan',
    # =========================================================================
    # HEADLESS BROWSERS & AUTOMATION
    # =========================================================================
    'PhantomJS': r'phantomjs',
    'Headless Chrome': r'headlesschrome',
    'Headless Browser': r'headless',
    'Selenium': r'selenium',
    'Puppeteer': r'puppeteer',
    'Playwright': r'playwright',
    'Cypress': r'cypress',
    # =========================================================================
    # FEED READER & RSS
    # =========================================================================
    'FeedFetcher': r'feedfetcher',
    'FeedParser': r'feedparser',
    'Feedly': r'feedly',
    'Inoreader': r'inoreader',
    'NewsBlur': r'newsblur',
    # =========================================================================
    # OTHER KNOWN BOTS
    # =========================================================================
    'OmgiliBot': r'omgilibot',
    'Omgili': r'omgili',
    'Webzio-Extended': r'webzio-extended',
    'Webzio': r'webzio',
    'Timpibot': r'timpibot',
    'PanguBot': r'pangubot',
    'ImagesiftBot': r'imagesiftbot',
    'Kangaroo Bot': r'kangaroo bot',
    'QualifiedBot': r'qualifiedbot',
    'VelenPublicWebCrawler': r'velenpublicwebcrawler',
    'Linguee Bot': r'linguee bot',
    'Linguee': r'linguee',
    'QuillBot': r'quillbot',
    'TurnitinBot': r'turnitinbot',
    'Turnitin': r'turnitin',
    'ZanistaBot': r'zanistabot',
    'WRTNBot': r'wrtnbot',
    'WARDBot': r'wardbot',
    'ShapBot': r'shapbot',
    'LinerBot': r'linerbot',
    'LinkupBot': r'linkupbot',
    'KlaviyoAIBot': r'klaviyoaibot',
    'KunatoCrawler': r'kunatocrawler',
    'IbouBot': r'iboubot',
    'BuddyBot': r'buddybot',
    'BrightBot': r'brightbot',
    'Channel3Bot': r'channel3bot',
    'Andibot': r'andibot',
    'Anomura': r'anomura',
    'Awario': r'awario',
    'BigSur.ai': r'bigsur',
    'Cotoyogi': r'cotoyogi',
    'AddSearchBot': r'addsearchbot',
    'aiHitBot': r'aihitbot',
    'Atlassian-Bot': r'atlassian-bot',
    'RainBot': r'rainbot',
    'TinyTestBot': r'tinytestbot',
    'Brandwatch': r'brandwatch',
    'Meltwater': r'meltwater',
    'Netvibes': r'netvibes',
    'BitlyBot': r'bitlybot',
    'Mail.ru Bot': r'mail\.ru',
    'YaK': r'yak',
}
# Generic keywords used as a fallback for bots not listed in BOT_PATTERNS.
# detect_bot() tests them in this order as plain substrings of the
# lower-cased User-Agent and reports the first hit.
# NOTE(review): 'bot' is tested first and is itself a substring of 'robot',
# so the 'robot' entry can never be the one reported - confirm intended order.
GENERIC_BOT_PATTERNS = [
    'bot', 'crawler', 'spider', 'scraper',
    'fetch', 'scan', 'check', 'monitor', 'probe',
    'index', 'archive', 'capture', 'reader',
    'download', 'mirror', 'ripper',
    'collector', 'extractor', 'siphon',
    'copier', 'sucker', 'bandit',
    'stripper', 'whacker', 'reaper',
    'robot', 'agent',
    'seeker', 'finder', 'walker', 'roam', 'snagger',
]
def detect_bot(user_agent):
    """Classify a User-Agent string and return a display name for the bot.

    Known bots are tried first: every regex in ``BOT_PATTERNS`` is searched
    case-insensitively, in the table's iteration order, and the name of the
    first hit is returned. If none matches, the lower-cased User-Agent is
    checked for each ``GENERIC_BOT_PATTERNS`` keyword as a plain substring,
    and the first hit is reported as ``'Bot (<keyword>)'``.

    Args:
        user_agent: Raw User-Agent header value.

    Returns:
        The display name of the matching known bot, ``'Bot (<keyword>)'`` for
        a generic keyword hit, or ``'Unbekannt'`` when the value is falsy,
        equals ``'Unknown'``, or matches nothing.
    """
    not_identified = 'Unbekannt'

    if not user_agent or user_agent == 'Unknown':
        return not_identified

    # Specific table first; iteration order decides between overlapping
    # patterns.
    known_name = next(
        (
            display_name
            for display_name, regex in BOT_PATTERNS.items()
            if re.search(regex, user_agent, re.IGNORECASE)
        ),
        None,
    )
    if known_name is not None:
        return known_name

    # Fallback: plain substring test against the generic keyword list.
    lowered_ua = user_agent.lower()
    keyword = next(
        (candidate for candidate in GENERIC_BOT_PATTERNS if candidate in lowered_ua),
        None,
    )
    if keyword is None:
        return not_identified
    return f'Bot ({keyword})'
@@ -1985,7 +2352,7 @@ def show_all_logs():
def main():
print("\n" + "=" * 60)
print(" GeoIP Shop Blocker Manager v3.4.4")
print(" GeoIP Shop Blocker Manager v3.5.0")
print(" 🇩🇪🇦🇹🇨🇭 DACH | 🇪🇺 Eurozone+GB | 🤖 Bot-Rate-Limiting")
print(" 🛡️ Mit Cache-Validierung und Fail-Open")
print(" 🚦 Bots unter Rate-Limit werden durchgelassen")