# =============================================================================
# BOT DETECTION
# =============================================================================
# v3.5.0: extended bot detection with 300+ bots and generic fallback patterns.
#
# Maps a display name to a regular expression that is searched
# case-insensitively in the User-Agent string.
#
# ORDER MATTERS: detect_bot() returns the first entry whose pattern matches,
# so a specific pattern must always be listed BEFORE any broader pattern that
# would also match the same User-Agent (e.g. 'PageGrabber' before 'Grabber',
# 'aiohttp' before 'Python HTTP', 'Apache-HttpClient' before
# 'Java HTTP Client').
BOT_PATTERNS = {
    # =========================================================================
    # AI/LLM SERVICES
    # =========================================================================
    'ChatGPT-User': r'chatgpt-user',
    'ChatGPT-Operator': r'chatgpt-operator',
    'ChatGPT-Agent': r'chatgpt agent',
    'ChatGPT': r'chatgpt',
    'GPTBot (OpenAI)': r'gptbot',
    'OAI-SearchBot (OpenAI)': r'oai-searchbot',
    'OpenAI': r'openai',
    'ClaudeBot (Anthropic)': r'claudebot',
    'Claude-User': r'claude-user',
    'Claude-Web': r'claude-web',
    'Claude-SearchBot': r'claude-searchbot',
    'Anthropic-AI': r'anthropic-ai',
    'Anthropic': r'anthropic',
    'Gemini-Deep-Research': r'gemini-deep-research',
    'Google-NotebookLM': r'google-notebooklm',
    'NotebookLM': r'notebooklm',
    'GoogleAgent-Mariner': r'googleagent-mariner',
    'PerplexityBot': r'perplexitybot',
    'Perplexity-User': r'perplexity-user',
    'Perplexity': r'perplexity',
    'Cohere-AI': r'cohere-ai',
    'Cohere-Training-Crawler': r'cohere-training-data-crawler',
    'Cohere': r'cohere',
    'MistralAI-User': r'mistralai-user',
    'MistralAI': r'mistralai',
    'Mistral': r'mistral',
    'DeepSeekBot': r'deepseekbot',
    'DeepSeek': r'deepseek',
    'Bytespider (TikTok/ByteDance)': r'bytespider',
    'TikTokSpider': r'tiktokspider',
    'ByteDance': r'bytedance',
    'AI2Bot-DeepResearchEval': r'ai2bot-deepresearcheval',
    'AI2Bot-Dolma': r'ai2bot-dolma',
    'AI2Bot (Allen Institute)': r'ai2bot',
    'CCBot (Common Crawl)': r'ccbot',
    'Diffbot': r'diffbot',
    'img2dataset': r'img2dataset',
    'LAIONDownloader': r'laiondownloader',
    'LAION-HuggingFace': r'laion-huggingface',
    'LAION': r'laion',
    'HuggingFace': r'huggingface',
    'BedrockBot (AWS)': r'bedrockbot',
    'DuckAssistBot': r'duckassistbot',
    'PhindBot': r'phindbot',
    'YouBot': r'youbot',
    'iAskSpider': r'iaskspider',
    'iAskBot': r'iaskbot',
    'ChatGLM-Spider': r'chatglm-spider',
    'Panscient': r'panscient',
    'Devin (Cognition)': r'devin',
    'Manus-User': r'manus-user',
    'TwinAgent': r'twinagent',
    'NovaAct': r'novaact',
    'FirecrawlAgent': r'firecrawlagent',
    'Firecrawl': r'firecrawl',
    'Crawl4AI': r'crawl4ai',
    'Crawlspace': r'crawlspace',
    'Cloudflare-AutoRAG': r'cloudflare-autorag',
    'TerraCotta': r'terracotta',
    'Thinkbot': r'thinkbot',
    # =========================================================================
    # SEARCH ENGINES
    # =========================================================================
    'Googlebot-Image': r'googlebot-image',
    'Googlebot-Video': r'googlebot-video',
    'Googlebot-News': r'googlebot-news',
    'Googlebot-Discovery': r'googlebot-discovery',
    'Googlebot': r'googlebot',
    'Google-Extended': r'google-extended',
    'Google-CloudVertexBot': r'google-cloudvertexbot',
    'Google-Firebase': r'google-firebase',
    'Google-InspectionTool': r'google-inspectiontool',
    'GoogleOther-Image': r'googleother-image',
    'GoogleOther-Video': r'googleother-video',
    'GoogleOther': r'googleother',
    'Storebot-Google': r'storebot-google',
    'AdsBot-Google': r'adsbot-google',
    'Bingbot (Microsoft)': r'bingbot',
    'BingPreview': r'bingpreview',
    'MSNBot': r'msnbot',
    'Baiduspider': r'baiduspider',
    'Baidu': r'baidu',
    'YandexBot': r'yandexbot',
    'YandexAdditionalBot': r'yandexadditionalbot',
    'YandexAdditional': r'yandexadditional',
    'Yandex': r'yandex',
    'DuckDuckBot': r'duckduckbot',
    'DuckDuckGo': r'duckduckgo',
    'Applebot-Extended': r'applebot-extended',
    'Applebot': r'applebot',
    'Yahoo Slurp': r'slurp',
    'Sogou': r'sogou',
    'Sosospider': r'sosospider',
    'NaverBot': r'naverbot',
    'Naver': r'naver',
    'SeznamBot': r'seznambot',
    'MojeekBot': r'mojeekbot',
    'QwantBot': r'qwantbot',
    'PetalBot (Huawei)': r'petalbot',
    'CocCocBot': r'coccocbot',
    'Exabot': r'exabot',
    'BraveBot': r'bravebot',
    'Bravest': r'bravest',
    'SeekportBot': r'seekportbot',
    # =========================================================================
    # SEO & MARKETING TOOLS
    # =========================================================================
    'AhrefsBot': r'ahrefsbot',
    'Ahrefs': r'ahrefs',
    'SemrushBot-OCOB': r'semrushbot-ocob',
    'SemrushBot-SWA': r'semrushbot-swa',
    'SemrushBot': r'semrushbot',
    'Semrush': r'semrush',
    'MJ12Bot (Majestic)': r'mj12bot',
    'Majestic': r'majestic',
    'DotBot (Moz)': r'dotbot',
    'RogerBot (Moz)': r'rogerbot',
    'Screaming Frog': r'screaming frog',
    'BLEXBot': r'blexbot',
    'DataForSEOBot': r'dataforseobot',
    'Linkdex': r'linkdex',
    'SearchmetricsBot': r'searchmetricsbot',
    # =========================================================================
    # SOCIAL MEDIA
    # =========================================================================
    'Facebook External Hit': r'facebookexternalhit',
    'FacebookBot': r'facebookbot',
    'Facebot': r'facebot',
    'Meta-ExternalAgent': r'meta-externalagent',
    'Meta-ExternalFetcher': r'meta-externalfetcher',
    'Meta-WebIndexer': r'meta-webindexer',
    'Facebook': r'facebook',
    'Twitterbot': r'twitterbot',
    'Twitter': r'twitter',
    'Instagram': r'instagram',
    'LinkedInBot': r'linkedinbot',
    'LinkedIn': r'linkedin',
    'Pinterestbot': r'pinterestbot',
    'Pinterest': r'pinterest',
    'WhatsApp': r'whatsapp',
    'TelegramBot': r'telegrambot',
    'Telegram': r'telegram',
    'DiscordBot': r'discordbot',
    'Discord': r'discord',
    'Slackbot': r'slackbot',
    'Slack': r'slack',
    'Quora-Bot': r'quora-bot',
    'Snapchat': r'snapchat',
    'RedditBot': r'redditbot',
    # =========================================================================
    # E-COMMERCE & PRICE COMPARISON
    # =========================================================================
    'Amazonbot': r'amazonbot',
    'Amazon-Kendra': r'amazon-kendra',
    'AmazonBuyForMe': r'amazonbuyforme',
    'AMZNKAssocBot': r'amznkassocbot',
    'GeedoShopProductFinder': r'geedoshopproductfinder',
    'Geedo': r'geedo',
    'ShopWiki': r'shopwiki',
    'PriceGrabber': r'pricegrabber',
    'Shopify': r'shopify',
    'Idealo': r'idealo',
    'Guenstiger.de': r'guenstiger',
    'Billiger.de': r'billiger',
    'Ladenzeile': r'ladenzeile',
    'Kelkoo': r'kelkoo',
    'PriceRunner': r'pricerunner',
    # =========================================================================
    # ARCHIVE & RESEARCH
    # =========================================================================
    'Archive.org Bot': r'archive\.org_bot|archive-org-bot',
    'Internet Archive': r'ia_archiver|ia-archiver',
    'Wayback Machine': r'wayback',
    'Heritrix': r'heritrix',
    'Apache Nutch': r'nutch',
    'Common Crawl': r'commoncrawl',
    # =========================================================================
    # MONITORING & UPTIME
    # =========================================================================
    'UptimeRobot': r'uptimerobot',
    'Pingdom': r'pingdom',
    'StatusCake': r'statuscake',
    'Site24x7': r'site24x7',
    'NewRelic': r'newrelic',
    'Datadog': r'datadog',
    'GTmetrix': r'gtmetrix',
    'PageSpeed Insights': r'pagespeed',
    'Chrome Lighthouse': r'chrome-lighthouse',
    # =========================================================================
    # DOWNLOAD & SCRAPER TOOLS
    # =========================================================================
    'HTTrack': r'httrack',
    'Teleport Pro': r'teleportpro|teleport pro',
    'Teleport': r'teleport',
    'GetRight': r'getright',
    'FlashGet': r'flashget',
    'LeechFTP': r'leechftp',
    'LeechGet': r'leechget',
    'Leech': r'leech',
    'Offline Explorer': r'offline explorer',
    'Offline Navigator': r'offline navigator',
    'Offline Tool': r'offline',
    'WebCopier': r'webcopier',
    'WebCopy': r'webcopy',
    'WebRipper': r'webripper',
    'WebReaper': r'webreaper',
    'WebStripper': r'webstripper',
    'WebSauger': r'websauger',
    'WebZIP': r'webzip',
    'WebWhacker': r'webwhacker',
    'WebBandit': r'webbandit',
    'SiteSucker': r'sitesucker',
    'SiteSnagger': r'sitesnagger',
    'BlackWidow': r'blackwidow',
    'Mass Downloader': r'mass downloader',
    'Download Demon': r'download demon',
    'Download Ninja': r'download ninja',
    'Download Master': r'download master',
    'FreshDownload': r'freshdownload',
    'SmartDownload': r'smartdownload',
    'RealDownload': r'realdownload',
    'StarDownloader': r'stardownloader',
    'Net Vampire': r'net vampire',
    'NetAnts': r'netants',
    'NetZIP': r'netzip',
    'Go!Zilla': r'go!zilla|gozilla',
    # The specific *Grabber tools must precede the broad 'Grabber' entry;
    # otherwise r'grabber' matches first and they are never reported.
    'PageGrabber': r'pagegrabber',
    'EirGrabber': r'eirgrabber',
    'Grabber': r'grabber',
    'EmailSiphon': r'emailsiphon',
    'EmailCollector': r'emailcollector',
    'EmailWolf': r'emailwolf',
    'Email Extractor': r'email extractor',
    'ExtractorPro': r'extractorpro',
    'HarvestMan': r'harvestman',
    'Harvest': r'harvest',
    'Collector': r'collector',
    'Vacuum': r'vacuum',
    'WebVac': r'webvac',
    'Zeus': r'zeus',
    'ScrapeBox': r'scrapebox',
    'Xenu Link Sleuth': r'xenu',
    'Larbin': r'larbin',
    'Grub': r'grub',
    # =========================================================================
    # HTTP LIBRARIES & FRAMEWORKS
    # =========================================================================
    'Python-Requests': r'python-requests',
    'Python-urllib': r'python-urllib',
    'Python-HTTPX': r'python-httpx',
    # aiohttp announces itself as "Python/3.x aiohttp/3.x", so it has to be
    # tested before the broad r'python/' pattern.
    'aiohttp': r'aiohttp',
    'Python HTTP': r'python/',
    'HTTPX': r'httpx/',
    'cURL': r'curl/|^curl',
    'Wget': r'wget/|^wget',
    'Go-HTTP-Client': r'go-http-client',
    'Go HTTP': r'go http|go-http',
    # Apache HttpClient announces itself as "Apache-HttpClient/4.x (Java/...)",
    # so it has to be tested before the broad r'java/' pattern.
    'Apache-HttpClient': r'apache-httpclient',
    'Java HTTP Client': r'java/|java ',
    'Jakarta Commons': r'jakarta',
    'Axios': r'axios/|axios',
    'Node-Fetch': r'node-fetch',
    'Got (Node.js)': r'got/',
    'libwww-perl': r'libwww-perl',
    'LWP (Perl)': r'lwp::|lwp/',
    'WWW-Mechanize': r'www-mechanize',
    'Mechanize': r'mechanize',
    'Scrapy': r'scrapy/|scrapy',
    'HTTP.rb': r'http\.rb',
    'Typhoeus': r'typhoeus',
    'OkHttp': r'okhttp/|okhttp',
    'CFNetwork': r'cfnetwork',
    'WinHTTP': r'winhttp',
    'Indy Library': r'indy library',
    'Chilkat': r'chilkat',
    'httplib': r'httplib',
    'ApacheBench': r'apachebench',
    'Guzzle (PHP)': r'guzzle',
    'Requests': r'requests/',
    # =========================================================================
    # SECURITY SCANNER
    # =========================================================================
    'Nessus': r'nessus',
    'SQLMap': r'sqlmap',
    'Netsparker': r'netsparker',
    'Nikto': r'nikto',
    'Acunetix': r'acunetix',
    'Burp Suite': r'burpsuite|burp',
    'OWASP ZAP': r'owasp zap',
    'OpenVAS': r'openvas',
    'Nmap': r'nmap',
    'Masscan': r'masscan',
    'WPScan': r'wpscan',
    # =========================================================================
    # HEADLESS BROWSERS & AUTOMATION
    # =========================================================================
    'PhantomJS': r'phantomjs',
    'Headless Chrome': r'headlesschrome',
    'Headless Browser': r'headless',
    'Selenium': r'selenium',
    'Puppeteer': r'puppeteer',
    'Playwright': r'playwright',
    'Cypress': r'cypress',
    # =========================================================================
    # FEED READER & RSS
    # =========================================================================
    'FeedFetcher': r'feedfetcher',
    'FeedParser': r'feedparser',
    'Feedly': r'feedly',
    'Inoreader': r'inoreader',
    'NewsBlur': r'newsblur',
    # =========================================================================
    # OTHER KNOWN BOTS
    # =========================================================================
    'OmgiliBot': r'omgilibot',
    'Omgili': r'omgili',
    'Webzio-Extended': r'webzio-extended',
    'Webzio': r'webzio',
    'Timpibot': r'timpibot',
    'PanguBot': r'pangubot',
    'ImagesiftBot': r'imagesiftbot',
    'Kangaroo Bot': r'kangaroo bot',
    'QualifiedBot': r'qualifiedbot',
    'VelenPublicWebCrawler': r'velenpublicwebcrawler',
    'Linguee Bot': r'linguee bot',
    'Linguee': r'linguee',
    'QuillBot': r'quillbot',
    'TurnitinBot': r'turnitinbot',
    'Turnitin': r'turnitin',
    'ZanistaBot': r'zanistabot',
    'WRTNBot': r'wrtnbot',
    'WARDBot': r'wardbot',
    'ShapBot': r'shapbot',
    'LinerBot': r'linerbot',
    'LinkupBot': r'linkupbot',
    'KlaviyoAIBot': r'klaviyoaibot',
    'KunatoCrawler': r'kunatocrawler',
    'IbouBot': r'iboubot',
    'BuddyBot': r'buddybot',
    'BrightBot': r'brightbot',
    'Channel3Bot': r'channel3bot',
    'Andibot': r'andibot',
    'Anomura': r'anomura',
    'Awario': r'awario',
    'BigSur.ai': r'bigsur',
    'Cotoyogi': r'cotoyogi',
    'AddSearchBot': r'addsearchbot',
    'aiHitBot': r'aihitbot',
    'Atlassian-Bot': r'atlassian-bot',
    'RainBot': r'rainbot',
    'TinyTestBot': r'tinytestbot',
    'Brandwatch': r'brandwatch',
    'Meltwater': r'meltwater',
    'Netvibes': r'netvibes',
    'BitlyBot': r'bitlybot',
    'Mail.ru Bot': r'mail\.ru',
    'YaK': r'yak',
}

# Generic patterns (fallback for unknown bots). These are plain lowercase
# substrings, not regular expressions; they are tried in list order only after
# no BOT_PATTERNS entry matched.
GENERIC_BOT_PATTERNS = [
    'bot', 'crawler', 'spider', 'scraper', 'fetch', 'scan', 'check',
    'monitor', 'probe', 'index', 'archive', 'capture', 'reader',
    'download', 'mirror', 'ripper', 'collector', 'extractor', 'siphon',
    'copier', 'sucker', 'bandit', 'stripper', 'whacker', 'reaper',
    'robot', 'agent', 'seeker', 'finder', 'walker', 'roam', 'snagger',
]


def detect_bot(user_agent):
    """Detect a bot from its User-Agent and return its display name.

    Args:
        user_agent: Raw User-Agent string. ``None``, the empty string and
            the placeholder ``'Unknown'`` are treated as "no information".

    Returns:
        The key of the first ``BOT_PATTERNS`` entry whose regex matches
        (case-insensitive); otherwise ``'Bot (<keyword>)'`` for the first
        ``GENERIC_BOT_PATTERNS`` substring contained in the lowercased
        User-Agent; otherwise ``'Unbekannt'``.
    """
    if not user_agent or user_agent == 'Unknown':
        return 'Unbekannt'

    # Check the specific patterns first. Dict insertion order decides which
    # name wins when several patterns match.
    for bot_name, pattern in BOT_PATTERNS.items():
        if re.search(pattern, user_agent, re.IGNORECASE):
            return bot_name

    # Then fall back to the generic keywords (plain substring test).
    ua_lower = user_agent.lower()
    for pattern in GENERIC_BOT_PATTERNS:
        if pattern in ua_lower:
            return f'Bot ({pattern})'

    return 'Unbekannt'