# =============================================================================
# BOT DETECTION - Comprehensive list of known bots/crawlers
# =============================================================================
# Maps a display name to a regular expression that is searched (case-
# insensitively) in the User-Agent string. Several entries are deliberate
# refinements of a more generic one (e.g. 'Googlebot-Image' vs 'Googlebot');
# detect_bot() resolves those overlaps in favour of the more specific entry.
BOT_PATTERNS = {
    # OpenAI
    'GPTBot': r'GPTBot',
    'OAI-SearchBot': r'OAI-SearchBot',
    'ChatGPT-User': r'ChatGPT-User',

    # Anthropic (Claude)
    'ClaudeBot': r'ClaudeBot',
    'Claude-User': r'Claude-User',
    'Claude-SearchBot': r'Claude-SearchBot',
    'anthropic-ai': r'anthropic-ai',
    'claude-web': r'claude-web',

    # Google
    'Googlebot': r'Googlebot',
    'Google-Extended': r'Google-Extended',
    'Googlebot-Image': r'Googlebot-Image',
    'Googlebot-Video': r'Googlebot-Video',
    'Googlebot-News': r'Googlebot-News',
    'Gemini-Deep-Research': r'Gemini-Deep-Research',
    'Google-CloudVertexBot': r'Google-CloudVertexBot',
    'AdsBot-Google': r'AdsBot-Google',
    'Mediapartners-Google': r'Mediapartners-Google',
    'FeedFetcher-Google': r'FeedFetcher-Google',
    'Google-InspectionTool': r'Google-InspectionTool',

    # Microsoft/Bing
    'Bingbot': r'[Bb]ingbot',
    'BingPreview': r'BingPreview',
    'msnbot': r'msnbot',
    'AdIdxBot': r'AdIdxBot',

    # Perplexity
    'PerplexityBot': r'PerplexityBot',
    'Perplexity-User': r'Perplexity-User',

    # Apple
    'Applebot': r'Applebot',
    'Applebot-Extended': r'Applebot-Extended',

    # Amazon
    'Amazonbot': r'Amazonbot',

    # Meta/Facebook
    'FacebookBot': r'facebookexternalhit|FacebookBot',
    'meta-externalagent': r'meta-externalagent',
    'Meta-WebIndexer': r'Meta-WebIndexer',

    # ByteDance/TikTok
    'Bytespider': r'Bytespider',

    # DuckDuckGo
    'DuckDuckBot': r'DuckDuckBot',
    'DuckAssistBot': r'DuckAssistBot',

    # Other AI/LLM
    'cohere-ai': r'cohere-ai',
    'YouBot': r'YouBot',
    'MistralAI-User': r'MistralAI-User',
    'AI2Bot': r'AI2Bot',
    'CCBot': r'CCBot',
    'Diffbot': r'Diffbot',
    'Timpibot': r'Timpibot',
    'omgili': r'omgili',
    'webzio': r'webzio',
    'ICC-Crawler': r'ICC-Crawler',

    # SEO Tools
    'AhrefsBot': r'AhrefsBot',
    'SemrushBot': r'SemrushBot',
    'MJ12bot': r'MJ12bot',
    'DotBot': r'DotBot',
    'BLEXBot': r'BLEXBot',
    'DataForSeoBot': r'DataForSeoBot',
    'SEOkicks': r'SEOkicks',
    'seoscanners': r'seoscanners',
    'Screaming Frog': r'Screaming Frog',
    'Sistrix': r'Sistrix',
    'JEEC2Bot': r'JEEC2Bot',

    # Other Search Engines
    'YandexBot': r'YandexBot',
    'YandexImages': r'YandexImages',
    'Baiduspider': r'Baiduspider',
    'PetalBot': r'PetalBot',
    'Sogou': r'Sogou',
    'Qwantify': r'Qwantify',
    'ia_archiver': r'ia_archiver',

    # Social Media
    'LinkedInBot': r'LinkedInBot',
    'Twitterbot': r'Twitterbot',
    'Pinterest': r'Pinterest',
    'Slackbot': r'Slackbot',
    'TelegramBot': r'TelegramBot',
    'WhatsApp': r'WhatsApp',
    'Discordbot': r'Discordbot',

    # Monitoring & Security
    'UptimeRobot': r'UptimeRobot',
    'Pingdom': r'Pingdom',
    'StatusCake': r'StatusCake',
    'GTmetrix': r'GTmetrix',
    'Site24x7': r'Site24x7',

    # Payment/E-Commerce
    'PayPal IPN': r'PayPal',
    'Stripe': r'Stripe',
    'Shopify': r'Shopify',

    # Feed Readers
    'Feedly': r'Feedly',
    'NewsBlur': r'NewsBlur',

    # Other known bots
    'SeznamBot': r'SeznamBot',
    'Exabot': r'Exabot',
    'archive.org_bot': r'archive\.org_bot',
    'Wget': r'Wget',
    'curl': r'^curl/',
    'python-requests': r'python-requests',
    'Go-http-client': r'Go-http-client',
    'Java': r'^Java/',
    'Apache-HttpClient': r'Apache-HttpClient',
    'okhttp': r'okhttp',
    'HeadlessChrome': r'HeadlessChrome',
    'PhantomJS': r'PhantomJS',
    'Scrapy': r'Scrapy',
}


def detect_bot(user_agent):
    """Return the BOT_PATTERNS name that best describes *user_agent*.

    Patterns are tried in dictionary order, case-insensitively. The first
    pattern that matches is the baseline result. A later pattern replaces
    it only when its match fully encloses the current match and is strictly
    longer, i.e. when it is a more specific variant of the same token.
    Without that refinement 'Googlebot' would always shadow
    'Googlebot-Image' / '-Video' / '-News', and 'Applebot' would shadow
    'Applebot-Extended', making those entries unreachable.

    Args:
        user_agent: Raw User-Agent string. ``None``, the empty string and
            the placeholder ``'Unknown'`` are treated as "no information".

    Returns:
        A key of BOT_PATTERNS, or the label ``'Unbekannt'`` when the user
        agent is missing or matches no known pattern.
    """
    if not user_agent or user_agent == 'Unknown':
        return 'Unbekannt'

    best_name = None
    best_span = None
    for bot_name, pattern in BOT_PATTERNS.items():
        # re caches compiled patterns, so repeated calls stay cheap.
        match = re.search(pattern, user_agent, re.IGNORECASE)
        if match is None:
            continue

        span = match.span()
        if best_span is None:
            # First hit in dictionary order is the baseline result.
            best_name, best_span = bot_name, span
        elif (span != best_span
              and span[0] <= best_span[0]
              and span[1] >= best_span[1]):
            # This match strictly encloses the current one, so it is the
            # more specific description of the same token.
            best_name, best_span = bot_name, span

    return best_name if best_name is not None else 'Unbekannt'
ips.items(): - all_ips[ip] = all_ips.get(ip, 0) + count + # Calculate runtime and req/min + if activation_time: + runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60 + req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0 + else: + runtime_minutes = 0 + req_min = 0 + + shop_php_stats[shop] = { + 'blocks': blocks, + 'activation': activation_time, + 'runtime_minutes': runtime_minutes, + 'req_min': req_min + } + + if runtime_minutes > total_minutes: + total_minutes = runtime_minutes + + for ip, data in ips.items(): + if ip not in all_ips: + all_ips[ip] = {'count': 0, 'ua': data['ua']} + all_ips[ip]['count'] += data['count'] + # Keep the most informative UA + if data['ua'] != 'Unknown' and all_ips[ip]['ua'] == 'Unknown': + all_ips[ip]['ua'] = data['ua'] + + # Calculate total req/min + total_req_min = total_php_blocks / total_minutes if total_minutes > 0 else 0 # Get CrowdSec stats crowdsec_stats = get_crowdsec_stats_by_shop() total_crowdsec = sum(crowdsec_stats.values()) - # Display PHP blocks - print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks}") + # Display PHP blocks with req/min + print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks} (⌀ {total_req_min:.1f} req/min, Laufzeit: {format_duration(total_minutes)})") if shop_php_stats: for shop in sorted(shop_php_stats.keys()): - count = shop_php_stats[shop] - bar = "█" * min(count // 10, 20) if count > 0 else "" - print(f" ├─ {shop}: {count} {bar}") + stats = shop_php_stats[shop] + count = stats['blocks'] + req_min = stats['req_min'] + runtime = stats['runtime_minutes'] + bar = "█" * min(int(req_min * 2), 20) if req_min > 0 else "" + runtime_str = format_duration(runtime) if runtime > 0 else "?" 
+ print(f" ├─ {shop}: {count} ({req_min:.1f} req/min, seit {runtime_str}) {bar}") # Display CrowdSec bans print(f"\n🛡️ CrowdSec-Bans gesamt: {total_crowdsec}") @@ -1132,15 +1357,18 @@ def show_all_logs(): else: print(" └─ CrowdSec nicht verfügbar") - # Top blocked IPs + # Top blocked IPs with bot detection if all_ips: print(f"\n🔥 Top 100 blockierte IPs (alle Shops):") - sorted_ips = sorted(all_ips.items(), key=lambda x: x[1], reverse=True)[:100] - for ip, count in sorted_ips: + sorted_ips = sorted(all_ips.items(), key=lambda x: x[1]['count'], reverse=True)[:100] + for ip, data in sorted_ips: + count = data['count'] + ua = data['ua'] + bot_name = detect_bot(ua) bar = "█" * min(count // 5, 20) if count > 0 else "█" - print(f" {ip}: {count} {bar}") + print(f" {ip} ({bot_name}): {count} {bar}") - print(f"\n{'═' * 60}") + print(f"\n{'═' * 70}") # Wait for user input("\nDrücke Enter um fortzufahren...") @@ -1152,25 +1380,57 @@ def show_logs(shop): log_file = os.path.join(httpdocs, LOG_FILE) shop_mode = get_shop_mode(shop) + # Get stats + blocks, ips, activation_time = get_shop_log_stats(shop) + + # Calculate runtime and req/min + if activation_time: + runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60 + req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0 + runtime_str = format_duration(runtime_minutes) + activation_str = activation_time.strftime('%Y-%m-%d %H:%M:%S') + else: + runtime_minutes = 0 + req_min = 0 + runtime_str = "unbekannt" + activation_str = "unbekannt" + mode_display = "PHP + CrowdSec 🛡️" if shop_mode == "php+crowdsec" else "Nur PHP 📝" - print(f"\n📊 Logs für {shop} [{mode_display}]") + + print(f"\n{'═' * 70}") + print(f"📊 Logs für {shop} [{mode_display}]") + print(f"{'═' * 70}") + print(f"\n⏱️ Aktiviert: {activation_str}") + print(f"⏱️ Laufzeit: {runtime_str}") + print(f"📈 Blocks: {blocks} ({req_min:.1f} req/min)") if os.path.isfile(log_file): - print(f"\n📝 PHP-Blocks:") - print("=" * 80) + print(f"\n📝 Letzte 50 
PHP-Blocks:") + print("=" * 70) with open(log_file, 'r') as f: lines = f.readlines() for line in lines[-50:]: print(line.rstrip()) - print("=" * 80) + print("=" * 70) print(f"Gesamt: {len(lines)}") + + # Show top IPs with bot detection + if ips: + print(f"\n🔥 Top 20 blockierte IPs:") + sorted_ips = sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:20] + for ip, data in sorted_ips: + count = data['count'] + ua = data['ua'] + bot_name = detect_bot(ua) + bar = "█" * min(count // 5, 20) if count > 0 else "█" + print(f" {ip} ({bot_name}): {count} {bar}") else: - print(f"ℹ️ Keine PHP-Logs für {shop}") + print(f"\nℹ️ Keine PHP-Logs für {shop}") # Only show CrowdSec decisions if mode is php+crowdsec if shop_mode == "php+crowdsec" and check_crowdsec(): print(f"\n🛡️ CrowdSec Decisions:") - print("=" * 80) + print("=" * 70) # Use raw output with --limit 0 (no pagination) code, stdout, _ = run_command("cscli decisions list -o raw --limit 0") @@ -1207,7 +1467,7 @@ def show_logs(shop): else: print("Konnte Decisions nicht abrufen") - print("=" * 80) + print("=" * 70) elif shop_mode == "php-only": print(f"\n📝 CrowdSec-Synchronisation ist für diesen Shop deaktiviert (PHP-only Modus)") @@ -1348,7 +1608,18 @@ def main(): mode = get_shop_mode(shop) mode_icon = "🛡️" if mode == "php+crowdsec" else "📝" mode_text = "PHP+CS" if mode == "php+crowdsec" else "PHP" - print(f" ✓ {shop} [{mode_text}] {mode_icon}") + + # Get stats + blocks, _, activation_time = get_shop_log_stats(shop) + if activation_time: + runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60 + req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0 + runtime_str = format_duration(runtime_minutes) + else: + req_min = 0 + runtime_str = "?" + + print(f" ✓ {shop} [{mode_text}] {mode_icon} - {blocks} blocks ({req_min:.1f} req/min, {runtime_str})") elif choice == "5": activate_all_shops()