geoip_shop_manager.py aktualisiert

This commit is contained in:
2025-12-09 07:31:18 +01:00
parent b162c1655f
commit 2c7d40fef1

View File

@@ -11,6 +11,7 @@ import shutil
import subprocess import subprocess
import json import json
import time import time
import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
@@ -25,6 +26,152 @@ WATCHER_SCRIPT = "/usr/local/bin/geoip_crowdsec_watcher.py"
SYSTEMD_SERVICE = "/etc/systemd/system/geoip-crowdsec-watcher.service" SYSTEMD_SERVICE = "/etc/systemd/system/geoip-crowdsec-watcher.service"
ACTIVE_SHOPS_FILE = "/var/lib/crowdsec/geoip_active_shops.json" ACTIVE_SHOPS_FILE = "/var/lib/crowdsec/geoip_active_shops.json"
# =============================================================================
# BOT DETECTION - Comprehensive list of known bots/crawlers
# =============================================================================
# Maps a display name to a regular expression that is searched (not anchored
# unless the pattern says so) in the User-Agent string, ignoring case.
BOT_PATTERNS = {
    # OpenAI
    'GPTBot': r'GPTBot',
    'OAI-SearchBot': r'OAI-SearchBot',
    'ChatGPT-User': r'ChatGPT-User',
    # Anthropic (Claude)
    'ClaudeBot': r'ClaudeBot',
    'Claude-User': r'Claude-User',
    'Claude-SearchBot': r'Claude-SearchBot',
    'anthropic-ai': r'anthropic-ai',
    'claude-web': r'claude-web',
    # Google
    'Googlebot': r'Googlebot',
    'Google-Extended': r'Google-Extended',
    'Googlebot-Image': r'Googlebot-Image',
    'Googlebot-Video': r'Googlebot-Video',
    'Googlebot-News': r'Googlebot-News',
    'Gemini-Deep-Research': r'Gemini-Deep-Research',
    'Google-CloudVertexBot': r'Google-CloudVertexBot',
    'AdsBot-Google': r'AdsBot-Google',
    'Mediapartners-Google': r'Mediapartners-Google',
    'FeedFetcher-Google': r'FeedFetcher-Google',
    'Google-InspectionTool': r'Google-InspectionTool',
    # Microsoft/Bing
    'Bingbot': r'[Bb]ingbot',
    'BingPreview': r'BingPreview',
    'msnbot': r'msnbot',
    'AdIdxBot': r'AdIdxBot',
    # Perplexity
    'PerplexityBot': r'PerplexityBot',
    'Perplexity-User': r'Perplexity-User',
    # Apple
    'Applebot': r'Applebot',
    'Applebot-Extended': r'Applebot-Extended',
    # Amazon
    'Amazonbot': r'Amazonbot',
    # Meta/Facebook
    'FacebookBot': r'facebookexternalhit|FacebookBot',
    'meta-externalagent': r'meta-externalagent',
    'Meta-WebIndexer': r'Meta-WebIndexer',
    # ByteDance/TikTok
    'Bytespider': r'Bytespider',
    # DuckDuckGo
    'DuckDuckBot': r'DuckDuckBot',
    'DuckAssistBot': r'DuckAssistBot',
    # Other AI/LLM
    'cohere-ai': r'cohere-ai',
    'YouBot': r'YouBot',
    'MistralAI-User': r'MistralAI-User',
    'AI2Bot': r'AI2Bot',
    'CCBot': r'CCBot',
    'Diffbot': r'Diffbot',
    'Timpibot': r'Timpibot',
    'omgili': r'omgili',
    'webzio': r'webzio',
    'ICC-Crawler': r'ICC-Crawler',
    # SEO Tools
    'AhrefsBot': r'AhrefsBot',
    'SemrushBot': r'SemrushBot',
    'MJ12bot': r'MJ12bot',
    'DotBot': r'DotBot',
    'BLEXBot': r'BLEXBot',
    'DataForSeoBot': r'DataForSeoBot',
    'SEOkicks': r'SEOkicks',
    'seoscanners': r'seoscanners',
    'Screaming Frog': r'Screaming Frog',
    'Sistrix': r'Sistrix',
    'JEEC2Bot': r'JEEC2Bot',
    # Other Search Engines
    'YandexBot': r'YandexBot',
    'YandexImages': r'YandexImages',
    'Baiduspider': r'Baiduspider',
    'PetalBot': r'PetalBot',
    'Sogou': r'Sogou',
    'Qwantify': r'Qwantify',
    'ia_archiver': r'ia_archiver',
    # Social Media
    'LinkedInBot': r'LinkedInBot',
    'Twitterbot': r'Twitterbot',
    'Pinterest': r'Pinterest',
    'Slackbot': r'Slackbot',
    'TelegramBot': r'TelegramBot',
    'WhatsApp': r'WhatsApp',
    'Discordbot': r'Discordbot',
    # Monitoring & Security
    'UptimeRobot': r'UptimeRobot',
    'Pingdom': r'Pingdom',
    'StatusCake': r'StatusCake',
    'GTmetrix': r'GTmetrix',
    'Site24x7': r'Site24x7',
    # Payment/E-Commerce
    'PayPal IPN': r'PayPal',
    'Stripe': r'Stripe',
    'Shopify': r'Shopify',
    # Feed Readers
    'Feedly': r'Feedly',
    'NewsBlur': r'NewsBlur',
    # Other known bots
    'SeznamBot': r'SeznamBot',
    'Exabot': r'Exabot',
    'archive.org_bot': r'archive\.org_bot',
    'Wget': r'Wget',
    'curl': r'^curl/',
    'python-requests': r'python-requests',
    'Go-http-client': r'Go-http-client',
    'Java': r'^Java/',
    'Apache-HttpClient': r'Apache-HttpClient',
    'okhttp': r'okhttp',
    'HeadlessChrome': r'HeadlessChrome',
    'PhantomJS': r'PhantomJS',
    'Scrapy': r'Scrapy',
}


def detect_bot(user_agent):
    """Return the name of the known bot that best matches a User-Agent string.

    Every pattern in BOT_PATTERNS is searched case-insensitively. When more
    than one pattern matches, the one whose matched text is longest wins, so
    specific entries such as 'Googlebot-Image' or 'Applebot-Extended' are not
    shadowed by the generic 'Googlebot' / 'Applebot' entries that precede them
    in the table. Ties keep BOT_PATTERNS insertion order.

    Args:
        user_agent: Raw User-Agent value; may be None, empty, or the
            placeholder 'Unknown'.

    Returns:
        The matching BOT_PATTERNS key, or 'Unbekannt' when the value is
        missing or no pattern matches.
    """
    if not user_agent or user_agent == 'Unknown':
        return 'Unbekannt'

    best_name = 'Unbekannt'
    best_length = -1  # -1 so that any match, even a zero-length one, counts
    for bot_name, pattern in BOT_PATTERNS.items():
        match = re.search(pattern, user_agent, re.IGNORECASE)
        if match is None:
            continue
        matched_length = len(match.group(0))
        # Strict '>' keeps the earlier table entry when two matches tie.
        if matched_length > best_length:
            best_name = bot_name
            best_length = matched_length
    return best_name
# PHP GeoIP blocking script (no exec, just logging) # PHP GeoIP blocking script (no exec, just logging)
GEOIP_SCRIPT = '''<?php GEOIP_SCRIPT = '''<?php
/** /**
@@ -488,6 +635,34 @@ def get_shop_mode(shop):
return "php+crowdsec" return "php+crowdsec"
def get_shop_activation_time(shop):
    """Return the recorded activation timestamp for a shop.

    Reads ACTIVE_SHOPS_FILE (a JSON object keyed by shop name) and parses the
    ISO-8601 string stored under that shop's "activated" key.

    Args:
        shop: Shop name used as key in the active-shops JSON file.

    Returns:
        A naive ``datetime``, or None when the file is missing or unreadable,
        the shop has no "activated" entry, or the stored value cannot be
        parsed.
    """
    if not os.path.isfile(ACTIVE_SHOPS_FILE):
        return None

    try:
        with open(ACTIVE_SHOPS_FILE, 'r') as f:
            shops = json.load(f)
        activated_str = shops.get(shop, {}).get("activated")
        if not activated_str:
            return None
        activated = datetime.fromisoformat(activated_str)
        if activated.tzinfo is not None:
            # Callers subtract this value from the naive datetime.now();
            # mixing aware and naive datetimes raises TypeError, so convert
            # an offset-bearing timestamp to naive local time.
            activated = activated.astimezone().replace(tzinfo=None)
    except (OSError, ValueError, TypeError, AttributeError, OverflowError):
        # Best effort: an unreadable file, invalid JSON (JSONDecodeError is a
        # ValueError), an unexpected JSON shape, or a malformed timestamp all
        # mean "activation time unknown" rather than a fatal error.
        return None
    return activated
def format_duration(minutes):
    """Render a number of minutes as a short human-readable duration.

    Args:
        minutes: Duration in minutes (int or float); fractions are truncated.

    Returns:
        "<M>m" below one hour, "<H>h <M>m" below one day, otherwise
        "<D>d <H>h".
    """
    # Under an hour: minutes only.
    if minutes < 60:
        return f"{int(minutes)}m"

    total_hours = minutes / 60

    # Under a day: whole hours plus the leftover minutes.
    if total_hours < 24:
        whole_hours = int(total_hours)
        leftover_minutes = int(minutes % 60)
        return f"{whole_hours}h {leftover_minutes}m"

    # A day or more: whole days plus the leftover hours.
    whole_days = int(total_hours / 24)
    leftover_hours = int(total_hours % 24)
    return f"{whole_days}d {leftover_hours}h"
def remove_shop_from_active(shop): def remove_shop_from_active(shop):
"""Remove shop from active shops tracking""" """Remove shop from active shops tracking"""
if not os.path.isfile(ACTIVE_SHOPS_FILE): if not os.path.isfile(ACTIVE_SHOPS_FILE):
@@ -1041,26 +1216,46 @@ def deactivate_all_shops():
def _extract_log_field(line, label):
    """Return the text between the first `label` and the next ' |', or None."""
    _, found, rest = line.partition(label)
    if not found:
        return None
    return rest.split(' |')[0].strip()


def get_shop_log_stats(shop):
    """Collect block statistics from a shop's PHP log file.

    Each log line counts as one block. Lines are expected in the form
    ``[timestamp] IP: x.x.x.x | UA: user_agent | URI: /path``.

    Args:
        shop: Shop directory name below VHOSTS_DIR.

    Returns:
        A tuple ``(php_blocks, ips, activation_time)`` where ``php_blocks`` is
        the number of log lines, ``ips`` maps each IP to
        ``{'count': N, 'ua': user_agent}``, and ``activation_time`` is the
        value returned by ``get_shop_activation_time(shop)``.
    """
    httpdocs = os.path.join(VHOSTS_DIR, shop, 'httpdocs')
    log_file = os.path.join(httpdocs, LOG_FILE)
    php_blocks = 0
    ips = {}  # ip -> {'count': N, 'ua': user_agent}

    if os.path.isfile(log_file):
        # User-Agent values come from remote clients and may contain bytes
        # that are not valid UTF-8; replace them instead of aborting the
        # whole statistics run with UnicodeDecodeError.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                php_blocks += 1

                ip = _extract_log_field(line, 'IP: ')
                ua = _extract_log_field(line, 'UA: ')
                if ua is None:
                    ua = 'Unknown'

                # Lines without a usable IP still count as a block but are
                # not attributed to any address.
                if not ip:
                    continue

                entry = ips.setdefault(ip, {'count': 0, 'ua': ua})
                entry['count'] += 1
                # Update UA if we have a better one (not Unknown)
                if ua != 'Unknown' and entry['ua'] == 'Unknown':
                    entry['ua'] = ua

    activation_time = get_shop_activation_time(shop)
    return php_blocks, ips, activation_time
def get_crowdsec_stats_by_shop(): def get_crowdsec_stats_by_shop():
@@ -1091,34 +1286,64 @@ def show_all_logs():
print("\n⚠️ Keine aktiven Shops") print("\n⚠️ Keine aktiven Shops")
return return
print(f"\n{'' * 60}") print(f"\n{'' * 70}")
print(" 📊 GESAMTÜBERSICHT ALLER SHOPS") print(" 📊 GESAMTÜBERSICHT ALLER SHOPS")
print(f"{'' * 60}") print(f"{'' * 70}")
total_php_blocks = 0 total_php_blocks = 0
shop_php_stats = {} shop_php_stats = {} # shop -> {'blocks': N, 'activation': datetime, 'req_min': float}
all_ips = {} all_ips = {} # ip -> {'count': N, 'ua': user_agent}
total_minutes = 0
# Collect PHP stats # Collect PHP stats
for shop in active_shops: for shop in active_shops:
blocks, ips = get_shop_log_stats(shop) blocks, ips, activation_time = get_shop_log_stats(shop)
total_php_blocks += blocks total_php_blocks += blocks
shop_php_stats[shop] = blocks
for ip, count in ips.items(): # Calculate runtime and req/min
all_ips[ip] = all_ips.get(ip, 0) + count if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
else:
runtime_minutes = 0
req_min = 0
shop_php_stats[shop] = {
'blocks': blocks,
'activation': activation_time,
'runtime_minutes': runtime_minutes,
'req_min': req_min
}
if runtime_minutes > total_minutes:
total_minutes = runtime_minutes
for ip, data in ips.items():
if ip not in all_ips:
all_ips[ip] = {'count': 0, 'ua': data['ua']}
all_ips[ip]['count'] += data['count']
# Keep the most informative UA
if data['ua'] != 'Unknown' and all_ips[ip]['ua'] == 'Unknown':
all_ips[ip]['ua'] = data['ua']
# Calculate total req/min
total_req_min = total_php_blocks / total_minutes if total_minutes > 0 else 0
# Get CrowdSec stats # Get CrowdSec stats
crowdsec_stats = get_crowdsec_stats_by_shop() crowdsec_stats = get_crowdsec_stats_by_shop()
total_crowdsec = sum(crowdsec_stats.values()) total_crowdsec = sum(crowdsec_stats.values())
# Display PHP blocks # Display PHP blocks with req/min
print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks}") print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks} (⌀ {total_req_min:.1f} req/min, Laufzeit: {format_duration(total_minutes)})")
if shop_php_stats: if shop_php_stats:
for shop in sorted(shop_php_stats.keys()): for shop in sorted(shop_php_stats.keys()):
count = shop_php_stats[shop] stats = shop_php_stats[shop]
bar = "" * min(count // 10, 20) if count > 0 else "" count = stats['blocks']
print(f" ├─ {shop}: {count} {bar}") req_min = stats['req_min']
runtime = stats['runtime_minutes']
bar = "" * min(int(req_min * 2), 20) if req_min > 0 else ""
runtime_str = format_duration(runtime) if runtime > 0 else "?"
print(f" ├─ {shop}: {count} ({req_min:.1f} req/min, seit {runtime_str}) {bar}")
# Display CrowdSec bans # Display CrowdSec bans
print(f"\n🛡️ CrowdSec-Bans gesamt: {total_crowdsec}") print(f"\n🛡️ CrowdSec-Bans gesamt: {total_crowdsec}")
@@ -1132,15 +1357,18 @@ def show_all_logs():
else: else:
print(" └─ CrowdSec nicht verfügbar") print(" └─ CrowdSec nicht verfügbar")
# Top blocked IPs # Top blocked IPs with bot detection
if all_ips: if all_ips:
print(f"\n🔥 Top 100 blockierte IPs (alle Shops):") print(f"\n🔥 Top 100 blockierte IPs (alle Shops):")
sorted_ips = sorted(all_ips.items(), key=lambda x: x[1], reverse=True)[:100] sorted_ips = sorted(all_ips.items(), key=lambda x: x[1]['count'], reverse=True)[:100]
for ip, count in sorted_ips: for ip, data in sorted_ips:
count = data['count']
ua = data['ua']
bot_name = detect_bot(ua)
bar = "" * min(count // 5, 20) if count > 0 else "" bar = "" * min(count // 5, 20) if count > 0 else ""
print(f" {ip}: {count} {bar}") print(f" {ip} ({bot_name}): {count} {bar}")
print(f"\n{'' * 60}") print(f"\n{'' * 70}")
# Wait for user # Wait for user
input("\nDrücke Enter um fortzufahren...") input("\nDrücke Enter um fortzufahren...")
@@ -1152,25 +1380,57 @@ def show_logs(shop):
log_file = os.path.join(httpdocs, LOG_FILE) log_file = os.path.join(httpdocs, LOG_FILE)
shop_mode = get_shop_mode(shop) shop_mode = get_shop_mode(shop)
# Get stats
blocks, ips, activation_time = get_shop_log_stats(shop)
# Calculate runtime and req/min
if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
runtime_str = format_duration(runtime_minutes)
activation_str = activation_time.strftime('%Y-%m-%d %H:%M:%S')
else:
runtime_minutes = 0
req_min = 0
runtime_str = "unbekannt"
activation_str = "unbekannt"
mode_display = "PHP + CrowdSec 🛡️" if shop_mode == "php+crowdsec" else "Nur PHP 📝" mode_display = "PHP + CrowdSec 🛡️" if shop_mode == "php+crowdsec" else "Nur PHP 📝"
print(f"\n📊 Logs für {shop} [{mode_display}]")
print(f"\n{'' * 70}")
print(f"📊 Logs für {shop} [{mode_display}]")
print(f"{'' * 70}")
print(f"\n⏱️ Aktiviert: {activation_str}")
print(f"⏱️ Laufzeit: {runtime_str}")
print(f"📈 Blocks: {blocks} ({req_min:.1f} req/min)")
if os.path.isfile(log_file): if os.path.isfile(log_file):
print(f"\n📝 PHP-Blocks:") print(f"\n📝 Letzte 50 PHP-Blocks:")
print("=" * 80) print("=" * 70)
with open(log_file, 'r') as f: with open(log_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
for line in lines[-50:]: for line in lines[-50:]:
print(line.rstrip()) print(line.rstrip())
print("=" * 80) print("=" * 70)
print(f"Gesamt: {len(lines)}") print(f"Gesamt: {len(lines)}")
# Show top IPs with bot detection
if ips:
print(f"\n🔥 Top 20 blockierte IPs:")
sorted_ips = sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:20]
for ip, data in sorted_ips:
count = data['count']
ua = data['ua']
bot_name = detect_bot(ua)
bar = "" * min(count // 5, 20) if count > 0 else ""
print(f" {ip} ({bot_name}): {count} {bar}")
else: else:
print(f" Keine PHP-Logs für {shop}") print(f"\n Keine PHP-Logs für {shop}")
# Only show CrowdSec decisions if mode is php+crowdsec # Only show CrowdSec decisions if mode is php+crowdsec
if shop_mode == "php+crowdsec" and check_crowdsec(): if shop_mode == "php+crowdsec" and check_crowdsec():
print(f"\n🛡️ CrowdSec Decisions:") print(f"\n🛡️ CrowdSec Decisions:")
print("=" * 80) print("=" * 70)
# Use raw output with --limit 0 (no pagination) # Use raw output with --limit 0 (no pagination)
code, stdout, _ = run_command("cscli decisions list -o raw --limit 0") code, stdout, _ = run_command("cscli decisions list -o raw --limit 0")
@@ -1207,7 +1467,7 @@ def show_logs(shop):
else: else:
print("Konnte Decisions nicht abrufen") print("Konnte Decisions nicht abrufen")
print("=" * 80) print("=" * 70)
elif shop_mode == "php-only": elif shop_mode == "php-only":
print(f"\n📝 CrowdSec-Synchronisation ist für diesen Shop deaktiviert (PHP-only Modus)") print(f"\n📝 CrowdSec-Synchronisation ist für diesen Shop deaktiviert (PHP-only Modus)")
@@ -1348,7 +1608,18 @@ def main():
mode = get_shop_mode(shop) mode = get_shop_mode(shop)
mode_icon = "🛡️" if mode == "php+crowdsec" else "📝" mode_icon = "🛡️" if mode == "php+crowdsec" else "📝"
mode_text = "PHP+CS" if mode == "php+crowdsec" else "PHP" mode_text = "PHP+CS" if mode == "php+crowdsec" else "PHP"
print(f"{shop} [{mode_text}] {mode_icon}")
# Get stats
blocks, _, activation_time = get_shop_log_stats(shop)
if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
runtime_str = format_duration(runtime_minutes)
else:
req_min = 0
runtime_str = "?"
print(f"{shop} [{mode_text}] {mode_icon} - {blocks} blocks ({req_min:.1f} req/min, {runtime_str})")
elif choice == "5": elif choice == "5":
activate_all_shops() activate_all_shops()