geoip_shop_manager.py aktualisiert

This commit is contained in:
2025-12-09 07:31:18 +01:00
parent b162c1655f
commit 2c7d40fef1

View File

@@ -11,6 +11,7 @@ import shutil
import subprocess
import json
import time
import re
from datetime import datetime, timedelta
from pathlib import Path
@@ -25,6 +26,152 @@ WATCHER_SCRIPT = "/usr/local/bin/geoip_crowdsec_watcher.py"
SYSTEMD_SERVICE = "/etc/systemd/system/geoip-crowdsec-watcher.service"
ACTIVE_SHOPS_FILE = "/var/lib/crowdsec/geoip_active_shops.json"
# =============================================================================
# BOT DETECTION - Comprehensive list of known bots/crawlers
# =============================================================================
# Lookup table used by detect_bot().
#   key   -> display name shown in the statistics output
#   value -> regular expression searched for inside the User-Agent string
#
# detect_bot() applies every pattern with re.IGNORECASE, so the patterns
# do not need to spell out upper/lower-case variants themselves.
#
# NOTE: several patterns are substrings of others (e.g. 'Googlebot' vs.
# 'Googlebot-Image', 'Applebot' vs. 'Applebot-Extended'), so a single
# User-Agent can match more than one entry.
BOT_PATTERNS = {
    # OpenAI
    'GPTBot': r'GPTBot',
    'OAI-SearchBot': r'OAI-SearchBot',
    'ChatGPT-User': r'ChatGPT-User',
    # Anthropic (Claude)
    'ClaudeBot': r'ClaudeBot',
    'Claude-User': r'Claude-User',
    'Claude-SearchBot': r'Claude-SearchBot',
    'anthropic-ai': r'anthropic-ai',
    'claude-web': r'claude-web',
    # Google
    'Googlebot': r'Googlebot',
    'Google-Extended': r'Google-Extended',
    'Googlebot-Image': r'Googlebot-Image',
    'Googlebot-Video': r'Googlebot-Video',
    'Googlebot-News': r'Googlebot-News',
    'Gemini-Deep-Research': r'Gemini-Deep-Research',
    'Google-CloudVertexBot': r'Google-CloudVertexBot',
    'AdsBot-Google': r'AdsBot-Google',
    'Mediapartners-Google': r'Mediapartners-Google',
    'FeedFetcher-Google': r'FeedFetcher-Google',
    'Google-InspectionTool': r'Google-InspectionTool',
    # Microsoft/Bing
    # The explicit [Bb] class is redundant under re.IGNORECASE but harmless.
    'Bingbot': r'[Bb]ingbot',
    'BingPreview': r'BingPreview',
    'msnbot': r'msnbot',
    'AdIdxBot': r'AdIdxBot',
    # Perplexity
    'PerplexityBot': r'PerplexityBot',
    'Perplexity-User': r'Perplexity-User',
    # Apple
    'Applebot': r'Applebot',
    'Applebot-Extended': r'Applebot-Extended',
    # Amazon
    'Amazonbot': r'Amazonbot',
    # Meta/Facebook
    # One display name covers both UA spellings via regex alternation.
    'FacebookBot': r'facebookexternalhit|FacebookBot',
    'meta-externalagent': r'meta-externalagent',
    'Meta-WebIndexer': r'Meta-WebIndexer',
    # ByteDance/TikTok
    'Bytespider': r'Bytespider',
    # DuckDuckGo
    'DuckDuckBot': r'DuckDuckBot',
    'DuckAssistBot': r'DuckAssistBot',
    # Other AI/LLM
    'cohere-ai': r'cohere-ai',
    'YouBot': r'YouBot',
    'MistralAI-User': r'MistralAI-User',
    'AI2Bot': r'AI2Bot',
    'CCBot': r'CCBot',
    'Diffbot': r'Diffbot',
    'Timpibot': r'Timpibot',
    'omgili': r'omgili',
    'webzio': r'webzio',
    'ICC-Crawler': r'ICC-Crawler',
    # SEO Tools
    'AhrefsBot': r'AhrefsBot',
    'SemrushBot': r'SemrushBot',
    'MJ12bot': r'MJ12bot',
    'DotBot': r'DotBot',
    'BLEXBot': r'BLEXBot',
    'DataForSeoBot': r'DataForSeoBot',
    'SEOkicks': r'SEOkicks',
    'seoscanners': r'seoscanners',
    'Screaming Frog': r'Screaming Frog',
    'Sistrix': r'Sistrix',
    'JEEC2Bot': r'JEEC2Bot',
    # Other Search Engines
    'YandexBot': r'YandexBot',
    'YandexImages': r'YandexImages',
    'Baiduspider': r'Baiduspider',
    'PetalBot': r'PetalBot',
    'Sogou': r'Sogou',
    'Qwantify': r'Qwantify',
    'ia_archiver': r'ia_archiver',
    # Social Media
    'LinkedInBot': r'LinkedInBot',
    'Twitterbot': r'Twitterbot',
    'Pinterest': r'Pinterest',
    'Slackbot': r'Slackbot',
    'TelegramBot': r'TelegramBot',
    'WhatsApp': r'WhatsApp',
    'Discordbot': r'Discordbot',
    # Monitoring & Security
    'UptimeRobot': r'UptimeRobot',
    'Pingdom': r'Pingdom',
    'StatusCake': r'StatusCake',
    'GTmetrix': r'GTmetrix',
    'Site24x7': r'Site24x7',
    # Payment/E-Commerce
    # The key is only a label: the pattern matches any UA containing "PayPal".
    'PayPal IPN': r'PayPal',
    'Stripe': r'Stripe',
    'Shopify': r'Shopify',
    # Feed Readers
    'Feedly': r'Feedly',
    'NewsBlur': r'NewsBlur',
    # Other known bots
    'SeznamBot': r'SeznamBot',
    'Exabot': r'Exabot',
    # The dot is escaped so it matches a literal '.' only.
    'archive.org_bot': r'archive\.org_bot',
    'Wget': r'Wget',
    # Anchored with '^': matches only when the UA *starts* with "curl/".
    'curl': r'^curl/',
    'python-requests': r'python-requests',
    'Go-http-client': r'Go-http-client',
    # Anchored with '^': matches only when the UA *starts* with "Java/".
    'Java': r'^Java/',
    'Apache-HttpClient': r'Apache-HttpClient',
    'okhttp': r'okhttp',
    'HeadlessChrome': r'HeadlessChrome',
    'PhantomJS': r'PhantomJS',
    'Scrapy': r'Scrapy',
}
def detect_bot(user_agent, patterns=None):
    """Return the display name of the known bot that best matches a User-Agent.

    Every pattern is searched case-insensitively in ``user_agent``. When
    several patterns match, the one with the longest matched text wins, so a
    specific entry such as ``Googlebot-Image`` is preferred over the generic
    ``Googlebot`` that is a substring of it. Ties are resolved in favour of
    the entry that comes first in ``patterns``.

    Args:
        user_agent: Raw User-Agent string. ``None``, an empty string and the
            placeholder ``'Unknown'`` are treated as "no information".
        patterns: Optional mapping ``display name -> regex``. Defaults to the
            module-level ``BOT_PATTERNS`` table.

    Returns:
        The matching display name, or ``'Unbekannt'`` when nothing matches.
    """
    if not user_agent or user_agent == 'Unknown':
        return 'Unbekannt'

    # Resolved lazily so the default always reflects the current table.
    if patterns is None:
        patterns = BOT_PATTERNS

    best_name = 'Unbekannt'
    best_length = -1
    for bot_name, pattern in patterns.items():
        match = re.search(pattern, user_agent, re.IGNORECASE)
        if match is None:
            continue
        matched_length = match.end() - match.start()
        # Strict '>' keeps the earlier entry on equal length.
        if matched_length > best_length:
            best_name = bot_name
            best_length = matched_length
    return best_name
# PHP GeoIP blocking script (no exec, just logging)
GEOIP_SCRIPT = '''<?php
/**
@@ -488,6 +635,34 @@ def get_shop_mode(shop):
return "php+crowdsec"
def get_shop_activation_time(shop):
    """Return the activation timestamp recorded for ``shop``.

    The timestamp is read from the ``"activated"`` field of the shop's entry
    in the JSON file ``ACTIVE_SHOPS_FILE`` and parsed with
    ``datetime.fromisoformat``.

    Args:
        shop: Key of the shop inside the active-shops JSON object.

    Returns:
        A naive ``datetime`` in local time, or ``None`` when the file is
        missing or unreadable, is not valid JSON, has no entry or timestamp
        for ``shop``, or the timestamp cannot be parsed.
    """
    try:
        with open(ACTIVE_SHOPS_FILE, 'r', encoding='utf-8') as f:
            shops = json.load(f)
        activated_str = shops.get(shop, {}).get("activated")
        if not activated_str:
            return None
        activated = datetime.fromisoformat(activated_str)
        # Callers compute ``datetime.now() - activation_time``; an aware
        # value would raise TypeError there, so normalise to naive local time.
        if activated.tzinfo is not None:
            activated = activated.astimezone().replace(tzinfo=None)
    except (OSError, ValueError, TypeError, AttributeError, OverflowError):
        # Best effort: a missing/corrupt tracking file or a malformed entry
        # simply means "activation time unknown".
        return None
    return activated
def format_duration(minutes):
    """Render a duration given in minutes as a short human-readable string.

    The two largest applicable units are shown and fractions are truncated
    (not rounded):

    * below one hour   -> ``"<m>m"``
    * below one day    -> ``"<h>h <m>m"``
    * one day or more  -> ``"<d>d <h>h"`` (minutes are dropped)
    """
    # Guard clause: anything shorter than an hour is shown in minutes only.
    if minutes < 60:
        return f"{int(minutes)}m"

    total_hours = minutes / 60
    if total_hours >= 24:
        day_part = int(total_hours / 24)
        hour_part = int(total_hours % 24)
        return f"{day_part}d {hour_part}h"

    return f"{int(total_hours)}h {int(minutes % 60)}m"
def remove_shop_from_active(shop):
"""Remove shop from active shops tracking"""
if not os.path.isfile(ACTIVE_SHOPS_FILE):
@@ -1041,26 +1216,46 @@ def deactivate_all_shops():
def get_shop_log_stats(shop):
    """Collect PHP block statistics for a single shop, including user agents.

    Reads ``<VHOSTS_DIR>/<shop>/httpdocs/<LOG_FILE>``. Every line of that file
    counts as one block. Lines are expected to look like::

        [timestamp] IP: x.x.x.x | UA: user_agent | URI: /path

    Args:
        shop: Shop directory name below ``VHOSTS_DIR``.

    Returns:
        A tuple ``(php_blocks, ips, activation_time)`` where

        * ``php_blocks`` is the number of lines in the log (0 if no log),
        * ``ips`` maps each IP to ``{'count': N, 'ua': user_agent}``; ``ua``
          is ``'Unknown'`` until a line for that IP carries a user agent,
        * ``activation_time`` is whatever ``get_shop_activation_time(shop)``
          returns.
    """
    httpdocs = os.path.join(VHOSTS_DIR, shop, 'httpdocs')
    log_file = os.path.join(httpdocs, LOG_FILE)
    php_blocks = 0
    ips = {}  # ip -> {'count': N, 'ua': user_agent}

    if os.path.isfile(log_file):
        # User agents are attacker-controlled and may contain invalid bytes;
        # replace them instead of aborting the whole statistics run.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                php_blocks += 1

                # str.partition never raises: a missing marker just yields ''.
                ip = line.partition('IP: ')[2].partition(' |')[0].strip()
                if not ip:
                    continue
                ua = line.partition('UA: ')[2].partition(' |')[0].strip()
                if not ua:
                    ua = 'Unknown'

                entry = ips.setdefault(ip, {'count': 0, 'ua': ua})
                entry['count'] += 1
                # Upgrade to a real user agent once one is seen for this IP.
                if ua != 'Unknown' and entry['ua'] == 'Unknown':
                    entry['ua'] = ua

    activation_time = get_shop_activation_time(shop)
    return php_blocks, ips, activation_time
def get_crowdsec_stats_by_shop():
@@ -1091,34 +1286,64 @@ def show_all_logs():
print("\n⚠️ Keine aktiven Shops")
return
print(f"\n{'' * 60}")
print(f"\n{'' * 70}")
print(" 📊 GESAMTÜBERSICHT ALLER SHOPS")
print(f"{'' * 60}")
print(f"{'' * 70}")
total_php_blocks = 0
shop_php_stats = {}
all_ips = {}
shop_php_stats = {} # shop -> {'blocks': N, 'activation': datetime, 'req_min': float}
all_ips = {} # ip -> {'count': N, 'ua': user_agent}
total_minutes = 0
# Collect PHP stats
for shop in active_shops:
blocks, ips = get_shop_log_stats(shop)
blocks, ips, activation_time = get_shop_log_stats(shop)
total_php_blocks += blocks
shop_php_stats[shop] = blocks
for ip, count in ips.items():
all_ips[ip] = all_ips.get(ip, 0) + count
# Calculate runtime and req/min
if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
else:
runtime_minutes = 0
req_min = 0
shop_php_stats[shop] = {
'blocks': blocks,
'activation': activation_time,
'runtime_minutes': runtime_minutes,
'req_min': req_min
}
if runtime_minutes > total_minutes:
total_minutes = runtime_minutes
for ip, data in ips.items():
if ip not in all_ips:
all_ips[ip] = {'count': 0, 'ua': data['ua']}
all_ips[ip]['count'] += data['count']
# Keep the most informative UA
if data['ua'] != 'Unknown' and all_ips[ip]['ua'] == 'Unknown':
all_ips[ip]['ua'] = data['ua']
# Calculate total req/min
total_req_min = total_php_blocks / total_minutes if total_minutes > 0 else 0
# Get CrowdSec stats
crowdsec_stats = get_crowdsec_stats_by_shop()
total_crowdsec = sum(crowdsec_stats.values())
# Display PHP blocks
print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks}")
# Display PHP blocks with req/min
print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks} (⌀ {total_req_min:.1f} req/min, Laufzeit: {format_duration(total_minutes)})")
if shop_php_stats:
for shop in sorted(shop_php_stats.keys()):
count = shop_php_stats[shop]
bar = "" * min(count // 10, 20) if count > 0 else ""
print(f" ├─ {shop}: {count} {bar}")
stats = shop_php_stats[shop]
count = stats['blocks']
req_min = stats['req_min']
runtime = stats['runtime_minutes']
bar = "" * min(int(req_min * 2), 20) if req_min > 0 else ""
runtime_str = format_duration(runtime) if runtime > 0 else "?"
print(f" ├─ {shop}: {count} ({req_min:.1f} req/min, seit {runtime_str}) {bar}")
# Display CrowdSec bans
print(f"\n🛡️ CrowdSec-Bans gesamt: {total_crowdsec}")
@@ -1132,15 +1357,18 @@ def show_all_logs():
else:
print(" └─ CrowdSec nicht verfügbar")
# Top blocked IPs
# Top blocked IPs with bot detection
if all_ips:
print(f"\n🔥 Top 100 blockierte IPs (alle Shops):")
sorted_ips = sorted(all_ips.items(), key=lambda x: x[1], reverse=True)[:100]
for ip, count in sorted_ips:
sorted_ips = sorted(all_ips.items(), key=lambda x: x[1]['count'], reverse=True)[:100]
for ip, data in sorted_ips:
count = data['count']
ua = data['ua']
bot_name = detect_bot(ua)
bar = "" * min(count // 5, 20) if count > 0 else ""
print(f" {ip}: {count} {bar}")
print(f" {ip} ({bot_name}): {count} {bar}")
print(f"\n{'' * 60}")
print(f"\n{'' * 70}")
# Wait for user
input("\nDrücke Enter um fortzufahren...")
@@ -1152,25 +1380,57 @@ def show_logs(shop):
log_file = os.path.join(httpdocs, LOG_FILE)
shop_mode = get_shop_mode(shop)
# Get stats
blocks, ips, activation_time = get_shop_log_stats(shop)
# Calculate runtime and req/min
if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
runtime_str = format_duration(runtime_minutes)
activation_str = activation_time.strftime('%Y-%m-%d %H:%M:%S')
else:
runtime_minutes = 0
req_min = 0
runtime_str = "unbekannt"
activation_str = "unbekannt"
mode_display = "PHP + CrowdSec 🛡️" if shop_mode == "php+crowdsec" else "Nur PHP 📝"
print(f"\n📊 Logs für {shop} [{mode_display}]")
print(f"\n{'' * 70}")
print(f"📊 Logs für {shop} [{mode_display}]")
print(f"{'' * 70}")
print(f"\n⏱️ Aktiviert: {activation_str}")
print(f"⏱️ Laufzeit: {runtime_str}")
print(f"📈 Blocks: {blocks} ({req_min:.1f} req/min)")
if os.path.isfile(log_file):
print(f"\n📝 PHP-Blocks:")
print("=" * 80)
print(f"\n📝 Letzte 50 PHP-Blocks:")
print("=" * 70)
with open(log_file, 'r') as f:
lines = f.readlines()
for line in lines[-50:]:
print(line.rstrip())
print("=" * 80)
print("=" * 70)
print(f"Gesamt: {len(lines)}")
# Show top IPs with bot detection
if ips:
print(f"\n🔥 Top 20 blockierte IPs:")
sorted_ips = sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:20]
for ip, data in sorted_ips:
count = data['count']
ua = data['ua']
bot_name = detect_bot(ua)
bar = "" * min(count // 5, 20) if count > 0 else ""
print(f" {ip} ({bot_name}): {count} {bar}")
else:
print(f" Keine PHP-Logs für {shop}")
print(f"\n Keine PHP-Logs für {shop}")
# Only show CrowdSec decisions if mode is php+crowdsec
if shop_mode == "php+crowdsec" and check_crowdsec():
print(f"\n🛡️ CrowdSec Decisions:")
print("=" * 80)
print("=" * 70)
# Use raw output with --limit 0 (no pagination)
code, stdout, _ = run_command("cscli decisions list -o raw --limit 0")
@@ -1207,7 +1467,7 @@ def show_logs(shop):
else:
print("Konnte Decisions nicht abrufen")
print("=" * 80)
print("=" * 70)
elif shop_mode == "php-only":
print(f"\n📝 CrowdSec-Synchronisation ist für diesen Shop deaktiviert (PHP-only Modus)")
@@ -1348,7 +1608,18 @@ def main():
mode = get_shop_mode(shop)
mode_icon = "🛡️" if mode == "php+crowdsec" else "📝"
mode_text = "PHP+CS" if mode == "php+crowdsec" else "PHP"
print(f"{shop} [{mode_text}] {mode_icon}")
# Get stats
blocks, _, activation_time = get_shop_log_stats(shop)
if activation_time:
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
runtime_str = format_duration(runtime_minutes)
else:
req_min = 0
runtime_str = "?"
print(f"{shop} [{mode_text}] {mode_icon} - {blocks} blocks ({req_min:.1f} req/min, {runtime_str})")
elif choice == "5":
activate_all_shops()