geoip_shop_manager.py aktualisiert
This commit is contained in:
@@ -11,6 +11,7 @@ import shutil
|
||||
import subprocess
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
@@ -25,6 +26,152 @@ WATCHER_SCRIPT = "/usr/local/bin/geoip_crowdsec_watcher.py"
|
||||
SYSTEMD_SERVICE = "/etc/systemd/system/geoip-crowdsec-watcher.service"
|
||||
ACTIVE_SHOPS_FILE = "/var/lib/crowdsec/geoip_active_shops.json"
|
||||
|
||||
# =============================================================================
|
||||
# BOT DETECTION - Comprehensive list of known bots/crawlers
|
||||
# =============================================================================
|
||||
# Maps a display name to the regular expression that identifies that client
# in a User-Agent header. detect_bot() searches every pattern
# case-insensitively and prefers the longest match, so a generic entry such
# as 'Googlebot' may safely coexist with 'Googlebot-Image'.
BOT_PATTERNS = {
    # OpenAI
    'GPTBot': r'GPTBot',
    'OAI-SearchBot': r'OAI-SearchBot',
    'ChatGPT-User': r'ChatGPT-User',

    # Anthropic (Claude)
    'ClaudeBot': r'ClaudeBot',
    'Claude-User': r'Claude-User',
    'Claude-SearchBot': r'Claude-SearchBot',
    'anthropic-ai': r'anthropic-ai',
    'claude-web': r'claude-web',

    # Google
    'Googlebot': r'Googlebot',
    'Google-Extended': r'Google-Extended',
    'Googlebot-Image': r'Googlebot-Image',
    'Googlebot-Video': r'Googlebot-Video',
    'Googlebot-News': r'Googlebot-News',
    'Gemini-Deep-Research': r'Gemini-Deep-Research',
    'Google-CloudVertexBot': r'Google-CloudVertexBot',
    'AdsBot-Google': r'AdsBot-Google',
    'Mediapartners-Google': r'Mediapartners-Google',
    'FeedFetcher-Google': r'FeedFetcher-Google',
    'Google-InspectionTool': r'Google-InspectionTool',

    # Microsoft/Bing
    'Bingbot': r'[Bb]ingbot',
    'BingPreview': r'BingPreview',
    'msnbot': r'msnbot',
    'AdIdxBot': r'AdIdxBot',

    # Perplexity
    'PerplexityBot': r'PerplexityBot',
    'Perplexity-User': r'Perplexity-User',

    # Apple
    'Applebot': r'Applebot',
    'Applebot-Extended': r'Applebot-Extended',

    # Amazon
    'Amazonbot': r'Amazonbot',

    # Meta/Facebook
    'FacebookBot': r'facebookexternalhit|FacebookBot',
    'meta-externalagent': r'meta-externalagent',
    'Meta-WebIndexer': r'Meta-WebIndexer',

    # ByteDance/TikTok
    'Bytespider': r'Bytespider',

    # DuckDuckGo
    'DuckDuckBot': r'DuckDuckBot',
    'DuckAssistBot': r'DuckAssistBot',

    # Other AI/LLM
    'cohere-ai': r'cohere-ai',
    'YouBot': r'YouBot',
    'MistralAI-User': r'MistralAI-User',
    'AI2Bot': r'AI2Bot',
    'CCBot': r'CCBot',
    'Diffbot': r'Diffbot',
    'Timpibot': r'Timpibot',
    'omgili': r'omgili',
    'webzio': r'webzio',
    'ICC-Crawler': r'ICC-Crawler',

    # SEO Tools
    'AhrefsBot': r'AhrefsBot',
    'SemrushBot': r'SemrushBot',
    'MJ12bot': r'MJ12bot',
    'DotBot': r'DotBot',
    'BLEXBot': r'BLEXBot',
    'DataForSeoBot': r'DataForSeoBot',
    'SEOkicks': r'SEOkicks',
    'seoscanners': r'seoscanners',
    'Screaming Frog': r'Screaming Frog',
    'Sistrix': r'Sistrix',
    'JEEC2Bot': r'JEEC2Bot',

    # Other Search Engines
    'YandexBot': r'YandexBot',
    'YandexImages': r'YandexImages',
    'Baiduspider': r'Baiduspider',
    'PetalBot': r'PetalBot',
    'Sogou': r'Sogou',
    'Qwantify': r'Qwantify',
    'ia_archiver': r'ia_archiver',

    # Social Media
    'LinkedInBot': r'LinkedInBot',
    'Twitterbot': r'Twitterbot',
    'Pinterest': r'Pinterest',
    'Slackbot': r'Slackbot',
    'TelegramBot': r'TelegramBot',
    'WhatsApp': r'WhatsApp',
    'Discordbot': r'Discordbot',

    # Monitoring & Security
    'UptimeRobot': r'UptimeRobot',
    'Pingdom': r'Pingdom',
    'StatusCake': r'StatusCake',
    'GTmetrix': r'GTmetrix',
    'Site24x7': r'Site24x7',

    # Payment/E-Commerce
    'PayPal IPN': r'PayPal',
    'Stripe': r'Stripe',
    'Shopify': r'Shopify',

    # Feed Readers
    'Feedly': r'Feedly',
    'NewsBlur': r'NewsBlur',

    # Other known bots
    'SeznamBot': r'SeznamBot',
    'Exabot': r'Exabot',
    'archive.org_bot': r'archive\.org_bot',
    'Wget': r'Wget',
    'curl': r'^curl/',
    'python-requests': r'python-requests',
    'Go-http-client': r'Go-http-client',
    'Java': r'^Java/',
    'Apache-HttpClient': r'Apache-HttpClient',
    'okhttp': r'okhttp',
    'HeadlessChrome': r'HeadlessChrome',
    'PhantomJS': r'PhantomJS',
    'Scrapy': r'Scrapy',
}


def detect_bot(user_agent):
    """Return the BOT_PATTERNS name that best matches a user agent string.

    Every pattern in BOT_PATTERNS is searched case-insensitively. When more
    than one pattern matches, the one whose matched text is longest wins, so
    'Googlebot-Image/1.0' is reported as 'Googlebot-Image' rather than as the
    shorter, earlier 'Googlebot' entry. Matches of equal length keep the
    entry that appears first in BOT_PATTERNS.

    Args:
        user_agent: Raw User-Agent value; may be None, empty or the
            placeholder 'Unknown'.

    Returns:
        The matching BOT_PATTERNS key, or 'Unbekannt' when the user agent is
        missing or no pattern matches.
    """
    if not user_agent or user_agent == 'Unknown':
        return 'Unbekannt'

    best_name = 'Unbekannt'
    best_length = -1  # -1 so that even a zero-length match would be accepted
    for bot_name, pattern in BOT_PATTERNS.items():
        match = re.search(pattern, user_agent, re.IGNORECASE)
        if match is None:
            continue
        length = match.end() - match.start()
        # Strictly greater: on a tie the earlier table entry is kept.
        if length > best_length:
            best_name = bot_name
            best_length = length

    return best_name
|
||||
|
||||
|
||||
# PHP GeoIP blocking script (no exec, just logging)
|
||||
GEOIP_SCRIPT = '''<?php
|
||||
/**
|
||||
@@ -488,6 +635,34 @@ def get_shop_mode(shop):
|
||||
return "php+crowdsec"
|
||||
|
||||
|
||||
def get_shop_activation_time(shop):
    """Return the activation timestamp recorded for a shop.

    Reads ACTIVE_SHOPS_FILE, which is expected to hold a JSON object mapping
    shop names to objects with an ISO-8601 "activated" string.

    Args:
        shop: Shop name used as key in the active-shops file.

    Returns:
        A naive datetime in local time, or None when the file is missing or
        unreadable, is not valid JSON, has no usable entry for the shop, or
        the stored timestamp cannot be parsed.
    """
    try:
        with open(ACTIVE_SHOPS_FILE, 'r', encoding='utf-8') as f:
            shops = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: invalid JSON or undecodable bytes.
        return None

    entry = shops.get(shop) if isinstance(shops, dict) else None
    activated_str = entry.get("activated") if isinstance(entry, dict) else None
    if not activated_str:
        return None

    try:
        activated = datetime.fromisoformat(activated_str)
    except (TypeError, ValueError):
        return None

    if activated.tzinfo is not None:
        # Callers subtract this value from the naive datetime.now(); mixing
        # aware and naive datetimes raises TypeError, so normalise to naive
        # local time.
        activated = activated.astimezone().replace(tzinfo=None)
    return activated
|
||||
|
||||
|
||||
def format_duration(minutes):
    """Render a duration given in minutes as a short human-readable string.

    Below one hour the result is "<m>m", below one day "<h>h <m>m", and from
    one day upwards "<d>d <h>h". Fractional parts are truncated.
    """
    if minutes >= 60:
        total_hours = minutes / 60
        if total_hours >= 24:
            # A day or more: whole days plus the hours left in the last day.
            return f"{int(total_hours / 24)}d {int(total_hours % 24)}h"
        return f"{int(total_hours)}h {int(minutes % 60)}m"
    return f"{int(minutes)}m"
|
||||
|
||||
|
||||
def remove_shop_from_active(shop):
|
||||
"""Remove shop from active shops tracking"""
|
||||
if not os.path.isfile(ACTIVE_SHOPS_FILE):
|
||||
@@ -1041,26 +1216,46 @@ def deactivate_all_shops():
|
||||
|
||||
|
||||
def _extract_log_field(line, marker):
    """Return the stripped text between `marker` and the next ' |', or None if the marker is absent."""
    _, found, rest = line.partition(marker)
    if not found:
        return None
    return rest.partition(' |')[0].strip()


def get_shop_log_stats(shop):
    """Collect block statistics from a shop's PHP GeoIP log.

    Every line of the log counts as one block. Lines are expected to look
    like ``[timestamp] IP: x.x.x.x | UA: user_agent | URI: /path``; lines
    without an IP field are counted but not attributed to any address.

    Args:
        shop: Shop directory name below VHOSTS_DIR.

    Returns:
        A tuple ``(php_blocks, ips, activation_time)``: the number of log
        lines, a dict mapping each IP to ``{'count': N, 'ua': user_agent}``
        (``ua`` is 'Unknown' until a line for that IP carries a non-empty
        user agent), and the value of get_shop_activation_time(shop).
    """
    log_file = os.path.join(VHOSTS_DIR, shop, 'httpdocs', LOG_FILE)

    php_blocks = 0
    ips = {}  # ip -> {'count': N, 'ua': user_agent}

    if os.path.isfile(log_file):
        # User agents are client-supplied and may contain bytes that are not
        # valid UTF-8; replace them instead of aborting the whole report
        # with a UnicodeDecodeError.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                php_blocks += 1

                ip = _extract_log_field(line, 'IP: ')
                if not ip:
                    continue

                # An empty UA field is treated like a missing one so that a
                # later line can still supply a real user agent.
                ua = _extract_log_field(line, 'UA: ') or 'Unknown'

                entry = ips.setdefault(ip, {'count': 0, 'ua': ua})
                entry['count'] += 1
                # Update UA if we have a better one (not Unknown)
                if ua != 'Unknown' and entry['ua'] == 'Unknown':
                    entry['ua'] = ua

    # Get activation time
    activation_time = get_shop_activation_time(shop)

    return php_blocks, ips, activation_time
|
||||
|
||||
|
||||
def get_crowdsec_stats_by_shop():
|
||||
@@ -1091,34 +1286,64 @@ def show_all_logs():
|
||||
print("\n⚠️ Keine aktiven Shops")
|
||||
return
|
||||
|
||||
print(f"\n{'═' * 60}")
|
||||
print(f"\n{'═' * 70}")
|
||||
print(" 📊 GESAMTÜBERSICHT ALLER SHOPS")
|
||||
print(f"{'═' * 60}")
|
||||
print(f"{'═' * 70}")
|
||||
|
||||
total_php_blocks = 0
|
||||
shop_php_stats = {}
|
||||
all_ips = {}
|
||||
shop_php_stats = {} # shop -> {'blocks': N, 'activation': datetime, 'req_min': float}
|
||||
all_ips = {} # ip -> {'count': N, 'ua': user_agent}
|
||||
total_minutes = 0
|
||||
|
||||
# Collect PHP stats
|
||||
for shop in active_shops:
|
||||
blocks, ips = get_shop_log_stats(shop)
|
||||
blocks, ips, activation_time = get_shop_log_stats(shop)
|
||||
total_php_blocks += blocks
|
||||
shop_php_stats[shop] = blocks
|
||||
|
||||
for ip, count in ips.items():
|
||||
all_ips[ip] = all_ips.get(ip, 0) + count
|
||||
# Calculate runtime and req/min
|
||||
if activation_time:
|
||||
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
|
||||
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
|
||||
else:
|
||||
runtime_minutes = 0
|
||||
req_min = 0
|
||||
|
||||
shop_php_stats[shop] = {
|
||||
'blocks': blocks,
|
||||
'activation': activation_time,
|
||||
'runtime_minutes': runtime_minutes,
|
||||
'req_min': req_min
|
||||
}
|
||||
|
||||
if runtime_minutes > total_minutes:
|
||||
total_minutes = runtime_minutes
|
||||
|
||||
for ip, data in ips.items():
|
||||
if ip not in all_ips:
|
||||
all_ips[ip] = {'count': 0, 'ua': data['ua']}
|
||||
all_ips[ip]['count'] += data['count']
|
||||
# Keep the most informative UA
|
||||
if data['ua'] != 'Unknown' and all_ips[ip]['ua'] == 'Unknown':
|
||||
all_ips[ip]['ua'] = data['ua']
|
||||
|
||||
# Calculate total req/min
|
||||
total_req_min = total_php_blocks / total_minutes if total_minutes > 0 else 0
|
||||
|
||||
# Get CrowdSec stats
|
||||
crowdsec_stats = get_crowdsec_stats_by_shop()
|
||||
total_crowdsec = sum(crowdsec_stats.values())
|
||||
|
||||
# Display PHP blocks
|
||||
print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks}")
|
||||
# Display PHP blocks with req/min
|
||||
print(f"\n📝 PHP-Blocks gesamt: {total_php_blocks} (⌀ {total_req_min:.1f} req/min, Laufzeit: {format_duration(total_minutes)})")
|
||||
if shop_php_stats:
|
||||
for shop in sorted(shop_php_stats.keys()):
|
||||
count = shop_php_stats[shop]
|
||||
bar = "█" * min(count // 10, 20) if count > 0 else ""
|
||||
print(f" ├─ {shop}: {count} {bar}")
|
||||
stats = shop_php_stats[shop]
|
||||
count = stats['blocks']
|
||||
req_min = stats['req_min']
|
||||
runtime = stats['runtime_minutes']
|
||||
bar = "█" * min(int(req_min * 2), 20) if req_min > 0 else ""
|
||||
runtime_str = format_duration(runtime) if runtime > 0 else "?"
|
||||
print(f" ├─ {shop}: {count} ({req_min:.1f} req/min, seit {runtime_str}) {bar}")
|
||||
|
||||
# Display CrowdSec bans
|
||||
print(f"\n🛡️ CrowdSec-Bans gesamt: {total_crowdsec}")
|
||||
@@ -1132,15 +1357,18 @@ def show_all_logs():
|
||||
else:
|
||||
print(" └─ CrowdSec nicht verfügbar")
|
||||
|
||||
# Top blocked IPs
|
||||
# Top blocked IPs with bot detection
|
||||
if all_ips:
|
||||
print(f"\n🔥 Top 100 blockierte IPs (alle Shops):")
|
||||
sorted_ips = sorted(all_ips.items(), key=lambda x: x[1], reverse=True)[:100]
|
||||
for ip, count in sorted_ips:
|
||||
sorted_ips = sorted(all_ips.items(), key=lambda x: x[1]['count'], reverse=True)[:100]
|
||||
for ip, data in sorted_ips:
|
||||
count = data['count']
|
||||
ua = data['ua']
|
||||
bot_name = detect_bot(ua)
|
||||
bar = "█" * min(count // 5, 20) if count > 0 else "█"
|
||||
print(f" {ip}: {count} {bar}")
|
||||
print(f" {ip} ({bot_name}): {count} {bar}")
|
||||
|
||||
print(f"\n{'═' * 60}")
|
||||
print(f"\n{'═' * 70}")
|
||||
|
||||
# Wait for user
|
||||
input("\nDrücke Enter um fortzufahren...")
|
||||
@@ -1152,25 +1380,57 @@ def show_logs(shop):
|
||||
log_file = os.path.join(httpdocs, LOG_FILE)
|
||||
shop_mode = get_shop_mode(shop)
|
||||
|
||||
# Get stats
|
||||
blocks, ips, activation_time = get_shop_log_stats(shop)
|
||||
|
||||
# Calculate runtime and req/min
|
||||
if activation_time:
|
||||
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
|
||||
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
|
||||
runtime_str = format_duration(runtime_minutes)
|
||||
activation_str = activation_time.strftime('%Y-%m-%d %H:%M:%S')
|
||||
else:
|
||||
runtime_minutes = 0
|
||||
req_min = 0
|
||||
runtime_str = "unbekannt"
|
||||
activation_str = "unbekannt"
|
||||
|
||||
mode_display = "PHP + CrowdSec 🛡️" if shop_mode == "php+crowdsec" else "Nur PHP 📝"
|
||||
print(f"\n📊 Logs für {shop} [{mode_display}]")
|
||||
|
||||
print(f"\n{'═' * 70}")
|
||||
print(f"📊 Logs für {shop} [{mode_display}]")
|
||||
print(f"{'═' * 70}")
|
||||
print(f"\n⏱️ Aktiviert: {activation_str}")
|
||||
print(f"⏱️ Laufzeit: {runtime_str}")
|
||||
print(f"📈 Blocks: {blocks} ({req_min:.1f} req/min)")
|
||||
|
||||
if os.path.isfile(log_file):
|
||||
print(f"\n📝 PHP-Blocks:")
|
||||
print("=" * 80)
|
||||
print(f"\n📝 Letzte 50 PHP-Blocks:")
|
||||
print("=" * 70)
|
||||
with open(log_file, 'r') as f:
|
||||
lines = f.readlines()
|
||||
for line in lines[-50:]:
|
||||
print(line.rstrip())
|
||||
print("=" * 80)
|
||||
print("=" * 70)
|
||||
print(f"Gesamt: {len(lines)}")
|
||||
|
||||
# Show top IPs with bot detection
|
||||
if ips:
|
||||
print(f"\n🔥 Top 20 blockierte IPs:")
|
||||
sorted_ips = sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:20]
|
||||
for ip, data in sorted_ips:
|
||||
count = data['count']
|
||||
ua = data['ua']
|
||||
bot_name = detect_bot(ua)
|
||||
bar = "█" * min(count // 5, 20) if count > 0 else "█"
|
||||
print(f" {ip} ({bot_name}): {count} {bar}")
|
||||
else:
|
||||
print(f"ℹ️ Keine PHP-Logs für {shop}")
|
||||
print(f"\nℹ️ Keine PHP-Logs für {shop}")
|
||||
|
||||
# Only show CrowdSec decisions if mode is php+crowdsec
|
||||
if shop_mode == "php+crowdsec" and check_crowdsec():
|
||||
print(f"\n🛡️ CrowdSec Decisions:")
|
||||
print("=" * 80)
|
||||
print("=" * 70)
|
||||
|
||||
# Use raw output with --limit 0 (no pagination)
|
||||
code, stdout, _ = run_command("cscli decisions list -o raw --limit 0")
|
||||
@@ -1207,7 +1467,7 @@ def show_logs(shop):
|
||||
else:
|
||||
print("Konnte Decisions nicht abrufen")
|
||||
|
||||
print("=" * 80)
|
||||
print("=" * 70)
|
||||
elif shop_mode == "php-only":
|
||||
print(f"\n📝 CrowdSec-Synchronisation ist für diesen Shop deaktiviert (PHP-only Modus)")
|
||||
|
||||
@@ -1348,7 +1608,18 @@ def main():
|
||||
mode = get_shop_mode(shop)
|
||||
mode_icon = "🛡️" if mode == "php+crowdsec" else "📝"
|
||||
mode_text = "PHP+CS" if mode == "php+crowdsec" else "PHP"
|
||||
print(f" ✓ {shop} [{mode_text}] {mode_icon}")
|
||||
|
||||
# Get stats
|
||||
blocks, _, activation_time = get_shop_log_stats(shop)
|
||||
if activation_time:
|
||||
runtime_minutes = (datetime.now() - activation_time).total_seconds() / 60
|
||||
req_min = blocks / runtime_minutes if runtime_minutes > 0 else 0
|
||||
runtime_str = format_duration(runtime_minutes)
|
||||
else:
|
||||
req_min = 0
|
||||
runtime_str = "?"
|
||||
|
||||
print(f" ✓ {shop} [{mode_text}] {mode_icon} - {blocks} blocks ({req_min:.1f} req/min, {runtime_str})")
|
||||
|
||||
elif choice == "5":
|
||||
activate_all_shops()
|
||||
|
||||
Reference in New Issue
Block a user