From 5ccbdb7f411bc03be567a095219011e57f3b4dee Mon Sep 17 00:00:00 2001 From: thomasciesla Date: Tue, 16 Dec 2025 16:07:52 +0100 Subject: [PATCH] geoip_shop_manager.py aktualisiert --- geoip_shop_manager.py | 220 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 191 insertions(+), 29 deletions(-) diff --git a/geoip_shop_manager.py b/geoip_shop_manager.py index dd47b0b..8f78678 100644 --- a/geoip_shop_manager.py +++ b/geoip_shop_manager.py @@ -8,6 +8,7 @@ Supports two modes: - geoip: GeoIP blocking (only allowed regions can access) - bot: Rate-limit bots by bot-type, shop remains globally accessible +v4.1.0: IP-basierte Bot-Erkennung (für Bots die sich tarnen) v4.0.0: Bot-Rate-Limiting nach Bot-Typ (nicht IP), CrowdSec entfernt """ @@ -19,6 +20,7 @@ import json import time import re import socket +import ipaddress from datetime import datetime, timedelta from pathlib import Path @@ -85,6 +87,85 @@ GEO_REGIONS = { } } +# ============================================================================= +# BOT IP RANGES - Für Bots die sich mit normalem User-Agent tarnen +# ============================================================================= +BOT_IP_RANGES = { + # Alibaba Cloud / Alibaba Spider - tarnt sich oft mit normalem UA + 'Alibaba-Bot': [ + '43.104.26.0/24', # Bekanntes Alibaba-Crawling-Netz + '47.88.0.0/16', # Alibaba US + '47.89.0.0/16', # Alibaba US + '47.90.0.0/16', # Alibaba US + '47.91.0.0/16', # Alibaba US + '47.74.0.0/16', # Alibaba SG + '47.75.0.0/16', # Alibaba SG + '47.76.0.0/16', # Alibaba SG + '47.52.0.0/16', # Alibaba HK + '47.56.0.0/16', # Alibaba HK + '47.57.0.0/16', # Alibaba HK + '8.208.0.0/16', # Alibaba Cloud Global + '8.209.0.0/16', # Alibaba Cloud Global + '8.210.0.0/16', # Alibaba Cloud Global + '8.211.0.0/16', # Alibaba Cloud Global + '8.212.0.0/16', # Alibaba Cloud Global + '8.213.0.0/16', # Alibaba Cloud Global + '8.214.0.0/16', # Alibaba Cloud Global + '8.215.0.0/16', # Alibaba Cloud Global + '8.216.0.0/16', # Alibaba Cloud 
Global + '8.217.0.0/16', # Alibaba Cloud Global + '8.218.0.0/16', # Alibaba Cloud Global + '8.219.0.0/16', # Alibaba Cloud Global + '39.96.0.0/16', # Alibaba China + '39.97.0.0/16', # Alibaba China + '39.98.0.0/16', # Alibaba China + '39.99.0.0/16', # Alibaba China + '39.100.0.0/16', # Alibaba China + '39.101.0.0/16', # Alibaba China + '39.102.0.0/16', # Alibaba China + '39.103.0.0/16', # Alibaba China + '39.104.0.0/16', # Alibaba China + '39.105.0.0/16', # Alibaba China + '39.106.0.0/16', # Alibaba China + '39.107.0.0/16', # Alibaba China + '39.108.0.0/16', # Alibaba China + '101.132.0.0/16', # Alibaba China + '101.133.0.0/16', # Alibaba China + '106.14.0.0/16', # Alibaba China + '106.15.0.0/16', # Alibaba China + '112.124.0.0/16', # Alibaba China + '114.55.0.0/16', # Alibaba China + '115.28.0.0/16', # Alibaba China + '115.29.0.0/16', # Alibaba China + '116.62.0.0/16', # Alibaba China + '118.31.0.0/16', # Alibaba China + '119.23.0.0/16', # Alibaba China + '120.24.0.0/16', # Alibaba China + '120.25.0.0/16', # Alibaba China + '120.26.0.0/16', # Alibaba China + '120.27.0.0/16', # Alibaba China + '120.55.0.0/16', # Alibaba China + '120.76.0.0/16', # Alibaba China + '120.77.0.0/16', # Alibaba China + '120.78.0.0/16', # Alibaba China + '120.79.0.0/16', # Alibaba China + '121.40.0.0/16', # Alibaba China + '121.41.0.0/16', # Alibaba China + '121.42.0.0/16', # Alibaba China + '121.43.0.0/16', # Alibaba China + '121.196.0.0/16', # Alibaba China + '121.197.0.0/16', # Alibaba China + '121.198.0.0/16', # Alibaba China + '121.199.0.0/16', # Alibaba China + '139.196.0.0/16', # Alibaba China + '139.224.0.0/16', # Alibaba China + '140.205.0.0/16', # Alibaba China + '182.92.0.0/16', # Alibaba China + '203.107.0.0/16', # Alibaba DNS + '203.119.128.0/17', # Alibaba + ], +} + # ============================================================================= # BOT DETECTION # ============================================================================= @@ -248,6 +329,10 @@ BOT_PATTERNS = 
def ip_in_cidr(ip_str, cidr_str):
    """Return True if the address ``ip_str`` lies inside the network ``cidr_str``.

    Args:
        ip_str: IPv4 or IPv6 address, normally as text. Surrounding
            whitespace in a string is ignored. An IPv4-mapped IPv6 address
            (``::ffff:a.b.c.d``) is compared through its embedded IPv4
            address when the network is an IPv4 network.
        cidr_str: Network in CIDR notation, e.g. ``'47.88.0.0/16'``. Host
            bits may be set, because the network is parsed non-strictly.

    Returns:
        bool: True when the address belongs to the network. False when it
        does not, when address and network have different IP versions, or
        when either argument cannot be parsed.
    """
    # Values taken from log lines may carry stray whitespace; ipaddress
    # rejects that, which would silently turn a real match into False.
    if isinstance(ip_str, str):
        ip_str = ip_str.strip()
    if isinstance(cidr_str, str):
        cidr_str = cidr_str.strip()

    try:
        ip = ipaddress.ip_address(ip_str)
        network = ipaddress.ip_network(cidr_str, strict=False)
    except ValueError:
        # Unparseable input is treated as "no match" rather than an error.
        return False

    # Dual-stack listeners report IPv4 clients as ::ffff:a.b.c.d. Unwrap
    # those so they can still be matched against IPv4 ranges.
    mapped = getattr(ip, 'ipv4_mapped', None)
    if mapped is not None and network.version == 4:
        ip = mapped

    return ip in network
def generate_php_bot_ip_ranges(bot_ip_ranges=None):
    """Render bot IP ranges as the entries of a PHP associative array.

    Each bot becomes one ``'name' => ['cidr', 'cidr', ...]`` entry. The
    entries are joined with a comma and a newline so the result can be
    placed between the brackets of a PHP array literal.

    Args:
        bot_ip_ranges: Optional mapping of bot name to an iterable of CIDR
            strings. Defaults to the module-level ``BOT_IP_RANGES``.

    Returns:
        str: The PHP array entries, or an empty string for an empty mapping.
    """
    if bot_ip_ranges is None:
        bot_ip_ranges = BOT_IP_RANGES

    def php_quote(value):
        # Inside a PHP single-quoted string only backslash and the single
        # quote are special. Escape the backslash first so the backslash
        # added for a quote is not doubled afterwards.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    entries = []
    for bot_name, ip_ranges in bot_ip_ranges.items():
        ranges_str = ", ".join(php_quote(cidr) for cidr in ip_ranges)
        entries.append(f"{php_quote(bot_name)} => [{ranges_str}]")
    return ",\n ".join(entries)
null) {{ - $ua_lower = strtolower($user_agent); - foreach ($generic_patterns as $pattern) {{ - if (strpos($ua_lower, $pattern) !== false) {{ - $detected_bot = "Bot ($pattern)"; +// === STEP 1: User-Agent-basierte Erkennung (falls IP nicht erkannt) === +if ($detected_bot === null && !empty($user_agent)) {{ + // Check specific patterns first + foreach ($bot_patterns as $bot_name => $pattern) {{ + if (preg_match($pattern, $user_agent)) {{ + $detected_bot = $bot_name; break; }} }} + + // Check generic patterns as fallback + if ($detected_bot === null) {{ + $ua_lower = strtolower($user_agent); + foreach ($generic_patterns as $pattern) {{ + if (strpos($ua_lower, $pattern) !== false) {{ + $detected_bot = "Bot ($pattern)"; + break; + }} + }} + }} }} // Not a bot - allow through without any rate limiting @@ -768,7 +915,7 @@ if ($detected_bot === null) return; // === Create hash based on BOT-TYPE only (not IP!) === $bot_hash = md5($detected_bot); -// === STEP 1: Check if this bot-type is banned === +// === STEP 2: Check if this bot-type is banned === $ban_file = "$bans_dir/$bot_hash.ban"; if (file_exists($ban_file)) {{ $ban_content = @file_get_contents($ban_file); @@ -787,7 +934,7 @@ if (file_exists($ban_file)) {{ @unlink($ban_file); }} -// === STEP 2: Rate-Limit Check for this bot-type === +// === STEP 3: Rate-Limit Check for this bot-type === $count_file = "$counts_dir/$bot_hash.count"; $current_time = time(); $count = 1; @@ -823,7 +970,7 @@ if (file_exists($count_file)) {{ @file_put_contents($count_file, "$window_start|$count", LOCK_EX); }} -// === STEP 3: Check if limit exceeded === +// === STEP 4: Check if limit exceeded === if ($count > $rate_limit) {{ // Create ban for this bot-type (store timestamp|botname) $ban_until = $current_time + $ban_duration; @@ -840,12 +987,12 @@ if ($count > $rate_limit) {{ exit; }} -// === STEP 4: Under limit - log and ALLOW through === +// === STEP 5: Under limit - log and ALLOW through === $timestamp = date('Y-m-d H:i:s'); $uri = 
$_SERVER['REQUEST_URI'] ?? '/'; @file_put_contents($log_file, "[$timestamp] BOT: $detected_bot | IP: $visitor_ip | Count: $count/$rate_limit | URI: $uri\\n", FILE_APPEND | LOCK_EX); -// === STEP 5: Probabilistic cleanup === +// === STEP 6: Probabilistic cleanup === if (rand(1, $cleanup_probability) === 1) {{ $now = time(); foreach (glob("$bans_dir/*.ban") as $f) {{ @@ -1163,6 +1310,7 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_ ratelimit_dir=RATELIMIT_DIR, bot_patterns=generate_php_bot_patterns(), generic_patterns=generate_php_generic_patterns(), + bot_ip_ranges=generate_php_bot_ip_ranges(), rate_limit=rate_limit, ban_duration=ban_duration, ban_duration_min=ban_duration // 60 @@ -1213,6 +1361,9 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_ else: add_shop_to_active(shop, mode, geo_region) + # Count IP ranges for bot detection + total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values()) + if not silent: print("\n" + "=" * 60) print(f"✅ {region_info['icon']} {region_info['name']} aktiviert") @@ -1223,12 +1374,14 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_ print(f" 🛡️ Fail-Open: Bei Cache-Fehlern wird Traffic durchgelassen") else: print(f" 🤖 {len(BOT_PATTERNS)} Bot-Patterns + {len(GENERIC_BOT_PATTERNS)} generische Patterns") + print(f" 🌐 {total_ip_ranges} IP-Ranges für {len(BOT_IP_RANGES)} getarnte Bots") if rate_limit == 0: print(f" 🚫 Rate-Limit: 0 (Bots werden SOFORT gebannt!)") else: print(f" 🚦 Rate-Limit: {rate_limit} req/min PRO BOT-TYP") print(f" ⏱️ Ban-Dauer: {ban_duration // 60} min") print(f" ℹ️ Alle Googlebot-Requests teilen sich EIN Limit!") + print(f" ℹ️ Alle Alibaba-IPs teilen sich EIN Limit!") print(f" Gültig bis: {expiry.strftime('%Y-%m-%d %H:%M:%S CET')}") print("=" * 60) @@ -1378,6 +1531,7 @@ def activate_all_shops(): ratelimit_dir=RATELIMIT_DIR, bot_patterns=generate_php_bot_patterns(), 
generic_patterns=generate_php_generic_patterns(), + bot_ip_ranges=generate_php_bot_ip_ranges(), rate_limit=rate_limit, ban_duration=ban_duration, ban_duration_min=ban_duration // 60 @@ -1399,7 +1553,8 @@ def activate_all_shops(): if bot_mode: add_shop_to_active(shop, mode, geo_region, rate_limit, ban_duration) - print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Bot-Patterns, {rate_limit} req/min)") + total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values()) + print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Patterns, {total_ip_ranges} IP-Ranges, {rate_limit} req/min)") else: print(f" ⏳ Cache generieren...") cache_ok, count, _ = generate_and_validate_cache(httpdocs, geo_region) @@ -1526,6 +1681,7 @@ def activate_direct_shops_only(): ratelimit_dir=RATELIMIT_DIR, bot_patterns=generate_php_bot_patterns(), generic_patterns=generate_php_generic_patterns(), + bot_ip_ranges=generate_php_bot_ip_ranges(), rate_limit=rate_limit, ban_duration=ban_duration, ban_duration_min=ban_duration // 60 @@ -1547,7 +1703,8 @@ def activate_direct_shops_only(): if bot_mode: add_shop_to_active(shop, mode, geo_region, rate_limit, ban_duration) - print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Bot-Patterns, {rate_limit} req/min)") + total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values()) + print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Patterns, {total_ip_ranges} IP-Ranges, {rate_limit} req/min)") else: print(f" ⏳ Cache generieren...") cache_ok, count, _ = generate_and_validate_cache(httpdocs, geo_region) @@ -1671,7 +1828,7 @@ def get_shop_log_stats(shop): if detected_bot: bots[detected_bot] = bots.get(detected_bot, 0) + 1 elif ua and ua != 'Unknown': - bot_name = detect_bot(ua) + bot_name = detect_bot(ua, ip) if bot_name != 'Unbekannt': bots[bot_name] = bots.get(bot_name, 0) + 1 @@ -1724,7 +1881,9 @@ def show_logs(shop): print(f"✅ Cache: {count:,} Ranges" if valid else f"⚠️ Cache: {err}") else: rate_limit, ban_duration = get_shop_rate_limit_config(shop) + total_ip_ranges = sum(len(ranges) 
for ranges in BOT_IP_RANGES.values()) print(f"🤖 Bot-Patterns: {len(BOT_PATTERNS)} + {len(GENERIC_BOT_PATTERNS)} generische") + print(f"🌐 IP-basierte Erkennung: {total_ip_ranges} Ranges für {len(BOT_IP_RANGES)} Bot(s)") if rate_limit is not None and ban_duration is not None: print(f"🚦 Rate-Limit: {rate_limit} req/min PRO BOT-TYP, Ban: {ban_duration // 60} min") print(f"🚫 Bans: {total_bans} ausgelöst, {active_bans} Bot-Typen aktuell gebannt") @@ -1746,7 +1905,7 @@ def show_logs(shop): if ips: print(f"\n🔥 Top 10 IPs:") for ip, data in sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:10]: - bot = data.get('bot') or detect_bot(data['ua']) + bot = data.get('bot') or detect_bot(data['ua'], ip) print(f" {ip} ({bot}): {data['count']}x") @@ -1882,7 +2041,7 @@ def show_all_logs(): for ip, data in sorted_ips: count = data['count'] ua = data['ua'] - bot_name = data.get('bot') or detect_bot(ua) + bot_name = data.get('bot') or detect_bot(ua, ip) shops_data = data['shops'] ip_req_min = count / total_minutes if total_minutes > 0 else 0 @@ -1920,11 +2079,14 @@ def show_all_logs(): def main(): + total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values()) + print("\n" + "=" * 60) - print(" GeoIP Shop Blocker Manager v4.0.0") + print(" GeoIP Shop Blocker Manager v4.1.0") print(" 🇩🇪🇦🇹🇨🇭 DACH | 🇪🇺 Eurozone+GB | 🤖 Bot-Rate-Limiting") print(" 🛡️ Mit Cache-Validierung und Fail-Open") print(" 🚦 Rate-Limiting nach BOT-TYP (nicht IP)") + print(f" 🌐 IP-basierte Erkennung: {total_ip_ranges} Ranges für {len(BOT_IP_RANGES)} Bot(s)") print("=" * 60) while True: