geoip_shop_manager.py aktualisiert

This commit is contained in:
2025-12-16 16:07:52 +01:00
parent 85b7286dc6
commit 5ccbdb7f41

View File

@@ -8,6 +8,7 @@ Supports two modes:
- geoip: GeoIP blocking (only allowed regions can access)
- bot: Rate-limit bots by bot-type, shop remains globally accessible
v4.1.0: IP-basierte Bot-Erkennung (für Bots die sich tarnen)
v4.0.0: Bot-Rate-Limiting nach Bot-Typ (nicht IP), CrowdSec entfernt
"""
@@ -19,6 +20,7 @@ import json
import time
import re
import socket
import ipaddress
from datetime import datetime, timedelta
from pathlib import Path
@@ -85,6 +87,85 @@ GEO_REGIONS = {
}
}
# =============================================================================
# BOT IP RANGES - for bots that disguise themselves with a normal User-Agent
# =============================================================================
# Maps a bot display name to the list of CIDR networks attributed to it.
# detect_bot() checks a visitor IP against these networks before looking at
# the User-Agent, and generate_php_bot_ip_ranges() renders the same table
# into the generated PHP script.
# NOTE(review): the range-to-owner attributions below are taken from the
# inline labels only - verify against current allocation data before
# extending or relying on them.
BOT_IP_RANGES = {
    # Alibaba Cloud / Alibaba Spider - often disguises itself with a normal UA
    'Alibaba-Bot': [
        '43.104.26.0/24', # Known Alibaba crawling network
        '47.88.0.0/16', # Alibaba US
        '47.89.0.0/16', # Alibaba US
        '47.90.0.0/16', # Alibaba US
        '47.91.0.0/16', # Alibaba US
        '47.74.0.0/16', # Alibaba SG
        '47.75.0.0/16', # Alibaba SG
        '47.76.0.0/16', # Alibaba SG
        '47.52.0.0/16', # Alibaba HK
        '47.56.0.0/16', # Alibaba HK
        '47.57.0.0/16', # Alibaba HK
        '8.208.0.0/16', # Alibaba Cloud Global
        '8.209.0.0/16', # Alibaba Cloud Global
        '8.210.0.0/16', # Alibaba Cloud Global
        '8.211.0.0/16', # Alibaba Cloud Global
        '8.212.0.0/16', # Alibaba Cloud Global
        '8.213.0.0/16', # Alibaba Cloud Global
        '8.214.0.0/16', # Alibaba Cloud Global
        '8.215.0.0/16', # Alibaba Cloud Global
        '8.216.0.0/16', # Alibaba Cloud Global
        '8.217.0.0/16', # Alibaba Cloud Global
        '8.218.0.0/16', # Alibaba Cloud Global
        '8.219.0.0/16', # Alibaba Cloud Global
        '39.96.0.0/16', # Alibaba China
        '39.97.0.0/16', # Alibaba China
        '39.98.0.0/16', # Alibaba China
        '39.99.0.0/16', # Alibaba China
        '39.100.0.0/16', # Alibaba China
        '39.101.0.0/16', # Alibaba China
        '39.102.0.0/16', # Alibaba China
        '39.103.0.0/16', # Alibaba China
        '39.104.0.0/16', # Alibaba China
        '39.105.0.0/16', # Alibaba China
        '39.106.0.0/16', # Alibaba China
        '39.107.0.0/16', # Alibaba China
        '39.108.0.0/16', # Alibaba China
        '101.132.0.0/16', # Alibaba China
        '101.133.0.0/16', # Alibaba China
        '106.14.0.0/16', # Alibaba China
        '106.15.0.0/16', # Alibaba China
        '112.124.0.0/16', # Alibaba China
        '114.55.0.0/16', # Alibaba China
        '115.28.0.0/16', # Alibaba China
        '115.29.0.0/16', # Alibaba China
        '116.62.0.0/16', # Alibaba China
        '118.31.0.0/16', # Alibaba China
        '119.23.0.0/16', # Alibaba China
        '120.24.0.0/16', # Alibaba China
        '120.25.0.0/16', # Alibaba China
        '120.26.0.0/16', # Alibaba China
        '120.27.0.0/16', # Alibaba China
        '120.55.0.0/16', # Alibaba China
        '120.76.0.0/16', # Alibaba China
        '120.77.0.0/16', # Alibaba China
        '120.78.0.0/16', # Alibaba China
        '120.79.0.0/16', # Alibaba China
        '121.40.0.0/16', # Alibaba China
        '121.41.0.0/16', # Alibaba China
        '121.42.0.0/16', # Alibaba China
        '121.43.0.0/16', # Alibaba China
        '121.196.0.0/16', # Alibaba China
        '121.197.0.0/16', # Alibaba China
        '121.198.0.0/16', # Alibaba China
        '121.199.0.0/16', # Alibaba China
        '139.196.0.0/16', # Alibaba China
        '139.224.0.0/16', # Alibaba China
        '140.205.0.0/16', # Alibaba China
        '182.92.0.0/16', # Alibaba China
        '203.107.0.0/16', # Alibaba DNS
        '203.119.128.0/17', # Alibaba
    ],
}
# =============================================================================
# BOT DETECTION
# =============================================================================
@@ -248,6 +329,10 @@ BOT_PATTERNS = {
'Amazon-Kendra': r'amazon-kendra',
'AmazonBuyForMe': r'amazonbuyforme',
'AMZNKAssocBot': r'amznkassocbot',
# Alibaba - auch per User-Agent erkennbar (zusätzlich zu IP-Erkennung)
'Alibaba-Bot': r'alibaba|alibabagroup|aliyun|alicdn|alimama|taobao|tmall|1688\.com',
'AlibabaSpider': r'alibabaspider',
'Aliyun': r'aliyun',
'GeedoShopProductFinder': r'geedoshopproductfinder',
'Geedo': r'geedo',
'ShopWiki': r'shopwiki',
@@ -461,17 +546,38 @@ GENERIC_BOT_PATTERNS = [
]
def detect_bot(user_agent):
"""Erkennt Bots anhand des User-Agents. Gibt den Anzeigenamen zurück."""
def ip_in_cidr(ip_str, cidr_str):
    """Return True if the address ``ip_str`` lies inside the network ``cidr_str``.

    Args:
        ip_str: Textual IPv4 or IPv6 address (e.g. ``'47.88.1.1'``).
        cidr_str: Network in CIDR notation (e.g. ``'47.88.0.0/16'``). Host
            bits may be set; they are masked off (``strict=False``).

    Returns:
        bool: True on a match. False when there is no match, when either
        argument cannot be parsed, or when the address families differ.

    An IPv4-mapped IPv6 address (``::ffff:a.b.c.d``) is compared against an
    IPv4 network using its embedded IPv4 address, so clients logged in that
    form on dual-stack listeners are still matched.
    """
    # Keep the try body limited to the two parsing calls that can raise.
    try:
        address = ipaddress.ip_address(ip_str)
        network = ipaddress.ip_network(cidr_str, strict=False)
    except ValueError:
        return False

    if address.version != network.version:
        # Only an IPv6 address that wraps an IPv4 one can still match an
        # IPv4 network; every other cross-family combination is a miss.
        mapped = getattr(address, 'ipv4_mapped', None)
        if mapped is None or network.version != 4:
            return False
        address = mapped

    return address in network
def detect_bot(user_agent, ip=None):
"""
Erkennt Bots anhand des User-Agents und/oder der IP.
IP-basierte Erkennung hat Priorität (für getarnte Bots).
Gibt den Anzeigenamen zurück.
"""
# SCHRITT 1: IP-basierte Erkennung (höchste Priorität)
if ip:
for bot_name, ip_ranges in BOT_IP_RANGES.items():
for cidr in ip_ranges:
if ip_in_cidr(ip, cidr):
return bot_name
if not user_agent or user_agent == 'Unknown':
return 'Unbekannt'
# Erst spezifische Patterns prüfen
# SCHRITT 2: Spezifische User-Agent Patterns
for bot_name, pattern in BOT_PATTERNS.items():
if re.search(pattern, user_agent, re.IGNORECASE):
return bot_name
# Dann generische Patterns als Fallback
# SCHRITT 3: Generische Patterns als Fallback
ua_lower = user_agent.lower()
for pattern in GENERIC_BOT_PATTERNS:
if pattern in ua_lower:
@@ -530,6 +636,16 @@ def generate_php_generic_patterns():
return ", ".join(patterns)
def generate_php_bot_ip_ranges():
    """Render BOT_IP_RANGES as the entries of a PHP associative array.

    Each bot becomes one ``'name' => ['cidr', 'cidr', ...]`` entry; single
    quotes in the bot name are backslash-escaped. The entries are joined
    with a comma and a line break, and the result is meant to be inserted
    between the brackets of a PHP array literal.

    Returns:
        str: The joined entries, or an empty string if no bots are defined.
    """
    entries = []
    for name, cidrs in BOT_IP_RANGES.items():
        escaped_name = name.replace("'", "\\'")
        quoted_cidrs = ", ".join("'{}'".format(cidr) for cidr in cidrs)
        entries.append("'{}' => [{}]".format(escaped_name, quoted_cidrs))
    return ",\n ".join(entries)
# =============================================================================
# CACHE VALIDATION
# =============================================================================
@@ -700,6 +816,7 @@ if (!$is_allowed) {{
# =============================================================================
# PHP TEMPLATES - BOT RATE-LIMITING (By Bot-Type, not IP)
# Mit IP-basierter Bot-Erkennung für getarnte Bots
# =============================================================================
BOT_SCRIPT_TEMPLATE = '''<?php
@@ -708,6 +825,7 @@ BOT_SCRIPT_TEMPLATE = '''<?php
* Valid until: {expiry_date}
* Rate-limits known bots/crawlers BY BOT-TYPE (not by IP)
* All requests from the same bot-type share ONE counter
* Includes IP-based detection for bots that disguise their User-Agent
* Rate-Limit: {rate_limit} req/min, Ban: {ban_duration_min} min
*/
@@ -732,17 +850,45 @@ $user_agent = $_SERVER['HTTP_USER_AGENT'] ?? '';
if (!is_dir($bans_dir)) @mkdir($bans_dir, 0777, true);
if (!is_dir($counts_dir)) @mkdir($counts_dir, 0777, true);
// === Bot Detection ===
// === IP-in-CIDR Check Function ===
function ip_in_cidr($ip, $cidr) {{
if (strpos($cidr, '/') === false) return false;
list($subnet, $mask) = explode('/', $cidr);
$ip_long = ip2long($ip);
$subnet_long = ip2long($subnet);
if ($ip_long === false || $subnet_long === false) return false;
$mask_long = -1 << (32 - (int)$mask);
return ($ip_long & $mask_long) === ($subnet_long & $mask_long);
}}
// === Bot IP Ranges (für getarnte Bots) ===
$bot_ip_ranges = [
{bot_ip_ranges}
];
// === Bot Detection Patterns (User-Agent) ===
$bot_patterns = [
{bot_patterns}
];
$generic_patterns = [{generic_patterns}];
if (empty($user_agent)) return;
// === STEP 0: IP-basierte Bot-Erkennung (höchste Priorität) ===
$detected_bot = null;
if (!empty($visitor_ip)) {{
foreach ($bot_ip_ranges as $bot_name => $ip_ranges) {{
foreach ($ip_ranges as $cidr) {{
if (ip_in_cidr($visitor_ip, $cidr)) {{
$detected_bot = $bot_name;
break 2; // Aus beiden Schleifen ausbrechen
}}
}}
}}
}}
// === STEP 1: User-Agent-basierte Erkennung (falls IP nicht erkannt) ===
if ($detected_bot === null && !empty($user_agent)) {{
// Check specific patterns first
foreach ($bot_patterns as $bot_name => $pattern) {{
if (preg_match($pattern, $user_agent)) {{
@@ -761,6 +907,7 @@ if ($detected_bot === null) {{
}}
}}
}}
}}
// Not a bot - allow through without any rate limiting
if ($detected_bot === null) return;
@@ -768,7 +915,7 @@ if ($detected_bot === null) return;
// === Create hash based on BOT-TYPE only (not IP!) ===
$bot_hash = md5($detected_bot);
// === STEP 1: Check if this bot-type is banned ===
// === STEP 2: Check if this bot-type is banned ===
$ban_file = "$bans_dir/$bot_hash.ban";
if (file_exists($ban_file)) {{
$ban_content = @file_get_contents($ban_file);
@@ -787,7 +934,7 @@ if (file_exists($ban_file)) {{
@unlink($ban_file);
}}
// === STEP 2: Rate-Limit Check for this bot-type ===
// === STEP 3: Rate-Limit Check for this bot-type ===
$count_file = "$counts_dir/$bot_hash.count";
$current_time = time();
$count = 1;
@@ -823,7 +970,7 @@ if (file_exists($count_file)) {{
@file_put_contents($count_file, "$window_start|$count", LOCK_EX);
}}
// === STEP 3: Check if limit exceeded ===
// === STEP 4: Check if limit exceeded ===
if ($count > $rate_limit) {{
// Create ban for this bot-type (store timestamp|botname)
$ban_until = $current_time + $ban_duration;
@@ -840,12 +987,12 @@ if ($count > $rate_limit) {{
exit;
}}
// === STEP 4: Under limit - log and ALLOW through ===
// === STEP 5: Under limit - log and ALLOW through ===
$timestamp = date('Y-m-d H:i:s');
$uri = $_SERVER['REQUEST_URI'] ?? '/';
@file_put_contents($log_file, "[$timestamp] BOT: $detected_bot | IP: $visitor_ip | Count: $count/$rate_limit | URI: $uri\\n", FILE_APPEND | LOCK_EX);
// === STEP 5: Probabilistic cleanup ===
// === STEP 6: Probabilistic cleanup ===
if (rand(1, $cleanup_probability) === 1) {{
$now = time();
foreach (glob("$bans_dir/*.ban") as $f) {{
@@ -1163,6 +1310,7 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_
ratelimit_dir=RATELIMIT_DIR,
bot_patterns=generate_php_bot_patterns(),
generic_patterns=generate_php_generic_patterns(),
bot_ip_ranges=generate_php_bot_ip_ranges(),
rate_limit=rate_limit,
ban_duration=ban_duration,
ban_duration_min=ban_duration // 60
@@ -1213,6 +1361,9 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_
else:
add_shop_to_active(shop, mode, geo_region)
# Count IP ranges for bot detection
total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values())
if not silent:
print("\n" + "=" * 60)
print(f"{region_info['icon']} {region_info['name']} aktiviert")
@@ -1223,12 +1374,14 @@ def activate_blocking(shop, silent=False, mode="geoip", geo_region="dach", rate_
print(f" 🛡️ Fail-Open: Bei Cache-Fehlern wird Traffic durchgelassen")
else:
print(f" 🤖 {len(BOT_PATTERNS)} Bot-Patterns + {len(GENERIC_BOT_PATTERNS)} generische Patterns")
print(f" 🌐 {total_ip_ranges} IP-Ranges für {len(BOT_IP_RANGES)} getarnte Bots")
if rate_limit == 0:
print(f" 🚫 Rate-Limit: 0 (Bots werden SOFORT gebannt!)")
else:
print(f" 🚦 Rate-Limit: {rate_limit} req/min PRO BOT-TYP")
print(f" ⏱️ Ban-Dauer: {ban_duration // 60} min")
print(f" Alle Googlebot-Requests teilen sich EIN Limit!")
print(f" Alle Alibaba-IPs teilen sich EIN Limit!")
print(f" Gültig bis: {expiry.strftime('%Y-%m-%d %H:%M:%S CET')}")
print("=" * 60)
@@ -1378,6 +1531,7 @@ def activate_all_shops():
ratelimit_dir=RATELIMIT_DIR,
bot_patterns=generate_php_bot_patterns(),
generic_patterns=generate_php_generic_patterns(),
bot_ip_ranges=generate_php_bot_ip_ranges(),
rate_limit=rate_limit,
ban_duration=ban_duration,
ban_duration_min=ban_duration // 60
@@ -1399,7 +1553,8 @@ def activate_all_shops():
if bot_mode:
add_shop_to_active(shop, mode, geo_region, rate_limit, ban_duration)
print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Bot-Patterns, {rate_limit} req/min)")
total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values())
print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Patterns, {total_ip_ranges} IP-Ranges, {rate_limit} req/min)")
else:
print(f" ⏳ Cache generieren...")
cache_ok, count, _ = generate_and_validate_cache(httpdocs, geo_region)
@@ -1526,6 +1681,7 @@ def activate_direct_shops_only():
ratelimit_dir=RATELIMIT_DIR,
bot_patterns=generate_php_bot_patterns(),
generic_patterns=generate_php_generic_patterns(),
bot_ip_ranges=generate_php_bot_ip_ranges(),
rate_limit=rate_limit,
ban_duration=ban_duration,
ban_duration_min=ban_duration // 60
@@ -1547,7 +1703,8 @@ def activate_direct_shops_only():
if bot_mode:
add_shop_to_active(shop, mode, geo_region, rate_limit, ban_duration)
print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Bot-Patterns, {rate_limit} req/min)")
total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values())
print(f" ✅ Aktiviert ({len(BOT_PATTERNS)} Patterns, {total_ip_ranges} IP-Ranges, {rate_limit} req/min)")
else:
print(f" ⏳ Cache generieren...")
cache_ok, count, _ = generate_and_validate_cache(httpdocs, geo_region)
@@ -1671,7 +1828,7 @@ def get_shop_log_stats(shop):
if detected_bot:
bots[detected_bot] = bots.get(detected_bot, 0) + 1
elif ua and ua != 'Unknown':
bot_name = detect_bot(ua)
bot_name = detect_bot(ua, ip)
if bot_name != 'Unbekannt':
bots[bot_name] = bots.get(bot_name, 0) + 1
@@ -1724,7 +1881,9 @@ def show_logs(shop):
print(f"✅ Cache: {count:,} Ranges" if valid else f"⚠️ Cache: {err}")
else:
rate_limit, ban_duration = get_shop_rate_limit_config(shop)
total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values())
print(f"🤖 Bot-Patterns: {len(BOT_PATTERNS)} + {len(GENERIC_BOT_PATTERNS)} generische")
print(f"🌐 IP-basierte Erkennung: {total_ip_ranges} Ranges für {len(BOT_IP_RANGES)} Bot(s)")
if rate_limit is not None and ban_duration is not None:
print(f"🚦 Rate-Limit: {rate_limit} req/min PRO BOT-TYP, Ban: {ban_duration // 60} min")
print(f"🚫 Bans: {total_bans} ausgelöst, {active_bans} Bot-Typen aktuell gebannt")
@@ -1746,7 +1905,7 @@ def show_logs(shop):
if ips:
print(f"\n🔥 Top 10 IPs:")
for ip, data in sorted(ips.items(), key=lambda x: x[1]['count'], reverse=True)[:10]:
bot = data.get('bot') or detect_bot(data['ua'])
bot = data.get('bot') or detect_bot(data['ua'], ip)
print(f" {ip} ({bot}): {data['count']}x")
@@ -1882,7 +2041,7 @@ def show_all_logs():
for ip, data in sorted_ips:
count = data['count']
ua = data['ua']
bot_name = data.get('bot') or detect_bot(ua)
bot_name = data.get('bot') or detect_bot(ua, ip)
shops_data = data['shops']
ip_req_min = count / total_minutes if total_minutes > 0 else 0
@@ -1920,11 +2079,14 @@ def show_all_logs():
def main():
total_ip_ranges = sum(len(ranges) for ranges in BOT_IP_RANGES.values())
print("\n" + "=" * 60)
print(" GeoIP Shop Blocker Manager v4.0.0")
print(" GeoIP Shop Blocker Manager v4.1.0")
print(" 🇩🇪🇦🇹🇨🇭 DACH | 🇪🇺 Eurozone+GB | 🤖 Bot-Rate-Limiting")
print(" 🛡️ Mit Cache-Validierung und Fail-Open")
print(" 🚦 Rate-Limiting nach BOT-TYP (nicht IP)")
print(f" 🌐 IP-basierte Erkennung: {total_ip_ranges} Ranges für {len(BOT_IP_RANGES)} Bot(s)")
print("=" * 60)
while True: