def levenshtein_similarity(s1: str, s2: str) -> float:
    """
    Compute the similarity of two strings based on Levenshtein distance.

    Returns:
        Float between 0.0 (completely different) and 1.0 (identical).
    """
    if s1 == s2:
        return 1.0
    len1, len2 = len(s1), len(s2)
    if len1 == 0 or len2 == 0:
        return 0.0

    # Optimization: strings of very different length cannot be similar
    # enough to matter — skip the O(n*m) computation entirely.
    if abs(len1 - len2) > max(len1, len2) * 0.5:
        return 0.0

    # Levenshtein distance via dynamic programming on two rolling rows.
    # Keep the shorter string in s1 so the row stays small.
    if len1 > len2:
        s1, s2, len1, len2 = s2, s1, len2, len1

    current_row = range(len1 + 1)
    for i in range(1, len2 + 1):
        previous_row, current_row = current_row, [i] + [0] * len1
        for j in range(1, len1 + 1):
            add = previous_row[j] + 1
            delete = current_row[j - 1] + 1
            change = previous_row[j - 1]
            if s1[j - 1] != s2[i - 1]:
                change += 1
            current_row[j] = min(add, delete, change)

    distance = current_row[len1]
    max_len = max(len1, len2)
    return 1.0 - (distance / max_len)


def group_similar_urls(urls: Dict[str, int], similarity_threshold: float = 0.85) -> Dict[str, int]:
    """
    Group similar URLs using a hybrid approach:
    1. Pre-filter by prefix (first path segment).
    2. Within each prefix group: Levenshtein clustering for small groups.

    Args:
        urls: Dict of {url: count}.
        similarity_threshold: Minimum similarity for grouping (0.0-1.0).

    Returns:
        Dict of {representative_url: summed_count}.
    """
    if not urls:
        return {}

    # Step 1: bucket URLs by their first path segment (query string stripped).
    prefix_groups: Dict[str, list] = {}
    for url, count in urls.items():
        path = url.split('?')[0]
        segments = path.strip('/').split('/')
        prefix = segments[0][:20] if segments and segments[0] else '/'
        prefix_groups.setdefault(prefix, []).append((url, count))

    # Step 2: merge similar URLs within each prefix bucket.
    result: Dict[str, int] = {}

    for prefix, url_list in prefix_groups.items():
        # Most frequent URLs first so they become cluster representatives.
        url_list.sort(key=lambda x: x[1], reverse=True)

        if len(url_list) <= 100:
            # Small group: Levenshtein clustering.
            clusters = []  # list of [representative_url, total_count, [member urls]]

            for url, count in url_list:
                # Try to attach to an existing cluster; otherwise open a new one.
                for cluster in clusters:
                    if levenshtein_similarity(url, cluster[0]) >= similarity_threshold:
                        cluster[1] += count
                        cluster[2].append(url)
                        break
                else:
                    clusters.append([url, count, [url]])

            # Emit one entry per cluster representative.
            for rep_url, total_count, members in clusters:
                if len(members) > 1:
                    # Several URLs merged: strip query params, truncate, and
                    # mark the entry with the number of merged members.
                    display_url = rep_url.split('?')[0]
                    if len(display_url) > 50:
                        display_url = display_url[:47] + '...'
                    display_url += f' ({len(members)}x)'
                else:
                    display_url = rep_url
                    if len(display_url) > 60:
                        display_url = display_url[:57] + '...'
                # BUGFIX: accumulate instead of assign — truncated display
                # names from different clusters may collide; a plain
                # assignment would silently drop the earlier cluster's count.
                result[display_url] = result.get(display_url, 0) + total_count
        else:
            # Large group: simple prefix aggregation, no pairwise comparison.
            total = sum(c for _, c in url_list)
            result[f'/{prefix}/* ({len(url_list)} URLs)'] = total

    return result
ip_info.get('isp', ''), + 'asn': ip_info.get('as', '') + }) + stats['top_ips'] = top_ips_list - # Top Countries (max 10) - sorted_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)[:10] + # Top Countries (max 20) + sorted_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)[:20] stats['top_countries'] = dict(sorted_countries) + # Top Requests (max 20) - mit Ähnlichkeits-Gruppierung + if uris: + grouped_uris = group_similar_urls(uris, similarity_threshold=0.85) + sorted_uris = sorted(grouped_uris.items(), key=lambda x: x[1], reverse=True)[:20] + stats['top_requests'] = dict(sorted_uris) + # Req/min berechnen activation_time = get_shop_activation_time(shop) if activation_time and stats['log_entries'] > 0: diff --git a/jtl-wafi-dashboard.py b/jtl-wafi-dashboard.py index 7dc33db..c140bf2 100644 --- a/jtl-wafi-dashboard.py +++ b/jtl-wafi-dashboard.py @@ -130,8 +130,9 @@ class ShopData: unique_bots: int = 0 unique_countries: int = 0 top_bots: Dict[str, int] = field(default_factory=dict) - top_ips: Dict[str, int] = field(default_factory=dict) + top_ips: List[Dict] = field(default_factory=list) top_countries: Dict[str, int] = field(default_factory=dict) + top_requests: Dict[str, int] = field(default_factory=dict) human_requests: int = 0 bot_requests: int = 0 human_rpm: float = 0.0 @@ -314,8 +315,9 @@ class DataStore: shop.unique_bots = stats.get('unique_bots', 0) shop.unique_countries = stats.get('unique_countries', 0) shop.top_bots = stats.get('top_bots', {}) - shop.top_ips = stats.get('top_ips', {}) + shop.top_ips = stats.get('top_ips', []) shop.top_countries = stats.get('top_countries', {}) + shop.top_requests = stats.get('top_requests', {}) shop.human_requests = stats.get('human_requests', 0) shop.bot_requests = stats.get('bot_requests', 0) shop.human_rpm = stats.get('human_rpm', 0.0) @@ -355,6 +357,7 @@ class DataStore: shop.top_bots = stats.get('top_bots', shop.top_bots) shop.top_ips = stats.get('top_ips', shop.top_ips) 
shop.top_countries = stats.get('top_countries', shop.top_countries) + shop.top_requests = stats.get('top_requests', shop.top_requests) shop.human_requests = stats.get('human_requests', shop.human_requests) shop.bot_requests = stats.get('bot_requests', shop.bot_requests) shop.human_rpm = stats.get('human_rpm', shop.human_rpm) @@ -435,6 +438,7 @@ class DataStore: 'top_bots': shop.top_bots, 'top_ips': shop.top_ips, 'top_countries': shop.top_countries, + 'top_requests': shop.top_requests, 'human_requests': shop.human_requests, 'bot_requests': shop.bot_requests, 'human_rpm': shop.human_rpm, @@ -1567,6 +1571,15 @@ def get_dashboard_html() -> str: .detail-section-title { font-size: 14px; font-weight: 600; margin-bottom: 12px; color: var(--text-secondary); } .bot-list { max-height: 200px; overflow-y: auto; } .bot-item { display: flex; justify-content: space-between; padding: 8px 12px; background: var(--bg-card); border-radius: 6px; margin-bottom: 4px; font-size: 13px; } + .ip-list { max-height: 300px; overflow-y: auto; } + .ip-list-item { display: flex; justify-content: space-between; align-items: center; padding: 8px 12px; background: var(--bg-card); border-radius: 6px; margin-bottom: 4px; font-size: 13px; gap: 8px; } + .ip-list-item .ip-info { flex: 1; display: flex; flex-direction: column; gap: 2px; } + .ip-list-item .ip-addr { font-family: monospace; color: var(--accent); text-decoration: none; } + .ip-list-item .ip-addr:hover { text-decoration: underline; } + .ip-list-item .ip-meta { font-size: 11px; color: var(--text-secondary); } + .ip-list-item .ip-count { font-weight: 600; color: var(--warning); min-width: 40px; text-align: right; } + .ip-list-item .ip-actions { display: flex; gap: 4px; } + .ip-list-item .ip-actions button { padding: 4px 8px; font-size: 11px; } .chart-container { background: var(--bg-card); border: 1px solid var(--border); border-radius: 8px; padding: 16px; height: 280px; } .chart-legend { display: flex; flex-wrap: wrap; gap: 12px; margin-top: 
12px; font-size: 11px; } .legend-item { display: flex; align-items: center; gap: 4px; } @@ -1676,7 +1689,7 @@ def get_dashboard_html() -> str:
- +