Files
deep-shop-strace-analyse/straceanalyse.py

600 lines
26 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
JTL-Shop Performance Analyzer - Analysiert ALLE PHP-FPM Prozesse
"""
import os
import re
import shlex
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime
class ShopPerformanceAnalyzer:
    """Trace the PHP-FPM workers of one shop with strace and summarise the result.

    Typical use: call :meth:`get_php_fpm_pids`, feed the output of
    :meth:`run_strace` for each PID into :meth:`analyze_strace_output`, then
    call :meth:`generate_report`.  All aggregated data lives in
    ``self.results``.
    """

    # Glyph used for the percentage histogram in the syscall table.
    _BAR_CHAR = '█'
    # A mapping request above this many bytes is reported as a large memory operation.
    _LARGE_MEMORY_BYTES = 1000000
    # A poll/select call slower than this many seconds is reported as a slow wait.
    _SLOW_WAIT_SECONDS = 0.1
    # How long strace gets to detach after SIGTERM before it is killed.
    _TERMINATE_GRACE_SECONDS = 5
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
    _TRACKED_PATH_PARTS = ('media', 'image', 'templates', 'cache')

    # With -f strace prefixes lines of additional tracees with "[pid N] ".
    _SYSCALL_RE = re.compile(r'^(?:\[pid\s+\d+\]\s+)?(\w+)\(')
    _ENOENT_PATH_RE = re.compile(r'"([^"]+)".*ENOENT')
    _SQL_RE = re.compile(r'(SELECT|UPDATE|INSERT|DELETE)[^"]*')
    # A Redis keyword only counts as a stand-alone token.  strace prints CR/LF
    # as the literal two-character escapes "\r" / "\n", so a keyword directly
    # after such an escape is also accepted.  Flags such as F_SETFD or
    # FD_CLOEXEC, and the GET inside HGET, are therefore not counted.
    _REDIS_RE = re.compile(
        r'(?:(?<!\w)|(?<=\\[nrt]))'
        r'(SADD|GET|SET|HGET|HSET|EXEC|MULTI|QUEUED)(?!\w)'
    )
    _VHOST_PATH_RE = re.compile(r'"(/var/www/vhosts/[^"]+)"')
    _ELAPSED_RE = re.compile(r'<(\d+(?:\.\d+)?)>')
    _MEM_CALL_RE = re.compile(r'^(?:\[pid\s+\d+\]\s+)?(mmap2?|mremap)\(([^)]*)\)')

    # Ordered (key, substrings) rules; the first rule with a hit wins.
    _CATEGORY_RULES = (
        ('manufacturer', ('manufacturer',)),
        ('product', ('product', 'artikel')),
        ('variation', ('variation',)),
        ('imagemagick', ('ImageMagick',)),
        ('category', ('category', 'kategorie')),
    )
    # key -> (label used in report / category file, label used in the CSV)
    _CATEGORY_LABELS = {
        'manufacturer': ('Hersteller-Bilder', 'Hersteller'),
        'product': ('Produkt-Bilder', 'Produkt'),
        'variation': ('Variationen-Bilder', 'Variation'),
        'imagemagick': ('ImageMagick Config', 'ImageMagick'),
        'category': ('Kategorie-Bilder', 'Kategorie'),
        'other': ('Sonstige', 'Sonstig'),
    }

    def __init__(self, domain):
        """Create an analyzer for the PHP-FPM pool named after *domain*."""
        self.domain = domain
        self.results = {
            'missing_files': Counter(),
            'syscalls': Counter(),
            'mysql_queries': [],
            'redis_operations': Counter(),
            'slow_operations': [],
            'file_paths': Counter(),
            'errors': Counter()
        }
        self.debug = False  # less output when many processes are traced
        self.output_dir = f"/root/shop_analysis_{domain}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    def get_php_fpm_pids(self):
        """Return the PIDs of all processes whose command line contains
        ``php-fpm: pool <domain>``.

        ``ps`` is invoked without a shell and its output is filtered in
        Python, so the domain is never interpreted by a shell.  Returns an
        empty list when ``ps`` cannot be executed.
        """
        marker = f'php-fpm: pool {self.domain}'
        try:
            result = subprocess.run(
                ['ps', '-eo', 'pid=,args='],
                capture_output=True,
                text=True,
            )
        except OSError as e:
            print(f"❌ Fehler beim Finden der PIDs: {e}")
            return []
        pids = []
        for line in result.stdout.splitlines():
            pid_text, _, args = line.strip().partition(' ')
            if marker in args and pid_text.isdigit():
                pids.append(int(pid_text))
        return pids

    def run_strace(self, pid, duration=5):
        """Attach strace to *pid* for about *duration* seconds and return its output.

        Returns the text strace wrote to stderr (or stdout when stderr is
        empty), or ``""`` when strace could not be started.
        """
        cmd = [
            'strace',
            '-p', str(pid),
            '-f',
            '-s', '300',
            '-e', 'trace=all',
            '-T'
        ]
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                errors='replace',
            )
        except OSError as e:
            # Best effort: a worker we cannot trace is treated like an idle one.
            if getattr(self, 'debug', False):
                print(f"strace failed for PID {pid}: {e}", file=sys.stderr)
            return ""
        try:
            stdout, stderr = proc.communicate(timeout=duration)
        except subprocess.TimeoutExpired:
            # SIGTERM lets strace detach from the worker cleanly; SIGKILL is
            # only the fallback when it does not exit in time.
            proc.terminate()
            try:
                stdout, stderr = proc.communicate(timeout=self._TERMINATE_GRACE_SECONDS)
            except subprocess.TimeoutExpired:
                proc.kill()
                stdout, stderr = proc.communicate()
        return stderr if stderr else stdout

    @staticmethod
    def _requested_size(syscall, raw_args):
        """Return the byte count requested by an mmap/mremap call, or 0 if unknown."""
        args = [arg.strip() for arg in raw_args.split(',')]
        # mmap(addr, length, ...) vs. mremap(old_addr, old_size, new_size, ...)
        index = 2 if syscall == 'mremap' else 1
        if index < len(args) and args[index].isdigit():
            return int(args[index])
        return 0

    def analyze_strace_output(self, output):
        """Parse raw strace text line by line and accumulate into ``self.results``.

        Output shorter than 10 characters is ignored.
        """
        if not output or len(output) < 10:
            return
        results = self.results
        shop_prefix = f'/var/www/vhosts/{self.domain}/httpdocs/'
        for line in output.split('\n'):
            if not line.strip():
                continue

            syscall_match = self._SYSCALL_RE.match(line)
            if syscall_match:
                results['syscalls'][syscall_match.group(1)] += 1

            if 'ENOENT' in line:
                file_match = self._ENOENT_PATH_RE.search(line)
                if file_match:
                    results['missing_files'][file_match.group(1)] += 1
                    results['errors']['ENOENT'] += 1
            if 'EAGAIN' in line:
                results['errors']['EAGAIN'] += 1

            query_match = self._SQL_RE.search(line)
            if query_match:
                results['mysql_queries'].append(query_match.group(0)[:100])

            # Each keyword is counted at most once per line.
            for op in {m.group(1) for m in self._REDIS_RE.finditer(line)}:
                results['redis_operations'][op] += 1

            path_match = self._VHOST_PATH_RE.search(line)
            if path_match:
                path = path_match.group(1)
                if any(part in path for part in self._TRACKED_PATH_PARTS):
                    results['file_paths'][path.replace(shop_prefix, '')] += 1

            if 'ImageMagick' in line or 'locale.xml' in line:
                results['slow_operations'].append('ImageMagick/Config-Load')

            mem_match = self._MEM_CALL_RE.match(line)
            if mem_match:
                size = self._requested_size(mem_match.group(1), mem_match.group(2))
                if size > self._LARGE_MEMORY_BYTES:
                    results['slow_operations'].append('Large Memory Operation')

            if 'poll' in line or 'select' in line:
                time_match = self._ELAPSED_RE.search(line)
                if time_match and float(time_match.group(1)) > self._SLOW_WAIT_SECONDS:
                    results['slow_operations'].append(
                        f'Slow I/O Wait ({time_match.group(1)}s)')

    @classmethod
    def _categorize(cls, filepath):
        """Return the category key of *filepath* (first matching rule, else 'other')."""
        for key, needles in cls._CATEGORY_RULES:
            if any(needle in filepath for needle in needles):
                return key
        return 'other'

    def _write_full_list(self, missing):
        """Write every missing path with its hit count, most frequent first."""
        list_file = os.path.join(self.output_dir, 'missing_files_all.txt')
        with open(list_file, 'w', encoding='utf-8') as f:
            f.write(f"# Fehlende Dateien für {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"# Total: {len(missing)} Dateien\n")
            f.write(f"# Zugriffe: {sum(missing.values())}\n")
            f.write("#" + "="*70 + "\n\n")
            for filepath, count in missing.most_common():
                f.write(f"[{count:4d}x] {filepath}\n")

    def _write_category_list(self, missing):
        """Write the missing paths grouped by category."""
        category_file = os.path.join(self.output_dir, 'missing_files_by_category.txt')
        grouped = defaultdict(list)
        for filepath, count in missing.items():
            label = self._CATEGORY_LABELS[self._categorize(filepath)][0]
            grouped[label].append((filepath, count))
        with open(category_file, 'w', encoding='utf-8') as f:
            f.write(f"# Fehlende Dateien nach Kategorie - {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            for category, items in sorted(grouped.items()):
                total_accesses = sum(count for _, count in items)
                f.write(f"\n{'='*70}\n")
                f.write(f"{category.upper()}\n")
                f.write(f"{'='*70}\n")
                f.write(f"Anzahl Dateien: {len(items)}\n")
                f.write(f"Zugriffe gesamt: {total_accesses}\n\n")
                for filepath, count in sorted(items, key=lambda x: x[1], reverse=True):
                    f.write(f"[{count:4d}x] {filepath}\n")

    def _write_placeholder_script(self, missing):
        """Write an executable bash script that copies a placeholder to missing images.

        Paths originate from traced requests and are untrusted, so each one
        is quoted with ``shlex.quote`` before it is embedded in the script.
        """
        script_file = os.path.join(self.output_dir, 'create_placeholders.sh')
        shop_root = shlex.quote(f'/var/www/vhosts/{self.domain}/httpdocs')
        image_files = [fp for fp in missing
                       if fp.lower().endswith(self._IMAGE_SUFFIXES)]
        with open(script_file, 'w', encoding='utf-8') as f:
            f.write("#!/bin/bash\n")
            f.write("# Auto-generated Script zum Erstellen fehlender Bilder\n")
            f.write(f"# Domain: {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write('set -e\n\n')
            f.write(f'SHOP_ROOT={shop_root}\n')
            f.write('PLACEHOLDER="$SHOP_ROOT/gfx/keinBild.gif"\n\n')
            f.write('if [ ! -f "$PLACEHOLDER" ]; then\n')
            f.write(' echo "❌ Fehler: Placeholder nicht gefunden: $PLACEHOLDER"\n')
            f.write(' exit 1\n')
            f.write('fi\n\n')
            f.write('echo "Erstelle fehlende Dateien..."\n')
            f.write('CREATED=0\n')
            f.write('SKIPPED=0\n\n')
            dirs_created = set()
            for filepath in image_files:
                if not filepath.startswith('/var/www/vhosts'):
                    continue
                dirname = os.path.dirname(filepath)
                quoted_path = shlex.quote(filepath)
                if dirname not in dirs_created:
                    f.write(f'mkdir -p {shlex.quote(dirname)} 2>/dev/null || true\n')
                    dirs_created.add(dirname)
                f.write(f'if [ ! -f {quoted_path} ]; then\n')
                # Plain assignments instead of ((VAR++)): the latter returns
                # status 1 when the old value is 0, which aborts under set -e.
                f.write(f'    cp "$PLACEHOLDER" {quoted_path} 2>/dev/null'
                        ' && CREATED=$((CREATED+1)) || true\n')
                f.write('else\n')
                f.write('    SKIPPED=$((SKIPPED+1))\n')
                f.write('fi\n')
            f.write('\necho ""\n')
            f.write('echo "✅ Fertig!"\n')
            f.write('echo " Erstellt: $CREATED Platzhalter"\n')
            f.write('echo " Übersprungen: $SKIPPED (existieren bereits)"\n')
            f.write(f'echo " Total Dateien: {len(image_files)}"\n')
        os.chmod(script_file, 0o755)

    def _write_csv(self, missing):
        """Write hit count, category and path of every missing file as CSV."""
        csv_file = os.path.join(self.output_dir, 'missing_files.csv')
        with open(csv_file, 'w', encoding='utf-8') as f:
            f.write("Zugriffe,Kategorie,Dateipfad\n")
            for filepath, count in missing.most_common():
                category = self._CATEGORY_LABELS[self._categorize(filepath)][1]
                filepath_safe = filepath.replace('"', '""')
                f.write(f'{count},{category},"{filepath_safe}"\n')

    def _write_manufacturer_ids(self, missing):
        """Write the numeric manufacturer IDs found in missing paths (if any)."""
        manufacturer_ids = set()
        for filepath in missing:
            match = re.search(r'manufacturer/(\d+)/', filepath)
            if match:
                manufacturer_ids.add(match.group(1))
        if not manufacturer_ids:
            return
        manufacturer_file = os.path.join(self.output_dir, 'missing_manufacturer_ids.txt')
        with open(manufacturer_file, 'w', encoding='utf-8') as f:
            f.write("# Hersteller IDs mit fehlenden Bildern\n")
            f.write(f"# Total: {len(manufacturer_ids)} Hersteller\n")
            f.write("# Verwendung: Im JTL-Shop Admin diese Hersteller prüfen\n\n")
            for mid in sorted(manufacturer_ids, key=int):
                f.write(f"{mid}\n")

    def _write_paths_only(self, missing):
        """Write the bare missing paths, one per line, for further processing."""
        paths_only_file = os.path.join(self.output_dir, 'missing_files_paths_only.txt')
        with open(paths_only_file, 'w', encoding='utf-8') as f:
            for filepath in missing:
                f.write(f"{filepath}\n")

    def export_missing_files(self):
        """Export the missing-file statistics into ``self.output_dir``.

        Writes a full list, a per-category list, a placeholder bash script,
        a CSV, the manufacturer IDs and a paths-only file.

        Returns:
            The output directory, or ``None`` when no missing file was recorded.

        Raises:
            OSError: if the directory or one of the files cannot be written.
        """
        missing = self.results['missing_files']
        if not missing:
            return None
        os.makedirs(self.output_dir, exist_ok=True)
        self._write_full_list(missing)
        self._write_category_list(missing)
        self._write_placeholder_script(missing)
        self._write_csv(missing)
        self._write_manufacturer_ids(missing)
        self._write_paths_only(missing)
        return self.output_dir

    def generate_report(self):
        """Print the analysis report, export missing files and print recommendations.

        When no syscall was recorded only a warning with a retry hint is printed.
        """
        results = self.results
        print("\n" + "="*80)
        print(f"🔍 PERFORMANCE ANALYSE: {self.domain}")
        print(f"📅 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*80 + "\n")
        total_syscalls = sum(results['syscalls'].values())
        if total_syscalls == 0:
            print("⚠️ WARNUNG: Keine Syscalls aufgezeichnet!")
            print(" Mögliche Gründe:")
            print(" - Prozesse sind gerade idle (wenig Traffic)")
            print(" - Strace hat keine Berechtigung")
            print(" - Prozesse wurden zwischen Analyse beendet\n")
            print(f" Versuche: python3 script.py {self.domain} 10\n")
            return

        print("📊 SYSCALL STATISTIK")
        print("-" * 80)
        for syscall, count in results['syscalls'].most_common(15):
            percentage = count / total_syscalls * 100
            bar = self._BAR_CHAR * int(percentage / 2)
            print(f" {syscall:20s}: {count:6d} ({percentage:5.1f}%) {bar}")
        print()

        # Missing files
        missing = results['missing_files']
        missing_count = len(missing)
        if missing_count > 0:
            print("❌ FEHLENDE DATEIEN (ENOENT)")
            print("-" * 80)
            print(f" ⚠️ {missing_count} verschiedene Dateien nicht gefunden!")
            print(f" ⚠️ {sum(missing.values())} Zugriffe auf nicht-existierende Dateien!\n")
            categories = Counter(
                self._CATEGORY_LABELS[self._categorize(filepath)][0]
                for filepath in missing
            )
            print(" Kategorien:")
            for category, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
                print(f"{category:25s}: {count:4d} Dateien")
            print()
            print(" Top 15 fehlende Dateien:")
            shop_prefix = f'/var/www/vhosts/{self.domain}/httpdocs/'
            for path, count in missing.most_common(15):
                short_path = path.replace(shop_prefix, '')
                print(f" [{count:3d}x] {short_path}")
            if missing_count > 15:
                print(f"\n ... und {missing_count-15} weitere")
                print(" Vollständige Liste siehe Export-Dateien!")
            print()

        # Errors
        if results['errors']:
            print("⚠️ FEHLER")
            print("-" * 80)
            for error, count in results['errors'].items():
                print(f" {error:20s}: {count:6d}x")
            print()

        # MySQL queries
        if results['mysql_queries']:
            print("🗄️ MYSQL QUERIES")
            print("-" * 80)
            query_counter = Counter(results['mysql_queries'])
            print(f" Total Queries: {len(results['mysql_queries'])}")
            print(f" Unique Queries: {len(query_counter)}")
            print("\n Häufigste Queries:")
            for query, count in query_counter.most_common(10):
                print(f" [{count:3d}x] {query[:70]}...")
            print()

        # File paths
        if results['file_paths']:
            print("📁 HÄUFIGSTE DATEIZUGRIFFE")
            print("-" * 80)
            for path, count in results['file_paths'].most_common(15):
                print(f" [{count:3d}x] {path}")
            print()

        # Redis
        if results['redis_operations']:
            print("🔴 REDIS OPERATIONEN")
            print("-" * 80)
            total_redis = sum(results['redis_operations'].values())
            for op, count in results['redis_operations'].most_common():
                percentage = (count / total_redis * 100) if total_redis > 0 else 0
                print(f" {op:15s}: {count:6d}x ({percentage:5.1f}%)")
            print()

        # Slow operations
        if results['slow_operations']:
            print("🐌 LANGSAME OPERATIONEN")
            print("-" * 80)
            for op, count in Counter(results['slow_operations']).items():
                print(f" ⚠️ {op}: {count}x")
            print()

        # Export of missing files
        if missing_count > 0:
            print("="*80)
            print("💾 EXPORTIERE FEHLENDE DATEIEN")
            print("="*80 + "\n")
            try:
                export_dir = self.export_missing_files()
            except OSError as e:
                # A failed export must not suppress the recommendations below.
                print(f"❌ Export fehlgeschlagen: {e}\n")
                export_dir = None
            if export_dir:
                print(f"✅ Dateien exportiert nach: {export_dir}\n")
                print(" Erstellt:")
                print(" 📄 missing_files_all.txt - Komplette Liste (sortiert nach Häufigkeit)")
                print(" 📁 missing_files_by_category.txt - Nach Kategorie gruppiert")
                print(" 📊 missing_files.csv - CSV für Excel")
                print(" 🔧 create_placeholders.sh - Bash Script (ausführbar)")
                print(" 🏷️ missing_manufacturer_ids.txt - Hersteller IDs")
                print(" 📝 missing_files_paths_only.txt - Nur Pfade (für Scripts)\n")
                print(" Quick-Fix:")
                print(f" bash {export_dir}/create_placeholders.sh\n")

        # Recommendations
        self.generate_recommendations(total_syscalls, missing_count)

    def generate_recommendations(self, total_syscalls, missing_count):
        """Print prioritised recommendations and a short summary.

        Args:
            total_syscalls: number of syscalls recorded (used in the summary).
            missing_count: number of distinct missing files.
        """
        results = self.results
        print("="*80)
        print("💡 HANDLUNGSEMPFEHLUNGEN FÜR DEN KUNDEN")
        print("="*80 + "\n")
        recommendations = []

        def add(severity, problem, impact, solution, **extra):
            # Priorities are assigned in the order the findings are added.
            entry = {
                'priority': len(recommendations) + 1,
                'severity': severity,
                'problem': problem,
                'impact': impact,
                'solution': solution,
            }
            entry.update(extra)
            recommendations.append(entry)

        if missing_count > 5:
            missing = results['missing_files']
            manufacturer_missing = sum(1 for p in missing if 'manufacturer' in p)
            product_missing = sum(1 for p in missing if 'product' in p or 'artikel' in p)
            if manufacturer_missing > 0:
                manufacturer_accesses = sum(
                    v for k, v in missing.items() if "manufacturer" in k)
                add(
                    '🔥 KRITISCH',
                    f'{manufacturer_missing} Hersteller-Bilder fehlen',
                    f'Jedes fehlende Bild = 6-8 stat() Calls. Bei {manufacturer_accesses} Zugriffen!',
                    '1. JTL-Shop Admin einloggen\n2. Bilder → Hersteller-Bilder → "Fehlende generieren"\n3. ODER: Bash Script ausführen (siehe Export)',
                    files=[f"{self.output_dir}/create_placeholders.sh",
                           f"{self.output_dir}/missing_manufacturer_ids.txt"],
                )
            if product_missing > 0:
                add(
                    '⚠️ WICHTIG',
                    f'{product_missing} Produkt-Bilder fehlen',
                    'Erhöhte I/O Last',
                    'JTL-Shop Admin → Bilder → "Bildcache regenerieren"',
                )

        stat_calls = sum(count for syscall, count in results['syscalls'].items()
                         if 'stat' in syscall.lower())
        if stat_calls > 500:
            add(
                '⚠️ WICHTIG',
                f'{stat_calls} Filesystem stat() Calls',
                'Filesystem-Thrashing, langsame Response-Times',
                'PHP Realpath Cache erhöhen in PHP-Einstellungen',
                technical='Plesk → Domain → PHP Settings:\nrealpath_cache_size = 4096K\nrealpath_cache_ttl = 600',
            )

        imagemagick_count = sum(1 for op in results['slow_operations'] if 'ImageMagick' in op)
        if imagemagick_count > 3:
            add(
                '🔥 KRITISCH',
                f'ImageMagick wird {imagemagick_count}x aufgerufen',
                'CPU-intensive Bildverarbeitung bei jedem Request!',
                '1. Bild-Cache in JTL-Shop aktivieren\n2. Alle Bildgrößen vorher generieren\n3. Prüfen ob Bilder wirklich vorhanden sind',
            )

        query_total = len(results['mysql_queries'])
        if query_total > 50:
            add(
                '⚠️ WICHTIG',
                f'{query_total} MySQL Queries',
                'N+1 Query Problem, Database Overhead',
                'JTL-Shop: System → Cache → Object Cache aktivieren (Redis)',
            )

        eagain_count = results['errors'].get('EAGAIN', 0)
        if eagain_count > 100:
            add(
                '⚠️ WICHTIG',
                f'{eagain_count}x EAGAIN',
                'Redis/MySQL Verbindungen überlastet',
                'Redis Connection Pool erhöhen oder PHP-FPM Worker erhöhen',
            )

        if recommendations:
            for rec in recommendations:
                print(f"{rec['severity']} PRIORITÄT {rec['priority']}: {rec['problem']}")
                print(f" 📊 Impact: {rec['impact']}")
                print(f" ✅ Lösung: {rec['solution']}")
                if 'files' in rec:
                    print(" 📁 Dateien:")
                    for path in rec['files']:
                        print(f"{path}")
                if 'technical' in rec:
                    print(" 🔧 Technisch:")
                    for line in rec['technical'].split('\n'):
                        print(f" {line}")
                print()
        else:
            print("✅ Keine kritischen Probleme gefunden!\n")

        print("="*80)
        print("📋 ZUSAMMENFASSUNG")
        print("="*80)
        print(f" • Total Syscalls: {total_syscalls}")
        print(f" • Fehlende Dateien: {missing_count}")
        print(f" • MySQL Queries: {query_total}")
        print(f" • Redis Operations: {sum(results['redis_operations'].values())}")
        print(f" • Handlungsempfehlungen: {len(recommendations)}")
        if missing_count > 0:
            print(f"\n 📁 Export-Verzeichnis: {self.output_dir}")
        print()
def main():
    """Command-line entry point.

    Usage: ``<script> <domain> [duration] [max_processes]``.  Traces each
    PHP-FPM worker of the shop for ``duration`` seconds (default 5), one
    after the other, then prints the report.  Exits with status 1 on
    invalid arguments, when no worker is found, or when no trace yielded data.
    """
    if len(sys.argv) < 2:
        print("\n" + "="*80)
        print("JTL-Shop Performance Analyzer")
        print("="*80)
        print("\nUsage: python3 shop_analyzer.py <domain> [duration] [max_processes]")
        print("\nExamples:")
        print(" python3 shop_analyzer.py spiel-und-modellbau.com")
        print(" python3 shop_analyzer.py spiel-und-modellbau.com 10")
        print(" python3 shop_analyzer.py spiel-und-modellbau.com 10 20 # Max 20 Prozesse")
        print("\nParameter:")
        print(" domain - Shop Domain")
        print(" duration - Sekunden pro Prozess (default: 5)")
        print(" max_processes - Max Anzahl Prozesse (default: alle)")
        print()
        sys.exit(1)

    domain = sys.argv[1]
    try:
        duration = int(sys.argv[2]) if len(sys.argv) > 2 else 5
        max_processes = int(sys.argv[3]) if len(sys.argv) > 3 else None
    except ValueError:
        print("❌ duration und max_processes müssen ganze Zahlen sein!")
        sys.exit(1)
    # max_processes == 0 keeps its previous meaning of "no limit".
    if duration <= 0 or (max_processes is not None and max_processes < 0):
        print("❌ duration muss > 0 und max_processes >= 0 sein!")
        sys.exit(1)

    print(f"\n🚀 Starte Performance-Analyse für: {domain}")
    print(f"⏱️ Analyse-Dauer: {duration} Sekunden pro Prozess")
    if max_processes:
        print(f"🔢 Max Prozesse: {max_processes}")
    else:
        print("🔢 Prozesse: ALLE gefundenen PHP-FPM Worker")
    print()

    analyzer = ShopPerformanceAnalyzer(domain)
    pids = analyzer.get_php_fpm_pids()
    if not pids:
        print("❌ Keine PHP-FPM Prozesse gefunden!")
        sys.exit(1)

    # Apply the limit if one was given
    if max_processes and len(pids) > max_processes:
        print(f"{len(pids)} PHP-FPM Prozesse gefunden (analysiere {max_processes})")
        pids = pids[:max_processes]
    else:
        print(f"{len(pids)} PHP-FPM Prozesse gefunden (analysiere alle)")
    print(f" PIDs: {pids}\n")

    total = len(pids)
    analyzed = 0
    failed = 0
    bar_length = 40
    print("🔄 Analyse läuft...")
    print("-" * 80)
    for i, pid in enumerate(pids, 1):
        # Progress indicator, redrawn in place via carriage return
        percent = int((i / total) * 100)
        filled = int((percent / 100) * bar_length)
        bar = '█' * filled + '░' * (bar_length - filled)
        print(f"\r[{bar}] {percent:3d}% | PID {pid:6d} ({i}/{total})", end='', flush=True)
        output = analyzer.run_strace(pid, duration)
        # Very short output means the worker was idle or could not be traced.
        if output and len(output) > 100:
            analyzer.analyze_strace_output(output)
            analyzed += 1
        else:
            failed += 1
    print(f"\r[{'█' * bar_length}] 100% | Fertig!{' ' * 30}")
    print("-" * 80)
    print("\n✅ Analyse abgeschlossen!")
    print(f" • Erfolgreich analysiert: {analyzed}/{total}")
    if failed > 0:
        print(f" • Idle/Keine Daten: {failed}")
    print()
    if analyzed == 0:
        print("⚠️ Konnte keine Daten sammeln!")
        print(" Shop hat gerade wenig Traffic. Versuche später nochmal oder erhöhe duration.\n")
        sys.exit(1)
    analyzer.generate_report()


if __name__ == "__main__":
    main()