# File: deep-shop-strace-analyse/straceanalyse.py
# (~600 lines, 26 KiB, Python)
#!/usr/bin/env python3
"""
JTL-Shop Performance Analyzer - Analysiert ALLE PHP-FPM Prozesse
"""
import subprocess
import re
import sys
import os
from collections import Counter, defaultdict
from datetime import datetime
class ShopPerformanceAnalyzer:
    """Aggregates strace findings across all PHP-FPM workers of one shop domain."""

    def __init__(self, domain):
        """Prepare empty result buckets and a timestamped export directory.

        Args:
            domain: The shop domain whose PHP-FPM pool is analyzed.
        """
        self.domain = domain
        # Buckets filled incrementally by analyze_strace_output().
        self.results = {
            'missing_files': Counter(),
            'syscalls': Counter(),
            'mysql_queries': [],
            'redis_operations': Counter(),
            'slow_operations': [],
            'file_paths': Counter(),
            'errors': Counter(),
        }
        self.debug = False
        # Timestamp keeps repeated runs from overwriting each other's exports.
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.output_dir = f"/root/shop_analysis_{domain}_{stamp}"
def get_php_fpm_pids(self):
"""Finde alle PHP-FPM PIDs fuer den Shop"""
try:
cmd = f"ps aux | grep 'php-fpm: pool {self.domain}' | grep -v grep | awk '{{print $2}}'"
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
pids = [int(pid) for pid in result.stdout.strip().split('\n') if pid]
return pids
except Exception as e:
print(f"Fehler beim Finden der PIDs: {e}")
return []
def run_strace(self, pid, duration=5):
"""Fuehre strace auf einem Prozess aus"""
try:
cmd = [
'strace',
'-p', str(pid),
'-f',
'-s', '300',
'-e', 'trace=all',
'-T'
]
result = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
try:
stdout, stderr = result.communicate(timeout=duration)
except subprocess.TimeoutExpired:
result.kill()
stdout, stderr = result.communicate()
output = stderr if stderr else stdout
return output
except Exception as e:
return ""
def analyze_strace_output(self, output):
"""Analysiere strace Output"""
if not output or len(output) < 10:
return
lines = output.split('\n')
for line in lines:
if not line.strip():
continue
syscall_match = re.match(r'^(\w+)\(', line)
if syscall_match:
self.results['syscalls'][syscall_match.group(1)] += 1
if 'ENOENT' in line:
patterns = [
r'"([^"]+)".*ENOENT',
r'newfstatat\(.*?"([^"]+)".*ENOENT',
r'openat\(.*?"([^"]+)".*ENOENT',
]
for pattern in patterns:
file_match = re.search(pattern, line)
if file_match:
filepath = file_match.group(1)
self.results['missing_files'][filepath] += 1
self.results['errors']['ENOENT'] += 1
break
if 'EAGAIN' in line:
self.results['errors']['EAGAIN'] += 1
if any(keyword in line for keyword in ['SELECT', 'UPDATE', 'INSERT', 'DELETE']):
query_match = re.search(r'(SELECT|UPDATE|INSERT|DELETE)[^"]*', line)
if query_match:
query = query_match.group(0)[:100]
self.results['mysql_queries'].append(query)
redis_ops = ['SADD', 'GET', 'SET', 'HGET', 'HSET', 'EXEC', 'MULTI', 'QUEUED']
for op in redis_ops:
if op in line:
self.results['redis_operations'][op] += 1
path_match = re.search(r'"(/var/www/vhosts/[^"]+)"', line)
if path_match:
path = path_match.group(1)
if any(x in path for x in ['media', 'image', 'templates', 'cache']):
short_path = path.replace(f'/var/www/vhosts/{self.domain}/httpdocs/', '')
self.results['file_paths'][short_path] += 1
if 'ImageMagick' in line or 'locale.xml' in line:
self.results['slow_operations'].append('ImageMagick/Config-Load')
if 'mremap' in line or 'mmap' in line:
size_match = re.search(r'(\d+)', line)
if size_match and int(size_match.group(1)) > 1000000:
self.results['slow_operations'].append('Large Memory Operation')
if 'poll' in line or 'select' in line:
time_match = re.search(r'<([\d.]+)>', line)
if time_match and float(time_match.group(1)) > 0.1:
self.results['slow_operations'].append(f'Slow I/O Wait ({time_match.group(1)}s)')
def export_missing_files(self):
"""Exportiere fehlende Dateien in verschiedene Formate"""
if not self.results['missing_files']:
return None
os.makedirs(self.output_dir, exist_ok=True)
# 1. Komplette Liste (sortiert nach Haeufigkeit)
list_file = os.path.join(self.output_dir, 'missing_files_all.txt')
with open(list_file, 'w') as f:
f.write(f"# Fehlende Dateien fuer {self.domain}\n")
f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write(f"# Total: {len(self.results['missing_files'])} Dateien\n")
f.write(f"# Zugriffe: {sum(self.results['missing_files'].values())}\n")
f.write("#" + "="*70 + "\n\n")
for filepath, count in self.results['missing_files'].most_common():
f.write(f"[{count:4d}x] {filepath}\n")
# 2. Nach Kategorie sortiert
category_file = os.path.join(self.output_dir, 'missing_files_by_category.txt')
with open(category_file, 'w') as f:
f.write(f"# Fehlende Dateien nach Kategorie - {self.domain}\n")
f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
categories = defaultdict(list)
for filepath, count in self.results['missing_files'].items():
if 'manufacturer' in filepath:
categories['Hersteller-Bilder'].append((filepath, count))
elif 'product' in filepath or 'artikel' in filepath:
categories['Produkt-Bilder'].append((filepath, count))
elif 'variation' in filepath:
categories['Variationen-Bilder'].append((filepath, count))
elif 'ImageMagick' in filepath:
categories['ImageMagick Config'].append((filepath, count))
elif 'category' in filepath or 'kategorie' in filepath:
categories['Kategorie-Bilder'].append((filepath, count))
else:
categories['Sonstige'].append((filepath, count))
for category, items in sorted(categories.items()):
total_accesses = sum(count for _, count in items)
f.write(f"\n{'='*70}\n")
f.write(f"{category.upper()}\n")
f.write(f"{'='*70}\n")
f.write(f"Anzahl Dateien: {len(items)}\n")
f.write(f"Zugriffe gesamt: {total_accesses}\n\n")
for filepath, count in sorted(items, key=lambda x: x[1], reverse=True):
f.write(f"[{count:4d}x] {filepath}\n")
# 3. Bash Script zum Erstellen von Platzhaltern
script_file = os.path.join(self.output_dir, 'create_placeholders.sh')
with open(script_file, 'w') as f:
f.write("#!/bin/bash\n")
f.write(f"# Auto-generated Script zum Erstellen fehlender Bilder\n")
f.write(f"# Domain: {self.domain}\n")
f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
f.write('set -e\n\n')
f.write(f'SHOP_ROOT="/var/www/vhosts/{self.domain}/httpdocs"\n')
f.write('PLACEHOLDER="$SHOP_ROOT/gfx/keinBild.gif"\n\n')
f.write('if [ ! -f "$PLACEHOLDER" ]; then\n')
f.write(' echo "Fehler: Placeholder nicht gefunden: $PLACEHOLDER"\n')
f.write(' exit 1\n')
f.write('fi\n\n')
f.write('echo "Erstelle fehlende Dateien..."\n')
f.write('CREATED=0\n')
f.write('SKIPPED=0\n\n')
# Nur Bilder
image_files = [fp for fp in self.results['missing_files'].keys()
if any(ext in fp.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp'])]
dirs_created = set()
for filepath in image_files:
if filepath.startswith('/var/www/vhosts'):
dirname = os.path.dirname(filepath)
if dirname not in dirs_created:
f.write(f'mkdir -p "{dirname}" 2>/dev/null || true\n')
dirs_created.add(dirname)
f.write(f'if [ ! -f "{filepath}" ]; then\n')
f.write(f' cp "$PLACEHOLDER" "{filepath}" 2>/dev/null && ((CREATED++)) || true\n')
f.write(f'else\n')
f.write(f' ((SKIPPED++))\n')
f.write(f'fi\n')
f.write('\necho ""\n')
f.write('echo "Fertig!"\n')
f.write('echo " Erstellt: $CREATED Platzhalter"\n')
f.write('echo " Uebersprungen: $SKIPPED (existieren bereits)"\n')
f.write(f'echo " Total Dateien: {len(image_files)}"\n')
os.chmod(script_file, 0o755)
# 4. CSV Export
csv_file = os.path.join(self.output_dir, 'missing_files.csv')
with open(csv_file, 'w') as f:
f.write("Zugriffe,Kategorie,Dateipfad\n")
for filepath, count in self.results['missing_files'].most_common():
if 'manufacturer' in filepath:
category = 'Hersteller'
elif 'product' in filepath or 'artikel' in filepath:
category = 'Produkt'
elif 'variation' in filepath:
category = 'Variation'
elif 'category' in filepath or 'kategorie' in filepath:
category = 'Kategorie'
else:
category = 'Sonstig'
filepath_safe = filepath.replace('"', '""')
f.write(f'{count},{category},"{filepath_safe}"\n')
# 5. Hersteller IDs extrahieren
manufacturer_file = os.path.join(self.output_dir, 'missing_manufacturer_ids.txt')
manufacturer_ids = set()
for filepath in self.results['missing_files'].keys():
if 'manufacturer' in filepath:
match = re.search(r'manufacturer/(\d+)/', filepath)
if match:
manufacturer_ids.add(match.group(1))
if manufacturer_ids:
with open(manufacturer_file, 'w') as f:
f.write(f"# Hersteller IDs mit fehlenden Bildern\n")
f.write(f"# Total: {len(manufacturer_ids)} Hersteller\n")
f.write(f"# Verwendung: Im JTL-Shop Admin diese Hersteller pruefen\n\n")
for mid in sorted(manufacturer_ids, key=int):
f.write(f"{mid}\n")
# 6. Nur Dateipfade (fuer weitere Verarbeitung)
paths_only_file = os.path.join(self.output_dir, 'missing_files_paths_only.txt')
with open(paths_only_file, 'w') as f:
for filepath in self.results['missing_files'].keys():
f.write(f"{filepath}\n")
return self.output_dir
def generate_report(self):
"""Generiere Analyse-Report"""
print("\n" + "="*80)
print(f"PERFORMANCE ANALYSE: {self.domain}")
print(f"Datum: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*80 + "\n")
total_syscalls = sum(self.results['syscalls'].values())
if total_syscalls == 0:
print("WARNUNG: Keine Syscalls aufgezeichnet!")
print(" Moegliche Gruende:")
print(" - Prozesse sind gerade idle (wenig Traffic)")
print(" - Strace hat keine Berechtigung")
print(" - Prozesse wurden zwischen Analyse beendet\n")
print(" Versuche: python3 script.py spiel-und-modellbau.com 10\n")
return
print("SYSCALL STATISTIK")
print("-" * 80)
for syscall, count in self.results['syscalls'].most_common(15):
percentage = (count / total_syscalls * 100) if total_syscalls > 0 else 0
bar = '#' * int(percentage / 2)
print(f" {syscall:20s}: {count:6d} ({percentage:5.1f}%) {bar}")
print()
# Fehlende Dateien
missing_count = len(self.results['missing_files'])
if missing_count > 0:
print("FEHLENDE DATEIEN (ENOENT)")
print("-" * 80)
print(f" WARNUNG: {missing_count} verschiedene Dateien nicht gefunden!")
print(f" WARNUNG: {sum(self.results['missing_files'].values())} Zugriffe auf nicht-existierende Dateien!\n")
# Kategorien
categories = defaultdict(int)
for filepath in self.results['missing_files'].keys():
if 'manufacturer' in filepath:
categories['Hersteller-Bilder'] += 1
elif 'product' in filepath or 'artikel' in filepath:
categories['Produkt-Bilder'] += 1
elif 'variation' in filepath:
categories['Variationen-Bilder'] += 1
elif 'category' in filepath or 'kategorie' in filepath:
categories['Kategorie-Bilder'] += 1
else:
categories['Sonstige'] += 1
print(" Kategorien:")
for category, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
print(f" * {category:25s}: {count:4d} Dateien")
print()
print(" Top 15 fehlende Dateien:")
for path, count in self.results['missing_files'].most_common(15):
short_path = path.replace(f'/var/www/vhosts/{self.domain}/httpdocs/', '')
print(f" [{count:3d}x] {short_path}")
if len(self.results['missing_files']) > 15:
print(f"\n INFO: ... und {len(self.results['missing_files'])-15} weitere")
print(f" INFO: Vollstaendige Liste siehe Export-Dateien!")
print()
# Errors
if self.results['errors']:
print("FEHLER")
print("-" * 80)
for error, count in self.results['errors'].items():
print(f" {error:20s}: {count:6d}x")
print()
# MySQL Queries
if self.results['mysql_queries']:
print("MYSQL QUERIES")
print("-" * 80)
query_counter = Counter(self.results['mysql_queries'])
print(f" Total Queries: {len(self.results['mysql_queries'])}")
print(f" Unique Queries: {len(query_counter)}")
print("\n Haeufigste Queries:")
for query, count in query_counter.most_common(10):
print(f" [{count:3d}x] {query[:70]}...")
print()
# File Paths
if self.results['file_paths']:
print("HAEUFIGSTE DATEIZUGRIFFE")
print("-" * 80)
for path, count in self.results['file_paths'].most_common(15):
print(f" [{count:3d}x] {path}")
print()
# Redis
if self.results['redis_operations']:
print("REDIS OPERATIONEN")
print("-" * 80)
total_redis = sum(self.results['redis_operations'].values())
for op, count in self.results['redis_operations'].most_common():
percentage = (count / total_redis * 100) if total_redis > 0 else 0
print(f" {op:15s}: {count:6d}x ({percentage:5.1f}%)")
print()
# Slow Operations
if self.results['slow_operations']:
print("LANGSAME OPERATIONEN")
print("-" * 80)
slow_counter = Counter(self.results['slow_operations'])
for op, count in slow_counter.items():
print(f" WARNUNG: {op}: {count}x")
print()
# Export fehlende Dateien
if missing_count > 0:
print("="*80)
print("EXPORTIERE FEHLENDE DATEIEN")
print("="*80 + "\n")
export_dir = self.export_missing_files()
if export_dir:
print(f"Dateien exportiert nach: {export_dir}\n")
print(" Erstellt:")
print(f" missing_files_all.txt - Komplette Liste (sortiert nach Haeufigkeit)")
print(f" missing_files_by_category.txt - Nach Kategorie gruppiert")
print(f" missing_files.csv - CSV fuer Excel")
print(f" create_placeholders.sh - Bash Script (ausfuehrbar)")
print(f" missing_manufacturer_ids.txt - Hersteller IDs")
print(f" missing_files_paths_only.txt - Nur Pfade (fuer Scripts)\n")
print(" Quick-Fix:")
print(f" bash {export_dir}/create_placeholders.sh\n")
# Handlungsempfehlungen
self.generate_recommendations(total_syscalls, missing_count)
def generate_recommendations(self, total_syscalls, missing_count):
"""Generiere Handlungsempfehlungen"""
print("="*80)
print("HANDLUNGSEMPFEHLUNGEN FUER DEN KUNDEN")
print("="*80 + "\n")
recommendations = []
priority = 1
if missing_count > 5:
manufacturer_missing = sum(1 for p in self.results['missing_files'] if 'manufacturer' in p)
product_missing = sum(1 for p in self.results['missing_files'] if 'product' in p or 'artikel' in p)
if manufacturer_missing > 0:
recommendations.append({
'priority': priority,
'severity': 'KRITISCH',
'problem': f'{manufacturer_missing} Hersteller-Bilder fehlen',
'impact': f'Jedes fehlende Bild = 6-8 stat() Calls. Bei {sum(v for k,v in self.results["missing_files"].items() if "manufacturer" in k)} Zugriffen!',
'solution': '1. JTL-Shop Admin einloggen\n2. Bilder -> Hersteller-Bilder -> "Fehlende generieren"\n3. ODER: Bash Script ausfuehren (siehe Export)',
'files': [f"{self.output_dir}/create_placeholders.sh",
f"{self.output_dir}/missing_manufacturer_ids.txt"]
})
priority += 1
if product_missing > 0:
recommendations.append({
'priority': priority,
'severity': 'WICHTIG',
'problem': f'{product_missing} Produkt-Bilder fehlen',
'impact': 'Erhoehte I/O Last',
'solution': 'JTL-Shop Admin -> Bilder -> "Bildcache regenerieren"'
})
priority += 1
stat_calls = sum(count for syscall, count in self.results['syscalls'].items()
if 'stat' in syscall.lower())
if stat_calls > 500:
recommendations.append({
'priority': priority,
'severity': 'WICHTIG',
'problem': f'{stat_calls} Filesystem stat() Calls',
'impact': 'Filesystem-Thrashing, langsame Response-Times',
'solution': 'PHP Realpath Cache erhoehen in PHP-Einstellungen',
'technical': 'Plesk -> Domain -> PHP Settings:\nrealpath_cache_size = 4096K\nrealpath_cache_ttl = 600'
})
priority += 1
imagemagick_count = sum(1 for op in self.results['slow_operations'] if 'ImageMagick' in op)
if imagemagick_count > 3:
recommendations.append({
'priority': priority,
'severity': 'KRITISCH',
'problem': f'ImageMagick wird {imagemagick_count}x aufgerufen',
'impact': 'CPU-intensive Bildverarbeitung bei jedem Request!',
'solution': '1. Bild-Cache in JTL-Shop aktivieren\n2. Alle Bildgroessen vorher generieren\n3. Pruefen ob Bilder wirklich vorhanden sind'
})
priority += 1
if len(self.results['mysql_queries']) > 50:
recommendations.append({
'priority': priority,
'severity': 'WICHTIG',
'problem': f'{len(self.results["mysql_queries"])} MySQL Queries',
'impact': 'N+1 Query Problem, Database Overhead',
'solution': 'JTL-Shop: System -> Cache -> Object Cache aktivieren (Redis)'
})
priority += 1
eagain_count = self.results['errors'].get('EAGAIN', 0)
if eagain_count > 100:
recommendations.append({
'priority': priority,
'severity': 'WICHTIG',
'problem': f'{eagain_count}x EAGAIN',
'impact': 'Redis/MySQL Verbindungen ueberlastet',
'solution': 'Redis Connection Pool erhoehen oder PHP-FPM Worker erhoehen'
})
priority += 1
if recommendations:
for rec in recommendations:
print(f"[{rec['severity']}] PRIORITAET {rec['priority']}: {rec['problem']}")
print(f" Impact: {rec['impact']}")
print(f" Loesung: {rec['solution']}")
if 'files' in rec:
print(f" Dateien:")
for file in rec['files']:
print(f" * {file}")
if 'technical' in rec:
lines = rec['technical'].split('\n')
print(f" Technisch:")
for line in lines:
print(f" {line}")
print()
else:
print("Keine kritischen Probleme gefunden!\n")
print("="*80)
print("ZUSAMMENFASSUNG")
print("="*80)
print(f" * Total Syscalls: {total_syscalls}")
print(f" * Fehlende Dateien: {missing_count}")
print(f" * MySQL Queries: {len(self.results['mysql_queries'])}")
print(f" * Redis Operations: {sum(self.results['redis_operations'].values())}")
print(f" * Handlungsempfehlungen: {len(recommendations)}")
if missing_count > 0:
print(f"\n Export-Verzeichnis: {self.output_dir}")
print()
def main():
    """CLI entry point: parse args, strace every PHP-FPM worker, print report.

    Usage: <script> <domain> [duration] [max_processes]
    Exits with status 1 on missing arguments, no workers found, or no data.
    """
    if len(sys.argv) < 2:
        # BUGFIX: show the actual script name in the usage text instead of
        # the stale hard-coded "shop_analyzer.py".
        script = os.path.basename(sys.argv[0]) or 'straceanalyse.py'
        print("\n" + "=" * 80)
        print("JTL-Shop Performance Analyzer")
        print("=" * 80)
        print(f"\nUsage: python3 {script} <domain> [duration] [max_processes]")
        print("\nExamples:")
        print(f" python3 {script} spiel-und-modellbau.com")
        print(f" python3 {script} spiel-und-modellbau.com 10")
        print(f" python3 {script} spiel-und-modellbau.com 10 20 # Max 20 Prozesse")
        print("\nParameter:")
        print(" domain - Shop Domain")
        print(" duration - Sekunden pro Prozess (default: 5)")
        print(" max_processes - Max Anzahl Prozesse (default: alle)")
        print()
        sys.exit(1)
    domain = sys.argv[1]
    duration = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    max_processes = int(sys.argv[3]) if len(sys.argv) > 3 else None
    print(f"\nStarte Performance-Analyse fuer: {domain}")
    print(f"Analyse-Dauer: {duration} Sekunden pro Prozess")
    if max_processes:
        print(f"Max Prozesse: {max_processes}")
    else:
        print("Prozesse: ALLE gefundenen PHP-FPM Worker")
    print()
    analyzer = ShopPerformanceAnalyzer(domain)
    pids = analyzer.get_php_fpm_pids()
    if not pids:
        print("Keine PHP-FPM Prozesse gefunden!")
        sys.exit(1)
    # Apply the optional process limit.
    if max_processes and len(pids) > max_processes:
        print(f"{len(pids)} PHP-FPM Prozesse gefunden (analysiere {max_processes})")
        pids = pids[:max_processes]
    else:
        print(f"{len(pids)} PHP-FPM Prozesse gefunden (analysiere alle)")
    print(f" PIDs: {pids}\n")
    total = len(pids)
    analyzed = 0
    failed = 0
    print("Analyse laeuft...")
    print("-" * 80)
    bar_length = 40
    for i, pid in enumerate(pids, 1):
        # In-place progress bar on one terminal line.
        percent = int((i / total) * 100)
        filled = int((percent / 100) * bar_length)
        bar = '#' * filled + '-' * (bar_length - filled)
        print(f"\r[{bar}] {percent:3d}% | PID {pid:6d} ({i}/{total})", end='', flush=True)
        output = analyzer.run_strace(pid, duration)
        # Very short traces are treated as "no data" (idle worker).
        if output and len(output) > 100:
            analyzer.analyze_strace_output(output)
            analyzed += 1
        else:
            failed += 1
    print(f"\r[{'#' * bar_length}] 100% | Fertig!{' ' * 30}")
    print("-" * 80)
    print("\nAnalyse abgeschlossen!")
    print(f" * Erfolgreich analysiert: {analyzed}/{total}")
    if failed > 0:
        print(f" * Idle/Keine Daten: {failed}")
    print()
    if analyzed == 0:
        print("Konnte keine Daten sammeln!")
        print(" Shop hat gerade wenig Traffic. Versuche spaeter nochmal oder erhoehe duration.\n")
        sys.exit(1)
    analyzer.generate_report()
# Script entry point -- only run the analysis when executed directly.
if __name__ == "__main__":
    main()