deep-shop-strace-analyse/straceanalyse.py

#!/usr/bin/env python3
"""
JTL-Shop Performance Analyzer - Analysiert ALLE PHP-FPM Prozesse
"""

import os
import re
import shlex
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime

class ShopPerformanceAnalyzer:
    def __init__(self, domain):
        self.domain = domain
        self.results = {
            'missing_files': Counter(),
            'syscalls': Counter(),
            'mysql_queries': [],
            'redis_operations': Counter(),
            'slow_operations': [],
            'file_paths': Counter(),
            'errors': Counter()
        }
        self.debug = False  # Weniger Output bei vielen Prozessen
        self.output_dir = f"/root/shop_analysis_{domain}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    def get_php_fpm_pids(self):
        """Return the PIDs of all PHP-FPM workers belonging to the shop's pool.

        The process table is read with ``ps`` (no shell involved) and
        filtered in Python, so the domain is never interpreted as shell
        syntax or as a regular expression. A process matches when its
        command line contains ``php-fpm: pool <domain>`` and the pool name
        ends right there, i.e. pools whose name merely starts with the
        domain are not selected.

        Returns:
            list[int]: PIDs of the matching processes. Empty when nothing
            matches or when ``ps`` could not be executed (the error is
            printed in that case).
        """
        marker = f"php-fpm: pool {self.domain}"

        try:
            result = subprocess.run(
                ['ps', '-eo', 'pid=,args='],
                capture_output=True,
                text=True,
                errors='replace',
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"❌ Fehler beim Finden der PIDs: {e}")
            return []

        pids = []
        for line in result.stdout.splitlines():
            fields = line.strip().split(None, 1)
            if len(fields) != 2 or not fields[0].isdigit():
                continue
            pid_text, args = fields

            start = args.find(marker)
            if start == -1:
                continue
            # Require the pool name to end here so that "shop.de" does not
            # also select the workers of a pool named "shop.de.staging".
            rest = args[start + len(marker):]
            if rest and not rest[0].isspace():
                continue

            pids.append(int(pid_text))
        return pids

    def run_strace(self, pid, duration=5):
        """Attach strace to a process for a limited time and return the trace.

        Args:
            pid: PID of the process to attach to.
            duration: Seconds to trace before strace is stopped.

        Returns:
            str: Whatever strace wrote to stderr, or its stdout if stderr
            was empty. An empty string when strace could not be started.
        """
        cmd = [
            'strace',
            '-p', str(pid),
            '-f',
            '-s', '300',
            '-e', 'trace=all',
            '-T'
        ]

        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                errors='replace'
            )
        except (OSError, ValueError):
            # strace is missing or not executable: report "no data".
            return ""

        try:
            stdout, stderr = proc.communicate(timeout=duration)
        except subprocess.TimeoutExpired:
            # Ask strace to stop with SIGTERM first so it gets the chance to
            # detach from the traced worker in an orderly way; SIGKILL cannot
            # be handled and is therefore only the fallback.
            proc.terminate()
            try:
                stdout, stderr = proc.communicate(timeout=2)
            except subprocess.TimeoutExpired:
                proc.kill()
                stdout, stderr = proc.communicate()

        return stderr if stderr else stdout

    def analyze_strace_output(self, output):
        """Analysiere strace Output"""
        if not output or len(output) < 10:
            return

        lines = output.split('\n')

        for line in lines:
            if not line.strip():
                continue

            syscall_match = re.match(r'^(\w+)\(', line)
            if syscall_match:
                self.results['syscalls'][syscall_match.group(1)] += 1

            if 'ENOENT' in line:
                patterns = [
                    r'"([^"]+)".*ENOENT',
                    r'newfstatat\(.*?"([^"]+)".*ENOENT',
                    r'openat\(.*?"([^"]+)".*ENOENT',
                ]
                for pattern in patterns:
                    file_match = re.search(pattern, line)
                    if file_match:
                        filepath = file_match.group(1)
                        self.results['missing_files'][filepath] += 1
                        self.results['errors']['ENOENT'] += 1
                        break

            if 'EAGAIN' in line:
                self.results['errors']['EAGAIN'] += 1

            if any(keyword in line for keyword in ['SELECT', 'UPDATE', 'INSERT', 'DELETE']):
                query_match = re.search(r'(SELECT|UPDATE|INSERT|DELETE)[^"]*', line)
                if query_match:
                    query = query_match.group(0)[:100]
                    self.results['mysql_queries'].append(query)

            redis_ops = ['SADD', 'GET', 'SET', 'HGET', 'HSET', 'EXEC', 'MULTI', 'QUEUED']
            for op in redis_ops:
                if op in line:
                    self.results['redis_operations'][op] += 1

            path_match = re.search(r'"(/var/www/vhosts/[^"]+)"', line)
            if path_match:
                path = path_match.group(1)
                if any(x in path for x in ['media', 'image', 'templates', 'cache']):
                    short_path = path.replace(f'/var/www/vhosts/{self.domain}/httpdocs/', '')
                    self.results['file_paths'][short_path] += 1

            if 'ImageMagick' in line or 'locale.xml' in line:
                self.results['slow_operations'].append('ImageMagick/Config-Load')

            if 'mremap' in line or 'mmap' in line:
                size_match = re.search(r'(\d+)', line)
                if size_match and int(size_match.group(1)) > 1000000:
                    self.results['slow_operations'].append('Large Memory Operation')

            if 'poll' in line or 'select' in line:
                time_match = re.search(r'<([\d.]+)>', line)
                if time_match and float(time_match.group(1)) > 0.1:
                    self.results['slow_operations'].append(f'Slow I/O Wait ({time_match.group(1)}s)')

    def export_missing_files(self):
        """Exportiere fehlende Dateien in verschiedene Formate"""
        if not self.results['missing_files']:
            return None

        os.makedirs(self.output_dir, exist_ok=True)

        # 1. Komplette Liste (sortiert nach Häufigkeit)
        list_file = os.path.join(self.output_dir, 'missing_files_all.txt')
        with open(list_file, 'w') as f:
            f.write(f"# Fehlende Dateien für {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"# Total: {len(self.results['missing_files'])} Dateien\n")
            f.write(f"# Zugriffe: {sum(self.results['missing_files'].values())}\n")
            f.write("#" + "="*70 + "\n\n")

            for filepath, count in self.results['missing_files'].most_common():
                f.write(f"[{count:4d}x] {filepath}\n")

        # 2. Nach Kategorie sortiert
        category_file = os.path.join(self.output_dir, 'missing_files_by_category.txt')
        with open(category_file, 'w') as f:
            f.write(f"# Fehlende Dateien nach Kategorie - {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            categories = defaultdict(list)
            for filepath, count in self.results['missing_files'].items():
                if 'manufacturer' in filepath:
                    categories['Hersteller-Bilder'].append((filepath, count))
                elif 'product' in filepath or 'artikel' in filepath:
                    categories['Produkt-Bilder'].append((filepath, count))
                elif 'variation' in filepath:
                    categories['Variationen-Bilder'].append((filepath, count))
                elif 'ImageMagick' in filepath:
                    categories['ImageMagick Config'].append((filepath, count))
                elif 'category' in filepath or 'kategorie' in filepath:
                    categories['Kategorie-Bilder'].append((filepath, count))
                else:
                    categories['Sonstige'].append((filepath, count))

            for category, items in sorted(categories.items()):
                total_accesses = sum(count for _, count in items)
                f.write(f"\n{'='*70}\n")
                f.write(f"{category.upper()}\n")
                f.write(f"{'='*70}\n")
                f.write(f"Anzahl Dateien: {len(items)}\n")
                f.write(f"Zugriffe gesamt: {total_accesses}\n\n")

                for filepath, count in sorted(items, key=lambda x: x[1], reverse=True):
                    f.write(f"[{count:4d}x] {filepath}\n")

        # 3. Bash Script zum Erstellen von Platzhaltern
        script_file = os.path.join(self.output_dir, 'create_placeholders.sh')
        with open(script_file, 'w') as f:
            f.write("#!/bin/bash\n")
            f.write(f"# Auto-generated Script zum Erstellen fehlender Bilder\n")
            f.write(f"# Domain: {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write('set -e\n\n')
            f.write(f'SHOP_ROOT="/var/www/vhosts/{self.domain}/httpdocs"\n')
            f.write('PLACEHOLDER="$SHOP_ROOT/gfx/keinBild.gif"\n\n')
            f.write('if [ ! -f "$PLACEHOLDER" ]; then\n')
            f.write('  echo "❌ Fehler: Placeholder nicht gefunden: $PLACEHOLDER"\n')
            f.write('  exit 1\n')
            f.write('fi\n\n')
            f.write('echo "Erstelle fehlende Dateien..."\n')
            f.write('CREATED=0\n')
            f.write('SKIPPED=0\n\n')

            # Nur Bilder
            image_files = [fp for fp in self.results['missing_files'].keys()
                          if any(ext in fp.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp'])]

            dirs_created = set()
            for filepath in image_files:
                if filepath.startswith('/var/www/vhosts'):
                    dirname = os.path.dirname(filepath)
                    if dirname not in dirs_created:
                        f.write(f'mkdir -p "{dirname}" 2>/dev/null || true\n')
                        dirs_created.add(dirname)
                    f.write(f'if [ ! -f "{filepath}" ]; then\n')
                    f.write(f'  cp "$PLACEHOLDER" "{filepath}" 2>/dev/null && ((CREATED++)) || true\n')
                    f.write(f'else\n')
                    f.write(f'  ((SKIPPED++))\n')
                    f.write(f'fi\n')

            f.write('\necho ""\n')
            f.write('echo "✅ Fertig!"\n')
            f.write('echo "   Erstellt: $CREATED Platzhalter"\n')
            f.write('echo "   Übersprungen: $SKIPPED (existieren bereits)"\n')
            f.write(f'echo "   Total Dateien: {len(image_files)}"\n')

        os.chmod(script_file, 0o755)

        # 4. CSV Export
        csv_file = os.path.join(self.output_dir, 'missing_files.csv')
        with open(csv_file, 'w') as f:
            f.write("Zugriffe,Kategorie,Dateipfad\n")

            for filepath, count in self.results['missing_files'].most_common():
                if 'manufacturer' in filepath:
                    category = 'Hersteller'
                elif 'product' in filepath or 'artikel' in filepath:
                    category = 'Produkt'
                elif 'variation' in filepath:
                    category = 'Variation'
                elif 'category' in filepath or 'kategorie' in filepath:
                    category = 'Kategorie'
                else:
                    category = 'Sonstig'

                filepath_safe = filepath.replace('"', '""')
                f.write(f'{count},{category},"{filepath_safe}"\n')

        # 5. Hersteller IDs extrahieren
        manufacturer_file = os.path.join(self.output_dir, 'missing_manufacturer_ids.txt')
        manufacturer_ids = set()
        for filepath in self.results['missing_files'].keys():
            if 'manufacturer' in filepath:
                match = re.search(r'manufacturer/(\d+)/', filepath)
                if match:
                    manufacturer_ids.add(match.group(1))

        if manufacturer_ids:
            with open(manufacturer_file, 'w') as f:
                f.write(f"# Hersteller IDs mit fehlenden Bildern\n")
                f.write(f"# Total: {len(manufacturer_ids)} Hersteller\n")
                f.write(f"# Verwendung: Im JTL-Shop Admin diese Hersteller prüfen\n\n")
                for mid in sorted(manufacturer_ids, key=int):
                    f.write(f"{mid}\n")

        # 6. Nur Dateipfade (für weitere Verarbeitung)
        paths_only_file = os.path.join(self.output_dir, 'missing_files_paths_only.txt')
        with open(paths_only_file, 'w') as f:
            for filepath in self.results['missing_files'].keys():
                f.write(f"{filepath}\n")

        return self.output_dir

    def generate_report(self):
        """Generiere Analyse-Report"""
        print("\n" + "="*80)
        print(f"🔍 PERFORMANCE ANALYSE: {self.domain}")
        print(f"📅 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*80 + "\n")

        total_syscalls = sum(self.results['syscalls'].values())

        if total_syscalls == 0:
            print("⚠️ WARNUNG: Keine Syscalls aufgezeichnet!")
            print("   Mögliche Gründe:")
            print("   - Prozesse sind gerade idle (wenig Traffic)")
            print("   - Strace hat keine Berechtigung")
            print("   - Prozesse wurden zwischen Analyse beendet\n")
            print("   Versuche: python3 script.py spiel-und-modellbau.com 10\n")
            return

        print("📊 SYSCALL STATISTIK")
        print("-" * 80)
        for syscall, count in self.results['syscalls'].most_common(15):
            percentage = (count / total_syscalls * 100) if total_syscalls > 0 else 0
            bar = '█' * int(percentage / 2)
            print(f"  {syscall:20s}: {count:6d} ({percentage:5.1f}%) {bar}")
        print()

        # Fehlende Dateien
        missing_count = len(self.results['missing_files'])
        if missing_count > 0:
            print("❌ FEHLENDE DATEIEN (ENOENT)")
            print("-" * 80)
            print(f"  ⚠️  {missing_count} verschiedene Dateien nicht gefunden!")
            print(f"  ⚠️  {sum(self.results['missing_files'].values())} Zugriffe auf nicht-existierende Dateien!\n")

            # Kategorien
            categories = defaultdict(int)
            for filepath in self.results['missing_files'].keys():
                if 'manufacturer' in filepath:
                    categories['Hersteller-Bilder'] += 1
                elif 'product' in filepath or 'artikel' in filepath:
                    categories['Produkt-Bilder'] += 1
                elif 'variation' in filepath:
                    categories['Variationen-Bilder'] += 1
                elif 'category' in filepath or 'kategorie' in filepath:
                    categories['Kategorie-Bilder'] += 1
                else:
                    categories['Sonstige'] += 1

            print("  Kategorien:")
            for category, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
                print(f"    • {category:25s}: {count:4d} Dateien")
            print()

            print("  Top 15 fehlende Dateien:")
            for path, count in self.results['missing_files'].most_common(15):
                short_path = path.replace(f'/var/www/vhosts/{self.domain}/httpdocs/', '')
                print(f"    [{count:3d}x] {short_path}")

            if len(self.results['missing_files']) > 15:
                print(f"\n    ℹ️  ... und {len(self.results['missing_files'])-15} weitere")
                print(f"    ℹ️  Vollständige Liste siehe Export-Dateien!")
            print()

        # Errors
        if self.results['errors']:
            print("⚠️ FEHLER")
            print("-" * 80)
            for error, count in self.results['errors'].items():
                print(f"  {error:20s}: {count:6d}x")
            print()

        # MySQL Queries
        if self.results['mysql_queries']:
            print("🗄️  MYSQL QUERIES")
            print("-" * 80)
            query_counter = Counter(self.results['mysql_queries'])
            print(f"  Total Queries: {len(self.results['mysql_queries'])}")
            print(f"  Unique Queries: {len(query_counter)}")
            print("\n  Häufigste Queries:")
            for query, count in query_counter.most_common(10):
                print(f"    [{count:3d}x] {query[:70]}...")
            print()

        # File Paths
        if self.results['file_paths']:
            print("📁 HÄUFIGSTE DATEIZUGRIFFE")
            print("-" * 80)
            for path, count in self.results['file_paths'].most_common(15):
                print(f"    [{count:3d}x] {path}")
            print()

        # Redis
        if self.results['redis_operations']:
            print("🔴 REDIS OPERATIONEN")
            print("-" * 80)
            total_redis = sum(self.results['redis_operations'].values())
            for op, count in self.results['redis_operations'].most_common():
                percentage = (count / total_redis * 100) if total_redis > 0 else 0
                print(f"  {op:15s}: {count:6d}x ({percentage:5.1f}%)")
            print()

        # Slow Operations
        if self.results['slow_operations']:
            print("🐌 LANGSAME OPERATIONEN")
            print("-" * 80)
            slow_counter = Counter(self.results['slow_operations'])
            for op, count in slow_counter.items():
                print(f"  ⚠️  {op}: {count}x")
            print()

        # Export fehlende Dateien
        if missing_count > 0:
            print("="*80)
            print("💾 EXPORTIERE FEHLENDE DATEIEN")
            print("="*80 + "\n")

            export_dir = self.export_missing_files()
            if export_dir:
                print(f"✅ Dateien exportiert nach: {export_dir}\n")
                print("  Erstellt:")
                print(f"    📄 missing_files_all.txt          - Komplette Liste (sortiert nach Häufigkeit)")
                print(f"    📁 missing_files_by_category.txt  - Nach Kategorie gruppiert")
                print(f"    📊 missing_files.csv              - CSV für Excel")
                print(f"    🔧 create_placeholders.sh         - Bash Script (ausführbar)")
                print(f"    🏷️  missing_manufacturer_ids.txt   - Hersteller IDs")
                print(f"    📝 missing_files_paths_only.txt   - Nur Pfade (für Scripts)\n")

                print("  Quick-Fix:")
                print(f"    bash {export_dir}/create_placeholders.sh\n")

        # Handlungsempfehlungen
        self.generate_recommendations(total_syscalls, missing_count)

    def generate_recommendations(self, total_syscalls, missing_count):
        """Generiere Handlungsempfehlungen"""
        print("="*80)
        print("💡 HANDLUNGSEMPFEHLUNGEN FÜR DEN KUNDEN")
        print("="*80 + "\n")

        recommendations = []
        priority = 1

        if missing_count > 5:
            manufacturer_missing = sum(1 for p in self.results['missing_files'] if 'manufacturer' in p)
            product_missing = sum(1 for p in self.results['missing_files'] if 'product' in p or 'artikel' in p)

            if manufacturer_missing > 0:
                recommendations.append({
                    'priority': priority,
                    'severity': '🔥 KRITISCH',
                    'problem': f'{manufacturer_missing} Hersteller-Bilder fehlen',
                    'impact': f'Jedes fehlende Bild = 6-8 stat() Calls. Bei {sum(v for k,v in self.results["missing_files"].items() if "manufacturer" in k)} Zugriffen!',
                    'solution': '1. JTL-Shop Admin einloggen\n2. Bilder → Hersteller-Bilder → "Fehlende generieren"\n3. ODER: Bash Script ausführen (siehe Export)',
                    'files': [f"{self.output_dir}/create_placeholders.sh",
                             f"{self.output_dir}/missing_manufacturer_ids.txt"]
                })
                priority += 1

            if product_missing > 0:
                recommendations.append({
                    'priority': priority,
                    'severity': '⚠️ WICHTIG',
                    'problem': f'{product_missing} Produkt-Bilder fehlen',
                    'impact': 'Erhöhte I/O Last',
                    'solution': 'JTL-Shop Admin → Bilder → "Bildcache regenerieren"'
                })
                priority += 1

        stat_calls = sum(count for syscall, count in self.results['syscalls'].items()
                        if 'stat' in syscall.lower())
        if stat_calls > 500:
            recommendations.append({
                'priority': priority,
                'severity': '⚠️ WICHTIG',
                'problem': f'{stat_calls} Filesystem stat() Calls',
                'impact': 'Filesystem-Thrashing, langsame Response-Times',
                'solution': 'PHP Realpath Cache erhöhen in PHP-Einstellungen',
                'technical': 'Plesk → Domain → PHP Settings:\nrealpath_cache_size = 4096K\nrealpath_cache_ttl = 600'
            })
            priority += 1

        imagemagick_count = sum(1 for op in self.results['slow_operations'] if 'ImageMagick' in op)
        if imagemagick_count > 3:
            recommendations.append({
                'priority': priority,
                'severity': '🔥 KRITISCH',
                'problem': f'ImageMagick wird {imagemagick_count}x aufgerufen',
                'impact': 'CPU-intensive Bildverarbeitung bei jedem Request!',
                'solution': '1. Bild-Cache in JTL-Shop aktivieren\n2. Alle Bildgrößen vorher generieren\n3. Prüfen ob Bilder wirklich vorhanden sind'
            })
            priority += 1

        if len(self.results['mysql_queries']) > 50:
            recommendations.append({
                'priority': priority,
                'severity': '⚠️ WICHTIG',
                'problem': f'{len(self.results["mysql_queries"])} MySQL Queries',
                'impact': 'N+1 Query Problem, Database Overhead',
                'solution': 'JTL-Shop: System → Cache → Object Cache aktivieren (Redis)'
            })
            priority += 1

        eagain_count = self.results['errors'].get('EAGAIN', 0)
        if eagain_count > 100:
            recommendations.append({
                'priority': priority,
                'severity': '⚠️ WICHTIG',
                'problem': f'{eagain_count}x EAGAIN',
                'impact': 'Redis/MySQL Verbindungen überlastet',
                'solution': 'Redis Connection Pool erhöhen oder PHP-FPM Worker erhöhen'
            })
            priority += 1

        if recommendations:
            for rec in recommendations:
                print(f"{rec['severity']} PRIORITÄT {rec['priority']}: {rec['problem']}")
                print(f"   📊 Impact: {rec['impact']}")
                print(f"   ✅ Lösung: {rec['solution']}")
                if 'files' in rec:
                    print(f"   📁 Dateien:")
                    for file in rec['files']:
                        print(f"      • {file}")
                if 'technical' in rec:
                    lines = rec['technical'].split('\n')
                    print(f"   🔧 Technisch:")
                    for line in lines:
                        print(f"      {line}")
                print()
        else:
            print("✅ Keine kritischen Probleme gefunden!\n")

        print("="*80)
        print("📋 ZUSAMMENFASSUNG")
        print("="*80)
        print(f"  • Total Syscalls: {total_syscalls}")
        print(f"  • Fehlende Dateien: {missing_count}")
        print(f"  • MySQL Queries: {len(self.results['mysql_queries'])}")
        print(f"  • Redis Operations: {sum(self.results['redis_operations'].values())}")
        print(f"  • Handlungsempfehlungen: {len(recommendations)}")
        if missing_count > 0:
            print(f"\n  📁 Export-Verzeichnis: {self.output_dir}")
        print()

def main():
    """Command-line entry point: trace the shop's PHP-FPM workers and report.

    Usage: ``<script> <domain> [duration] [max_processes]``

    * ``domain`` - shop domain
    * ``duration`` - seconds of tracing per process (default 5, must be >= 1)
    * ``max_processes`` - upper limit of traced processes (default and 0: all)

    Exits with status 1 on missing or invalid arguments, when no PHP-FPM
    process is found, or when no process yielded usable trace data.
    """
    if len(sys.argv) < 2:
        print("\n" + "="*80)
        print("JTL-Shop Performance Analyzer")
        print("="*80)
        print("\nUsage: python3 shop_analyzer.py <domain> [duration] [max_processes]")
        print("\nExamples:")
        print("  python3 shop_analyzer.py spiel-und-modellbau.com")
        print("  python3 shop_analyzer.py spiel-und-modellbau.com 10")
        print("  python3 shop_analyzer.py spiel-und-modellbau.com 10 20  # Max 20 Prozesse")
        print("\nParameter:")
        print("  domain        - Shop Domain")
        print("  duration      - Sekunden pro Prozess (default: 5)")
        print("  max_processes - Max Anzahl Prozesse (default: alle)")
        print()
        sys.exit(1)

    domain = sys.argv[1]

    # Validate the numeric arguments up front so that a typo produces a
    # readable message instead of a traceback.
    try:
        duration = int(sys.argv[2]) if len(sys.argv) > 2 else 5
        max_processes = int(sys.argv[3]) if len(sys.argv) > 3 else None
    except ValueError:
        print("❌ duration und max_processes müssen ganze Zahlen sein!")
        sys.exit(1)

    # A negative limit would silently drop PIDs from the end of the list and
    # a non-positive duration would stop strace before it recorded anything.
    if duration < 1 or (max_processes is not None and max_processes < 0):
        print("❌ duration muss >= 1 und max_processes >= 0 sein!")
        sys.exit(1)

    print(f"\n🚀 Starte Performance-Analyse für: {domain}")
    print(f"⏱️  Analyse-Dauer: {duration} Sekunden pro Prozess")
    if max_processes:
        print(f"🔢 Max Prozesse: {max_processes}")
    else:
        print("🔢 Prozesse: ALLE gefundenen PHP-FPM Worker")
    print()

    analyzer = ShopPerformanceAnalyzer(domain)

    pids = analyzer.get_php_fpm_pids()
    if not pids:
        print("❌ Keine PHP-FPM Prozesse gefunden!")
        sys.exit(1)

    # Apply the limit if one was given
    if max_processes and len(pids) > max_processes:
        print(f"✅ {len(pids)} PHP-FPM Prozesse gefunden (analysiere {max_processes})")
        pids = pids[:max_processes]
    else:
        print(f"✅ {len(pids)} PHP-FPM Prozesse gefunden (analysiere alle)")

    print(f"   PIDs: {pids}\n")

    # Progress bar setup
    total = len(pids)
    bar_length = 40
    analyzed = 0
    failed = 0

    print("🔄 Analyse läuft...")
    print("-" * 80)

    for i, pid in enumerate(pids, 1):
        # Progress indicator, redrawn in place via the carriage return
        percent = int((i / total) * 100)
        filled = int((percent / 100) * bar_length)
        bar = '█' * filled + '░' * (bar_length - filled)

        print(f"\r[{bar}] {percent:3d}% | PID {pid:6d} ({i}/{total})", end='', flush=True)

        output = analyzer.run_strace(pid, duration)
        # Very short output is treated as "nothing usable recorded".
        if output and len(output) > 100:
            analyzer.analyze_strace_output(output)
            analyzed += 1
        else:
            failed += 1

    print(f"\r[{'█' * bar_length}] 100% | Fertig!{' ' * 30}")
    print("-" * 80)
    print("\n✅ Analyse abgeschlossen!")
    print(f"   • Erfolgreich analysiert: {analyzed}/{total}")
    if failed > 0:
        print(f"   • Idle/Keine Daten: {failed}")
    print()

    if analyzed == 0:
        print("⚠️ Konnte keine Daten sammeln!")
        print("   Shop hat gerade wenig Traffic. Versuche später nochmal oder erhöhe duration.\n")
        sys.exit(1)

    analyzer.generate_report()

# Run the analysis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()