Files
deep-shop-strace-analyse/straceanalyse.py

547 lines
24 KiB
Python

#!/usr/bin/env python3
"""
JTL-Shop Performance Analyzer - Mit Script-Detection (Complete Fixed Version)
"""
import os
import re
import shlex
import subprocess
import sys
from collections import Counter, defaultdict
from datetime import datetime
class ShopPerformanceAnalyzer:
    """Trace the PHP-FPM workers of one shop with strace and summarise the result.

    Typical flow: ``get_php_fpm_pids`` -> ``run_strace`` for each PID ->
    ``analyze_strace_output`` for each capture -> ``generate_report``.
    Every finding is accumulated in ``self.results``:

    * ``missing_files``          Counter: path -> number of ENOENT hits
    * ``missing_files_context``  dict: path -> {'count', 'php_scripts',
      'requests', 'pids'} (the last three are sets)
    * ``syscalls``               Counter: syscall name -> number of calls
    * ``mysql_queries``          list of SQL fragments (max. 100 characters)
    * ``redis_operations``       Counter: Redis command -> number of lines
    * ``slow_operations``        list of human-readable markers
    * ``file_paths``             Counter: shop-relative path -> accesses
    * ``errors``                 Counter: errno name -> occurrences
    """

    # Patterns applied to every strace line; compiled once instead of per line.
    # With ``-f`` strace prefixes lines of followed processes with "[pid N] ",
    # so the syscall name is not necessarily at the very start of the line.
    _SYSCALL_RE = re.compile(r'^(?:\[pid\s+\d+\]\s+)?(\w+)\(')
    _PHP_FILE_RE = re.compile(r'"([^"]+\.php)"')
    _ENOENT_RE = re.compile(r'"([^"]+)".*ENOENT')
    _SQL_RE = re.compile(r'(SELECT|UPDATE|INSERT|DELETE)[^"]*')
    # The look-arounds stop "HGET" from also counting as "GET" (same for
    # "HSET"/"SET").  A plain \b would not work: strace prints the Redis
    # protocol as "...\r\nGET\r\n", i.e. the command follows a literal "n".
    _REDIS_RE = re.compile(
        r'(?<![A-Z])(SADD|GET|SET|HGET|HSET|EXEC|MULTI|QUEUED)(?![A-Z])'
    )
    _VHOST_PATH_RE = re.compile(r'"(/var/www/vhosts/[^"]+)"')
    # mmap(addr, length, ...) and mremap(old_addr, old_size, new_size, ...).
    _MMAP_LEN_RE = re.compile(r'\bmmap\([^,]+,\s*(\d+)')
    _MREMAP_LEN_RE = re.compile(r'\bmremap\([^,]+,\s*\d+,\s*(\d+)')
    # ``-T`` appends the time spent in the syscall as "<seconds>" at line end.
    _DURATION_RE = re.compile(r'<(\d+(?:\.\d+)?)>\s*$')

    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

    # (substrings, label in the category report, label in the CSV export).
    # The first matching rule wins.  A label of None means the rule is not
    # used for that output (the CSV has no separate ImageMagick category).
    _CATEGORY_RULES = (
        (('manufacturer',), 'Hersteller-Bilder', 'Hersteller'),
        (('product', 'artikel'), 'Produkt-Bilder', 'Produkt'),
        (('variation',), 'Variationen-Bilder', 'Variation'),
        (('ImageMagick',), 'ImageMagick Config', None),
        (('category', 'kategorie'), 'Kategorie-Bilder', 'Kategorie'),
    )

    def __init__(self, domain):
        """Create an empty result set for *domain* and pick the export directory."""
        self.domain = domain
        self.results = {
            'missing_files': Counter(),
            'missing_files_context': {},
            'syscalls': Counter(),
            'mysql_queries': [],
            'redis_operations': Counter(),
            'slow_operations': [],
            'file_paths': Counter(),
            'errors': Counter()
        }
        self.debug = False
        self.output_dir = f"/root/shop_analysis_{domain}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    def _shorten(self, path):
        """Return *path* with the shop's document-root prefix removed."""
        return path.replace(f'/var/www/vhosts/{self.domain}/httpdocs/', '')

    @staticmethod
    def _real_scripts(ctx):
        """Return the sorted script paths of *ctx* without None/'unknown' entries."""
        return sorted(
            s for s in ctx['php_scripts'] if s is not None and s != 'unknown'
        )

    @classmethod
    def _categorize(cls, filepath, for_csv=False):
        """Return the category label of *filepath* for the report or the CSV."""
        label_index = 2 if for_csv else 1
        for rule in cls._CATEGORY_RULES:
            label = rule[label_index]
            if label is not None and any(part in filepath for part in rule[0]):
                return label
        return 'Sonstig' if for_csv else 'Sonstige'

    def _open_export(self, name):
        """Open the export file *name* inside the output directory for writing."""
        # Explicit encoding: the result must not depend on the locale of the
        # shell the tool happens to be started from.
        return open(os.path.join(self.output_dir, name), 'w', encoding='utf-8')

    def _record_missing_file(self, filepath, php_file, request, pid):
        """Count one ENOENT hit for *filepath* and remember who caused it."""
        self.results['missing_files'][filepath] += 1
        self.results['errors']['ENOENT'] += 1
        context = self.results['missing_files_context'].setdefault(filepath, {
            'count': 0,
            'php_scripts': set(),
            'requests': set(),
            'pids': set()
        })
        context['count'] += 1
        if php_file:
            context['php_scripts'].add(php_file)
        if request:
            context['requests'].add(request)
        context['pids'].add(pid)

    # ------------------------------------------------------------------
    # data collection
    # ------------------------------------------------------------------
    def get_process_info(self, pid):
        """Read working directory and request variables of *pid* from /proc.

        Returns a dict with the keys ``cwd``, ``request_uri`` and
        ``script_filename``; a value stays ``None`` when it cannot be read
        (process gone, no permission) or is not present.
        """
        info = {
            'cwd': None,
            'request_uri': None,
            'script_filename': None
        }
        try:
            info['cwd'] = os.readlink(f'/proc/{pid}/cwd')
        except OSError:
            # Best effort: the process may have exited or is not ours.
            pass
        try:
            with open(f'/proc/{pid}/environ', 'rb') as f:
                environ = f.read().decode('utf-8', errors='ignore')
        except OSError:
            return info
        # NOTE(review): this only finds the variables if they are part of the
        # worker's process environment - confirm that this is the case for
        # the PHP-FPM setup in use.
        for var in environ.split('\x00'):
            key, sep, value = var.partition('=')
            if not sep:
                continue
            if key == 'REQUEST_URI':
                info['request_uri'] = value
            elif key == 'SCRIPT_FILENAME':
                info['script_filename'] = value
        return info

    def get_php_fpm_pids(self):
        """Return the PIDs of all PHP-FPM pool workers of the shop.

        A process matches when its command line contains
        ``php-fpm: pool <domain>``.  Returns an empty list when ``ps`` cannot
        be started.
        """
        needle = f'php-fpm: pool {self.domain}'
        try:
            # No shell involved: the domain is never interpreted as shell
            # syntax, and there is no grep process that has to be filtered
            # out again.  Each column needs its own -o so that both headers
            # are empty and the header line is suppressed.
            result = subprocess.run(
                ['ps', '-e', '-ww', '-o', 'pid=', '-o', 'args='],
                capture_output=True,
                text=True,
                errors='replace',
            )
        except OSError as e:
            print(f"Fehler beim Finden der PIDs: {e}")
            return []
        pids = []
        for line in result.stdout.splitlines():
            pid_text, _, args = line.strip().partition(' ')
            if pid_text.isdigit() and needle in args:
                pids.append(int(pid_text))
        return pids

    def run_strace(self, pid, duration=5):
        """Attach strace to *pid* for *duration* seconds and return its output.

        Returns the captured text (strace writes its trace to stderr) or an
        empty string when strace could not be started.
        """
        cmd = [
            'strace',
            '-p', str(pid),
            '-f',
            '-s', '500',
            '-e', 'trace=all',
            '-T'
        ]
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                errors='replace',
            )
        except OSError:
            # strace is not installed or not executable.
            return ""
        try:
            stdout, stderr = proc.communicate(timeout=duration)
        except subprocess.TimeoutExpired:
            # Ask politely first: SIGTERM gives strace the chance to detach
            # from the traced worker, SIGKILL gives it no chance to do so.
            proc.terminate()
            try:
                stdout, stderr = proc.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
                stdout, stderr = proc.communicate()
        return stderr if stderr else stdout

    def analyze_strace_output(self, output, pid):
        """Parse one strace capture of *pid* and merge it into ``self.results``.

        Captures shorter than 10 characters are ignored.  Every missing file
        (ENOENT) is recorded together with the PHP file that was opened
        successfully most recently, the request URI (if known) and the PID.
        """
        if not output or len(output) < 10:
            return
        proc_info = self.get_process_info(pid)
        last_php_file = proc_info.get('script_filename') or 'unknown'
        current_request = proc_info.get('request_uri') or 'unknown'
        results = self.results

        for line in output.split('\n'):
            if not line.strip():
                continue

            # Track the PHP file opened last.  Failed opens are skipped,
            # otherwise a missing .php file would be reported as its own
            # calling script.
            if ('openat' in line or 'open(' in line) and ' = -1 ' not in line:
                php_match = self._PHP_FILE_RE.search(line)
                if php_match:
                    last_php_file = php_match.group(1)

            syscall_match = self._SYSCALL_RE.match(line)
            if syscall_match:
                results['syscalls'][syscall_match.group(1)] += 1

            if 'ENOENT' in line:
                file_match = self._ENOENT_RE.search(line)
                if file_match:
                    self._record_missing_file(
                        file_match.group(1), last_php_file, current_request, pid
                    )

            if 'EAGAIN' in line:
                results['errors']['EAGAIN'] += 1

            query_match = self._SQL_RE.search(line)
            if query_match:
                results['mysql_queries'].append(query_match.group(0)[:100])

            # Each command counts at most once per line.
            for op in set(self._REDIS_RE.findall(line)):
                results['redis_operations'][op] += 1

            path_match = self._VHOST_PATH_RE.search(line)
            if path_match:
                path = path_match.group(1)
                if any(x in path for x in ['media', 'image', 'templates', 'cache']):
                    results['file_paths'][self._shorten(path)] += 1

            if 'ImageMagick' in line or 'locale.xml' in line:
                results['slow_operations'].append('ImageMagick/Config-Load')

            # Look at the length argument itself; the first number on the
            # line would be the pid prefix or the "0" of a 0x... address.
            size_match = (self._MMAP_LEN_RE.search(line)
                          or self._MREMAP_LEN_RE.search(line))
            if size_match and int(size_match.group(1)) > 1000000:
                results['slow_operations'].append('Large Memory Operation')

            if 'poll' in line or 'select' in line:
                time_match = self._DURATION_RE.search(line)
                if time_match and float(time_match.group(1)) > 0.1:
                    results['slow_operations'].append(
                        f'Slow I/O Wait ({time_match.group(1)}s)'
                    )

    # ------------------------------------------------------------------
    # export
    # ------------------------------------------------------------------
    def export_missing_files(self):
        """Write all export files for the missing files found so far.

        Creates ``self.output_dir`` if necessary.  Returns the output
        directory, or ``None`` when no missing file has been recorded.
        """
        if not self.results['missing_files']:
            return None
        os.makedirs(self.output_dir, exist_ok=True)
        self._write_full_list()
        self._write_script_mapping()
        self._write_category_list()
        self._write_placeholder_script()
        self._write_csv()
        self._write_manufacturer_ids()
        self._write_paths_only()
        return self.output_dir

    def _write_full_list(self):
        """Write missing_files_all.txt: every missing file with its context."""
        missing = self.results['missing_files']
        contexts = self.results['missing_files_context']
        with self._open_export('missing_files_all.txt') as f:
            f.write(f"# Fehlende Dateien fuer {self.domain} - MIT SCRIPT CONTEXT\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"# Total: {len(missing)} Dateien\n")
            f.write(f"# Zugriffe: {sum(missing.values())}\n")
            f.write("#" + "="*70 + "\n\n")
            for filepath, count in missing.most_common():
                f.write(f"\n{'='*70}\n")
                f.write(f"[{count:4d}x] {filepath}\n")
                f.write(f"{'='*70}\n")
                if filepath in contexts:
                    ctx = contexts[filepath]
                    real_scripts = self._real_scripts(ctx)
                    if real_scripts:
                        f.write("\n Aufgerufen von:\n")
                        for script in real_scripts:
                            f.write(f" * {self._shorten(script)}\n")
                    else:
                        f.write("\n Aufgerufen von: unknown (konnte nicht ermittelt werden)\n")
                    requests = [r for r in ctx['requests']
                                if r is not None and r != 'unknown']
                    if requests:
                        f.write("\n Bei Requests:\n")
                        for req in sorted(requests):
                            f.write(f" * {req}\n")
                    f.write(f"\n PIDs: {', '.join(map(str, sorted(ctx['pids'])))}\n")
                f.write("\n")

    def _write_script_mapping(self):
        """Write missing_files_by_script.txt: missing files grouped by script."""
        script_to_files = defaultdict(list)
        for filepath, ctx in self.results['missing_files_context'].items():
            php_scripts = [s for s in ctx['php_scripts'] if s is not None]
            if not php_scripts:
                php_scripts = ['unknown']
            for script in php_scripts:
                script_to_files[script].append((filepath, ctx['count']))
        with self._open_export('missing_files_by_script.txt') as f:
            f.write("# Fehlende Dateien gruppiert nach aufrufendem Script\n")
            f.write(f"# Domain: {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            for script, files in sorted(script_to_files.items()):
                if script and script != 'unknown':
                    short_script = self._shorten(script)
                else:
                    short_script = 'unknown (konnte nicht ermittelt werden)'
                f.write(f"\n{'='*70}\n")
                f.write(f"SCRIPT: {short_script}\n")
                f.write(f"{'='*70}\n")
                f.write(f"Anzahl fehlender Dateien: {len(files)}\n\n")
                for filepath, count in sorted(files, key=lambda x: x[1], reverse=True):
                    f.write(f" [{count:4d}x] {self._shorten(filepath)}\n")

    def _write_category_list(self):
        """Write missing_files_by_category.txt: missing files per category."""
        categories = defaultdict(list)
        for filepath, count in self.results['missing_files'].items():
            categories[self._categorize(filepath)].append((filepath, count))
        with self._open_export('missing_files_by_category.txt') as f:
            f.write(f"# Fehlende Dateien nach Kategorie - {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            for category, items in sorted(categories.items()):
                total_accesses = sum(count for _, count in items)
                f.write(f"\n{'='*70}\n")
                f.write(f"{category.upper()}\n")
                f.write(f"{'='*70}\n")
                f.write(f"Anzahl Dateien: {len(items)}\n")
                f.write(f"Zugriffe gesamt: {total_accesses}\n\n")
                for filepath, count in sorted(items, key=lambda x: x[1], reverse=True):
                    f.write(f"[{count:4d}x] {filepath}\n")

    def _write_placeholder_script(self):
        """Write create_placeholders.sh, which copies a placeholder image to
        every missing image path below /var/www/vhosts."""
        image_files = [fp for fp in self.results['missing_files']
                       if fp.lower().endswith(self._IMAGE_EXTENSIONS)]
        script_file = os.path.join(self.output_dir, 'create_placeholders.sh')
        shop_root = shlex.quote(f'/var/www/vhosts/{self.domain}/httpdocs')
        with self._open_export('create_placeholders.sh') as f:
            f.write("#!/bin/bash\n")
            f.write("# Auto-generated Script zum Erstellen fehlender Bilder\n")
            f.write(f"# Domain: {self.domain}\n")
            f.write(f"# Erstellt: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write('set -e\n\n')
            f.write(f'SHOP_ROOT={shop_root}\n')
            f.write('PLACEHOLDER="$SHOP_ROOT/gfx/keinBild.gif"\n\n')
            f.write('if [ ! -f "$PLACEHOLDER" ]; then\n')
            f.write(' echo "Fehler: Placeholder nicht gefunden: $PLACEHOLDER"\n')
            f.write(' exit 1\n')
            f.write('fi\n\n')
            f.write('echo "Erstelle fehlende Dateien..."\n')
            f.write('CREATED=0\n')
            f.write('SKIPPED=0\n\n')
            dirs_created = set()
            for filepath in image_files:
                if not filepath.startswith('/var/www/vhosts'):
                    continue
                # The paths originate from traced requests, i.e. from
                # untrusted input: quote them so that "$(...)", backticks or
                # quotes inside a file name are never executed by bash.
                quoted_path = shlex.quote(filepath)
                dirname = os.path.dirname(filepath)
                if dirname not in dirs_created:
                    f.write(f'mkdir -p {shlex.quote(dirname)} 2>/dev/null || true\n')
                    dirs_created.add(dirname)
                f.write(f'if [ ! -f {quoted_path} ]; then\n')
                # Plain assignments instead of ((VAR++)): the arithmetic
                # command returns status 1 when the old value is 0, which
                # aborts the script under "set -e".
                f.write(f' cp "$PLACEHOLDER" {quoted_path} 2>/dev/null && CREATED=$((CREATED+1)) || true\n')
                f.write('else\n')
                f.write(' SKIPPED=$((SKIPPED+1))\n')
                f.write('fi\n')
            f.write('\necho ""\n')
            f.write('echo "Fertig!"\n')
            f.write('echo " Erstellt: $CREATED Platzhalter"\n')
            f.write('echo " Uebersprungen: $SKIPPED (existieren bereits)"\n')
            f.write(f'echo " Total Dateien: {len(image_files)}"\n')
        os.chmod(script_file, 0o755)

    def _write_csv(self):
        """Write missing_files.csv: count, category, path and calling scripts."""
        contexts = self.results['missing_files_context']
        with self._open_export('missing_files.csv') as f:
            f.write("Zugriffe,Kategorie,Dateipfad,Aufgerufen_von_Script\n")
            for filepath, count in self.results['missing_files'].most_common():
                category = self._categorize(filepath, for_csv=True)
                scripts = 'unknown'
                if filepath in contexts:
                    real_scripts = [self._shorten(s)
                                    for s in self._real_scripts(contexts[filepath])]
                    if real_scripts:
                        scripts = '; '.join(real_scripts)
                # Both quoted columns need their embedded quotes doubled.
                filepath_safe = filepath.replace('"', '""')
                scripts_safe = scripts.replace('"', '""')
                f.write(f'{count},{category},"{filepath_safe}","{scripts_safe}"\n')

    def _write_manufacturer_ids(self):
        """Write missing_manufacturer_ids.txt if any manufacturer image is missing."""
        manufacturer_ids = set()
        for filepath in self.results['missing_files']:
            match = re.search(r'manufacturer/(\d+)/', filepath)
            if match:
                manufacturer_ids.add(match.group(1))
        if not manufacturer_ids:
            return
        with self._open_export('missing_manufacturer_ids.txt') as f:
            f.write("# Hersteller IDs mit fehlenden Bildern\n")
            f.write(f"# Total: {len(manufacturer_ids)} Hersteller\n")
            f.write("# Verwendung: Im JTL-Shop Admin diese Hersteller pruefen\n\n")
            for mid in sorted(manufacturer_ids, key=int):
                f.write(f"{mid}\n")

    def _write_paths_only(self):
        """Write missing_files_paths_only.txt: one missing path per line."""
        with self._open_export('missing_files_paths_only.txt') as f:
            for filepath in self.results['missing_files']:
                f.write(f"{filepath}\n")

    # ------------------------------------------------------------------
    # reporting
    # ------------------------------------------------------------------
    def generate_report(self):
        """Print the analysis report and export the missing files, if any.

        Prints only a warning and returns early when no syscall has been
        recorded at all.
        """
        missing = self.results['missing_files']
        contexts = self.results['missing_files_context']

        print("\n" + "="*80)
        print(f"PERFORMANCE ANALYSE: {self.domain}")
        print(f"Datum: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*80 + "\n")
        total_syscalls = sum(self.results['syscalls'].values())
        if total_syscalls == 0:
            print("WARNUNG: Keine Syscalls aufgezeichnet!")
            return

        print("SYSCALL STATISTIK")
        print("-" * 80)
        for syscall, count in self.results['syscalls'].most_common(15):
            percentage = count / total_syscalls * 100
            bar = '#' * int(percentage / 2)
            print(f" {syscall:20s}: {count:6d} ({percentage:5.1f}%) {bar}")
        print()

        missing_count = len(missing)
        if missing_count > 0:
            print("FEHLENDE DATEIEN (ENOENT) - MIT SCRIPT CONTEXT")
            print("-" * 80)
            print(f" WARNUNG: {missing_count} verschiedene Dateien nicht gefunden!")
            print(f" WARNUNG: {sum(missing.values())} Zugriffe\n")
            print(" Top 10 fehlende Dateien (mit aufrufendem Script):")
            for path, count in missing.most_common(10):
                print(f"\n [{count:3d}x] {self._shorten(path)}")
                if path in contexts:
                    real_scripts = self._real_scripts(contexts[path])
                    if real_scripts:
                        print(" Aufgerufen von:")
                        for script in real_scripts[:3]:
                            print(f" -> {self._shorten(script)}")
                        if len(real_scripts) > 3:
                            print(f" -> ... und {len(real_scripts)-3} weitere")
                    else:
                        print(" Aufgerufen von: unknown (konnte nicht ermittelt werden)")
            if missing_count > 10:
                print(f"\n INFO: ... und {missing_count-10} weitere")
            print("\n INFO: Vollstaendige Liste siehe Export-Dateien!")
            print()

        if self.results['errors']:
            print("FEHLER")
            print("-" * 80)
            for error, count in self.results['errors'].items():
                print(f" {error:20s}: {count:6d}x")
            print()

        if missing_count > 0:
            print("="*80)
            print("EXPORTIERE FEHLENDE DATEIEN")
            print("="*80 + "\n")
            export_dir = self.export_missing_files()
            if export_dir:
                print(f"Dateien exportiert nach: {export_dir}\n")
                print(" Erstellt:")
                print(" missing_files_all.txt - Komplette Liste MIT Script-Context")
                print(" missing_files_by_script.txt - Gruppiert nach PHP-Script")
                print(" missing_files_by_category.txt - Nach Kategorie gruppiert")
                print(" missing_files.csv - CSV mit Script-Info")
                print(" create_placeholders.sh - Bash Script")
                print(" missing_manufacturer_ids.txt - Hersteller IDs")
                print(" missing_files_paths_only.txt - Nur Pfade\n")
                print(" Quick-Fix:")
                print(f" bash {export_dir}/create_placeholders.sh\n")

        print("="*80)
        print("ZUSAMMENFASSUNG")
        print("="*80)
        print(f" * Total Syscalls: {total_syscalls}")
        print(f" * Fehlende Dateien: {missing_count}")
        print(f" * MySQL Queries: {len(self.results['mysql_queries'])}")
        if missing_count > 0:
            print(f"\n Export-Verzeichnis: {self.output_dir}")
        print()
def main():
    """Command-line entry point: trace the shop's PHP-FPM workers and report.

    Arguments (``sys.argv``): ``<domain> [duration] [max_processes]``.
    ``duration`` is the number of seconds each worker is traced (default 5),
    ``max_processes`` limits the number of traced workers (default and 0:
    no limit).  Exits with status 1 on usage errors, when no worker is found
    or when no data could be collected.
    """
    if len(sys.argv) < 2:
        print("\n" + "="*80)
        print("JTL-Shop Performance Analyzer - Mit Script Detection")
        print("="*80)
        print("\nUsage: python3 shop_analyzer.py <domain> [duration] [max_processes]")
        print("\nExamples:")
        print(" python3 shop_analyzer.py spiel-und-modellbau.com 10")
        print(" python3 shop_analyzer.py spiel-und-modellbau.com 10 20")
        print()
        sys.exit(1)

    domain = sys.argv[1]
    # Validate the numeric arguments up front: a typo should produce a clear
    # message, not a traceback.
    try:
        duration = int(sys.argv[2]) if len(sys.argv) > 2 else 5
        max_processes = int(sys.argv[3]) if len(sys.argv) > 3 else None
    except ValueError:
        print("Fehler: [duration] und [max_processes] muessen ganze Zahlen sein!")
        sys.exit(1)
    if duration < 1 or (max_processes is not None and max_processes < 0):
        print("Fehler: [duration] muss >= 1 und [max_processes] >= 0 sein!")
        sys.exit(1)

    print(f"\nStarte Performance-Analyse mit Script-Detection fuer: {domain}")
    print(f"Analyse-Dauer: {duration} Sekunden pro Prozess\n")
    analyzer = ShopPerformanceAnalyzer(domain)
    pids = analyzer.get_php_fpm_pids()
    if not pids:
        print("Keine PHP-FPM Prozesse gefunden!")
        sys.exit(1)
    # A limit of 0 (or none at all) means "trace every worker".
    if max_processes and len(pids) > max_processes:
        pids = pids[:max_processes]
    print(f"{len(pids)} PHP-FPM Prozesse gefunden\n")
    print("Analyse laeuft...")
    print("-" * 80)

    total = len(pids)
    analyzed = 0
    # Defined before the loop because the final "100%" line needs it as well.
    bar_length = 40
    for i, pid in enumerate(pids, 1):
        percent = int((i / total) * 100)
        filled = int((percent / 100) * bar_length)
        bar = '#' * filled + '-' * (bar_length - filled)
        print(f"\r[{bar}] {percent:3d}% | PID {pid:6d} ({i}/{total})", end='', flush=True)
        output = analyzer.run_strace(pid, duration)
        # Very short captures contain no trace lines worth analysing
        # (presumably just strace's attach/detach messages).
        if output and len(output) > 100:
            analyzer.analyze_strace_output(output, pid)
            analyzed += 1
    print(f"\r[{'#' * bar_length}] 100% | Fertig!{' ' * 30}")
    print("-" * 80)
    print(f"\nAnalyse abgeschlossen! ({analyzed} Prozesse erfolgreich)\n")
    if analyzed == 0:
        print("Konnte keine Daten sammeln!\n")
        sys.exit(1)
    analyzer.generate_report()


# Run the analysis only when the file is executed as a script.
if __name__ == "__main__":
    main()