# modules/backups.py
"""Backup auditing, container inspection and system metrics.

All remote work funnels through `_ssh_main`, which executes the command
locally when this process already runs on the main server, and over SSH
otherwise.  Companion VM access always goes over SSH (see get_vm_backups).
"""

import glob
import hashlib
import json
import os
import re
import subprocess
import tarfile

from config import (
    RUNNING_ON_MAIN_SERVER,
    MAIN_SERVER_IP,
    MAIN_SERVER_USER,
    MAIN_SERVER_KEY,
    MAIN_SERVER_PORT,
    VM_HOST,
    VM_PORT,
    VM_KEY,
    VM_USER,
)


def _run(cmd, timeout=30):
    """Run *cmd* through the shell and return ``(stdout, stderr)``, stripped.

    Best-effort by design: any failure (timeout, missing binary, OSError)
    is reported as ``('', error_text)`` so callers never need try/except.
    """
    try:
        r = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
        return r.stdout.strip(), r.stderr.strip()
    except Exception as e:
        return '', str(e)


def _human_bytes(n):
    """Render a byte count as a short human-readable string (B…TB)."""
    n = int(n)
    if n < 1024:
        return f'{n} B'
    if n < 1024 ** 2:
        return f'{n / 1024:.1f} KB'
    if n < 1024 ** 3:
        return f'{n / (1024 ** 2):.1f} MB'
    if n < 1024 ** 4:
        return f'{n / (1024 ** 3):.2f} GB'
    return f'{n / (1024 ** 4):.2f} TB'


def _ssh_main(remote_cmd, timeout=30):
    """Run *remote_cmd* on the main server.

    Executes locally when this host IS the main server, otherwise wraps the
    command in a non-interactive ssh invocation (BatchMode, no host-key
    prompt).  Single quotes in the command are escaped for the remote shell.
    """
    if RUNNING_ON_MAIN_SERVER:
        return _run(remote_cmd, timeout=timeout)
    escaped = remote_cmd.replace("'", "'\\''")
    ssh = (
        f"ssh -i {MAIN_SERVER_KEY} -p {MAIN_SERVER_PORT} "
        f"-o StrictHostKeyChecking=no -o ConnectTimeout=10 "
        f"-o BatchMode=yes "
        f"{MAIN_SERVER_USER}@{MAIN_SERVER_IP}"
    )
    return _run(f"{ssh} '{escaped}'", timeout=timeout)


# ────────────────────────────────────────────────────────────────
# BACKUPS
# ────────────────────────────────────────────────────────────────

def get_local_backups():
    """Return up to 20 backup filenames on the main server, newest first."""
    stdout, _ = _ssh_main(
        "ls -t /root/backups/myapps-backup-*.tar.gz 2>/dev/null | head -20"
    )
    files = []
    if stdout:
        for line in stdout.split('\n'):
            line = line.strip()
            if line:
                files.append(os.path.basename(line))
    return files


def get_vm_backups():
    """Return up to 20 backup filenames replicated on the companion VM.

    From the main server the VM is queried over SSH; on any other host the
    VM's backup directory is assumed to be locally mounted/present.
    """
    vm_backups = []
    if RUNNING_ON_MAIN_SERVER:
        try:
            cmd = (
                f"ssh -i {VM_KEY} -p {VM_PORT} "
                f"-o StrictHostKeyChecking=no -o ConnectTimeout=10 "
                f"-o BatchMode=yes "
                f"{VM_USER}@{VM_HOST} "
                f"'ls -t /backups/main-server/myapps-backup-*.tar.gz 2>/dev/null | head -20'"
            )
            stdout, _ = _run(cmd, timeout=25)
            if stdout:
                for line in stdout.split('\n'):
                    line = line.strip()
                    if line and '.tar.gz' in line:
                        vm_backups.append(os.path.basename(line))
        except Exception as e:
            print(f"[backups] VM backup fetch error: {e}")
    else:
        backup_dir = '/backups/main-server'
        if os.path.exists(backup_dir):
            files = glob.glob(f'{backup_dir}/myapps-backup-*.tar.gz')
            files.sort(key=os.path.getmtime, reverse=True)
            vm_backups = [os.path.basename(f) for f in files[:20]]
    return vm_backups


# ────────────────────────────────────────────────────────────────
# BACKUP HEALTH AUDIT
# ────────────────────────────────────────────────────────────────

def audit_backup(backup_file, source='local'):
    """Run a battery of health checks against one backup archive.

    Checks: existence, size sanity, SHA256 sidecar, gzip stream integrity,
    expected internal layout, suspicious member paths, stray executable
    scripts, and volume-archive count.  Each check scores pass=10 / warn=5 /
    fail=0; the aggregate is normalized to 0-100.

    Returns a dict with keys: ok, score, checks, summary, backup_file,
    file_size_bytes, file_size_display, health_tier, health_label.
    """
    checks = []

    def add(name, status, detail='', more=None):
        # `more` carries optional expandable context lines for the UI.
        entry = {'name': name, 'status': status, 'detail': detail}
        if more:
            entry['more'] = more
        checks.append(entry)

    def _failure(summary, checks_list):
        # Shared shape for early-exit failure results.
        return {
            'ok': False, 'score': 0,
            'backup_file': backup_file,
            'file_size_bytes': None, 'file_size_display': None,
            'health_tier': 'critical', 'health_label': 'Unhealthy',
            'checks': checks_list,
            'summary': summary,
        }

    # Reject path separators / traversal outright: backup_file is spliced
    # into filesystem paths and an scp command below.  (delete_backup is
    # stricter still; here we only block what is clearly dangerous.)
    if '/' in backup_file or '\\' in backup_file or '..' in backup_file:
        return _failure(
            'Cannot access backup file from this host.',
            [{'name': 'File Access', 'status': 'fail',
              'detail': f'Invalid backup filename: {backup_file}'}],
        )

    if source == 'local':
        archive_path = f"/root/backups/{backup_file}"
    else:
        archive_path = f"/backups/main-server/{backup_file}"

    # Off the main server, 'local' archives must be pulled down first.
    # Pulled copies are cached in /tmp so repeat audits are cheap.
    if not RUNNING_ON_MAIN_SERVER and source == 'local':
        tmp_path = f"/tmp/audit_{backup_file}"
        err = ''
        if not os.path.exists(tmp_path):
            pull_cmd = (
                f"scp -i {MAIN_SERVER_KEY} -P {MAIN_SERVER_PORT} "
                f"-o StrictHostKeyChecking=no -o ConnectTimeout=15 "
                f"{MAIN_SERVER_USER}@{MAIN_SERVER_IP}:/root/backups/{backup_file} "
                f"{tmp_path}"
            )
            out, err = _run(pull_cmd, timeout=120)
        if not os.path.exists(tmp_path):
            return _failure(
                'Cannot access backup file from this host.',
                [{'name': 'File Access', 'status': 'fail',
                  'detail': f'Could not pull from main server: {err}'}],
            )
        archive_path = tmp_path

    # ── Check: file exists ──
    if not os.path.exists(archive_path):
        add('File Exists', 'fail', f'Not found: {archive_path}')
        result = _failure('Backup file does not exist on disk.', checks)
        return result
    add('File Exists', 'pass', archive_path)

    # ── Check: file size sanity ──
    size_bytes = os.path.getsize(archive_path)
    size_mb = size_bytes / (1024 * 1024)
    size_human = _human_bytes(size_bytes)
    size_more = [
        f'Exact size: {size_bytes:,} bytes ({size_human})',
        'We flag archives under 1 MB as corrupt and under ~50 MB as unusually small for a full stack backup.',
    ]
    if size_bytes < 1024 * 1024:
        add('File Size', 'fail',
            f'{size_human} — suspiciously tiny, likely corrupt', more=size_more)
    elif size_mb < 50:
        add('File Size', 'warn',
            f'{size_human} — smaller than expected (typical full backup > 50 MB)',
            more=size_more)
    else:
        add('File Size', 'pass',
            f'{size_human} — within expected range', more=size_more)

    # ── Check: SHA256 sidecar ──
    sha_file = archive_path + '.sha256'
    if os.path.exists(sha_file):
        try:
            with open(sha_file, 'r') as f:
                # sha256sum format: "<hash>  <filename>" — hash is field 0.
                expected_hash = f.read().split()[0].strip()
            actual_hash = _sha256_file(archive_path)
            if actual_hash == expected_hash:
                add('Checksum (SHA256)', 'pass',
                    f'Hash verified — {actual_hash[:20]}…')
            else:
                add('Checksum (SHA256)', 'fail',
                    f'MISMATCH — expected {expected_hash[:20]}… got {actual_hash[:20]}…')
        except Exception as e:
            add('Checksum (SHA256)', 'warn', f'Could not verify: {e}')
    else:
        add('Checksum (SHA256)', 'warn',
            'No .sha256 sidecar found — run a new backup to get checksums')

    # ── Check: compressed-stream integrity ──
    try:
        result = subprocess.run(
            ['gzip', '--test', archive_path],
            capture_output=True, text=True, timeout=120
        )
        if result.returncode == 0:
            add('Archive Integrity', 'pass',
                'gzip test passed — archive is not corrupted', more=[
                    'Runs gzip --test on the .tar.gz so the compressed stream is readable end-to-end.',
                ])
        else:
            add('Archive Integrity', 'fail',
                f'gzip test failed: {(result.stderr or result.stdout)[:200]}')
    except FileNotFoundError:
        # No gzip binary — fall back to decompressing the first 1 MB in-process.
        try:
            import gzip
            with gzip.open(archive_path, 'rb') as f:
                f.read(1024 * 1024)
            add('Archive Integrity', 'pass', 'gzip header valid')
        except Exception as e:
            add('Archive Integrity', 'fail', f'Archive appears corrupt: {e}')
    except subprocess.TimeoutExpired:
        add('Archive Integrity', 'warn',
            'Integrity check timed out — file is large, probably OK')
    except Exception as e:
        add('Archive Integrity', 'warn', f'Could not test: {e}')

    # ── Check: internal layout ──
    members = []
    try:
        with tarfile.open(archive_path, 'r:gz') as tf:
            members = tf.getnames()
    except Exception:
        pass  # reported below as a warn, not a hard failure

    if members:
        has_volumes = any('volumes/' in m for m in members)
        has_info = any('backup-info.txt' in m for m in members)
        has_compose = any('compose-files/' in m for m in members)
        # NOTE(review): this count requires a '/volumes/' segment while the
        # Volume Count check below matches bare 'volumes/' — kept as-is to
        # preserve existing results; confirm which is intended.
        vol_count = len([m for m in members
                         if '/volumes/' in m and m.endswith('.tar.gz')])
        issues = []
        if not has_volumes:
            issues.append('volumes/ missing')
        if not has_info:
            issues.append('backup-info.txt missing')
        if not issues:
            detail = 'volumes/ ✓ backup-info.txt ✓'
            if has_compose:
                detail += ' compose-files/ ✓'
            detail += f' ({vol_count} volume archives)'
            add('Internal Structure', 'pass', detail)
        else:
            add('Internal Structure', 'fail', ' · '.join(issues))
    else:
        add('Internal Structure', 'warn', 'Could not inspect archive members')

    # ── Check: suspicious member paths ──
    SUSPICIOUS = [
        (r'\.\./', 'path traversal (..)'),
        (r'^/', 'absolute path in archive'),
        (r'/etc/passwd', '/etc/passwd reference'),
        (r'/etc/shadow', '/etc/shadow reference'),
        (r'\.ssh/', '.ssh directory reference'),
        (r'id_rsa(?!\.pub)', 'private SSH key reference'),
        (r'authorized_keys', 'authorized_keys reference'),
    ]
    found_suspicious = []
    for m in members:
        for pat, label in SUSPICIOUS:
            if re.search(pat, m):
                found_suspicious.append(f'{m} ({label})')
                break  # one label per member is enough
    if found_suspicious:
        add('Security Scan', 'fail',
            f'Suspicious entries found: {found_suspicious[:3]}')
    else:
        add('Security Scan', 'pass',
            'No path traversal or dangerous entries detected', more=[
                'Member paths are checked for .. segments, absolute roots, and sensitive paths '
                '(e.g. .ssh, /etc/shadow).',
            ])

    # ── Check: executable scripts outside expected directories ──
    SCRIPT_EXTENSIONS = ('.sh', '.py', '.pl', '.rb', '.bash', '.zsh')
    SAFE_PREFIXES = (
        'compose-files/',
        'volumes/',
        'container-configs/',
        'configs/',
    )
    suspicious_scripts = []
    try:
        with tarfile.open(archive_path, 'r:gz') as tf:
            for member in tf.getmembers():
                if not member.isfile():
                    continue
                name = member.name
                if any(name.startswith(p) or f'/{p}' in name
                       for p in SAFE_PREFIXES):
                    continue
                name_lower = name.lower()
                has_script_ext = any(name_lower.endswith(ext)
                                     for ext in SCRIPT_EXTENSIONS)
                has_exec_bit = bool(member.mode & 0o111)
                if has_script_ext and has_exec_bit:
                    suspicious_scripts.append(os.path.basename(name))
    except Exception:
        pass  # unreadable archive already flagged by earlier checks
    if suspicious_scripts:
        add('Executable Scripts', 'warn',
            f'Scripts with execute bit outside expected dirs: {suspicious_scripts[:3]}')
    else:
        add('Executable Scripts', 'pass',
            'No unexpected executable scripts found')

    # ── Check: volume archive count ──
    vol_archives = [m for m in members
                    if 'volumes/' in m and m.endswith('.tar.gz')]
    v = len(vol_archives)
    if v == 0:
        add('Volume Count', 'fail', 'No volume archives found in backup')
    elif v < 5:
        add('Volume Count', 'warn',
            f'Only {v} volumes (expected ≥5 for a full backup)')
    else:
        add('Volume Count', 'pass', f'{v} volume archives present')

    # ── Scoring: pass=10 / warn=5 / fail=0, normalized to 0-100 ──
    weights = {'pass': 10, 'warn': 5, 'fail': 0}
    total = len(checks) * 10
    earned = sum(weights.get(c['status'], 0) for c in checks)
    score = int((earned / total) * 100) if total > 0 else 0
    has_fails = any(c['status'] == 'fail' for c in checks)
    ok = not has_fails and score >= 60

    if score >= 90:
        summary = 'Backup looks healthy and is safe to restore.'
    elif score >= 70:
        summary = 'Minor warnings — likely safe, but review before restoring.'
    elif score >= 40:
        summary = 'Significant issues detected — restore with caution.'
    else:
        summary = 'Multiple checks failed — do NOT restore without manual inspection.'

    # Any hard fail pins the tier to critical regardless of score.
    has_warns = any(c['status'] == 'warn' for c in checks)
    if has_fails:
        health_tier = 'critical'
        health_label = 'Unhealthy'
    elif score == 100:
        health_tier = 'excellent'
        health_label = '100% healthy'
    elif score >= 90:
        health_tier = 'good'
        health_label = 'Healthy' if not has_warns else 'Healthy (with notes)'
    elif score >= 70:
        health_tier = 'fair'
        health_label = 'Mostly healthy'
    elif score >= 40:
        health_tier = 'poor'
        health_label = 'At risk'
    else:
        health_tier = 'critical'
        health_label = 'Unhealthy'

    return {
        'ok': ok,
        'score': score,
        'checks': checks,
        'summary': summary,
        'backup_file': backup_file,
        'file_size_bytes': size_bytes,
        'file_size_display': size_human,
        'health_tier': health_tier,
        'health_label': health_label,
    }


def _sha256_file(path):
    """Return the hex SHA256 of *path*, streamed in 64 KB chunks."""
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()


# ────────────────────────────────────────────────────────────────
# DELETE BACKUP
# ────────────────────────────────────────────────────────────────

def delete_backup(backup_file, source='local'):
    """Delete one backup archive (and its .sha256 sidecar) by filename.

    *source* selects where: 'local' = main server, 'vm' = companion VM.
    The filename must match the generated naming scheme exactly — this is
    the injection/traversal guard, since the name is spliced into paths
    and remote shell commands.  Returns ``(ok, message)``.
    """
    if not re.match(r'^myapps-backup-\d{8}_\d{6}\.tar\.gz$', backup_file):
        return False, f'Invalid backup filename: {backup_file}'

    if source == 'local':
        if RUNNING_ON_MAIN_SERVER:
            archive_path = f"/root/backups/{backup_file}"
            if not os.path.exists(archive_path):
                return False, f'File not found: {archive_path}'
            os.remove(archive_path)
            sha = archive_path + '.sha256'
            if os.path.exists(sha):
                os.remove(sha)
            return True, f'Deleted {backup_file} from main server'
        else:
            cmd = f"rm -f /root/backups/{backup_file} /root/backups/{backup_file}.sha256"
            out, err = _ssh_main(cmd)
            if err and 'No such file' not in err:
                return False, f'Remote delete error: {err}'
            return True, f'Deleted {backup_file} from main server'

    elif source == 'vm':
        archive_path = f"/backups/main-server/{backup_file}"
        if not RUNNING_ON_MAIN_SERVER:
            # Not on the main server ⇒ the VM's backup dir is local to us.
            if not os.path.exists(archive_path):
                return False, f'File not found: {archive_path}'
            os.remove(archive_path)
            sha = archive_path + '.sha256'
            if os.path.exists(sha):
                os.remove(sha)
            return True, f'Deleted {backup_file} from VM'
        else:
            cmd = (
                f"ssh -i {VM_KEY} -p {VM_PORT} "
                f"-o StrictHostKeyChecking=no -o ConnectTimeout=10 "
                f"-o BatchMode=yes "
                f"{VM_USER}@{VM_HOST} "
                f"'rm -f /backups/main-server/{backup_file} "
                f"/backups/main-server/{backup_file}.sha256'"
            )
            out, err = _run(cmd, timeout=30)
            if err and 'No such file' not in err:
                return False, f'VM delete error: {err}'
            return True, f'Deleted {backup_file} from VM'

    return False, 'Unknown source'


# ────────────────────────────────────────────────────────────────
# BACKUP STATUS LOG
# ────────────────────────────────────────────────────────────────

def get_backup_log_entries(limit=20):
    """Return the last *limit* backup-status log lines, newest first.

    Log format is pipe-delimited: ``timestamp|status|name|message``;
    missing trailing fields become empty strings.
    """
    stdout, _ = _ssh_main(
        f"tail -n {limit} /root/backups/backup-status.log 2>/dev/null || echo ''"
    )
    entries = []
    if not stdout:
        return entries
    for line in stdout.strip().split('\n'):
        if not line.strip():
            continue
        parts = line.split('|')
        entries.append({
            'timestamp': parts[0].strip() if len(parts) > 0 else '',
            'status': parts[1].strip() if len(parts) > 1 else '',
            'name': parts[2].strip() if len(parts) > 2 else '',
            'message': parts[3].strip() if len(parts) > 3 else '',
        })
    # tail yields oldest→newest; callers want newest first.
    return list(reversed(entries))


def get_backup_script_path():
    """Return the backup script's path on the main server, or None."""
    candidates = ['/root/backup-myapps.sh']
    for p in candidates:
        out, _ = _ssh_main(f"[ -f {p} ] && echo yes || echo no")
        if out.strip() == 'yes':
            return p
    return None


# ────────────────────────────────────────────────────────────────
# CONTAINERS
# ────────────────────────────────────────────────────────────────

def _parse_containers(raw, owner='root'):
    """Parse ``docker ps --format 'name|status|image|ports'`` output."""
    containers = []
    if raw:
        for line in raw.split('\n'):
            if '|' not in line:
                continue
            parts = line.split('|')
            containers.append({
                'name': parts[0].strip(),
                'status': parts[1].strip() if len(parts) > 1 else '',
                'image': parts[2].strip() if len(parts) > 2 else '',
                'ports': parts[3].strip() if len(parts) > 3 else '',
                'owner': owner,
            })
    return containers


def get_containers():
    """Return the managed application containers (frappe/nextcloud/…)."""
    stdout, _ = _ssh_main(
        "docker ps -a --format '{{.Names}}|{{.Status}}|{{.Image}}|{{.Ports}}' 2>/dev/null | "
        "grep -E 'frappe|nextcloud|mautic|n8n|odoo'"
    )
    return _parse_containers(stdout)


def get_all_root_containers():
    """Return every container visible to the root docker daemon."""
    stdout, _ = _ssh_main(
        "docker ps -a --format '{{.Names}}|{{.Status}}|{{.Image}}|{{.Ports}}' 2>/dev/null"
    )
    return _parse_containers(stdout)


def get_rootless_user_containers_remote():
    """Enumerate containers from every rootless-docker user on the server.

    Discovers per-user daemons via their sockets in /run/user/<uid>/ and
    queries each through DOCKER_HOST; the owning username (resolved from
    the uid) is attached to each container record.
    """
    cmd = "ls /run/user/*/docker.sock 2>/dev/null"
    stdout, _ = _ssh_main(cmd)
    containers = []
    if not stdout:
        return containers
    for sock_path in stdout.split('\n'):
        sock_path = sock_path.strip()
        if not sock_path:
            continue
        try:
            uid = sock_path.split('/run/user/')[1].split('/')[0]
        except (IndexError, ValueError):
            continue
        name_out, _ = _ssh_main(f"getent passwd {uid} | cut -d: -f1")
        username = name_out.strip() or f"uid{uid}"
        ctr_out, _ = _ssh_main(
            f"DOCKER_HOST=unix://{sock_path} "
            f"docker ps -a --format '{{{{.Names}}}}|{{{{.Status}}}}|{{{{.Image}}}}|{{{{.Ports}}}}' 2>/dev/null"
        )
        containers.extend(_parse_containers(ctr_out, owner=username))
    return containers


# ────────────────────────────────────────────────────────────────
# CONTAINER ACTIONS
# ────────────────────────────────────────────────────────────────

def _safe_container_name(container_name):
    """Reduce a container name to Docker's legal alphabet.

    Docker names only contain ``[A-Za-z0-9_.-]``, so this is a no-op for
    every valid name while stripping all shell metacharacters (the name is
    interpolated into a shell command line).
    """
    return re.sub(r'[^A-Za-z0-9_.-]', '', container_name)


def container_action(container_name, action):
    """Start/stop/restart a container on the main server.

    Returns ``(accepted, output)``; *accepted* is False only for an
    unknown action — docker's own errors come back in *output*.
    """
    if action not in ('start', 'stop', 'restart'):
        return False, "Invalid action"
    safe_name = _safe_container_name(container_name)
    stdout, stderr = _ssh_main(f"docker {action} {safe_name} 2>&1", timeout=30)
    output = (stdout + stderr).strip()
    return True, output


def get_container_status(container_name):
    """Return ``{'name', 'status', 'raw'}`` for one container.

    *status* is normalized to 'running' / 'stopped' / 'unknown'; *raw* is
    docker's own State.Status string (lowercased).
    """
    safe_name = _safe_container_name(container_name)
    stdout, _ = _ssh_main(
        f"docker inspect --format='{{{{.State.Status}}}}' {safe_name} 2>/dev/null"
    )
    raw = stdout.strip().lower()
    if raw in ('running', 'restarting'):
        status = 'running'
    elif raw in ('exited', 'stopped', 'dead', 'paused'):
        status = 'stopped'
    else:
        status = 'unknown'
    return {'name': container_name, 'status': status, 'raw': raw}


# ────────────────────────────────────────────────────────────────
# STATS
# ────────────────────────────────────────────────────────────────

def _parse_stats_lines(raw, into):
    """Fold ``docker stats`` pipe-delimited lines into dict *into*."""
    for line in raw.split('\n'):
        if '|' not in line:
            continue
        parts = line.split('|')
        if len(parts) < 6:
            continue
        into[parts[0].strip()] = {
            'cpu': parts[1].strip(),
            'mem': parts[2].strip(),
            'mem_pct': parts[3].strip(),
            'net': parts[4].strip(),
            'block': parts[5].strip(),
        }
    return into


def get_container_stats_remote():
    """Return one-shot docker stats for the root daemon, keyed by name."""
    stdout, _ = _ssh_main(
        "docker stats --no-stream --format "
        "'{{.Name}}|{{.CPUPerc}}|{{.MemUsage}}|{{.MemPerc}}|{{.NetIO}}|{{.BlockIO}}' 2>/dev/null",
        timeout=35
    )
    stats = {}
    if stdout:
        _parse_stats_lines(stdout, stats)
    return stats


def get_all_stats():
    """Merge stats from the root daemon and every rootless daemon found."""
    all_stats = get_container_stats_remote()
    socks_out, _ = _ssh_main("ls /run/user/*/docker.sock 2>/dev/null")
    if socks_out:
        for sock in socks_out.split('\n'):
            sock = sock.strip()
            if not sock:
                continue
            stdout, _ = _ssh_main(
                f"DOCKER_HOST=unix://{sock} "
                f"docker stats --no-stream --format "
                f"'{{{{.Name}}}}|{{{{.CPUPerc}}}}|{{{{.MemUsage}}}}|{{{{.MemPerc}}}}|{{{{.NetIO}}}}|{{{{.BlockIO}}}}' 2>/dev/null",
                timeout=35
            )
            if stdout:
                _parse_stats_lines(stdout, all_stats)
    return all_stats


# ────────────────────────────────────────────────────────────────
# SYSTEM INFO — single batched SSH call
# ────────────────────────────────────────────────────────────────

def get_system_info():
    """
    Collect all system metrics in a SINGLE SSH call instead of 8 separate ones.
    Emits a pipe-delimited line:
    cpu|mem|mem_pct|disk|disk_pct|load|uptime|docker_v|hostname
    """
    batch_cmd = (
        "printf '%s|%s|%s|%s|%s|%s|%s|%s|%s\\n' "
        "\"$(top -bn1 | grep 'Cpu(s)' | awk '{print $2+$4}')\" "
        "\"$(free -m | awk 'NR==2{printf \"%s/%sMB\", $3, $2}')\" "
        "\"$(free | awk 'NR==2{printf \"%.0f\", $3/$2*100}')\" "
        "\"$(df -h / | awk 'NR==2{printf \"%s/%s\", $3, $2}')\" "
        "\"$(df / | awk 'NR==2{print $5}' | tr -d '%')\" "
        "\"$(cat /proc/loadavg | awk '{print $1, $2, $3}')\" "
        "\"$(uptime -p)\" "
        "\"$(docker --version 2>/dev/null | cut -d' ' -f3 | tr -d ',')\" "
        "\"$(hostname -f 2>/dev/null || hostname)\""
    )
    stdout, stderr = _ssh_main(batch_cmd, timeout=20)

    # Parse the pipe-delimited result
    if stdout and '|' in stdout:
        # Use the last line in case there's extra output
        for line in reversed(stdout.splitlines()):
            line = line.strip()
            if '|' in line:
                parts = line.split('|')
                if len(parts) >= 9:
                    return {
                        'cpu_pct': parts[0] or '0',
                        'memory': parts[1] or 'N/A',
                        'mem_pct': parts[2] or '0',
                        'disk': parts[3] or 'N/A',
                        'disk_pct': parts[4] or '0',
                        'load': parts[5] or 'N/A',
                        'uptime': parts[6] or 'N/A',
                        'docker_v': parts[7] or 'N/A',
                        'hostname': parts[8] or 'main server',
                    }

    # Fallback: individual calls if batch failed
    cpu_out, _ = _ssh_main("top -bn1 | grep 'Cpu(s)' | awk '{print $2+$4}'")
    mem_out, _ = _ssh_main("free -m | awk 'NR==2{printf \"%s/%sMB\", $3, $2}'")
    mem_pct, _ = _ssh_main("free | awk 'NR==2{printf \"%.0f\", $3/$2*100}'")
    disk_out, _ = _ssh_main("df -h / | awk 'NR==2{printf \"%s/%s\", $3, $2}'")
    disk_pct, _ = _ssh_main("df / | awk 'NR==2{print $5}' | tr -d '%'")
    load_out, _ = _ssh_main("cat /proc/loadavg | awk '{print $1, $2, $3}'")
    uptime, _ = _ssh_main("uptime -p")
    docker_v, _ = _ssh_main("docker --version | cut -d' ' -f3 | tr -d ','")
    hostname, _ = _run("hostname -f 2>/dev/null || hostname")
    return {
        'cpu_pct': cpu_out or '0',
        'memory': mem_out or 'N/A',
        'mem_pct': mem_pct or '0',
        'disk': disk_out or 'N/A',
        'disk_pct': disk_pct or '0',
        'load': load_out or 'N/A',
        'uptime': uptime or 'N/A',
        'docker_v': docker_v or 'N/A',
        'hostname': hostname or 'main server',
    }