fix: use psutil for reliable CPU and memory metrics

This commit is contained in:
2026-06-04 15:24:02 +01:00
parent 47600ccd42
commit b3d3c0d457
2 changed files with 156 additions and 99 deletions

View File

@@ -6,6 +6,9 @@ import json
import hashlib
import tarfile
import re
import time
import socket
import psutil
from config import (
RUNNING_ON_MAIN_SERVER,
MAIN_SERVER_IP, MAIN_SERVER_USER, MAIN_SERVER_KEY, MAIN_SERVER_PORT,
@@ -588,109 +591,162 @@ def get_all_stats():
# ────────────────────────────────────────────────────────────────
# SYSTEM INFO
# SYSTEM INFO (FIXED WITH PSUTIL)
# ────────────────────────────────────────────────────────────────
def get_system_info():
    """Collect host metrics with psutil and return them as display strings.

    Returns:
        dict: every value is a string. Keys are ``cpu_pct``, ``memory``,
        ``mem_pct``, ``disk``, ``disk_pct``, ``load``, ``uptime``,
        ``docker_v`` and ``hostname``. A metric that could not be read keeps
        its placeholder (``'0'`` for percentages, ``'N/A'`` otherwise).

    Disk, uptime and Docker version are each best-effort: a failure is
    printed and the placeholder is kept. If the CPU, memory or load-average
    step raises, the error is printed and the whole result is replaced by
    ``_get_system_info_fallback()``.

    Note: blocks for roughly 0.5 s while psutil samples CPU usage.
    """
    info = {
        'cpu_pct': '0',
        'memory': 'N/A',
        'mem_pct': '0',
        'disk': 'N/A',
        'disk_pct': '0',
        'load': 'N/A',
        'uptime': 'N/A',
        'docker_v': 'N/A',
        'hostname': socket.gethostname(),
    }
    gib = 1024 ** 3

    try:
        # CPU usage: a non-zero interval makes psutil measure over that
        # window (and block for it) instead of comparing to a previous call.
        info['cpu_pct'] = str(psutil.cpu_percent(interval=0.5))

        # Memory usage
        mem = psutil.virtual_memory()
        info['memory'] = f"{mem.used / gib:.1f}G/{mem.total / gib:.1f}G"
        info['mem_pct'] = str(int(mem.percent))

        # Load average
        try:
            load = psutil.getloadavg()
            info['load'] = f"{load[0]:.2f} {load[1]:.2f} {load[2]:.2f}"
        except AttributeError:
            # psutil build without getloadavg(): read the kernel file instead.
            with open('/proc/loadavg', 'r') as f:
                info['load'] = ' '.join(f.read().split()[:3])

        # Disk usage
        try:
            disk = psutil.disk_usage('/')
            info['disk'] = f"{disk.used / gib:.0f}G/{disk.total / gib:.0f}G"
            info['disk_pct'] = str(int(disk.percent))
        except Exception as e:
            print(f"Disk read error: {e}")

        # Uptime
        try:
            uptime_seconds = time.time() - psutil.boot_time()
            days = int(uptime_seconds // 86400)
            hours = int((uptime_seconds % 86400) // 3600)
            minutes = int((uptime_seconds % 3600) // 60)
            if days > 0:
                info['uptime'] = f"{days}d {hours}h {minutes}m"
            else:
                info['uptime'] = f"{hours}h {minutes}m"
        except Exception as e:
            print(f"Uptime read error: {e}")

        # Docker version
        try:
            raw = subprocess.check_output(
                ['docker', '--version'], stderr=subprocess.DEVNULL, text=True
            )
            # Output looks like "Docker version 24.0.7, build afdd53b": the
            # version is the third token, the last token is the build hash.
            fields = raw.split()
            if len(fields) >= 3:
                info['docker_v'] = fields[2].rstrip(',')
        except Exception as e:
            print(f"Docker version error: {e}")

    except Exception as e:
        print(f"System info error: {e}")
        # Fallback to /proc method if psutil fails
        info = _get_system_info_fallback()

    return info
def _fallback_cpu():
    """Return ``{'cpu_pct': ...}`` from two /proc/stat samples 0.5 s apart."""
    def _read_cpu():
        # First line is the aggregate "cpu" row; fields[4] is the idle counter.
        with open('/proc/stat', 'r') as f:
            fields = f.readline().split()
        return int(fields[4]), sum(int(x) for x in fields[1:])

    idle1, total1 = _read_cpu()
    # The counters are cumulative, so usage needs a delta between two reads.
    time.sleep(0.5)
    idle2, total2 = _read_cpu()
    total_delta = total2 - total1
    if total_delta <= 0:
        return {}
    busy = 1 - (idle2 - idle1) / total_delta
    return {'cpu_pct': str(round(100 * busy, 1))}


def _fallback_memory():
    """Return ``memory``/``mem_pct`` derived from /proc/meminfo."""
    wanted = ('MemTotal', 'MemAvailable')
    values = {}
    with open('/proc/meminfo', 'r') as f:
        for line in f:
            key, sep, rest = line.partition(':')
            key = key.strip()
            if sep and key in wanted:
                values[key] = int(rest.split()[0])
    if any(key not in values for key in wanted):
        return {}
    mem_total_mb = values['MemTotal'] // 1024
    mem_used_mb = (values['MemTotal'] - values['MemAvailable']) // 1024
    result = {'memory': f"{mem_used_mb}M/{mem_total_mb}M"}
    if mem_total_mb:  # avoid dividing by zero on a nonsensical total
        result['mem_pct'] = str(int(100 * mem_used_mb / mem_total_mb))
    return result


def _fallback_load():
    """Return ``{'load': ...}`` with the first three /proc/loadavg fields."""
    with open('/proc/loadavg', 'r') as f:
        return {'load': ' '.join(f.read().split()[:3])}


def _fallback_uptime():
    """Return ``{'uptime': ...}`` formatted from /proc/uptime seconds."""
    with open('/proc/uptime', 'r') as f:
        secs = int(float(f.read().split()[0]))
    days = secs // 86400
    hours = (secs % 86400) // 3600
    minutes = (secs % 3600) // 60
    if days > 0:
        return {'uptime': f"{days}d {hours}h {minutes}m"}
    return {'uptime': f"{hours}h {minutes}m"}


def _fallback_disk():
    """Return ``disk``/``disk_pct`` for '/', via psutil or else shutil."""
    gib = 1024 ** 3
    try:
        disk = psutil.disk_usage('/')
        return {
            'disk': f"{disk.used / gib:.0f}G/{disk.total / gib:.0f}G",
            'disk_pct': str(int(disk.percent)),
        }
    except Exception:
        # psutil is unusable here; the standard library gives the same totals.
        import shutil
        disk = shutil.disk_usage('/')
        result = {'disk': f"{disk.used // gib}G/{disk.total // gib}G"}
        if disk.total:
            result['disk_pct'] = str(int(100 * disk.used / disk.total))
        return result


def _fallback_docker():
    """Return ``{'docker_v': ...}`` parsed from ``docker --version``."""
    try:
        raw = subprocess.check_output(
            ['docker', '--version'], stderr=subprocess.DEVNULL, text=True
        )
    except (OSError, subprocess.SubprocessError):
        # Docker missing or not runnable: keep the 'N/A' placeholder quietly.
        return {}
    # Output looks like "Docker version 24.0.7, build afdd53b": the version
    # is the third token, the last token is the build hash.
    fields = raw.split()
    if len(fields) < 3:
        return {}
    return {'docker_v': fields[2].rstrip(',')}


def _get_system_info_fallback():
    """Collect host metrics from /proc directly, for when psutil fails.

    Returns:
        dict: every value is a string. Keys are ``cpu_pct``, ``memory``,
        ``mem_pct``, ``disk``, ``disk_pct``, ``load``, ``uptime``,
        ``docker_v`` and ``hostname``. A metric that could not be read keeps
        its placeholder (``'0'`` for percentages, ``'N/A'`` otherwise).

    Each metric is gathered independently, so an unreadable /proc file does
    not prevent the disk or Docker probes from running. Failures are printed
    and never raised.

    Note: blocks for roughly 0.5 s while CPU usage is sampled.
    """
    info = {
        'cpu_pct': '0',
        'memory': 'N/A',
        'mem_pct': '0',
        'disk': 'N/A',
        'disk_pct': '0',
        'load': 'N/A',
        'uptime': 'N/A',
        'docker_v': 'N/A',
        'hostname': socket.gethostname(),
    }

    collectors = (
        _fallback_cpu,
        _fallback_memory,
        _fallback_load,
        _fallback_uptime,
        _fallback_disk,
        _fallback_docker,
    )
    for collect in collectors:
        try:
            info.update(collect())
        except Exception as e:
            # Best-effort boundary: report and carry on with the next metric.
            print(f"Fallback system info error: {e}")

    return info