fix: use psutil for reliable CPU and memory metrics
This commit is contained in:
@@ -6,6 +6,9 @@ import json
|
||||
import hashlib
|
||||
import tarfile
|
||||
import re
|
||||
import time
|
||||
import socket
|
||||
import psutil
|
||||
from config import (
|
||||
RUNNING_ON_MAIN_SERVER,
|
||||
MAIN_SERVER_IP, MAIN_SERVER_USER, MAIN_SERVER_KEY, MAIN_SERVER_PORT,
|
||||
@@ -588,109 +591,162 @@ def get_all_stats():
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
# SYSTEM INFO
|
||||
# SYSTEM INFO (FIXED WITH PSUTIL)
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
|
||||
def get_system_info():
    """Collect host metrics for display, using psutil as the primary source.

    Returns:
        dict[str, str]: the keys ``cpu_pct``, ``memory``, ``mem_pct``,
        ``disk``, ``disk_pct``, ``load``, ``uptime``, ``docker_v`` and
        ``hostname``. A metric that cannot be read keeps its placeholder
        (``'0'`` for percentages, ``'N/A'`` otherwise; ``hostname`` always
        comes from ``socket.gethostname()``).

    Disk, uptime and Docker failures are logged and leave only that metric
    at its placeholder. Any other failure (for example in the CPU or memory
    calls) is logged and the whole result is replaced by the return value
    of ``_get_system_info_fallback()``.
    """
    info = {
        'cpu_pct': '0',
        'memory': 'N/A',
        'mem_pct': '0',
        'disk': 'N/A',
        'disk_pct': '0',
        'load': 'N/A',
        'uptime': 'N/A',
        'docker_v': 'N/A',
        'hostname': socket.gethostname(),
    }

    try:
        # CPU usage: with a non-zero interval psutil blocks for that long and
        # compares two samples, so no manual delta is needed here.
        info['cpu_pct'] = str(psutil.cpu_percent(interval=0.5))

        # Memory usage. Report "used" as total - available so the figure
        # agrees with mem.percent (psutil documents percent as
        # (total - available) / total); psutil's own `used` field is
        # computed differently and would disagree with the percentage.
        mem = psutil.virtual_memory()
        mem_used_gb = (mem.total - mem.available) / (1024**3)
        mem_total_gb = mem.total / (1024**3)
        info['memory'] = f"{mem_used_gb:.1f}G/{mem_total_gb:.1f}G"
        info['mem_pct'] = str(int(mem.percent))

        # Load average
        try:
            load = psutil.getloadavg()
            info['load'] = f"{load[0]:.2f} {load[1]:.2f} {load[2]:.2f}"
        except AttributeError:
            # psutil builds without getloadavg: read the kernel file directly.
            with open('/proc/loadavg', 'r') as f:
                load = f.read().split()[:3]
            info['load'] = ' '.join(load)

        # Disk usage of the root filesystem
        try:
            disk = psutil.disk_usage('/')
            disk_used_gb = disk.used / (1024**3)
            disk_total_gb = disk.total / (1024**3)
            info['disk'] = f"{disk_used_gb:.0f}G/{disk_total_gb:.0f}G"
            info['disk_pct'] = str(int(disk.percent))
        except Exception as e:
            print(f"Disk read error: {e}")

        # Uptime, derived from the boot timestamp
        try:
            uptime_seconds = time.time() - psutil.boot_time()
            days = int(uptime_seconds // 86400)
            hours = int((uptime_seconds % 86400) // 3600)
            minutes = int((uptime_seconds % 3600) // 60)
            info['uptime'] = f"{days}d {hours}h {minutes}m" if days > 0 else f"{hours}h {minutes}m"
        except Exception as e:
            print(f"Uptime read error: {e}")

        # Docker version
        try:
            docker_out = subprocess.check_output(
                ['docker', '--version'],
                stderr=subprocess.DEVNULL,
                text=True,
                timeout=5,  # never let a hung docker binary stall the stats call
            )
            # Output looks like "Docker version 24.0.5, build ced0996": the
            # version is the third field; the last field is the build hash.
            fields = docker_out.split()
            if len(fields) >= 3:
                info['docker_v'] = fields[2].rstrip(',')
        except Exception as e:
            print(f"Docker version error: {e}")

    except Exception as e:
        print(f"System info error: {e}")
        # Fallback to /proc method if psutil fails
        info = _get_system_info_fallback()

    return info
|
||||
|
||||
|
||||
def _get_system_info_fallback():
|
||||
"""Fallback method using /proc directly when psutil is not available"""
|
||||
info = {
|
||||
'cpu_pct': '0',
|
||||
'memory': 'N/A',
|
||||
'mem_pct': '0',
|
||||
'disk': 'N/A',
|
||||
'disk_pct': '0',
|
||||
'load': 'N/A',
|
||||
'uptime': 'N/A',
|
||||
'docker_v': 'N/A',
|
||||
'hostname': socket.gethostname()
|
||||
}
|
||||
|
||||
try:
|
||||
# CPU calculation with delta
|
||||
def _read_cpu():
|
||||
with open('/proc/stat', 'r') as f:
|
||||
fields = f.readline().split()
|
||||
idle = int(fields[4])
|
||||
total = sum(int(x) for x in fields[1:])
|
||||
return idle, total
|
||||
|
||||
idle1, total1 = _read_cpu()
|
||||
time.sleep(0.5)
|
||||
idle2, total2 = _read_cpu()
|
||||
|
||||
idle_delta = idle2 - idle1
|
||||
total_delta = total2 - total1
|
||||
if total_delta > 0:
|
||||
cpu_pct = round(100 * (1 - idle_delta / total_delta), 1)
|
||||
info['cpu_pct'] = str(cpu_pct)
|
||||
|
||||
# Memory
|
||||
mem = {}
|
||||
with open('/proc/meminfo', 'r') as f:
|
||||
for line in f:
|
||||
parts = line.split(':')
|
||||
if len(parts) == 2:
|
||||
mem[parts[0].strip()] = int(parts[1].strip().split()[0])
|
||||
|
||||
if 'MemTotal' in mem and 'MemAvailable' in mem:
|
||||
mem_total_mb = mem['MemTotal'] // 1024
|
||||
mem_used_mb = (mem['MemTotal'] - mem['MemAvailable']) // 1024
|
||||
info['memory'] = f"{mem_used_mb}M/{mem_total_mb}M"
|
||||
info['mem_pct'] = str(int(100 * mem_used_mb / mem_total_mb))
|
||||
|
||||
# Load
|
||||
with open('/proc/loadavg', 'r') as f:
|
||||
load = f.read().split()[:3]
|
||||
info['load'] = ' '.join(load)
|
||||
|
||||
# Uptime
|
||||
with open('/proc/uptime', 'r') as f:
|
||||
secs = int(float(f.read().split()[0]))
|
||||
days = secs // 86400
|
||||
hours = (secs % 86400) // 3600
|
||||
minutes = (secs % 3600) // 60
|
||||
info['uptime'] = f"{days}d {hours}h {minutes}m" if days > 0 else f"{hours}h {minutes}m"
|
||||
|
||||
# Disk
|
||||
try:
|
||||
disk = psutil.disk_usage('/')
|
||||
disk_used_gb = disk.used / (1024**3)
|
||||
disk_total_gb = disk.total / (1024**3)
|
||||
info['disk'] = f"{disk_used_gb:.0f}G/{disk_total_gb:.0f}G"
|
||||
info['disk_pct'] = str(int(disk.percent))
|
||||
except:
|
||||
import shutil
|
||||
disk = shutil.disk_usage('/')
|
||||
info['disk'] = f"{disk.used // (1024**3)}G/{disk.total // (1024**3)}G"
|
||||
info['disk_pct'] = str(int(100 * disk.used / disk.total))
|
||||
|
||||
# Docker version
|
||||
try:
|
||||
docker_v = subprocess.check_output(['docker', '--version'], stderr=subprocess.DEVNULL, text=True)
|
||||
info['docker_v'] = docker_v.split()[-1].strip(',')
|
||||
except:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
print(f"Fallback system info error: {e}")
|
||||
|
||||
return info
|
||||
@@ -1,2 +1,3 @@
|
||||
boto3==1.43.5
|
||||
flask
|
||||
psutil
|
||||
Reference in New Issue
Block a user