package main import ( "encoding/json" "fmt" "log" "net/http" "os" "os/exec" "strings" "sync" "time" "github.com/NVIDIA/go-nvml/pkg/nvml" "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/disk" "github.com/shirou/gopsutil/v3/net" "github.com/shirou/gopsutil/v3/process" ) const ( checkInterval = 10 * time.Second monitoringPeriod = 5 * time.Minute cpuThreshold = 20.0 // percentage gpuThreshold = 20.0 // percentage diskThreshold = 5 * 1024 * 1024 // 5 MB/s networkThreshold = 1 * 1024 * 1024 // 1 MB/s httpPort = 8080 ) type ResourceUsage struct { timestamp time.Time cpuUsage float64 gpuUsage float64 diskIO uint64 networkIO uint64 sshConnections int activeUsers int } type SystemStatus struct { CurrentUsage ResourceUsage `json:"current_usage"` Blockers []string `json:"sleep_blockers"` } var ( currentStatus SystemStatus statusMutex sync.RWMutex ) func main() { // Check if running as root if os.Geteuid() != 0 { log.Fatal("This program must be run as root") } // Initialize NVML for GPU monitoring ret := nvml.Init() if ret != nvml.SUCCESS { log.Printf("Warning: Could not initialize NVML: %v", ret) } defer nvml.Shutdown() // Start HTTP server go startHTTPServer() usageHistory := make([]ResourceUsage, 0) ticker := time.NewTicker(checkInterval) defer ticker.Stop() log.Printf("Starting idle monitoring. System will suspend when:\n") log.Printf("- Average CPU usage across all cores < %.1f%%\n", cpuThreshold) log.Printf("- Average GPU usage across all GPUs < %.1f%%\n", gpuThreshold) log.Printf("- Disk I/O < %.1f MB/s\n", float64(diskThreshold)/(1024*1024)) log.Printf("- Network I/O < %.1f MB/s\n", float64(networkThreshold)/(1024*1024)) log.Printf("- No active SSH connections\n") log.Printf("- No active user sessions\n") log.Printf("Over the last %v\n", monitoringPeriod) log.Printf("HTTP status endpoint available at http://localhost:%d/status\n", httpPort) for range ticker.C { usage := getCurrentUsage() usageHistory = append(usageHistory, usage) // Update current status updateSystemStatus(usage, usageHistory) // Remove entries older than monitoring period cutoff := time.Now().Add(-monitoringPeriod) for i, u := range usageHistory { if u.timestamp.After(cutoff) { usageHistory = usageHistory[i:] break } } if len(usageHistory) > 0 && isSystemIdle(usageHistory) { log.Printf("System status before suspend:\n") log.Printf("- CPU: %.1f%%\n", usageHistory[len(usageHistory)-1].cpuUsage) log.Printf("- GPU: %.1f%%\n", usageHistory[len(usageHistory)-1].gpuUsage) log.Printf("- SSH connections: %d\n", usageHistory[len(usageHistory)-1].sshConnections) log.Printf("- Active users: %d\n", usageHistory[len(usageHistory)-1].activeUsers) log.Println("System has been idle for the monitoring period. Suspending...") if err := suspendSystem(); err != nil { log.Printf("Failed to suspend system: %v", err) } } } } func startHTTPServer() { http.HandleFunc("/status", handleStatus) if err := http.ListenAndServe(fmt.Sprintf(":%d", httpPort), nil); err != nil { log.Printf("Failed to start HTTP server: %v", err) } } func handleStatus(w http.ResponseWriter, r *http.Request) { statusMutex.RLock() defer statusMutex.RUnlock() w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(currentStatus) } func updateSystemStatus(current ResourceUsage, history []ResourceUsage) { statusMutex.Lock() defer statusMutex.Unlock() currentStatus.CurrentUsage = current currentStatus.Blockers = []string{} if len(history) >= 2 { // Calculate rates using last two samples duration := history[len(history)-1].timestamp.Sub(history[len(history)-2].timestamp).Seconds() diskIORate := float64(history[len(history)-1].diskIO-history[len(history)-2].diskIO) / duration netIORate := float64(history[len(history)-1].networkIO-history[len(history)-2].networkIO) / duration if current.cpuUsage >= cpuThreshold { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("CPU usage too high: %.1f%% >= %.1f%%", current.cpuUsage, cpuThreshold)) } if current.gpuUsage >= gpuThreshold { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("GPU usage too high: %.1f%% >= %.1f%%", current.gpuUsage, gpuThreshold)) } if diskIORate >= float64(diskThreshold) { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("Disk I/O too high: %.1f MB/s >= %.1f MB/s", diskIORate/(1024*1024), float64(diskThreshold)/(1024*1024))) } if netIORate >= float64(networkThreshold) { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("Network I/O too high: %.1f MB/s >= %.1f MB/s", netIORate/(1024*1024), float64(networkThreshold)/(1024*1024))) } } if current.sshConnections > 0 { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("Active SSH connections: %d", current.sshConnections)) } if current.activeUsers > 0 { currentStatus.Blockers = append(currentStatus.Blockers, fmt.Sprintf("Active user sessions: %d", current.activeUsers)) } if len(currentStatus.Blockers) == 0 { currentStatus.Blockers = append(currentStatus.Blockers, "No blockers - system can sleep") } } func getCurrentUsage() ResourceUsage { usage := ResourceUsage{ timestamp: time.Now(), } // Get CPU usage across all cores if cpuPercent, err := cpu.Percent(0, true); err == nil && len(cpuPercent) > 0 { // Calculate average CPU usage across all cores var totalCPU float64 for _, percent := range cpuPercent { totalCPU += percent } usage.cpuUsage = totalCPU / float64(len(cpuPercent)) } // Get GPU usage across all GPUs count, ret := nvml.DeviceGetCount() if ret == nvml.SUCCESS && count > 0 { var totalGPU float64 var activeGPUs int for i := 0; i < count; i++ { device, ret := nvml.DeviceGetHandleByIndex(i) if ret == nvml.SUCCESS { utilization, ret := device.GetUtilizationRates() if ret == nvml.SUCCESS { totalGPU += float64(utilization.Gpu) activeGPUs++ } } } if activeGPUs > 0 { usage.gpuUsage = totalGPU / float64(activeGPUs) } } // Get disk I/O if diskStats, err := disk.IOCounters(); err == nil { var totalIO uint64 for _, stat := range diskStats { totalIO += stat.ReadBytes + stat.WriteBytes } usage.diskIO = totalIO } // Get network I/O if netStats, err := net.IOCounters(false); err == nil && len(netStats) > 0 { usage.networkIO = netStats[0].BytesSent + netStats[0].BytesRecv } // Count SSH connections if sshCount, err := getSSHConnectionCount(); err == nil { usage.sshConnections = sshCount } // Count active user sessions if userCount, err := getActiveUserCount(); err == nil { usage.activeUsers = userCount } return usage } func getSSHConnectionCount() (int, error) { processes, err := process.Processes() if err != nil { return 0, err } count := 0 for _, p := range processes { name, err := p.Name() if err != nil { continue } if name == "sshd" { cmdline, err := p.Cmdline() if err != nil { continue } // Only count sshd processes that are handling connections // The main sshd process doesn't have "@" in its cmdline if strings.Contains(cmdline, "@") { count++ } } } return count, nil } func getActiveUserCount() (int, error) { out, err := exec.Command("who", "-s").Output() if err != nil { return 0, err } // Count non-empty lines lines := strings.Split(strings.TrimSpace(string(out)), "\n") count := 0 for _, line := range lines { if line != "" { count++ } } return count, nil } func isSystemIdle(history []ResourceUsage) bool { if len(history) < 2 { return false } var avgCPU, avgGPU float64 var maxSSHConnections, maxActiveUsers int samples := len(history) for _, usage := range history { avgCPU += usage.cpuUsage avgGPU += usage.gpuUsage if usage.sshConnections > maxSSHConnections { maxSSHConnections = usage.sshConnections } if usage.activeUsers > maxActiveUsers { maxActiveUsers = usage.activeUsers } } // Calculate I/O rates using first and last samples duration := history[samples-1].timestamp.Sub(history[0].timestamp).Seconds() diskIORate := float64(history[samples-1].diskIO-history[0].diskIO) / duration netIORate := float64(history[samples-1].networkIO-history[0].networkIO) / duration avgCPU /= float64(samples) avgGPU /= float64(samples) return avgCPU < cpuThreshold && avgGPU < gpuThreshold && diskIORate < float64(diskThreshold) && netIORate < float64(networkThreshold) && maxSSHConnections == 0 && maxActiveUsers == 0 } func suspendSystem() error { cmd := exec.Command("systemctl", "suspend") return cmd.Run() }