Add resume grace period functionality and update system status
This commit is contained in:
parent
d0816a0e3c
commit
5a27277e7c
99
main.go
99
main.go
@ -23,16 +23,17 @@ import (
|
||||
|
||||
const (
|
||||
checkInterval = 10 * time.Second
|
||||
monitoringPeriod = 5 * time.Minute
|
||||
cpuThreshold = 20.0 // percentage
|
||||
gpuThreshold = 20.0 // percentage
|
||||
diskThreshold = 5 * 1024 * 1024 // 5 MB/s
|
||||
networkThreshold = 1 * 1024 * 1024 // 1 MB/s
|
||||
httpPort = 8081
|
||||
monitoringPeriod = 5 * time.Minute
|
||||
resumeGracePeriod = 5 * time.Minute // Time to wait after resume before allowing sleep again
|
||||
cpuThreshold = 20.0 // percentage
|
||||
gpuThreshold = 20.0 // percentage
|
||||
diskThreshold = 5 * 1024 * 1024 // 5 MB/s
|
||||
networkThreshold = 1 * 1024 * 1024 // 1 MB/s
|
||||
httpPort = 8081
|
||||
)
|
||||
|
||||
type ResourceUsage struct {
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CpuUsage float64 `json:"cpu_usage"`
|
||||
GpuUsage float64 `json:"gpu_usage"`
|
||||
GpuAvailable bool `json:"gpu_available"`
|
||||
@ -43,14 +44,18 @@ type ResourceUsage struct {
|
||||
}
|
||||
|
||||
type SystemStatus struct {
|
||||
CurrentUsage ResourceUsage `json:"current_usage"`
|
||||
Blockers []string `json:"sleep_blockers"`
|
||||
CurrentUsage ResourceUsage `json:"current_usage"`
|
||||
Blockers []string `json:"sleep_blockers"`
|
||||
InGracePeriod bool `json:"in_grace_period,omitempty"`
|
||||
GraceTimeLeft string `json:"grace_time_left,omitempty"`
|
||||
}
|
||||
|
||||
var (
|
||||
currentStatus SystemStatus
|
||||
statusMutex sync.RWMutex
|
||||
nvmlAvailable bool
|
||||
currentStatus SystemStatus
|
||||
statusMutex sync.RWMutex
|
||||
nvmlAvailable bool
|
||||
lastResumeTime time.Time // Track when the system last resumed from sleep
|
||||
lastTickTime time.Time // Track when we last processed a tick
|
||||
)
|
||||
|
||||
func main() {
|
||||
@ -95,7 +100,8 @@ func main() {
|
||||
log.Printf("- Network I/O < %.1f MB/s\n", float64(networkThreshold)/(1024*1024))
|
||||
log.Printf("- No active SSH connections\n")
|
||||
log.Printf("- No active user sessions\n")
|
||||
log.Printf("Over the last %v\n", monitoringPeriod)
|
||||
log.Printf("- Over the last %v\n", monitoringPeriod)
|
||||
log.Printf("- System will not suspend for %v after resuming from sleep\n", resumeGracePeriod)
|
||||
log.Printf("HTTP status endpoint available at http://localhost:%d/status\n", httpPort)
|
||||
log.Printf("Press Ctrl+C to exit\n")
|
||||
|
||||
@ -109,6 +115,19 @@ mainLoop:
|
||||
cancel()
|
||||
break mainLoop
|
||||
case <-ticker.C:
|
||||
now := time.Now()
|
||||
|
||||
// Check if we just resumed from sleep
|
||||
if !lastTickTime.IsZero() {
|
||||
gap := now.Sub(lastTickTime)
|
||||
// If there was a significant gap, probably resumed from sleep
|
||||
if gap > (checkInterval*3) && gap < time.Hour {
|
||||
log.Printf("Detected system resume after gap of %v", gap)
|
||||
lastResumeTime = now
|
||||
}
|
||||
}
|
||||
lastTickTime = now
|
||||
|
||||
usage := getCurrentUsage()
|
||||
usageHistory = append(usageHistory, usage)
|
||||
|
||||
@ -143,7 +162,7 @@ mainLoop:
|
||||
// Graceful shutdown of HTTP server
|
||||
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer shutdownCancel()
|
||||
|
||||
|
||||
if err := srv.Shutdown(shutdownCtx); err != nil {
|
||||
log.Printf("HTTP server shutdown error: %v", err)
|
||||
}
|
||||
@ -151,6 +170,19 @@ mainLoop:
|
||||
log.Println("Goodbye!")
|
||||
}
|
||||
|
||||
// Function to check if we're within the resume grace period
|
||||
func isInsideResumeGracePeriod() bool {
|
||||
return !lastResumeTime.IsZero() && time.Since(lastResumeTime) < resumeGracePeriod
|
||||
}
|
||||
|
||||
// Function to calculate time left in grace period
|
||||
func timeLeftInGracePeriod() time.Duration {
|
||||
if !isInsideResumeGracePeriod() {
|
||||
return 0
|
||||
}
|
||||
return resumeGracePeriod - time.Since(lastResumeTime)
|
||||
}
|
||||
|
||||
func startHTTPServer(ctx context.Context) *http.Server {
|
||||
srv := &http.Server{
|
||||
Addr: fmt.Sprintf(":%d", httpPort),
|
||||
@ -182,6 +214,18 @@ func updateSystemStatus(current ResourceUsage, history []ResourceUsage) {
|
||||
currentStatus.CurrentUsage = current
|
||||
currentStatus.Blockers = []string{}
|
||||
|
||||
// Add grace period info to status
|
||||
if isInsideResumeGracePeriod() {
|
||||
timeLeft := timeLeftInGracePeriod()
|
||||
currentStatus.InGracePeriod = true
|
||||
currentStatus.GraceTimeLeft = timeLeft.Round(time.Second).String()
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Resume grace period: %v remaining", timeLeft.Round(time.Second)))
|
||||
} else {
|
||||
currentStatus.InGracePeriod = false
|
||||
currentStatus.GraceTimeLeft = ""
|
||||
}
|
||||
|
||||
if len(history) >= 2 {
|
||||
// Calculate rates using last two samples
|
||||
duration := history[len(history)-1].Timestamp.Sub(history[len(history)-2].Timestamp).Seconds()
|
||||
@ -189,31 +233,31 @@ func updateSystemStatus(current ResourceUsage, history []ResourceUsage) {
|
||||
netIORate := float64(history[len(history)-1].NetworkIO-history[len(history)-2].NetworkIO) / duration
|
||||
|
||||
if current.CpuUsage >= cpuThreshold {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("CPU usage too high: %.1f%% >= %.1f%%", current.CpuUsage, cpuThreshold))
|
||||
}
|
||||
if nvmlAvailable && current.GpuUsage >= gpuThreshold {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("GPU usage too high: %.1f%% >= %.1f%%", current.GpuUsage, gpuThreshold))
|
||||
}
|
||||
if diskIORate >= float64(diskThreshold) {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Disk I/O too high: %.1f MB/s >= %.1f MB/s",
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Disk I/O too high: %.1f MB/s >= %.1f MB/s",
|
||||
diskIORate/(1024*1024), float64(diskThreshold)/(1024*1024)))
|
||||
}
|
||||
if netIORate >= float64(networkThreshold) {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Network I/O too high: %.1f MB/s >= %.1f MB/s",
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Network I/O too high: %.1f MB/s >= %.1f MB/s",
|
||||
netIORate/(1024*1024), float64(networkThreshold)/(1024*1024)))
|
||||
}
|
||||
}
|
||||
|
||||
if current.SshConnections > 0 {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Active SSH connections: %d", current.SshConnections))
|
||||
}
|
||||
if current.ActiveUsers > 0 {
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
currentStatus.Blockers = append(currentStatus.Blockers,
|
||||
fmt.Sprintf("Active user sessions: %d", current.ActiveUsers))
|
||||
}
|
||||
|
||||
@ -224,7 +268,7 @@ func updateSystemStatus(current ResourceUsage, history []ResourceUsage) {
|
||||
|
||||
func getCurrentUsage() ResourceUsage {
|
||||
usage := ResourceUsage{
|
||||
Timestamp: time.Now(),
|
||||
Timestamp: time.Now(),
|
||||
GpuAvailable: nvmlAvailable,
|
||||
}
|
||||
|
||||
@ -319,7 +363,7 @@ func getActiveUserCount() (int, error) {
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
|
||||
// Count non-empty lines
|
||||
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
|
||||
count := 0
|
||||
@ -332,6 +376,11 @@ func getActiveUserCount() (int, error) {
|
||||
}
|
||||
|
||||
func isSystemIdle(history []ResourceUsage) bool {
|
||||
// Don't allow sleep during grace period after resume
|
||||
if isInsideResumeGracePeriod() {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(history) < 2 {
|
||||
return false
|
||||
}
|
||||
@ -383,4 +432,4 @@ func isSystemIdle(history []ResourceUsage) bool {
|
||||
// suspendSystem runs `systemctl suspend` and waits for the command to finish.
// It returns whatever error exec reports from starting or running the command.
func suspendSystem() error {
	return exec.Command("systemctl", "suspend").Run()
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user