Fuehre Offline-Schwelle fuer Server-Connectivity ein

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-03-22 18:25:01 +01:00 · 2026-03-22 18:25:01 +01:00 · a69135c0b9
commit a69135c0b9
parent 2c780d3e60
4 changed files with 76 additions and 8 deletions
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@ -187,7 +187,7 @@ go run ./cmd/agent
 1. Backend: einheitliches Fehlerformat und Routing-Grundstruktur anlegen
 2. Backend: Konfigurations- und App-Lifecycle stabilisieren
 3. Agent und Backend: den HTTP-Statuspfad als Grundlage fuer Identitaet, Persistenz und spaetere Admin-Vorschau erweitern
-4. Agent: danach einen expliziten `offline`-Zustand und weitere Connectivity-Schwellenlogik aufsetzen
+4. Agent: danach MQTT-spezifische Reachability und feinere Connectivity-Schwellenlogik aufsetzen
 5. Danach die Netzwerk-, Sync- und Kommandopfade schrittweise produktionsnah ausbauen
 Ergaenzt seit dem ersten Geruest:
@ -199,7 +199,7 @@ Ergaenzt seit dem ersten Geruest:
 - dateibasierte Agent-Konfiguration zusaetzlich zu Env-Overrides
 - strukturierte Agent-Logs mit internem Health-Snapshot und signalgesteuertem Shutdown
 - erster periodischer HTTP-Status-Reporter im Agent
- Server-Connectivity-Zustand im Agent (`unknown`, `online`, `degraded`) auf Basis der Report-Ergebnisse
+- Server-Connectivity-Zustand im Agent (`unknown`, `online`, `degraded`, `offline`) auf Basis der Report-Ergebnisse
 - lokales Compose-Grundgeruest fuer PostgreSQL und Mosquitto
 ## Arbeitsweise
--- a/docs/PLAYER-AGENT-LIFECYCLE.md
+++ b/docs/PLAYER-AGENT-LIFECYCLE.md
@ -47,10 +47,17 @@ Getrennt vom Lifecycle fuehrt der Agent fuer die Server-Erreichbarkeit aktuell d
 - `unknown` vor dem ersten erfolgreichen oder fehlgeschlagenen Status-Report
 - `online` nach einem erfolgreich bestaetigten HTTP-Status-Report
 - `degraded` nach einem fehlgeschlagenen HTTP-Status-Report
 - `offline` ab dem dritten aufeinanderfolgenden fehlgeschlagenen HTTP-Status-Report
 Damit bleibt der Lifecycle sauber von Netz- und Gegenstellenproblemen getrennt.
 Ein Report-Fehler stoppt den Agenten nicht, sondern veraendert nur den Connectivity-Zustand.
 Aktuell gilt fuer diese Schwellenlogik bewusst einfach:
 - erster Fehl-Report: `degraded`
 - ab dem dritten aufeinanderfolgenden Fehl-Report: `offline`
 - naechster erfolgreicher Report: Rueckkehr nach `online`
 ## Strukturierte Log-Ereignisse
 Der Agent emittiert in v1 mindestens diese Ereignisse:
@ -88,6 +95,6 @@ Nicht Teil dieser Stufe:
 - Kommandos oder Sync-Status
 Die erste Backend-Reachability-Pruefung ist in dieser Stufe bereits ueber den HTTP-Status-Report abgebildet.
-Ein expliziter `offline`-Zustand, MQTT-Reachability und weitergehende Schwellenlogik folgen spaeter.
+MQTT-Reachability und weitergehende Schwellenlogik folgen spaeter.
 Diese Punkte folgen erst, wenn echte Netzwerk- und Sync-Funktionalitaet eingebaut wird.
--- a/player/agent/internal/app/app.go
+++ b/player/agent/internal/app/app.go
@ -24,8 +24,11 @@ const (
 	ConnectivityUnknown  Connectivity = "unknown"
 	ConnectivityOnline   Connectivity = "online"
 	ConnectivityDegraded Connectivity = "degraded"
 	ConnectivityOffline  Connectivity = "offline"
 )
 // offlineFailureThreshold is the number of consecutive failed status reports
 // at which reportStatus sets the server connectivity to ConnectivityOffline
 // instead of ConnectivityDegraded.
 const offlineFailureThreshold = 3
 type HealthSnapshot struct {
 	Status             Status
 	ServerConnectivity Connectivity
@ -46,6 +49,7 @@ type App struct {
 	mu                        sync.RWMutex
 	status                    Status
 	serverConnectivity        Connectivity
 	consecutiveReportFailures int
 	startedAt                 time.Time
 	lastHeartbeatAt           time.Time
 }
@ -181,13 +185,18 @@ func (a *App) reportStatus(ctx context.Context) {
 	})
 	if err != nil {
 		a.mu.Lock()
 		a.consecutiveReportFailures++
 		a.serverConnectivity = ConnectivityDegraded
 		if a.consecutiveReportFailures >= offlineFailureThreshold {
 			a.serverConnectivity = ConnectivityOffline
 		}
 		a.mu.Unlock()
 		a.logger.Printf("event=status_report_failed screen_id=%s error=%v", a.Config.ScreenID, err)
 		return
 	}
 	a.mu.Lock()
 	a.consecutiveReportFailures = 0
 	a.serverConnectivity = ConnectivityOnline
 	a.mu.Unlock()
 	a.logger.Printf("event=status_report_sent screen_id=%s", a.Config.ScreenID)
--- a/player/agent/internal/app/app_test.go
+++ b/player/agent/internal/app/app_test.go
@ -15,10 +15,16 @@ import (
 // recordingReporter is a test double whose Send method counts its calls and
 // returns preconfigured errors instead of performing a real status report.
 type recordingReporter struct {
 	// callCount is incremented on every Send call.
 	callCount int
 	// err is returned by Send whenever errs is empty.
 	err       error
 	// errs holds per-call results; Send consumes them front to back before
 	// falling back to err.
 	errs      []error
 }
 // Send records the invocation and yields the next scripted result. Entries in
 // errs are handed out one per call, in order; once the queue is exhausted the
 // fixed err value is returned for every further call.
 func (r *recordingReporter) Send(_ context.Context, _ statusreporter.Snapshot) error {
 	r.callCount++

 	// No scripted results left: fall back to the static error.
 	if len(r.errs) == 0 {
 		return r.err
 	}

 	next, remaining := r.errs[0], r.errs[1:]
 	r.errs = remaining
 	return next
 }
@ -236,3 +242,49 @@ func TestAppRunMarksServerConnectivityOnlineAfterSuccessfulReport(t *testing.T)
 	cancel()
 	<-errCh
 }
 // TestReportStatusMarksServerConnectivityOfflineAfterRepeatedFailures verifies
 // the escalation of the server connectivity state while every status report
 // fails: the first and second consecutive failures must yield
 // ConnectivityDegraded, and the third must switch to ConnectivityOffline.
 func TestReportStatusMarksServerConnectivityOfflineAfterRepeatedFailures(t *testing.T) {
 	// The reporter fails every call with the same error.
 	reporter := &recordingReporter{err: context.DeadlineExceeded}
 	application := newApp(config.Config{
 		ScreenID:          "screen-offline",
 		ServerBaseURL:     "http://127.0.0.1:8080",
 		MQTTBroker:        "tcp://127.0.0.1:1883",
 		HeartbeatEvery:    30,
 		StatusReportEvery: 30,
 	}, log.New(&bytes.Buffer{}, "", 0), time.Now, reporter)

 	application.reportStatus(context.Background())
 	if got, want := application.Snapshot().ServerConnectivity, ConnectivityDegraded; got != want {
 		t.Fatalf("after first failure ServerConnectivity = %q, want %q", got, want)
 	}

 	// Boundary check: the second failure is still below the offline threshold,
 	// so an off-by-one in the threshold comparison is caught here.
 	application.reportStatus(context.Background())
 	if got, want := application.Snapshot().ServerConnectivity, ConnectivityDegraded; got != want {
 		t.Fatalf("after second failure ServerConnectivity = %q, want %q", got, want)
 	}

 	application.reportStatus(context.Background())
 	if got, want := application.Snapshot().ServerConnectivity, ConnectivityOffline; got != want {
 		t.Fatalf("after repeated failures ServerConnectivity = %q, want %q", got, want)
 	}
 }
 // TestReportStatusRecoversFromOfflineToOnline drives the app into the offline
 // state with three failing reports and then checks that a single successful
 // report brings the server connectivity back to online.
 func TestReportStatusRecoversFromOfflineToOnline(t *testing.T) {
 	timeout := context.DeadlineExceeded
 	// Three scripted failures followed by one success.
 	reporter := &recordingReporter{errs: []error{timeout, timeout, timeout, nil}}

 	cfg := config.Config{
 		ScreenID:          "screen-recover",
 		ServerBaseURL:     "http://127.0.0.1:8080",
 		MQTTBroker:        "tcp://127.0.0.1:1883",
 		HeartbeatEvery:    30,
 		StatusReportEvery: 30,
 	}
 	application := newApp(cfg, log.New(&bytes.Buffer{}, "", 0), time.Now, reporter)
 	ctx := context.Background()

 	for i := 0; i < 3; i++ {
 		application.reportStatus(ctx)
 	}
 	if state := application.Snapshot().ServerConnectivity; state != ConnectivityOffline {
 		t.Fatalf("offline state = %q, want %q", state, ConnectivityOffline)
 	}

 	// The fourth scripted result is nil, i.e. a successful report.
 	application.reportStatus(ctx)
 	if state := application.Snapshot().ServerConnectivity; state != ConnectivityOnline {
 		t.Fatalf("recovered state = %q, want %q", state, ConnectivityOnline)
 	}
 }