groups:
  - name: cocos-manager-alerts
    rules:
      # Primary availability check: the scrape target exists but Prometheus
      # reports it as unreachable (up == 0) for a full minute.
      - alert: CocosManagerDown
        expr: 'up{job="cocos-manager"} == 0'
        for: 1m
        labels:
          service: cocos-manager
          severity: critical
        annotations:
          summary: 'Cocos Manager service is down'
          description: >-
            Cocos Manager service has been down for more than 1 minute.
            Instance: {{ $labels.instance }}

      # Fallback availability check: fires when no `up` series exists at all
      # for the job, a case that an `up == 0` comparison cannot detect because
      # there is no sample to compare.
      - alert: CocosManagerNoMetrics
        expr: 'absent(up{job="cocos-manager"})'
        for: 2m
        labels:
          service: cocos-manager
          severity: critical
        annotations:
          summary: 'No metrics received from Cocos Manager'
          description: >-
            No metrics have been received from Cocos Manager for more than
            2 minutes

      # Application-level health: relies on a `health_check` gauge where 0
      # means failing.
      # NOTE(review): only useful if the service actually exports this
      # metric - confirm it is exposed under the cocos-manager job.
      - alert: CocosManagerUnhealthy
        expr: 'health_check{job="cocos-manager"} == 0'
        for: 30s
        labels:
          service: cocos-manager
          severity: warning
        annotations:
          summary: 'Cocos Manager health check failing'
          description: >-
            Cocos Manager health check has been failing for more than 30 seconds.
            Instance: {{ $labels.instance }}

      # Latency check: p95 estimated from the latency histogram buckets over
      # a 5-minute rate window, evaluated per series.
      # NOTE(review): the threshold assumes the histogram is recorded in
      # seconds - confirm against the instrumentation.
      - alert: CocosManagerHighLatency
        expr: 'histogram_quantile(0.95, rate(cocos_manager_latency_bucket[5m])) > 2'
        for: 5m
        labels:
          service: cocos-manager
          severity: warning
        annotations:
          summary: 'Cocos Manager high latency'
          description: >-
            Cocos Manager 95th percentile latency is above 2 seconds for more than
            5 minutes. Current value: {{ $value }}s

      # High error rate alert.
      # rate() over a counter yields errors per second, so the 0.1 threshold
      # means "more than 0.1 errors/s" (about 6 per minute), not a percentage.
      # To alert on an error *ratio* instead, divide by the request rate, e.g.
      #   rate(<errors_total>[5m]) / rate(<requests_total>[5m]) > 0.1
      # (the request counter name is not defined in this file).
      - alert: CocosManagerHighErrorRate
        expr: rate(cocos_manager_errors_total[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
          service: cocos-manager
        annotations:
          summary: "Cocos Manager high error rate"
          description: "Cocos Manager error rate is above 0.1 errors per second for more than 2 minutes. Current rate: {{ $value }} errors/s"