feat(PROJ-40,PROJ-41): Prometheus-Metriken + Dashboard-Zeitreihe

- PROJ-40: /api/health mit Version+Uptime, /metrics Prometheus-Format
  (mails_last_60min/24h/7d/30d, mails_total, storage_bytes, tenants_total,
   users_total, uptime_seconds) — Token-Schutz optional konfigurierbar
- PROJ-41: GET /api/admin/stats/timeseries (30-Tage tagesgenau, Tenant-scoped)
  + SVG-Balkendiagramm im Dashboard (Mail-Eingang letzte 30 Tage)
- storage.DBQueryRow() Helper für Metrics-Queries ohne Pool-Exposition
- config.MetricsConfig (enabled, token) in config.go

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sysops
2026-04-05 21:10:42 +02:00
parent 4f366a3634
commit 9298216ce0
11 changed files with 302 additions and 14 deletions
+26
View File
@@ -16,6 +16,32 @@ import (
"archivmail/pkg/mailparser"
)
// ── Mail Timeseries handler ───────────────────────────────────────────────
// handleMailTimeseries responds with per-day mail counts for a fixed window
// of 30 days.
// GET /api/admin/stats/timeseries
//
// The window length is fixed in this handler; a `days` query parameter is not
// read. When the session carries a tenant ID the series is restricted to the
// tenant taken from the request context; otherwise the store is queried
// without a tenant filter.
func (s *Server) handleMailTimeseries(w http.ResponseWriter, r *http.Request) {
	const windowDays = 30

	ctx := r.Context()
	session := sessionFromCtx(ctx)
	ctxTenant := tenantFromCtx(ctx)

	// A nil scope means "no tenant filter"; it is only narrowed for sessions
	// that are bound to a tenant.
	var scope *int64
	if session.TenantID != nil {
		scope = ctxTenant
	}

	series, err := s.store.MailTimeseries(ctx, windowDays, scope)
	if err != nil {
		writeError(w, http.StatusInternalServerError, "timeseries query failed")
		return
	}

	payload := map[string]interface{}{
		"days":   windowDays,
		"points": series,
	}
	writeJSON(w, http.StatusOK, payload)
}
// ── Audit Log handler ─────────────────────────────────────────────────────
func (s *Server) handleAuditLog(w http.ResponseWriter, r *http.Request) {
+92
View File
@@ -0,0 +1,92 @@
package api
import (
	"crypto/subtle"
	"fmt"
	"net/http"
	"strings"
	"time"
)
// handleHealth reports liveness together with the application version and the
// whole seconds elapsed since the server's start time.
// GET /api/health (public, no auth)
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
	uptime := time.Since(s.startTime)

	body := map[string]interface{}{
		"status":     "ok",
		"version":    s.appVersion,
		"uptime_sec": int64(uptime.Seconds()),
	}
	writeJSON(w, http.StatusOK, body)
}
// handleMetrics serves Prometheus-compatible metrics.
// GET /metrics (public or token-protected)
//
// The endpoint answers 404 when metrics are disabled in the config. When a
// token is configured, the request must present it either as a Bearer token
// or as a ?token= query parameter; otherwise 401 is returned.
//
// Metrics are DB-backed gauges; no in-process counters needed. A gauge whose
// query fails is left out of the response instead of being exported as 0, so
// a database problem is not mistaken for real data.
// Format: https://prometheus.io/docs/instrumenting/exposition_formats/
func (s *Server) handleMetrics(w http.ResponseWriter, r *http.Request) {
	if !s.metricsCfg.Enabled {
		writeError(w, http.StatusNotFound, "metrics disabled")
		return
	}

	// Optional token protection.
	if want := s.metricsCfg.Token; want != "" {
		got := extractBearerToken(r)
		if got == "" {
			// Also accept ?token= query param for scraper convenience.
			got = r.URL.Query().Get("token")
		}
		// Compare in constant time so the response latency does not reveal
		// how much of a guessed token was correct.
		if subtle.ConstantTimeCompare([]byte(got), []byte(want)) != 1 {
			w.Header().Set("WWW-Authenticate", `Bearer realm="archivmail metrics"`)
			writeError(w, http.StatusUnauthorized, "invalid metrics token")
			return
		}
	}

	ctx := r.Context()
	var sb strings.Builder

	// metric appends one sample with its HELP and TYPE header lines.
	metric := func(name, help, typ string, val interface{}) {
		fmt.Fprintf(&sb, "# HELP %s %s\n", name, help)
		fmt.Fprintf(&sb, "# TYPE %s %s\n", name, typ)
		fmt.Fprintf(&sb, "%s %v\n", name, val)
	}

	// scan runs a single-row query through the store and reports whether the
	// destinations hold a usable value. Without a store the destinations keep
	// their zero values and true is returned, so the gauge is exported as 0.
	scan := func(query string, dest ...interface{}) bool {
		if s.store == nil {
			return true
		}
		return s.store.DBQueryRow(ctx, query).Scan(dest...) == nil
	}

	// ── Mail counts ───────────────────────────────────────────────────────
	if s.store != nil {
		// Best effort: when no stats are returned the window gauges are omitted.
		activity, _ := s.store.MailActivityStats(ctx)
		if activity != nil {
			metric("archivmail_mails_last_60min", "Mails received in the last 60 minutes", "gauge", activity.Last60Min)
			metric("archivmail_mails_last_24h", "Mails received in the last 24 hours", "gauge", activity.Last24h)
			metric("archivmail_mails_last_7d", "Mails received in the last 7 days", "gauge", activity.Last7d)
			metric("archivmail_mails_last_30d", "Mails received in the last 30 days", "gauge", activity.Last30d)
		}
	}

	// Total mails + storage bytes from DB.
	var totalMails, totalBytes int64
	if scan(`SELECT COUNT(*), COALESCE(SUM(size_bytes),0) FROM emails`, &totalMails, &totalBytes) {
		metric("archivmail_mails_total", "Total number of archived mails", "gauge", totalMails)
		metric("archivmail_storage_bytes", "Total size of archived mails in bytes", "gauge", totalBytes)
	}

	// Tenant count; only queried when a tenant store is wired, otherwise 0.
	var tenantCount int64
	tenantsOK := true
	if s.tenantStore != nil {
		tenantsOK = scan(`SELECT COUNT(*) FROM tenants`, &tenantCount)
	}
	if tenantsOK {
		metric("archivmail_tenants_total", "Total number of tenants", "gauge", tenantCount)
	}

	// User count.
	var userCount int64
	if scan(`SELECT COUNT(*) FROM users WHERE active = true`, &userCount) {
		metric("archivmail_users_total", "Total number of active users", "gauge", userCount)
	}

	// Process uptime.
	metric("archivmail_uptime_seconds", "Process uptime in seconds", "gauge", int64(time.Since(s.startTime).Seconds()))

	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(sb.String())) //nolint:errcheck
}
+19 -11
View File
@@ -8,6 +8,7 @@ import (
"net/http"
"strings"
"sync"
"time"
"regexp"
@@ -61,6 +62,8 @@ const (
// Server is the archivmail HTTP API server.
type Server struct {
cfg config.APIConfig
metricsCfg config.MetricsConfig
startTime time.Time
store *storage.Store
idx index.Indexer
authMgr *auth.Manager
@@ -122,6 +125,11 @@ func (s *Server) SetGlobalRetentionDays(days int) {
s.globalRetentionDays = days
}
// SetMetrics stores the metrics configuration on the API server, replacing
// any previously set value. The /metrics handler reads its Enabled and Token
// fields on each request.
func (s *Server) SetMetrics(cfg config.MetricsConfig) {
s.metricsCfg = cfg
}
// SetMailer wires the SMTP-Out mailer into the API server (PROJ-28).
func (s *Server) SetMailer(m *mailer.Mailer) {
s.mailer = m
@@ -153,14 +161,15 @@ func New(
logger *slog.Logger,
) *Server {
s := &Server{
cfg: cfg,
store: store,
idx: idx,
authMgr: authMgr,
users: users,
audlog: audlog,
logger: logger,
mux: http.NewServeMux(),
cfg: cfg,
store: store,
idx: idx,
authMgr: authMgr,
users: users,
audlog: audlog,
logger: logger,
mux: http.NewServeMux(),
startTime: time.Now(),
}
s.routes()
return s
@@ -178,6 +187,7 @@ func (s *Server) authAdmin(h http.HandlerFunc) http.HandlerFunc {
func (s *Server) routes() {
s.mux.HandleFunc("GET /api/health", s.handleHealth)
s.mux.HandleFunc("GET /metrics", s.handleMetrics)
s.mux.HandleFunc("GET /api/version", s.handleVersion)
s.mux.HandleFunc("POST /api/auth/login", s.handleLogin)
s.mux.HandleFunc("GET /api/auth/me", s.auth(s.handleMe))
@@ -206,6 +216,7 @@ func (s *Server) routes() {
s.mux.HandleFunc("POST /api/admin/services/{name}/action", s.authAdmin(s.handleServiceAction))
s.mux.HandleFunc("GET /api/admin/system/stats", s.authAdmin(s.handleSystemStats))
s.mux.HandleFunc("GET /api/admin/stats/timeseries", s.authAdmin(s.handleMailTimeseries))
s.mux.HandleFunc("GET /api/admin/security/audit", s.authAdmin(s.handleSecurityAudit))
// SEC-17: Security fix actions require superadmin, not just domain_admin.
s.mux.HandleFunc("POST /api/admin/security/fix", s.auth(s.requireRole(userstore.RoleSuperAdmin, s.handleSecurityFix)))
@@ -288,9 +299,6 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// --- handlers ---
// handleHealth answers with HTTP 200 and a single "status" entry set to "ok".
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
	status := map[string]string{"status": "ok"}
	writeJSON(w, http.StatusOK, status)
}
// --- middleware ---
+15
View File
@@ -1280,3 +1280,18 @@ func (s *Store) GetVerifyStatus(ctx context.Context, id string) (VerifyStatus, e
vs.VerifiedAt = at
return vs, nil
}
// DBQueryRow runs a single-row query against the store's database and hands
// back the resulting row so the caller can Scan it; it is used by the API
// metrics handler. When the store has no database, a placeholder row is
// returned whose Scan succeeds without touching its destinations.
func (s *Store) DBQueryRow(ctx context.Context, sql string, args ...interface{}) interface {
	Scan(dest ...interface{}) error
} {
	if s.db != nil {
		return s.db.QueryRow(ctx, sql, args...)
	}
	return &noopRow{}
}
// noopRow is a placeholder query row used when no database is available.
type noopRow struct{}

// Scan leaves every destination untouched and always reports success.
func (*noopRow) Scan(_ ...interface{}) error {
	return nil
}
+67
View File
@@ -5,6 +5,73 @@ import (
"time"
)
// TimeseriesPoint is one entry of the mail timeseries: the number of mails
// counted for a single calendar day.
type TimeseriesPoint struct {
Day string `json:"day"` // calendar day formatted as YYYY-MM-DD, e.g. "2026-04-05"
Count int64 `json:"count"` // mails counted for that day; 0 when the query returned no row for it
}
// MailTimeseries returns daily mail counts for the last `days` days.
//
// The result always has exactly `days` entries, ordered oldest first and
// ending with the current UTC day; days for which the query returned no row
// are filled with a count of 0. A non-positive `days` falls back to 30.
// When tenantID is non-nil only mails referenced by that tenant in email_refs
// are counted; otherwise all mails are counted.
//
// It returns (nil, nil) when the store has no database, and an error when the
// query, a row scan, or the row iteration fails.
func (s *Store) MailTimeseries(ctx context.Context, days int, tenantID *int64) ([]TimeseriesPoint, error) {
	if s.db == nil {
		return nil, nil
	}
	if days <= 0 {
		days = 30
	}

	var rows interface {
		Next() bool
		Scan(dest ...interface{}) error
		Err() error
		Close()
	}
	var err error
	// make_interval takes the day count as an integer parameter, so the Go int
	// can be bound directly instead of being concatenated into an interval text.
	if tenantID != nil {
		rows, err = s.db.Query(ctx, `
			SELECT date_trunc('day', e.received_at AT TIME ZONE 'UTC')::date AS day,
			       COUNT(*) AS cnt
			FROM emails e
			JOIN email_refs r ON r.email_id = e.id AND r.tenant_id = $2
			WHERE e.received_at >= NOW() - make_interval(days => $1)
			GROUP BY day
			ORDER BY day ASC
		`, days, *tenantID)
	} else {
		rows, err = s.db.Query(ctx, `
			SELECT date_trunc('day', received_at AT TIME ZONE 'UTC')::date AS day,
			       COUNT(*) AS cnt
			FROM emails
			WHERE received_at >= NOW() - make_interval(days => $1)
			GROUP BY day
			ORDER BY day ASC
		`, days)
	}
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Build a map day→count then fill in zeros for missing days.
	counts := map[string]int64{}
	for rows.Next() {
		var day time.Time
		var cnt int64
		if err := rows.Scan(&day, &cnt); err != nil {
			return nil, err
		}
		counts[day.Format("2006-01-02")] = cnt
	}
	// Errors hit while iterating only surface here; without this check a failed
	// query would be reported as a series of zeros.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	// Read the clock once so every bucket is derived from the same instant,
	// even if the loop runs across a UTC midnight.
	today := time.Now().UTC()
	result := make([]TimeseriesPoint, days)
	for i := range result {
		key := today.AddDate(0, 0, -(days - 1 - i)).Format("2006-01-02")
		result[i] = TimeseriesPoint{Day: key, Count: counts[key]}
	}
	return result, nil
}
// MailActivity holds mail counts over different time windows.
type MailActivity struct {
Last60Min int64 `json:"last_60_min"`