package storage import ( "context" "errors" "fmt" "github.com/jackc/pgx/v5" ) // PROJ-35: OCR-Status-Verwaltung pro Mail. // Status-Werte: pending | done | failed | skipped | disabled // PendingOCRMail describes one entry in the OCR backlog. TenantID is nil // when the mail has no tenant assignment (system-level / global). type PendingOCRMail struct { ID string TenantID *int64 } // SetOCRStatus updates emails.ocr_status for one mail. // Valid values: pending | done | failed | skipped | disabled. // Silently no-ops when no DB is configured. func (s *Store) SetOCRStatus(ctx context.Context, id, status string) error { if s.db == nil { return nil } if id == "" { return errors.New("storage: SetOCRStatus: empty id") } switch status { case "pending", "done", "failed", "skipped", "disabled": default: return fmt.Errorf("storage: SetOCRStatus: invalid status %q", status) } _, err := s.db.Exec(ctx, `UPDATE emails SET ocr_status = $1 WHERE id = $2`, status, id) if err != nil { return fmt.Errorf("storage: set ocr status: %w", err) } return nil } // OCREnabled reports whether OCR processing should run for the given tenant. // Defaults to true when: // - no DB is configured (DB-less mode) // - tenantID is nil (global / no tenant) // - the tenants row cannot be read (degrade gracefully) // // OCR is opt-out, not opt-in. func (s *Store) OCREnabled(ctx context.Context, tenantID *int64) bool { if s.db == nil || tenantID == nil { return true } var enabled bool err := s.db.QueryRow(ctx, `SELECT COALESCE(ocr_enabled, TRUE) FROM tenants WHERE id = $1`, *tenantID, ).Scan(&enabled) if errors.Is(err, pgx.ErrNoRows) { return true } if err != nil { // On unexpected errors (e.g. column missing on a freshly-initialised DB), // default to enabled so OCR remains opt-out. return true } return enabled } // GetPendingOCRMails returns up to limit mails with ocr_status='pending'. // If tenantID != nil, results are restricted to mails referenced by that // tenant (via email_refs) or whose emails.tenant_id matches. // limit <= 0 means no limit. func (s *Store) GetPendingOCRMails(ctx context.Context, tenantID *int64, limit int) ([]PendingOCRMail, error) { return s.GetMailsByOCRStatus(ctx, "pending", tenantID, limit) } // GetMailsByOCRStatus returns mails matching a specific ocr_status filter. // status="all" returns every mail regardless of status. // If tenantID != nil, results are restricted to that tenant. // limit <= 0 means no limit. func (s *Store) GetMailsByOCRStatus(ctx context.Context, status string, tenantID *int64, limit int) ([]PendingOCRMail, error) { if s.db == nil { return nil, nil } args := []interface{}{} whereParts := []string{} if status != "" && status != "all" { args = append(args, status) whereParts = append(whereParts, fmt.Sprintf("COALESCE(e.ocr_status, 'pending') = $%d", len(args))) } joinClause := "" if tenantID != nil { args = append(args, *tenantID) joinClause = fmt.Sprintf( "LEFT JOIN email_refs r ON r.email_id = e.id AND r.tenant_id = $%d", len(args), ) whereParts = append(whereParts, fmt.Sprintf("(r.tenant_id = $%d OR e.tenant_id = $%d)", len(args), len(args)), ) } whereClause := "" if len(whereParts) > 0 { whereClause = "WHERE " + joinAnd(whereParts) } limitClause := "" if limit > 0 { args = append(args, limit) limitClause = fmt.Sprintf("LIMIT $%d", len(args)) } q := fmt.Sprintf(` SELECT e.id, e.tenant_id FROM emails e %s %s ORDER BY e.received_at DESC %s `, joinClause, whereClause, limitClause) rows, err := s.db.Query(ctx, q, args...) if err != nil { return nil, fmt.Errorf("storage: get mails by ocr status: %w", err) } defer rows.Close() var out []PendingOCRMail for rows.Next() { var m PendingOCRMail if err := rows.Scan(&m.ID, &m.TenantID); err != nil { continue } out = append(out, m) } return out, rows.Err() } // joinAnd joins parts with " AND " — small helper to avoid pulling in strings // for a single use. func joinAnd(parts []string) string { out := "" for i, p := range parts { if i > 0 { out += " AND " } out += p } return out }