feat(PROJ-44): ocr_chars-Spalte + SetOCRResult-Helper

DB-Schema bekommt eine idempotente ocr_chars BIGINT-Spalte (Default 0).
SetOCRResult schreibt status und chars atomar; GetOCRMeta liest beide
mit COALESCE-Defaults. Der OCR-Worker ersetzt jeden SetOCRStatus-Call
durch SetOCRResult und uebergibt die extrahierte Zeichenzahl bei 'done'.
This commit is contained in:
sysops
2026-05-10 22:20:46 +02:00
parent 7be73c1041
commit 5078830469
3 changed files with 70 additions and 9 deletions
+51
View File
@@ -40,6 +40,57 @@ func (s *Store) SetOCRStatus(ctx context.Context, id, status string) error {
return nil
}
// SetOCRResult persists the OCR outcome of one mail: ocr_status and ocr_chars
// are written together by a single UPDATE statement (PROJ-44).
//
// status must be one of "pending", "done", "failed", "skipped" or "disabled";
// any other value is rejected with an error. chars is the extracted character
// count; a negative value is stored as 0. An empty id is rejected.
//
// When the Store has no database handle the call returns nil without
// validating or writing anything.
func (s *Store) SetOCRResult(ctx context.Context, id, status string, chars int64) error {
	// DB-less mode: nothing to persist, so even the argument checks are skipped.
	if s.db == nil {
		return nil
	}
	if id == "" {
		return errors.New("storage: SetOCRResult: empty id")
	}

	known := status == "pending" ||
		status == "done" ||
		status == "failed" ||
		status == "skipped" ||
		status == "disabled"
	if !known {
		return fmt.Errorf("storage: SetOCRResult: invalid status %q", status)
	}

	// Negative counts are clamped to zero instead of being reported as an error.
	count := chars
	if count < 0 {
		count = 0
	}

	const query = `UPDATE emails SET ocr_status = $1, ocr_chars = $2 WHERE id = $3`
	if _, execErr := s.db.Exec(ctx, query, status, count, id); execErr != nil {
		return fmt.Errorf("storage: set ocr result: %w", execErr)
	}
	return nil
}
// GetOCRMeta looks up the stored OCR status and character count of one mail.
//
// A NULL ocr_status is reported as "pending" and a NULL ocr_chars as 0 (both
// via COALESCE in the query). The result is "", 0, nil when the Store has no
// database handle or when no row matches id. An empty id yields an error, as
// does any scan failure other than pgx.ErrNoRows.
func (s *Store) GetOCRMeta(ctx context.Context, id string) (status string, chars int64, err error) {
	switch {
	case s.db == nil:
		// DB-less mode: report "nothing known" without validating id.
		return "", 0, nil
	case id == "":
		return "", 0, errors.New("storage: GetOCRMeta: empty id")
	}

	const query = `SELECT COALESCE(ocr_status, 'pending'), COALESCE(ocr_chars, 0)
FROM emails WHERE id = $1`

	var (
		gotStatus string
		gotChars  int64
	)
	scanErr := s.db.QueryRow(ctx, query, id).Scan(&gotStatus, &gotChars)
	switch {
	case scanErr == nil:
		return gotStatus, gotChars, nil
	case errors.Is(scanErr, pgx.ErrNoRows):
		// An unknown mail is not treated as an error.
		return "", 0, nil
	default:
		return "", 0, fmt.Errorf("storage: get ocr meta: %w", scanErr)
	}
}
// OCREnabled reports whether OCR processing should run for the given tenant.
// Defaults to true when:
// - no DB is configured (DB-less mode)
+9
View File
@@ -317,6 +317,15 @@ func (s *Store) initSchema(ctx context.Context) error {
ALTER TABLE emails ADD COLUMN IF NOT EXISTS ocr_status TEXT DEFAULT 'pending';
CREATE INDEX IF NOT EXISTS idx_emails_ocr_status ON emails (ocr_status) WHERE ocr_status = 'pending';
`)
if err != nil {
return err
}
// PROJ-44: gecachte Anzahl der extrahierten OCR-Zeichen — vermeidet einen
// Manticore-Roundtrip auf der Mail-Detail-Seite. Idempotent wie ocr_status.
_, err = s.db.Exec(ctx, `
ALTER TABLE emails ADD COLUMN IF NOT EXISTS ocr_chars BIGINT DEFAULT 0;
`)
return err
}