fix(PROJ-44): OCR-Tenant-Routing nutzt kanonische DB-tenant_id

Strukturbug auf 132 gefunden: Tenant-User (Rolle user) sahen ihren
OCR-Text nicht, obwohl ocr_chars>0 in PostgreSQL stand. Ursache:

- OCR-Worker hat in den per Job.TenantID gewaehlten Index geschrieben.
  Beim Reprocess via CLI kam TenantID aus dem Submitter-Kontext und
  konnte vom in emails.tenant_id gespeicherten Wert abweichen.
- /ocr-text-Endpoint hat fuer die Index-Auswahl session.TenantID
  benutzt. Bei Admin/Auditor (nil Session-Tenant) wurde immer global
  gelesen, auch wenn die Mail einem Tenant gehoert.

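Fix: Beide Stellen ermitteln die TenantID jetzt ueber
storage.GetTenantForMail (liefert emails.tenant_id zur Mail-ID) und
routen den Manticore-Index entsprechend. Der Endpoint nutzt
ausschliesslich diesen Wert; der Worker uebernimmt ihn, sobald der
Lookup fehlerfrei einen nicht-nil Wert liefert, und behaelt sonst
Job.TenantID als Fallback. ACL-Check im Endpoint bleibt
unveraendert auf session.TenantID == mail.tenant_id — die
Tenant-Isolation wird nicht aufgeweicht.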

Edge cases:
- Mail mit tenant_id NULL: GetTenantForMail liefert nil. Endpoint ->
  globaler Index; Worker behaelt Job.TenantID (nur ein nicht-nil
  DB-Wert ueberschreibt ihn). Verhalten vorher und nachher gleich.
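- DB-Fehler beim Lookup: Der Endpoint ignoriert den Fehler und faellt
  auf nil zurueck -> globaler Index, liefert leeren Text fuer
  Tenant-Mails -> 404. Safe (keine Querleckage zwischen Tenants).
  Der Worker faellt bei einem Fehler auf Job.TenantID zurueck
  (Verhalten wie vor dem Fix).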
This commit is contained in:
sysops
2026-05-10 23:13:57 +02:00
parent 5e1a51b028
commit 16013e8b66
2 changed files with 28 additions and 8 deletions
+12 -5
View File
@@ -109,8 +109,15 @@ func (s *Server) handleGetOCRText(w http.ResponseWriter, r *http.Request) {
return
}
// Resolve the correct per-tenant Manticore index for this mail.
reader := s.ocrTextReader(sess.TenantID)
// PROJ-44: Resolve the correct per-tenant Manticore index based on the
// MAIL's tenant assignment, not the session's. The session tenant is
// already used for ACL enforcement above; using it for index selection
// breaks admin/auditor access (who have nil session tenant) and would
// also misread when the two ever disagree. The OCR worker writes into
// the index derived from emails.tenant_id, so the reader must follow
// the same rule.
mailTenant, _ := s.store.GetTenantForMail(r.Context(), id)
reader := s.ocrTextReader(mailTenant)
if reader == nil {
s.logger.Warn("ocr-text: indexer does not support AttachmentTextReader",
"mail_id", id)
@@ -154,9 +161,9 @@ func (s *Server) handleGetOCRText(w http.ResponseWriter, r *http.Request) {
}
// ocrTextReader returns the AttachmentTextReader for the given tenant or
// nil when no reader-capable index is wired. Resolution rules match the
// search path (see handleSearch): per-tenant index when available, else the
// global indexer.
// nil when no reader-capable index is wired. Callers must pass the MAIL's
// tenant id (emails.tenant_id), not the session tenant — see PROJ-44 fix.
// Resolution rules: per-tenant index when available, else the global indexer.
func (s *Server) ocrTextReader(tenantID *int64) index.AttachmentTextReader {
var idx index.Indexer = s.idx
if s.idxMgr != nil && tenantID != nil {
+16 -3
View File
@@ -134,9 +134,22 @@ func (w *Worker) run(ctx context.Context, id int) {
}
func (w *Worker) process(ctx context.Context, job Job) {
logger := w.logger.With("mail_id", job.MailID, "tenant_id", job.TenantID)
// PROJ-44: The canonical source of truth for a mail's tenant assignment
// is emails.tenant_id in PostgreSQL — never the submitter's context.
// Re-imports via IMAP/POP3 scheduler may submit the same mail with a
// different (e.g. nil) tenant pointer than the one stored on the mail,
// which would otherwise route the OCR result into the wrong per-tenant
// Manticore index (emails_global instead of emails_tenant_N).
// We resolve the authoritative tenant once and use it everywhere below.
tenantID := job.TenantID
if w.store != nil {
if t, err := w.store.GetTenantForMail(ctx, job.MailID); err == nil && t != nil {
tenantID = t
}
}
logger := w.logger.With("mail_id", job.MailID, "tenant_id", tenantID)
if w.store.OCREnabled(ctx, job.TenantID) == false {
if w.store.OCREnabled(ctx, tenantID) == false {
_ = w.store.SetOCRResult(ctx, job.MailID, "disabled", 0)
return
}
@@ -187,7 +200,7 @@ func (w *Worker) process(ctx context.Context, job Job) {
return
}
idx := w.idxMgr.ForTenant(job.TenantID)
idx := w.idxMgr.ForTenant(tenantID)
updater, ok := idx.(index.AttachmentTextUpdater)
if !ok {
logger.Warn("ocr worker: indexer does not support AttachmentTextUpdater — text dropped")