diff --git a/internal/api/ocr_handlers.go b/internal/api/ocr_handlers.go index 43e5b43..00fe486 100644 --- a/internal/api/ocr_handlers.go +++ b/internal/api/ocr_handlers.go @@ -109,8 +109,15 @@ func (s *Server) handleGetOCRText(w http.ResponseWriter, r *http.Request) { return } - // Resolve the correct per-tenant Manticore index for this mail. - reader := s.ocrTextReader(sess.TenantID) + // PROJ-44: Resolve the correct per-tenant Manticore index based on the + // MAIL's tenant assignment, not the session's. The session tenant is + // already used for ACL enforcement above; using it for index selection + // breaks admin/auditor access (who have nil session tenant) and would + // also misread when the two ever disagree. The OCR worker writes into + // the index derived from emails.tenant_id, so the reader must follow + // the same rule. + mailTenant, _ := s.store.GetTenantForMail(r.Context(), id) + reader := s.ocrTextReader(mailTenant) if reader == nil { s.logger.Warn("ocr-text: indexer does not support AttachmentTextReader", "mail_id", id) @@ -154,9 +161,9 @@ func (s *Server) handleGetOCRText(w http.ResponseWriter, r *http.Request) { } // ocrTextReader returns the AttachmentTextReader for the given tenant or -// nil when no reader-capable index is wired. Resolution rules match the -// search path (see handleSearch): per-tenant index when available, else the -// global indexer. +// nil when no reader-capable index is wired. Callers must pass the MAIL's +// tenant id (emails.tenant_id), not the session tenant — see PROJ-44 fix. +// Resolution rules: per-tenant index when available, else the global indexer. func (s *Server) ocrTextReader(tenantID *int64) index.AttachmentTextReader { var idx index.Indexer = s.idx if s.idxMgr != nil && tenantID != nil { diff --git a/internal/ocr/worker.go b/internal/ocr/worker.go index c0e3d99..921e1f2 100644 --- a/internal/ocr/worker.go +++ b/internal/ocr/worker.go @@ -134,9 +134,22 @@ func (w *Worker) run(ctx context.Context, id int) { } func (w *Worker) process(ctx context.Context, job Job) { - logger := w.logger.With("mail_id", job.MailID, "tenant_id", job.TenantID) + // PROJ-44: The canonical source of truth for a mail's tenant assignment + // is emails.tenant_id in PostgreSQL — never the submitter's context. + // Re-imports via IMAP/POP3 scheduler may submit the same mail with a + // different (e.g. nil) tenant pointer than the one stored on the mail, + // which would otherwise route the OCR result into the wrong per-tenant + // Manticore index (emails_global instead of emails_tenant_N). + // We resolve the authoritative tenant once and use it everywhere below. + tenantID := job.TenantID + if w.store != nil { + if t, err := w.store.GetTenantForMail(ctx, job.MailID); err == nil && t != nil { + tenantID = t + } + } + logger := w.logger.With("mail_id", job.MailID, "tenant_id", tenantID) - if w.store.OCREnabled(ctx, job.TenantID) == false { + if w.store.OCREnabled(ctx, tenantID) == false { _ = w.store.SetOCRResult(ctx, job.MailID, "disabled", 0) return } @@ -187,7 +200,7 @@ func (w *Worker) process(ctx context.Context, job Job) { return } - idx := w.idxMgr.ForTenant(job.TenantID) + idx := w.idxMgr.ForTenant(tenantID) updater, ok := idx.(index.AttachmentTextUpdater) if !ok { logger.Warn("ocr worker: indexer does not support AttachmentTextUpdater — text dropped")