From 032892bc2be73386cc2acc87dc4cf2d539fa335f Mon Sep 17 00:00:00 2001 From: sysops Date: Sun, 10 May 2026 22:23:24 +0200 Subject: [PATCH] fix(PROJ-44): IMAP+POP3 Live-Import triggert OCR-Worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bisher haben nur der SMTP-Pfad und der Boot-Backfill ocrWorker.Submit gerufen. IMAP- und POP3-Importer riefen nur idx.IndexSync auf — neue Mails blieben dadurch dauerhaft in ocr_status='pending' (auf 132 sind so innerhalb von 44 Tagen 54 Mails haengen geblieben). Fix: Importer-Strukturen bekommen einen optionalen ocrSubmit-Callback, in main.go via SetOCRSubmit gehookt. Kein Import von internal/ocr in die Importer-Packages -> kein Risiko von Cycles. Submit ist non-blocking und wird nur fuer Mails mit Attachments gerufen; Mails ohne OCR-faehige Teile markiert der Worker selbst als 'skipped'. --- cmd/archivmail/main.go | 13 +++++++++++++ internal/imap/importer.go | 17 +++++++++++++++++ internal/pop3/importer.go | 13 +++++++++++++ 3 files changed, 43 insertions(+) diff --git a/cmd/archivmail/main.go b/cmd/archivmail/main.go index 203e260..d809d0b 100644 --- a/cmd/archivmail/main.go +++ b/cmd/archivmail/main.go @@ -413,6 +413,13 @@ func main() { } defer imapSt.Close() imapImp := imapstore.NewImporter(imapSt, mailStore, idx, logger) + // PROJ-44: trigger OCR for IMAP-imported mails — without this every + // IMAP delivery would remain in ocr_status='pending' forever. + if ocrWorker != nil { + imapImp.SetOCRSubmit(func(mailID string, tenantID *int64) { + ocrWorker.Submit(mailID, tenantID) + }) + } imapSched := imapstore.NewScheduler(imapSt, imapImp, logger) imapSched.Start() defer imapSched.Stop() @@ -426,6 +433,12 @@ func main() { } defer pop3St.Close() pop3Imp := pop3store.NewImporter(pop3St, mailStore, idx, logger) + // PROJ-44: same OCR hook as the IMAP importer above. 
+ if ocrWorker != nil { + pop3Imp.SetOCRSubmit(func(mailID string, tenantID *int64) { + ocrWorker.Submit(mailID, tenantID) + }) + } srv.SetPop3(pop3St, pop3Imp) // Backfill in background: migrate existing files into DB metadata + re-index diff --git a/internal/imap/importer.go b/internal/imap/importer.go index 7327eb8..3b6e17e 100644 --- a/internal/imap/importer.go +++ b/internal/imap/importer.go @@ -23,6 +23,9 @@ type Importer struct { mailStore *storage.Store idx index.Indexer logger *slog.Logger + // PROJ-44: optional hook into the async OCR worker. Wired in main.go + // via SetOCRSubmit so the imap package does not import internal/ocr. + ocrSubmit func(mailID string, tenantID *int64) } // NewImporter creates a new Importer wired to the storage and index backends. @@ -35,6 +38,13 @@ func NewImporter(store *Store, mailStore *storage.Store, idx index.Indexer, logg } } +// SetOCRSubmit installs a non-blocking callback that enqueues a mail for +// OCR processing. If never called, IMAP-imported mails are not OCR'd — +// they remain in ocr_status='pending' forever (PROJ-44 fix). +func (imp *Importer) SetOCRSubmit(fn func(mailID string, tenantID *int64)) { + imp.ocrSubmit = fn +} + // Run performs a full IMAP import for the given account. It is designed to be // called as a goroutine: go imp.Run(context.Background(), accountID) func (imp *Importer) Run(ctx context.Context, accountID int64) { @@ -271,5 +281,12 @@ func (imp *Importer) storeAndIndex(raw []byte, tenantID *int64, log *slog.Logger // Non-fatal: mail is stored, just not searchable yet } + // PROJ-44: enqueue OCR job for any mail with attachments. Submit is + // non-blocking; mails with no OCR-eligible parts get marked 'skipped' + // by the worker, so the queue stays in sync regardless. 
+ if imp.ocrSubmit != nil && len(pm.Attachments) > 0 { + imp.ocrSubmit(id, tenantID) + } + return nil } diff --git a/internal/pop3/importer.go b/internal/pop3/importer.go index 01e6b58..409c2d9 100644 --- a/internal/pop3/importer.go +++ b/internal/pop3/importer.go @@ -19,6 +19,8 @@ type Importer struct { idx index.Indexer logger *slog.Logger TenantID *int64 // optional tenant assignment for stored mails + // PROJ-44: optional OCR enqueue hook, wired from main.go. + ocrSubmit func(mailID string, tenantID *int64) } // NewImporter creates a new Importer wired to the storage and index backends. @@ -31,6 +33,12 @@ func NewImporter(store *Store, mailStore *storage.Store, idx index.Indexer, logg } } +// SetOCRSubmit installs a non-blocking callback that enqueues a mail for +// OCR processing. See imap.Importer.SetOCRSubmit for rationale (PROJ-44). +func (imp *Importer) SetOCRSubmit(fn func(mailID string, tenantID *int64)) { + imp.ocrSubmit = fn +} + // Run performs a full POP3 import for the given account. It is designed to be // called as a goroutine: go imp.Run(context.Background(), accountID) func (imp *Importer) Run(ctx context.Context, accountID int64) { @@ -166,5 +174,10 @@ func (imp *Importer) storeAndIndex(raw []byte, log *slog.Logger) error { // Non-fatal: mail is stored, just not searchable yet } + // PROJ-44: enqueue OCR job for any mail with attachments. + if imp.ocrSubmit != nil && len(pm.Attachments) > 0 { + imp.ocrSubmit(id, imp.TenantID) + } + return nil }