From a252ad6f0eb7934748283d04f08be89bbcdba28f Mon Sep 17 00:00:00 2001 From: sysops Date: Fri, 8 May 2026 22:15:20 +0200 Subject: [PATCH] fix(PROJ-35): OCR Boot-Resume drosselt nach Queue-Kapazitaet --- cmd/archivmail/main.go | 50 ++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/cmd/archivmail/main.go b/cmd/archivmail/main.go index b70c990..fd7f9d9 100644 --- a/cmd/archivmail/main.go +++ b/cmd/archivmail/main.go @@ -187,20 +187,46 @@ func main() { "pdftotext", ts.HasPdftotext, "tesseract", ts.HasTesseract, "pdftoppm", ts.HasPdftoppm) } - // Boot-resume: re-enqueue all mails still marked ocr_status='pending'. + // Boot-resume: re-enqueue mails still marked ocr_status='pending'. + // The worker queue holds 1000 slots; we refill it from the DB whenever + // it falls below half capacity. Each iteration only fetches as many + // pending mails as currently fit, so nothing is dropped. + // The worker updates ocr_status to done/failed/skipped, so subsequent + // queries only return genuinely outstanding jobs. 
go func() {
 		ctx := context.Background()
-		pending, err := mailStore.GetPendingOCRMails(ctx, nil, 5000)
-		if err != nil {
-			logger.Warn("ocr boot-resume: query failed", "err", err)
-			return
-		}
-		if len(pending) == 0 {
-			return
-		}
-		logger.Info("ocr boot-resume: re-enqueueing pending jobs", "count", len(pending))
-		for _, m := range pending {
-			ocrWorker.Submit(m.ID, m.TenantID)
+		queueCap := 1000 // matches ocr.Options.QueueSize above; NOTE(review): duplicated literal, must be kept in sync by hand
+		processed := 0 // running total of mails handed to Submit by this goroutine
+		idleRounds := 0 // consecutive queries that came back empty
+		for {
+			free := queueCap - ocrWorker.QueueLen() // remaining slots, assuming QueueLen reports jobs currently queued — TODO confirm
+			if free < queueCap/2 { // fewer than half the slots are free: back off and re-check
+				time.Sleep(2 * time.Second) // NOTE(review): plain sleep and ctx is context.Background(), so this loop has no cancellation path
+				continue
+			}
+			pending, err := mailStore.GetPendingOCRMails(ctx, nil, free) // request at most `free` mails (third argument presumably a row limit — confirm)
+			if err != nil {
+				logger.Warn("ocr boot-resume: query failed", "err", err)
+				return // NOTE(review): a single failed query ends boot-resume for this process; there is no retry
+			}
+			if len(pending) == 0 {
+				idleRounds++
+				if idleRounds >= 3 { // third empty result in a row: treat the backlog as drained and stop
+					if processed > 0 { // stay silent when nothing was ever enqueued
+						logger.Info("ocr boot-resume: backlog drained", "total", processed)
+					}
+					return
+				}
+				time.Sleep(10 * time.Second) // wait before asking the DB again
+				continue
+			}
+			idleRounds = 0 // work was found, so restart the idle count
+			for _, m := range pending { // NOTE(review): mails still waiting in the queue presumably remain 'pending' in the DB and may be returned again — confirm de-duplication
+				ocrWorker.Submit(m.ID, m.TenantID)
+			}
+			processed += len(pending) // counts submissions, not completed OCR jobs
+			logger.Info("ocr boot-resume: enqueued batch",
+				"batch", len(pending), "total_so_far", processed)
 		}
 	}()