feat(PROJ-35): OCR & Anhang-Volltext-Indexierung

Asynchrone OCR für PDF- und Bild-Anhänge via tesseract + poppler-utils.
Extrahierter Text wird in Manticore (attachment_text) gespeichert und ist
über die normale Volltextsuche auffindbar.

- internal/ocr: ExtractText + Worker (queue + drain)
- internal/storage/ocr.go: SetOCRStatus, OCREnabled, GetMailsByOCRStatus
- emails.ocr_status (pending|done|failed|skipped|disabled)
- tenants.ocr_enabled (Default TRUE, opt-out)
- Manticore: attachment_text-Feld + UpdateAttachmentText
- Boot-Resume: Jobs mit Status pending werden nach einem Restart automatisch wieder in die Queue eingereiht
- CLI: archivmail ocr-reprocess --tenant N --status pending|failed|all
- update.sh: tesseract-ocr + poppler-utils optional installieren
This commit is contained in:
sysops
2026-05-08 22:11:17 +02:00
parent 2a91f6e249
commit 0bda21033e
11 changed files with 926 additions and 25 deletions
+14 -5
View File
@@ -308,6 +308,15 @@ func (s *Store) initSchema(ctx context.Context) error {
);
CREATE INDEX IF NOT EXISTS idx_saved_searches_user ON saved_searches(user_id, tenant_id);
`)
if err != nil {
return err
}
// PROJ-35: per-mail OCR status column plus partial index on pending rows (idempotent via IF NOT EXISTS)
_, err = s.db.Exec(ctx, `
ALTER TABLE emails ADD COLUMN IF NOT EXISTS ocr_status TEXT DEFAULT 'pending';
CREATE INDEX IF NOT EXISTS idx_emails_ocr_status ON emails (ocr_status) WHERE ocr_status = 'pending';
`)
return err
}
@@ -1055,8 +1064,8 @@ func (s *Store) GetMailsWithUID(ctx context.Context, tenantID *int64) ([]MailWit
return result, rows.Err()
}
// GetMailsByRecipient returns mails where mail_to contains the given email address.
// Used for personal IMAP mode filtering.
// GetMailsByRecipient returns mails where mail_to or mail_from contains the given email address.
// Used for personal IMAP mode filtering — includes both received and sent mails.
func (s *Store) GetMailsByRecipient(ctx context.Context, tenantID *int64, email string) ([]MailWithUID, error) {
if s.db == nil || email == "" {
return nil, nil
@@ -1066,15 +1075,15 @@ func (s *Store) GetMailsByRecipient(ctx context.Context, tenantID *int64, email
var err error
if tenantID == nil {
rows, err = s.db.Query(ctx,
`SELECT id, COALESCE(uid, 0) FROM emails WHERE mail_to ILIKE $1 ORDER BY uid ASC NULLS LAST`,
`SELECT id, COALESCE(uid, 0) FROM emails WHERE mail_to ILIKE $1 OR mail_from ILIKE $1 ORDER BY uid ASC NULLS LAST`,
pattern)
} else {
rows, err = s.db.Query(ctx, `
SELECT e.id, COALESCE(e.uid, 0)
SELECT DISTINCT e.id, COALESCE(e.uid, 0)
FROM email_refs r
JOIN emails e ON e.id = r.email_id
WHERE r.tenant_id = $1
AND e.mail_to ILIKE $2
AND (e.mail_to ILIKE $2 OR e.mail_from ILIKE $2)
ORDER BY e.uid ASC NULLS LAST`, *tenantID, pattern)
}
if err != nil {