feat(PROJ-35): OCR & Anhang-Volltext-Indexierung
Asynchrone OCR fuer PDF- und Bild-Anhaenge via tesseract + poppler-utils. Extrahierter Text wird in Manticore (attachment_text) gespeichert und ist ueber die normale Volltextsuche auffindbar. - internal/ocr: ExtractText + Worker (queue + drain) - internal/storage/ocr.go: SetOCRStatus, OCREnabled, GetMailsByOCRStatus - emails.ocr_status (pending|done|failed|skipped|disabled) - tenants.ocr_enabled (Default TRUE, opt-out) - Manticore: attachment_text-Feld + UpdateAttachmentText - Boot-resume: pending Jobs nach Restart automatisch in die Queue - CLI: archivmail ocr-reprocess --tenant N --status pending|failed|all - update.sh: tesseract-ocr + poppler-utils optional installieren
This commit is contained in:
+20
-10
@@ -7,16 +7,17 @@ import (
|
||||
|
||||
// MailDocument is the indexed representation of a stored email.
//
// NOTE(review): in this diff view the field list appears twice. The first run
// looks like the pre-change side of the hunk and the second run the
// post-change side; the only difference between them is the added
// AttachmentText field. Field comments are attached to the second run only.
|
||||
type MailDocument struct {
|
||||
ID string
|
||||
From string
|
||||
To string
|
||||
Subject string
|
||||
Body string
|
||||
AttachNames string
|
||||
HasAttachment bool
|
||||
Date time.Time
|
||||
Size int64
|
||||
TenantID *int64 // nil = global / superadmin context
|
||||
ID string // document identifier; presumably the mailID accepted by AttachmentTextUpdater.UpdateAttachmentText — TODO confirm
|
||||
From string // NOTE(review): presumably the sender; exact format (raw header vs. parsed address) not visible here
|
||||
To string // NOTE(review): presumably the recipient(s) as a single string; separator not visible here
|
||||
Subject string
|
||||
Body string // NOTE(review): presumably the text body that gets full-text indexed — confirm against the indexer
|
||||
AttachNames string // NOTE(review): presumably attachment file names packed into one string; separator not visible here
|
||||
AttachmentText string // PROJ-35: OCR-extracted text from PDF/image attachments
|
||||
HasAttachment bool
|
||||
Date time.Time
|
||||
Size int64 // NOTE(review): unit presumably bytes — confirm
|
||||
TenantID *int64 // nil = global / superadmin context
|
||||
}
|
||||
|
||||
// SearchRequest specifies search parameters.
|
||||
@@ -53,6 +54,15 @@ type Indexer interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
// AttachmentTextUpdater is implemented by indexers that support partial
|
||||
// updates of the OCR-extracted attachment text. Optional add-on to Indexer:
|
||||
// callers should type-assert and degrade gracefully if not supported.
|
||||
//
|
||||
// PROJ-35: Manticore implements this; legacy Xapian does not.
|
||||
type AttachmentTextUpdater interface {
|
||||
// UpdateAttachmentText updates the OCR-extracted attachment text of the
// indexed document identified by mailID and returns an error on failure.
// NOTE(review): whether text replaces or is appended to a previously stored
// value is not visible from this interface — confirm against the
// implementation.
UpdateAttachmentText(mailID, text string) error
|
||||
}
|
||||
|
||||
// TenantIndexer manages per-tenant Indexer instances.
|
||||
// Implemented by ManticoreTenantManager (primary) and TenantIndexManager (legacy Xapian).
|
||||
type TenantIndexer interface {
|
||||
|
||||
Reference in New Issue
Block a user