Files
archivmail/internal/index/index.go
T
sysops 7b75433999 feat(PROJ-44): Snippet + match_field fuer Suche, GetAttachmentText
Hit-Struct um Snippet + MatchField erweitert. enrichHitsWithSnippets
fuellt diese pro Treffer: detectMatchField probt subject>body>
attachment_text>attachment_names>from_addr>to_addr; buildSnippet ruft
CALL SNIPPETS mit <b>-Markern. Snippet-Fehler droppen den Treffer nicht.

AttachmentTextReader-Interface + Manticore-Implementation
GetAttachmentText liefert den indexierten OCR-Text fuer den neuen
/ocr-text-Endpoint.
2026-05-10 22:20:52 +02:00

99 lines
3.1 KiB
Go

package index
import (
"fmt"
"time"
)
// MailDocument is the shape in which a stored email is handed to the index.
type MailDocument struct {
	ID          string
	From        string
	To          string
	Subject     string
	Body        string
	AttachNames string
	// AttachmentText is the OCR-extracted text from PDF/image attachments
	// (PROJ-35).
	AttachmentText string
	HasAttachment  bool
	Date           time.Time
	Size           int64
	// TenantID selects the tenant context; nil stands for the global /
	// superadmin context.
	TenantID *int64
}
// SearchRequest holds the parameters of a single search.
type SearchRequest struct {
	Query    string
	From     string
	To       string
	OwnEmail string
	DateFrom *time.Time
	DateTo   *time.Time
	// HasAttachment is a tri-state filter: nil applies no filter, true keeps
	// only mails with attachments, false keeps only mails without.
	HasAttachment *bool
	// Sort is one of "relevance", "date_asc" or "date_desc"; the default is
	// date_desc.
	Sort     string
	PageSize int
	Page     int
}
// Hit is one entry of a search result.
//
// PROJ-44: the Manticore Search path fills Snippet and MatchField when the
// request carried a full-text query. For filter-only searches (for example a
// date range without a query), and when the per-hit highlight pass fails,
// both stay empty; the hit is still returned in that case instead of raising
// a hard error.
type Hit struct {
	ID    string  `json:"id"`
	Score float64 `json:"score"`
	// Snippet is an HTML-marked excerpt with <b>match</b> tags.
	Snippet string `json:"snippet,omitempty"`
	// MatchField is one of subject, body, attachment_text, attachment_names,
	// from_addr or to_addr.
	MatchField string `json:"match_field,omitempty"`
}
// SearchResult carries paginated search results.
type SearchResult struct {
	// Total is a result count. NOTE(review): presumably the number of matches
	// across all pages rather than len(Hits) — confirm against the backends.
	Total int
	// Hits lists the individual results.
	Hits []Hit
}
// Indexer is the contract for full-text email indexing.
type Indexer interface {
	// IndexSync takes one MailDocument for indexing. NOTE(review): the name
	// suggests the call completes the indexing before returning — confirm
	// against the implementations.
	IndexSync(doc MailDocument) error
	// Search evaluates req and reports the result, or an error.
	Search(req SearchRequest) (*SearchResult, error)
	// Delete takes the id of an indexed mail; presumably it removes that
	// entry from the index — verify per backend.
	Delete(id string) error
	// Close ends use of the indexer.
	Close() error
}
// AttachmentTextUpdater is an optional add-on to Indexer for backends that
// support partial updates of the OCR-extracted attachment text. Callers
// should type-assert for it and degrade gracefully when the indexer does not
// provide it.
//
// PROJ-35: implemented by Manticore; legacy Xapian does not implement it.
type AttachmentTextUpdater interface {
	UpdateAttachmentText(mailID string, text string) error
}
// AttachmentTextReader is an optional add-on to Indexer for backends that can
// hand back the stored OCR-extracted attachment text of a mail.
//
// PROJ-44: implemented by Manticore to serve the /api/mails/{id}/ocr-text
// endpoint.
type AttachmentTextReader interface {
	// GetAttachmentText takes the ID of a mail and returns its stored
	// attachment text, or an error.
	GetAttachmentText(mailID string) (string, error)
}
// TenantIndexer manages Indexer instances on a per-tenant basis.
// ManticoreTenantManager is the primary implementation; TenantIndexManager is
// the legacy Xapian one.
type TenantIndexer interface {
	// ForTenant yields the Indexer for tenantID. NOTE(review): a nil
	// tenantID presumably maps to the global index — confirm against the
	// implementations.
	ForTenant(tenantID *int64) Indexer
	// Global yields the Indexer that is not bound to a tenant.
	Global() Indexer
	// Close ends use of the manager.
	Close() error
}
// New builds an Indexer for the named backend.
//
// Only "xapian" is handled: it delegates to newXapian(dir). Any other backend
// value yields a nil Indexer together with an error that points the caller to
// NewManticoreTenantManager. batchSize is not read by this function.
//
// Deprecated: use NewManticoreTenantManager instead.
func New(dir string, batchSize int, backend string) (Indexer, error) {
	if backend == "xapian" {
		return newXapian(dir)
	}
	return nil, fmt.Errorf("unknown index backend: %q (use manticore via NewManticoreTenantManager)", backend)
}