7b75433999
Hit-Struct um Snippet + MatchField erweitert. enrichHitsWithSnippets fuellt diese pro Treffer: detectMatchField probt subject > body > attachment_text > attachment_names > from_addr > to_addr; buildSnippet ruft CALL SNIPPETS mit <b>-Markern. Snippet-Fehler droppen den Treffer nicht. AttachmentTextReader-Interface + Manticore-Implementation GetAttachmentText liefert den indexierten OCR-Text fuer den neuen /ocr-text-Endpoint.
99 lines
3.1 KiB
Go
99 lines
3.1 KiB
Go
package index
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
)
|
|
|
|
// MailDocument is the indexed representation of a stored email.
//
// TenantID is a pointer so that the absence of a tenant can be expressed:
// nil means the global / superadmin context. AttachmentText carries the
// OCR-extracted text from PDF/image attachments (PROJ-35).
//
// NOTE(review): From, To and AttachNames are plain strings; how multiple
// addresses or attachment names are encoded in them is not visible in this
// file — confirm against the indexer implementation.
type MailDocument struct {
	ID             string
	From           string
	To             string
	Subject        string
	Body           string
	AttachNames    string
	AttachmentText string // PROJ-35: OCR-extracted text from PDF/image attachments
	HasAttachment  bool
	Date           time.Time
	Size           int64
	TenantID       *int64 // nil = global / superadmin context
}
|
|
|
|
// SearchRequest specifies search parameters.
//
// HasAttachment is a tri-state filter: nil applies no filter, true restricts
// to mails with attachments and false to mails without. Sort accepts
// "relevance", "date_asc" or "date_desc", with date_desc as the default.
//
// NOTE(review): DateFrom/DateTo are pointers, presumably so that nil means
// "no bound"; whether the bounds are inclusive and whether Page is zero- or
// one-based is not visible in this file — confirm against the implementation.
type SearchRequest struct {
	Query         string
	From          string
	To            string
	OwnEmail      string
	DateFrom      *time.Time
	DateTo        *time.Time
	HasAttachment *bool  // nil=no filter, true=only with, false=only without
	Sort          string // "relevance", "date_asc", "date_desc" (default: date_desc)
	PageSize      int
	Page          int
}
|
|
|
|
// Hit is a single search result.
//
// PROJ-44: Snippet and MatchField are populated by the Manticore Search path
// when a full-text query was provided. They remain empty for filter-only
// searches (e.g. date range without query) and when the per-hit highlight
// pass fails — the hit is still returned in that case (no hard error).
// Both fields are tagged omitempty, so they are left out of the JSON
// encoding when empty.
//
// NOTE(review): Snippet carries HTML <b> markers; whether the surrounding
// excerpt text is already HTML-escaped is not visible in this file — confirm
// before rendering it unescaped.
type Hit struct {
	ID         string  `json:"id"`
	Score      float64 `json:"score"`
	Snippet    string  `json:"snippet,omitempty"`     // HTML-marked excerpt with <b>match</b> tags
	MatchField string  `json:"match_field,omitempty"` // subject|body|attachment_text|attachment_names|from_addr|to_addr
}
|
|
|
|
// SearchResult holds paginated search results.
|
|
type SearchResult struct {
|
|
Total int
|
|
Hits []Hit
|
|
}
|
|
|
|
// Indexer is the interface for full-text email indexing.
|
|
type Indexer interface {
|
|
IndexSync(doc MailDocument) error
|
|
Search(req SearchRequest) (*SearchResult, error)
|
|
Delete(id string) error
|
|
Close() error
|
|
}
|
|
|
|
// AttachmentTextUpdater is implemented by indexers that support partial
// updates of the OCR-extracted attachment text. Optional add-on to Indexer:
// callers should type-assert and degrade gracefully if not supported.
//
// PROJ-35: Manticore implements this; legacy Xapian does not.
type AttachmentTextUpdater interface {
	UpdateAttachmentText(mailID, text string) error
}
|
|
|
|
// AttachmentTextReader is implemented by indexers that can return the stored
// OCR-extracted attachment text for a mail. Optional add-on to Indexer.
//
// PROJ-44: Manticore implements this for the /api/mails/{id}/ocr-text endpoint.
//
// NOTE(review): what is returned for an unknown mailID (empty string vs.
// error) is decided by the implementation and is not visible in this file.
type AttachmentTextReader interface {
	GetAttachmentText(mailID string) (string, error)
}
|
|
|
|
// TenantIndexer manages per-tenant Indexer instances.
|
|
// Implemented by ManticoreTenantManager (primary) and TenantIndexManager (legacy Xapian).
|
|
type TenantIndexer interface {
|
|
ForTenant(tenantID *int64) Indexer
|
|
Global() Indexer
|
|
Close() error
|
|
}
|
|
|
|
// New creates an Indexer for the specified backend.
|
|
// Deprecated: use NewManticoreTenantManager instead.
|
|
func New(dir string, batchSize int, backend string) (Indexer, error) {
|
|
switch backend {
|
|
case "xapian":
|
|
return newXapian(dir)
|
|
default:
|
|
return nil, fmt.Errorf("unknown index backend: %q (use manticore via NewManticoreTenantManager)", backend)
|
|
}
|
|
}
|