diff --git a/internal/index/index.go b/internal/index/index.go
index faadef8..1173465 100644
--- a/internal/index/index.go
+++ b/internal/index/index.go
@@ -35,9 +35,16 @@ type SearchRequest struct {
}
// Hit is a single search result.
+//
+// PROJ-44: Snippet and MatchField are filled by the Manticore search path when
+// a full-text query was supplied; filter-only searches (e.g. date range
+// without query) leave both empty. If detection or snippet generation fails
+// for a hit, the affected field stays empty and the hit is still returned.
type Hit struct {
- ID string `json:"id"`
- Score float64 `json:"score"`
+ ID string `json:"id"`
+ Score float64 `json:"score"`
+ Snippet string `json:"snippet,omitempty"` // HTML-marked excerpt with match tags
+ MatchField string `json:"match_field,omitempty"` // subject|body|attachment_text|attachment_names|from_addr|to_addr
}
// SearchResult holds paginated search results.
@@ -63,6 +70,14 @@ type AttachmentTextUpdater interface {
UpdateAttachmentText(mailID, text string) error
}
+// AttachmentTextReader is an optional add-on to Indexer for backends that
+// can return the stored OCR-extracted attachment text of a single mail.
+// PROJ-44: Manticore implements it for the /api/mails/{id}/ocr-text endpoint;
+// that implementation yields ("", nil) when the mail has no indexed row.
+type AttachmentTextReader interface {
+ GetAttachmentText(mailID string) (string, error)
+}
+
// TenantIndexer manages per-tenant Indexer instances.
// Implemented by ManticoreTenantManager (primary) and TenantIndexManager (legacy Xapian).
type TenantIndexer interface {
diff --git a/internal/index/manticore.go b/internal/index/manticore.go
index 7842be7..5a31af8 100644
--- a/internal/index/manticore.go
+++ b/internal/index/manticore.go
@@ -226,6 +226,28 @@ func (idx *manticoreIndex) UpdateAttachmentText(mailID, text string) error {
return nil
}
+// GetAttachmentText implements index.AttachmentTextReader (PROJ-44). It looks
+// up the attachment_text column of the row whose id is hashMailID(mailID).
+//
+// A missing row is not an error: it yields ("", nil), so callers can handle
+// "not indexed (yet)" exactly like "no OCR text available". Any other
+// query/scan failure is wrapped together with the table name and returned.
+func (idx *manticoreIndex) GetAttachmentText(mailID string) (string, error) {
+	// idx.table is interpolated into the SQL; only the row id is a bind parameter.
+	stmt := fmt.Sprintf(`SELECT attachment_text FROM %s WHERE id = ? LIMIT 1`, idx.table)
+
+	var stored string
+	switch err := idx.db.QueryRow(stmt, hashMailID(mailID)).Scan(&stored); err {
+	case nil:
+		return stored, nil
+	case sql.ErrNoRows:
+		// Absence of the document is a normal outcome, not a failure.
+		return "", nil
+	default:
+		return "", fmt.Errorf("manticore GetAttachmentText %s: %w", idx.table, err)
+	}
+}
+
// Delete removes a document by mail ID hash.
func (idx *manticoreIndex) Delete(id string) error {
rowID := hashMailID(id)
@@ -357,6 +379,14 @@ func (idx *manticoreIndex) Search(req SearchRequest) (*SearchResult, error) {
return nil, fmt.Errorf("manticore Search rows: %w", err)
}
+ // PROJ-44: per-hit snippet + match_field heuristic. Only meaningful when
+ // the caller supplied a full-text query. Enrichment is best-effort:
+ // snippet failures are logged by enrichHitsWithSnippets and never drop a
+ // hit or fail the search.
+ if req.Query != "" && len(hits) > 0 {
+ idx.enrichHitsWithSnippets(hits, req.Query)
+ }
+
return &SearchResult{Total: total, Hits: hits}, nil
}
diff --git a/internal/index/manticore_snippet.go b/internal/index/manticore_snippet.go
new file mode 100644
index 0000000..f0e89b4
--- /dev/null
+++ b/internal/index/manticore_snippet.go
@@ -0,0 +1,127 @@
+package index
+
+import (
+ "fmt"
+ "log"
+ "strings"
+)
+
+// matchFieldOrder lists the full-text fields in the priority order in which
+// detectMatchField probes them to decide which field caused a hit (PROJ-44):
+// subject > body > attachment_text > attachment_names > from_addr > to_addr.
+// buildSnippet additionally uses it as the allow-list of column names that
+// may be interpolated into SQL.
+//
+// Keep the list small: each entry can cost one extra SELECT per hit. With
+// typical page sizes (<=50) that is up to 300 probes per request, worst case.
+var matchFieldOrder = []string{
+ "subject",
+ "body",
+ "attachment_text",
+ "attachment_names",
+ "from_addr",
+ "to_addr",
+}
+
+// enrichHitsWithSnippets annotates every element of hits in place: it sets
+// MatchField when a matching field could be detected and Snippet when an
+// excerpt could be built. Snippet errors are only logged, never returned —
+// PROJ-44 requires that a failed snippet generation is not a hard error.
+func (idx *manticoreIndex) enrichHitsWithSnippets(hits []Hit, query string) {
+	for i := range hits {
+		hit := &hits[i] // write through to the caller's slice element
+		matched := idx.detectMatchField(hit.ID, query)
+		if matched != "" {
+			hit.MatchField = matched
+		}
+		excerpt, err := idx.buildSnippet(hit.ID, query, matched)
+		if err == nil {
+			hit.Snippet = excerpt
+			continue
+		}
+		log.Printf("manticore snippet: mail=%s err=%v", hit.ID, err)
+	}
+}
+
+// detectMatchField reports which field of the mail mailID the query matched.
+// It probes matchFieldOrder in priority order, scoping MATCH() to a single
+// field via the "@field" prefix, and returns the first field whose probe
+// yields a row. The result is "" when no probe yields a row, e.g. when the
+// query terms only match across several fields combined. A probe whose Scan
+// fails for any reason counts as "no match" for that field.
+func (idx *manticoreIndex) detectMatchField(mailID, query string) string {
+	docID := hashMailID(mailID)
+	terms := escapeManticoreMatch(query)
+	// The statement is the same for every probe; only the MATCH argument varies.
+	probe := fmt.Sprintf(`SELECT id FROM %s WHERE id = ? AND MATCH(?) LIMIT 1`, idx.table)
+
+	for _, candidate := range matchFieldOrder {
+		// candidate comes from the static allow-list, never from user input.
+		var found int64
+		scoped := fmt.Sprintf("@%s %s", candidate, terms)
+		if idx.db.QueryRow(probe, docID, scoped).Scan(&found) != nil {
+			continue
+		}
+		return candidate
+	}
+	return ""
+}
+
+// buildSnippet returns an excerpt of one field of the mail, generated by
+// Manticore's CALL SNIPPETS statement for query. matchField selects the
+// column ("body" when empty) and must be listed in matchFieldOrder, otherwise
+// an error is returned. The column text is fetched first and excerpted in a
+// second statement (two roundtrips per hit); empty or whitespace-only text
+// yields ("", nil). Fetch and snippet errors are returned.
+//
+// NOTE(review): before_match/after_match are empty, so the excerpt carries
+// no markup although Hit.Snippet is documented as HTML-marked — confirm.
+func (idx *manticoreIndex) buildSnippet(mailID, query, matchField string) (string, error) {
+	field := matchField
+	if field == "" {
+		field = "body"
+	}
+	// Allow-list guard — never interpolate user-provided field names.
+	allowed := false
+	for _, f := range matchFieldOrder {
+		if f == field {
+			allowed = true
+			break
+		}
+	}
+	if !allowed {
+		return "", fmt.Errorf("manticore snippet: invalid field %q", field)
+	}
+
+	source, err := idx.fetchFieldText(field, hashMailID(mailID))
+	if err != nil {
+		return "", err
+	}
+	source = strings.TrimSpace(source)
+	if source == "" {
+		return "", nil
+	}
+
+	// CALL SNIPPETS(text, table, query, value AS option, ...): options of the
+	// form 'name=value' are not accepted by this statement.
+	row := idx.db.QueryRow(
+		`CALL SNIPPETS(?, ?, ?, '' AS before_match, '' AS after_match, 240 AS limit, 12 AS around)`,
+		source, idx.table, query,
+	)
+	var snippet string
+	if err := row.Scan(&snippet); err != nil {
+		return "", fmt.Errorf("call snippets %s: %w", idx.table, err)
+	}
+	return snippet, nil
+}
+
+// fetchFieldText reads one text column of the row with id rowID. field goes
+// into the SQL verbatim, so the caller must validate it against matchFieldOrder.
+func (idx *manticoreIndex) fetchFieldText(field string, rowID int64) (string, error) {
+	row := idx.db.QueryRow(fmt.Sprintf(`SELECT %s FROM %s WHERE id = ? LIMIT 1`, field, idx.table), rowID)
+	var value string
+	if scanErr := row.Scan(&value); scanErr != nil {
+		return "", fmt.Errorf("fetch %s: %w", field, scanErr)
+	}
+	return value, nil
+}