feat(PROJ-44): Snippet + match_field fuer Suche, GetAttachmentText

Hit-Struct um Snippet + MatchField erweitert. enrichHitsWithSnippets
fuellt diese pro Treffer: detectMatchField probt subject>body>
attachment_text>attachment_names>from_addr>to_addr; buildSnippet ruft
CALL SNIPPETS mit <b>-Markern. Snippet-Fehler droppen den Treffer nicht.

AttachmentTextReader-Interface + Manticore-Implementierung:
GetAttachmentText liefert den indexierten OCR-Text fuer den neuen
/ocr-text-Endpoint.
This commit is contained in:
sysops
2026-05-10 22:20:52 +02:00
parent 5078830469
commit 7b75433999
3 changed files with 174 additions and 2 deletions
+30
View File
@@ -226,6 +226,28 @@ func (idx *manticoreIndex) UpdateAttachmentText(mailID, text string) error {
return nil
}
// GetAttachmentText looks up the attachment_text column of the row whose id
// is hashMailID(mailID) in idx.table and returns its value.
// Implements index.AttachmentTextReader (PROJ-44).
//
// A lookup that matches no row is not an error: it yields ("", nil), so
// callers can handle "document not indexed" and "no OCR text available"
// the same way. Any other query or scan failure is returned wrapped with
// the table name for context.
func (idx *manticoreIndex) GetAttachmentText(mailID string) (string, error) {
	docID := hashMailID(mailID)
	query := fmt.Sprintf(`SELECT attachment_text FROM %s WHERE id = ? LIMIT 1`, idx.table)

	var stored string
	switch err := idx.db.QueryRow(query, docID).Scan(&stored); err {
	case nil:
		return stored, nil
	case sql.ErrNoRows:
		// No matching document: report "no text" rather than failing.
		return "", nil
	default:
		return "", fmt.Errorf("manticore GetAttachmentText %s: %w", idx.table, err)
	}
}
// Delete removes a document by mail ID hash.
func (idx *manticoreIndex) Delete(id string) error {
rowID := hashMailID(id)
@@ -357,6 +379,14 @@ func (idx *manticoreIndex) Search(req SearchRequest) (*SearchResult, error) {
return nil, fmt.Errorf("manticore Search rows: %w", err)
}
// PROJ-44: per-hit snippet + match_field heuristic. Only meaningful when
// the caller supplied a full-text query. The outcome of the enrichment
// call is not checked here, so a snippet failure can neither drop a hit
// nor fail the search — see enrichHitsWithSnippets for how errors are handled.
if req.Query != "" && len(hits) > 0 {
idx.enrichHitsWithSnippets(hits, req.Query)
}
return &SearchResult{Total: total, Hits: hits}, nil
}