Files
sysops 5e1a51b028 fix(PROJ-44): SNIPPET via SELECT statt CALL SNIPPETS (Go MySQL-Treiber-Kompatibilitaet)
CALL SNIPPETS liefert einen anderen MySQL-Pakettyp als SELECT, den der
Go-Treiber (go-sql-driver/mysql) mit "malformed packet" ablehnt.
SELECT SNIPPET(text, query) FROM table ist die korrekte Alternative
fuer Manticore 25.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-10 22:53:27 +02:00

128 lines
4.1 KiB
Go

package index
import (
"fmt"
"log"
"strings"
)
// matchFieldOrder lists the full-text fields that are probed, in priority
// order, to find out which field produced a hit. The order comes from
// PROJ-44: subject > body > attachment_text > attachment_names > from_addr >
// to_addr.
//
// Keep this list short: every probed field costs one extra Manticore SELECT
// per hit until a match is found. With typical page sizes (<=50) and six
// fields, the worst case is 50 * 6 = 300 cheap probe queries per request.
var matchFieldOrder = []string{
	"subject", "body",
	"attachment_text", "attachment_names",
	"from_addr", "to_addr",
}
// enrichHitsWithSnippets populates Snippet and MatchField on every element of
// hits, modifying the slice elements in place.
//
// MatchField is only written when a matching field was detected. A snippet
// failure is logged and the hit is left without a snippet; the error is never
// returned, because a hit without a snippet is still a valid hit (PROJ-44
// edge case: "snippet generation fails … no hard error").
func (idx *manticoreIndex) enrichHitsWithSnippets(hits []Hit, query string) {
	for n := range hits {
		hit := &hits[n]

		matched := idx.detectMatchField(hit.ID, query)
		if matched != "" {
			hit.MatchField = matched
		}

		// buildSnippet falls back to a default field when matched is empty.
		excerpt, err := idx.buildSnippet(hit.ID, query, matched)
		if err == nil {
			hit.Snippet = excerpt
			continue
		}
		log.Printf("manticore snippet: mail=%s err=%v", hit.ID, err)
	}
}
// detectMatchField runs a small MATCH() probe per field in matchFieldOrder
// and returns the first field whose probe yields a row for the given mail.
//
// It returns "" when no probe succeeds. That can happen for filter-only
// matches (e.g. date range only) or when the query terms require multiple
// fields combined.
//
// Any error from a probe is treated as "this field did not match" and the
// next field is tried.
// NOTE(review): this also swallows errors other than "no rows" (for example
// connection or syntax errors, assuming idx.db behaves like database/sql);
// consider surfacing those if silent degradation is not wanted.
func (idx *manticoreIndex) detectMatchField(mailID, query string) string {
	rowID := hashMailID(mailID)
	escaped := escapeManticoreMatch(query)

	// The statement text is the same for every probe; only the bound MATCH
	// expression differs, so build it once instead of once per field.
	q := fmt.Sprintf(
		`SELECT id FROM %s WHERE id = ? AND MATCH(?) LIMIT 1`,
		idx.table,
	)

	for _, field := range matchFieldOrder {
		// SAFETY: field comes from the static matchFieldOrder allow-list,
		// never from user input. "@field terms" restricts the match to that
		// single field.
		matchExpr := fmt.Sprintf("@%s %s", field, escaped)

		var got int64
		if err := idx.db.QueryRow(q, rowID, matchExpr).Scan(&got); err == nil {
			return field
		}
	}
	return ""
}
// buildSnippet returns an excerpt of the matched field for one hit, produced
// by Manticore's SNIPPET() function used as a SELECT expression. When
// matchField is empty, "body" is used as a sensible default.
//
// The work takes two roundtrips: a small SELECT fetches the source column for
// the hit, and a second SELECT passes that text together with the query to
// SNIPPET(text, query). Two roundtrips per hit is acceptable for typical page
// sizes.
//
// It returns ("", nil) when the source column is empty or whitespace-only. It
// returns a non-nil error when the field is not in matchFieldOrder, when the
// source column cannot be fetched, or when the snippet query fails.
func (idx *manticoreIndex) buildSnippet(mailID, query, matchField string) (string, error) {
	field := matchField
	if field == "" {
		field = "body"
	}

	// Whitelist guard — the field name is interpolated into SQL further
	// down, so never let a user-provided name through.
	allowed := false
	for _, f := range matchFieldOrder {
		if f == field {
			allowed = true
			break
		}
	}
	if !allowed {
		return "", fmt.Errorf("manticore snippet: invalid field %q", field)
	}

	source, err := idx.fetchFieldText(field, hashMailID(mailID))
	if err != nil {
		return "", err
	}
	source = strings.TrimSpace(source)
	if source == "" {
		// Nothing to excerpt; not an error.
		return "", nil
	}

	// SELECT SNIPPET(text, query) FROM table.
	// Manticore 25+ supports SNIPPET() as a SELECT expression; CALL SNIPPETS
	// returns a different packet type that the Go MySQL driver mishandles
	// ("malformed packet"). No options are passed, so the default match
	// markers <b>...</b> apply.
	q := fmt.Sprintf(`SELECT SNIPPET(?, ?) FROM %s LIMIT 1`, idx.table)
	var snippet string
	if err := idx.db.QueryRow(q, source, query).Scan(&snippet); err != nil {
		// The context names the statement actually executed (SELECT
		// SNIPPET), not the CALL SNIPPETS form this code no longer uses.
		return "", fmt.Errorf("select snippet %s: %w", idx.table, err)
	}
	return snippet, nil
}
// fetchFieldText reads one text column of the row identified by rowID.
//
// field is interpolated directly into the statement, so the caller must have
// validated it against matchFieldOrder beforehand. Any query or scan failure
// is returned wrapped with the field name as context.
func (idx *manticoreIndex) fetchFieldText(field string, rowID int64) (string, error) {
	stmt := fmt.Sprintf(`SELECT %s FROM %s WHERE id = ? LIMIT 1`, field, idx.table)

	var value string
	err := idx.db.QueryRow(stmt, rowID).Scan(&value)
	if err != nil {
		return "", fmt.Errorf("fetch %s: %w", field, err)
	}
	return value, nil
}