package index

import (
	"database/sql"
	"errors"
	"fmt"
	"log"
	"strings"
)

// matchFieldOrder defines the priority in which fields are probed when
// determining which one of them caused a hit. Order is taken from PROJ-44:
// subject > body > attachment_text > attachment_names > from_addr > to_addr.
//
// The list is intentionally small: the per-hit cost is one extra Manticore
// SELECT per probed field until a match is found. With typical page sizes
// (<=50) and at most 6 probes per hit this stays at or below 300 cheap
// queries per request.
//
// The list doubles as the allow-list of column names that may be
// interpolated into SQL text (see isKnownMatchField).
var matchFieldOrder = []string{
	"subject",
	"body",
	"attachment_text",
	"attachment_names",
	"from_addr",
	"to_addr",
}

// isKnownMatchField reports whether field is one of the static column names
// in matchFieldOrder. It is the single guard used before a field name is
// formatted into a SQL statement.
func isKnownMatchField(field string) bool {
	for _, f := range matchFieldOrder {
		if f == field {
			return true
		}
	}
	return false
}

// enrichHitsWithSnippets fills Hit.Snippet and Hit.MatchField for each hit in
// place. Errors are logged but never propagated: a hit without a snippet is
// still a valid hit (PROJ-44 edge case: "snippet generation fails ... no hard
// error").
//
// MatchField is only written when a field was detected; Snippet is only
// written when buildSnippet succeeds.
func (idx *manticoreIndex) enrichHitsWithSnippets(hits []Hit, query string) {
	for i := range hits {
		field := idx.detectMatchField(hits[i].ID, query)
		if field != "" {
			hits[i].MatchField = field
		}

		snip, err := idx.buildSnippet(hits[i].ID, query, field)
		if err != nil {
			log.Printf("manticore snippet: mail=%s err=%v", hits[i].ID, err)
			continue
		}
		hits[i].Snippet = snip
	}
}

// detectMatchField runs a small MATCH() probe per field in matchFieldOrder
// until one returns a row, and returns that field's name. It returns "" when
// no probe yields a row; that can happen for filter-only matches (e.g. date
// range only) or when the query terms require multiple fields combined.
//
// A probe that fails for a reason other than "no rows" is not treated as a
// hard error: probing continues with the next field, but the first such
// failure per hit is logged so that connection or syntax problems do not
// silently look like "no field matched".
func (idx *manticoreIndex) detectMatchField(mailID, query string) string {
	rowID := hashMailID(mailID)
	escaped := escapeManticoreMatch(query)

	// The statement text is the same for every probe; only the bound MATCH
	// expression differs, so build it once outside the loop.
	q := fmt.Sprintf(`SELECT id FROM %s WHERE id = ? AND MATCH(?) LIMIT 1`, idx.table)

	failureLogged := false
	for _, field := range matchFieldOrder {
		// SAFETY: field is from the static allow-list above, never user input.
		matchExpr := fmt.Sprintf("@%s %s", field, escaped)

		var got int64
		err := idx.db.QueryRow(q, rowID, matchExpr).Scan(&got)
		if err == nil {
			return field
		}

		// sql.ErrNoRows is the expected "this field did not match" outcome
		// (assumes idx.db follows database/sql semantics). Anything else is
		// a real failure; log it once per hit to keep the log readable.
		if !failureLogged && !errors.Is(err, sql.ErrNoRows) {
			log.Printf("manticore match probe: mail=%s field=%s err=%v", mailID, field, err)
			failureLogged = true
		}
	}
	return ""
}

// buildSnippet returns an excerpt of the matched field in which matched
// words are wrapped in Manticore's default highlight markers (HTML bold
// tags), produced by the SNIPPET() SELECT expression. When matchField is
// empty, "body" is used as a sensible default.
//
// The work is done in two roundtrips: the source column for the hit is
// fetched first (small SELECT), then the text and the query are passed as
// bound parameters to SNIPPET(text, query). Two roundtrips per hit is
// acceptable for typical page sizes.
//
// It returns ("", nil) when the source column is empty or whitespace-only,
// and an error when the field is not in matchFieldOrder or either query
// fails.
func (idx *manticoreIndex) buildSnippet(mailID, query, matchField string) (string, error) {
	field := matchField
	if field == "" {
		field = "body"
	}

	// Allow-list guard: never interpolate user-provided field names.
	if !isKnownMatchField(field) {
		return "", fmt.Errorf("manticore snippet: invalid field %q", field)
	}

	rowID := hashMailID(mailID)
	source, err := idx.fetchFieldText(field, rowID)
	if err != nil {
		return "", err
	}
	source = strings.TrimSpace(source)
	if source == "" {
		return "", nil
	}

	// SELECT SNIPPET(text, query) FROM table.
	// Manticore 25+ supports SNIPPET() as a SELECT expression; CALL SNIPPETS
	// returns a different packet type that the Go MySQL driver mishandles
	// ("malformed packet"). The default markers are used, so no options are
	// passed.
	//
	// NOTE(review): query is passed here unescaped, whereas detectMatchField
	// runs it through escapeManticoreMatch first. Confirm that this
	// difference is intentional.
	q := fmt.Sprintf(`SELECT SNIPPET(?, ?) FROM %s LIMIT 1`, idx.table)

	var snippet string
	if err := idx.db.QueryRow(q, source, query).Scan(&snippet); err != nil {
		return "", fmt.Errorf("call snippets %s: %w", idx.table, err)
	}
	return snippet, nil
}

// fetchFieldText loads a single text column for one row. Because field is
// formatted directly into the SQL text, it is checked against
// matchFieldOrder here as well, so a caller that forgets to validate cannot
// inject an arbitrary identifier.
func (idx *manticoreIndex) fetchFieldText(field string, rowID int64) (string, error) {
	if !isKnownMatchField(field) {
		return "", fmt.Errorf("fetch %s: invalid field", field)
	}

	q := fmt.Sprintf(`SELECT %s FROM %s WHERE id = ? LIMIT 1`, field, idx.table)

	var text string
	if err := idx.db.QueryRow(q, rowID).Scan(&text); err != nil {
		return "", fmt.Errorf("fetch %s: %w", field, err)
	}
	return text, nil
}