Files
archivmail/internal/api/search_handlers.go
T
sysops 507dee6431 feat(PROJ-51): Aufbewahrungsfristen nach Dokumentenart (Retention-Kategorien)
Fuehrt archiving_rules ein (PROJ-43-Basis: Tabelle + CRUD-API + Admin-UI) und
erweitert die Retention-Logik (PROJ-34) um Regel-basierte Fristen, eine
globale Mindestfrist (min_retention_days) sowie Nachvollziehbarkeit der
Frist-Quelle (retain_until_source) in API und Mail-Detailansicht.
2026-06-13 20:48:16 +02:00

553 lines
16 KiB
Go

package api
import (
"fmt"
"net/http"
"net/mail"
"strconv"
"strings"
"time"
"archivmail/internal/audit"
"archivmail/internal/index"
"archivmail/internal/userstore"
"archivmail/pkg/mailparser"
)
// handleSearch serves the full-text search endpoint.
//
// Query parameters read: q, from, to, date_from, date_to, sort,
// has_attachment, page and page_size. The search is executed against the
// per-tenant index when one is available, otherwise against the global index
// with a post-filter. Hits are then restricted according to the caller's role
// and enriched with metadata parsed from the stored mail.
//
// All role-based restrictions fail closed: when the data needed to decide
// whether a hit may be shown cannot be obtained, the hit (or the whole
// request) is rejected rather than returned unfiltered.
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()
	q := params.Get("q")
	fromFilter := params.Get("from")
	toFilter := params.Get("to")
	dateFromStr := params.Get("date_from")
	dateToStr := params.Get("date_to")
	// sort is one of "relevance", "date_asc", "date_desc".
	sortParam := params.Get("sort")
	// has_attachment is "true" or "false"; anything else means "no filter".
	hasAttachStr := params.Get("has_attachment")

	// Missing or non-numeric paging values parse to 0; the errors are
	// deliberately ignored and the values normalised below.
	page, _ := strconv.Atoi(params.Get("page"))
	pageSize, _ := strconv.Atoi(params.Get("page_size"))
	if page < 0 {
		// A negative page has no meaning; treat it like an absent one.
		page = 0
	}
	if pageSize <= 0 {
		pageSize = 25
	}
	// NOTE(review): page_size has no upper bound, and every hit is loaded and
	// parsed below — consider capping it once the UI's needs are confirmed.

	req := index.SearchRequest{
		Query:    q,
		Sort:     sortParam,
		PageSize: pageSize,
		Page:     page,
	}
	switch hasAttachStr {
	case "true", "false":
		v := hasAttachStr == "true"
		req.HasAttachment = &v
	}

	// Domain search: @domain.de matches both From AND To fields.
	// A value starting with '@' triggers OR-search across XF and XT prefixes.
	// The value that actually carries the '@' prefix is used; `from` wins when
	// both filters are domain filters.
	switch {
	case strings.HasPrefix(fromFilter, "@"):
		req.OwnEmail = fromFilter
	case strings.HasPrefix(toFilter, "@"):
		req.OwnEmail = toFilter
	default:
		req.From = fromFilter
		req.To = toFilter
	}

	// Dates are accepted as RFC 3339 timestamps or as plain dates; values in
	// any other format are ignored.
	if dateFromStr != "" {
		if t, err := time.Parse(time.RFC3339, dateFromStr); err == nil {
			req.DateFrom = &t
		} else if t, err := time.Parse(time.DateOnly, dateFromStr); err == nil {
			req.DateFrom = &t
		}
	}
	if dateToStr != "" {
		if t, err := time.Parse(time.RFC3339, dateToStr); err == nil {
			req.DateTo = &t
		} else if t, err := time.Parse(time.DateOnly, dateToStr); err == nil {
			// end of day for date_to
			t = t.Add(24*time.Hour - time.Second)
			req.DateTo = &t
		}
	}

	// SEC: domain_auditor without an assigned tenant must not search at all —
	// they would otherwise fall through to the global index unfiltered.
	sess := sessionFromCtx(r.Context())
	if sess.Role == userstore.RoleDomainAuditor && sess.TenantID == nil {
		writeError(w, http.StatusForbidden, "access denied")
		return
	}

	// PROJ-21 Phase 4: Use per-tenant index when available; fall back to
	// global index + post-filter when the tenant index manager is not wired.
	// auditor always uses the global index — they see no-tenant mails only,
	// regardless of any tenant_id on their user record.
	tenantID := tenantFromCtx(r.Context())
	searchIdx := s.idx
	usedTenantIndex := false
	if s.idxMgr != nil && tenantID != nil && sess.Role != userstore.RoleAuditor {
		searchIdx = s.idxMgr.ForTenant(tenantID)
		usedTenantIndex = true
	}
	result, err := searchIdx.Search(req)
	if err != nil {
		writeError(w, http.StatusInternalServerError, "search failed")
		return
	}

	// Fallback tenant isolation: post-filter when we used the global index
	// but the user belongs to a tenant. This is the legacy path; the per-tenant
	// index path above makes this unnecessary.
	if tenantID != nil && !usedTenantIndex && len(result.Hits) > 0 && sess.Role != userstore.RoleAuditor {
		allowedIDs, idErr := s.store.GetAllIDsByTenant(r.Context(), tenantID)
		if idErr != nil {
			// SEC: fail closed — without the tenant's ID list the global
			// results cannot be filtered and must not be returned.
			writeError(w, http.StatusInternalServerError, "failed to load mail list")
			return
		}
		allowed := make(map[string]struct{}, len(allowedIDs))
		for _, id := range allowedIDs {
			allowed[id] = struct{}{}
		}
		// Filter in place, reusing the backing array of result.Hits.
		filtered := result.Hits[:0]
		for _, h := range result.Hits {
			if _, ok := allowed[h.ID]; ok {
				filtered = append(filtered, h)
			}
		}
		result.Hits = filtered
		result.Total = len(filtered)
	}

	s.audlog.Log(audit.Entry{
		EventType: audit.EventSearch,
		Username:  sess.Username,
		IPAddress: s.remoteIP(r),
		Query:     q,
		Success:   true,
	})

	// Enrich hits with metadata (from, subject, date, size, attachments, thread).
	type enrichedHit struct {
		ID             string  `json:"id"`
		Score          float64 `json:"score"`
		From           string  `json:"from,omitempty"`
		To             string  `json:"to,omitempty"`
		Subject        string  `json:"subject,omitempty"`
		Date           string  `json:"date,omitempty"`
		Size           int64   `json:"size,omitempty"`
		HasAttachments bool    `json:"has_attachments"`
		ThreadID       string  `json:"thread_id,omitempty"`
		ThreadSize     int     `json:"thread_size,omitempty"`
		Snippet        string  `json:"snippet,omitempty"`     // PROJ-44: HTML excerpt with <b>match</b> tags
		MatchField     string  `json:"match_field,omitempty"` // PROJ-44: subject|body|attachment_text|...
	}

	// auditor role: restrict results to mails with no tenant assignment.
	var auditorAllowedIDs map[string]struct{}
	if sess.Role == userstore.RoleAuditor {
		ids, idErr := s.store.GetAllIDsWithoutTenant(r.Context())
		if idErr != nil {
			writeError(w, http.StatusInternalServerError, "failed to load mail list")
			return
		}
		auditorAllowedIDs = make(map[string]struct{}, len(ids))
		for _, id := range ids {
			auditorAllowedIDs[id] = struct{}{}
		}
	}

	// SEC: For user role, restrict results to mails the user is involved in
	// (From, To, or CC). Email comes from the JWT session — no DB lookup needed.
	// If email is missing, block all results (fail-safe).
	var userEmailFilter string
	if sess.Role == userstore.RoleUser {
		userEmailFilter = strings.ToLower(sess.Email)
		if userEmailFilter == "" {
			writeJSON(w, http.StatusOK, map[string]any{"total": 0, "hits": []any{}})
			return
		}
	}

	// Batch-load thread info and received_at fallback for all hits.
	hitIDs := make([]string, len(result.Hits))
	for i, h := range result.Hits {
		hitIDs[i] = h.ID
	}
	// Thread info is optional decoration: on error the lookups below simply
	// find nothing and hits are returned without thread fields.
	threadInfo, _ := s.store.GetThreadInfo(r.Context(), hitIDs)
	receivedAts := s.store.GetReceivedAts(r.Context(), hitIDs)

	enriched := make([]enrichedHit, 0, len(result.Hits))
	for _, h := range result.Hits {
		// Auditor isolation: skip mails that belong to a tenant. The check
		// needs only the ID, so it runs before (and independently of) loading
		// the mail.
		if auditorAllowedIDs != nil {
			if _, ok := auditorAllowedIDs[h.ID]; !ok {
				continue
			}
		}

		eh := enrichedHit{ID: h.ID, Score: h.Score, Snippet: h.Snippet, MatchField: h.MatchField}

		// pm stays nil when the mail cannot be loaded or parsed.
		var pm *mailparser.ParsedMail
		if raw, err := s.store.Load(h.ID); err == nil {
			eh.Size = int64(len(raw))
			if parsed, err := mailparser.Parse(raw); err == nil {
				pm = parsed
			}
		}

		// User isolation: skip mails the user is not involved in.
		// SEC: a mail that cannot be loaded or parsed cannot be attributed to
		// the user, so it is denied as well (the hit carries a content snippet).
		if userEmailFilter != "" && (pm == nil || !mailBelongsToUser(pm, userEmailFilter)) {
			continue
		}

		if pm != nil {
			eh.From = pm.From
			if len(pm.To) > 0 {
				eh.To = strings.Join(pm.To, ", ")
			}
			eh.Subject = pm.Subject
			if !pm.Date.IsZero() {
				eh.Date = pm.Date.UTC().Format(time.RFC3339)
			} else if t, ok := receivedAts[h.ID]; ok && !t.IsZero() {
				// No usable Date header: fall back to the stored received_at.
				eh.Date = t.UTC().Format(time.RFC3339)
			}
			eh.HasAttachments = len(pm.Attachments) > 0
		}

		// PROJ-38: attach thread info
		if ti, ok := threadInfo[h.ID]; ok && ti.ThreadSize > 1 {
			eh.ThreadID = ti.ThreadID
			eh.ThreadSize = ti.ThreadSize
		}
		enriched = append(enriched, eh)
	}

	// NOTE(review): "total" is the index total and is not reduced by the
	// per-hit user/auditor filtering above — confirm this is acceptable.
	writeJSON(w, http.StatusOK, map[string]any{
		"total": result.Total,
		"hits":  enriched,
	})
}
// handleGetMail returns the parsed detail view of a single mail as JSON:
// headers, bodies, attachment metadata, verification status, thread ID, OCR
// metadata and retention information.
//
// Responses: 400 for a malformed ID, 404 when the mail cannot be loaded, 500
// when it cannot be parsed, 403 when the caller's role does not permit
// access, otherwise 200.
func (s *Server) handleGetMail(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	// SEC-22: Validate mail ID format to prevent path traversal.
	if !isValidMailID(id) {
		writeError(w, http.StatusBadRequest, "invalid mail id")
		return
	}
	raw, err := s.store.Load(id)
	if err != nil {
		writeError(w, http.StatusNotFound, "mail not found")
		return
	}
	pm, err := mailparser.Parse(raw)
	if err != nil {
		writeError(w, http.StatusInternalServerError, "failed to parse mail")
		return
	}

	sess := sessionFromCtx(r.Context())

	// Tenant isolation: a tenant-bound session sees only its own tenant's
	// mail. The auditor role is exempt, matching handleSearch: auditors see
	// no-tenant mails only, regardless of any tenant_id on their user record.
	// Without the exemption such an auditor could never open a search hit.
	if sess.TenantID != nil && sess.Role != userstore.RoleAuditor {
		// A lookup error leaves mailTenant nil, which denies access below.
		mailTenant, _ := s.store.GetTenantForMail(r.Context(), id)
		if mailTenant == nil || *mailTenant != *sess.TenantID {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// auditor: only mails with no tenant assignment.
	if sess.Role == userstore.RoleAuditor {
		ok, err := s.store.IsWithoutTenant(r.Context(), id)
		if err != nil || !ok {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// user: only own mails; domain_auditor: all tenant mails (no filter)
	if sess.Role == userstore.RoleUser {
		if sess.Email == "" || !mailBelongsToUser(pm, sess.Email) {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}

	// Attachment metadata only; the payload is served by handleGetAttachment.
	type attachMeta struct {
		Index       int    `json:"index"`
		Filename    string `json:"filename"`
		ContentType string `json:"content_type"`
		Size        int    `json:"size"`
	}
	attachments := make([]attachMeta, len(pm.Attachments))
	for i, a := range pm.Attachments {
		attachments[i] = attachMeta{
			Index:       i,
			Filename:    a.Filename,
			ContentType: a.ContentType,
			Size:        a.Size,
		}
	}

	// An unset Date is reported as an empty string.
	var dateStr string
	if !pm.Date.IsZero() {
		dateStr = pm.Date.UTC().Format(time.RFC3339)
	}

	// Verify status. The error is ignored; fields that are not set stay nil
	// in the response.
	vs, _ := s.store.GetVerifyStatus(r.Context(), id)
	var verifyOK any
	var verifiedAt any
	if vs.VerifyOK != nil {
		verifyOK = *vs.VerifyOK
	}
	if vs.VerifiedAt != nil {
		verifiedAt = vs.VerifiedAt.UTC().Format(time.RFC3339)
	}

	// PROJ-38: load thread_id from DB
	var threadID string
	if ti, err := s.store.GetThreadInfo(r.Context(), []string{id}); err == nil {
		if info, ok := ti[id]; ok {
			threadID = info.ThreadID
		}
	}

	// PROJ-44: OCR status + extracted character count for the GUI badge and
	// the conditional "OCR-Text herunterladen" button. Errors are swallowed
	// silently so a missing ocr_chars column on an old DB does not break the
	// detail view; defaults are returned by GetOCRMeta in that case.
	ocrStatus, ocrChars, _ := s.store.GetOCRMeta(r.Context(), id)
	if ocrStatus == "" {
		ocrStatus = "pending"
	}

	// PROJ-51: retention lock + its source for auditor traceability.
	// Both values stay nil when the lookup fails or nothing is recorded.
	var retainUntil any
	var retainSource any
	if until, source, rerr := s.store.GetRetentionInfo(r.Context(), id); rerr == nil {
		if until != nil {
			retainUntil = until.UTC().Format(time.RFC3339)
		}
		if source != "" {
			retainSource = source
		}
	}

	writeJSON(w, http.StatusOK, map[string]any{
		"id":                  id,
		"from":                pm.From,
		"to":                  strings.Join(pm.To, ", "),
		"cc":                  strings.Join(pm.CC, ", "),
		"subject":             pm.Subject,
		"date":                dateStr,
		"size":                len(raw),
		"body_html":           pm.HTMLBody,
		"body_plain":          pm.TextBody,
		"raw_headers":         extractRawHeaders(raw),
		"attachments":         attachments,
		"verify_ok":           verifyOK,
		"verified_at":         verifiedAt,
		"thread_id":           threadID,
		"ocr_status":          ocrStatus,
		"ocr_chars":           ocrChars,
		"retain_until":        retainUntil,
		"retain_until_source": retainSource,
	})
}
// handleGetAttachment streams one attachment of a mail as a file download.
// The mail is addressed by the "id" path value and the attachment by the
// zero-based "index" path value.
//
// Responses: 400 for a malformed ID or index, 404 when the mail cannot be
// loaded or the index is out of range, 500 when the mail cannot be parsed,
// 403 when the caller's role does not permit access, otherwise 200.
func (s *Server) handleGetAttachment(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	// SEC-22: Validate mail ID format to prevent path traversal.
	if !isValidMailID(id) {
		writeError(w, http.StatusBadRequest, "invalid mail id")
		return
	}
	idx, err := strconv.Atoi(r.PathValue("index"))
	if err != nil {
		writeError(w, http.StatusBadRequest, "invalid attachment index")
		return
	}
	raw, err := s.store.Load(id)
	if err != nil {
		writeError(w, http.StatusNotFound, "mail not found")
		return
	}
	pm, err := mailparser.Parse(raw)
	if err != nil {
		writeError(w, http.StatusInternalServerError, "failed to parse mail")
		return
	}

	sess := sessionFromCtx(r.Context())

	// Tenant isolation. The auditor role is exempt, matching handleSearch:
	// auditors see no-tenant mails only, regardless of any tenant_id on their
	// user record, and are restricted by the dedicated check below.
	if sess.TenantID != nil && sess.Role != userstore.RoleAuditor {
		// A lookup error leaves mailTenant nil, which denies access below.
		mailTenant, _ := s.store.GetTenantForMail(r.Context(), id)
		if mailTenant == nil || *mailTenant != *sess.TenantID {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// auditor: only mails with no tenant assignment.
	if sess.Role == userstore.RoleAuditor {
		ok, err := s.store.IsWithoutTenant(r.Context(), id)
		if err != nil || !ok {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// user: only own mails; domain_auditor: all tenant mails (no filter)
	if sess.Role == userstore.RoleUser {
		u, err := s.users.GetByUsername(sess.Username)
		if err != nil || !mailBelongsToUser(pm, u.Email) {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}

	if idx < 0 || idx >= len(pm.Attachments) {
		writeError(w, http.StatusNotFound, "attachment not found")
		return
	}
	a := pm.Attachments[idx]

	filename := sanitizeFilename(a.Filename)
	if filename == "" {
		filename = fmt.Sprintf("attachment-%d", idx)
	}
	// The declared type comes from the mail itself. Fall back to a generic
	// binary type when it is missing instead of sending an empty header.
	contentType := a.ContentType
	if contentType == "" {
		contentType = "application/octet-stream"
	}

	w.Header().Set("Content-Type", contentType)
	// The content is sender-controlled; tell browsers not to second-guess
	// the declared type.
	w.Header().Set("X-Content-Type-Options", "nosniff")
	w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, filename))
	w.Header().Set("Content-Length", strconv.Itoa(len(a.Data)))
	w.WriteHeader(http.StatusOK)
	// A write error means the client went away; there is nothing left to do.
	_, _ = w.Write(a.Data)
}
// handleGetRaw serves the stored mail verbatim as a message/rfc822 download.
// The download is named after the first 16 characters of the mail ID.
//
// Responses: 400 for a malformed ID, 404 when the mail cannot be loaded, 403
// when the caller's role does not permit access, 500 when ownership cannot
// be established for the user role because the mail does not parse,
// otherwise 200.
func (s *Server) handleGetRaw(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	// SEC-22: Validate mail ID format to prevent path traversal.
	if !isValidMailID(id) {
		writeError(w, http.StatusBadRequest, "invalid mail id")
		return
	}
	raw, err := s.store.Load(id)
	if err != nil {
		writeError(w, http.StatusNotFound, "mail not found")
		return
	}

	sess := sessionFromCtx(r.Context())

	// Tenant isolation. The auditor role is exempt, matching handleSearch:
	// auditors see no-tenant mails only, regardless of any tenant_id on their
	// user record, and are restricted by the dedicated check below.
	if sess.TenantID != nil && sess.Role != userstore.RoleAuditor {
		// A lookup error leaves mailTenant nil, which denies access below.
		mailTenant, _ := s.store.GetTenantForMail(r.Context(), id)
		if mailTenant == nil || *mailTenant != *sess.TenantID {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// auditor: only mails with no tenant assignment.
	if sess.Role == userstore.RoleAuditor {
		ok, err := s.store.IsWithoutTenant(r.Context(), id)
		if err != nil || !ok {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}
	// SEC-28/29: User only: own mails. Parse failure must NOT grant access.
	if sess.Role == userstore.RoleUser {
		pm, err := mailparser.Parse(raw)
		if err != nil {
			writeError(w, http.StatusInternalServerError, "failed to parse mail")
			return
		}
		u, err := s.users.GetByUsername(sess.Username)
		if err != nil || !mailBelongsToUser(pm, u.Email) {
			writeError(w, http.StatusForbidden, "access denied")
			return
		}
	}

	// Use at most the first 16 characters of the ID for the filename. The
	// bound keeps the slice expression safe even if the ID validator ever
	// admits IDs shorter than that.
	shortID := id[:min(len(id), 16)]

	w.Header().Set("Content-Type", "message/rfc822")
	w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.eml"`, shortID))
	w.Header().Set("Content-Length", strconv.Itoa(len(raw)))
	w.WriteHeader(http.StatusOK)
	// A write error means the client went away; there is nothing left to do.
	_, _ = w.Write(raw)
}
// emailsFromHeader extracts the bare, lower-cased e-mail addresses from a
// mail header value such as From, To or CC.
//
// The value is handed to net/mail.ParseAddressList. Any parse error yields
// nil: the function is fail-closed, so a malformed or attacker-controlled
// header contributes no addresses and therefore cannot grant access.
func emailsFromHeader(header string) []string {
	list, parseErr := mail.ParseAddressList(header)
	if parseErr != nil {
		// SEC: fail-closed — unparseable header grants no access.
		return nil
	}
	emails := make([]string, 0, len(list))
	for _, entry := range list {
		emails = append(emails, strings.ToLower(entry.Address))
	}
	return emails
}
// mailBelongsToUser reports whether userEmail appears as an address in the
// mail's From, To, or CC headers.
//
// Header values are parsed with net/mail.ParseAddressList (via
// emailsFromHeader), so display names ("Name <addr>") cause neither false
// positives nor false negatives. The comparison is an exact, case-insensitive
// match on the bare address. There is NO substring fallback: a header that
// fails to parse contributes no addresses (fail-closed).
//
// A nil mail or an empty userEmail never matches.
func mailBelongsToUser(pm *mailparser.ParsedMail, userEmail string) bool {
	target := strings.ToLower(userEmail)
	if pm == nil || target == "" {
		// SEC: nothing to compare against — deny.
		return false
	}

	// headerHasTarget reports whether one header value lists the target.
	headerHasTarget := func(header string) bool {
		for _, addr := range emailsFromHeader(header) {
			// Exact match on parsed address — only valid path after fail-closed parsing.
			if addr == target {
				return true
			}
		}
		return false
	}

	if headerHasTarget(pm.From) {
		return true
	}
	for _, to := range pm.To {
		if headerHasTarget(to) {
			return true
		}
	}
	for _, cc := range pm.CC {
		if headerHasTarget(cc) {
			return true
		}
	}
	return false
}
// extractRawHeaders returns the header section of a raw RFC 2822 email: the
// bytes before the first blank line, without the terminating line break.
//
// Both CRLF ("\r\n\r\n") and bare LF ("\n\n") separators are recognised;
// whichever occurs first ends the header section. When no separator is found
// the whole input is returned.
func extractRawHeaders(raw []byte) string {
	// Each pattern is bounds-checked on its own length so that a separator
	// at the very end of the input (a mail with an empty body) is still found.
	for i := 0; i+1 < len(raw); i++ {
		if raw[i] == '\n' && raw[i+1] == '\n' {
			return string(raw[:i])
		}
		if i+3 < len(raw) && raw[i] == '\r' && raw[i+1] == '\n' && raw[i+2] == '\r' && raw[i+3] == '\n' {
			return string(raw[:i])
		}
	}
	return string(raw)
}