package mailparser import ( "bytes" "encoding/base64" "fmt" "io" "mime" "mime/multipart" "mime/quotedprintable" "net/mail" "strings" "time" ) // Attachment represents a MIME attachment in a parsed email. type Attachment struct { Filename string ContentType string Data []byte Size int } // ParsedMail holds the structured content of a parsed email message. type ParsedMail struct { From string To []string CC []string Subject string MessageID string TextBody string HTMLBody string Date time.Time Attachments []Attachment Raw []byte } // Parse parses a raw RFC 2822 / MIME email and returns a ParsedMail. func Parse(raw []byte) (*ParsedMail, error) { msg, err := mail.ReadMessage(bytes.NewReader(raw)) if err != nil { return nil, fmt.Errorf("mailparser: read message: %w", err) } pm := &ParsedMail{Raw: raw} // From if from := msg.Header.Get("From"); from != "" { addrs, err := mail.ParseAddressList(from) if err == nil && len(addrs) > 0 { pm.From = addrs[0].Address } else { pm.From = from } } // To if to := msg.Header.Get("To"); to != "" { addrs, err := mail.ParseAddressList(to) if err == nil { for _, a := range addrs { pm.To = append(pm.To, a.Address) } } } // CC if cc := msg.Header.Get("Cc"); cc != "" { addrs, err := mail.ParseAddressList(cc) if err == nil { for _, a := range addrs { pm.CC = append(pm.CC, a.Address) } } } // Subject - decode MIME encoded-words pm.Subject = decodeMIMEHeader(msg.Header.Get("Subject")) // Message-ID - strip angle brackets msgID := msg.Header.Get("Message-Id") pm.MessageID = strings.Trim(msgID, "<>") // Date — try go-message parser first, then fallback formats, then zero if d, err := msg.Header.Date(); err == nil { pm.Date = d } else { // Some MUAs emit non-standard variants (e.g. "+0100 (CET)" suffix). // Try common RFC 2822 / non-standard formats before giving up. raw := strings.TrimSpace(msg.Header.Get("Date")) // Strip parenthesised timezone comment: "... +0100 (CET)" → "... 
+0100" if idx := strings.LastIndex(raw, "("); idx > 0 { raw = strings.TrimSpace(raw[:idx]) } parsed := false for _, layout := range []string{ "Mon, 2 Jan 2006 15:04:05 -0700", "Mon, 02 Jan 2006 15:04:05 -0700", "2 Jan 2006 15:04:05 -0700", "02 Jan 2006 15:04:05 -0700", "Mon, 2 Jan 2006 15:04:05 MST", "Mon, 02 Jan 2006 15:04:05 MST", time.RFC1123Z, time.RFC1123, } { if t, err := time.Parse(layout, raw); err == nil { pm.Date = t parsed = true break } } if !parsed { // Leave pm.Date as zero — storage will use DB DEFAULT NOW() pm.Date = time.Time{} } } // Parse body / MIME parts contentType := msg.Header.Get("Content-Type") mediaType, params, err := mime.ParseMediaType(contentType) if err != nil { // No content-type or parse error: treat as plain text body, _ := io.ReadAll(msg.Body) pm.TextBody = string(body) return pm, nil } if strings.HasPrefix(mediaType, "multipart/") { boundary := params["boundary"] if err := parseMultipart(pm, msg.Body, boundary); err != nil { return nil, fmt.Errorf("mailparser: multipart: %w", err) } } else { body, _ := io.ReadAll(msg.Body) decoded := decodeBody(body, msg.Header.Get("Content-Transfer-Encoding")) if strings.Contains(mediaType, "html") { pm.HTMLBody = string(decoded) } else { pm.TextBody = string(decoded) } } return pm, nil } // parseMultipart walks MIME parts and fills text, html, and attachments. 
func parseMultipart(pm *ParsedMail, body io.Reader, boundary string) error { mr := multipart.NewReader(body, boundary) for { part, err := mr.NextPart() if err == io.EOF { break } if err != nil { return err } ct := part.Header.Get("Content-Type") mediaType, params, err := mime.ParseMediaType(ct) if err != nil { mediaType = "application/octet-stream" params = map[string]string{} } data, _ := io.ReadAll(part) cte := part.Header.Get("Content-Transfer-Encoding") decoded := decodeBody(data, cte) // Check disposition for attachment disp := part.Header.Get("Content-Disposition") dispType, dispParams, _ := mime.ParseMediaType(disp) filename := dispParams["filename"] if filename == "" { filename = params["name"] } filename = decodeMIMEHeader(filename) if strings.HasPrefix(dispType, "attachment") || filename != "" { pm.Attachments = append(pm.Attachments, Attachment{ Filename: filename, ContentType: mediaType, Data: decoded, Size: len(decoded), }) continue } // Nested multipart if strings.HasPrefix(mediaType, "multipart/") { if err := parseMultipart(pm, bytes.NewReader(decoded), params["boundary"]); err != nil { return err } continue } switch { case strings.Contains(mediaType, "text/plain"): pm.TextBody += string(decoded) case strings.Contains(mediaType, "text/html"): pm.HTMLBody += string(decoded) } } return nil } // decodeBody decodes Content-Transfer-Encoding if needed. 
//
// cte is the Content-Transfer-Encoding header value; it is matched
// case-insensitively after trimming surrounding whitespace. Only
// "quoted-printable" and "base64" are decoded. For any other value, or when
// decoding fails, data is returned unchanged.
func decodeBody(data []byte, cte string) []byte {
	switch strings.ToLower(strings.TrimSpace(cte)) {
	case "quoted-printable":
		decoded, err := io.ReadAll(quotedprintable.NewReader(bytes.NewReader(data)))
		if err == nil {
			return decoded
		}

	case "base64":
		// RFC 2045 §6.8 requires decoders to ignore line breaks and other
		// characters outside the base64 alphabet. The Go decoder skips only
		// CR and LF, so drop all ASCII whitespace (trailing spaces, indented
		// lines) in a single pass before decoding.
		clean := make([]byte, 0, len(data))
		for _, b := range data {
			switch b {
			case ' ', '\t', '\r', '\n', '\v', '\f':
				continue
			}
			clean = append(clean, b)
		}
		decoded := make([]byte, base64.StdEncoding.DecodedLen(len(clean)))
		if n, err := base64.StdEncoding.Decode(decoded, clean); err == nil {
			return decoded[:n]
		}
	}
	return data
}

// decodeMIMEHeader decodes RFC 2047 encoded-word headers.
//
// It uses a mime.WordDecoder with no CharsetReader, so only the charsets the
// decoder supports natively are converted. If decoding returns an error, s is
// returned unchanged.
func decodeMIMEHeader(s string) string {
	var dec mime.WordDecoder
	decoded, err := dec.DecodeHeader(s)
	if err != nil {
		return s
	}
	return decoded
}