package mailparser import ( "bufio" "bytes" "strings" ) // SplitMbox splits a raw mbox file into individual RFC 2822 message bytes. // Each message starts with a "From " separator line which is skipped. func SplitMbox(data []byte) [][]byte { var messages [][]byte var current bytes.Buffer scanner := bufio.NewScanner(bytes.NewReader(data)) scanner.Buffer(make([]byte, 1024*1024), 10*1024*1024) inMessage := false for scanner.Scan() { line := scanner.Text() // mbox separator: line starts with "From " but not "From:" header if strings.HasPrefix(line, "From ") && !strings.HasPrefix(line, "From: ") { if inMessage && current.Len() > 0 { messages = append(messages, bytes.TrimSpace(current.Bytes())) current.Reset() } inMessage = true continue } if inMessage { // unescape ">From " lines (mbox quoting) if strings.HasPrefix(line, ">From ") { line = line[1:] } current.WriteString(line) current.WriteByte('\n') } } if inMessage && current.Len() > 0 { messages = append(messages, bytes.TrimSpace(current.Bytes())) } return messages }