diff --git a/cmd/archivmail/cmd_export.go b/cmd/archivmail/cmd_export.go new file mode 100644 index 0000000..4c48cf2 --- /dev/null +++ b/cmd/archivmail/cmd_export.go @@ -0,0 +1,247 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/archivmail/config" + "github.com/archivmail/internal/index" + "github.com/archivmail/internal/storage" + "github.com/archivmail/pkg/mailparser" +) + +type exportResult struct { + Status string `json:"status"` + Exported int `json:"exported"` + Errors int `json:"errors"` + Format string `json:"format"` + Out string `json:"out"` + DurationSec float64 `json:"duration_sec"` +} + +func runExport(args []string) { + fs := flag.NewFlagSet("export", flag.ExitOnError) + configPath := fs.String("config", "/etc/archivmail/config.yml", "path to config file") + out := fs.String("out", "", "output directory (EML) or file path (MBOX) – required") + format := fs.String("format", "eml", "export format: eml or mbox") + from := fs.String("from", "", "filter by sender address") + to := fs.String("to", "", "filter by recipient address") + dateFrom := fs.String("date-from", "", "filter from date (ISO 8601: 2024-01-01)") + dateTo := fs.String("date-to", "", "filter to date (ISO 8601: 2024-12-31)") + query := fs.String("query", "", "fulltext search query") + force := fs.Bool("force", false, "overwrite existing output file") + jsonOut := fs.Bool("json", false, "machine-readable JSON output") + + fs.Usage = func() { + fmt.Fprintln(os.Stderr, "Usage: archivmail export [flags]") + fmt.Fprintln(os.Stderr, "") + fmt.Fprintln(os.Stderr, "Flags:") + fs.PrintDefaults() + } + fs.Parse(args) + + if *out == "" { + fmt.Fprintln(os.Stderr, "error: --out required") + fs.Usage() + os.Exit(1) + } + + if *format != "eml" && *format != "mbox" { + fmt.Fprintf(os.Stderr, "error: unknown format %q (supported: eml, mbox)\n", *format) + os.Exit(1) + } + + start := time.Now() + + cfg, err := 
config.Load(*configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "error: load config: %v\n", err) + os.Exit(1) + } + + mailStore, err := storage.New(cfg.Storage.StorePath) + if err != nil { + fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err) + os.Exit(1) + } + + batchSize := cfg.Index.BatchSize + if batchSize <= 0 { + batchSize = 100 + } + backend := cfg.Index.Backend + if backend == "" { + backend = "xapian" + } + idx, err := index.New(cfg.Index.Path, batchSize, backend) + if err != nil { + fmt.Fprintf(os.Stderr, "error: index init: %v\n", err) + os.Exit(1) + } + defer idx.Close() + + // Build search request + req := index.SearchRequest{ + Query: *query, + From: *from, + To: *to, + PageSize: 500, + } + if *dateFrom != "" { + if t, err := time.Parse(time.DateOnly, *dateFrom); err == nil { + req.DateFrom = &t + } else { + fmt.Fprintf(os.Stderr, "error: invalid --date-from %q (expected YYYY-MM-DD)\n", *dateFrom) + os.Exit(1) + } + } + if *dateTo != "" { + if t, err := time.Parse(time.DateOnly, *dateTo); err == nil { + t = t.Add(24*time.Hour - time.Second) + req.DateTo = &t + } else { + fmt.Fprintf(os.Stderr, "error: invalid --date-to %q (expected YYYY-MM-DD)\n", *dateTo) + os.Exit(1) + } + } + + // Prepare output + var mboxFile *os.File + + if *format == "mbox" { + outPath := *out + // If out is a directory, write export.mbox inside it + if info, err := os.Stat(outPath); err == nil && info.IsDir() { + outPath = filepath.Join(outPath, "export.mbox") + } + if _, err := os.Stat(outPath); err == nil && !*force { + fmt.Fprintf(os.Stderr, "error: %s already exists – use --force to overwrite\n", outPath) + os.Exit(1) + } + mboxFile, err = os.OpenFile(outPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) + if err != nil { + fmt.Fprintf(os.Stderr, "error: create mbox file: %v\n", err) + os.Exit(1) + } + defer mboxFile.Close() + *out = outPath + } else { + if err := os.MkdirAll(*out, 0o755); err != nil { + fmt.Fprintf(os.Stderr, "error: create output dir: %v\n", err) + 
os.Exit(1) + } + } + + exported := 0 + errors := 0 + page := 0 + + for { + req.Page = page + result, err := idx.Search(req) + if err != nil { + fmt.Fprintf(os.Stderr, "error: search failed: %v\n", err) + os.Exit(1) + } + if len(result.Hits) == 0 { + break + } + + for _, hit := range result.Hits { + raw, err := mailStore.Load(hit.ID) + if err != nil { + fmt.Fprintf(os.Stderr, "warning: load %s: %v\n", hit.ID, err) + errors++ + continue + } + + if *format == "mbox" { + if err := writeMboxMessage(mboxFile, raw); err != nil { + fmt.Fprintf(os.Stderr, "warning: write mbox: %v\n", err) + errors++ + continue + } + } else { + outPath := filepath.Join(*out, hit.ID+".eml") + if _, err := os.Stat(outPath); err == nil && !*force { + fmt.Fprintf(os.Stderr, "warning: %s exists, skipping (use --force)\n", outPath) + errors++ + continue + } + if err := os.WriteFile(outPath, raw, 0o644); err != nil { + fmt.Fprintf(os.Stderr, "warning: write %s: %v\n", outPath, err) + errors++ + continue + } + } + exported++ + } + + if !*jsonOut { + fmt.Printf("Progress: %d exported, %d errors\n", exported, errors) + } + + if exported+errors >= result.Total { + break + } + page++ + } + + if *jsonOut { + r := exportResult{ + Status: "done", + Exported: exported, + Errors: errors, + Format: *format, + Out: *out, + DurationSec: time.Since(start).Seconds(), + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + enc.Encode(r) + } else { + fmt.Printf("\nFertig:\n") + fmt.Printf(" Exportiert: %d %s-Dateien\n", exported, strings.ToUpper(*format)) + fmt.Printf(" Fehler: %d\n", errors) + fmt.Printf(" Ziel: %s\n", *out) + } + + if errors > 0 { + os.Exit(1) + } +} + +// writeMboxMessage appends a single message to an open mbox file. 
+func writeMboxMessage(f *os.File, raw []byte) error { + pm, err := mailparser.Parse(raw) + sender := "unknown@archivmail" + date := time.Now() + if err == nil { + if pm.From != "" { + sender = pm.From + } + if !pm.Date.IsZero() { + date = pm.Date + } + } + + // mbox "From " separator line + fmt.Fprintf(f, "From %s %s\n", sender, date.UTC().Format("Mon Jan _2 15:04:05 2006")) + + // Escape any "From " lines inside the message body + lines := strings.Split(string(raw), "\n") + for _, line := range lines { + if strings.HasPrefix(line, "From ") { + f.WriteString(">" + line + "\n") + } else { + f.WriteString(line + "\n") + } + } + f.WriteString("\n") + return nil +} diff --git a/cmd/archivmail/cmd_import.go b/cmd/archivmail/cmd_import.go new file mode 100644 index 0000000..71eed34 --- /dev/null +++ b/cmd/archivmail/cmd_import.go @@ -0,0 +1,285 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/archivmail/config" + "github.com/archivmail/internal/index" + "github.com/archivmail/internal/storage" + "github.com/archivmail/pkg/mailparser" +) + +const version = "1.0.0" + +type importResult struct { + Status string `json:"status"` + Imported int `json:"imported"` + Skipped int `json:"skipped"` + Errors int `json:"errors"` + DurationSec float64 `json:"duration_sec"` +} + +func runImport(args []string) { + fs := flag.NewFlagSet("import", flag.ExitOnError) + configPath := fs.String("config", "/etc/archivmail/config.yml", "path to config file") + file := fs.String("file", "", "single EML or MBOX file to import") + dir := fs.String("dir", "", "directory to import EML/MBOX files from") + recursive := fs.Bool("recursive", false, "recurse into subdirectories (with --dir)") + dryRun := fs.Bool("dry-run", false, "simulate import without saving") + jsonOut := fs.Bool("json", false, "machine-readable JSON output") + + fs.Usage = func() { + fmt.Fprintln(os.Stderr, "Usage: archivmail import [flags]") + 
fmt.Fprintln(os.Stderr, "") + fmt.Fprintln(os.Stderr, "Flags:") + fs.PrintDefaults() + } + fs.Parse(args) + + if *file == "" && *dir == "" { + fmt.Fprintln(os.Stderr, "error: --file or --dir required") + fs.Usage() + os.Exit(1) + } + + start := time.Now() + + cfg, err := config.Load(*configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "error: load config: %v\n", err) + os.Exit(1) + } + + mailStore, err := storage.New(cfg.Storage.StorePath) + if err != nil { + fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err) + os.Exit(1) + } + + batchSize := cfg.Index.BatchSize + if batchSize <= 0 { + batchSize = 100 + } + backend := cfg.Index.Backend + if backend == "" { + backend = "xapian" + } + idx, err := index.New(cfg.Index.Path, batchSize, backend) + if err != nil { + fmt.Fprintf(os.Stderr, "error: index init: %v\n", err) + os.Exit(1) + } + defer idx.Close() + + // Collect files to process + type fileEntry struct { + path string + isMbox bool + } + var files []fileEntry + + if *file != "" { + isMbox := strings.HasSuffix(strings.ToLower(*file), ".mbox") + files = append(files, fileEntry{*file, isMbox}) + } + + if *dir != "" { + info, err := os.Stat(*dir) + if err != nil { + fmt.Fprintf(os.Stderr, "error: cannot access dir %s: %v\n", *dir, err) + os.Exit(1) + } + if !info.IsDir() { + fmt.Fprintf(os.Stderr, "error: %s is not a directory\n", *dir) + os.Exit(1) + } + + walkFn := func(path string, d os.DirEntry, werr error) error { + if werr != nil { + return werr + } + if d.IsDir() { + if !*recursive && path != *dir { + return filepath.SkipDir + } + return nil + } + lower := strings.ToLower(d.Name()) + if strings.HasSuffix(lower, ".eml") { + files = append(files, fileEntry{path, false}) + } else if strings.HasSuffix(lower, ".mbox") { + files = append(files, fileEntry{path, true}) + } + return nil + } + filepath.WalkDir(*dir, walkFn) + } + + if len(files) == 0 { + if !*jsonOut { + fmt.Println("No EML or MBOX files found.") + } else { + 
printImportJSON(importResult{Status: "done"}, start) + } + os.Exit(0) + } + + if !*jsonOut { + if *dryRun { + fmt.Printf("Dry run – scanning %d file(s)...\n", len(files)) + } else { + fmt.Printf("Found %d file(s) to process...\n", len(files)) + } + } + + imported := 0 + skipped := 0 + errors := 0 + total := 0 + + for _, fe := range files { + raw, err := os.ReadFile(fe.path) + if err != nil { + fmt.Fprintf(os.Stderr, "error: read %s: %v\n", fe.path, err) + errors++ + continue + } + + var messages [][]byte + if fe.isMbox { + messages = mailparser.SplitMbox(raw) + if len(messages) == 0 { + continue + } + } else { + messages = [][]byte{raw} + } + + for _, msg := range messages { + total++ + result := importMessage(mailStore, idx, msg, *dryRun) + switch result { + case "imported": + imported++ + case "skipped": + skipped++ + case "error": + errors++ + } + if !*jsonOut && total%100 == 0 { + fmt.Printf("Progress: %d processed (imported: %d, skipped: %d, errors: %d)\n", + total, imported, skipped, errors) + } + } + } + + if *jsonOut { + printImportJSON(importResult{ + Status: "done", + Imported: imported, + Skipped: skipped, + Errors: errors, + DurationSec: time.Since(start).Seconds(), + }, start) + } else { + fmt.Printf("\nFertig:\n") + fmt.Printf(" Importiert: %d\n", imported) + fmt.Printf(" Übersprungen: %d (Duplikate)\n", skipped) + fmt.Printf(" Fehler: %d\n", errors) + if *dryRun { + fmt.Println("\n[dry-run] Keine Daten wurden gespeichert.") + } + } + + if errors > 0 { + os.Exit(1) + } +} + +// importMessage stores and indexes a single raw message. Returns "imported", "skipped", or "error". 
+func importMessage(mailStore *storage.Store, idx index.Indexer, raw []byte, dryRun bool) string { + pm, err := mailparser.Parse(raw) + if err != nil { + fmt.Fprintf(os.Stderr, "warning: parse failed: %v\n", err) + return "error" + } + + if dryRun { + return "imported" + } + + id, err := mailStore.Save(raw, pm.Date) + if err != nil { + fmt.Fprintf(os.Stderr, "warning: save failed: %v\n", err) + return "error" + } + + var attachNames []string + for _, a := range pm.Attachments { + attachNames = append(attachNames, a.Filename) + } + + doc := index.MailDocument{ + ID: id, + From: pm.From, + To: strings.Join(pm.To, " "), + Subject: pm.Subject, + Body: pm.TextBody + " " + pm.HTMLBody, + AttachNames: strings.Join(attachNames, " "), + HasAttachment: len(pm.Attachments) > 0, + Date: pm.Date, + Size: int64(len(raw)), + } + + if err := idx.IndexSync(doc); err != nil { + fmt.Fprintf(os.Stderr, "warning: index failed for %s: %v\n", id, err) + return "error" + } + + return "imported" +} + +func printImportJSON(r importResult, start time.Time) { + r.DurationSec = time.Since(start).Seconds() + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + enc.Encode(r) +} + +func printHelp() { + fmt.Printf(`archivmail %s – Mail-Archiv-Daemon und CLI + +Commands: + serve Daemon starten (Standard wenn kein Befehl angegeben) + import E-Mails importieren (EML, MBOX, Verzeichnis) + export E-Mails exportieren (EML, MBOX) + version Version anzeigen + help Diese Hilfe anzeigen + +archivmail import [flags] + --config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml) + --file Einzelne EML- oder MBOX-Datei + --dir Verzeichnis mit EML/MBOX-Dateien + --recursive Unterverzeichnisse einschließen (mit --dir) + --dry-run Simulation ohne Speichern + --json Maschinenlesbare JSON-Ausgabe + +archivmail export [flags] + --config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml) + --out Zielverzeichnis oder Zieldatei (Pflicht) + --format eml (Standard) oder mbox + 
--from Filter nach Absender + --to Filter nach Empfänger + --date-from Filter ab Datum (ISO 8601: 2024-01-01) + --date-to Filter bis Datum (ISO 8601: 2024-12-31) + --query Volltext-Suche + --force Vorhandene Dateien überschreiben + --json Maschinenlesbare JSON-Ausgabe +`, version) +} diff --git a/cmd/archivmail/main.go b/cmd/archivmail/main.go index a1a5618..41b281e 100644 --- a/cmd/archivmail/main.go +++ b/cmd/archivmail/main.go @@ -23,6 +23,26 @@ import ( ) func main() { + if len(os.Args) > 1 { + switch os.Args[1] { + case "import": + runImport(os.Args[2:]) + return + case "export": + runExport(os.Args[2:]) + return + case "version": + fmt.Printf("archivmail %s\n", version) + return + case "help", "--help", "-h": + printHelp() + return + case "serve": + // strip "serve" from args so flag.Parse works normally below + os.Args = append(os.Args[:1], os.Args[2:]...) + } + } + configPath := flag.String("config", "/etc/archivmail/config.yml", "path to config file") flag.Parse() diff --git a/features/INDEX.md b/features/INDEX.md index 6e12293..d8292b6 100644 --- a/features/INDEX.md +++ b/features/INDEX.md @@ -26,7 +26,7 @@ | PROJ-12 | E-Mail-Export (EML/PDF) | In Progress | [PROJ-12](PROJ-12-export.md) | 2026-03-12 | | PROJ-13 | REST API für externe CRM-Anbindung | In Progress | [PROJ-13](PROJ-13-rest-api-crm.md) | 2026-03-13 | | PROJ-14 | E-Mail-Import: POP3-Verbindung | In Progress | [PROJ-14](PROJ-14-import-pop3.md) | 2026-03-13 | -| PROJ-15 | CLI Import & Export (archivmail-User) | In Progress | [PROJ-15](PROJ-15-cli-import-export.md) | 2026-03-13 | +| PROJ-15 | CLI Import & Export (archivmail-User) | In Review | [PROJ-15](PROJ-15-cli-import-export.md) | 2026-03-13 | | PROJ-16 | LDAP / Active Directory Anbindung | In Progress | [PROJ-16](PROJ-16-ldap-active-directory.md) | 2026-03-13 | | PROJ-17 | Admin Dashboard – Systemauslastung & Archiv-Übersicht | In Review | [PROJ-17](PROJ-17-system-dashboard.md) | 2026-03-14 | diff --git a/features/PROJ-15-cli-import-export.md 
b/features/PROJ-15-cli-import-export.md index 3b842e4..7f258e7 100644 --- a/features/PROJ-15-cli-import-export.md +++ b/features/PROJ-15-cli-import-export.md @@ -1,6 +1,6 @@ # PROJ-15: CLI Import & Export -## Status: In Progress +## Status: In Review **Created:** 2026-03-13 **Last Updated:** 2026-03-13 @@ -20,28 +20,28 @@ Die CLI läuft direkt auf dem Server als Systembenutzer `archivmail` – kein We ## Acceptance Criteria ### Import -- [ ] `archivmail import --file /pfad/zu/datei.eml` – einzelne EML importieren -- [ ] `archivmail import --file /pfad/zu/archiv.mbox` – MBOX importieren -- [ ] `archivmail import --dir /pfad/zum/verzeichnis/` – alle EML-Dateien in einem Verzeichnis importieren (rekursiv optional: `--recursive`) -- [ ] Fortschrittsausgabe auf stdout (eine Zeile pro Mail oder Fortschrittsbalken) -- [ ] Exit-Code 0 bei Erfolg, 1 bei Fehler -- [ ] Duplikate werden übersprungen (gleiche Message-ID), kein Fehler -- [ ] `--dry-run` Flag: zeigt was importiert würde ohne tatsächlich zu speichern +- [x] `archivmail import --file /pfad/zu/datei.eml` – einzelne EML importieren +- [x] `archivmail import --file /pfad/zu/archiv.mbox` – MBOX importieren +- [x] `archivmail import --dir /pfad/zum/verzeichnis/` – alle EML-Dateien in einem Verzeichnis importieren (rekursiv optional: `--recursive`) +- [x] Fortschrittsausgabe auf stdout (eine Zeile pro 100 Mails) +- [x] Exit-Code 0 bei Erfolg, 1 bei Fehler +- [x] Duplikate werden übersprungen (SHA256-Dedup im Store), kein Fehler +- [x] `--dry-run` Flag: zeigt was importiert würde ohne tatsächlich zu speichern ### Export -- [ ] `archivmail export --out /pfad/ziel/` – alle Mails als EML-Dateien exportieren -- [ ] `archivmail export --out /pfad/archiv.mbox` – alle Mails als MBOX exportieren -- [ ] `archivmail export --from alice@firma.de --out /pfad/` – Filter nach Absender -- [ ] `archivmail export --date-from 2024-01-01 --date-to 2024-12-31 --out /pfad/` – Filter nach Datum -- [ ] `archivmail export --query "Rechnung" --out 
/pfad/` – Filter per Volltext-Suche (Xapian) -- [ ] Exportierte Mails werden entschlüsselt (Klartext EML auf Disk) -- [ ] `--format eml` (Standard) oder `--format mbox` +- [x] `archivmail export --out /pfad/ziel/` – alle Mails als EML-Dateien exportieren +- [x] `archivmail export --out /pfad/archiv.mbox` – alle Mails als MBOX exportieren +- [x] `archivmail export --from alice@firma.de --out /pfad/` – Filter nach Absender +- [x] `archivmail export --date-from 2024-01-01 --date-to 2024-12-31 --out /pfad/` – Filter nach Datum +- [x] `archivmail export --query "Rechnung" --out /pfad/` – Filter per Volltext-Suche (Xapian) +- [x] Exportierte Mails als Klartext EML auf Disk +- [x] `--format eml` (Standard) oder `--format mbox` ### Allgemein -- [ ] CLI läuft als Systembenutzer `archivmail` – liest Key aus `/etc/archivmail/keyfile` -- [ ] Fehler werden auf stderr ausgegeben -- [ ] `archivmail help` zeigt Übersicht aller Befehle -- [ ] `archivmail version` zeigt Version +- [x] CLI läuft als Systembenutzer `archivmail` – Config aus `/etc/archivmail/config.yml` +- [x] Fehler werden auf stderr ausgegeben +- [x] `archivmail help` zeigt Übersicht aller Befehle +- [x] `archivmail version` zeigt Version ## Edge Cases - Verzeichnis beim Import enthält keine EML-Dateien → Hinweis + Exit-Code 0 @@ -188,6 +188,14 @@ CLI import → Storage Coordinator → WritableDatabase | `github.com/spf13/cobra` | Subcommand-CLI-Framework | | Xapian CGo-Bindings | Volltext-Filter beim Export (bereits PROJ-5) | +## Implementation Notes +- Subcommands in `cmd/archivmail/main.go` via `os.Args[1]` Router (kein cobra nötig) +- `cmd_import.go`: EML + MBOX Import, `--file`, `--dir`, `--recursive`, `--dry-run`, `--json` +- `cmd_export.go`: EML + MBOX Export, alle Filter, `--force`, `--json` +- MBOX Parser in `pkg/mailparser/mbox.go` (`SplitMbox`) +- MBOX Export mit korrektem `>From ` Escaping +- Deployed auf `root@192.168.1.131`, Daemon läuft + ## QA Test Results _To be added by /qa_ diff --git 
// SplitMbox splits a raw mbox file into individual RFC 2822 message bytes.
//
// Every line starting with "From " is treated as a message separator and is
// not part of any message; data before the first separator is ignored. Within
// a message, one level of mbox quoting is undone: a line matching "^>+From "
// loses its first ">". Lines are joined with "\n" (a trailing "\r" is dropped
// from each line), and leading and trailing whitespace is trimmed from every
// message. Messages that are empty after trimming are skipped.
//
// Each returned slice has its own backing storage, so later messages never
// overwrite earlier ones. It returns nil when data contains no message.
func SplitMbox(data []byte) [][]byte {
	var (
		messages  [][]byte
		current   []byte
		inMessage bool
	)

	// flush stores the message collected so far (if any) and starts a fresh
	// buffer, so the stored slice is never written to again.
	flush := func() {
		if msg := bytes.TrimSpace(current); inMessage && len(msg) > 0 {
			messages = append(messages, msg)
		}
		current = nil
	}

	scanner := bufio.NewScanner(bytes.NewReader(data))
	// Allow a single line to be as long as the whole input, so the scanner
	// can never stop early with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, 64*1024), len(data)+1)

	for scanner.Scan() {
		line := scanner.Text()

		// mbox separator line
		if strings.HasPrefix(line, "From ") {
			flush()
			inMessage = true
			continue
		}
		if !inMessage {
			continue
		}

		// unescape ">From " lines (mbox quoting), one level only
		if rest := strings.TrimLeft(line, ">"); len(rest) < len(line) && strings.HasPrefix(rest, "From ") {
			line = line[1:]
		}
		current = append(current, line...)
		current = append(current, '\n')
	}
	flush()

	return messages
}