diff --git a/cmd/archivmail/cmd_import.go b/cmd/archivmail/cmd_import.go index 6309195..7755e34 100644 --- a/cmd/archivmail/cmd_import.go +++ b/cmd/archivmail/cmd_import.go @@ -284,6 +284,7 @@ Commands: import-piler Aus mailpiler migrieren (pilerexport oder direkte Store-Methode) export E-Mails exportieren (EML, MBOX) reindex Index neu aufbauen (alle oder pro Mandant) + recompress Bestehende Mails nachträglich gzip-komprimieren version Version anzeigen help Diese Hilfe anzeigen @@ -322,5 +323,9 @@ archivmail export [flags] archivmail reindex [flags] --config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml) --tenant Mandanten-ID für partiellen Reindex (0 = alle) + +archivmail recompress [flags] + --config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml) + --dry-run Simulation: zeigt wie viel gespart würde, ohne Dateien zu ändern `, AppVersion) } diff --git a/cmd/archivmail/cmd_recompress.go b/cmd/archivmail/cmd_recompress.go new file mode 100644 index 0000000..57f2a79 --- /dev/null +++ b/cmd/archivmail/cmd_recompress.go @@ -0,0 +1,62 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log/slog" + "os" + + "github.com/archivmail/config" + "github.com/archivmail/internal/storage" +) + +// runRecompress walks the mail store and gzip-compresses any file that is not +// yet compressed. Files are replaced atomically (write to temp, then rename). +// +// Usage: archivmail recompress [--config path] [--dry-run] +func runRecompress(args []string) { + fset := flag.NewFlagSet("recompress", flag.ExitOnError) + configPath := fset.String("config", "/etc/archivmail/config.yml", "path to config file") + dryRun := fset.Bool("dry-run", false, "simulate without writing changes") + _ = fset.Parse(args) + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})) + + cfg, err := config.Load(*configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "error: load config: %v\n", err) + os.Exit(1) + } + + storeCfg := storage.Config{ + Dir: cfg.Storage.StorePath, + Keyfile: cfg.Storage.Keyfile, + DSN: cfg.Database.DSN(), + CompressEnabled: true, + } + mailStore, err := storage.New(storeCfg) + if err != nil { + fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err) + os.Exit(1) + } + defer mailStore.Close() + + if *dryRun { + logger.Info("recompress: DRY-RUN — keine Änderungen werden gespeichert") + } + + stats, err := mailStore.Recompress(context.Background(), *dryRun, logger) + if err != nil { + fmt.Fprintf(os.Stderr, "error: recompress: %v\n", err) + os.Exit(1) + } + + logger.Info("recompress: abgeschlossen", + "total", stats.Total, + "compressed", stats.Compressed, + "already_compressed", stats.AlreadyCompressed, + "skipped_errors", stats.Errors, + "bytes_saved_mb", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024), + ) +} diff --git a/cmd/archivmail/main.go b/cmd/archivmail/main.go index bb1bc91..922cdde 100644 --- a/cmd/archivmail/main.go +++ b/cmd/archivmail/main.go @@ -57,6 +57,9 @@ func main() { case "reindex": runReindex(os.Args[2:]) return + case "recompress": + runRecompress(os.Args[2:]) + return case "version": fmt.Printf("archivmail %s\n", AppVersion) for mod, ver := range Modules { diff --git a/internal/storage/recompress.go b/internal/storage/recompress.go new file mode 100644 index 0000000..61a3e76 --- /dev/null +++ b/internal/storage/recompress.go @@ -0,0 +1,180 @@ +package storage + +import ( + "context" + "fmt" + "io/fs" + "log/slog" + "os" + "path/filepath" +) + +// RecompressStats holds counters from a Recompress run. +type RecompressStats struct { + Total int + Compressed int // files that were compressed in this run + AlreadyCompressed int // files already compressed, skipped + Errors int // files skipped due to errors + BytesSaved int64 // total bytes saved (original − compressed on disk) +} + +// Recompress walks the store directory and gzip-compresses any mail file that +// is not yet compressed. Files are replaced atomically via a temp file + rename. +// Already-compressed files (magic byte 0x01) are skipped. +// If dryRun is true, no files are written but stats are calculated. +func (s *Store) Recompress(ctx context.Context, dryRun bool, logger *slog.Logger) (RecompressStats, error) { + storeDir := filepath.Join(s.dir, "store") + var stats RecompressStats + + err := filepath.WalkDir(storeDir, func(path string, d fs.DirEntry, werr error) error { + if werr != nil { + return werr + } + if d.IsDir() { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + stats.Total++ + id := d.Name() + + // Read raw file bytes (encrypted or not) + raw, err := os.ReadFile(path) + if err != nil { + logger.Warn("recompress: read failed", "id", id, "err", err) + stats.Errors++ + return nil + } + + // Decrypt to get plaintext + plaintext := raw + if s.key != nil { + pt, err := s.decrypt(raw) + if err != nil { + // Pre-encryption file — treat raw bytes as plaintext + pt = raw + } + plaintext = pt + } + + // Already compressed? + if len(plaintext) > 0 && plaintext[0] == magicGzip { + stats.AlreadyCompressed++ + if stats.AlreadyCompressed%1000 == 0 { + logger.Info("recompress: progress", "total", stats.Total, "already_compressed", stats.AlreadyCompressed) + } + return nil + } + + // Compress + compressed, err := compressGzip(plaintext) + if err != nil { + logger.Warn("recompress: compress failed", "id", id, "err", err) + stats.Errors++ + return nil + } + + // Skip if compression doesn't help + if len(compressed) >= len(plaintext) { + stats.AlreadyCompressed++ + return nil + } + + bytesSaved := int64(len(raw)) - int64(len(compressed)) + if s.key != nil { + // After encryption the size changes; estimate savings from plaintext difference + bytesSaved = int64(len(plaintext)) - int64(len(compressed)) + } + + if dryRun { + stats.Compressed++ + stats.BytesSaved += bytesSaved + return nil + } + + // Encrypt compressed content + toWrite := compressed + if s.key != nil { + enc, err := s.encrypt(compressed) + if err != nil { + logger.Warn("recompress: encrypt failed", "id", id, "err", err) + stats.Errors++ + return nil + } + toWrite = enc + } + + // Atomic write: temp file in same directory, then rename + dir := filepath.Dir(path) + tmp, err := os.CreateTemp(dir, ".recompress-*") + if err != nil { + logger.Warn("recompress: temp file failed", "id", id, "err", err) + stats.Errors++ + return nil + } + tmpName := tmp.Name() + + if _, err := tmp.Write(toWrite); err != nil { + _ = tmp.Close() + _ = os.Remove(tmpName) + logger.Warn("recompress: write temp failed", "id", id, "err", err) + stats.Errors++ + return nil + } + if err := tmp.Close(); err != nil { + _ = os.Remove(tmpName) + logger.Warn("recompress: close temp failed", "id", id, "err", err) + stats.Errors++ + return nil + } + + if err := os.Rename(tmpName, path); err != nil { + _ = os.Remove(tmpName) + logger.Warn("recompress: rename failed", "id", id, "err", err) + stats.Errors++ + return nil + } + + // Update storage_objects record if DB available + if s.db != nil { + origSize := int64(len(plaintext)) + compSize := int64(len(toWrite)) + _, _ = s.db.Exec(ctx, ` + INSERT INTO storage_objects (storage_type, path, compression, size_original, size_compressed, checksum) + VALUES ('filesystem', $1, 'gzip', $2, $3, $4) + ON CONFLICT DO NOTHING + `, path, origSize, compSize, id) + // Also update emails.storage_id if the record exists but storage_id is null + _, _ = s.db.Exec(ctx, ` + UPDATE emails SET storage_id = ( + SELECT id FROM storage_objects WHERE checksum = $1 LIMIT 1 + ) WHERE id = $1 AND storage_id IS NULL + `, id) + } + + stats.Compressed++ + stats.BytesSaved += bytesSaved + + if stats.Total%500 == 0 { + logger.Info("recompress: progress", + "total", stats.Total, + "compressed", stats.Compressed, + "already_compressed", stats.AlreadyCompressed, + "bytes_saved", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024), + ) + } + + return nil + }) + + if err != nil { + return stats, fmt.Errorf("recompress: walk: %w", err) + } + + return stats, nil +}