feat(PROJ-36): archivmail recompress — Nachkomprimierung bestehender Mails
Neuer CLI-Subcommand: archivmail recompress [--dry-run] Komprimiert alle unkomprimierten Dateien im Store atomisch (temp + rename). Überspringt bereits komprimierte Dateien (Magic-Byte 0x01). Aktualisiert storage_objects und emails.storage_id in der DB. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -284,6 +284,7 @@ Commands:
|
||||
import-piler Aus mailpiler migrieren (pilerexport oder direkte Store-Methode)
|
||||
export E-Mails exportieren (EML, MBOX)
|
||||
reindex Index neu aufbauen (alle oder pro Mandant)
|
||||
recompress Bestehende Mails nachträglich gzip-komprimieren
|
||||
version Version anzeigen
|
||||
help Diese Hilfe anzeigen
|
||||
|
||||
@@ -322,5 +323,9 @@ archivmail export [flags]
|
||||
archivmail reindex [flags]
|
||||
--config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml)
|
||||
--tenant Mandanten-ID für partiellen Reindex (0 = alle)
|
||||
|
||||
archivmail recompress [flags]
|
||||
--config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml)
|
||||
--dry-run Simulation: zeigt wie viel gespart würde, ohne Dateien zu ändern
|
||||
`, AppVersion)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/archivmail/config"
|
||||
"github.com/archivmail/internal/storage"
|
||||
)
|
||||
|
||||
// runRecompress walks the mail store and gzip-compresses any file that is not
|
||||
// yet compressed. Files are replaced atomically (write to temp, then rename).
|
||||
//
|
||||
// Usage: archivmail recompress [--config path] [--dry-run]
|
||||
func runRecompress(args []string) {
|
||||
fset := flag.NewFlagSet("recompress", flag.ExitOnError)
|
||||
configPath := fset.String("config", "/etc/archivmail/config.yml", "path to config file")
|
||||
dryRun := fset.Bool("dry-run", false, "simulate without writing changes")
|
||||
_ = fset.Parse(args)
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||
|
||||
cfg, err := config.Load(*configPath)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: load config: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
storeCfg := storage.Config{
|
||||
Dir: cfg.Storage.StorePath,
|
||||
Keyfile: cfg.Storage.Keyfile,
|
||||
DSN: cfg.Database.DSN(),
|
||||
CompressEnabled: true,
|
||||
}
|
||||
mailStore, err := storage.New(storeCfg)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer mailStore.Close()
|
||||
|
||||
if *dryRun {
|
||||
logger.Info("recompress: DRY-RUN — keine Änderungen werden gespeichert")
|
||||
}
|
||||
|
||||
stats, err := mailStore.Recompress(context.Background(), *dryRun, logger)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: recompress: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
logger.Info("recompress: abgeschlossen",
|
||||
"total", stats.Total,
|
||||
"compressed", stats.Compressed,
|
||||
"already_compressed", stats.AlreadyCompressed,
|
||||
"skipped_errors", stats.Errors,
|
||||
"bytes_saved_mb", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024),
|
||||
)
|
||||
}
|
||||
@@ -57,6 +57,9 @@ func main() {
|
||||
case "reindex":
|
||||
runReindex(os.Args[2:])
|
||||
return
|
||||
case "recompress":
|
||||
runRecompress(os.Args[2:])
|
||||
return
|
||||
case "version":
|
||||
fmt.Printf("archivmail %s\n", AppVersion)
|
||||
for mod, ver := range Modules {
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// RecompressStats holds counters from a Recompress run.
|
||||
type RecompressStats struct {
|
||||
Total int
|
||||
Compressed int // files that were compressed in this run
|
||||
AlreadyCompressed int // files already compressed, skipped
|
||||
Errors int // files skipped due to errors
|
||||
BytesSaved int64 // total bytes saved (original − compressed on disk)
|
||||
}
|
||||
|
||||
// Recompress walks the store directory and gzip-compresses any mail file that
|
||||
// is not yet compressed. Files are replaced atomically via a temp file + rename.
|
||||
// Already-compressed files (magic byte 0x01) are skipped.
|
||||
// If dryRun is true, no files are written but stats are calculated.
|
||||
func (s *Store) Recompress(ctx context.Context, dryRun bool, logger *slog.Logger) (RecompressStats, error) {
|
||||
storeDir := filepath.Join(s.dir, "store")
|
||||
var stats RecompressStats
|
||||
|
||||
err := filepath.WalkDir(storeDir, func(path string, d fs.DirEntry, werr error) error {
|
||||
if werr != nil {
|
||||
return werr
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
stats.Total++
|
||||
id := d.Name()
|
||||
|
||||
// Read raw file bytes (encrypted or not)
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
logger.Warn("recompress: read failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decrypt to get plaintext
|
||||
plaintext := raw
|
||||
if s.key != nil {
|
||||
pt, err := s.decrypt(raw)
|
||||
if err != nil {
|
||||
// Pre-encryption file — treat raw bytes as plaintext
|
||||
pt = raw
|
||||
}
|
||||
plaintext = pt
|
||||
}
|
||||
|
||||
// Already compressed?
|
||||
if len(plaintext) > 0 && plaintext[0] == magicGzip {
|
||||
stats.AlreadyCompressed++
|
||||
if stats.AlreadyCompressed%1000 == 0 {
|
||||
logger.Info("recompress: progress", "total", stats.Total, "already_compressed", stats.AlreadyCompressed)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compress
|
||||
compressed, err := compressGzip(plaintext)
|
||||
if err != nil {
|
||||
logger.Warn("recompress: compress failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip if compression doesn't help
|
||||
if len(compressed) >= len(plaintext) {
|
||||
stats.AlreadyCompressed++
|
||||
return nil
|
||||
}
|
||||
|
||||
bytesSaved := int64(len(raw)) - int64(len(compressed))
|
||||
if s.key != nil {
|
||||
// After encryption the size changes; estimate savings from plaintext difference
|
||||
bytesSaved = int64(len(plaintext)) - int64(len(compressed))
|
||||
}
|
||||
|
||||
if dryRun {
|
||||
stats.Compressed++
|
||||
stats.BytesSaved += bytesSaved
|
||||
return nil
|
||||
}
|
||||
|
||||
// Encrypt compressed content
|
||||
toWrite := compressed
|
||||
if s.key != nil {
|
||||
enc, err := s.encrypt(compressed)
|
||||
if err != nil {
|
||||
logger.Warn("recompress: encrypt failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
toWrite = enc
|
||||
}
|
||||
|
||||
// Atomic write: temp file in same directory, then rename
|
||||
dir := filepath.Dir(path)
|
||||
tmp, err := os.CreateTemp(dir, ".recompress-*")
|
||||
if err != nil {
|
||||
logger.Warn("recompress: temp file failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
tmpName := tmp.Name()
|
||||
|
||||
if _, err := tmp.Write(toWrite); err != nil {
|
||||
_ = tmp.Close()
|
||||
_ = os.Remove(tmpName)
|
||||
logger.Warn("recompress: write temp failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
_ = os.Remove(tmpName)
|
||||
logger.Warn("recompress: close temp failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpName, path); err != nil {
|
||||
_ = os.Remove(tmpName)
|
||||
logger.Warn("recompress: rename failed", "id", id, "err", err)
|
||||
stats.Errors++
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update storage_objects record if DB available
|
||||
if s.db != nil {
|
||||
origSize := int64(len(plaintext))
|
||||
compSize := int64(len(toWrite))
|
||||
_, _ = s.db.Exec(ctx, `
|
||||
INSERT INTO storage_objects (storage_type, path, compression, size_original, size_compressed, checksum)
|
||||
VALUES ('filesystem', $1, 'gzip', $2, $3, $4)
|
||||
ON CONFLICT DO NOTHING
|
||||
`, path, origSize, compSize, id)
|
||||
// Also update emails.storage_id if the record exists but storage_id is null
|
||||
_, _ = s.db.Exec(ctx, `
|
||||
UPDATE emails SET storage_id = (
|
||||
SELECT id FROM storage_objects WHERE checksum = $1 LIMIT 1
|
||||
) WHERE id = $1 AND storage_id IS NULL
|
||||
`, id)
|
||||
}
|
||||
|
||||
stats.Compressed++
|
||||
stats.BytesSaved += bytesSaved
|
||||
|
||||
if stats.Total%500 == 0 {
|
||||
logger.Info("recompress: progress",
|
||||
"total", stats.Total,
|
||||
"compressed", stats.Compressed,
|
||||
"already_compressed", stats.AlreadyCompressed,
|
||||
"bytes_saved", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return stats, fmt.Errorf("recompress: walk: %w", err)
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
Reference in New Issue
Block a user