feat(PROJ-36): archivmail recompress — Nachkomprimierung bestehender Mails
Neuer CLI-Subcommand: archivmail recompress [--dry-run]. Komprimiert alle unkomprimierten Dateien im Store atomisch (temp + rename). Überspringt bereits komprimierte Dateien (Magic-Byte 0x01). Aktualisiert storage_objects und emails.storage_id in der DB. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -284,6 +284,7 @@ Commands:
|
|||||||
import-piler Aus mailpiler migrieren (pilerexport oder direkte Store-Methode)
|
import-piler Aus mailpiler migrieren (pilerexport oder direkte Store-Methode)
|
||||||
export E-Mails exportieren (EML, MBOX)
|
export E-Mails exportieren (EML, MBOX)
|
||||||
reindex Index neu aufbauen (alle oder pro Mandant)
|
reindex Index neu aufbauen (alle oder pro Mandant)
|
||||||
|
recompress Bestehende Mails nachträglich gzip-komprimieren
|
||||||
version Version anzeigen
|
version Version anzeigen
|
||||||
help Diese Hilfe anzeigen
|
help Diese Hilfe anzeigen
|
||||||
|
|
||||||
@@ -322,5 +323,9 @@ archivmail export [flags]
|
|||||||
archivmail reindex [flags]
|
archivmail reindex [flags]
|
||||||
--config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml)
|
--config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml)
|
||||||
--tenant Mandanten-ID für partiellen Reindex (0 = alle)
|
--tenant Mandanten-ID für partiellen Reindex (0 = alle)
|
||||||
|
|
||||||
|
archivmail recompress [flags]
|
||||||
|
--config Pfad zur Konfigurationsdatei (Standard: /etc/archivmail/config.yml)
|
||||||
|
--dry-run Simulation: zeigt wie viel gespart würde, ohne Dateien zu ändern
|
||||||
`, AppVersion)
|
`, AppVersion)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/archivmail/config"
|
||||||
|
"github.com/archivmail/internal/storage"
|
||||||
|
)
|
||||||
|
|
||||||
|
// runRecompress walks the mail store and gzip-compresses any file that is not
|
||||||
|
// yet compressed. Files are replaced atomically (write to temp, then rename).
|
||||||
|
//
|
||||||
|
// Usage: archivmail recompress [--config path] [--dry-run]
|
||||||
|
func runRecompress(args []string) {
|
||||||
|
fset := flag.NewFlagSet("recompress", flag.ExitOnError)
|
||||||
|
configPath := fset.String("config", "/etc/archivmail/config.yml", "path to config file")
|
||||||
|
dryRun := fset.Bool("dry-run", false, "simulate without writing changes")
|
||||||
|
_ = fset.Parse(args)
|
||||||
|
|
||||||
|
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||||
|
|
||||||
|
cfg, err := config.Load(*configPath)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "error: load config: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
storeCfg := storage.Config{
|
||||||
|
Dir: cfg.Storage.StorePath,
|
||||||
|
Keyfile: cfg.Storage.Keyfile,
|
||||||
|
DSN: cfg.Database.DSN(),
|
||||||
|
CompressEnabled: true,
|
||||||
|
}
|
||||||
|
mailStore, err := storage.New(storeCfg)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer mailStore.Close()
|
||||||
|
|
||||||
|
if *dryRun {
|
||||||
|
logger.Info("recompress: DRY-RUN — keine Änderungen werden gespeichert")
|
||||||
|
}
|
||||||
|
|
||||||
|
stats, err := mailStore.Recompress(context.Background(), *dryRun, logger)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "error: recompress: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("recompress: abgeschlossen",
|
||||||
|
"total", stats.Total,
|
||||||
|
"compressed", stats.Compressed,
|
||||||
|
"already_compressed", stats.AlreadyCompressed,
|
||||||
|
"skipped_errors", stats.Errors,
|
||||||
|
"bytes_saved_mb", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024),
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -57,6 +57,9 @@ func main() {
|
|||||||
case "reindex":
|
case "reindex":
|
||||||
runReindex(os.Args[2:])
|
runReindex(os.Args[2:])
|
||||||
return
|
return
|
||||||
|
case "recompress":
|
||||||
|
runRecompress(os.Args[2:])
|
||||||
|
return
|
||||||
case "version":
|
case "version":
|
||||||
fmt.Printf("archivmail %s\n", AppVersion)
|
fmt.Printf("archivmail %s\n", AppVersion)
|
||||||
for mod, ver := range Modules {
|
for mod, ver := range Modules {
|
||||||
|
|||||||
@@ -0,0 +1,180 @@
|
|||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RecompressStats summarises the outcome of one Recompress run.
type RecompressStats struct {
	// Total is the number of files examined.
	Total int

	// Compressed is the number of files compressed during this run.
	Compressed int

	// AlreadyCompressed is the number of files that were skipped because
	// they did not need compressing.
	AlreadyCompressed int

	// Errors is the number of files skipped because an error occurred.
	Errors int

	// BytesSaved is the accumulated number of bytes saved
	// (original size minus compressed size).
	BytesSaved int64
}
|
||||||
|
|
||||||
|
// Recompress walks the store directory and gzip-compresses any mail file that
|
||||||
|
// is not yet compressed. Files are replaced atomically via a temp file + rename.
|
||||||
|
// Already-compressed files (magic byte 0x01) are skipped.
|
||||||
|
// If dryRun is true, no files are written but stats are calculated.
|
||||||
|
func (s *Store) Recompress(ctx context.Context, dryRun bool, logger *slog.Logger) (RecompressStats, error) {
|
||||||
|
storeDir := filepath.Join(s.dir, "store")
|
||||||
|
var stats RecompressStats
|
||||||
|
|
||||||
|
err := filepath.WalkDir(storeDir, func(path string, d fs.DirEntry, werr error) error {
|
||||||
|
if werr != nil {
|
||||||
|
return werr
|
||||||
|
}
|
||||||
|
if d.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.Total++
|
||||||
|
id := d.Name()
|
||||||
|
|
||||||
|
// Read raw file bytes (encrypted or not)
|
||||||
|
raw, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("recompress: read failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decrypt to get plaintext
|
||||||
|
plaintext := raw
|
||||||
|
if s.key != nil {
|
||||||
|
pt, err := s.decrypt(raw)
|
||||||
|
if err != nil {
|
||||||
|
// Pre-encryption file — treat raw bytes as plaintext
|
||||||
|
pt = raw
|
||||||
|
}
|
||||||
|
plaintext = pt
|
||||||
|
}
|
||||||
|
|
||||||
|
// Already compressed?
|
||||||
|
if len(plaintext) > 0 && plaintext[0] == magicGzip {
|
||||||
|
stats.AlreadyCompressed++
|
||||||
|
if stats.AlreadyCompressed%1000 == 0 {
|
||||||
|
logger.Info("recompress: progress", "total", stats.Total, "already_compressed", stats.AlreadyCompressed)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compress
|
||||||
|
compressed, err := compressGzip(plaintext)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("recompress: compress failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if compression doesn't help
|
||||||
|
if len(compressed) >= len(plaintext) {
|
||||||
|
stats.AlreadyCompressed++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bytesSaved := int64(len(raw)) - int64(len(compressed))
|
||||||
|
if s.key != nil {
|
||||||
|
// After encryption the size changes; estimate savings from plaintext difference
|
||||||
|
bytesSaved = int64(len(plaintext)) - int64(len(compressed))
|
||||||
|
}
|
||||||
|
|
||||||
|
if dryRun {
|
||||||
|
stats.Compressed++
|
||||||
|
stats.BytesSaved += bytesSaved
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encrypt compressed content
|
||||||
|
toWrite := compressed
|
||||||
|
if s.key != nil {
|
||||||
|
enc, err := s.encrypt(compressed)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("recompress: encrypt failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
toWrite = enc
|
||||||
|
}
|
||||||
|
|
||||||
|
// Atomic write: temp file in same directory, then rename
|
||||||
|
dir := filepath.Dir(path)
|
||||||
|
tmp, err := os.CreateTemp(dir, ".recompress-*")
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("recompress: temp file failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
tmpName := tmp.Name()
|
||||||
|
|
||||||
|
if _, err := tmp.Write(toWrite); err != nil {
|
||||||
|
_ = tmp.Close()
|
||||||
|
_ = os.Remove(tmpName)
|
||||||
|
logger.Warn("recompress: write temp failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := tmp.Close(); err != nil {
|
||||||
|
_ = os.Remove(tmpName)
|
||||||
|
logger.Warn("recompress: close temp failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.Rename(tmpName, path); err != nil {
|
||||||
|
_ = os.Remove(tmpName)
|
||||||
|
logger.Warn("recompress: rename failed", "id", id, "err", err)
|
||||||
|
stats.Errors++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update storage_objects record if DB available
|
||||||
|
if s.db != nil {
|
||||||
|
origSize := int64(len(plaintext))
|
||||||
|
compSize := int64(len(toWrite))
|
||||||
|
_, _ = s.db.Exec(ctx, `
|
||||||
|
INSERT INTO storage_objects (storage_type, path, compression, size_original, size_compressed, checksum)
|
||||||
|
VALUES ('filesystem', $1, 'gzip', $2, $3, $4)
|
||||||
|
ON CONFLICT DO NOTHING
|
||||||
|
`, path, origSize, compSize, id)
|
||||||
|
// Also update emails.storage_id if the record exists but storage_id is null
|
||||||
|
_, _ = s.db.Exec(ctx, `
|
||||||
|
UPDATE emails SET storage_id = (
|
||||||
|
SELECT id FROM storage_objects WHERE checksum = $1 LIMIT 1
|
||||||
|
) WHERE id = $1 AND storage_id IS NULL
|
||||||
|
`, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.Compressed++
|
||||||
|
stats.BytesSaved += bytesSaved
|
||||||
|
|
||||||
|
if stats.Total%500 == 0 {
|
||||||
|
logger.Info("recompress: progress",
|
||||||
|
"total", stats.Total,
|
||||||
|
"compressed", stats.Compressed,
|
||||||
|
"already_compressed", stats.AlreadyCompressed,
|
||||||
|
"bytes_saved", fmt.Sprintf("%.1f MB", float64(stats.BytesSaved)/1024/1024),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return stats, fmt.Errorf("recompress: walk: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user