27d45f58e8
Sprint 1: Emails werden vor AES-256-GCM optional gzip-komprimiert (compress: true). Magic-Byte 0x01 als Prefix ermöglicht backward-kompatibles Load() für Legacy-Dateien. Neue DB-Tabelle storage_objects trackt Kompressions-Metadaten. Sprint 2: Attachments werden via SHA-256 dedupliziert — gleicher Anhang in N Mails wird nur einmal gespeichert. Neue Tabellen: attachments, email_attachments. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
110 lines
3.4 KiB
Go
110 lines
3.4 KiB
Go
package storage
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/archivmail/pkg/mailparser"
|
|
)
|
|
|
|
// saveAttachments deduplicates and stores attachments from a parsed email.
|
|
// Each unique attachment (by SHA-256 hash) is stored once on disk.
|
|
// email_attachments links attachments to their email record.
|
|
func (s *Store) saveAttachments(ctx context.Context, emailID string, pm *mailparser.ParsedMail) error {
|
|
if s.db == nil || len(pm.Attachments) == 0 {
|
|
return nil
|
|
}
|
|
|
|
for _, att := range pm.Attachments {
|
|
if len(att.Data) == 0 {
|
|
continue
|
|
}
|
|
|
|
sum := sha256.Sum256(att.Data)
|
|
hash := fmt.Sprintf("%x", sum[:])
|
|
|
|
// Check if this attachment is already stored
|
|
var attID int64
|
|
err := s.db.QueryRow(ctx, `SELECT id FROM attachments WHERE hash = $1`, hash).Scan(&attID)
|
|
if err != nil {
|
|
// Not found — compress and store
|
|
toWrite := att.Data
|
|
compression := "none"
|
|
if s.compressEnabled {
|
|
compressed, cerr := compressGzip(att.Data)
|
|
if cerr == nil && len(compressed) < len(att.Data) {
|
|
toWrite = compressed
|
|
compression = "gzip"
|
|
}
|
|
}
|
|
|
|
attPath := s.attachmentPath(hash)
|
|
if err := os.MkdirAll(filepath.Dir(attPath), 0o755); err != nil {
|
|
return fmt.Errorf("storage: attachment mkdir: %w", err)
|
|
}
|
|
if _, statErr := os.Stat(attPath); os.IsNotExist(statErr) {
|
|
if err := os.WriteFile(attPath, toWrite, 0o644); err != nil {
|
|
return fmt.Errorf("storage: attachment write: %w", err)
|
|
}
|
|
}
|
|
|
|
// Register in storage_objects
|
|
var soID int64
|
|
soErr := s.db.QueryRow(ctx, `
|
|
INSERT INTO storage_objects (storage_type, path, compression, size_original, size_compressed, checksum)
|
|
VALUES ('filesystem', $1, $2, $3, $4, $5)
|
|
RETURNING id
|
|
`, attPath, compression, int64(len(att.Data)), int64(len(toWrite)), hash).Scan(&soID)
|
|
|
|
// Insert attachment record
|
|
var insertErr error
|
|
if soErr == nil {
|
|
insertErr = s.db.QueryRow(ctx, `
|
|
INSERT INTO attachments (filename, mime_type, size_bytes, hash, storage_id)
|
|
VALUES ($1, $2, $3, $4, $5)
|
|
ON CONFLICT (hash) DO UPDATE SET hash = EXCLUDED.hash
|
|
RETURNING id
|
|
`, att.Filename, att.ContentType, int64(len(att.Data)), hash, soID).Scan(&attID)
|
|
} else {
|
|
insertErr = s.db.QueryRow(ctx, `
|
|
INSERT INTO attachments (filename, mime_type, size_bytes, hash)
|
|
VALUES ($1, $2, $3, $4)
|
|
ON CONFLICT (hash) DO UPDATE SET hash = EXCLUDED.hash
|
|
RETURNING id
|
|
`, att.Filename, att.ContentType, int64(len(att.Data)), hash).Scan(&attID)
|
|
}
|
|
if insertErr != nil {
|
|
continue // non-fatal: mail is saved, attachment linking is best-effort
|
|
}
|
|
}
|
|
|
|
// Link attachment to email
|
|
_, _ = s.db.Exec(ctx, `
|
|
INSERT INTO email_attachments (email_id, attachment_id)
|
|
VALUES ($1, $2)
|
|
ON CONFLICT DO NOTHING
|
|
`, emailID, attID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// LoadAttachment reads and decompresses an attachment by its SHA-256 hash.
|
|
func (s *Store) LoadAttachment(hash string) ([]byte, error) {
|
|
path := s.attachmentPath(hash)
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("storage: attachment not found: %s", hash)
|
|
}
|
|
return maybeDecompress(data)
|
|
}
|
|
|
|
// attachmentPath returns the on-disk path for a given attachment hash.
|
|
// Uses 2-level 2-char prefix sharding: {dir}/attachments/{ab}/{cd}/{hash}
|
|
func (s *Store) attachmentPath(hash string) string {
|
|
return filepath.Join(s.dir, "attachments", hash[:2], hash[2:4], hash)
|
|
}
|