package storage

import (
	"context"
	"crypto/sha256"
	"fmt"
	"os"
	"path/filepath"

	"archivmail/pkg/mailparser"
)

// saveAttachments deduplicates and stores attachments from a parsed email.
//
// Each unique attachment (keyed by the SHA-256 of its raw bytes) is written
// to disk once, registered in storage_objects and attachments, and then
// linked to emailID through email_attachments. Attachments with no data are
// skipped.
//
// It returns an error only when the attachment directory or file cannot be
// written. Database failures while registering or linking an attachment are
// deliberately non-fatal: the mail itself is already saved and attachment
// bookkeeping is best-effort. A nil pm, a nil database handle, or an empty
// attachment list is a no-op.
func (s *Store) saveAttachments(ctx context.Context, emailID string, pm *mailparser.ParsedMail) error {
	if s.db == nil || pm == nil || len(pm.Attachments) == 0 {
		return nil
	}

	for _, att := range pm.Attachments {
		if len(att.Data) == 0 {
			continue
		}

		sum := sha256.Sum256(att.Data)
		hash := fmt.Sprintf("%x", sum[:])

		// Check if this attachment is already stored.
		// NOTE(review): any lookup error (not only "no rows") is treated as
		// "not stored yet". The upserts below keep that safe for the
		// database, but a driver-specific no-rows check would be more
		// precise — confirm which driver backs s.db.
		var attID int64
		err := s.db.QueryRow(ctx, `SELECT id FROM attachments WHERE hash = $1`, hash).Scan(&attID)
		if err != nil {
			// Not found — compress (when enabled and actually smaller) and store.
			toWrite := att.Data
			compression := "none"
			if s.compressEnabled {
				compressed, cerr := compressGzip(att.Data)
				if cerr == nil && len(compressed) < len(att.Data) {
					toWrite = compressed
					compression = "gzip"
				}
			}

			attPath := s.attachmentPath(hash)
			if err := os.MkdirAll(filepath.Dir(attPath), 0o755); err != nil {
				return fmt.Errorf("storage: attachment mkdir: %w", err)
			}
			// The path is content-addressed, so an existing file is reused
			// as-is. That makes it essential that a file at attPath is
			// always complete, hence the atomic write.
			if _, statErr := os.Stat(attPath); os.IsNotExist(statErr) {
				if err := writeAttachmentFile(attPath, toWrite, 0o644); err != nil {
					return fmt.Errorf("storage: attachment write: %w", err)
				}
			}

			// Register in storage_objects.
			var soID int64
			soErr := s.db.QueryRow(ctx, `
				INSERT INTO storage_objects (storage_type, path, compression, size_original, size_compressed, checksum)
				VALUES ('filesystem', $1, $2, $3, $4, $5)
				RETURNING id
			`, attPath, compression, int64(len(att.Data)), int64(len(toWrite)), hash).Scan(&soID)

			// Insert the attachment record. The no-op DO UPDATE makes
			// RETURNING yield the existing row's id when the hash is already
			// present. If the storage_objects insert failed, the attachment
			// is still recorded, just without a storage_id.
			var insertErr error
			if soErr == nil {
				insertErr = s.db.QueryRow(ctx, `
					INSERT INTO attachments (filename, mime_type, size_bytes, hash, storage_id)
					VALUES ($1, $2, $3, $4, $5)
					ON CONFLICT (hash) DO UPDATE SET hash = EXCLUDED.hash
					RETURNING id
				`, att.Filename, att.ContentType, int64(len(att.Data)), hash, soID).Scan(&attID)
			} else {
				insertErr = s.db.QueryRow(ctx, `
					INSERT INTO attachments (filename, mime_type, size_bytes, hash)
					VALUES ($1, $2, $3, $4)
					ON CONFLICT (hash) DO UPDATE SET hash = EXCLUDED.hash
					RETURNING id
				`, att.Filename, att.ContentType, int64(len(att.Data)), hash).Scan(&attID)
			}
			if insertErr != nil {
				continue // non-fatal: mail is saved, attachment linking is best-effort
			}
		}

		// Link attachment to email. The result is intentionally discarded:
		// linking is best-effort and must not fail the mail save.
		_, _ = s.db.Exec(ctx, `
			INSERT INTO email_attachments (email_id, attachment_id)
			VALUES ($1, $2)
			ON CONFLICT DO NOTHING
		`, emailID, attID)
	}
	return nil
}

// writeAttachmentFile writes data to path atomically: it fills a temporary
// file in the same directory and renames it into place, so a crash, a failed
// write, or a concurrent writer never leaves a truncated file at path. The
// temporary file is removed if any step fails.
func writeAttachmentFile(path string, data []byte, perm os.FileMode) (err error) {
	tmp, err := os.CreateTemp(filepath.Dir(path), ".att-*.tmp")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()
	defer func() {
		if err != nil {
			// Cleanup is best-effort; the original error is what matters.
			_ = tmp.Close()
			_ = os.Remove(tmpName)
		}
	}()

	if _, err = tmp.Write(data); err != nil {
		return err
	}
	// CreateTemp creates the file with mode 0600; apply the requested mode.
	if err = tmp.Chmod(perm); err != nil {
		return err
	}
	if err = tmp.Close(); err != nil {
		return err
	}
	return os.Rename(tmpName, path)
}

// isAttachmentHash reports whether hash has the form of a hex-encoded
// SHA-256 digest: exactly 64 hexadecimal characters.
func isAttachmentHash(hash string) bool {
	if len(hash) != sha256.Size*2 {
		return false
	}
	for i := 0; i < len(hash); i++ {
		c := hash[i]
		switch {
		case c >= '0' && c <= '9':
		case c >= 'a' && c <= 'f':
		case c >= 'A' && c <= 'F':
		default:
			return false
		}
	}
	return true
}

// LoadAttachment reads an attachment by its SHA-256 hash and returns its
// contents after passing them through maybeDecompress.
//
// The hash is validated before it is used to build a path, so malformed
// input (too short, or containing path separators or "..") yields an error
// instead of a panic or a read outside the attachments directory. A missing
// file is reported as "not found"; other read failures are reported as read
// errors. In both cases the underlying error is wrapped.
func (s *Store) LoadAttachment(hash string) ([]byte, error) {
	if !isAttachmentHash(hash) {
		return nil, fmt.Errorf("storage: invalid attachment hash: %q", hash)
	}
	path := s.attachmentPath(hash)
	data, err := os.ReadFile(path)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("storage: attachment not found: %s: %w", hash, err)
		}
		return nil, fmt.Errorf("storage: attachment read: %s: %w", hash, err)
	}
	return maybeDecompress(data)
}

// attachmentPath returns the on-disk path for a given attachment hash.
// Uses 2-level 2-char prefix sharding: {dir}/attachments/{ab}/{cd}/{hash}
//
// hash must be at least 4 bytes long; shorter input panics on the slice
// expressions below. Callers handling external input must validate first.
func (s *Store) attachmentPath(hash string) string {
	return filepath.Join(s.dir, "attachments", hash[:2], hash[2:4], hash)
}