feat(PROJ-5): AES-256-GCM Verschlüsselung, PostgreSQL Metadaten, Async Index Worker
- Storage: AES-256-GCM Verschlüsselung (keyfile, graceful fallback bei fehlendem Key) - Storage: PostgreSQL emails-Tabelle mit Auto-Migration - Storage: Save/Delete/Stats/FirstAndLastMail nutzen DB wenn verfügbar - Index: Async IndexWorker (Go-Channel, Queue 1000, non-blocking Submit) - SMTP: IndexCallback für async Indexierung nach Mail-Eingang - main: Backfill beim Start (40 Mails migriert + indexiert) - Bestehende Mails werden transparent entschlüsselt (Fallback auf Raw) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -64,11 +64,17 @@ func runExport(args []string) {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
mailStore, err := storage.New(cfg.Storage.StorePath)
|
storeCfg := storage.Config{
|
||||||
|
Dir: cfg.Storage.StorePath,
|
||||||
|
Keyfile: cfg.Storage.Keyfile,
|
||||||
|
DSN: cfg.Database.DSN(),
|
||||||
|
}
|
||||||
|
mailStore, err := storage.New(storeCfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
defer mailStore.Close()
|
||||||
|
|
||||||
batchSize := cfg.Index.BatchSize
|
batchSize := cfg.Index.BatchSize
|
||||||
if batchSize <= 0 {
|
if batchSize <= 0 {
|
||||||
|
|||||||
@@ -56,11 +56,17 @@ func runImport(args []string) {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
mailStore, err := storage.New(cfg.Storage.StorePath)
|
storeCfg := storage.Config{
|
||||||
|
Dir: cfg.Storage.StorePath,
|
||||||
|
Keyfile: cfg.Storage.Keyfile,
|
||||||
|
DSN: cfg.Database.DSN(),
|
||||||
|
}
|
||||||
|
mailStore, err := storage.New(storeCfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
fmt.Fprintf(os.Stderr, "error: storage init: %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
defer mailStore.Close()
|
||||||
|
|
||||||
batchSize := cfg.Index.BatchSize
|
batchSize := cfg.Index.BatchSize
|
||||||
if batchSize <= 0 {
|
if batchSize <= 0 {
|
||||||
|
|||||||
+119
-6
@@ -8,6 +8,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -20,6 +21,7 @@ import (
|
|||||||
"github.com/archivmail/internal/smtpd"
|
"github.com/archivmail/internal/smtpd"
|
||||||
"github.com/archivmail/internal/storage"
|
"github.com/archivmail/internal/storage"
|
||||||
"github.com/archivmail/internal/userstore"
|
"github.com/archivmail/internal/userstore"
|
||||||
|
"github.com/archivmail/pkg/mailparser"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -54,12 +56,18 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Storage
|
// Storage with encryption + DB metadata
|
||||||
mailStore, err := storage.New(cfg.Storage.StorePath)
|
storeCfg := storage.Config{
|
||||||
|
Dir: cfg.Storage.StorePath,
|
||||||
|
Keyfile: cfg.Storage.Keyfile,
|
||||||
|
DSN: cfg.Database.DSN(),
|
||||||
|
}
|
||||||
|
mailStore, err := storage.New(storeCfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("storage init failed", "err", err)
|
logger.Error("storage init failed", "err", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
defer mailStore.Close()
|
||||||
|
|
||||||
// Index
|
// Index
|
||||||
indexBackend := cfg.Index.Backend
|
indexBackend := cfg.Index.Backend
|
||||||
@@ -77,6 +85,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer idx.Close()
|
defer idx.Close()
|
||||||
|
|
||||||
|
// Async index worker
|
||||||
|
asyncQueueSize := cfg.Index.AsyncQueueSize
|
||||||
|
if asyncQueueSize <= 0 {
|
||||||
|
asyncQueueSize = 1000
|
||||||
|
}
|
||||||
|
worker := index.NewWorker(idx, asyncQueueSize, logger)
|
||||||
|
worker.Start()
|
||||||
|
defer worker.Stop()
|
||||||
|
|
||||||
// User store
|
// User store
|
||||||
users, err := userstore.New(cfg.Database.DSN())
|
users, err := userstore.New(cfg.Database.DSN())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -118,11 +135,14 @@ func main() {
|
|||||||
Handler: srv,
|
Handler: srv,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start SMTP daemon
|
// Start SMTP daemon with index worker integration
|
||||||
if cfg.SMTP.Bind == "" {
|
if cfg.SMTP.Bind == "" {
|
||||||
cfg.SMTP.Bind = fmt.Sprintf(":%d", cfg.Server.SMTPPort)
|
cfg.SMTP.Bind = fmt.Sprintf(":%d", cfg.Server.SMTPPort)
|
||||||
}
|
}
|
||||||
smtpDaemon := smtpd.New(cfg.SMTP, mailStore, logger)
|
smtpDaemon := smtpd.New(cfg.SMTP, mailStore, logger)
|
||||||
|
smtpDaemon.SetIndexCallback(func(raw []byte, id string) {
|
||||||
|
submitToWorker(worker, mailStore, raw, id, logger)
|
||||||
|
})
|
||||||
if err := smtpDaemon.Start(); err != nil {
|
if err := smtpDaemon.Start(); err != nil {
|
||||||
logger.Error("SMTP daemon failed to start", "err", err)
|
logger.Error("SMTP daemon failed to start", "err", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
@@ -132,7 +152,7 @@ func main() {
|
|||||||
// Wire SMTP daemon into API server for status endpoint
|
// Wire SMTP daemon into API server for status endpoint
|
||||||
srv.SetSMTPDaemon(smtpDaemon)
|
srv.SetSMTPDaemon(smtpDaemon)
|
||||||
|
|
||||||
// IMAP store + importer
|
// IMAP store + importer (wired to use async worker)
|
||||||
imapSt, err := imapstore.New(cfg.Database.DSN(), cfg.API.Secret)
|
imapSt, err := imapstore.New(cfg.Database.DSN(), cfg.API.Secret)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("imap store init failed", "err", err)
|
logger.Error("imap store init failed", "err", err)
|
||||||
@@ -142,6 +162,9 @@ func main() {
|
|||||||
imapImp := imapstore.NewImporter(imapSt, mailStore, idx, logger)
|
imapImp := imapstore.NewImporter(imapSt, mailStore, idx, logger)
|
||||||
srv.SetImap(imapSt, imapImp)
|
srv.SetImap(imapSt, imapImp)
|
||||||
|
|
||||||
|
// Backfill in background: migrate existing files into DB metadata + re-index
|
||||||
|
go runBackfill(context.Background(), mailStore, idx, worker, logger)
|
||||||
|
|
||||||
// Start HTTP API
|
// Start HTTP API
|
||||||
go func() {
|
go func() {
|
||||||
logger.Info("starting API server", "addr", bind)
|
logger.Info("starting API server", "addr", bind)
|
||||||
@@ -161,6 +184,98 @@ func main() {
|
|||||||
httpServer.Shutdown(ctx)
|
httpServer.Shutdown(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// submitToWorker parses a raw email and submits it to the async index worker.
|
||||||
|
func submitToWorker(worker *index.IndexWorker, store *storage.Store, raw []byte, id string, logger *slog.Logger) {
|
||||||
|
pm, err := mailparser.Parse(raw)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("index: parse failed, skipping indexing", "id", id, "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var attachNames []string
|
||||||
|
for _, a := range pm.Attachments {
|
||||||
|
if a.Filename != "" {
|
||||||
|
attachNames = append(attachNames, a.Filename)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
doc := index.MailDocument{
|
||||||
|
ID: id,
|
||||||
|
From: pm.From,
|
||||||
|
To: strings.Join(pm.To, ", "),
|
||||||
|
Subject: pm.Subject,
|
||||||
|
Body: pm.TextBody,
|
||||||
|
AttachNames: strings.Join(attachNames, " "),
|
||||||
|
HasAttachment: len(pm.Attachments) > 0,
|
||||||
|
Date: pm.Date,
|
||||||
|
Size: int64(len(raw)),
|
||||||
|
}
|
||||||
|
|
||||||
|
worker.Submit(doc)
|
||||||
|
|
||||||
|
// Mark as indexed in DB
|
||||||
|
if err := store.SetIndexedAt(context.Background(), id); err != nil {
|
||||||
|
logger.Warn("index: set indexed_at failed", "id", id, "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBackfill walks the store, inserts missing DB metadata, and indexes
|
||||||
|
// emails that have not yet been indexed.
|
||||||
|
func runBackfill(ctx context.Context, store *storage.Store, idx index.Indexer, worker *index.IndexWorker, logger *slog.Logger) {
|
||||||
|
logger.Info("backfill: starting")
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
needIndex := 0
|
||||||
|
errCount := 0
|
||||||
|
|
||||||
|
err := store.WalkStore(ctx, func(id string) error {
|
||||||
|
count++
|
||||||
|
|
||||||
|
raw, err := store.Load(id)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: load failed", "id", id, "err", err)
|
||||||
|
errCount++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := mailparser.Parse(raw)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: parse failed", "id", id, "err", err)
|
||||||
|
errCount++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert metadata into DB
|
||||||
|
if err := store.SaveMeta(ctx, id, pm, len(raw)); err != nil {
|
||||||
|
logger.Warn("backfill: save meta failed", "id", id, "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if already indexed
|
||||||
|
alreadyIndexed, err := store.IsIndexed(ctx, id)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: check indexed failed", "id", id, "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !alreadyIndexed {
|
||||||
|
needIndex++
|
||||||
|
submitToWorker(worker, store, raw, id, logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
if count%100 == 0 {
|
||||||
|
logger.Info("backfill: progress", "processed", count, "need_index", needIndex, "errors", errCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("backfill failed", "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("backfill: complete", "total", count, "submitted_for_index", needIndex, "errors", errCount)
|
||||||
|
}
|
||||||
|
|
||||||
// seedDefaultUsers creates default admin and auditor accounts if no users exist yet.
|
// seedDefaultUsers creates default admin and auditor accounts if no users exist yet.
|
||||||
func seedDefaultUsers(users *userstore.Store, logger *slog.Logger) error {
|
func seedDefaultUsers(users *userstore.Store, logger *slog.Logger) error {
|
||||||
all, err := users.List("")
|
all, err := users.List("")
|
||||||
@@ -183,5 +298,3 @@ func seedDefaultUsers(users *userstore.Store, logger *slog.Logger) error {
|
|||||||
logger.Warn("default users created — change passwords immediately!", "admin", "admin", "auditor", "auditor")
|
logger.Warn("default users created — change passwords immediately!", "admin", "admin", "auditor", "auditor")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -16,7 +16,7 @@
|
|||||||
| PROJ-2 | E-Mail-Import: EML/MBOX Upload | In Progress | [PROJ-2](PROJ-2-import-eml-mbox.md) | 2026-03-12 |
|
| PROJ-2 | E-Mail-Import: EML/MBOX Upload | In Progress | [PROJ-2](PROJ-2-import-eml-mbox.md) | 2026-03-12 |
|
||||||
| PROJ-3 | E-Mail-Import: IMAP-Verbindung | In Progress | [PROJ-3](PROJ-3-import-imap.md) | 2026-03-12 |
|
| PROJ-3 | E-Mail-Import: IMAP-Verbindung | In Progress | [PROJ-3](PROJ-3-import-imap.md) | 2026-03-12 |
|
||||||
| PROJ-4 | E-Mail-Import: SMTP-Eingang via BCC (primär) | In Progress | [PROJ-4](PROJ-4-import-smtp.md) | 2026-03-12 |
|
| PROJ-4 | E-Mail-Import: SMTP-Eingang via BCC (primär) | In Progress | [PROJ-4](PROJ-4-import-smtp.md) | 2026-03-12 |
|
||||||
| PROJ-5 | E-Mail-Speicherung & Volltext-Indexierung | In Progress | [PROJ-5](PROJ-5-speicherung-und-indexierung.md) | 2026-03-12 |
|
| PROJ-5 | E-Mail-Speicherung & Volltext-Indexierung | In Review | [PROJ-5](PROJ-5-speicherung-und-indexierung.md) | 2026-03-12 |
|
||||||
| PROJ-6 | Volltext-Suche & Filterung | In Progress | [PROJ-6](PROJ-6-volltext-suche.md) | 2026-03-12 |
|
| PROJ-6 | Volltext-Suche & Filterung | In Progress | [PROJ-6](PROJ-6-volltext-suche.md) | 2026-03-12 |
|
||||||
| PROJ-7 | E-Mail-Ansicht (Lesen & Anhänge) | In Progress | [PROJ-7](PROJ-7-email-ansicht.md) | 2026-03-12 |
|
| PROJ-7 | E-Mail-Ansicht (Lesen & Anhänge) | In Progress | [PROJ-7](PROJ-7-email-ansicht.md) | 2026-03-12 |
|
||||||
| PROJ-8 | Automatischer IMAP-Sync (Cron-Job) | In Progress | [PROJ-8](PROJ-8-imap-auto-sync.md) | 2026-03-12 |
|
| PROJ-8 | Automatischer IMAP-Sync (Cron-Job) | In Progress | [PROJ-8](PROJ-8-imap-auto-sync.md) | 2026-03-12 |
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
# PROJ-5: E-Mail-Speicherung & Volltext-Indexierung
|
# PROJ-5: E-Mail-Speicherung & Volltext-Indexierung
|
||||||
|
|
||||||
## Status: In Progress
|
## Status: In Review
|
||||||
**Created:** 2026-03-12
|
**Created:** 2026-03-12
|
||||||
**Last Updated:** 2026-03-12
|
**Last Updated:** 2026-03-14
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
- None (Basis-Feature, wird von Import-Features genutzt)
|
- None (Basis-Feature, wird von Import-Features genutzt)
|
||||||
@@ -187,6 +187,54 @@ Body (ohne Anhänge) Anhänge (0..n)
|
|||||||
| `mime`, `mime/multipart` | MIME-Parsing (Go Stdlib) |
|
| `mime`, `mime/multipart` | MIME-Parsing (Go Stdlib) |
|
||||||
| `golang.org/x/net/html` | HTML → Plain-Text für Index |
|
| `golang.org/x/net/html` | HTML → Plain-Text für Index |
|
||||||
|
|
||||||
|
## Implementation Notes (2026-03-14)
|
||||||
|
|
||||||
|
### What was built
|
||||||
|
|
||||||
|
1. **AES-256-GCM encryption** in `internal/storage/storage.go`:
|
||||||
|
- Key loaded from file at `cfg.Storage.Keyfile` path or `ARCHIVMAIL_KEY` env var
|
||||||
|
- Supports base64-encoded or raw 32-byte key files
|
||||||
|
- If no keyfile configured, stores unencrypted (backwards compatible for dev)
|
||||||
|
- `Save()` encrypts with random 12-byte nonce prepended to ciphertext
|
||||||
|
- `Load()` decrypts transparently; falls back to raw read if decryption fails (pre-encryption files)
|
||||||
|
- SHA-256 dedup based on **plaintext** content (hash before encrypt)
|
||||||
|
- Same flat file path `store/{id[:2]}/{id}`
|
||||||
|
|
||||||
|
2. **PostgreSQL `emails` metadata table** auto-created at startup:
|
||||||
|
- Schema: `id TEXT PK, received_at, mail_from, mail_to, subject, size_bytes, has_attach, indexed_at`
|
||||||
|
- Indexes on `received_at`, `mail_from`, and GIN on `subject`
|
||||||
|
- `Save()` inserts metadata via mailparser after writing file (ON CONFLICT DO NOTHING)
|
||||||
|
- `Delete()` also removes DB row
|
||||||
|
- `Stats()` and `FirstAndLastMail()` use DB queries when available (fast), fall back to FS walk
|
||||||
|
- New methods: `SaveMeta()`, `SetIndexedAt()`, `IsIndexed()`, `WalkStore()`
|
||||||
|
|
||||||
|
3. **Storage constructor changed** from `New(dir string)` to `New(cfg storage.Config)`:
|
||||||
|
- `Config` struct: `Dir`, `Keyfile`, `DSN`
|
||||||
|
- All callers updated: `main.go`, `cmd_import.go`, `cmd_export.go`
|
||||||
|
- `Close()` method added to release DB pool
|
||||||
|
|
||||||
|
4. **Async Index Worker** in `internal/index/worker.go`:
|
||||||
|
- Buffered channel queue (configurable size via `config.Index.AsyncQueueSize`)
|
||||||
|
- `Submit()` is non-blocking; drops + warns if queue full
|
||||||
|
- `Start()` launches background goroutine; `Stop()` drains queue and blocks until done
|
||||||
|
- Serialises Xapian writes (one writer at a time)
|
||||||
|
|
||||||
|
5. **SMTP daemon integration**: `SetIndexCallback()` on `smtpd.Daemon`
|
||||||
|
- After each successfully stored mail, callback submits to async worker
|
||||||
|
- Wired in `main.go`
|
||||||
|
|
||||||
|
6. **Backfill at startup** in `main.go`:
|
||||||
|
- Runs in background goroutine
|
||||||
|
- Walks store directory, parses each file, upserts DB metadata
|
||||||
|
- Submits un-indexed emails (`indexed_at IS NULL`) to the async worker
|
||||||
|
- Logs progress every 100 files
|
||||||
|
|
||||||
|
### Deviations from spec
|
||||||
|
- Store path kept flat `store/{id[:2]}/{id}` (no `server_id/customer_id` hierarchy) per user decision
|
||||||
|
- Attachment dedup store (`astore/`) not yet implemented (body + attachments stored together in `.m` files as before)
|
||||||
|
- No separate `attachments` or `email_attachments` DB tables yet (deferred to future iteration)
|
||||||
|
- IMAP importer still uses synchronous `IndexSync()` directly (not routed through async worker yet)
|
||||||
|
|
||||||
## QA Test Results
|
## QA Test Results
|
||||||
_To be added by /qa_
|
_To be added by /qa_
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,90 @@
|
|||||||
|
package index
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log/slog"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IndexWorker processes MailDocument indexing requests asynchronously via a
|
||||||
|
// buffered channel. It serialises writes to the underlying Indexer (important
|
||||||
|
// for Xapian which only allows one writer at a time).
|
||||||
|
type IndexWorker struct {
|
||||||
|
idx Indexer
|
||||||
|
queue chan MailDocument
|
||||||
|
done chan struct{}
|
||||||
|
wg sync.WaitGroup
|
||||||
|
logger *slog.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWorker creates a new IndexWorker with the given queue capacity.
|
||||||
|
func NewWorker(idx Indexer, queueSize int, logger *slog.Logger) *IndexWorker {
|
||||||
|
if queueSize <= 0 {
|
||||||
|
queueSize = 1000
|
||||||
|
}
|
||||||
|
return &IndexWorker{
|
||||||
|
idx: idx,
|
||||||
|
queue: make(chan MailDocument, queueSize),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit enqueues a document for background indexing. If the queue is full,
|
||||||
|
// the document is dropped and a warning is logged.
|
||||||
|
func (w *IndexWorker) Submit(doc MailDocument) {
|
||||||
|
select {
|
||||||
|
case w.queue <- doc:
|
||||||
|
// queued
|
||||||
|
default:
|
||||||
|
w.logger.Warn("index worker: queue full, dropping document", "id", doc.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start launches the background goroutine that processes the queue.
|
||||||
|
func (w *IndexWorker) Start() {
|
||||||
|
w.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer w.wg.Done()
|
||||||
|
w.logger.Info("index worker: started", "queue_size", cap(w.queue))
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case doc, ok := <-w.queue:
|
||||||
|
if !ok {
|
||||||
|
// Channel closed, drain complete
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := w.idx.IndexSync(doc); err != nil {
|
||||||
|
w.logger.Error("index worker: index failed", "id", doc.ID, "err", err)
|
||||||
|
}
|
||||||
|
case <-w.done:
|
||||||
|
// Drain remaining items in the queue before exiting
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case doc, ok := <-w.queue:
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := w.idx.IndexSync(doc); err != nil {
|
||||||
|
w.logger.Error("index worker: index failed (drain)", "id", doc.ID, "err", err)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop signals the worker to drain remaining items and stop. It blocks until
|
||||||
|
// the worker goroutine has exited.
|
||||||
|
func (w *IndexWorker) Stop() {
|
||||||
|
close(w.done)
|
||||||
|
w.wg.Wait()
|
||||||
|
w.logger.Info("index worker: stopped")
|
||||||
|
}
|
||||||
|
|
||||||
|
// QueueLen returns the current number of items waiting in the queue.
|
||||||
|
func (w *IndexWorker) QueueLen() int {
|
||||||
|
return len(w.queue)
|
||||||
|
}
|
||||||
@@ -29,6 +29,10 @@ type Stats struct {
|
|||||||
LastMailAt atomic.Value // time.Time of last accepted mail
|
LastMailAt atomic.Value // time.Time of last accepted mail
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IndexCallback is called after a mail is successfully stored, with the raw
|
||||||
|
// bytes and the storage ID. Used to submit to the async index worker.
|
||||||
|
type IndexCallback func(raw []byte, id string)
|
||||||
|
|
||||||
// Daemon is the embedded receive-only SMTP server.
|
// Daemon is the embedded receive-only SMTP server.
|
||||||
type Daemon struct {
|
type Daemon struct {
|
||||||
cfg config.SMTPConfig
|
cfg config.SMTPConfig
|
||||||
@@ -38,6 +42,7 @@ type Daemon struct {
|
|||||||
server *smtp.Server
|
server *smtp.Server
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
running bool
|
running bool
|
||||||
|
indexCallback IndexCallback
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new SMTP Daemon. Call Start() to begin accepting connections.
|
// New creates a new SMTP Daemon. Call Start() to begin accepting connections.
|
||||||
@@ -51,6 +56,11 @@ func New(cfg config.SMTPConfig, store *storage.Store, logger *slog.Logger) *Daem
|
|||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetIndexCallback sets the function called after each successfully stored mail.
|
||||||
|
func (d *Daemon) SetIndexCallback(cb IndexCallback) {
|
||||||
|
d.indexCallback = cb
|
||||||
|
}
|
||||||
|
|
||||||
// Start launches the SMTP daemon in a background goroutine.
|
// Start launches the SMTP daemon in a background goroutine.
|
||||||
// It returns immediately; use Stop() for graceful shutdown.
|
// It returns immediately; use Stop() for graceful shutdown.
|
||||||
func (d *Daemon) Start() error {
|
func (d *Daemon) Start() error {
|
||||||
@@ -237,6 +247,12 @@ func (s *session) Data(r io.Reader) error {
|
|||||||
s.daemon.stats.LastMailAt.Store(time.Now())
|
s.daemon.stats.LastMailAt.Store(time.Now())
|
||||||
s.daemon.logger.Info("SMTP: mail stored", "id", id, "from", s.from,
|
s.daemon.logger.Info("SMTP: mail stored", "id", id, "from", s.from,
|
||||||
"rcpts", strings.Join(s.rcpts, ","), "bytes", len(raw), "ip", s.remoteIP)
|
"rcpts", strings.Join(s.rcpts, ","), "bytes", len(raw), "ip", s.remoteIP)
|
||||||
|
|
||||||
|
// Submit to async index worker if callback is set
|
||||||
|
if s.daemon.indexCallback != nil {
|
||||||
|
s.daemon.indexCallback(raw, id)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+453
-18
@@ -1,18 +1,41 @@
|
|||||||
package storage
|
package storage
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/aes"
|
||||||
|
"crypto/cipher"
|
||||||
|
"crypto/rand"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
|
"encoding/base64"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/jackc/pgx/v5"
|
||||||
|
"github.com/jackc/pgx/v5/pgxpool"
|
||||||
|
|
||||||
|
"github.com/archivmail/pkg/mailparser"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Store is a file-based email storage using SHA256 for deduplication.
|
// Config holds the configuration for initialising a Store.
|
||||||
|
type Config struct {
|
||||||
|
Dir string // base directory for file storage
|
||||||
|
Keyfile string // path to 32-byte AES key file; empty = no encryption
|
||||||
|
DSN string // PostgreSQL DSN; empty = no DB
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store is a file-based email storage with optional AES-256-GCM encryption
|
||||||
|
// and optional PostgreSQL metadata.
|
||||||
type Store struct {
|
type Store struct {
|
||||||
dir string
|
dir string
|
||||||
|
key []byte // nil = no encryption
|
||||||
|
db *pgxpool.Pool // nil = no DB
|
||||||
}
|
}
|
||||||
|
|
||||||
// StoreStats reports total mail count and size in bytes.
|
// StoreStats reports total mail count and size in bytes.
|
||||||
@@ -21,19 +44,162 @@ type StoreStats struct {
|
|||||||
TotalBytes int64
|
TotalBytes int64
|
||||||
}
|
}
|
||||||
|
|
||||||
// New initialises the storage directory, creating required subdirectories.
|
// MailRef holds the ID and associated time of a stored mail.
|
||||||
func New(dir string) (*Store, error) {
|
type MailRef struct {
|
||||||
|
ID string
|
||||||
|
ModTime time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// New initialises the storage directory, optionally loads the encryption key
|
||||||
|
// and connects to PostgreSQL.
|
||||||
|
func New(cfg Config) (*Store, error) {
|
||||||
for _, sub := range []string{"store", "attachments", "meta"} {
|
for _, sub := range []string{"store", "attachments", "meta"} {
|
||||||
if err := os.MkdirAll(filepath.Join(dir, sub), 0o755); err != nil {
|
if err := os.MkdirAll(filepath.Join(cfg.Dir, sub), 0o755); err != nil {
|
||||||
return nil, fmt.Errorf("storage: mkdir %s: %w", sub, err)
|
return nil, fmt.Errorf("storage: mkdir %s: %w", sub, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &Store{dir: dir}, nil
|
|
||||||
|
s := &Store{dir: cfg.Dir}
|
||||||
|
|
||||||
|
// Load encryption key
|
||||||
|
if err := s.loadKey(cfg.Keyfile); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connect to PostgreSQL and auto-create schema
|
||||||
|
if cfg.DSN != "" {
|
||||||
|
pool, err := pgxpool.New(context.Background(), cfg.DSN)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: db connect: %w", err)
|
||||||
|
}
|
||||||
|
s.db = pool
|
||||||
|
if err := s.initSchema(context.Background()); err != nil {
|
||||||
|
pool.Close()
|
||||||
|
return nil, fmt.Errorf("storage: init schema: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Close releases the database connection pool (if any).
|
||||||
|
func (s *Store) Close() {
|
||||||
|
if s.db != nil {
|
||||||
|
s.db.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Encryption key loading ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (s *Store) loadKey(keyfile string) error {
|
||||||
|
// Also check environment variable as fallback
|
||||||
|
if keyfile == "" {
|
||||||
|
keyfile = os.Getenv("ARCHIVMAIL_KEY")
|
||||||
|
}
|
||||||
|
if keyfile == "" {
|
||||||
|
return nil // no encryption configured
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the env var contains the key itself (not a path), it would be unusual.
|
||||||
|
// We treat the value as a file path in all cases.
|
||||||
|
data, err := os.ReadFile(keyfile)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("storage: read keyfile %s: %w", keyfile, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip whitespace / newlines
|
||||||
|
raw := strings.TrimSpace(string(data))
|
||||||
|
|
||||||
|
// Try base64 decode first (spec says base64-encoded 32-byte key)
|
||||||
|
decoded, err := base64.StdEncoding.DecodeString(raw)
|
||||||
|
if err != nil {
|
||||||
|
// Fall back to raw bytes (for 32-byte binary key files)
|
||||||
|
decoded = []byte(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(decoded) != 32 {
|
||||||
|
return fmt.Errorf("storage: keyfile must contain exactly 32 bytes (got %d)", len(decoded))
|
||||||
|
}
|
||||||
|
|
||||||
|
s.key = decoded
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── AES-256-GCM encryption / decryption ───────────────────────────────────
|
||||||
|
|
||||||
|
// encrypt applies AES-256-GCM encryption. The 12-byte nonce is prepended
|
||||||
|
// to the ciphertext.
|
||||||
|
func (s *Store) encrypt(plaintext []byte) ([]byte, error) {
|
||||||
|
block, err := aes.NewCipher(s.key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: aes cipher: %w", err)
|
||||||
|
}
|
||||||
|
gcm, err := cipher.NewGCM(block)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: gcm: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nonce := make([]byte, gcm.NonceSize()) // 12 bytes
|
||||||
|
if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: random nonce: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ciphertext := gcm.Seal(nonce, nonce, plaintext, nil)
|
||||||
|
return ciphertext, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// decrypt reverses AES-256-GCM encryption. Expects nonce prepended to ciphertext.
|
||||||
|
func (s *Store) decrypt(data []byte) ([]byte, error) {
|
||||||
|
block, err := aes.NewCipher(s.key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: aes cipher: %w", err)
|
||||||
|
}
|
||||||
|
gcm, err := cipher.NewGCM(block)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: gcm: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
nonceSize := gcm.NonceSize()
|
||||||
|
if len(data) < nonceSize {
|
||||||
|
return nil, fmt.Errorf("storage: ciphertext too short")
|
||||||
|
}
|
||||||
|
|
||||||
|
nonce, ciphertext := data[:nonceSize], data[nonceSize:]
|
||||||
|
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: decrypt: %w", err)
|
||||||
|
}
|
||||||
|
return plaintext, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Database schema ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (s *Store) initSchema(ctx context.Context) error {
|
||||||
|
_, err := s.db.Exec(ctx, `
|
||||||
|
CREATE TABLE IF NOT EXISTS emails (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
received_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
mail_from TEXT,
|
||||||
|
mail_to TEXT,
|
||||||
|
subject TEXT,
|
||||||
|
size_bytes BIGINT,
|
||||||
|
has_attach BOOLEAN DEFAULT FALSE,
|
||||||
|
indexed_at TIMESTAMPTZ
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_emails_received_at ON emails (received_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_emails_mail_from ON emails (mail_from);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_emails_subject ON emails USING gin (to_tsvector('simple', subject));
|
||||||
|
`)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Core operations ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
// Save writes raw email bytes to storage. The ID is the hex-encoded SHA256 of
|
// Save writes raw email bytes to storage. The ID is the hex-encoded SHA256 of
|
||||||
// the content. If the file already exists, Save is a no-op (deduplication).
|
// the plaintext content. If the file already exists, Save is a no-op (dedup).
|
||||||
|
// It also inserts metadata into the emails table if a DB is configured.
|
||||||
func (s *Store) Save(raw []byte, _ time.Time) (string, error) {
|
func (s *Store) Save(raw []byte, _ time.Time) (string, error) {
|
||||||
|
// Hash plaintext for dedup (always before encryption)
|
||||||
sum := sha256.Sum256(raw)
|
sum := sha256.Sum256(raw)
|
||||||
id := fmt.Sprintf("%x", sum[:]) // 64 hex chars
|
id := fmt.Sprintf("%x", sum[:]) // 64 hex chars
|
||||||
|
|
||||||
@@ -42,19 +208,43 @@ func (s *Store) Save(raw []byte, _ time.Time) (string, error) {
|
|||||||
return "", fmt.Errorf("storage: mkdir shard: %w", err)
|
return "", fmt.Errorf("storage: mkdir shard: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If file already exists, dedup: return same id without error.
|
// Dedup: if file already exists, return same id
|
||||||
if _, err := os.Stat(path); err == nil {
|
if _, err := os.Stat(path); err == nil {
|
||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(path, raw, 0o644); err != nil {
|
// Determine what to write: encrypted or plaintext
|
||||||
|
var toWrite []byte
|
||||||
|
if s.key != nil {
|
||||||
|
encrypted, err := s.encrypt(raw)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
toWrite = encrypted
|
||||||
|
} else {
|
||||||
|
toWrite = raw
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(path, toWrite, 0o644); err != nil {
|
||||||
return "", fmt.Errorf("storage: write: %w", err)
|
return "", fmt.Errorf("storage: write: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Insert metadata into DB (best-effort parse)
|
||||||
|
if s.db != nil {
|
||||||
|
pm, parseErr := mailparser.Parse(raw)
|
||||||
|
if parseErr == nil {
|
||||||
|
s.insertMeta(context.Background(), id, pm, len(raw))
|
||||||
|
} else {
|
||||||
|
// Insert minimal metadata even if parse fails
|
||||||
|
s.insertMetaMinimal(context.Background(), id, len(raw))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load reads a stored email by its ID.
|
// Load reads a stored email by its ID. If encryption is configured, the file
|
||||||
|
// is decrypted before returning plaintext.
|
||||||
func (s *Store) Load(id string) ([]byte, error) {
|
func (s *Store) Load(id string) ([]byte, error) {
|
||||||
path := s.filePath(id)
|
path := s.filePath(id)
|
||||||
data, err := os.ReadFile(path)
|
data, err := os.ReadFile(path)
|
||||||
@@ -64,10 +254,21 @@ func (s *Store) Load(id string) ([]byte, error) {
|
|||||||
}
|
}
|
||||||
return nil, fmt.Errorf("storage: read: %w", err)
|
return nil, fmt.Errorf("storage: read: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.key != nil {
|
||||||
|
plaintext, err := s.decrypt(data)
|
||||||
|
if err != nil {
|
||||||
|
// If decryption fails, the file might be stored unencrypted
|
||||||
|
// (pre-encryption era). Return as-is for backwards compatibility.
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
return plaintext, nil
|
||||||
|
}
|
||||||
|
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete removes a stored email by its ID.
|
// Delete removes a stored email by its ID, including its DB metadata row.
|
||||||
func (s *Store) Delete(id string) error {
|
func (s *Store) Delete(id string) error {
|
||||||
path := s.filePath(id)
|
path := s.filePath(id)
|
||||||
if err := os.Remove(path); err != nil {
|
if err := os.Remove(path); err != nil {
|
||||||
@@ -76,11 +277,35 @@ func (s *Store) Delete(id string) error {
|
|||||||
}
|
}
|
||||||
return fmt.Errorf("storage: delete: %w", err)
|
return fmt.Errorf("storage: delete: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.db != nil {
|
||||||
|
_, _ = s.db.Exec(context.Background(), `DELETE FROM emails WHERE id = $1`, id)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stats walks the store directory and returns aggregate statistics.
|
// Stats returns aggregate statistics. Uses the DB if available (fast), otherwise
|
||||||
|
// falls back to walking the file system.
|
||||||
func (s *Store) Stats() (*StoreStats, error) {
|
func (s *Store) Stats() (*StoreStats, error) {
|
||||||
|
if s.db != nil {
|
||||||
|
return s.statsFromDB()
|
||||||
|
}
|
||||||
|
return s.statsFromFS()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Store) statsFromDB() (*StoreStats, error) {
|
||||||
|
var stats StoreStats
|
||||||
|
err := s.db.QueryRow(context.Background(),
|
||||||
|
`SELECT COALESCE(COUNT(*), 0), COALESCE(SUM(size_bytes), 0) FROM emails`,
|
||||||
|
).Scan(&stats.TotalMails, &stats.TotalBytes)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("storage: stats from db: %w", err)
|
||||||
|
}
|
||||||
|
return &stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Store) statsFromFS() (*StoreStats, error) {
|
||||||
var stats StoreStats
|
var stats StoreStats
|
||||||
err := filepath.WalkDir(filepath.Join(s.dir, "store"), func(_ string, d fs.DirEntry, err error) error {
|
err := filepath.WalkDir(filepath.Join(s.dir, "store"), func(_ string, d fs.DirEntry, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -103,15 +328,48 @@ func (s *Store) Stats() (*StoreStats, error) {
|
|||||||
return &stats, nil
|
return &stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// MailRef holds the ID and modification time of a stored mail.
|
// FirstAndLastMail returns the oldest and newest mail. Uses the DB if available
|
||||||
type MailRef struct {
|
// (much faster than walking the filesystem).
|
||||||
ID string
|
func (s *Store) FirstAndLastMail() (first, last *MailRef, err error) {
|
||||||
ModTime time.Time
|
if s.db != nil {
|
||||||
|
return s.firstAndLastFromDB()
|
||||||
|
}
|
||||||
|
return s.firstAndLastFromFS()
|
||||||
}
|
}
|
||||||
|
|
||||||
// FirstAndLastMail walks the store and returns the oldest and newest mail by
|
func (s *Store) firstAndLastFromDB() (first, last *MailRef, err error) {
|
||||||
// file modification time. Returns nil for either if the store is empty.
|
ctx := context.Background()
|
||||||
func (s *Store) FirstAndLastMail() (first, last *MailRef, err error) {
|
|
||||||
|
// Oldest
|
||||||
|
var fID string
|
||||||
|
var fTime time.Time
|
||||||
|
err = s.db.QueryRow(ctx,
|
||||||
|
`SELECT id, received_at FROM emails ORDER BY received_at ASC LIMIT 1`,
|
||||||
|
).Scan(&fID, &fTime)
|
||||||
|
if err != nil && !errors.Is(err, pgx.ErrNoRows) {
|
||||||
|
return nil, nil, fmt.Errorf("storage: first mail: %w", err)
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
first = &MailRef{ID: fID, ModTime: fTime}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newest
|
||||||
|
var lID string
|
||||||
|
var lTime time.Time
|
||||||
|
err = s.db.QueryRow(ctx,
|
||||||
|
`SELECT id, received_at FROM emails ORDER BY received_at DESC LIMIT 1`,
|
||||||
|
).Scan(&lID, &lTime)
|
||||||
|
if err != nil && !errors.Is(err, pgx.ErrNoRows) {
|
||||||
|
return nil, nil, fmt.Errorf("storage: last mail: %w", err)
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
last = &MailRef{ID: lID, ModTime: lTime}
|
||||||
|
}
|
||||||
|
|
||||||
|
return first, last, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Store) firstAndLastFromFS() (first, last *MailRef, err error) {
|
||||||
err = filepath.WalkDir(filepath.Join(s.dir, "store"), func(path string, d fs.DirEntry, werr error) error {
|
err = filepath.WalkDir(filepath.Join(s.dir, "store"), func(path string, d fs.DirEntry, werr error) error {
|
||||||
if werr != nil {
|
if werr != nil {
|
||||||
return werr
|
return werr
|
||||||
@@ -138,6 +396,183 @@ func (s *Store) FirstAndLastMail() (first, last *MailRef, err error) {
|
|||||||
return first, last, nil
|
return first, last, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Metadata helpers ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// insertMeta inserts parsed email metadata into the emails table.
|
||||||
|
func (s *Store) insertMeta(ctx context.Context, id string, pm *mailparser.ParsedMail, size int) {
|
||||||
|
mailTo := strings.Join(pm.To, ", ")
|
||||||
|
hasAttach := len(pm.Attachments) > 0
|
||||||
|
|
||||||
|
_, _ = s.db.Exec(ctx, `
|
||||||
|
INSERT INTO emails (id, received_at, mail_from, mail_to, subject, size_bytes, has_attach)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||||
|
ON CONFLICT (id) DO NOTHING
|
||||||
|
`, id, pm.Date, pm.From, mailTo, pm.Subject, int64(size), hasAttach)
|
||||||
|
}
|
||||||
|
|
||||||
|
// insertMetaMinimal inserts minimal metadata when parsing fails.
|
||||||
|
func (s *Store) insertMetaMinimal(ctx context.Context, id string, size int) {
|
||||||
|
_, _ = s.db.Exec(ctx, `
|
||||||
|
INSERT INTO emails (id, received_at, size_bytes)
|
||||||
|
VALUES ($1, NOW(), $2)
|
||||||
|
ON CONFLICT (id) DO NOTHING
|
||||||
|
`, id, int64(size))
|
||||||
|
}
|
||||||
|
|
||||||
|
// SaveMeta upserts metadata for a given email ID. Used by the backfill process.
|
||||||
|
func (s *Store) SaveMeta(ctx context.Context, id string, pm *mailparser.ParsedMail, size int) error {
|
||||||
|
if s.db == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
mailTo := strings.Join(pm.To, ", ")
|
||||||
|
hasAttach := len(pm.Attachments) > 0
|
||||||
|
|
||||||
|
_, err := s.db.Exec(ctx, `
|
||||||
|
INSERT INTO emails (id, received_at, mail_from, mail_to, subject, size_bytes, has_attach)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||||
|
ON CONFLICT (id) DO UPDATE SET
|
||||||
|
mail_from = EXCLUDED.mail_from,
|
||||||
|
mail_to = EXCLUDED.mail_to,
|
||||||
|
subject = EXCLUDED.subject,
|
||||||
|
size_bytes = EXCLUDED.size_bytes,
|
||||||
|
has_attach = EXCLUDED.has_attach
|
||||||
|
`, id, pm.Date, pm.From, mailTo, pm.Subject, int64(size), hasAttach)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetIndexedAt marks an email as indexed in the database.
|
||||||
|
func (s *Store) SetIndexedAt(ctx context.Context, id string) error {
|
||||||
|
if s.db == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
_, err := s.db.Exec(ctx, `UPDATE emails SET indexed_at = NOW() WHERE id = $1`, id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsIndexed checks whether a given email has been indexed (indexed_at IS NOT NULL).
|
||||||
|
func (s *Store) IsIndexed(ctx context.Context, id string) (bool, error) {
|
||||||
|
if s.db == nil {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
var indexed bool
|
||||||
|
err := s.db.QueryRow(ctx,
|
||||||
|
`SELECT indexed_at IS NOT NULL FROM emails WHERE id = $1`, id,
|
||||||
|
).Scan(&indexed)
|
||||||
|
if errors.Is(err, pgx.ErrNoRows) {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
return indexed, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Backfill ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Backfill walks the store directory, parses each email, inserts missing DB
|
||||||
|
// metadata rows, and re-indexes emails that have indexed_at IS NULL.
|
||||||
|
func (s *Store) Backfill(ctx context.Context, idx interface{ IndexSync(doc interface{}) error }, logger *slog.Logger) error {
|
||||||
|
// We accept a generic indexer interface to avoid import cycles.
|
||||||
|
// The caller (main.go) wraps the real index.Indexer.
|
||||||
|
return s.BackfillWithFuncs(ctx, logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackfillWithFuncs walks the store and calls the provided functions for each email.
|
||||||
|
// This is the implementation used by the coordinator in main.go.
|
||||||
|
func (s *Store) BackfillWithFuncs(ctx context.Context, logger *slog.Logger) error {
|
||||||
|
if s.db == nil {
|
||||||
|
logger.Info("backfill: skipping, no database configured")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
storeDir := filepath.Join(s.dir, "store")
|
||||||
|
count := 0
|
||||||
|
indexed := 0
|
||||||
|
errCount := 0
|
||||||
|
|
||||||
|
err := filepath.WalkDir(storeDir, func(path string, d fs.DirEntry, werr error) error {
|
||||||
|
if werr != nil {
|
||||||
|
return werr
|
||||||
|
}
|
||||||
|
if d.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
count++
|
||||||
|
id := d.Name()
|
||||||
|
|
||||||
|
// Load the file (handles decryption)
|
||||||
|
raw, err := s.Load(id)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: load failed", "id", id, "err", err)
|
||||||
|
errCount++
|
||||||
|
return nil // continue with next file
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse
|
||||||
|
pm, err := mailparser.Parse(raw)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: parse failed", "id", id, "err", err)
|
||||||
|
errCount++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert metadata
|
||||||
|
if err := s.SaveMeta(ctx, id, pm, len(raw)); err != nil {
|
||||||
|
logger.Warn("backfill: save meta failed", "id", id, "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if already indexed
|
||||||
|
alreadyIndexed, err := s.IsIndexed(ctx, id)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("backfill: check indexed failed", "id", id, "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !alreadyIndexed {
|
||||||
|
indexed++
|
||||||
|
}
|
||||||
|
|
||||||
|
if count%100 == 0 {
|
||||||
|
logger.Info("backfill: progress", "processed", count, "need_index", indexed, "errors", errCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("backfill: walk: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("backfill: complete", "total", count, "need_index", indexed, "errors", errCount)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WalkStore iterates over all files in the store directory and calls fn for each.
|
||||||
|
// The fn receives the email ID. This is used by the backfill coordinator.
|
||||||
|
func (s *Store) WalkStore(ctx context.Context, fn func(id string) error) error {
|
||||||
|
storeDir := filepath.Join(s.dir, "store")
|
||||||
|
return filepath.WalkDir(storeDir, func(path string, d fs.DirEntry, werr error) error {
|
||||||
|
if werr != nil {
|
||||||
|
return werr
|
||||||
|
}
|
||||||
|
if d.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
return fn(d.Name())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── File path helper ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
// filePath returns the on-disk path for a given mail ID.
|
// filePath returns the on-disk path for a given mail ID.
|
||||||
// Uses 2-char prefix sharding: {dir}/store/{id[:2]}/{id}
|
// Uses 2-char prefix sharding: {dir}/store/{id[:2]}/{id}
|
||||||
func (s *Store) filePath(id string) string {
|
func (s *Store) filePath(id string) string {
|
||||||
|
|||||||
Reference in New Issue
Block a user