diff --git a/src/lib/api/index.ts b/src/lib/api/index.ts
index eafcdba..fefdb8f 100644
--- a/src/lib/api/index.ts
+++ b/src/lib/api/index.ts
@@ -82,6 +82,9 @@ export type {
ThreadResponse,
MailAttachment,
MailDetail,
+ OCRStatus,
+ SearchMatchField,
+ OCRDownloadResult,
ImapFolder,
ImapAccount,
ImapTestResult,
@@ -95,6 +98,8 @@ export {
getThread,
downloadMailAttachment,
downloadMailRaw,
+ getOCRTextDownloadURL,
+ downloadMailOCRText,
getImapAccounts,
createImapAccount,
deleteImapAccount,
diff --git a/src/lib/api/mail.ts b/src/lib/api/mail.ts
index 0c1045c..523a50d 100644
--- a/src/lib/api/mail.ts
+++ b/src/lib/api/mail.ts
@@ -2,6 +2,16 @@ import { API_BASE, request } from "./core";
// ── Types ────────────────────────────────────────────────────────────────────
+export type OCRStatus = "pending" | "done" | "failed" | "skipped" | "disabled"; // PROJ-44: value set of MailDetail.ocr_status
+
+export type SearchMatchField = // PROJ-44: value set of SearchHit.match_field (source field of the snippet)
+ | "subject"
+ | "body"
+ | "attachment_text"
+ | "attachment_names"
+ | "from_addr"
+ | "to_addr";
+
export interface SearchHit {
id: string;
score: number;
@@ -13,6 +23,9 @@ export interface SearchHit {
has_attachments?: boolean;
thread_id?: string;
thread_size?: number;
+ // PROJ-44: Manticore snippet (with <b>...</b> highlights) and source field
+ snippet?: string;
+ match_field?: SearchMatchField;
}
export interface ThreadMail {
@@ -57,6 +70,9 @@ export interface MailDetail {
verify_ok: boolean | null;
verified_at: string | null;
thread_id?: string;
+ // PROJ-44: OCR status and extracted-text length
+ ocr_status?: OCRStatus;
+ ocr_chars?: number;
}
export interface ImapFolder {
@@ -187,6 +203,37 @@ export async function downloadMailRaw(
return { blob: await res.blob(), filename: `${id}.eml` };
}
+// ── OCR text ──────────────────────────────────────────────────────────────────
+
+/** Direct URL of the OCR-text endpoint; `mailId` is percent-encoded as one path segment. Browser uses cookie auth automatically. */
+export function getOCRTextDownloadURL(mailId: string): string {
+  return `${API_BASE}/api/mails/${encodeURIComponent(mailId)}/ocr-text`;
+}
+
+export type OCRDownloadResult = // outcome of downloadMailOCRText, discriminated by `kind`
+ | { kind: "ok"; blob: Blob; filename: string } // text fetched: body blob plus the filename to save it under
+ | { kind: "pending" } // server answered HTTP 202
+ | { kind: "not_available" }; // server answered HTTP 404
+
+/**
+ * Fetch the OCR text of mail `id`. Resolves to `pending` on HTTP 202 and to
+ * `not_available` on 404; throws an Error for any other non-2xx status.
+ */
+export async function downloadMailOCRText(
+  id: string
+): Promise<OCRDownloadResult> {
+  const res = await fetch(getOCRTextDownloadURL(id), {
+    credentials: "include",
+  });
+  if (res.status === 202) return { kind: "pending" }; // must precede the res.ok check: 202 is a 2xx status
+  if (res.status === 404) return { kind: "not_available" };
+  if (!res.ok) throw new Error(`Download fehlgeschlagen: ${res.status}`);
+  const cd = res.headers.get("Content-Disposition") || "";
+  const match = cd.match(/filename="([^"]+)"/); // only the quoted filename="..." form is recognised
+  const filename = match?.[1] ?? `${id}.ocr.txt`; // fall back to an id-based name when the header is absent
+  return { kind: "ok", blob: await res.blob(), filename };
+}
+
// ── IMAP ──────────────────────────────────────────────────────────────────────
export async function getImapAccounts(): Promise {
diff --git a/src/lib/sanitize.ts b/src/lib/sanitize.ts
new file mode 100644
index 0000000..64f48ef
--- /dev/null
+++ b/src/lib/sanitize.ts
@@ -0,0 +1,25 @@
+/**
+ * sanitizeSnippet escapes all HTML in a Manticore snippet, then re-enables
+ * the only markup we trust: <b>…</b> wrappers around match words.
+ *
+ * Manticore SNIPPET()/HIGHLIGHT() output is otherwise plain text plus
+ * configurable `<b>` markers, so this is sufficient for safe rendering
+ * via dangerouslySetInnerHTML.
+ */
+export function sanitizeSnippet(input: string): string {
+  if (!input) return "";
+
+  // 1. Escape every HTML-special character ("&" first, so later entities are not double-escaped).
+  const escaped = input
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;")
+    .replace(/'/g, "&#39;");
+
+  // 2. Re-allow our two whitelisted tags. After step 1 they appear as
+  //    "&lt;b&gt;" / "&lt;/b&gt;" — restore exactly those (lowercase, no attributes).
+  return escaped
+    .replace(/&lt;b&gt;/g, "<b>")
+    .replace(/&lt;\/b&gt;/g, "</b>");
+}