feat(PROJ-44): API-Types + Sanitize-Helper fuer OCR-GUI

- MailDetail um ocr_status/ocr_chars erweitert
- SearchHit um snippet + match_field erweitert
- Neue API-Funktionen getOCRTextDownloadURL und downloadMailOCRText
  inkl. 202/404-Handling fuer pending/not-available
- src/lib/sanitize.ts: sanitizeSnippet escaped HTML und laesst nur
  <b>-Tags fuer Manticore-Highlights durch
- Re-exports in src/lib/api/index.ts ergaenzt

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
sysops
2026-05-10 22:18:38 +02:00
parent 88e9d0c08c
commit 23a9a7ea37
3 changed files with 77 additions and 0 deletions
+5
View File
@@ -82,6 +82,9 @@ export type {
ThreadResponse, ThreadResponse,
MailAttachment, MailAttachment,
MailDetail, MailDetail,
OCRStatus,
SearchMatchField,
OCRDownloadResult,
ImapFolder, ImapFolder,
ImapAccount, ImapAccount,
ImapTestResult, ImapTestResult,
@@ -95,6 +98,8 @@ export {
getThread, getThread,
downloadMailAttachment, downloadMailAttachment,
downloadMailRaw, downloadMailRaw,
getOCRTextDownloadURL,
downloadMailOCRText,
getImapAccounts, getImapAccounts,
createImapAccount, createImapAccount,
deleteImapAccount, deleteImapAccount,
+47
View File
@@ -2,6 +2,16 @@ import { API_BASE, request } from "./core";
// ── Types ──────────────────────────────────────────────────────────────────── // ── Types ────────────────────────────────────────────────────────────────────
/** OCR processing state of a mail, as carried in `MailDetail.ocr_status`. */
export type OCRStatus = "pending" | "done" | "failed" | "skipped" | "disabled";
/**
 * Name of the mail field a search snippet originates from; used as the
 * type of `SearchHit.match_field`.
 */
export type SearchMatchField =
| "subject"
| "body"
| "attachment_text"
| "attachment_names"
| "from_addr"
| "to_addr";
export interface SearchHit { export interface SearchHit {
id: string; id: string;
score: number; score: number;
@@ -13,6 +23,9 @@ export interface SearchHit {
has_attachments?: boolean; has_attachments?: boolean;
thread_id?: string; thread_id?: string;
thread_size?: number; thread_size?: number;
// PROJ-44: Manticore snippet (with <b>...</b> highlights) and source field
snippet?: string;
match_field?: SearchMatchField;
} }
export interface ThreadMail { export interface ThreadMail {
@@ -57,6 +70,9 @@ export interface MailDetail {
verify_ok: boolean | null; verify_ok: boolean | null;
verified_at: string | null; verified_at: string | null;
thread_id?: string; thread_id?: string;
// PROJ-44: OCR status and extracted-text length
ocr_status?: OCRStatus;
ocr_chars?: number;
} }
export interface ImapFolder { export interface ImapFolder {
@@ -187,6 +203,37 @@ export async function downloadMailRaw(
return { blob: await res.blob(), filename: `${id}.eml` }; return { blob: await res.blob(), filename: `${id}.eml` };
} }
// ── OCR text ──────────────────────────────────────────────────────────────────
/**
 * Direct URL of the OCR-text endpoint. Browser uses cookie auth automatically.
 *
 * The mail id is percent-encoded so that characters such as `/`, `?` or `#`
 * cannot alter the path or query of the resulting URL.
 *
 * @param mailId - Identifier of the mail whose OCR text should be fetched.
 * @returns URL of the form `${API_BASE}/api/mails/<encoded id>/ocr-text`.
 */
export function getOCRTextDownloadURL(mailId: string): string {
  return `${API_BASE}/api/mails/${encodeURIComponent(mailId)}/ocr-text`;
}
/**
 * Outcome of `downloadMailOCRText`, discriminated by `kind`:
 * - `"ok"`: the text was fetched; `blob` holds the body, `filename` the
 *   suggested download name.
 * - `"pending"`: the server answered HTTP 202.
 * - `"not_available"`: the server answered HTTP 404.
 */
export type OCRDownloadResult =
| { kind: "ok"; blob: Blob; filename: string }
| { kind: "pending" }
| { kind: "not_available" };
/**
 * Fetch the OCR text of a mail. Handles the 202 (pending) and 404 (not
 * available) cases gracefully so the UI can show different feedback.
 *
 * The download filename is taken from the `Content-Disposition` response
 * header. Recognised forms, in order of preference:
 *   1. `filename*=UTF-8''<percent-encoded>` (RFC 5987 / RFC 6266)
 *   2. `filename="quoted"` or `filename=token`
 * If none is present (or the extended form is malformed and no plain form
 * exists) the name falls back to `<id>.ocr.txt`.
 *
 * @param id - Identifier of the mail; percent-encoded before being placed in
 *   the request path.
 * @returns `{ kind: "pending" }` on HTTP 202, `{ kind: "not_available" }` on
 *   HTTP 404, otherwise `{ kind: "ok", blob, filename }`.
 * @throws Error when the response status is any other non-2xx value.
 */
export async function downloadMailOCRText(
  id: string
): Promise<OCRDownloadResult> {
  const res = await fetch(
    `${API_BASE}/api/mails/${encodeURIComponent(id)}/ocr-text`,
    { credentials: "include" }
  );
  if (res.status === 202) return { kind: "pending" };
  if (res.status === 404) return { kind: "not_available" };
  if (!res.ok) throw new Error(`Download fehlgeschlagen: ${res.status}`);

  const disposition = res.headers.get("Content-Disposition") || "";
  let filename = `${id}.ocr.txt`;

  // Plain parameter: quoted string or bare token. The pattern cannot match
  // inside `filename*=` because `*` sits between the name and the `=`.
  const plain = disposition.match(/filename\s*=\s*(?:"([^"]+)"|([^;\s"]+))/i);
  if (plain) {
    filename = plain[1] || plain[2] || filename;
  }

  // Extended parameter wins over the plain one when it decodes cleanly.
  const extended = disposition.match(/filename\*\s*=\s*UTF-8''([^;\s]+)/i);
  if (extended && extended[1]) {
    try {
      filename = decodeURIComponent(extended[1]);
    } catch {
      // Malformed percent-encoding: keep the name chosen above.
    }
  }

  return { kind: "ok", blob: await res.blob(), filename };
}
// ── IMAP ────────────────────────────────────────────────────────────────────── // ── IMAP ──────────────────────────────────────────────────────────────────────
export async function getImapAccounts(): Promise<ImapAccount[]> { export async function getImapAccounts(): Promise<ImapAccount[]> {
+25
View File
@@ -0,0 +1,25 @@
/**
 * Turn a Manticore search snippet into HTML that is safe to inject.
 *
 * Works in two phases, applied in order as a list of replacement steps:
 *   - every HTML-special character (`&`, `<`, `>`, `"`, `'`) is replaced by
 *     its entity, so no markup from the input survives;
 *   - the escaped forms of exactly `<b>` and `</b>` are turned back into real
 *     tags, since these are the highlight markers wrapped around match words.
 *
 * Any other tag — including `<b>` with attributes or in upper case — stays
 * escaped. The result is intended for rendering via dangerouslySetInnerHTML.
 *
 * @param input - Raw snippet text; an empty string yields `""`.
 * @returns The escaped snippet with only `<b>`/`</b>` left as live markup.
 */
export function sanitizeSnippet(input: string): string {
  if (!input) {
    return "";
  }

  // Order matters: `&` must be escaped first so the entities produced by the
  // later steps are not escaped a second time, and the two restore steps
  // must run after all escaping is done.
  const steps: ReadonlyArray<readonly [RegExp, string]> = [
    [/&/g, "&amp;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"],
    [/"/g, "&quot;"],
    [/'/g, "&#39;"],
    [/&lt;b&gt;/g, "<b>"],
    [/&lt;\/b&gt;/g, "</b>"],
  ];

  return steps.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    input
  );
}