feat(PROJ-44): API-Types + Sanitize-Helper fuer OCR-GUI
- MailDetail um ocr_status/ocr_chars erweitert
- SearchHit um snippet + match_field erweitert
- Neue API-Funktionen getOCRTextDownloadURL und downloadMailOCRText inkl. 202/404-Handling fuer pending/not-available
- src/lib/sanitize.ts: sanitizeSnippet escaped HTML und laesst nur <b>-Tags fuer Manticore-Highlights durch
- Re-exports in src/lib/api/index.ts ergaenzt

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -82,6 +82,9 @@ export type {
|
||||
ThreadResponse,
|
||||
MailAttachment,
|
||||
MailDetail,
|
||||
OCRStatus,
|
||||
SearchMatchField,
|
||||
OCRDownloadResult,
|
||||
ImapFolder,
|
||||
ImapAccount,
|
||||
ImapTestResult,
|
||||
@@ -95,6 +98,8 @@ export {
|
||||
getThread,
|
||||
downloadMailAttachment,
|
||||
downloadMailRaw,
|
||||
getOCRTextDownloadURL,
|
||||
downloadMailOCRText,
|
||||
getImapAccounts,
|
||||
createImapAccount,
|
||||
deleteImapAccount,
|
||||
|
||||
@@ -2,6 +2,16 @@ import { API_BASE, request } from "./core";
|
||||
|
||||
// ── Types ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Allowed values of `MailDetail.ocr_status`.
 *
 * NOTE(review): the exact meaning of each state is defined by the backend
 * and is not visible here — confirm before branching on individual values.
 */
export type OCRStatus = "pending" | "done" | "failed" | "skipped" | "disabled";
|
||||
|
||||
/**
 * Name of the indexed field a search snippet was taken from
 * (see `SearchHit.match_field`).
 */
export type SearchMatchField =
  | "subject"
  | "body"
  | "attachment_text"
  | "attachment_names"
  | "from_addr"
  | "to_addr";
|
||||
|
||||
export interface SearchHit {
|
||||
id: string;
|
||||
score: number;
|
||||
@@ -13,6 +23,9 @@ export interface SearchHit {
|
||||
has_attachments?: boolean;
|
||||
thread_id?: string;
|
||||
thread_size?: number;
|
||||
// PROJ-44: Manticore snippet (with <b>...</b> highlights) and source field
|
||||
snippet?: string;
|
||||
match_field?: SearchMatchField;
|
||||
}
|
||||
|
||||
export interface ThreadMail {
|
||||
@@ -57,6 +70,9 @@ export interface MailDetail {
|
||||
verify_ok: boolean | null;
|
||||
verified_at: string | null;
|
||||
thread_id?: string;
|
||||
// PROJ-44: OCR status and extracted-text length
|
||||
ocr_status?: OCRStatus;
|
||||
ocr_chars?: number;
|
||||
}
|
||||
|
||||
export interface ImapFolder {
|
||||
@@ -187,6 +203,37 @@ export async function downloadMailRaw(
|
||||
return { blob: await res.blob(), filename: `${id}.eml` };
|
||||
}
|
||||
|
||||
// ── OCR text ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Builds the direct URL of the OCR-text endpoint for a mail.
 * Browser uses cookie auth automatically.
 *
 * @param mailId - Identifier of the mail; inserted as a single path segment.
 * @returns URL of the form `${API_BASE}/api/mails/<mailId>/ocr-text`.
 */
export function getOCRTextDownloadURL(mailId: string): string {
  // Percent-encode the id so reserved characters ("/", "?", "#", …) cannot
  // change which path is requested. Plain alphanumeric ids are unaffected.
  return `${API_BASE}/api/mails/${encodeURIComponent(mailId)}/ocr-text`;
}
|
||||
|
||||
/**
 * Outcome of `downloadMailOCRText`, discriminated by `kind`:
 *
 * - `"ok"` — the text was fetched; `blob` holds the body and `filename`
 *   the suggested download name.
 * - `"pending"` — the server answered 202.
 * - `"not_available"` — the server answered 404.
 */
export type OCRDownloadResult =
  | { kind: "ok"; blob: Blob; filename: string }
  | { kind: "pending" }
  | { kind: "not_available" };
|
||||
|
||||
/**
 * Fetch the OCR text of a mail. Handles the 202 (pending) and 404
 * (not available) cases gracefully so the UI can show different feedback.
 *
 * @param id - Identifier of the mail whose OCR text is requested.
 * @returns `{ kind: "pending" }` for HTTP 202, `{ kind: "not_available" }`
 *   for HTTP 404, otherwise `{ kind: "ok" }` with the response body as a
 *   `Blob` and a filename taken from `Content-Disposition` (falling back to
 *   `<id>.ocr.txt`).
 * @throws Error when the response has any other non-OK status.
 */
export async function downloadMailOCRText(
  id: string
): Promise<OCRDownloadResult> {
  // Reuse the URL helper so the endpoint path is defined in one place.
  const res = await fetch(getOCRTextDownloadURL(id), {
    credentials: "include",
  });

  // 202 and 404 are expected outcomes, not errors; check them before `ok`.
  if (res.status === 202) return { kind: "pending" };
  if (res.status === 404) return { kind: "not_available" };
  if (!res.ok) throw new Error(`Download fehlgeschlagen: ${res.status}`);

  const cd = res.headers.get("Content-Disposition") ?? "";
  // Accept the quoted form (filename="x.txt") first, then the bare token
  // form (filename=x.txt). Parameter names are matched case-insensitively.
  const match =
    /filename="([^"]+)"/i.exec(cd) ?? /filename=([^;"\s]+)/i.exec(cd);
  const filename = match?.[1] ?? `${id}.ocr.txt`;

  return { kind: "ok", blob: await res.blob(), filename };
}
|
||||
|
||||
// ── IMAP ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function getImapAccounts(): Promise<ImapAccount[]> {
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
/**
 * sanitizeSnippet escapes all HTML in a Manticore snippet, then re-enables
 * the only markup we trust: <b>…</b> wrappers around match words.
 *
 * Manticore SNIPPET()/HIGHLIGHT() output is otherwise plain text plus
 * configurable `<b>` markers, so this is sufficient for safe rendering
 * via dangerouslySetInnerHTML.
 *
 * Only the exact, attribute-free tags `<b>` and `</b>` are restored; any
 * other tag (including `<b ...>` with attributes) stays escaped.
 *
 * @param input - Raw snippet text as delivered by the search backend.
 * @returns HTML-safe string; the empty string for empty input.
 */
export function sanitizeSnippet(input: string): string {
  if (!input) return "";

  // 1. Escape every HTML-special character. "&" must be handled first so
  //    the entities produced by the later replacements are not re-escaped.
  const escaped = input
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");

  // 2. Re-allow our two whitelisted tags. After step 1 they appear as
  //    "&lt;b&gt;" / "&lt;/b&gt;" — restore them verbatim.
  return escaped
    .replace(/&lt;b&gt;/g, "<b>")
    .replace(/&lt;\/b&gt;/g, "</b>");
}
|
||||
Reference in New Issue
Block a user