feat(PROJ-44): API-Types + Sanitize-Helper fuer OCR-GUI
- MailDetail um ocr_status/ocr_chars erweitert
- SearchHit um snippet + match_field erweitert
- Neue API-Funktionen getOCRTextDownloadURL und downloadMailOCRText inkl. 202/404-Handling fuer pending/not-available
- src/lib/sanitize.ts: sanitizeSnippet escaped HTML und laesst nur <b>-Tags fuer Manticore-Highlights durch
- Re-exports in src/lib/api/index.ts ergaenzt

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -82,6 +82,9 @@ export type {
|
|||||||
ThreadResponse,
|
ThreadResponse,
|
||||||
MailAttachment,
|
MailAttachment,
|
||||||
MailDetail,
|
MailDetail,
|
||||||
|
OCRStatus,
|
||||||
|
SearchMatchField,
|
||||||
|
OCRDownloadResult,
|
||||||
ImapFolder,
|
ImapFolder,
|
||||||
ImapAccount,
|
ImapAccount,
|
||||||
ImapTestResult,
|
ImapTestResult,
|
||||||
@@ -95,6 +98,8 @@ export {
|
|||||||
getThread,
|
getThread,
|
||||||
downloadMailAttachment,
|
downloadMailAttachment,
|
||||||
downloadMailRaw,
|
downloadMailRaw,
|
||||||
|
getOCRTextDownloadURL,
|
||||||
|
downloadMailOCRText,
|
||||||
getImapAccounts,
|
getImapAccounts,
|
||||||
createImapAccount,
|
createImapAccount,
|
||||||
deleteImapAccount,
|
deleteImapAccount,
|
||||||
|
|||||||
@@ -2,6 +2,16 @@ import { API_BASE, request } from "./core";
|
|||||||
|
|
||||||
// ── Types ────────────────────────────────────────────────────────────────────
|
// ── Types ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * OCR processing state of a mail, as carried by `MailDetail.ocr_status`.
 * A closed set of string literals.
 */
export type OCRStatus =
  | "pending"
  | "done"
  | "failed"
  | "skipped"
  | "disabled";
|
||||||
|
|
||||||
|
/**
 * Name of the indexed field a search hit's snippet was taken from,
 * as carried by `SearchHit.match_field`.
 */
export type SearchMatchField =
  | "subject"
  | "body"
  | "attachment_text"
  | "attachment_names"
  | "from_addr"
  | "to_addr";
|
||||||
|
|
||||||
export interface SearchHit {
|
export interface SearchHit {
|
||||||
id: string;
|
id: string;
|
||||||
score: number;
|
score: number;
|
||||||
@@ -13,6 +23,9 @@ export interface SearchHit {
|
|||||||
has_attachments?: boolean;
|
has_attachments?: boolean;
|
||||||
thread_id?: string;
|
thread_id?: string;
|
||||||
thread_size?: number;
|
thread_size?: number;
|
||||||
|
// PROJ-44: Manticore snippet (with <b>...</b> highlights) and source field
|
||||||
|
snippet?: string;
|
||||||
|
match_field?: SearchMatchField;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ThreadMail {
|
export interface ThreadMail {
|
||||||
@@ -57,6 +70,9 @@ export interface MailDetail {
|
|||||||
verify_ok: boolean | null;
|
verify_ok: boolean | null;
|
||||||
verified_at: string | null;
|
verified_at: string | null;
|
||||||
thread_id?: string;
|
thread_id?: string;
|
||||||
|
// PROJ-44: OCR status and extracted-text length
|
||||||
|
ocr_status?: OCRStatus;
|
||||||
|
ocr_chars?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ImapFolder {
|
export interface ImapFolder {
|
||||||
@@ -187,6 +203,37 @@ export async function downloadMailRaw(
|
|||||||
return { blob: await res.blob(), filename: `${id}.eml` };
|
return { blob: await res.blob(), filename: `${id}.eml` };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── OCR text ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Direct URL of the OCR-text endpoint. Browser uses cookie auth automatically.
 *
 * @param mailId - Identifier of the mail. It is percent-encoded so that
 *   characters such as `/`, `?` or `#` cannot alter the request path.
 * @returns Absolute URL built from `API_BASE`.
 */
export function getOCRTextDownloadURL(mailId: string): string {
  return `${API_BASE}/api/mails/${encodeURIComponent(mailId)}/ocr-text`;
}
|
||||||
|
|
||||||
|
/**
 * Outcome of an OCR-text download attempt.
 *
 * - `ok`: the server delivered the text; `blob` holds the payload and
 *   `filename` the suggested download name.
 * - `pending`: the server answered 202.
 * - `not_available`: the server answered 404.
 */
export type OCRDownloadResult =
  | { kind: "ok"; blob: Blob; filename: string }
  | { kind: "pending" }
  | { kind: "not_available" };

/**
 * Fetch the OCR text. Handles the 202 (pending) and 404 (not available)
 * cases gracefully so the UI can show different feedback.
 *
 * @param id - Identifier of the mail; percent-encoded before it is placed
 *   into the request path.
 * @returns A discriminated result; see {@link OCRDownloadResult}.
 * @throws Error when the response status is neither 202, 404 nor a
 *   success status.
 */
export async function downloadMailOCRText(
  id: string
): Promise<OCRDownloadResult> {
  const res = await fetch(
    `${API_BASE}/api/mails/${encodeURIComponent(id)}/ocr-text`,
    { credentials: "include" }
  );

  // 202 counts as `res.ok`, so it must be checked before the generic guard.
  if (res.status === 202) return { kind: "pending" };
  if (res.status === 404) return { kind: "not_available" };
  if (!res.ok) throw new Error(`Download fehlgeschlagen: ${res.status}`);

  // Prefer the server-suggested name; fall back to one derived from the id.
  const cd = res.headers.get("Content-Disposition") || "";
  const filename = cd.match(/filename="([^"]+)"/)?.[1] ?? `${id}.ocr.txt`;

  return { kind: "ok", blob: await res.blob(), filename };
}
|
||||||
|
|
||||||
// ── IMAP ──────────────────────────────────────────────────────────────────────
|
// ── IMAP ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
export async function getImapAccounts(): Promise<ImapAccount[]> {
|
export async function getImapAccounts(): Promise<ImapAccount[]> {
|
||||||
|
|||||||
@@ -0,0 +1,25 @@
|
|||||||
|
/**
 * sanitizeSnippet escapes all HTML in a Manticore snippet, then re-enables
 * the only markup we trust: <b>…</b> wrappers around match words.
 *
 * Manticore SNIPPET()/HIGHLIGHT() output is otherwise plain text plus
 * configurable `<b>` markers, so this is sufficient for safe rendering
 * via dangerouslySetInnerHTML.
 *
 * @param input - Raw snippet string; an empty value yields `""`.
 * @returns The snippet with `&`, `<`, `>`, `"` and `'` replaced by HTML
 *   entities, except for exact `<b>` and `</b>` tags, which are kept.
 */
export function sanitizeSnippet(input: string): string {
  if (!input) return "";

  // 1. Escape every HTML-special character. `&` must go first so the
  //    entities produced by the later replacements are not escaped again.
  const escaped = input
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");

  // 2. Re-allow our two whitelisted tags. After step 1 they appear as
  //    "&lt;b&gt;" / "&lt;/b&gt;" — restore them verbatim. Only the exact,
  //    attribute-free forms match, so `<b onclick=…>` stays escaped.
  return escaped
    .replace(/&lt;b&gt;/g, "<b>")
    .replace(/&lt;\/b&gt;/g, "</b>");
}
|
||||||
Reference in New Issue
Block a user