| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- /**
- * MIME Source Parser for Attachment Extraction
- *
- * Parses raw email MIME source to extract attachment metadata and content.
- * Used as a fallback when AppleScript's `mail attachments` returns empty
- * (which happens across all account types: iCloud, Google, Exchange).
- *
- * @module utils/mimeParse
- */
/**
 * Extract the boundary string from a Content-Type header value
 * (or from any string containing a `boundary=` parameter).
 *
 * A quoted boundary is returned in full, including embedded spaces, because
 * RFC 2046 allows them (e.g. `boundary="simple boundary"`). An unquoted
 * boundary ends at the first whitespace, `;` or `"`.
 *
 * @param {string} source - Header value or larger text to search.
 * @returns {string|null} The boundary without quotes, or null if none is found.
 */
function extractBoundary(source) {
    // First alternative: well-formed quoted value. Second alternative: bare
    // token, also tolerating an opening quote that is never closed.
    const match = source.match(/boundary=(?:"([^"\r\n]+)"|"?([^";\s]+))/i);
    if (!match) {
        return null;
    }
    return match[1] || match[2];
}
/**
 * Extract a header value from a MIME part header block.
 * Handles folded headers (continuation lines starting with whitespace).
 *
 * The header name is matched case-insensitively at the start of a line.
 * A header that is present but has no value yields an empty string; the
 * value never spills over into the following header line.
 *
 * @param {string} headers - Raw header block of a MIME part.
 * @param {string} name - Header field name, without the trailing colon.
 * @returns {string|null} The unfolded, trimmed value, or null if the header is absent.
 */
function getHeader(headers, name) {
    // The name is used literally, so neutralise any regex metacharacters in it.
    const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Only blanks/tabs may follow the colon on the same line: using \s here
    // would swallow the line break and capture the next header as the value.
    const regex = new RegExp(`^${escapedName}:[ \\t]*(.*(?:\\r?\\n[ \\t]+.*)*)`, "im");
    const match = headers.match(regex);
    if (!match) {
        return null;
    }
    // Unfold: replace newline+whitespace with single space
    return match[1].replace(/\r?\n[ \t]+/g, " ").trim();
}
/**
 * Extract filename from Content-Disposition or Content-Type headers.
 *
 * The Content-Disposition `filename` parameter is preferred; the Content-Type
 * `name` parameter is the fallback. A properly quoted value is returned whole,
 * so names containing `;` (e.g. `"report; final.pdf"`) are not cut short.
 *
 * @param {string} headers - Raw header block of a MIME part.
 * @returns {string|null} The trimmed filename, or null if neither parameter is present.
 */
function extractFilename(headers) {
    // Each pattern tries a closed quoted string first, then falls back to a
    // bare value (optionally with a stray opening quote) that ends at `;` or `"`.
    const lookups = [
        ["Content-Disposition", /filename=(?:"([^"\r\n]+)"|"?([^";\r\n]+))/i],
        ["Content-Type", /name=(?:"([^"\r\n]+)"|"?([^";\r\n]+))/i],
    ];
    for (const [headerName, paramPattern] of lookups) {
        const headerValue = getHeader(headers, headerName);
        const match = headerValue ? headerValue.match(paramPattern) : null;
        if (match) {
            return (match[1] || match[2]).trim();
        }
    }
    return null;
}
/**
 * Tell whether a MIME part declares an inline Content-Disposition,
 * i.e. it is embedded content rather than a real attachment.
 *
 * @param {string} headers - Raw header block of a MIME part.
 * @returns {boolean} True when the disposition value begins with "inline"
 *   (case-insensitive); false otherwise, including when the header is missing.
 */
function isInlineDisposition(headers) {
    const disposition = getHeader(headers, "Content-Disposition");
    return Boolean(disposition) && disposition.toLowerCase().startsWith("inline");
}
/**
 * Extract size from the Content-Disposition `size` parameter.
 *
 * Only a real `size` parameter counts: it must sit at the start of the value
 * or directly after a `;`. Text inside quoted strings (for example a filename
 * such as "fontsize=12.txt") is ignored, while a quoted numeric value
 * (`size="2048"`) is accepted.
 *
 * @param {string} headers - Raw header block of a MIME part.
 * @returns {number} The declared size, or 0 when no size is declared.
 */
function extractSize(headers) {
    const dispHeader = getHeader(headers, "Content-Disposition");
    if (!dispHeader) {
        return 0;
    }
    // Blank out quoted text so it cannot be mistaken for a parameter, but
    // unwrap all-digit quoted values so `size="123"` is still recognised.
    const scannable = dispHeader.replace(/"([^"]*)"/g, (quoted, inner) => (/^\d+$/.test(inner) ? inner : '""'));
    const sizeMatch = scannable.match(/(?:^|;)\s*size=(\d+)/i);
    return sizeMatch ? Number.parseInt(sizeMatch[1], 10) : 0;
}
/**
 * Read the media type (e.g. "application/pdf") from a part's Content-Type
 * header, lower-cased and without any parameters.
 *
 * @param {string} headers - Raw header block of a MIME part.
 * @returns {string} The media type, or "application/octet-stream" when the
 *   header is missing or does not start with a type token.
 */
function extractMimeType(headers) {
    const fallbackType = "application/octet-stream";
    const contentType = getHeader(headers, "Content-Type");
    // Everything up to the first `;` or whitespace is the type/subtype token.
    const leadingToken = contentType ? contentType.match(/^([^;\s]+)/) : null;
    if (leadingToken === null) {
        return fallbackType;
    }
    return leadingToken[1].toLowerCase();
}
/**
 * Compute the decoded byte count of a base64 body from its text length.
 *
 * Whitespace (line breaks between encoded lines) and trailing `=` padding are
 * not payload and are excluded, so every 4 remaining characters stand for
 * 3 bytes and a trailing group of 2 or 3 characters stands for 1 or 2 bytes.
 *
 * @param {string} base64Body - Base64 text, possibly wrapped across lines.
 * @returns {number} Number of bytes the body decodes to.
 */
function estimateBase64Size(base64Body) {
    const payload = base64Body.replace(/\s/g, "").replace(/=+$/, "");
    return Math.floor((payload.length * 3) / 4);
}
/**
 * Split a MIME block into its direct child parts using the given boundary.
 * Does not recurse — call walkLeafParts for recursive traversal.
 *
 * A delimiter is recognised only when `--<boundary>` begins a line and is
 * followed by nothing but an optional closing `--` and trailing blanks. This
 * keeps a longer boundary that merely shares a prefix (such as the boundary of
 * a nested multipart) from being mistaken for this one. Text before the first
 * delimiter (preamble) and after the closing delimiter (epilogue) is ignored.
 *
 * Each section is trimmed before it is split at its first blank line; sections
 * that are empty or have no blank line are skipped.
 *
 * @param {string} source - Text containing the delimited parts.
 * @param {string} boundary - Boundary string without the leading "--".
 * @returns {{headers: string, body: string}[]} Parts in document order.
 */
function splitMimeParts(source, boundary) {
    const parts = [];
    // Boundaries may contain regex metacharacters such as "+", "." or "?".
    const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const delimiterRe = new RegExp(`(?:^|\\r?\\n)--${escapedBoundary}(--)?[ \\t]*(?=\\r?\\n|$)`, "g");
    const addPart = (section) => {
        const trimmed = section.trim();
        if (!trimmed) {
            return;
        }
        // Split headers from body at first blank line
        const blankLineIdx = trimmed.search(/\r?\n\r?\n/);
        if (blankLineIdx === -1) {
            return;
        }
        parts.push({
            headers: trimmed.substring(0, blankLineIdx),
            body: trimmed.substring(blankLineIdx).replace(/^\r?\n\r?\n/, ""),
        });
    };
    // -1 means "not inside a part": before the first delimiter or after the closing one.
    let partStart = -1;
    let match = delimiterRe.exec(source);
    while (match !== null) {
        if (partStart !== -1) {
            addPart(source.substring(partStart, match.index));
        }
        if (match[1]) {
            // Closing delimiter: whatever follows is epilogue.
            partStart = -1;
            break;
        }
        partStart = delimiterRe.lastIndex;
        match = delimiterRe.exec(source);
    }
    if (partStart !== -1) {
        // No closing delimiter: keep the final part instead of dropping it.
        addPart(source.substring(partStart));
    }
    return parts;
}
/**
 * Collect every non-multipart leaf part of a multipart MIME block, in
 * document order, descending into nested multipart/* containers.
 *
 * A part whose Content-Type is multipart/* but carries no extractable
 * boundary cannot be descended into and is returned as a leaf itself.
 *
 * @param {string} source - Multipart block to traverse.
 * @param {string} boundary - Boundary delimiting the block's direct parts.
 * @returns {{headers: string, body: string}[]} Flattened list of leaf parts.
 */
function walkLeafParts(source, boundary) {
    const leaves = [];
    const visit = (block, delimiter) => {
        splitMimeParts(block, delimiter).forEach((part) => {
            const contentType = getHeader(part.headers, "Content-Type");
            const isContainer = Boolean(contentType) && /^multipart\//i.test(contentType);
            const innerBoundary = isContainer ? extractBoundary(contentType) : null;
            if (innerBoundary) {
                visit(part.body, innerBoundary);
            } else {
                leaves.push(part);
            }
        });
    };
    visit(source, boundary);
    return leaves;
}
/**
 * Turn a MIME part body into bytes according to its
 * Content-Transfer-Encoding.
 *
 * @param {string} body - Encoded body text of the part.
 * @param {string|null|undefined} encoding - Transfer encoding; compared
 *   case-insensitively after trimming.
 * @returns {Buffer} Decoded bytes. base64 and quoted-printable are decoded;
 *   any other or missing encoding (7bit, 8bit, binary) is taken as raw bytes,
 *   one byte per character.
 */
function decodeBody(body, encoding) {
    switch ((encoding || "").toLowerCase().trim()) {
        case "base64":
            // Line breaks inside the encoded text are not part of the data.
            return Buffer.from(body.replace(/[\s\r\n]/g, ""), "base64");
        case "quoted-printable":
            return decodeQuotedPrintable(body);
        default:
            return Buffer.from(body, "binary");
    }
}
/**
 * Decode a quoted-printable body into bytes.
 *
 * Soft line breaks (`=` directly followed by CRLF or LF) are removed first.
 * Each `=XX` with two hex digits then becomes the byte 0xXX; every other
 * character, including an `=` not followed by two hex digits, contributes
 * the low 8 bits of its character code.
 *
 * @param {string} body - Quoted-printable text.
 * @returns {Buffer} Decoded bytes.
 */
function decodeQuotedPrintable(body) {
    const text = body.replace(/=\r?\n/g, "");
    const decoded = [];
    let pos = 0;
    while (pos < text.length) {
        // An escape needs two more characters after the "=".
        if (text[pos] === "=" && pos + 2 < text.length) {
            const pair = text.slice(pos + 1, pos + 3);
            if (/^[0-9A-Fa-f]{2}$/.test(pair)) {
                decoded.push(parseInt(pair, 16));
                pos += 3;
                continue;
            }
        }
        decoded.push(text.charCodeAt(pos) & 0xff);
        pos += 1;
    }
    return Buffer.from(decoded);
}
/**
 * Approximate a part's size for metadata when Content-Disposition does not
 * declare one.
 *
 * @param {string} body - Encoded body text of the part.
 * @param {string|null|undefined} encoding - Transfer encoding; compared
 *   case-insensitively after trimming.
 * @returns {number} The base64 size estimate for base64 bodies; otherwise the
 *   length of the body text, used as a proxy.
 */
function estimateSize(body, encoding) {
    const isBase64 = (encoding || "").toLowerCase().trim() === "base64";
    return isBase64 ? estimateBase64Size(body) : body.length;
}
/**
 * List the file attachments found in raw MIME source.
 *
 * Leaf parts of the message (nested multipart/* containers are traversed) are
 * reported when they carry a filename and are not marked with an inline
 * disposition. The size is the one declared in Content-Disposition when it is
 * non-zero, otherwise an estimate derived from the encoded body.
 *
 * @param source - Raw MIME source of the email
 * @returns Array of attachment metadata (name, mimeType, size); empty when the
 *   source is blank or contains no boundary
 */
export function parseMimeAttachments(source) {
    const boundary = source && source.trim() ? extractBoundary(source) : null;
    if (!boundary) {
        return [];
    }
    return walkLeafParts(source, boundary).reduce((found, { headers, body }) => {
        const name = extractFilename(headers);
        if (name && !isInlineDisposition(headers)) {
            const encoding = getHeader(headers, "Content-Transfer-Encoding");
            found.push({
                name,
                mimeType: extractMimeType(headers),
                size: extractSize(headers) || estimateSize(body, encoding),
            });
        }
        return found;
    }, []);
}
/**
 * Find one attachment in raw MIME source by filename and decode it.
 *
 * Leaf parts are searched in document order (nested multipart/* containers
 * are traversed) and the first part whose filename equals `attachmentName`
 * is used. Its body is decoded according to its Content-Transfer-Encoding.
 * The reported size is the one declared in Content-Disposition when it is
 * non-zero, otherwise the decoded byte length.
 *
 * @param source - Raw MIME source of the email
 * @param attachmentName - Filename to extract
 * @returns Decoded attachment data (name, mimeType, size, data), or null if
 *   the source is blank, has no boundary, or has no part with that filename
 */
export function extractMimeAttachment(source, attachmentName) {
    const boundary = source && source.trim() ? extractBoundary(source) : null;
    if (!boundary) {
        return null;
    }
    const wanted = walkLeafParts(source, boundary).find(
        (candidate) => extractFilename(candidate.headers) === attachmentName
    );
    if (wanted === undefined) {
        return null;
    }
    const { headers, body } = wanted;
    const data = decodeBody(body, getHeader(headers, "Content-Transfer-Encoding"));
    const mimeType = extractMimeType(headers);
    const declaredSize = extractSize(headers);
    return {
        // The match above is a strict equality, so this is the part's own filename.
        name: attachmentName,
        mimeType,
        size: declaredSize || data.length,
        data,
    };
}
|