Răsfoiți Sursa

feat: extend attachment fallback to lists, nested multipart, non-base64

Builds on the MIME source fallback in 78f1edd with four targeted fixes:

- listMessages: emit fast-path hasAttachments via AppleScript count.
  Skips the MIME source scan to keep listing fast, so it reports false
  negatives for MIME-embedded attachments (acceptable — get-message and
  list-attachments do the authoritative check).

- mimeParse: recursive descent into nested multipart containers
  (multipart/alternative, multipart/related) so attachments nested
  alongside text+html bodies or inline images are discovered.

- mimeParse: decode quoted-printable and 7bit/8bit transfer
  encodings in addition to base64. Base64 still covers the common
  case; other encodings now extract correctly instead of returning
  null.

- Document the perf tradeoff on getMessage's source scan and the
  120s timeout on getRawSource.

8 new unit tests (18 total in mimeParse.test.ts); all 81 tests in the
suite pass. Typecheck and build are clean.
Robert Sweet 2 luni în urmă
părinte
comite
16b892f497
3 a modificat fișierele cu 272 adăugiri și 21 ștergeri
  1. 39 4
      src/services/appleMailManager.ts
  2. 141 0
      src/utils/mimeParse.test.ts
  3. 92 17
      src/utils/mimeParse.ts

+ 39 - 4
src/services/appleMailManager.ts

@@ -499,6 +499,12 @@ export class AppleMailManager {
                   set attCount to count of mail attachments of msg
                   if attCount > 0 then set hasAtt to "true"
                 end try
+                -- MIME-embedded attachments are invisible to AppleScript's
+                -- attachment object. Fall back to scanning the raw source.
+                -- This reads the full message source (can be MB-sized for
+                -- messages with large bodies), so it's the slowest part of
+                -- get-message for attachmentless messages. Accepted as the
+                -- cost of correct hasAttachments in the detail view.
                 if hasAtt is "false" then
                   try
                     set rawSrc to source of msg
@@ -597,6 +603,10 @@ export class AppleMailManager {
    * Get the raw MIME source of a message.
    * Used as fallback for attachment extraction when AppleScript
    * mail attachments returns empty.
+   *
+   * Timeout is 120s (2x the default) because `source of msg` returns
+   * the entire raw message including base64-encoded attachments —
+   * a 20MB attachment can take several seconds over Exchange/IMAP.
    */
   getRawSource(id: string): string | null {
     const script = buildAppLevelScript(`
@@ -684,8 +694,12 @@ export class AppleMailManager {
             set msgDate to ${AS_DATE_TO_STRING}
             set msgRead to read status of msg as string
             set msgFlagged to flagged status of msg as string
+            set msgHasAtt to "false"
+            try
+              if (count of mail attachments of msg) > 0 then set msgHasAtt to "true"
+            end try
             if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
-            set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged
+            set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & msgHasAtt
             set msgCount to msgCount + 1
           end if
         end try
@@ -717,8 +731,12 @@ export class AppleMailManager {
                   set msgDate to ${AS_DATE_TO_STRING}
                   set msgRead to read status of msg as string
                   set msgFlagged to flagged status of msg as string
+                  set msgHasAtt to "false"
+                  try
+                    if (count of mail attachments of msg) > 0 then set msgHasAtt to "true"
+                  end try
                   if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
-                  set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & name of mb
+                  set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & name of mb & "|||" & msgHasAtt
                   set msgCount to msgCount + 1
                 end if
               end if
@@ -745,6 +763,14 @@ export class AppleMailManager {
 
   /**
    * Parse message list output from AppleScript.
+   *
+   * Two emission schemas, disambiguated by length:
+   *   7 fields: single-mailbox — ...|||hasAtt (mailbox from caller)
+   *   8 fields: all-mailboxes — ...|||mailbox|||hasAtt
+   *
+   * `hasAttachments` here is the fast-path AppleScript count only; it will
+   * false-negative for MIME-embedded attachments (a known AppleScript
+   * limitation). Use getMessage or list-attachments for authoritative info.
    */
   private parseMessageList(output: string, mailbox: string, account: string): Message[] {
     const items = output.split("|||ITEM|||");
@@ -754,6 +780,15 @@ export class AppleMailManager {
       const parts = item.split("|||");
       if (parts.length < 6) continue;
 
+      let msgMailbox = mailbox;
+      let hasAttachments = false;
+      if (parts.length >= 8) {
+        msgMailbox = parts[6];
+        hasAttachments = parts[7] === "true";
+      } else if (parts.length === 7) {
+        hasAttachments = parts[6] === "true";
+      }
+
       messages.push({
         id: parts[0].trim(),
         subject: parts[1],
@@ -764,9 +799,9 @@ export class AppleMailManager {
         isFlagged: parts[5] === "true",
         isJunk: false,
         isDeleted: false,
-        mailbox: parts.length >= 7 ? parts[6] : mailbox,
+        mailbox: msgMailbox,
         account,
-        hasAttachments: false,
+        hasAttachments,
       });
     }
 

+ 141 - 0
src/utils/mimeParse.test.ts

@@ -152,3 +152,144 @@ describe("extractMimeAttachment", () => {
     expect(extractMimeAttachment("   ", "test.pdf")).toBeNull();
   });
 });
+
+// Nested multipart: mixed container with alternative (text+html) as one child
+// and the attachment as a sibling. This is the most common real-world shape.
+const MIME_NESTED_MULTIPART = `Content-Type: multipart/mixed;
+\tboundary="_outer_"
+
+--_outer_
+Content-Type: multipart/alternative;
+\tboundary="_inner_"
+
+--_inner_
+Content-Type: text/plain; charset="us-ascii"
+
+Plain body
+
+--_inner_
+Content-Type: text/html; charset="us-ascii"
+
+<html><body>HTML body</body></html>
+
+--_inner_--
+
+--_outer_
+Content-Type: application/pdf; name="nested.pdf"
+Content-Disposition: attachment; filename="nested.pdf"; size=42
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjAK
+
+--_outer_--`;
+
+// Attachment nested inside multipart/related (common for HTML emails
+// with inline images that also carry a real file attachment).
+const MIME_DEEPLY_NESTED = `Content-Type: multipart/mixed;
+\tboundary="_L1_"
+
+--_L1_
+Content-Type: multipart/related;
+\tboundary="_L2_"
+
+--_L2_
+Content-Type: text/html
+
+<html>body</html>
+
+--_L2_
+Content-Type: application/pdf; name="deep.pdf"
+Content-Disposition: attachment; filename="deep.pdf"
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjAK
+
+--_L2_--
+
+--_L1_--`;
+
+describe("parseMimeAttachments — nested multipart", () => {
+  it("finds attachments alongside a nested multipart/alternative", () => {
+    const result = parseMimeAttachments(MIME_NESTED_MULTIPART);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("nested.pdf");
+  });
+
+  it("descends into multipart/related to find attachments", () => {
+    const result = parseMimeAttachments(MIME_DEEPLY_NESTED);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("deep.pdf");
+  });
+});
+
+describe("extractMimeAttachment — nested multipart", () => {
+  it("extracts a nested attachment by name", () => {
+    const result = extractMimeAttachment(MIME_NESTED_MULTIPART, "nested.pdf");
+    expect(result).not.toBeNull();
+    expect(result!.name).toBe("nested.pdf");
+    expect(result!.data.length).toBeGreaterThan(0);
+  });
+
+  it("extracts an attachment from deeply nested multipart/related", () => {
+    const result = extractMimeAttachment(MIME_DEEPLY_NESTED, "deep.pdf");
+    expect(result).not.toBeNull();
+    expect(result!.name).toBe("deep.pdf");
+  });
+});
+
+// Quoted-printable and raw (7bit/8bit) encoded attachments.
+// The QP fixture body "Hello=0Aworld=21" decodes to "Hello\nworld!".
+const MIME_QP_ATTACH = `Content-Type: multipart/mixed;
+\tboundary="_QP_"
+
+--_QP_
+Content-Type: text/plain; name="note.txt"
+Content-Disposition: attachment; filename="note.txt"
+Content-Transfer-Encoding: quoted-printable
+
+Hello=0Aworld=21
+
+--_QP_--`;
+
+const MIME_7BIT_ATTACH = `Content-Type: multipart/mixed;
+\tboundary="_7B_"
+
+--_7B_
+Content-Type: text/csv; name="data.csv"
+Content-Disposition: attachment; filename="data.csv"
+Content-Transfer-Encoding: 7bit
+
+id,name
+1,alice
+2,bob
+
+--_7B_--`;
+
+describe("extractMimeAttachment — transfer encodings", () => {
+  it("decodes quoted-printable content", () => {
+    const result = extractMimeAttachment(MIME_QP_ATTACH, "note.txt");
+    expect(result).not.toBeNull();
+    expect(result!.data.toString("utf8")).toBe("Hello\nworld!");
+  });
+
+  it("returns raw bytes for 7bit content", () => {
+    const result = extractMimeAttachment(MIME_7BIT_ATTACH, "data.csv");
+    expect(result).not.toBeNull();
+    expect(result!.data.toString("utf8")).toContain("id,name");
+    expect(result!.data.toString("utf8")).toContain("alice");
+  });
+});
+
+describe("parseMimeAttachments — transfer encodings", () => {
+  it("lists quoted-printable attachments", () => {
+    const result = parseMimeAttachments(MIME_QP_ATTACH);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("note.txt");
+  });
+
+  it("lists 7bit attachments", () => {
+    const result = parseMimeAttachments(MIME_7BIT_ATTACH);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("data.csv");
+  });
+});

+ 92 - 17
src/utils/mimeParse.ts

@@ -13,7 +13,7 @@ export interface MimeAttachmentInfo {
   name: string;
   /** MIME type from Content-Type header */
   mimeType: string;
-  /** Size in bytes from Content-Disposition size parameter, or estimated from base64 */
+  /** Size in bytes from Content-Disposition size parameter, or estimated from body */
   size: number;
 }
 
@@ -22,8 +22,14 @@ export interface MimeAttachmentData extends MimeAttachmentInfo {
   data: Buffer;
 }
 
+interface MimePart {
+  headers: string;
+  body: string;
+}
+
 /**
- * Extract the boundary string from a Content-Type header.
+ * Extract the boundary string from a Content-Type header value
+ * (or from any string containing a boundary= parameter).
  */
 function extractBoundary(source: string): string | null {
   const match = source.match(/boundary="?([^";\s\r\n]+)"?/i);
@@ -101,13 +107,11 @@ function estimateBase64Size(base64Body: string): number {
 }
 
 /**
- * Split MIME source into parts using the boundary.
+ * Split a MIME block into parts using the given boundary.
+ * Does not recurse — call walkLeafParts for recursive traversal.
  */
-function splitMimeParts(
-  source: string,
-  boundary: string
-): Array<{ headers: string; body: string }> {
-  const parts: Array<{ headers: string; body: string }> = [];
+function splitMimeParts(source: string, boundary: string): MimePart[] {
+  const parts: MimePart[] = [];
   const boundaryDelim = `--${boundary}`;
 
   const sections = source.split(boundaryDelim);
@@ -129,9 +133,82 @@ function splitMimeParts(
   return parts;
 }
 
+/**
+ * Walk a multipart MIME block and return all non-multipart leaf parts,
+ * descending into nested multipart/* containers (alternative, related, mixed).
+ */
+function walkLeafParts(source: string, boundary: string): MimePart[] {
+  const result: MimePart[] = [];
+  const parts = splitMimeParts(source, boundary);
+
+  for (const part of parts) {
+    const ct = getHeader(part.headers, "Content-Type");
+    if (ct && /^multipart\//i.test(ct)) {
+      const nestedBoundary = extractBoundary(ct);
+      if (nestedBoundary) {
+        result.push(...walkLeafParts(part.body, nestedBoundary));
+        continue;
+      }
+    }
+    result.push(part);
+  }
+
+  return result;
+}
+
+/**
+ * Decode a MIME part body to bytes based on its transfer encoding.
+ * Supports base64, quoted-printable, and 7bit/8bit/binary (raw).
+ */
+function decodeBody(body: string, encoding: string | null): Buffer {
+  const enc = (encoding || "").toLowerCase().trim();
+  if (enc === "base64") {
+    return Buffer.from(body.replace(/[\s\r\n]/g, ""), "base64");
+  }
+  if (enc === "quoted-printable") {
+    return decodeQuotedPrintable(body);
+  }
+  // 7bit, 8bit, binary, or unspecified — treat as raw bytes
+  return Buffer.from(body, "binary");
+}
+
+/**
+ * Decode quoted-printable-encoded body to bytes.
+ * Handles soft line breaks (=<CRLF>) and =XX hex escapes per RFC 2045 §6.7.
+ */
+function decodeQuotedPrintable(body: string): Buffer {
+  // Remove soft line breaks: `=` immediately followed by CRLF or LF
+  const noSoft = body.replace(/=\r?\n/g, "");
+  const bytes: number[] = [];
+  for (let i = 0; i < noSoft.length; i++) {
+    const c = noSoft[i];
+    if (c === "=" && i + 2 < noSoft.length) {
+      const hex = noSoft.substring(i + 1, i + 3);
+      if (/^[0-9A-Fa-f]{2}$/.test(hex)) {
+        bytes.push(parseInt(hex, 16));
+        i += 2;
+        continue;
+      }
+    }
+    bytes.push(c.charCodeAt(0) & 0xff);
+  }
+  return Buffer.from(bytes);
+}
+
+/**
+ * Estimate body size for metadata when Content-Disposition size is absent.
+ */
+function estimateSize(body: string, encoding: string | null): number {
+  const enc = (encoding || "").toLowerCase().trim();
+  if (enc === "base64") return estimateBase64Size(body);
+  // For other encodings the body length is a reasonable proxy
+  return body.length;
+}
+
 /**
  * Parse MIME source and return metadata for all file attachments.
- * Skips inline dispositions (signature images, etc.).
+ * Skips inline dispositions (signature images, etc.). Descends into
+ * nested multipart/* containers.
  *
  * @param source - Raw MIME source of the email
  * @returns Array of attachment metadata (name, mimeType, size)
@@ -142,7 +219,7 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
   const boundary = extractBoundary(source);
   if (!boundary) return [];
 
-  const parts = splitMimeParts(source, boundary);
+  const parts = walkLeafParts(source, boundary);
   const attachments: MimeAttachmentInfo[] = [];
 
   for (const part of parts) {
@@ -152,12 +229,11 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
     if (isInlineDisposition(part.headers)) continue;
 
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
-    if (!encoding || encoding.toLowerCase() !== "base64") continue;
 
     attachments.push({
       name: filename,
       mimeType: extractMimeType(part.headers),
-      size: extractSize(part.headers) || estimateBase64Size(part.body),
+      size: extractSize(part.headers) || estimateSize(part.body, encoding),
     });
   }
 
@@ -166,6 +242,8 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
 
 /**
  * Extract and decode a specific attachment from MIME source by filename.
+ * Supports base64, quoted-printable, and 7bit/8bit/binary transfer encodings.
+ * Descends into nested multipart/* containers.
  *
  * @param source - Raw MIME source of the email
  * @param attachmentName - Filename to extract
@@ -180,17 +258,14 @@ export function extractMimeAttachment(
   const boundary = extractBoundary(source);
   if (!boundary) return null;
 
-  const parts = splitMimeParts(source, boundary);
+  const parts = walkLeafParts(source, boundary);
 
   for (const part of parts) {
     const filename = extractFilename(part.headers);
     if (filename !== attachmentName) continue;
 
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
-    if (!encoding || encoding.toLowerCase() !== "base64") continue;
-
-    const base64Clean = part.body.replace(/[\s\r\n]/g, "");
-    const data = Buffer.from(base64Clean, "base64");
+    const data = decodeBody(part.body, encoding);
 
     return {
       name: filename,