Bläddra i källkod

feat: extend attachment fallback to lists, nested multipart, non-base64

Builds on the MIME source fallback in 78f1edd with four targeted fixes:

- listMessages: emit fast-path hasAttachments via AppleScript count.
  Skips the MIME source scan to keep list performance; this yields
  false negatives for MIME-embedded attachments (acceptable — get-message
  and list-attachments do the authoritative check).

- mimeParse: recursive descent into nested multipart containers
  (multipart/alternative, multipart/related) so attachments nested
  alongside text+html bodies or inline images are discovered.

- mimeParse: decode quoted-printable and 7bit/8bit transfer
  encodings in addition to base64. Base64 still covers the common
  case; other encodings now extract correctly instead of returning
  null.

- Document the perf tradeoff on getMessage's source scan and the
  120s timeout on getRawSource.

8 new unit tests (18 total in mimeParse.test.ts); all 81 tests in the
suite pass. Typecheck and build are clean.
Robert Sweet 2 månader sedan
förälder
incheckning
16b892f497
3 ändrade filer med 272 tillägg och 21 borttagningar
  1. 39 4
      src/services/appleMailManager.ts
  2. 141 0
      src/utils/mimeParse.test.ts
  3. 92 17
      src/utils/mimeParse.ts

+ 39 - 4
src/services/appleMailManager.ts

@@ -499,6 +499,12 @@ export class AppleMailManager {
                   set attCount to count of mail attachments of msg
                   set attCount to count of mail attachments of msg
                   if attCount > 0 then set hasAtt to "true"
                   if attCount > 0 then set hasAtt to "true"
                 end try
                 end try
+                -- MIME-embedded attachments are invisible to AppleScript's
+                -- attachment object. Fall back to scanning the raw source.
+                -- This reads the full message source (can be MB-sized for
+                -- messages with large bodies), so it's the slowest part of
+                -- get-message for attachmentless messages. Accepted as the
+                -- cost of correct hasAttachments in the detail view.
                 if hasAtt is "false" then
                 if hasAtt is "false" then
                   try
                   try
                     set rawSrc to source of msg
                     set rawSrc to source of msg
@@ -597,6 +603,10 @@ export class AppleMailManager {
    * Get the raw MIME source of a message.
    * Get the raw MIME source of a message.
    * Used as fallback for attachment extraction when AppleScript
    * Used as fallback for attachment extraction when AppleScript
    * mail attachments returns empty.
    * mail attachments returns empty.
+   *
+   * Timeout is 120s (2x the default) because `source of msg` returns
+   * the entire raw message including base64-encoded attachments —
+   * a 20MB attachment can take several seconds over Exchange/IMAP.
    */
    */
   getRawSource(id: string): string | null {
   getRawSource(id: string): string | null {
     const script = buildAppLevelScript(`
     const script = buildAppLevelScript(`
@@ -684,8 +694,12 @@ export class AppleMailManager {
             set msgDate to ${AS_DATE_TO_STRING}
             set msgDate to ${AS_DATE_TO_STRING}
             set msgRead to read status of msg as string
             set msgRead to read status of msg as string
             set msgFlagged to flagged status of msg as string
             set msgFlagged to flagged status of msg as string
+            set msgHasAtt to "false"
+            try
+              if (count of mail attachments of msg) > 0 then set msgHasAtt to "true"
+            end try
             if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
             if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
-            set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged
+            set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & msgHasAtt
             set msgCount to msgCount + 1
             set msgCount to msgCount + 1
           end if
           end if
         end try
         end try
@@ -717,8 +731,12 @@ export class AppleMailManager {
                   set msgDate to ${AS_DATE_TO_STRING}
                   set msgDate to ${AS_DATE_TO_STRING}
                   set msgRead to read status of msg as string
                   set msgRead to read status of msg as string
                   set msgFlagged to flagged status of msg as string
                   set msgFlagged to flagged status of msg as string
+                  set msgHasAtt to "false"
+                  try
+                    if (count of mail attachments of msg) > 0 then set msgHasAtt to "true"
+                  end try
                   if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
                   if msgCount > 0 then set outputText to outputText & "|||ITEM|||"
-                  set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & name of mb
+                  set outputText to outputText & msgId & "|||" & msgSubject & "|||" & msgSender & "|||" & msgDate & "|||" & msgRead & "|||" & msgFlagged & "|||" & name of mb & "|||" & msgHasAtt
                   set msgCount to msgCount + 1
                   set msgCount to msgCount + 1
                 end if
                 end if
               end if
               end if
@@ -745,6 +763,14 @@ export class AppleMailManager {
 
 
   /**
   /**
    * Parse message list output from AppleScript.
    * Parse message list output from AppleScript.
+   *
+   * Two emission schemas, disambiguated by field count after splitting on "|||":
+   *   7 fields: single-mailbox — ...|hasAtt (mailbox from caller)
+   *   8 fields: all-mailboxes — ...|mailbox|hasAtt
+   *
+   * `hasAttachments` here is the fast-path AppleScript count only; it yields
+   * false negatives for MIME-embedded attachments (a known AppleScript
+   * limitation). Use getMessage or list-attachments for authoritative info.
    */
    */
   private parseMessageList(output: string, mailbox: string, account: string): Message[] {
   private parseMessageList(output: string, mailbox: string, account: string): Message[] {
     const items = output.split("|||ITEM|||");
     const items = output.split("|||ITEM|||");
@@ -754,6 +780,15 @@ export class AppleMailManager {
       const parts = item.split("|||");
       const parts = item.split("|||");
       if (parts.length < 6) continue;
       if (parts.length < 6) continue;
 
 
+      let msgMailbox = mailbox;
+      let hasAttachments = false;
+      if (parts.length >= 8) {
+        msgMailbox = parts[6];
+        hasAttachments = parts[7] === "true";
+      } else if (parts.length === 7) {
+        hasAttachments = parts[6] === "true";
+      }
+
       messages.push({
       messages.push({
         id: parts[0].trim(),
         id: parts[0].trim(),
         subject: parts[1],
         subject: parts[1],
@@ -764,9 +799,9 @@ export class AppleMailManager {
         isFlagged: parts[5] === "true",
         isFlagged: parts[5] === "true",
         isJunk: false,
         isJunk: false,
         isDeleted: false,
         isDeleted: false,
-        mailbox: parts.length >= 7 ? parts[6] : mailbox,
+        mailbox: msgMailbox,
         account,
         account,
-        hasAttachments: false,
+        hasAttachments,
       });
       });
     }
     }
 
 

+ 141 - 0
src/utils/mimeParse.test.ts

@@ -152,3 +152,144 @@ describe("extractMimeAttachment", () => {
     expect(extractMimeAttachment("   ", "test.pdf")).toBeNull();
     expect(extractMimeAttachment("   ", "test.pdf")).toBeNull();
   });
   });
 });
 });
+
+// Nested multipart: mixed container with alternative (text+html) as one child
+// and the attachment as a sibling. This is the most common real-world shape.
+const MIME_NESTED_MULTIPART = `Content-Type: multipart/mixed;
+\tboundary="_outer_"
+
+--_outer_
+Content-Type: multipart/alternative;
+\tboundary="_inner_"
+
+--_inner_
+Content-Type: text/plain; charset="us-ascii"
+
+Plain body
+
+--_inner_
+Content-Type: text/html; charset="us-ascii"
+
+<html><body>HTML body</body></html>
+
+--_inner_--
+
+--_outer_
+Content-Type: application/pdf; name="nested.pdf"
+Content-Disposition: attachment; filename="nested.pdf"; size=42
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjAK
+
+--_outer_--`;
+
+// Attachment nested inside multipart/related (common for HTML emails
+// with inline images that also carry a real file attachment).
+const MIME_DEEPLY_NESTED = `Content-Type: multipart/mixed;
+\tboundary="_L1_"
+
+--_L1_
+Content-Type: multipart/related;
+\tboundary="_L2_"
+
+--_L2_
+Content-Type: text/html
+
+<html>body</html>
+
+--_L2_
+Content-Type: application/pdf; name="deep.pdf"
+Content-Disposition: attachment; filename="deep.pdf"
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjAK
+
+--_L2_--
+
+--_L1_--`;
+
+describe("parseMimeAttachments — nested multipart", () => {
+  it("finds attachments alongside a nested multipart/alternative", () => {
+    const result = parseMimeAttachments(MIME_NESTED_MULTIPART);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("nested.pdf");
+  });
+
+  it("descends into multipart/related to find attachments", () => {
+    const result = parseMimeAttachments(MIME_DEEPLY_NESTED);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("deep.pdf");
+  });
+});
+
+describe("extractMimeAttachment — nested multipart", () => {
+  it("extracts a nested attachment by name", () => {
+    const result = extractMimeAttachment(MIME_NESTED_MULTIPART, "nested.pdf");
+    expect(result).not.toBeNull();
+    expect(result!.name).toBe("nested.pdf");
+    expect(result!.data.length).toBeGreaterThan(0);
+  });
+
+  it("extracts an attachment from deeply nested multipart/related", () => {
+    const result = extractMimeAttachment(MIME_DEEPLY_NESTED, "deep.pdf");
+    expect(result).not.toBeNull();
+    expect(result!.name).toBe("deep.pdf");
+  });
+});
+
+// Quoted-printable and raw (7bit/8bit) encoded attachments.
+// "Hello=0A" decodes to "Hello\n".
+const MIME_QP_ATTACH = `Content-Type: multipart/mixed;
+\tboundary="_QP_"
+
+--_QP_
+Content-Type: text/plain; name="note.txt"
+Content-Disposition: attachment; filename="note.txt"
+Content-Transfer-Encoding: quoted-printable
+
+Hello=0Aworld=21
+
+--_QP_--`;
+
+const MIME_7BIT_ATTACH = `Content-Type: multipart/mixed;
+\tboundary="_7B_"
+
+--_7B_
+Content-Type: text/csv; name="data.csv"
+Content-Disposition: attachment; filename="data.csv"
+Content-Transfer-Encoding: 7bit
+
+id,name
+1,alice
+2,bob
+
+--_7B_--`;
+
+describe("extractMimeAttachment — transfer encodings", () => {
+  it("decodes quoted-printable content", () => {
+    const result = extractMimeAttachment(MIME_QP_ATTACH, "note.txt");
+    expect(result).not.toBeNull();
+    expect(result!.data.toString("utf8")).toBe("Hello\nworld!");
+  });
+
+  it("returns raw bytes for 7bit content", () => {
+    const result = extractMimeAttachment(MIME_7BIT_ATTACH, "data.csv");
+    expect(result).not.toBeNull();
+    expect(result!.data.toString("utf8")).toContain("id,name");
+    expect(result!.data.toString("utf8")).toContain("alice");
+  });
+});
+
+describe("parseMimeAttachments — transfer encodings", () => {
+  it("lists quoted-printable attachments", () => {
+    const result = parseMimeAttachments(MIME_QP_ATTACH);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("note.txt");
+  });
+
+  it("lists 7bit attachments", () => {
+    const result = parseMimeAttachments(MIME_7BIT_ATTACH);
+    expect(result).toHaveLength(1);
+    expect(result[0].name).toBe("data.csv");
+  });
+});

+ 92 - 17
src/utils/mimeParse.ts

@@ -13,7 +13,7 @@ export interface MimeAttachmentInfo {
   name: string;
   name: string;
   /** MIME type from Content-Type header */
   /** MIME type from Content-Type header */
   mimeType: string;
   mimeType: string;
-  /** Size in bytes from Content-Disposition size parameter, or estimated from base64 */
+  /** Size in bytes from Content-Disposition size parameter, or estimated from body */
   size: number;
   size: number;
 }
 }
 
 
@@ -22,8 +22,14 @@ export interface MimeAttachmentData extends MimeAttachmentInfo {
   data: Buffer;
   data: Buffer;
 }
 }
 
 
+interface MimePart {
+  headers: string;
+  body: string;
+}
+
 /**
 /**
- * Extract the boundary string from a Content-Type header.
+ * Extract the boundary string from a Content-Type header value
+ * (or from any string containing a boundary= parameter).
  */
  */
 function extractBoundary(source: string): string | null {
 function extractBoundary(source: string): string | null {
   const match = source.match(/boundary="?([^";\s\r\n]+)"?/i);
   const match = source.match(/boundary="?([^";\s\r\n]+)"?/i);
@@ -101,13 +107,11 @@ function estimateBase64Size(base64Body: string): number {
 }
 }
 
 
 /**
 /**
- * Split MIME source into parts using the boundary.
+ * Split a MIME block into parts using the given boundary.
+ * Does not recurse — call walkLeafParts for recursive traversal.
  */
  */
-function splitMimeParts(
-  source: string,
-  boundary: string
-): Array<{ headers: string; body: string }> {
-  const parts: Array<{ headers: string; body: string }> = [];
+function splitMimeParts(source: string, boundary: string): MimePart[] {
+  const parts: MimePart[] = [];
   const boundaryDelim = `--${boundary}`;
   const boundaryDelim = `--${boundary}`;
 
 
   const sections = source.split(boundaryDelim);
   const sections = source.split(boundaryDelim);
@@ -129,9 +133,82 @@ function splitMimeParts(
   return parts;
   return parts;
 }
 }
 
 
+/**
+ * Walk a multipart MIME block and return all non-multipart leaf parts,
+ * descending into nested multipart/* containers (alternative, related, mixed).
+ */
+function walkLeafParts(source: string, boundary: string): MimePart[] {
+  const result: MimePart[] = [];
+  const parts = splitMimeParts(source, boundary);
+
+  for (const part of parts) {
+    const ct = getHeader(part.headers, "Content-Type");
+    if (ct && /^multipart\//i.test(ct)) {
+      const nestedBoundary = extractBoundary(ct);
+      if (nestedBoundary) {
+        result.push(...walkLeafParts(part.body, nestedBoundary));
+        continue;
+      }
+    }
+    result.push(part);
+  }
+
+  return result;
+}
+
+/**
+ * Decode a MIME part body to bytes based on its transfer encoding.
+ * Supports base64, quoted-printable, and 7bit/8bit/binary (raw).
+ */
+function decodeBody(body: string, encoding: string | null): Buffer {
+  const enc = (encoding || "").toLowerCase().trim();
+  if (enc === "base64") {
+    return Buffer.from(body.replace(/[\s\r\n]/g, ""), "base64");
+  }
+  if (enc === "quoted-printable") {
+    return decodeQuotedPrintable(body);
+  }
+  // 7bit, 8bit, binary, or unspecified — treat as raw bytes
+  return Buffer.from(body, "binary");
+}
+
+/**
+ * Decode quoted-printable-encoded body to bytes.
+ * Handles soft line breaks (=<CRLF>) and =XX hex escapes per RFC 2045 §6.7.
+ */
+function decodeQuotedPrintable(body: string): Buffer {
+  // Remove soft line breaks: `=` immediately followed by CRLF or LF
+  const noSoft = body.replace(/=\r?\n/g, "");
+  const bytes: number[] = [];
+  for (let i = 0; i < noSoft.length; i++) {
+    const c = noSoft[i];
+    if (c === "=" && i + 2 < noSoft.length) {
+      const hex = noSoft.substring(i + 1, i + 3);
+      if (/^[0-9A-Fa-f]{2}$/.test(hex)) {
+        bytes.push(parseInt(hex, 16));
+        i += 2;
+        continue;
+      }
+    }
+    bytes.push(c.charCodeAt(0) & 0xff);
+  }
+  return Buffer.from(bytes);
+}
+
+/**
+ * Estimate body size for metadata when Content-Disposition size is absent.
+ */
+function estimateSize(body: string, encoding: string | null): number {
+  const enc = (encoding || "").toLowerCase().trim();
+  if (enc === "base64") return estimateBase64Size(body);
+  // For other encodings the body length is a reasonable proxy
+  return body.length;
+}
+
 /**
 /**
  * Parse MIME source and return metadata for all file attachments.
  * Parse MIME source and return metadata for all file attachments.
- * Skips inline dispositions (signature images, etc.).
+ * Skips inline dispositions (signature images, etc.). Descends into
+ * nested multipart/* containers.
  *
  *
  * @param source - Raw MIME source of the email
  * @param source - Raw MIME source of the email
  * @returns Array of attachment metadata (name, mimeType, size)
  * @returns Array of attachment metadata (name, mimeType, size)
@@ -142,7 +219,7 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
   const boundary = extractBoundary(source);
   const boundary = extractBoundary(source);
   if (!boundary) return [];
   if (!boundary) return [];
 
 
-  const parts = splitMimeParts(source, boundary);
+  const parts = walkLeafParts(source, boundary);
   const attachments: MimeAttachmentInfo[] = [];
   const attachments: MimeAttachmentInfo[] = [];
 
 
   for (const part of parts) {
   for (const part of parts) {
@@ -152,12 +229,11 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
     if (isInlineDisposition(part.headers)) continue;
     if (isInlineDisposition(part.headers)) continue;
 
 
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
-    if (!encoding || encoding.toLowerCase() !== "base64") continue;
 
 
     attachments.push({
     attachments.push({
       name: filename,
       name: filename,
       mimeType: extractMimeType(part.headers),
       mimeType: extractMimeType(part.headers),
-      size: extractSize(part.headers) || estimateBase64Size(part.body),
+      size: extractSize(part.headers) || estimateSize(part.body, encoding),
     });
     });
   }
   }
 
 
@@ -166,6 +242,8 @@ export function parseMimeAttachments(source: string): MimeAttachmentInfo[] {
 
 
 /**
 /**
  * Extract and decode a specific attachment from MIME source by filename.
  * Extract and decode a specific attachment from MIME source by filename.
+ * Supports base64, quoted-printable, and 7bit/8bit/binary transfer encodings.
+ * Descends into nested multipart/* containers.
  *
  *
  * @param source - Raw MIME source of the email
  * @param source - Raw MIME source of the email
  * @param attachmentName - Filename to extract
  * @param attachmentName - Filename to extract
@@ -180,17 +258,14 @@ export function extractMimeAttachment(
   const boundary = extractBoundary(source);
   const boundary = extractBoundary(source);
   if (!boundary) return null;
   if (!boundary) return null;
 
 
-  const parts = splitMimeParts(source, boundary);
+  const parts = walkLeafParts(source, boundary);
 
 
   for (const part of parts) {
   for (const part of parts) {
     const filename = extractFilename(part.headers);
     const filename = extractFilename(part.headers);
     if (filename !== attachmentName) continue;
     if (filename !== attachmentName) continue;
 
 
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
     const encoding = getHeader(part.headers, "Content-Transfer-Encoding");
-    if (!encoding || encoding.toLowerCase() !== "base64") continue;
-
-    const base64Clean = part.body.replace(/[\s\r\n]/g, "");
-    const data = Buffer.from(base64Clean, "base64");
+    const data = decodeBody(part.body, encoding);
 
 
     return {
     return {
       name: filename,
       name: filename,