diff --git a/frontend/src/App.css b/frontend/src/App.css
index ba4eae3..32ff733 100644
--- a/frontend/src/App.css
+++ b/frontend/src/App.css
@@ -433,6 +433,14 @@ body {
background: #e2e8f0;
}
+/* Card-style container: light grey background, thin rounded border,
+   inner padding, and a gap separating it from the element above. */
+.metadata {
+  margin-top: 1rem;
+  padding: 1rem 2rem;
+  border: 1px solid #e5e7eb;
+  border-radius: 8px;
+  background: #f9fafb;
+}
+
/* Responsive design */
@media (max-width: 768px) {
.app-header {
diff --git a/frontend/src/components/Search.tsx b/frontend/src/components/Search.tsx
index 3a9aaac..13589dc 100644
--- a/frontend/src/components/Search.tsx
+++ b/frontend/src/components/Search.tsx
@@ -2,7 +2,6 @@ import React, { useState, useEffect } from 'react'
import { useNavigate } from 'react-router-dom'
import ReactMarkdown from 'react-markdown'
import { useMCP } from '../hooks/useMCP'
-import { useAuth } from '../hooks/useAuth'
import Loading from './Loading'
type SearchItem = {
@@ -24,7 +23,7 @@ const Tag = ({ tags }: { tags: string[] }) => {
)
}
-const formatText = ({ filename, content, chunks, tags }: SearchItem) => {
+const TextResult = ({ filename, content, chunks, tags }: SearchItem) => {
return (
{filename || 'Untitled'}
@@ -45,11 +44,12 @@ const formatText = ({ filename, content, chunks, tags }: SearchItem) => {
)
}
-const formatMarkdown = ({ filename, content, chunks, tags, metadata }: SearchItem) => {
+const MarkdownResult = ({ filename, content, chunks, tags, metadata }: SearchItem) => {
return (
{filename || 'Untitled'}
+
{content || 'No content available'}
@@ -70,7 +70,7 @@ const formatMarkdown = ({ filename, content, chunks, tags, metadata }: SearchIte
)
}
-const formatImage = ({ filename, chunks, tags, metadata }: SearchItem) => {
+const ImageResult = ({ filename, chunks, tags, metadata }: SearchItem) => {
const title = metadata?.title || filename || 'Untitled'
const { fetchFile } = useMCP()
const [mime_type, setMimeType] = useState
()
@@ -95,17 +95,66 @@ const formatImage = ({ filename, chunks, tags, metadata }: SearchItem) => {
)
}
+// Renders every entry of `metadata` as a `key: value` item. String values are
+// shown as-is; any other value is passed through JSON.stringify.
+// Renders nothing (returns null) when `metadata` is falsy.
+const Metadata = ({ metadata }: { metadata: any }) => {
+ if (!metadata) return null
+ return (
+
+
+ {Object.entries(metadata).map(([key, value]) => (
+ - {key}: {typeof value === 'string' ? value : JSON.stringify(value)}
+ ))}
+
+
+ )
+}
+
+// Search result view for PDF items: shows the filename (falling back to
+// 'Untitled'), a "View PDF" entry, and - only when `content` is truthy -
+// the content under a "View Source" label.
+const PDFResult = ({ filename, content, tags, metadata }: SearchItem) => {
+ return (
+
+
{filename || 'Untitled'}
+
+
View PDF
+
+ {content &&
+
+ View Source
+ {content}
+
+
}
+
+ )
+}
+
+// Search result view for emails: the heading is metadata.title, then
+// metadata.subject, then 'Untitled' (first truthy value wins); the content
+// is rendered only when it is truthy.
+const EmailResult = ({ content, tags, metadata }: SearchItem) => {
+ return (
+
+
{metadata?.title || metadata?.subject || 'Untitled'}
+
+
+ {content &&
+ {content}
+
}
+
+ )
+}
+
+// Picks the view for a search result from the prefix of its MIME type.
+// Checks run top to bottom, so 'text/markdown' is matched before the generic
+// 'text/' prefix. Results with any other MIME type are logged to the console
+// and render nothing.
const SearchResult = ({ result }: { result: SearchItem }) => {
if (result.mime_type.startsWith('image/')) {
- return formatImage(result)
+ return
}
if (result.mime_type.startsWith('text/markdown')) {
- console.log(result)
- return formatMarkdown(result)
+ return
}
if (result.mime_type.startsWith('text/')) {
- return formatText(result)
+ return
}
+ if (result.mime_type.startsWith('application/pdf')) {
+ return
+ }
+ if (result.mime_type.startsWith('message/rfc822')) {
+ return
+ }
+ // NOTE(review): looks like a debugging aid for unhandled MIME types - confirm it should ship.
+ console.log(result)
return null
}
diff --git a/frontend/src/hooks/useMCP.ts b/frontend/src/hooks/useMCP.ts
index d1f44df..ca2c150 100644
--- a/frontend/src/hooks/useMCP.ts
+++ b/frontend/src/hooks/useMCP.ts
@@ -115,6 +115,9 @@ export const useMCP = () => {
}
const resp = await parseJsonRpcResponse(response)
+ if (resp?.result?.isError) {
+ throw new Error(resp?.result?.content[0].text)
+ }
return resp?.result?.content.map((item: any) => JSON.parse(item.text))
}, [apiCall])
diff --git a/src/memory/api/app.py b/src/memory/api/app.py
index c418bce..57ffbd6 100644
--- a/src/memory/api/app.py
+++ b/src/memory/api/app.py
@@ -6,7 +6,7 @@ import contextlib
import os
import logging
-from fastapi import FastAPI, UploadFile, Request
+from fastapi import FastAPI, UploadFile, Request, HTTPException
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from sqladmin import Admin
@@ -50,6 +50,24 @@ async def serve_react_app(full_path: str):
return FileResponse(settings.STATIC_DIR / "index.html")
+@app.get("/files/{path:path}")
+async def serve_file(path: str):
+    """Serve a stored file from ``settings.FILE_STORAGE_DIR``.
+
+    Args:
+        path: Location of the file relative to the storage directory, taken
+            verbatim from the request URL.
+
+    Returns:
+        A ``FileResponse`` for the requested file.
+
+    Raises:
+        HTTPException: 404 when the path does not name a regular file, or
+            when it resolves to a location outside the storage directory.
+    """
+    storage_root = settings.FILE_STORAGE_DIR.resolve()
+    # `path` comes straight from the URL. Joining an absolute path replaces
+    # the root entirely and `..` segments climb out of it, so resolve the
+    # candidate (this also follows symlinks) and require the result to stay
+    # under the storage root before touching the file.
+    file_path = (storage_root / path).resolve()
+    if not file_path.is_relative_to(storage_root) or not file_path.is_file():
+        raise HTTPException(status_code=404, detail="File not found")
+    return FileResponse(file_path)
+
+
+async def input_type(item: str | UploadFile) -> list[extract.DataChunk]:
+    """Convert a text string or an uploaded file into data chunks.
+
+    Falsy input yields an empty list. Strings go through
+    ``extract.extract_text``; uploads are read in full and handed to
+    ``extract.extract_data_chunks`` together with their declared content
+    type, defaulting to ``application/octet-stream`` when none is set.
+    """
+    if not item:
+        return []
+
+    if not isinstance(item, str):
+        mime_type = item.content_type or "application/octet-stream"
+        payload = await item.read()
+        return extract.extract_data_chunks(mime_type, payload)
+    return extract.extract_text(item)
+
+
# SQLAdmin setup with OAuth protection
engine = get_engine()
admin = Admin(app, engine)
@@ -72,16 +90,6 @@ async def health_check(request: Request):
app.mount("/", mcp.streamable_http_app())
-async def input_type(item: str | UploadFile) -> list[extract.DataChunk]:
- if not item:
- return []
-
- if isinstance(item, str):
- return extract.extract_text(item)
- content_type = item.content_type or "application/octet-stream"
- return extract.extract_data_chunks(content_type, await item.read())
-
-
def main(reload: bool = False):
"""Run the FastAPI server in debug mode with auto-reloading."""
import uvicorn
diff --git a/src/memory/api/search/bm25.py b/src/memory/api/search/bm25.py
index 5c68f1d..91e07f5 100644
--- a/src/memory/api/search/bm25.py
+++ b/src/memory/api/search/bm25.py
@@ -23,7 +23,8 @@ async def search_bm25(
) -> list[tuple[SourceData, AnnotatedChunk]]:
with make_session() as db:
items_query = db.query(Chunk.id, Chunk.content).filter(
- Chunk.collection_name.in_(modalities)
+ Chunk.collection_name.in_(modalities),
+ Chunk.content.isnot(None),
)
if source_ids := filters.get("source_ids"):
@@ -46,6 +47,7 @@ async def search_bm25(
item_ids = {
sha256(item.content.lower().strip().encode("utf-8")).hexdigest(): item.id
for item in items
+ if item.content
}
corpus = [item.content.lower().strip() for item in items]
diff --git a/src/memory/api/search/utils.py b/src/memory/api/search/utils.py
index 24a937d..4a9da22 100644
--- a/src/memory/api/search/utils.py
+++ b/src/memory/api/search/utils.py
@@ -1,4 +1,5 @@
import asyncio
+import traceback
from datetime import datetime
import logging
from collections import defaultdict
@@ -28,7 +29,7 @@ class SourceData(BaseModel):
mime_type: str | None
filename: str | None
content_length: int
- contents: dict | None
+ contents: dict | str | None
created_at: datetime | None
@staticmethod
@@ -87,6 +88,7 @@ async def with_timeout(
logger.warning(f"Search timed out after {timeout}s")
return []
except Exception as e:
+ traceback.print_exc()
logger.error(f"Search failed: {e}")
return []
@@ -109,8 +111,14 @@ def group_chunks(
def make_result(source: SourceData, chunks: list[AnnotatedChunk]) -> SearchResult:
contents = source.contents or {}
- tags = contents.pop("tags", [])
- content = contents.pop("content", None)
+ tags = []
+ if isinstance(contents, dict):
+ tags = contents.pop("tags", [])
+ content = contents.pop("content", None)
+ print(content)
+ else:
+ content = contents
+ contents = {}
return SearchResult(
id=source.id,
diff --git a/src/memory/common/db/models/source_items.py b/src/memory/common/db/models/source_items.py
index cd77453..b391b6f 100644
--- a/src/memory/common/db/models/source_items.py
+++ b/src/memory/common/db/models/source_items.py
@@ -93,35 +93,76 @@ class MailMessage(SourceItem):
}
@property
- def parsed_content(self):
+ def parsed_content(self) -> dict[str, Any]:
+ """Parse the raw message in ``content`` with ``parse_email_message``.
+
+ Nothing is cached here: the message is parsed again on every access.
+ """
+ # Imported inside the property - presumably to avoid a circular import; confirm.
from memory.parsers.email import parse_email_message
- return parse_email_message(cast(str, self.content), cast(str, self.message_id))
+ return cast(
+ dict[str, Any],
+ parse_email_message(cast(str, self.content), cast(str, self.message_id)),
+ )
@property
def body(self) -> str:
+ """The ``"body"`` entry of ``parsed_content``."""
return self.parsed_content["body"]
- @property
- def display_contents(self) -> str | None:
- content = self.parsed_content
- return textwrap.dedent(
- """
+ def format_content(self, content: dict[str, Any]) -> str:
+ """Render a plain-text header block (subject, sender, recipients, date)
+ followed by the body, with surrounding whitespace stripped.
+
+ The values stored on this row win; keys of ``content`` are only used as
+ fallbacks when the stored value is falsy: ``from``/``sender`` for the
+ sender, ``to``/``recipients`` for the recipients, ``date`` for the sent
+ time and ``subject`` for the subject. The body always comes from
+ ``content["body"]``.
+ """
+ sender = (
+ cast(str, self.sender) or content.get("from") or content.get("sender", "")
+ )
+ recipients = (
+ cast(list[str], self.recipients)
+ or content.get("to")
+ or content.get("recipients", [])
+ )
+ date = (
+ cast(datetime, self.sent_at) and self.sent_at.isoformat()
+ ) or content.get("date", "")
+
+ return (
+ textwrap.dedent(
+ """
Subject: {subject}
From: {sender}
To: {recipients}
Date: {date}
- Body:
+ Body:
{body}
"""
- ).format(
- subject=content.get("subject", ""),
- sender=content.get("from", ""),
- recipients=content.get("to", ""),
- date=content.get("date", ""),
- body=content.get("body", ""),
+ )
+ .format(
+ subject=cast(str, self.subject) or content.get("subject", ""),
+ sender=sender,
+ # NOTE(review): join() assumes the fallback is a list of strings - confirm
+ # content["to"] is never a plain string (it would be split into characters).
+ recipients=", ".join(recipients),
+ date=date,
+ body=content.get("body", ""),
+ )
+ .strip()
)
+    @property
+    def display_contents(self) -> dict | None:
+        """Display payload: the base item's fields plus email-specific ones.
+
+        Starts from the parent class's ``display_contents`` and then sets
+        ``content`` (the parsed body), ``subject``, ``sender``, ``recipients``
+        and ``date`` (``sent_at`` in ISO format, or the falsy ``sent_at``
+        value itself when it is unset), replacing any same-named base keys.
+        """
+        payload = dict(cast(dict, super().display_contents))
+        payload["content"] = self.body
+        payload["subject"] = self.subject
+        payload["sender"] = self.sender
+        payload["recipients"] = self.recipients
+        sent_at = cast(datetime | None, self.sent_at)
+        payload["date"] = sent_at.isoformat() if sent_at else sent_at
+        return payload
+
+    def _chunk_contents(self) -> Sequence[extract.DataChunk]:
+        """Chunk the email body and prefix every text piece with the headers.
+
+        The body is split with ``extract.extract_text``; each string item of
+        each chunk is then replaced by ``format_content`` output, i.e. the
+        subject/sender/recipients/date header followed by that piece of text.
+        Non-string items are left untouched.
+
+        Returns:
+            The chunks produced by ``extract.extract_text``, updated in place.
+        """
+        # Parse the raw message once: `parsed_content` re-parses on every
+        # access and `self.body` is just `parsed_content["body"]`, so going
+        # through `self.body` here would parse the email a second time.
+        content = self.parsed_content
+        chunks = extract.extract_text(cast(str, content["body"]))
+
+        def add_header(item: extract.MulitmodalChunk) -> extract.MulitmodalChunk:
+            if isinstance(item, str):
+                # format_content() already strips its result.
+                return self.format_content(content | {"body": item})
+            return item
+
+        for chunk in chunks:
+            chunk.data = [add_header(item) for item in chunk.data]
+        return chunks
+
# Add indexes
__table_args__ = (
Index("mail_sent_idx", "sent_at"),
@@ -161,13 +202,22 @@ class EmailAttachment(SourceItem):
def data_chunks(self, metadata: dict[str, Any] = {}) -> Sequence[Chunk]:
+ """Build one chunk per data chunk extracted from the attachment.
+
+ When ``filename`` is set, the bytes are read from that path under
+ ``settings.FILE_STORAGE_DIR``; otherwise the inline ``content`` is used.
+ The data is split by ``extract.extract_data_chunks`` according to
+ ``mime_type`` and each piece is wrapped with ``_make_chunk``.
+ """
+ # NOTE(review): the `{}` default is a single dict shared by all calls - safe
+ # only as long as nothing downstream mutates `metadata`; confirm.
if cast(str | None, self.filename):
- contents = pathlib.Path(cast(str, self.filename)).read_bytes()
+ contents = (
+ settings.FILE_STORAGE_DIR / cast(str, self.filename)
+ ).read_bytes()
else:
contents = cast(str, self.content)
chunks = extract.extract_data_chunks(cast(str, self.mime_type), contents)
return [self._make_chunk(c, metadata) for c in chunks]
+    @property
+    def display_contents(self) -> dict:
+        """Attachment display payload merged with its parent email's.
+
+        The parent message's ``display_contents`` is applied last, so its
+        values win over same-named keys from the base implementation.
+        """
+        # NOTE(review): shared keys end up holding the email's values rather
+        # than the attachment's own - confirm this override is intended.
+        merged = dict(cast(dict, super().display_contents))
+        merged.update(self.mail_message.display_contents)
+        return merged
+
# Add indexes
__table_args__ = (Index("email_attachment_message_idx", "mail_message_id"),)
diff --git a/src/memory/common/summarizer.py b/src/memory/common/summarizer.py
index fe0b51a..9843649 100644
--- a/src/memory/common/summarizer.py
+++ b/src/memory/common/summarizer.py
@@ -8,6 +8,7 @@ from memory.common import settings, chunker
logger = logging.getLogger(__name__)
+MAX_TOKENS = 200000
TAGS_PROMPT = """
The following text is already concise. Please identify 3-5 relevant tags that capture the main topics or themes.
@@ -148,6 +149,12 @@ def summarize(content: str, target_tokens: int | None = None) -> tuple[str, list
content=content,
)
+ if chunker.approx_token_count(prompt) > MAX_TOKENS:
+ logger.warning(
+ f"Prompt too long ({chunker.approx_token_count(prompt)} tokens), truncating"
+ )
+ prompt = truncate(prompt, MAX_TOKENS - 20)
+
try:
if settings.SUMMARIZER_MODEL.startswith("anthropic"):
result = _call_anthropic(prompt)
diff --git a/src/memory/parsers/email.py b/src/memory/parsers/email.py
index b728bb6..bbe93ab 100644
--- a/src/memory/parsers/email.py
+++ b/src/memory/parsers/email.py
@@ -1,4 +1,5 @@
import email
+import email.message
import hashlib
import logging
import pathlib
@@ -6,6 +7,8 @@ from datetime import datetime
from email.utils import parsedate_to_datetime
from typing import TypedDict
+from markdownify import markdownify
+
logger = logging.getLogger(__name__)
@@ -71,33 +74,60 @@ def extract_date(msg: email.message.Message) -> datetime | None: # type: ignore
def extract_body(msg: email.message.Message) -> str: # type: ignore
"""
- Extract plain text body from email message.
+ Extract body from email message, preferring HTML converted to markdown.
Args:
msg: Email message object
Returns:
- Plain text body content
+ Body content as markdown (if HTML found) or plain text
"""
- body = ""
+ html_body = ""
+ plain_body = ""
if not msg.is_multipart():
try:
- return msg.get_payload(decode=True).decode(errors="replace")
+ payload = msg.get_payload(decode=True)
+ # decode() is called without an encoding, so the bytes are always read
+ # as UTF-8 (undecodable bytes are replaced); the charset declared on
+ # the message is not consulted.
+ if isinstance(payload, bytes):
+ content = payload.decode(errors="replace")
+ else:
+ content = str(payload)
+ content_type = msg.get_content_type()
+ if content_type == "text/html":
+ return markdownify(content).strip()
+ else:
+ return content
except Exception as e:
logger.error(f"Error decoding message body: {str(e)}")
return ""
+ # Extract both HTML and plain text parts
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition", ""))
- if content_type == "text/plain" and "attachment" not in content_disposition:
- try:
- body += part.get_payload(decode=True).decode(errors="replace") + "\n"
- except Exception as e:
- logger.error(f"Error decoding message part: {str(e)}")
- return body
+ if "attachment" in content_disposition:
+ continue
+
+ try:
+ payload = part.get_payload(decode=True)
+ # get_payload(decode=True) returns None for multipart container parts;
+ # the str() fallback is harmless there because only text/html and
+ # text/plain content is kept below.
+ if isinstance(payload, bytes):
+ content = payload.decode(errors="replace")
+ else:
+ content = str(payload)
+
+ if content_type == "text/html":
+ html_body += content + "\n"
+ elif content_type == "text/plain":
+ plain_body += content + "\n"
+ except Exception as e:
+ logger.error(f"Error decoding message part: {str(e)}")
+
+ # Prefer HTML (converted to markdown) over plain text
+ if html_body.strip():
+ return markdownify(html_body).strip()
+ else:
+ return plain_body.strip()
def extract_attachments(msg: email.message.Message) -> list[Attachment]: # type: ignore
diff --git a/src/memory/workers/email.py b/src/memory/workers/email.py
index f1133e7..4c708a7 100644
--- a/src/memory/workers/email.py
+++ b/src/memory/workers/email.py
@@ -61,7 +61,7 @@ def process_attachment(
mime_type=attachment["content_type"],
mail_message=message,
content=content,
- filename=file_path and str(file_path),
+ filename=file_path and str(file_path.relative_to(settings.FILE_STORAGE_DIR)),
)
@@ -149,7 +149,7 @@ def extract_email_uid(
def fetch_email(conn: imaplib.IMAP4_SSL, uid: str) -> RawEmailResponse | None:
try:
- status, msg_data = conn.fetch(uid, "(UID RFC822)")
+ status, msg_data = conn.fetch(uid, "(UID BODY.PEEK[])")
if status != "OK" or not msg_data or not msg_data[0]:
logger.error(f"Error fetching message {uid}")
return None
diff --git a/tests/data/contents.py b/tests/data/contents.py
index e2970d4..e25ab8b 100644
--- a/tests/data/contents.py
+++ b/tests/data/contents.py
@@ -237,6 +237,40 @@ SAMPLE_TEXT = BeautifulSoup(SAMPLE_HTML, "html.parser").get_text()
SECOND_PAGE_MARKDOWN = markdownify(SECOND_PAGE)
SECOND_PAGE_TEXT = BeautifulSoup(SECOND_PAGE, "html.parser").get_text()
+SAMPLE_EMAIL = f"""From: john.doe@techcorp.com
+To: research-team@techcorp.com, jane.smith@university.edu
+CC: newsletter@programming-weekly.com
+Subject: The Evolution of Programming Languages - Research Article
+Date: Wed, 15 Jan 2025 14:30:00 +0000
+Message-ID: <20250115143000.12345@techcorp.com>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----=_NextPart_000_0001_01DA1234.56789ABC"
+
+This is a multi-part message in MIME format.
+
+------=_NextPart_000_0001_01DA1234.56789ABC
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
+{SAMPLE_HTML}
+
+------=_NextPart_000_0001_01DA1234.56789ABC
+Content-Type: image/png
+Content-Disposition: attachment; filename="lang_timeline.png"
+Content-Transfer-Encoding: base64
+
+iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==
+
+------=_NextPart_000_0001_01DA1234.56789ABC
+Content-Type: image/jpeg
+Content-Disposition: attachment; filename="code_complexity.jpg"
+Content-Transfer-Encoding: base64
+
+/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB
+
+------=_NextPart_000_0001_01DA1234.56789ABC--
+"""
+
def image_hash(image: Image.Image) -> str:
return hashlib.sha256(image.tobytes()).hexdigest()
diff --git a/tests/memory/common/db/models/test_source_item_embeddings.py b/tests/memory/common/db/models/test_source_item_embeddings.py
index 579c7eb..4259bb8 100644
--- a/tests/memory/common/db/models/test_source_item_embeddings.py
+++ b/tests/memory/common/db/models/test_source_item_embeddings.py
@@ -1,4 +1,5 @@
import hashlib
+import textwrap
from datetime import datetime
from typing import Sequence, cast
from unittest.mock import ANY, Mock, call
@@ -20,6 +21,7 @@ from memory.common.db.models.source_items import (
from memory.common.db.models.sources import Book
from memory.common.embedding import embed_source_item
from memory.common.extract import page_to_image
+from memory.parsers.email import parse_email_message
from tests.data.contents import (
CHUNKS,
DATA_DIR,
@@ -27,6 +29,7 @@ from tests.data.contents import (
LANG_TIMELINE_HASH,
CODE_COMPLEXITY,
CODE_COMPLEXITY_HASH,
+ SAMPLE_EMAIL,
SAMPLE_MARKDOWN,
SAMPLE_TEXT,
SECOND_PAGE,
@@ -127,31 +130,41 @@ def test_base_source_item_mixed_embeddings(mock_voyage_client):
] == [LANG_TIMELINE_HASH]
-def test_mail_message_embeddings(mock_voyage_client):
+def test_mail_message_with_attachments_embeddings(mock_voyage_client):
+ email = parse_email_message(SAMPLE_EMAIL, "123")
item = MailMessage(
id=1,
- content=SAMPLE_MARKDOWN,
+ content=SAMPLE_EMAIL,
mime_type="text/html",
modality="text",
- sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
- size=len(SAMPLE_MARKDOWN),
+ sha256=hashlib.sha256(email["body"].encode("utf-8")).hexdigest(),
+ size=len(email["body"]),
tags=["bla"],
message_id="123",
- subject="Test Subject",
- sender="test@example.com",
- recipients=["test@example.com"],
+ subject=email["subject"],
+ sender=email["sender"],
+ recipients=email["recipients"],
folder="INBOX",
sent_at=datetime(2025, 1, 1, 12, 0, 0),
)
+ email_header = textwrap.dedent(
+ f"""
+ Subject: {email["subject"]}
+ From: {email["sender"]}
+ To: {", ".join(email["recipients"])}
+ Date: 2025-01-01T12:00:00
+ Body:
+ """
+ ).lstrip()
metadata = item.as_payload()
- metadata["tags"] = {"bla", "test@example.com"}
+ metadata["tags"] = {"bla", "john.doe@techcorp.com"} | set(email["recipients"])
expected = [
- (CHUNKS[0].strip(), [], metadata),
- (CHUNKS[1].strip(), [], metadata),
+ (email_header + CHUNKS[0].strip(), [], metadata),
+ (email_header + CHUNKS[1].strip().replace("—", "\\\\u2014"), [], metadata),
(
- "test summary",
+ email_header + "test summary",
[],
- metadata | {"tags": {"tag1", "tag2", "bla", "test@example.com"}},
+ metadata | {"tags": {"tag1", "tag2"} | metadata["tags"]},
),
]
@@ -166,7 +179,11 @@ def test_mail_message_embeddings(mock_voyage_client):
assert not mock_voyage_client.multimodal_embed.call_count
assert mock_voyage_client.embed.call_args == call(
- [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+ [
+ email_header + CHUNKS[0].strip(),
+ email_header + CHUNKS[1].strip().replace("—", "\\\\u2014"),
+ email_header + "test summary",
+ ],
model=settings.TEXT_EMBEDDING_MODEL,
input_type="document",
)
diff --git a/tests/memory/common/db/models/test_source_items.py b/tests/memory/common/db/models/test_source_items.py
index 8d838e4..a2f6bc9 100644
--- a/tests/memory/common/db/models/test_source_items.py
+++ b/tests/memory/common/db/models/test_source_items.py
@@ -183,13 +183,27 @@ Subject: Test Subject
Test Body Content"""
mail_message = MailMessage(
- sha256=b"test", content=email_content, message_id=""
+ sha256=b"test",
+ content=email_content,
+ message_id="",
+ sender="sender@example.com",
+ recipients=["recipient@example.com"],
+ subject="Test Subject",
+ size=1024,
+ sent_at=datetime(2023, 1, 1, 12, 0, 0),
)
- expected = (
- "\nSubject: Test Subject\nFrom: \nTo: \nDate: \nBody: \nTest Body Content\n"
- )
- assert mail_message.display_contents == expected
+ assert mail_message.display_contents == {
+ "content": "Test Body Content",
+ "date": "2023-01-01T12:00:00",
+ "filename": None,
+ "mime_type": None,
+ "size": 1024,
+ "subject": "Test Subject",
+ "sender": "sender@example.com",
+ "recipients": ["recipient@example.com"],
+ "tags": None,
+ }
@pytest.mark.parametrize(
diff --git a/tests/memory/parsers/test_email_parsers.py b/tests/memory/parsers/test_email_parsers.py
index 012dd54..dc1887d 100644
--- a/tests/memory/parsers/test_email_parsers.py
+++ b/tests/memory/parsers/test_email_parsers.py
@@ -246,12 +246,11 @@ def test_parse_simple_email():
"subject": "Test Subject",
"sender": "sender@example.com",
"recipients": ["recipient@example.com"],
- "body": "Test body content\n",
+ "body": "Test body content",
"attachments": [],
"sent_at": ANY,
"raw_email": msg.as_string(),
- "hash": b"\xed\xa0\x9b\xd4\t4\x06\xb9l\xa4\xb3*\xe4NpZ\x19\xc2\x9b\x87"
- + b"\xa6\x12\r\x7fS\xb6\xf1\xbe\x95\x9c\x99\xf1",
+ "hash": b"\xa8\x8c\xa9\x16\xae\xe7\x99\xca\xc9\xd1q\x8e\xcb\xfc5+ \x03aZLz\xea\xd2\x05\xb9B\xf1i\xde\xa6\xe2",
}
assert abs(result["sent_at"].timestamp() - test_date.timestamp()) < 86400 # type: ignore
diff --git a/tests/memory/workers/test_email.py b/tests/memory/workers/test_email.py
index f97aecf..9935efd 100644
--- a/tests/memory/workers/test_email.py
+++ b/tests/memory/workers/test_email.py
@@ -1,5 +1,6 @@
import base64
import pathlib
+import textwrap
from datetime import datetime
from typing import cast
from unittest.mock import MagicMock, patch
@@ -100,12 +101,9 @@ def test_process_attachment_disk(attachment_size, max_inline_size, message_id):
assert result is not None
assert not cast(str, result.content)
- assert cast(str, result.filename) == str(
- settings.FILE_STORAGE_DIR
- / "emails"
- / "sender_example_com"
- / "INBOX"
- / "test_with_special_chars.txt"
+ assert (
+ cast(str, result.filename)
+ == "emails/sender_example_com/INBOX/test_with_special_chars.txt"
)
@@ -183,13 +181,7 @@ def test_process_attachments_mixed():
assert cast(str, results[2].content) == "c" * 30
# Verify large attachment has a path
- assert cast(str, results[1].filename) == str(
- settings.FILE_STORAGE_DIR
- / "emails"
- / "sender_example_com"
- / "INBOX"
- / "large.txt"
- )
+ assert cast(str, results[1].filename) == "emails/sender_example_com/INBOX/large.txt"
def test_extract_email_uid_valid():
@@ -256,8 +248,19 @@ def test_create_mail_message(db_session):
assert cast(list[str], mail_message.recipients) == ["recipient@example.com"]
assert mail_message.sent_at.isoformat()[:-6] == "2023-01-01T12:00:00"
assert cast(str, mail_message.content) == raw_email
- assert mail_message.body == "Test body content\n"
+ assert mail_message.body == "Test body content"
assert mail_message.attachments == attachments
+ assert mail_message.display_contents == {
+ "content": "Test body content",
+ "subject": "Test Subject",
+ "sender": "sender@example.com",
+ "recipients": ["recipient@example.com"],
+ "date": "2023-01-01T12:00:00+00:00",
+ "mime_type": "message/rfc822",
+ "size": 412,
+ "tags": ["test"],
+ "filename": None,
+ }
def test_fetch_email(email_provider):