mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-29 07:34:43 +02:00
save attachments to disk
This commit is contained in:
parent
128f8e3d64
commit
14aa6ff9be
@ -13,7 +13,7 @@ import pathlib
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from memory.common.db.models import EmailAccount, MailMessage, SourceItem
|
from memory.common.db.models import EmailAccount, MailMessage, SourceItem
|
||||||
from memory.common.settings import FILE_STORAGE_DIR, MAX_INLINE_ATTACHMENT_SIZE
|
from memory.common import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -151,16 +151,18 @@ def process_attachment(attachment: Attachment, message_id: str) -> Attachment |
|
|||||||
Returns:
|
Returns:
|
||||||
Processed attachment dictionary with appropriate metadata
|
Processed attachment dictionary with appropriate metadata
|
||||||
"""
|
"""
|
||||||
if attachment["size"] <= MAX_INLINE_ATTACHMENT_SIZE:
|
if not (content := attachment.get("content")):
|
||||||
attachment["content"] = base64.b64encode(attachment["content"]).decode('utf-8')
|
|
||||||
return attachment
|
return attachment
|
||||||
|
|
||||||
|
if attachment["size"] <= settings.MAX_INLINE_ATTACHMENT_SIZE:
|
||||||
|
return {**attachment, "content": base64.b64encode(content).decode('utf-8')}
|
||||||
|
|
||||||
safe_message_id = re.sub(r'[<>\s:/\\]', '_', message_id)
|
safe_message_id = re.sub(r'[<>\s:/\\]', '_', message_id)
|
||||||
unique_id = str(uuid.uuid4())[:8]
|
unique_id = str(uuid.uuid4())[:8]
|
||||||
safe_filename = re.sub(r'[/\\]', '_', attachment["filename"])
|
safe_filename = re.sub(r'[/\\]', '_', attachment["filename"])
|
||||||
|
|
||||||
# Create user subdirectory
|
# Create user subdirectory
|
||||||
user_dir = FILE_STORAGE_DIR / safe_message_id
|
user_dir = settings.FILE_STORAGE_DIR / safe_message_id
|
||||||
user_dir.mkdir(parents=True, exist_ok=True)
|
user_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Final path for the attachment
|
# Final path for the attachment
|
||||||
@ -168,9 +170,8 @@ def process_attachment(attachment: Attachment, message_id: str) -> Attachment |
|
|||||||
|
|
||||||
# Write the file
|
# Write the file
|
||||||
try:
|
try:
|
||||||
file_path.write_bytes(attachment["content"])
|
file_path.write_bytes(content)
|
||||||
attachment["path"] = file_path
|
return {**attachment, "path": file_path}
|
||||||
return attachment
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to save attachment {safe_filename} to disk: {str(e)}")
|
logger.error(f"Failed to save attachment {safe_filename} to disk: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
@ -2,13 +2,17 @@ import email
|
|||||||
import email.mime.multipart
|
import email.mime.multipart
|
||||||
import email.mime.text
|
import email.mime.text
|
||||||
import email.mime.base
|
import email.mime.base
|
||||||
|
import base64
|
||||||
|
import pathlib
|
||||||
|
import re
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from email.utils import formatdate
|
from email.utils import formatdate
|
||||||
from unittest.mock import ANY, MagicMock, patch
|
from unittest.mock import ANY, MagicMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
import imaplib
|
|
||||||
from memory.common.db.models import SourceItem
|
from memory.common.db.models import SourceItem
|
||||||
from memory.common.db.models import MailMessage, EmailAccount
|
from memory.common.db.models import MailMessage, EmailAccount
|
||||||
|
from memory.common import settings
|
||||||
from memory.workers.email import (
|
from memory.workers.email import (
|
||||||
compute_message_hash,
|
compute_message_hash,
|
||||||
create_source_item,
|
create_source_item,
|
||||||
@ -23,8 +27,10 @@ from memory.workers.email import (
|
|||||||
fetch_email,
|
fetch_email,
|
||||||
fetch_email_since,
|
fetch_email_since,
|
||||||
process_folder,
|
process_folder,
|
||||||
|
process_attachment,
|
||||||
|
process_attachments,
|
||||||
)
|
)
|
||||||
from tests.providers.email_provider import MockEmailProvider
|
|
||||||
|
|
||||||
|
|
||||||
# Use a simple counter to generate unique message IDs without calling make_msgid
|
# Use a simple counter to generate unique message IDs without calling make_msgid
|
||||||
@ -57,11 +63,13 @@ def create_email_message(
|
|||||||
|
|
||||||
if attachments:
|
if attachments:
|
||||||
for attachment in attachments:
|
for attachment in attachments:
|
||||||
attachment_part = email.mime.base.MIMEBase("application", "octet-stream")
|
attachment_part = email.mime.base.MIMEBase(
|
||||||
|
"application", "octet-stream"
|
||||||
|
)
|
||||||
attachment_part.set_payload(attachment["content"])
|
attachment_part.set_payload(attachment["content"])
|
||||||
attachment_part.add_header(
|
attachment_part.add_header(
|
||||||
"Content-Disposition",
|
"Content-Disposition",
|
||||||
f"attachment; filename={attachment['filename']}"
|
f"attachment; filename={attachment['filename']}",
|
||||||
)
|
)
|
||||||
msg.attach(attachment_part)
|
msg.attach(attachment_part)
|
||||||
else:
|
else:
|
||||||
@ -89,41 +97,31 @@ def create_email_message(
|
|||||||
"to_addr, cc_addr, bcc_addr, expected",
|
"to_addr, cc_addr, bcc_addr, expected",
|
||||||
[
|
[
|
||||||
# Single recipient in To field
|
# Single recipient in To field
|
||||||
(
|
("recipient@example.com", None, None, ["recipient@example.com"]),
|
||||||
"recipient@example.com",
|
|
||||||
None,
|
|
||||||
None,
|
|
||||||
["recipient@example.com"]
|
|
||||||
),
|
|
||||||
# Multiple recipients in To field
|
# Multiple recipients in To field
|
||||||
(
|
(
|
||||||
"recipient1@example.com, recipient2@example.com",
|
"recipient1@example.com, recipient2@example.com",
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
["recipient1@example.com", "recipient2@example.com"]
|
["recipient1@example.com", "recipient2@example.com"],
|
||||||
),
|
),
|
||||||
# To, Cc fields
|
# To, Cc fields
|
||||||
(
|
(
|
||||||
"recipient@example.com",
|
"recipient@example.com",
|
||||||
"cc@example.com",
|
"cc@example.com",
|
||||||
None,
|
None,
|
||||||
["recipient@example.com", "cc@example.com"]
|
["recipient@example.com", "cc@example.com"],
|
||||||
),
|
),
|
||||||
# To, Cc, Bcc fields
|
# To, Cc, Bcc fields
|
||||||
(
|
(
|
||||||
"recipient@example.com",
|
"recipient@example.com",
|
||||||
"cc@example.com",
|
"cc@example.com",
|
||||||
"bcc@example.com",
|
"bcc@example.com",
|
||||||
["recipient@example.com", "cc@example.com", "bcc@example.com"]
|
["recipient@example.com", "cc@example.com", "bcc@example.com"],
|
||||||
),
|
),
|
||||||
# Empty fields
|
# Empty fields
|
||||||
(
|
("", "", "", []),
|
||||||
"",
|
],
|
||||||
"",
|
|
||||||
"",
|
|
||||||
[]
|
|
||||||
),
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
def test_extract_recipients(to_addr, cc_addr, bcc_addr, expected):
|
def test_extract_recipients(to_addr, cc_addr, bcc_addr, expected):
|
||||||
msg = create_email_message(to_addrs=to_addr, cc_addrs=cc_addr, bcc_addrs=bcc_addr)
|
msg = create_email_message(to_addrs=to_addr, cc_addrs=cc_addr, bcc_addrs=bcc_addr)
|
||||||
@ -143,7 +141,7 @@ def test_extract_date_missing():
|
|||||||
"Monday, Jan 1, 2023", # Descriptive but not RFC compliant
|
"Monday, Jan 1, 2023", # Descriptive but not RFC compliant
|
||||||
"01/01/2023", # Common format but not RFC compliant
|
"01/01/2023", # Common format but not RFC compliant
|
||||||
"", # Empty string
|
"", # Empty string
|
||||||
]
|
],
|
||||||
)
|
)
|
||||||
def test_extract_date_invalid_formats(date_str):
|
def test_extract_date_invalid_formats(date_str):
|
||||||
msg = create_email_message()
|
msg = create_email_message()
|
||||||
@ -157,7 +155,7 @@ def test_extract_date_invalid_formats(date_str):
|
|||||||
"Mon, 01 Jan 2023 12:00:00 +0000", # RFC 5322 format
|
"Mon, 01 Jan 2023 12:00:00 +0000", # RFC 5322 format
|
||||||
"01 Jan 2023 12:00:00 +0000", # RFC 822 format
|
"01 Jan 2023 12:00:00 +0000", # RFC 822 format
|
||||||
"Mon, 01 Jan 2023 12:00:00 GMT", # With timezone name
|
"Mon, 01 Jan 2023 12:00:00 GMT", # With timezone name
|
||||||
]
|
],
|
||||||
)
|
)
|
||||||
def test_extract_date(date_str):
|
def test_extract_date(date_str):
|
||||||
msg = create_email_message()
|
msg = create_email_message()
|
||||||
@ -170,7 +168,7 @@ def test_extract_date(date_str):
|
|||||||
assert result.day == 1
|
assert result.day == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('multipart', [True, False])
|
@pytest.mark.parametrize("multipart", [True, False])
|
||||||
def test_extract_body_text_plain(multipart):
|
def test_extract_body_text_plain(multipart):
|
||||||
body_content = "This is a test email body"
|
body_content = "This is a test email body"
|
||||||
msg = create_email_message(body=body_content, multipart=multipart)
|
msg = create_email_message(body=body_content, multipart=multipart)
|
||||||
@ -182,9 +180,7 @@ def test_extract_body_text_plain(multipart):
|
|||||||
|
|
||||||
def test_extract_body_with_attachments():
|
def test_extract_body_with_attachments():
|
||||||
body_content = "This is a test email body"
|
body_content = "This is a test email body"
|
||||||
attachments = [
|
attachments = [{"filename": "test.txt", "content": b"attachment content"}]
|
||||||
{"filename": "test.txt", "content": b"attachment content"}
|
|
||||||
]
|
|
||||||
msg = create_email_message(body=body_content, attachments=attachments)
|
msg = create_email_message(body=body_content, attachments=attachments)
|
||||||
assert body_content in extract_body(msg)
|
assert body_content in extract_body(msg)
|
||||||
|
|
||||||
@ -197,7 +193,7 @@ def test_extract_attachments_none():
|
|||||||
def test_extract_attachments_with_files():
|
def test_extract_attachments_with_files():
|
||||||
attachments = [
|
attachments = [
|
||||||
{"filename": "test1.txt", "content": b"content1"},
|
{"filename": "test1.txt", "content": b"content1"},
|
||||||
{"filename": "test2.pdf", "content": b"content2"}
|
{"filename": "test2.pdf", "content": b"content2"},
|
||||||
]
|
]
|
||||||
msg = create_email_message(attachments=attachments)
|
msg = create_email_message(attachments=attachments)
|
||||||
|
|
||||||
@ -212,6 +208,130 @@ def test_extract_attachments_non_multipart():
|
|||||||
assert extract_attachments(msg) == []
|
assert extract_attachments(msg) == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"attachment_size, max_inline_size, message_id",
|
||||||
|
[
|
||||||
|
# Small attachment, should be base64 encoded and returned inline
|
||||||
|
(100, 1000, "<test@example.com>"),
|
||||||
|
# Edge case: exactly at max size, should be base64 encoded
|
||||||
|
(100, 100, "<test@example.com>"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_process_attachment_inline(attachment_size, max_inline_size, message_id):
|
||||||
|
attachment = {
|
||||||
|
"filename": "test.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": attachment_size,
|
||||||
|
"content": b"a" * attachment_size,
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch.object(settings, "MAX_INLINE_ATTACHMENT_SIZE", max_inline_size):
|
||||||
|
result = process_attachment(attachment, message_id)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
# For inline attachments, content should be base64 encoded string
|
||||||
|
assert isinstance(result["content"], str)
|
||||||
|
# Decode the base64 string and compare with the original content
|
||||||
|
decoded_content = base64.b64decode(result["content"].encode('utf-8'))
|
||||||
|
assert decoded_content == attachment["content"]
|
||||||
|
assert "path" not in result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"attachment_size, max_inline_size, message_id",
|
||||||
|
[
|
||||||
|
# Large attachment, should be saved to disk
|
||||||
|
(1000, 100, "<test@example.com>"),
|
||||||
|
# Message ID with special characters that need escaping
|
||||||
|
(1000, 100, "<test/with:special\\chars>"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_process_attachment_disk(attachment_size, max_inline_size, message_id):
|
||||||
|
attachment = {
|
||||||
|
"filename": "test.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": attachment_size,
|
||||||
|
"content": b"a" * attachment_size,
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch.object(settings, "MAX_INLINE_ATTACHMENT_SIZE", max_inline_size):
|
||||||
|
result = process_attachment(attachment, message_id)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
# For disk-stored attachments, content should not be modified and path should be set
|
||||||
|
assert "path" in result
|
||||||
|
assert isinstance(result["path"], pathlib.Path)
|
||||||
|
|
||||||
|
# Verify the path contains safe message ID
|
||||||
|
safe_message_id = re.sub(r"[<>\s:/\\]", "_", message_id)
|
||||||
|
assert safe_message_id in str(result["path"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_attachment_write_error():
|
||||||
|
# Create test attachment
|
||||||
|
attachment = {
|
||||||
|
"filename": "test_error.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": 100,
|
||||||
|
"content": b"a" * 100,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mock write_bytes to raise an exception
|
||||||
|
def mock_write_bytes(self, content):
|
||||||
|
raise IOError("Test write error")
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(settings, "MAX_INLINE_ATTACHMENT_SIZE", 10),
|
||||||
|
patch.object(pathlib.Path, "write_bytes", mock_write_bytes),
|
||||||
|
):
|
||||||
|
assert process_attachment(attachment, "<test@example.com>") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_attachments_empty():
|
||||||
|
assert process_attachments([], "<test@example.com>") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_attachments_mixed():
|
||||||
|
# Create test attachments
|
||||||
|
attachments = [
|
||||||
|
# Small attachment - should be kept inline
|
||||||
|
{
|
||||||
|
"filename": "small.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": 20,
|
||||||
|
"content": b"a" * 20,
|
||||||
|
},
|
||||||
|
# Large attachment - should be stored on disk
|
||||||
|
{
|
||||||
|
"filename": "large.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": 100,
|
||||||
|
"content": b"b" * 100,
|
||||||
|
},
|
||||||
|
# Another small attachment
|
||||||
|
{
|
||||||
|
"filename": "another_small.txt",
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"size": 30,
|
||||||
|
"content": b"c" * 30,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch.object(settings, "MAX_INLINE_ATTACHMENT_SIZE", 50):
|
||||||
|
# Process attachments
|
||||||
|
results = process_attachments(attachments, "<test@example.com>")
|
||||||
|
|
||||||
|
# Verify we have all attachments processed
|
||||||
|
assert len(results) == 3
|
||||||
|
|
||||||
|
# Verify small attachments are base64 encoded
|
||||||
|
assert isinstance(results[0]["content"], str)
|
||||||
|
assert isinstance(results[2]["content"], str)
|
||||||
|
|
||||||
|
# Verify large attachment has a path
|
||||||
|
assert "path" in results[1]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"msg_id, subject, sender, body, expected",
|
"msg_id, subject, sender, body, expected",
|
||||||
[
|
[
|
||||||
@ -220,16 +340,16 @@ def test_extract_attachments_non_multipart():
|
|||||||
"Test Subject",
|
"Test Subject",
|
||||||
"sender@example.com",
|
"sender@example.com",
|
||||||
"Test body",
|
"Test body",
|
||||||
b"\xf2\xbd" # First two bytes of the actual hash
|
b"\xf2\xbd", # First two bytes of the actual hash
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"<different@example.com>",
|
"<different@example.com>",
|
||||||
"Test Subject",
|
"Test Subject",
|
||||||
"sender@example.com",
|
"sender@example.com",
|
||||||
"Test body",
|
"Test body",
|
||||||
b"\xa4\x15" # Will be different from the first hash
|
b"\xa4\x15", # Will be different from the first hash
|
||||||
),
|
),
|
||||||
]
|
],
|
||||||
)
|
)
|
||||||
def test_compute_message_hash(msg_id, subject, sender, body, expected):
|
def test_compute_message_hash(msg_id, subject, sender, body, expected):
|
||||||
result = compute_message_hash(msg_id, subject, sender, body)
|
result = compute_message_hash(msg_id, subject, sender, body)
|
||||||
@ -256,7 +376,7 @@ def test_parse_simple_email():
|
|||||||
to_addrs="recipient@example.com",
|
to_addrs="recipient@example.com",
|
||||||
date=test_date,
|
date=test_date,
|
||||||
body="Test body content",
|
body="Test body content",
|
||||||
message_id=msg_id
|
message_id=msg_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = parse_email_message(msg.as_string())
|
result = parse_email_message(msg.as_string())
|
||||||
@ -274,9 +394,7 @@ def test_parse_simple_email():
|
|||||||
|
|
||||||
|
|
||||||
def test_parse_email_with_attachments():
|
def test_parse_email_with_attachments():
|
||||||
attachments = [
|
attachments = [{"filename": "test.txt", "content": b"attachment content"}]
|
||||||
{"filename": "test.txt", "content": b"attachment content"}
|
|
||||||
]
|
|
||||||
msg = create_email_message(attachments=attachments)
|
msg = create_email_message(attachments=attachments)
|
||||||
|
|
||||||
result = parse_email_message(msg.as_string())
|
result = parse_email_message(msg.as_string())
|
||||||
@ -286,24 +404,24 @@ def test_parse_email_with_attachments():
|
|||||||
|
|
||||||
|
|
||||||
def test_extract_email_uid_valid():
|
def test_extract_email_uid_valid():
|
||||||
msg_data = [(b'1 (UID 12345 RFC822 {1234}', b'raw email content')]
|
msg_data = [(b"1 (UID 12345 RFC822 {1234}", b"raw email content")]
|
||||||
uid, raw_email = extract_email_uid(msg_data)
|
uid, raw_email = extract_email_uid(msg_data)
|
||||||
|
|
||||||
assert uid == "12345"
|
assert uid == "12345"
|
||||||
assert raw_email == b'raw email content'
|
assert raw_email == b"raw email content"
|
||||||
|
|
||||||
|
|
||||||
def test_extract_email_uid_no_match():
|
def test_extract_email_uid_no_match():
|
||||||
msg_data = [(b'1 (RFC822 {1234}', b'raw email content')]
|
msg_data = [(b"1 (RFC822 {1234}", b"raw email content")]
|
||||||
uid, raw_email = extract_email_uid(msg_data)
|
uid, raw_email = extract_email_uid(msg_data)
|
||||||
|
|
||||||
assert uid is None
|
assert uid is None
|
||||||
assert raw_email == b'raw email content'
|
assert raw_email == b"raw email content"
|
||||||
|
|
||||||
|
|
||||||
def test_create_source_item(db_session):
|
def test_create_source_item(db_session):
|
||||||
# Mock data
|
# Mock data
|
||||||
message_hash = b'test_hash_bytes' + bytes(28) # 32 bytes for SHA-256
|
message_hash = b"test_hash_bytes" + bytes(28) # 32 bytes for SHA-256
|
||||||
account_tags = ["work", "important"]
|
account_tags = ["work", "important"]
|
||||||
raw_email_size = 1024
|
raw_email_size = 1024
|
||||||
|
|
||||||
@ -312,7 +430,7 @@ def test_create_source_item(db_session):
|
|||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
message_hash=message_hash,
|
message_hash=message_hash,
|
||||||
account_tags=account_tags,
|
account_tags=account_tags,
|
||||||
raw_email_size=raw_email_size
|
raw_email_size=raw_email_size,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify the source item was created correctly
|
# Verify the source item was created correctly
|
||||||
@ -339,61 +457,59 @@ def test_create_source_item(db_session):
|
|||||||
(
|
(
|
||||||
lambda db: (
|
lambda db: (
|
||||||
# First create source_item to satisfy foreign key constraint
|
# First create source_item to satisfy foreign key constraint
|
||||||
db.add(SourceItem(
|
db.add(
|
||||||
|
SourceItem(
|
||||||
id=1,
|
id=1,
|
||||||
modality="mail",
|
modality="mail",
|
||||||
sha256=b'some_hash_bytes' + bytes(28),
|
sha256=b"some_hash_bytes" + bytes(28),
|
||||||
tags=["test"],
|
tags=["test"],
|
||||||
byte_length=100,
|
byte_length=100,
|
||||||
mime_type="message/rfc822",
|
mime_type="message/rfc822",
|
||||||
embed_status="RAW"
|
embed_status="RAW",
|
||||||
)),
|
)
|
||||||
|
),
|
||||||
db.flush(),
|
db.flush(),
|
||||||
# Then create mail_message
|
# Then create mail_message
|
||||||
db.add(MailMessage(
|
db.add(
|
||||||
|
MailMessage(
|
||||||
source_id=1,
|
source_id=1,
|
||||||
message_id="<test@example.com>",
|
message_id="<test@example.com>",
|
||||||
subject="Test",
|
subject="Test",
|
||||||
sender="test@example.com",
|
sender="test@example.com",
|
||||||
recipients=["recipient@example.com"],
|
recipients=["recipient@example.com"],
|
||||||
body_raw="Test body"
|
body_raw="Test body",
|
||||||
))
|
)
|
||||||
|
),
|
||||||
),
|
),
|
||||||
"<test@example.com>",
|
"<test@example.com>",
|
||||||
b"unmatched_hash",
|
b"unmatched_hash",
|
||||||
True
|
True,
|
||||||
),
|
),
|
||||||
# Test by non-existent message ID
|
# Test by non-existent message ID
|
||||||
(
|
(lambda db: None, "<nonexistent@example.com>", b"unmatched_hash", False),
|
||||||
lambda db: None,
|
|
||||||
"<nonexistent@example.com>",
|
|
||||||
b"unmatched_hash",
|
|
||||||
False
|
|
||||||
),
|
|
||||||
# Test by hash
|
# Test by hash
|
||||||
(
|
(
|
||||||
lambda db: db.add(SourceItem(
|
lambda db: db.add(
|
||||||
|
SourceItem(
|
||||||
modality="mail",
|
modality="mail",
|
||||||
sha256=b'test_hash_bytes' + bytes(28),
|
sha256=b"test_hash_bytes" + bytes(28),
|
||||||
tags=["test"],
|
tags=["test"],
|
||||||
byte_length=100,
|
byte_length=100,
|
||||||
mime_type="message/rfc822",
|
mime_type="message/rfc822",
|
||||||
embed_status="RAW"
|
embed_status="RAW",
|
||||||
)),
|
)
|
||||||
|
),
|
||||||
"",
|
"",
|
||||||
b'test_hash_bytes' + bytes(28),
|
b"test_hash_bytes" + bytes(28),
|
||||||
True
|
True,
|
||||||
),
|
),
|
||||||
# Test by non-existent hash
|
# Test by non-existent hash
|
||||||
(
|
(lambda db: None, "", b"different_hash_" + bytes(28), False),
|
||||||
lambda db: None,
|
],
|
||||||
"",
|
|
||||||
b'different_hash_' + bytes(28),
|
|
||||||
False
|
|
||||||
),
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
def test_check_message_exists(db_session, setup_db, message_id, message_hash, expected_exists):
|
def test_check_message_exists(
|
||||||
|
db_session, setup_db, message_id, message_hash, expected_exists
|
||||||
|
):
|
||||||
# Setup test data
|
# Setup test data
|
||||||
if setup_db:
|
if setup_db:
|
||||||
setup_db(db_session)
|
setup_db(db_session)
|
||||||
@ -412,7 +528,9 @@ def test_create_mail_message(db_session):
|
|||||||
"recipients": ["recipient@example.com"],
|
"recipients": ["recipient@example.com"],
|
||||||
"sent_at": datetime(2023, 1, 1, 12, 0, 0),
|
"sent_at": datetime(2023, 1, 1, 12, 0, 0),
|
||||||
"body": "Test body content",
|
"body": "Test body content",
|
||||||
"attachments": [{"filename": "test.txt", "content_type": "text/plain", "size": 100}]
|
"attachments": [
|
||||||
|
{"filename": "test.txt", "content_type": "text/plain", "size": 100}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
folder = "INBOX"
|
folder = "INBOX"
|
||||||
|
|
||||||
@ -421,7 +539,7 @@ def test_create_mail_message(db_session):
|
|||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
source_id=source_id,
|
source_id=source_id,
|
||||||
parsed_email=parsed_email,
|
parsed_email=parsed_email,
|
||||||
folder=folder
|
folder=folder,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify the mail message was created correctly
|
# Verify the mail message was created correctly
|
||||||
@ -433,7 +551,10 @@ def test_create_mail_message(db_session):
|
|||||||
assert mail_message.recipients == parsed_email["recipients"]
|
assert mail_message.recipients == parsed_email["recipients"]
|
||||||
assert mail_message.sent_at == parsed_email["sent_at"]
|
assert mail_message.sent_at == parsed_email["sent_at"]
|
||||||
assert mail_message.body_raw == parsed_email["body"]
|
assert mail_message.body_raw == parsed_email["body"]
|
||||||
assert mail_message.attachments == {"items": parsed_email["attachments"], "folder": folder}
|
assert mail_message.attachments == {
|
||||||
|
"items": parsed_email["attachments"],
|
||||||
|
"folder": folder,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_email(email_provider):
|
def test_fetch_email(email_provider):
|
||||||
@ -466,37 +587,33 @@ def test_fetch_email_since(email_provider):
|
|||||||
assert uids == ["101", "102"]
|
assert uids == ["101", "102"]
|
||||||
|
|
||||||
# Test with a folder that doesn't exist
|
# Test with a folder that doesn't exist
|
||||||
result = fetch_email_since(email_provider, "NonExistentFolder", datetime(1970, 1, 1))
|
result = fetch_email_since(
|
||||||
|
email_provider, "NonExistentFolder", datetime(1970, 1, 1)
|
||||||
|
)
|
||||||
assert result == []
|
assert result == []
|
||||||
|
|
||||||
|
|
||||||
@patch('memory.workers.tasks.email.process_message.delay')
|
def test_process_folder(email_provider):
|
||||||
def test_process_folder(mock_process_message_delay, email_provider):
|
|
||||||
account = MagicMock(spec=EmailAccount)
|
account = MagicMock(spec=EmailAccount)
|
||||||
account.id = 123
|
account.id = 123
|
||||||
account.tags = ["test"]
|
account.tags = ["test"]
|
||||||
|
|
||||||
results = process_folder(email_provider, "INBOX", account, datetime(1970, 1, 1), mock_process_message_delay)
|
results = process_folder(
|
||||||
|
email_provider, "INBOX", account, datetime(1970, 1, 1), MagicMock()
|
||||||
|
)
|
||||||
|
|
||||||
assert results == {
|
assert results == {"messages_found": 2, "new_messages": 2, "errors": 0}
|
||||||
"messages_found": 2,
|
|
||||||
"new_messages": 2,
|
|
||||||
"errors": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@patch('memory.workers.tasks.email.process_message.delay')
|
def test_process_folder_no_emails(email_provider):
|
||||||
def test_process_folder_no_emails(mock_process_message_delay, email_provider):
|
|
||||||
account = MagicMock(spec=EmailAccount)
|
account = MagicMock(spec=EmailAccount)
|
||||||
account.id = 123
|
account.id = 123
|
||||||
email_provider.search = MagicMock(return_value=("OK", [b'']))
|
email_provider.search = MagicMock(return_value=("OK", [b""]))
|
||||||
|
|
||||||
result = process_folder(email_provider, "Empty", account, datetime(1970, 1, 1), mock_process_message_delay)
|
result = process_folder(
|
||||||
assert result == {
|
email_provider, "Empty", account, datetime(1970, 1, 1), MagicMock()
|
||||||
"messages_found": 0,
|
)
|
||||||
"new_messages": 0,
|
assert result == {"messages_found": 0, "new_messages": 0, "errors": 0}
|
||||||
"errors": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def test_process_folder_error(email_provider):
|
def test_process_folder_error(email_provider):
|
||||||
@ -510,10 +627,7 @@ def test_process_folder_error(email_provider):
|
|||||||
|
|
||||||
email_provider.search = raise_exception
|
email_provider.search = raise_exception
|
||||||
|
|
||||||
result = process_folder(email_provider, "INBOX", account, datetime(1970, 1, 1), mock_processor)
|
result = process_folder(
|
||||||
assert result == {
|
email_provider, "INBOX", account, datetime(1970, 1, 1), mock_processor
|
||||||
"messages_found": 0,
|
)
|
||||||
"new_messages": 0,
|
assert result == {"messages_found": 0, "new_messages": 0, "errors": 0}
|
||||||
"errors": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user