email provider

This commit is contained in:
Daniel O'Connell 2025-04-27 19:03:50 +02:00
parent d3117a4e6a
commit 03b5c908ee
7 changed files with 455 additions and 192 deletions

View File

@ -1,21 +1,16 @@
""" """
Database connection utilities. Database connection utilities.
""" """
import os from contextlib import contextmanager
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.orm import sessionmaker, scoped_session
from memory.common import settings
def get_engine(): def get_engine():
"""Create SQLAlchemy engine from environment variables""" """Create SQLAlchemy engine from environment variables"""
user = os.getenv("POSTGRES_USER", "kb") return create_engine(settings.DB_URL)
password = os.getenv("POSTGRES_PASSWORD", "kb")
host = os.getenv("POSTGRES_HOST", "postgres")
port = os.getenv("POSTGRES_PORT", "5432")
db = os.getenv("POSTGRES_DB", "kb")
return create_engine(f"postgresql://{user}:{password}@{host}:{port}/{db}")
def get_session_factory(): def get_session_factory():
@ -32,6 +27,20 @@ def get_scoped_session():
return scoped_session(session_factory) return scoped_session(session_factory)
@contextmanager
def make_session(): def make_session():
with get_scoped_session() as session: """
Context manager for database sessions.
Yields:
SQLAlchemy session that will be automatically closed
"""
session = get_scoped_session()
try:
yield session yield session
session.commit()
except Exception:
session.rollback()
raise
finally:
session.remove()

View File

@ -6,7 +6,7 @@ import re
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime from datetime import datetime
from email.utils import parsedate_to_datetime from email.utils import parsedate_to_datetime
from typing import Generator from typing import Generator, Callable
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from memory.common.db.models import EmailAccount, MailMessage, SourceItem from memory.common.db.models import EmailAccount, MailMessage, SourceItem
@ -85,13 +85,13 @@ def extract_body(msg: email.message.Message) -> str:
def extract_attachments(msg: email.message.Message) -> list[dict]: def extract_attachments(msg: email.message.Message) -> list[dict]:
""" """
Extract attachment metadata from email. Extract attachment metadata and content from email.
Args: Args:
msg: Email message object msg: Email message object
Returns: Returns:
List of attachment metadata dicts List of attachment dictionaries with metadata and content
""" """
if not msg.is_multipart(): if not msg.is_multipart():
return [] return []
@ -103,11 +103,16 @@ def extract_attachments(msg: email.message.Message) -> list[dict]:
continue continue
if filename := part.get_filename(): if filename := part.get_filename():
try:
content = part.get_payload(decode=True)
attachments.append({ attachments.append({
"filename": filename, "filename": filename,
"content_type": part.get_content_type(), "content_type": part.get_content_type(),
"size": len(part.get_payload(decode=True)) "size": len(content),
"content": content
}) })
except Exception as e:
logger.error(f"Error extracting attachment content for {filename}: {str(e)}")
return attachments return attachments
@ -265,7 +270,7 @@ def fetch_email(conn: imaplib.IMAP4_SSL, uid: str) -> tuple[str, bytes] | None:
def fetch_email_since( def fetch_email_since(
conn: imaplib.IMAP4_SSL, conn: imaplib.IMAP4_SSL,
folder: str, folder: str,
since_date: datetime since_date: datetime = datetime(1970, 1, 1)
) -> list[tuple[str, bytes]]: ) -> list[tuple[str, bytes]]:
""" """
Fetch emails from a folder since a given date. Fetch emails from a folder since a given date.
@ -304,7 +309,8 @@ def process_folder(
conn: imaplib.IMAP4_SSL, conn: imaplib.IMAP4_SSL,
folder: str, folder: str,
account: EmailAccount, account: EmailAccount,
since_date: datetime since_date: datetime,
processor: Callable[[int, str, str, bytes], int | None],
) -> dict: ) -> dict:
""" """
Process a single folder from an email account. Process a single folder from an email account.
@ -319,17 +325,14 @@ def process_folder(
Stats dictionary for the folder Stats dictionary for the folder
""" """
new_messages, errors = 0, 0 new_messages, errors = 0, 0
emails = [] # Initialize to avoid UnboundLocalError emails = []
try: try:
emails = fetch_email_since(conn, folder, since_date) emails = fetch_email_since(conn, folder, since_date)
for uid, raw_email in emails: for uid, raw_email in emails:
try: try:
# Import process_message here to avoid circular imports task = processor(
from memory.workers.tasks.email import process_message
task = process_message.delay(
account_id=account.id, account_id=account.id,
message_id=uid, message_id=uid,
folder=folder, folder=folder,

View File

@ -1,3 +1,4 @@
from datetime import datetime
import os import os
import subprocess import subprocess
import uuid import uuid
@ -8,6 +9,7 @@ from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from memory.common import settings from memory.common import settings
from tests.providers.email_provider import MockEmailProvider
def get_test_db_name() -> str: def get_test_db_name() -> str:
@ -53,7 +55,7 @@ def drop_test_database(test_db_name: str) -> None:
def run_alembic_migrations(db_name: str) -> None: def run_alembic_migrations(db_name: str) -> None:
"""Run all Alembic migrations on the test database.""" """Run all Alembic migrations on the test database."""
project_root = Path(__file__).parent.parent.parent.parent.parent project_root = Path(__file__).parent.parent
alembic_ini = project_root / "db" / "migrations" / "alembic.ini" alembic_ini = project_root / "db" / "migrations" / "alembic.ini"
subprocess.run( subprocess.run(
@ -126,3 +128,48 @@ def db_session(db_engine):
# Close and rollback the session after the test is done # Close and rollback the session after the test is done
session.rollback() session.rollback()
session.close() session.close()
@pytest.fixture
def email_provider():
return MockEmailProvider(
emails_by_folder={
"INBOX": [
{
"uid": 101,
"flags": "\\Seen",
"date": datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": datetime.now().strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "alice@example.com",
"to": "bob@example.com",
"subject": "Test Email 1",
"message_id": "<test-101@example.com>",
"body": "This is test email 1",
},
{
"uid": 102,
"flags": "",
"date": datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": datetime.now().strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "charlie@example.com",
"to": "bob@example.com",
"subject": "Test Email 2",
"message_id": "<test-102@example.com>",
"body": "This is test email 2",
},
],
"Archive": [
{
"uid": 201,
"flags": "\\Seen",
"date": datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": datetime.now().strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "dave@example.com",
"to": "bob@example.com",
"subject": "Archived Email",
"message_id": "<test-201@example.com>",
"body": "This is an archived email",
}
],
}
)

View File

@ -0,0 +1,106 @@
import pytest
from datetime import datetime, timedelta
from memory.common.db.models import EmailAccount
from memory.workers.tasks.email import process_message, sync_account, sync_all_accounts
# from ..email_provider import MockEmailProvider
@pytest.fixture
def sample_emails():
"""Fixture providing a sample set of test emails across different folders."""
now = datetime.now()
yesterday = now - timedelta(days=1)
last_week = now - timedelta(days=7)
return {
"INBOX": [
{
"uid": 101,
"flags": "\\Seen",
"date": now.strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": now.strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "alice@example.com",
"to": "bob@example.com",
"subject": "Recent Test Email",
"message_id": "<test-101@example.com>",
"body": "This is a recent test email"
},
{
"uid": 102,
"flags": "",
"date": yesterday.strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": yesterday.strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "charlie@example.com",
"to": "bob@example.com",
"subject": "Yesterday's Email",
"message_id": "<test-102@example.com>",
"body": "This email was sent yesterday"
}
],
"Sent": [
{
"uid": 201,
"flags": "\\Seen",
"date": yesterday.strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": yesterday.strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "bob@example.com",
"to": "alice@example.com",
"subject": "Re: Test Email",
"message_id": "<test-201@example.com>",
"body": "This is a reply to the test email"
}
],
"Archive": [
{
"uid": 301,
"flags": "\\Seen",
"date": last_week.strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": last_week.strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "david@example.com",
"to": "bob@example.com",
"subject": "Old Email",
"message_id": "<test-301@example.com>",
"body": "This is an old email from last week"
},
{
"uid": 302,
"flags": "\\Seen",
"date": last_week.strftime("%a, %d %b %Y %H:%M:%S +0000"),
"date_internal": last_week.strftime("%d-%b-%Y %H:%M:%S +0000"),
"from": "eve@example.com",
"to": "bob@example.com",
"subject": "Email with Attachment",
"message_id": "<test-302@example.com>",
"body": "This email has an attachment",
"attachments": [
{
"filename": "test.txt",
"maintype": "text",
"subtype": "plain",
"content": b"This is a test attachment"
}
]
}
]
}
@pytest.fixture
def test_email_account(db_session):
"""Create a test email account for integration testing."""
account = EmailAccount(
name="Test Account",
email_address="bob@example.com",
imap_server="imap.example.com",
imap_port=993,
username="bob@example.com",
password="password123",
use_ssl=True,
folders=["INBOX", "Sent", "Archive"],
tags=["test", "integration"],
active=True
)
db_session.add(account)
db_session.commit()
return account

View File

@ -23,8 +23,8 @@ from memory.workers.email import (
fetch_email, fetch_email,
fetch_email_since, fetch_email_since,
process_folder, process_folder,
imap_connection,
) )
from tests.providers.email_provider import MockEmailProvider
# Use a simple counter to generate unique message IDs without calling make_msgid # Use a simple counter to generate unique message IDs without calling make_msgid
@ -436,169 +436,84 @@ def test_create_mail_message(db_session):
assert mail_message.attachments == {"items": parsed_email["attachments"], "folder": folder} assert mail_message.attachments == {"items": parsed_email["attachments"], "folder": folder}
@pytest.mark.parametrize( def test_fetch_email(email_provider):
"fetch_return, fetch_side_effect, extract_uid_return, expected_result", # Configure the provider with sample emails
[ email_provider.select("INBOX")
# Success case
(('OK', ['mock_data']), None, ("12345", b'raw email content'), ("12345", b'raw email content')),
# IMAP error
(('NO', []), None, None, None),
# Exception case
(None, Exception("Test error"), None, None),
]
)
@patch('memory.workers.email.extract_email_uid')
def test_fetch_email(
mock_extract_email_uid, fetch_return, fetch_side_effect, extract_uid_return, expected_result
):
conn = MagicMock(spec=imaplib.IMAP4_SSL)
# Configure mocks # Test fetching an existing email
if fetch_side_effect: result = fetch_email(email_provider, "101")
conn.fetch.side_effect = fetch_side_effect
else:
conn.fetch.return_value = fetch_return
if extract_uid_return: # Verify result contains the expected UID and content
mock_extract_email_uid.return_value = extract_uid_return assert result is not None
uid, content = result
assert uid == "101"
assert b"This is test email 1" in content
uid = "12345" # Test fetching a non-existent email
result = fetch_email(email_provider, "999")
# Call function assert result is None
result = fetch_email(conn, uid)
# Verify expectations
assert result == expected_result
# Verify fetch was called if no exception
if not fetch_side_effect:
conn.fetch.assert_called_once_with(uid, '(UID RFC822)')
@pytest.mark.parametrize( def test_fetch_email_since(email_provider):
"select_return, search_return, select_side_effect, expected_calls, expected_result", # Fetch emails from INBOX folder
[ result = fetch_email_since(email_provider, "INBOX", datetime(1970, 1, 1))
# Successful case with multiple messages
(
('OK', [b'1']),
('OK', [b'1 2 3']),
None,
3,
[("1", b'email1'), ("2", b'email2'), ("3", b'email3')]
),
# No messages found case
(
('OK', [b'0']),
('OK', [b'']),
None,
0,
[]
),
# Error in select
(
('NO', [b'Error']),
None,
None,
0,
[]
),
# Error in search
(
('OK', [b'1']),
('NO', [b'Error']),
None,
0,
[]
),
# Exception in select
(
None,
None,
Exception("Test error"),
0,
[]
),
]
)
@patch('memory.workers.email.fetch_email')
def test_fetch_email_since(
mock_fetch_email, select_return, search_return, select_side_effect, expected_calls, expected_result
):
conn = MagicMock(spec=imaplib.IMAP4_SSL)
# Configure mocks based on parameters # Verify we got the expected number of emails
if select_side_effect: assert len(result) == 2
conn.select.side_effect = select_side_effect
else:
conn.select.return_value = select_return
if search_return: # Verify content of fetched emails
conn.search.return_value = search_return uids = sorted([uid for uid, _ in result])
assert uids == ["101", "102"]
# Configure fetch_email mock if needed # Test with a folder that doesn't exist
if expected_calls > 0: result = fetch_email_since(email_provider, "NonExistentFolder", datetime(1970, 1, 1))
mock_fetch_email.side_effect = [ assert result == []
(f"{i+1}", f"email{i+1}".encode()) for i in range(expected_calls)
]
folder = "INBOX"
since_date = datetime(2023, 1, 1)
result = fetch_email_since(conn, folder, since_date)
assert mock_fetch_email.call_count == expected_calls
assert result == expected_result
@patch('memory.workers.email.fetch_email_since')
def test_process_folder_error(mock_fetch_email_since):
# Setup
conn = MagicMock(spec=imaplib.IMAP4_SSL)
folder = "INBOX"
account = MagicMock(spec=EmailAccount)
since_date = datetime(2023, 1, 1)
# Test exception in fetch_email_since
mock_fetch_email_since.side_effect = Exception("Test error")
# Call function
result = process_folder(conn, folder, account, since_date)
# Verify
assert result["messages_found"] == 0
assert result["new_messages"] == 0
assert result["errors"] == 1
@patch('memory.workers.tasks.email.process_message.delay') @patch('memory.workers.tasks.email.process_message.delay')
@patch('memory.workers.email.fetch_email_since') def test_process_folder(mock_process_message_delay, email_provider):
def test_process_folder(mock_fetch_email_since, mock_process_message_delay):
conn = MagicMock(spec=imaplib.IMAP4_SSL)
folder = "INBOX"
account = MagicMock(spec=EmailAccount) account = MagicMock(spec=EmailAccount)
account.id = 123 account.id = 123
since_date = datetime(2023, 1, 1) account.tags = ["test"]
mock_fetch_email_since.return_value = [ results = process_folder(email_provider, "INBOX", account, datetime(1970, 1, 1), mock_process_message_delay)
("1", b'email1'),
("2", b'email2'),
]
mock_process_message_delay.return_value = MagicMock() assert results == {
"messages_found": 2,
"new_messages": 2,
"errors": 0
}
with patch('builtins.__import__', side_effect=__import__):
result = process_folder(conn, folder, account, since_date)
mock_fetch_email_since.assert_called_once_with(conn, folder, since_date) @patch('memory.workers.tasks.email.process_message.delay')
assert mock_process_message_delay.call_count == 2 def test_process_folder_no_emails(mock_process_message_delay, email_provider):
account = MagicMock(spec=EmailAccount)
account.id = 123
email_provider.search = MagicMock(return_value=("OK", [b'']))
mock_process_message_delay.assert_any_call( result = process_folder(email_provider, "Empty", account, datetime(1970, 1, 1), mock_process_message_delay)
account_id=account.id, assert result == {
message_id="1", "messages_found": 0,
folder=folder, "new_messages": 0,
raw_email='email1' "errors": 0
) }
def test_process_folder_error(email_provider):
account = MagicMock(spec=EmailAccount)
account.id = 123
mock_processor = MagicMock()
def raise_exception(*args):
raise Exception("Test error")
email_provider.search = raise_exception
result = process_folder(email_provider, "INBOX", account, datetime(1970, 1, 1), mock_processor)
assert result == {
"messages_found": 0,
"new_messages": 0,
"errors": 0
}
assert result["messages_found"] == 2
assert result["new_messages"] == 2
assert result["errors"] == 0

View File

View File

@ -0,0 +1,183 @@
import email
from datetime import datetime
from typing import Any
class MockEmailProvider:
"""
Mock IMAP email provider for integration testing.
Can be initialized with predefined emails to return.
"""
def __init__(self, emails_by_folder: dict[str, list[dict[str, Any]]] = None):
"""
Initialize with a dictionary of emails organized by folder.
Args:
emails_by_folder: A dictionary mapping folder names to lists of email dictionaries.
Each email dict should have: 'uid', 'flags', 'date', 'from', 'to', 'subject',
'message_id', 'body', and optionally 'attachments'.
"""
self.emails_by_folder = emails_by_folder or {
"INBOX": [],
"Sent": [],
"Archive": []
}
self.current_folder = None
self.is_connected = False
def _generate_email_string(self, email_data: dict[str, Any]) -> str:
"""Generate a raw email string from the provided email data."""
msg = email.message.EmailMessage()
msg["From"] = email_data.get("from", "sender@example.com")
msg["To"] = email_data.get("to", "recipient@example.com")
msg["Subject"] = email_data.get("subject", "Test Subject")
msg["Message-ID"] = email_data.get("message_id", f"<test-{email_data['uid']}@example.com>")
msg["Date"] = email_data.get("date", datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"))
# Set the body content
msg.set_content(email_data.get("body", f"This is test email body {email_data['uid']}"))
# Add attachments if present
for attachment in email_data.get("attachments", []):
if isinstance(attachment, dict) and "filename" in attachment and "content" in attachment:
msg.add_attachment(
attachment["content"],
maintype=attachment.get("maintype", "application"),
subtype=attachment.get("subtype", "octet-stream"),
filename=attachment["filename"]
)
return msg.as_string()
def login(self, username: str, password: str) -> tuple[str, list[bytes]]:
"""Mock login method."""
self.is_connected = True
return ('OK', [b'Login successful'])
def logout(self) -> tuple[str, list[bytes]]:
"""Mock logout method."""
self.is_connected = False
return ('OK', [b'Logout successful'])
def select(self, folder: str, readonly: bool = False) -> tuple[str, list[bytes]]:
"""
Select a folder and make it the current active folder.
Args:
folder: Folder name to select
readonly: Whether to open in readonly mode
Returns:
IMAP-style response with message count
"""
folder_name = folder.decode() if isinstance(folder, bytes) else folder
self.current_folder = folder_name
message_count = len(self.emails_by_folder.get(folder_name, []))
return ('OK', [str(message_count).encode()])
def list(self, directory: str = '', pattern: str = '*') -> tuple[str, list[bytes]]:
"""List available folders."""
folders = []
for folder in self.emails_by_folder.keys():
folders.append(f'(\\HasNoChildren) "/" "{folder}"'.encode())
return ('OK', folders)
def search(self, charset, criteria):
"""
Handle SEARCH command to find email UIDs.
Args:
charset: Character set (ignored in mock)
criteria: Search criteria (ignored in mock, we return all emails)
Returns:
All email UIDs in the current folder
"""
if not self.current_folder or self.current_folder not in self.emails_by_folder:
return ('OK', [b''])
uids = [str(email["uid"]).encode() for email in self.emails_by_folder[self.current_folder]]
return ('OK', [b' '.join(uids) if uids else b''])
def fetch(self, message_set, message_parts) -> tuple[str, list]:
"""
Handle FETCH command to retrieve email data.
Args:
message_set: Message numbers/UIDs to fetch
message_parts: Parts of the message to fetch
Returns:
Email data in IMAP format
"""
if not self.current_folder or self.current_folder not in self.emails_by_folder:
return ('OK', [None])
# For simplicity, we'll just match the UID with the ID provided
uid = int(message_set.decode() if isinstance(message_set, bytes) else message_set)
# Find the email with the matching UID
for email_data in self.emails_by_folder[self.current_folder]:
if email_data["uid"] == uid:
# Generate email content
email_string = self._generate_email_string(email_data)
flags = email_data.get("flags", "\\Seen")
date = email_data.get("date_internal", "01-Jan-2023 00:00:00 +0000")
# Format the response as expected by the IMAP client
response = [(
f'{uid} (UID {uid} FLAGS ({flags}) INTERNALDATE "{date}" RFC822 '
f'{{{len(email_string)}}}'.encode(),
email_string.encode()
)]
return ('OK', response)
# No matching email found
return ('NO', [b'Email not found'])
# def uid(self, command: str, *args) -> tuple[str, list]:
# """
# Handle UID-based commands like SEARCH and FETCH.
# Args:
# command: The IMAP command (SEARCH, FETCH, etc.)
# *args: Additional arguments for the command
# Returns:
# IMAP-style response appropriate for the command
# """
# if not self.current_folder or self.current_folder not in self.emails_by_folder:
# return ('OK', [b''])
# if command == 'SEARCH':
# # For simplicity, return all UIDs in the current folder
# # A real implementation would parse the search criteria
# uids = [str(email["uid"]).encode() for email in self.emails_by_folder[self.current_folder]]
# return ('OK', [b' '.join(uids) if uids else b''])
# elif command == 'FETCH':
# # Parse the UID from the arguments
# uid_arg = args[0].decode() if isinstance(args[0], bytes) else args[0]
# uid = int(uid_arg)
# # Find the email with the matching UID
# for email_data in self.emails_by_folder[self.current_folder]:
# if email_data["uid"] == uid:
# # Generate email content
# email_string = self._generate_email_string(email_data)
# flags = email_data.get("flags", "\\Seen")
# date = email_data.get("date_internal", "01-Jan-2023 00:00:00 +0000")
# # Format the response as expected by the IMAP client
# response = [(
# f'{uid} (UID {uid} FLAGS ({flags}) INTERNALDATE "{date}" RFC822 '
# f'{{{len(email_string)}}}'.encode(),
# email_string.encode()
# )]
# return ('OK', response)
# # No matching email found
# return ('NO', [b'Email not found'])
# return ('NO', [f'Command {command} not implemented'.encode()])