better handling of errors

This commit is contained in:
Daniel O'Connell 2025-05-27 22:39:24 +02:00
parent f5c3e458d7
commit 1291ca9d08
9 changed files with 39 additions and 32 deletions

View File

@ -212,12 +212,16 @@ def process_content_item(
- Commits database transaction
- Stores vectors in Qdrant
"""
status = "failed"
session.add(item)
session.flush()
chunks_count = embed_source_item(item)
session.flush()
if not chunks_count:
return create_task_result(item, status, content_length=getattr(item, "size", 0))
try:
push_to_qdrant([item], collection_name)
status = "processed"
@ -228,7 +232,6 @@ def process_content_item(
except Exception as e:
logger.error(f"Failed to push embeddings to Qdrant: {e}")
item.embed_status = "FAILED" # type: ignore
status = "failed"
session.commit()
return create_task_result(item, status, content_length=getattr(item, "size", 0))

View File

@ -1,9 +1,9 @@
from datetime import datetime
import os
import subprocess
from unittest.mock import patch
import uuid
from datetime import datetime
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
import qdrant_client
@ -207,10 +207,13 @@ def mock_file_storage(tmp_path: Path):
chunk_storage_dir.mkdir(parents=True, exist_ok=True)
image_storage_dir = tmp_path / "images"
image_storage_dir.mkdir(parents=True, exist_ok=True)
email_storage_dir = tmp_path / "emails"
email_storage_dir.mkdir(parents=True, exist_ok=True)
with (
patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
patch.object(settings, "CHUNK_STORAGE_DIR", chunk_storage_dir),
patch.object(settings, "WEBPAGE_STORAGE_DIR", image_storage_dir),
patch.object(settings, "EMAIL_STORAGE_DIR", email_storage_dir),
):
yield
@ -226,8 +229,11 @@ def qdrant():
@pytest.fixture(autouse=True)
def mock_voyage_client():
def embeder(chunks, *args, **kwargs):
return Mock(embeddings=[[0.1] * 1024] * len(chunks))
with patch.object(voyageai, "Client", autospec=True) as mock_client:
client = mock_client()
client.embed.return_value.embeddings = [[0.1] * 1024]
client.multimodal_embed.return_value.embeddings = [[0.1] * 1024]
client.embed = embeder
client.multimodal_embed = embeder
yield client

View File

@ -16,6 +16,7 @@ from memory.common.db.models import (
EmailAttachment,
BookSection,
BlogPost,
Book,
)
@ -499,9 +500,8 @@ def test_mail_message_attachments_path(sender, folder, expected_path):
sha256=b"test", content="test", sender=sender, folder=folder
)
with patch.object(settings, "FILE_STORAGE_DIR", "/tmp/storage"):
result = mail_message.attachments_path
assert str(result) == f"/tmp/storage/{expected_path}"
assert str(result) == f"{settings.FILE_STORAGE_DIR}/emails/{expected_path}"
@pytest.mark.parametrize(
@ -520,15 +520,8 @@ def test_mail_message_safe_filename(tmp_path, filename, expected):
sha256=b"test", content="test", sender="user@example.com", folder="INBOX"
)
with patch.object(settings, "FILE_STORAGE_DIR", tmp_path):
result = mail_message.safe_filename(filename)
# Check that the path is correct
expected_path = tmp_path / "user_example_com" / "INBOX" / expected
assert result == expected_path
# Check that the directory was created
assert result.parent.exists()
expected = settings.FILE_STORAGE_DIR / f"emails/user_example_com/INBOX/{expected}"
assert mail_message.safe_filename(filename) == expected
@pytest.mark.parametrize(
@ -847,6 +840,7 @@ def test_book_section_data_chunks(pages, expected_chunks):
start_page=10,
end_page=10 + len(pages),
pages=pages,
book=Book(id=1, title="Test Book", author="Test Author"),
)
chunks = book_section.data_chunks()

View File

@ -15,8 +15,8 @@ def mock_embed(mock_voyage_client):
def embed(texts, model, input_type):
return Mock(embeddings=[next(vectors) for _ in texts])
mock_voyage_client.embed.side_effect = embed
mock_voyage_client.multimodal_embed.side_effect = embed
mock_voyage_client.embed = embed
mock_voyage_client.multimodal_embed = embed
return mock_voyage_client

View File

@ -342,7 +342,7 @@ def test_sync_comic_embedding_failure(
assert result == {
"comic_id": 1,
"title": "Test Comic",
"status": "processed",
"status": "failed",
"chunks_count": 0,
"embed_status": "FAILED",
"content_length": 90,

View File

@ -423,7 +423,7 @@ def test_create_task_result_no_title():
[
("success", False, "processed", "STORED"),
("success", True, "failed", "FAILED"),
("empty", False, "processed", "FAILED"),
("empty", False, "failed", "FAILED"),
],
)
def test_process_content_item(

View File

@ -170,7 +170,7 @@ def test_embed_sections(db_session):
@patch("memory.workers.tasks.ebook.parse_ebook")
def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path):
def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path, qdrant):
"""Test successful book synchronization."""
book_file = tmp_path / "test.epub"
book_file.write_text("dummy content")
@ -315,17 +315,18 @@ def test_embed_sections_uses_correct_chunk_size(db_session, mock_voyage_client):
db_session.add_all(sections)
db_session.flush()
return_val = Mock(embeddings=[[0.1] * 1024] * 3)
mock_voyage_client.embed = Mock(return_value=return_val)
ebook.embed_sections(sections)
# Verify that the voyage client was called with the full large content
# Should be called 3 times: once for section content, twice for pages
assert mock_voyage_client.embed.call_count == 3
assert mock_voyage_client.embed.call_count == 1
# Check that the full content was passed to the embedding function
calls = mock_voyage_client.embed.call_args_list
texts = [c[0][0] for c in calls]
texts = mock_voyage_client.embed.call_args[0][0]
assert texts == [
[large_page_1.strip()],
[large_page_2.strip()],
[large_section_content.strip()],
large_page_1.strip(),
large_page_2.strip(),
large_section_content.strip(),
]

View File

@ -283,9 +283,7 @@ def test_reingest_missing_chunks(db_session, qdrant, batch_size):
qd.upsert_vectors(qdrant, chunk.source.modality, [str(chunk.id)], [[1] * 1024])
with patch.object(reingest_chunk, "delay", reingest_chunk):
with patch.object(settings, "CHUNK_REINGEST_SINCE_MINUTES", 60):
with patch.object(embedding, "embed_chunks", return_value=[[1] * 1024]):
result = reingest_missing_chunks(batch_size=batch_size)
result = reingest_missing_chunks(batch_size=batch_size, minutes_ago=60)
assert result == {
"photo": {"correct": 10, "missing": 10, "total": 20},

View File

@ -102,6 +102,7 @@ def test_process_attachment_disk(attachment_size, max_inline_size, message_id):
assert not cast(str, result.content)
assert cast(str, result.filename) == str(
settings.FILE_STORAGE_DIR
/ "emails"
/ "sender_example_com"
/ "INBOX"
/ "test_with_special_chars.txt"
@ -183,7 +184,11 @@ def test_process_attachments_mixed():
# Verify large attachment has a path
assert cast(str, results[1].filename) == str(
settings.FILE_STORAGE_DIR / "sender_example_com" / "INBOX" / "large.txt"
settings.FILE_STORAGE_DIR
/ "emails"
/ "sender_example_com"
/ "INBOX"
/ "large.txt"
)