From 1291ca9d08eadc82ab29f4636b4c3329efda7d61 Mon Sep 17 00:00:00 2001 From: Daniel O'Connell Date: Tue, 27 May 2025 22:39:24 +0200 Subject: [PATCH] better handling of errors --- src/memory/workers/tasks/content_processing.py | 5 ++++- tests/conftest.py | 14 ++++++++++---- tests/memory/common/db/test_models.py | 18 ++++++------------ tests/memory/common/test_embedding.py | 4 ++-- tests/memory/workers/tasks/test_comic_tasks.py | 2 +- .../workers/tasks/test_content_processing.py | 2 +- tests/memory/workers/tasks/test_ebook_tasks.py | 15 ++++++++------- tests/memory/workers/tasks/test_maintenance.py | 4 +--- tests/memory/workers/test_email.py | 7 ++++++- 9 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/memory/workers/tasks/content_processing.py b/src/memory/workers/tasks/content_processing.py index 4a00a91..8fd5ea4 100644 --- a/src/memory/workers/tasks/content_processing.py +++ b/src/memory/workers/tasks/content_processing.py @@ -212,12 +212,16 @@ def process_content_item( - Commits database transaction - Stores vectors in Qdrant """ + status = "failed" session.add(item) session.flush() chunks_count = embed_source_item(item) session.flush() + if not chunks_count: + return create_task_result(item, status, content_length=getattr(item, "size", 0)) + try: push_to_qdrant([item], collection_name) status = "processed" @@ -228,7 +232,6 @@ def process_content_item( except Exception as e: logger.error(f"Failed to push embeddings to Qdrant: {e}") item.embed_status = "FAILED" # type: ignore - status = "failed" session.commit() return create_task_result(item, status, content_length=getattr(item, "size", 0)) diff --git a/tests/conftest.py b/tests/conftest.py index 1ac921a..24874c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,9 @@ -from datetime import datetime import os import subprocess -from unittest.mock import patch import uuid +from datetime import datetime from pathlib import Path +from unittest.mock import Mock, patch import pytest import qdrant_client @@ -207,10 +207,13 @@ def mock_file_storage(tmp_path: Path): chunk_storage_dir.mkdir(parents=True, exist_ok=True) image_storage_dir = tmp_path / "images" image_storage_dir.mkdir(parents=True, exist_ok=True) + email_storage_dir = tmp_path / "emails" + email_storage_dir.mkdir(parents=True, exist_ok=True) with ( patch.object(settings, "FILE_STORAGE_DIR", tmp_path), patch.object(settings, "CHUNK_STORAGE_DIR", chunk_storage_dir), patch.object(settings, "WEBPAGE_STORAGE_DIR", image_storage_dir), + patch.object(settings, "EMAIL_STORAGE_DIR", email_storage_dir), ): yield @@ -226,8 +229,11 @@ def qdrant(): @pytest.fixture(autouse=True) def mock_voyage_client(): + def embeder(chunks, *args, **kwargs): + return Mock(embeddings=[[0.1] * 1024] * len(chunks)) + with patch.object(voyageai, "Client", autospec=True) as mock_client: client = mock_client() - client.embed.return_value.embeddings = [[0.1] * 1024] - client.multimodal_embed.return_value.embeddings = [[0.1] * 1024] + client.embed = embeder + client.multimodal_embed = embeder yield client diff --git a/tests/memory/common/db/test_models.py b/tests/memory/common/db/test_models.py index 817efc3..c2ae0e9 100644 --- a/tests/memory/common/db/test_models.py +++ b/tests/memory/common/db/test_models.py @@ -16,6 +16,7 @@ from memory.common.db.models import ( EmailAttachment, BookSection, BlogPost, + Book, ) @@ -499,9 +500,8 @@ def test_mail_message_attachments_path(sender, folder, expected_path): sha256=b"test", content="test", sender=sender, folder=folder ) - with patch.object(settings, "FILE_STORAGE_DIR", "/tmp/storage"): - result = mail_message.attachments_path - assert str(result) == f"/tmp/storage/{expected_path}" + result = mail_message.attachments_path + assert str(result) == f"{settings.FILE_STORAGE_DIR}/emails/{expected_path}" @pytest.mark.parametrize( @@ -520,15 +520,8 @@ def test_mail_message_safe_filename(tmp_path, filename, expected): sha256=b"test", content="test", sender="user@example.com", folder="INBOX" ) - with patch.object(settings, "FILE_STORAGE_DIR", tmp_path): - result = mail_message.safe_filename(filename) - - # Check that the path is correct - expected_path = tmp_path / "user_example_com" / "INBOX" / expected - assert result == expected_path - - # Check that the directory was created - assert result.parent.exists() + expected = settings.FILE_STORAGE_DIR / f"emails/user_example_com/INBOX/{expected}" + assert mail_message.safe_filename(filename) == expected @pytest.mark.parametrize( @@ -847,6 +840,7 @@ def test_book_section_data_chunks(pages, expected_chunks): start_page=10, end_page=10 + len(pages), pages=pages, + book=Book(id=1, title="Test Book", author="Test Author"), ) chunks = book_section.data_chunks() diff --git a/tests/memory/common/test_embedding.py b/tests/memory/common/test_embedding.py index 03b5f67..927a4dd 100644 --- a/tests/memory/common/test_embedding.py +++ b/tests/memory/common/test_embedding.py @@ -15,8 +15,8 @@ def mock_embed(mock_voyage_client): def embed(texts, model, input_type): return Mock(embeddings=[next(vectors) for _ in texts]) - mock_voyage_client.embed.side_effect = embed - mock_voyage_client.multimodal_embed.side_effect = embed + mock_voyage_client.embed = embed + mock_voyage_client.multimodal_embed = embed return mock_voyage_client diff --git a/tests/memory/workers/tasks/test_comic_tasks.py b/tests/memory/workers/tasks/test_comic_tasks.py index 0647284..2cfa098 100644 --- a/tests/memory/workers/tasks/test_comic_tasks.py +++ b/tests/memory/workers/tasks/test_comic_tasks.py @@ -342,7 +342,7 @@ def test_sync_comic_embedding_failure( assert result == { "comic_id": 1, "title": "Test Comic", - "status": "processed", + "status": "failed", "chunks_count": 0, "embed_status": "FAILED", "content_length": 90, diff --git a/tests/memory/workers/tasks/test_content_processing.py b/tests/memory/workers/tasks/test_content_processing.py index 4bd938d..44cb352 100644 --- a/tests/memory/workers/tasks/test_content_processing.py +++ b/tests/memory/workers/tasks/test_content_processing.py @@ -423,7 +423,7 @@ def test_create_task_result_no_title(): [ ("success", False, "processed", "STORED"), ("success", True, "failed", "FAILED"), - ("empty", False, "processed", "FAILED"), + ("empty", False, "failed", "FAILED"), ], ) def test_process_content_item( diff --git a/tests/memory/workers/tasks/test_ebook_tasks.py b/tests/memory/workers/tasks/test_ebook_tasks.py index 1a34a1e..96213f5 100644 --- a/tests/memory/workers/tasks/test_ebook_tasks.py +++ b/tests/memory/workers/tasks/test_ebook_tasks.py @@ -170,7 +170,7 @@ def test_embed_sections(db_session): @patch("memory.workers.tasks.ebook.parse_ebook") -def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path): +def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path, qdrant): """Test successful book synchronization.""" book_file = tmp_path / "test.epub" book_file.write_text("dummy content") @@ -315,17 +315,18 @@ def test_embed_sections_uses_correct_chunk_size(db_session, mock_voyage_client): db_session.add_all(sections) db_session.flush() + return_val = Mock(embeddings=[[0.1] * 1024] * 3) + mock_voyage_client.embed = Mock(return_value=return_val) ebook.embed_sections(sections) # Verify that the voyage client was called with the full large content # Should be called 3 times: once for section content, twice for pages - assert mock_voyage_client.embed.call_count == 3 + assert mock_voyage_client.embed.call_count == 1 # Check that the full content was passed to the embedding function - calls = mock_voyage_client.embed.call_args_list - texts = [c[0][0] for c in calls] + texts = mock_voyage_client.embed.call_args[0][0] assert texts == [ - [large_page_1.strip()], - [large_page_2.strip()], - [large_section_content.strip()], + large_page_1.strip(), + large_page_2.strip(), + large_section_content.strip(), ] diff --git a/tests/memory/workers/tasks/test_maintenance.py b/tests/memory/workers/tasks/test_maintenance.py index e2201eb..62611c0 100644 --- a/tests/memory/workers/tasks/test_maintenance.py +++ b/tests/memory/workers/tasks/test_maintenance.py @@ -283,9 +283,7 @@ def test_reingest_missing_chunks(db_session, qdrant, batch_size): qd.upsert_vectors(qdrant, chunk.source.modality, [str(chunk.id)], [[1] * 1024]) with patch.object(reingest_chunk, "delay", reingest_chunk): - with patch.object(settings, "CHUNK_REINGEST_SINCE_MINUTES", 60): - with patch.object(embedding, "embed_chunks", return_value=[[1] * 1024]): - result = reingest_missing_chunks(batch_size=batch_size) + result = reingest_missing_chunks(batch_size=batch_size, minutes_ago=60) assert result == { "photo": {"correct": 10, "missing": 10, "total": 20}, diff --git a/tests/memory/workers/test_email.py b/tests/memory/workers/test_email.py index 5aa9b43..f97aecf 100644 --- a/tests/memory/workers/test_email.py +++ b/tests/memory/workers/test_email.py @@ -102,6 +102,7 @@ def test_process_attachment_disk(attachment_size, max_inline_size, message_id): assert not cast(str, result.content) assert cast(str, result.filename) == str( settings.FILE_STORAGE_DIR + / "emails" / "sender_example_com" / "INBOX" / "test_with_special_chars.txt" @@ -183,7 +184,11 @@ def test_process_attachments_mixed(): # Verify large attachment has a path assert cast(str, results[1].filename) == str( - settings.FILE_STORAGE_DIR / "sender_example_com" / "INBOX" / "large.txt" + settings.FILE_STORAGE_DIR + / "emails" + / "sender_example_com" + / "INBOX" + / "large.txt" )