better handling of errors

2025-06-28 23:24:43 +02:00 · 2025-05-27 22:39:24 +02:00 · 2025-05-27 22:39:24 +02:00 · 1291ca9d08
commit 1291ca9d08
parent f5c3e458d7
9 changed files with 39 additions and 32 deletions
--- a/src/memory/workers/tasks/content_processing.py
+++ b/src/memory/workers/tasks/content_processing.py
@ -212,12 +212,16 @@ def process_content_item(
        - Commits database transaction
        - Stores vectors in Qdrant
    """
+    status = "failed"
    session.add(item)
    session.flush()

    chunks_count = embed_source_item(item)
    session.flush()

+    if not chunks_count:
+        return create_task_result(item, status, content_length=getattr(item, "size", 0))
+
    try:
        push_to_qdrant([item], collection_name)
        status = "processed"
@ -228,7 +232,6 @@ def process_content_item(
    except Exception as e:
        logger.error(f"Failed to push embeddings to Qdrant: {e}")
        item.embed_status = "FAILED"  # type: ignore
-        status = "failed"
    session.commit()

    return create_task_result(item, status, content_length=getattr(item, "size", 0))
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -1,9 +1,9 @@
-from datetime import datetime
 import os
 import subprocess
-from unittest.mock import patch
 import uuid
+from datetime import datetime
 from pathlib import Path
+from unittest.mock import Mock, patch

 import pytest
 import qdrant_client
@ -207,10 +207,13 @@ def mock_file_storage(tmp_path: Path):
    chunk_storage_dir.mkdir(parents=True, exist_ok=True)
    image_storage_dir = tmp_path / "images"
    image_storage_dir.mkdir(parents=True, exist_ok=True)
+    email_storage_dir = tmp_path / "emails"
+    email_storage_dir.mkdir(parents=True, exist_ok=True)
    with (
        patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
        patch.object(settings, "CHUNK_STORAGE_DIR", chunk_storage_dir),
        patch.object(settings, "WEBPAGE_STORAGE_DIR", image_storage_dir),
+        patch.object(settings, "EMAIL_STORAGE_DIR", email_storage_dir),
    ):
        yield

@ -226,8 +229,11 @@ def qdrant():

@pytest.fixture(autouse=True)
 def mock_voyage_client():
+    def embeder(chunks, *args, **kwargs):
+        return Mock(embeddings=[[0.1] * 1024] * len(chunks))
+
    with patch.object(voyageai, "Client", autospec=True) as mock_client:
        client = mock_client()
-        client.embed.return_value.embeddings = [[0.1] * 1024]
-        client.multimodal_embed.return_value.embeddings = [[0.1] * 1024]
+        client.embed = embeder
+        client.multimodal_embed = embeder
        yield client
--- a/tests/memory/common/db/test_models.py
+++ b/tests/memory/common/db/test_models.py
@ -16,6 +16,7 @@ from memory.common.db.models import (
    EmailAttachment,
    BookSection,
    BlogPost,
+    Book,
 )


@ -499,9 +500,8 @@ def test_mail_message_attachments_path(sender, folder, expected_path):
        sha256=b"test", content="test", sender=sender, folder=folder
    )

-    with patch.object(settings, "FILE_STORAGE_DIR", "/tmp/storage"):
    result = mail_message.attachments_path
-        assert str(result) == f"/tmp/storage/{expected_path}"
+    assert str(result) == f"{settings.FILE_STORAGE_DIR}/emails/{expected_path}"


@pytest.mark.parametrize(
@ -520,15 +520,8 @@ def test_mail_message_safe_filename(tmp_path, filename, expected):
        sha256=b"test", content="test", sender="user@example.com", folder="INBOX"
    )

-    with patch.object(settings, "FILE_STORAGE_DIR", tmp_path):
-        result = mail_message.safe_filename(filename)
-
-        # Check that the path is correct
-        expected_path = tmp_path / "user_example_com" / "INBOX" / expected
-        assert result == expected_path
-
-        # Check that the directory was created
-        assert result.parent.exists()
+    expected = settings.FILE_STORAGE_DIR / f"emails/user_example_com/INBOX/{expected}"
+    assert mail_message.safe_filename(filename) == expected


@pytest.mark.parametrize(
@ -847,6 +840,7 @@ def test_book_section_data_chunks(pages, expected_chunks):
        start_page=10,
        end_page=10 + len(pages),
        pages=pages,
+        book=Book(id=1, title="Test Book", author="Test Author"),
    )

    chunks = book_section.data_chunks()
--- a/tests/memory/common/test_embedding.py
+++ b/tests/memory/common/test_embedding.py
@ -15,8 +15,8 @@ def mock_embed(mock_voyage_client):
    def embed(texts, model, input_type):
        return Mock(embeddings=[next(vectors) for _ in texts])

-    mock_voyage_client.embed.side_effect = embed
-    mock_voyage_client.multimodal_embed.side_effect = embed
+    mock_voyage_client.embed = embed
+    mock_voyage_client.multimodal_embed = embed

    return mock_voyage_client

--- a/tests/memory/workers/tasks/test_comic_tasks.py
+++ b/tests/memory/workers/tasks/test_comic_tasks.py
@ -342,7 +342,7 @@ def test_sync_comic_embedding_failure(
    assert result == {
        "comic_id": 1,
        "title": "Test Comic",
-        "status": "processed",
+        "status": "failed",
        "chunks_count": 0,
        "embed_status": "FAILED",
        "content_length": 90,
--- a/tests/memory/workers/tasks/test_content_processing.py
+++ b/tests/memory/workers/tasks/test_content_processing.py
@ -423,7 +423,7 @@ def test_create_task_result_no_title():
    [
        ("success", False, "processed", "STORED"),
        ("success", True, "failed", "FAILED"),
-        ("empty", False, "processed", "FAILED"),
+        ("empty", False, "failed", "FAILED"),
    ],
 )
 def test_process_content_item(
--- a/tests/memory/workers/tasks/test_ebook_tasks.py
+++ b/tests/memory/workers/tasks/test_ebook_tasks.py
@ -170,7 +170,7 @@ def test_embed_sections(db_session):


@patch("memory.workers.tasks.ebook.parse_ebook")
-def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path):
+def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path, qdrant):
    """Test successful book synchronization."""
    book_file = tmp_path / "test.epub"
    book_file.write_text("dummy content")
@ -315,17 +315,18 @@ def test_embed_sections_uses_correct_chunk_size(db_session, mock_voyage_client):
    db_session.add_all(sections)
    db_session.flush()

+    return_val = Mock(embeddings=[[0.1] * 1024] * 3)
+    mock_voyage_client.embed = Mock(return_value=return_val)
    ebook.embed_sections(sections)

    # Verify that the voyage client was called with the full large content
    # Should be called 3 times: once for section content, twice for pages
-    assert mock_voyage_client.embed.call_count == 3
+    assert mock_voyage_client.embed.call_count == 1

    # Check that the full content was passed to the embedding function
-    calls = mock_voyage_client.embed.call_args_list
-    texts = [c[0][0] for c in calls]
+    texts = mock_voyage_client.embed.call_args[0][0]
    assert texts == [
-        [large_page_1.strip()],
-        [large_page_2.strip()],
-        [large_section_content.strip()],
+        large_page_1.strip(),
+        large_page_2.strip(),
+        large_section_content.strip(),
    ]
--- a/tests/memory/workers/tasks/test_maintenance.py
+++ b/tests/memory/workers/tasks/test_maintenance.py
@ -283,9 +283,7 @@ def test_reingest_missing_chunks(db_session, qdrant, batch_size):
        qd.upsert_vectors(qdrant, chunk.source.modality, [str(chunk.id)], [[1] * 1024])

    with patch.object(reingest_chunk, "delay", reingest_chunk):
-        with patch.object(settings, "CHUNK_REINGEST_SINCE_MINUTES", 60):
-            with patch.object(embedding, "embed_chunks", return_value=[[1] * 1024]):
-                result = reingest_missing_chunks(batch_size=batch_size)
+        result = reingest_missing_chunks(batch_size=batch_size, minutes_ago=60)

    assert result == {
        "photo": {"correct": 10, "missing": 10, "total": 20},
--- a/tests/memory/workers/test_email.py
+++ b/tests/memory/workers/test_email.py
@ -102,6 +102,7 @@ def test_process_attachment_disk(attachment_size, max_inline_size, message_id):
    assert not cast(str, result.content)
    assert cast(str, result.filename) == str(
        settings.FILE_STORAGE_DIR
+        / "emails"
        / "sender_example_com"
        / "INBOX"
        / "test_with_special_chars.txt"
@ -183,7 +184,11 @@ def test_process_attachments_mixed():

    # Verify large attachment has a path
    assert cast(str, results[1].filename) == str(
-        settings.FILE_STORAGE_DIR / "sender_example_com" / "INBOX" / "large.txt"
+        settings.FILE_STORAGE_DIR
+        / "emails"
+        / "sender_example_com"
+        / "INBOX"
+        / "large.txt"
    )