mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-28 23:24:43 +02:00
better handling of errors
This commit is contained in:
parent
f5c3e458d7
commit
1291ca9d08
@ -212,12 +212,16 @@ def process_content_item(
|
|||||||
- Commits database transaction
|
- Commits database transaction
|
||||||
- Stores vectors in Qdrant
|
- Stores vectors in Qdrant
|
||||||
"""
|
"""
|
||||||
|
status = "failed"
|
||||||
session.add(item)
|
session.add(item)
|
||||||
session.flush()
|
session.flush()
|
||||||
|
|
||||||
chunks_count = embed_source_item(item)
|
chunks_count = embed_source_item(item)
|
||||||
session.flush()
|
session.flush()
|
||||||
|
|
||||||
|
if not chunks_count:
|
||||||
|
return create_task_result(item, status, content_length=getattr(item, "size", 0))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
push_to_qdrant([item], collection_name)
|
push_to_qdrant([item], collection_name)
|
||||||
status = "processed"
|
status = "processed"
|
||||||
@ -228,7 +232,6 @@ def process_content_item(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to push embeddings to Qdrant: {e}")
|
logger.error(f"Failed to push embeddings to Qdrant: {e}")
|
||||||
item.embed_status = "FAILED" # type: ignore
|
item.embed_status = "FAILED" # type: ignore
|
||||||
status = "failed"
|
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
return create_task_result(item, status, content_length=getattr(item, "size", 0))
|
return create_task_result(item, status, content_length=getattr(item, "size", 0))
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
from datetime import datetime
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from unittest.mock import patch
|
|
||||||
import uuid
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import qdrant_client
|
import qdrant_client
|
||||||
@ -207,10 +207,13 @@ def mock_file_storage(tmp_path: Path):
|
|||||||
chunk_storage_dir.mkdir(parents=True, exist_ok=True)
|
chunk_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
image_storage_dir = tmp_path / "images"
|
image_storage_dir = tmp_path / "images"
|
||||||
image_storage_dir.mkdir(parents=True, exist_ok=True)
|
image_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
email_storage_dir = tmp_path / "emails"
|
||||||
|
email_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
with (
|
with (
|
||||||
patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
|
patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
|
||||||
patch.object(settings, "CHUNK_STORAGE_DIR", chunk_storage_dir),
|
patch.object(settings, "CHUNK_STORAGE_DIR", chunk_storage_dir),
|
||||||
patch.object(settings, "WEBPAGE_STORAGE_DIR", image_storage_dir),
|
patch.object(settings, "WEBPAGE_STORAGE_DIR", image_storage_dir),
|
||||||
|
patch.object(settings, "EMAIL_STORAGE_DIR", email_storage_dir),
|
||||||
):
|
):
|
||||||
yield
|
yield
|
||||||
|
|
||||||
@ -226,8 +229,11 @@ def qdrant():
|
|||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def mock_voyage_client():
|
def mock_voyage_client():
|
||||||
|
def embeder(chunks, *args, **kwargs):
|
||||||
|
return Mock(embeddings=[[0.1] * 1024] * len(chunks))
|
||||||
|
|
||||||
with patch.object(voyageai, "Client", autospec=True) as mock_client:
|
with patch.object(voyageai, "Client", autospec=True) as mock_client:
|
||||||
client = mock_client()
|
client = mock_client()
|
||||||
client.embed.return_value.embeddings = [[0.1] * 1024]
|
client.embed = embeder
|
||||||
client.multimodal_embed.return_value.embeddings = [[0.1] * 1024]
|
client.multimodal_embed = embeder
|
||||||
yield client
|
yield client
|
||||||
|
@ -16,6 +16,7 @@ from memory.common.db.models import (
|
|||||||
EmailAttachment,
|
EmailAttachment,
|
||||||
BookSection,
|
BookSection,
|
||||||
BlogPost,
|
BlogPost,
|
||||||
|
Book,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -499,9 +500,8 @@ def test_mail_message_attachments_path(sender, folder, expected_path):
|
|||||||
sha256=b"test", content="test", sender=sender, folder=folder
|
sha256=b"test", content="test", sender=sender, folder=folder
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch.object(settings, "FILE_STORAGE_DIR", "/tmp/storage"):
|
result = mail_message.attachments_path
|
||||||
result = mail_message.attachments_path
|
assert str(result) == f"{settings.FILE_STORAGE_DIR}/emails/{expected_path}"
|
||||||
assert str(result) == f"/tmp/storage/{expected_path}"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -520,15 +520,8 @@ def test_mail_message_safe_filename(tmp_path, filename, expected):
|
|||||||
sha256=b"test", content="test", sender="user@example.com", folder="INBOX"
|
sha256=b"test", content="test", sender="user@example.com", folder="INBOX"
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch.object(settings, "FILE_STORAGE_DIR", tmp_path):
|
expected = settings.FILE_STORAGE_DIR / f"emails/user_example_com/INBOX/{expected}"
|
||||||
result = mail_message.safe_filename(filename)
|
assert mail_message.safe_filename(filename) == expected
|
||||||
|
|
||||||
# Check that the path is correct
|
|
||||||
expected_path = tmp_path / "user_example_com" / "INBOX" / expected
|
|
||||||
assert result == expected_path
|
|
||||||
|
|
||||||
# Check that the directory was created
|
|
||||||
assert result.parent.exists()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -847,6 +840,7 @@ def test_book_section_data_chunks(pages, expected_chunks):
|
|||||||
start_page=10,
|
start_page=10,
|
||||||
end_page=10 + len(pages),
|
end_page=10 + len(pages),
|
||||||
pages=pages,
|
pages=pages,
|
||||||
|
book=Book(id=1, title="Test Book", author="Test Author"),
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = book_section.data_chunks()
|
chunks = book_section.data_chunks()
|
||||||
|
@ -15,8 +15,8 @@ def mock_embed(mock_voyage_client):
|
|||||||
def embed(texts, model, input_type):
|
def embed(texts, model, input_type):
|
||||||
return Mock(embeddings=[next(vectors) for _ in texts])
|
return Mock(embeddings=[next(vectors) for _ in texts])
|
||||||
|
|
||||||
mock_voyage_client.embed.side_effect = embed
|
mock_voyage_client.embed = embed
|
||||||
mock_voyage_client.multimodal_embed.side_effect = embed
|
mock_voyage_client.multimodal_embed = embed
|
||||||
|
|
||||||
return mock_voyage_client
|
return mock_voyage_client
|
||||||
|
|
||||||
|
@ -342,7 +342,7 @@ def test_sync_comic_embedding_failure(
|
|||||||
assert result == {
|
assert result == {
|
||||||
"comic_id": 1,
|
"comic_id": 1,
|
||||||
"title": "Test Comic",
|
"title": "Test Comic",
|
||||||
"status": "processed",
|
"status": "failed",
|
||||||
"chunks_count": 0,
|
"chunks_count": 0,
|
||||||
"embed_status": "FAILED",
|
"embed_status": "FAILED",
|
||||||
"content_length": 90,
|
"content_length": 90,
|
||||||
|
@ -423,7 +423,7 @@ def test_create_task_result_no_title():
|
|||||||
[
|
[
|
||||||
("success", False, "processed", "STORED"),
|
("success", False, "processed", "STORED"),
|
||||||
("success", True, "failed", "FAILED"),
|
("success", True, "failed", "FAILED"),
|
||||||
("empty", False, "processed", "FAILED"),
|
("empty", False, "failed", "FAILED"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_process_content_item(
|
def test_process_content_item(
|
||||||
|
@ -170,7 +170,7 @@ def test_embed_sections(db_session):
|
|||||||
|
|
||||||
|
|
||||||
@patch("memory.workers.tasks.ebook.parse_ebook")
|
@patch("memory.workers.tasks.ebook.parse_ebook")
|
||||||
def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path):
|
def test_sync_book_success(mock_parse, mock_ebook, db_session, tmp_path, qdrant):
|
||||||
"""Test successful book synchronization."""
|
"""Test successful book synchronization."""
|
||||||
book_file = tmp_path / "test.epub"
|
book_file = tmp_path / "test.epub"
|
||||||
book_file.write_text("dummy content")
|
book_file.write_text("dummy content")
|
||||||
@ -315,17 +315,18 @@ def test_embed_sections_uses_correct_chunk_size(db_session, mock_voyage_client):
|
|||||||
db_session.add_all(sections)
|
db_session.add_all(sections)
|
||||||
db_session.flush()
|
db_session.flush()
|
||||||
|
|
||||||
|
return_val = Mock(embeddings=[[0.1] * 1024] * 3)
|
||||||
|
mock_voyage_client.embed = Mock(return_value=return_val)
|
||||||
ebook.embed_sections(sections)
|
ebook.embed_sections(sections)
|
||||||
|
|
||||||
# Verify that the voyage client was called with the full large content
|
# Verify that the voyage client was called with the full large content
|
||||||
# Should be called 3 times: once for section content, twice for pages
|
# Should be called 3 times: once for section content, twice for pages
|
||||||
assert mock_voyage_client.embed.call_count == 3
|
assert mock_voyage_client.embed.call_count == 1
|
||||||
|
|
||||||
# Check that the full content was passed to the embedding function
|
# Check that the full content was passed to the embedding function
|
||||||
calls = mock_voyage_client.embed.call_args_list
|
texts = mock_voyage_client.embed.call_args[0][0]
|
||||||
texts = [c[0][0] for c in calls]
|
|
||||||
assert texts == [
|
assert texts == [
|
||||||
[large_page_1.strip()],
|
large_page_1.strip(),
|
||||||
[large_page_2.strip()],
|
large_page_2.strip(),
|
||||||
[large_section_content.strip()],
|
large_section_content.strip(),
|
||||||
]
|
]
|
||||||
|
@ -283,9 +283,7 @@ def test_reingest_missing_chunks(db_session, qdrant, batch_size):
|
|||||||
qd.upsert_vectors(qdrant, chunk.source.modality, [str(chunk.id)], [[1] * 1024])
|
qd.upsert_vectors(qdrant, chunk.source.modality, [str(chunk.id)], [[1] * 1024])
|
||||||
|
|
||||||
with patch.object(reingest_chunk, "delay", reingest_chunk):
|
with patch.object(reingest_chunk, "delay", reingest_chunk):
|
||||||
with patch.object(settings, "CHUNK_REINGEST_SINCE_MINUTES", 60):
|
result = reingest_missing_chunks(batch_size=batch_size, minutes_ago=60)
|
||||||
with patch.object(embedding, "embed_chunks", return_value=[[1] * 1024]):
|
|
||||||
result = reingest_missing_chunks(batch_size=batch_size)
|
|
||||||
|
|
||||||
assert result == {
|
assert result == {
|
||||||
"photo": {"correct": 10, "missing": 10, "total": 20},
|
"photo": {"correct": 10, "missing": 10, "total": 20},
|
||||||
|
@ -102,6 +102,7 @@ def test_process_attachment_disk(attachment_size, max_inline_size, message_id):
|
|||||||
assert not cast(str, result.content)
|
assert not cast(str, result.content)
|
||||||
assert cast(str, result.filename) == str(
|
assert cast(str, result.filename) == str(
|
||||||
settings.FILE_STORAGE_DIR
|
settings.FILE_STORAGE_DIR
|
||||||
|
/ "emails"
|
||||||
/ "sender_example_com"
|
/ "sender_example_com"
|
||||||
/ "INBOX"
|
/ "INBOX"
|
||||||
/ "test_with_special_chars.txt"
|
/ "test_with_special_chars.txt"
|
||||||
@ -183,7 +184,11 @@ def test_process_attachments_mixed():
|
|||||||
|
|
||||||
# Verify large attachment has a path
|
# Verify large attachment has a path
|
||||||
assert cast(str, results[1].filename) == str(
|
assert cast(str, results[1].filename) == str(
|
||||||
settings.FILE_STORAGE_DIR / "sender_example_com" / "INBOX" / "large.txt"
|
settings.FILE_STORAGE_DIR
|
||||||
|
/ "emails"
|
||||||
|
/ "sender_example_com"
|
||||||
|
/ "INBOX"
|
||||||
|
/ "large.txt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user