diff --git a/requirements-api.txt b/requirements-api.txt index fa6732f..aba6011 100644 --- a/requirements-api.txt +++ b/requirements-api.txt @@ -3,4 +3,5 @@ uvicorn==0.29.0 python-jose==3.3.0 python-multipart==0.0.9 sqladmin -mcp==1.9.2 \ No newline at end of file +mcp==1.9.2 +bm25s[full]==0.2.13 \ No newline at end of file diff --git a/requirements-common.txt b/requirements-common.txt index dbf0da5..d07028b 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -6,5 +6,5 @@ dotenv==0.9.9 voyageai==0.3.2 qdrant-client==1.9.0 anthropic==0.18.1 - -bm25s[full]==0.2.13 \ No newline at end of file +# Pin the httpx version, as newer versions break the anthropic client +httpx==0.27.0 \ No newline at end of file diff --git a/run_celery_task.py b/run_celery_task.py index e9a13f2..146c230 100644 --- a/run_celery_task.py +++ b/run_celery_task.py @@ -38,6 +38,7 @@ from memory.workers.tasks.maintenance import ( CLEAN_COLLECTION, REINGEST_CHUNK, REINGEST_EMPTY_SOURCE_ITEMS, + REINGEST_ALL_EMPTY_SOURCE_ITEMS, REINGEST_ITEM, REINGEST_MISSING_CHUNKS, UPDATE_METADATA_FOR_ITEM, @@ -67,6 +68,7 @@ TASK_MAPPINGS = { "reingest_chunk": REINGEST_CHUNK, "reingest_item": REINGEST_ITEM, "reingest_empty_source_items": REINGEST_EMPTY_SOURCE_ITEMS, + "reingest_all_empty_source_items": REINGEST_ALL_EMPTY_SOURCE_ITEMS, "update_metadata_for_item": UPDATE_METADATA_FOR_ITEM, "update_metadata_for_source_items": UPDATE_METADATA_FOR_SOURCE_ITEMS, }, @@ -316,6 +318,13 @@ def maintenance_reingest_empty_source_items(ctx, item_type): execute_task(ctx, "maintenance", "reingest_empty_source_items", item_type=item_type) +@maintenance.command("reingest-all-empty-source-items") +@click.pass_context +def maintenance_reingest_all_empty_source_items(ctx): + """Reingest all empty source items.""" + execute_task(ctx, "maintenance", "reingest_all_empty_source_items") + + @maintenance.command("reingest-chunk") @click.option("--chunk-id", required=True, help="Chunk ID to reingest") @click.pass_context diff --git a/src/memory/api/MCP/tools.py b/src/memory/api/MCP/tools.py index 77ba76b..f68533c 100644 --- a/src/memory/api/MCP/tools.py +++ b/src/memory/api/MCP/tools.py @@ -15,14 +15,60 @@ from memory.common.db.connection import make_session from memory.common import extract from memory.common.db.models import AgentObservation -from memory.api.search import search, SearchFilters +from memory.api.search.search import search, SearchFilters from memory.common.formatters import observation from memory.workers.tasks.content_processing import process_content_item +from memory.common.collections import ALL_COLLECTIONS, OBSERVATION_COLLECTIONS logger = logging.getLogger(__name__) # Create MCP server instance -mcp = FastMCP("memory", stateless=True) +mcp = FastMCP("memory", stateless_http=True) + + +def filter_observation_source_ids( + tags: list[str] | None = None, observation_types: list[str] | None = None +): + if not tags and not observation_types: + return None + + with make_session() as session: + items_query = session.query(AgentObservation.id) + + if tags: + # Use PostgreSQL array overlap operator with proper array casting + items_query = items_query.filter( + AgentObservation.tags.op("&&")(sql_cast(tags, ARRAY(Text))), + ) + if observation_types: + items_query = items_query.filter( + AgentObservation.observation_type.in_(observation_types) + ) + source_ids = [item.id for item in items_query.all()] + + return source_ids + + +def filter_source_ids( + modalities: set[str], + tags: list[str] | None = None, +): + if not tags: + return 
None + + with make_session() as session: + items_query = session.query(SourceItem.id) + + if tags: + # Use PostgreSQL array overlap operator with proper array casting + items_query = items_query.filter( + SourceItem.tags.op("&&")(sql_cast(tags, ARRAY(Text))), + ) + if modalities: + items_query = items_query.filter(SourceItem.modality.in_(modalities)) + source_ids = [item.id for item in items_query.all()] + + return source_ids @mcp.tool() @@ -48,20 +94,6 @@ async def get_all_tags() -> list[str]: - Projects: "project:website-redesign" - Contexts: "context:work", "context:late-night" - Domains: "domain:finance" - - Example: - # Get all tags to ensure consistency - tags = await get_all_tags() - # Returns: ["ai-safety", "context:work", "functional-programming", - # "machine-learning", "project:thesis", ...] - - # Use to check if a topic has been discussed before - if "quantum-computing" in tags: - # Search for related observations - observations = await search_observations( - query="quantum computing", - tags=["quantum-computing"] - ) """ with make_session() as session: tags_query = session.query(func.unnest(SourceItem.tags)).distinct() @@ -93,26 +125,6 @@ async def get_all_subjects() -> list[str]: - "ai_beliefs", "ai_safety_beliefs" - "learning_preferences" - "communication_style" - - Example: - # Get all subjects to ensure consistency - subjects = await get_all_subjects() - # Returns: ["ai_safety_beliefs", "architecture_preferences", - # "programming_philosophy", "work_schedule", ...] - - # Use to check what we know about the user - if "programming_style" in subjects: - # Get all programming-related observations - observations = await search_observations( - query="programming", - subject="programming_style" - ) - - Best practices: - - Always check existing subjects before creating new ones - - Use snake_case for consistency - - Be specific but not too granular - - Group related observations under same subject """ with make_session() as session: return sorted( @@ -146,20 +158,6 @@ async def get_all_observation_types() -> list[str]: Returns: List of observation types that have actually been used in the system. - - Example: - # Check what types of observations exist - types = await get_all_observation_types() - # Returns: ["behavior", "belief", "contradiction", "preference"] - - # Use to analyze observation distribution - for obs_type in types: - observations = await search_observations( - query="", - observation_types=[obs_type], - limit=100 - ) - print(f"{obs_type}: {len(observations)} observations") """ with make_session() as session: return sorted( @@ -173,7 +171,11 @@ async def get_all_observation_types() -> list[str]: @mcp.tool() async def search_knowledge_base( - query: str, previews: bool = False, modalities: list[str] = [], limit: int = 10 + query: str, + previews: bool = False, + modalities: set[str] = set(), + tags: list[str] = [], + limit: int = 10, ) -> list[dict]: """ Search through the user's stored knowledge and content. 
@@ -283,14 +285,22 @@ async def search_knowledge_base( """ logger.info(f"MCP search for: {query}") + if not modalities: + modalities = set(ALL_COLLECTIONS.keys()) + modalities = set(modalities) & ALL_COLLECTIONS.keys() - OBSERVATION_COLLECTIONS + upload_data = extract.extract_text(query) results = await search( upload_data, previews=previews, modalities=modalities, limit=limit, - min_text_score=0.3, - min_multimodal_score=0.3, + min_text_score=0.4, + min_multimodal_score=0.25, + filters=SearchFilters( + tags=tags, + source_ids=filter_source_ids(tags=tags, modalities=modalities), + ), ) # Convert SearchResult objects to dictionaries for MCP @@ -456,12 +466,13 @@ async def observe( mime_type="text/plain", sha256=sha256(f"{content}{subject}{observation_type}".encode("utf-8")).digest(), inserted_at=datetime.now(timezone.utc), + modality="observation", ) try: with make_session() as session: process_content_item(observation, session) - if not observation.id: + if not cast(int | None, observation.id): raise ValueError("Observation not created") logger.info( @@ -600,24 +611,6 @@ async def search_observations( - Higher confidence observations are more reliable - Recent observations may override older ones on same topic """ - source_ids = None - if tags or observation_types: - with make_session() as session: - items_query = session.query(AgentObservation.id) - - if tags: - # Use PostgreSQL array overlap operator with proper array casting - items_query = items_query.filter( - AgentObservation.tags.op("&&")(sql_cast(tags, ARRAY(Text))), - ) - if observation_types: - items_query = items_query.filter( - AgentObservation.observation_type.in_(observation_types) - ) - source_ids = [item.id for item in items_query.all()] - if not source_ids: - return [] - semantic_text = observation.generate_semantic_text( subject=subject or "", observation_type="".join(observation_types or []), @@ -637,18 +630,24 @@ async def search_observations( extract.DataChunk(data=[temporal]), ], previews=True, - modalities=["semantic", "temporal"], + modalities={"semantic", "temporal"}, limit=limit, - min_text_score=0.8, filters=SearchFilters( subject=subject, confidence=min_confidence, tags=tags, observation_types=observation_types, - source_ids=source_ids, + source_ids=filter_observation_source_ids(tags=tags), ), + timeout=2, ) return [ - cast(dict, cast(dict, result.model_dump()).get("content")) for result in results + { + "content": r.content, + "tags": r.tags, + "created_at": r.created_at.isoformat() if r.created_at else None, + "metadata": r.metadata, + } + for r in results ] diff --git a/src/memory/api/app.py b/src/memory/api/app.py index 4f37c39..51a9d22 100644 --- a/src/memory/api/app.py +++ b/src/memory/api/app.py @@ -3,6 +3,7 @@ FastAPI application for the knowledge base. 
""" import contextlib +import os import pathlib import logging from typing import Annotated, Optional @@ -105,12 +106,16 @@ def get_file_by_path(path: str): return FileResponse(path=file_path, filename=file_path.name) -def main(): +def main(reload: bool = False): """Run the FastAPI server in debug mode with auto-reloading.""" import uvicorn uvicorn.run( - "memory.api.app:app", host="0.0.0.0", port=8000, reload=True, log_level="debug" + "memory.api.app:app", + host="0.0.0.0", + port=8000, + reload=reload, + log_level="debug", ) @@ -118,4 +123,4 @@ if __name__ == "__main__": from memory.common.qdrant import setup_qdrant setup_qdrant() - main() + main(os.getenv("RELOAD", "false") == "true") diff --git a/src/memory/api/search.py b/src/memory/api/search.py deleted file mode 100644 index 6b4b501..0000000 --- a/src/memory/api/search.py +++ /dev/null @@ -1,382 +0,0 @@ -""" -Search endpoints for the knowledge base API. -""" - -import asyncio -import base64 -from hashlib import sha256 -import io -import logging -from collections import defaultdict -from typing import Any, Callable, Optional, TypedDict, NotRequired - -import bm25s -import Stemmer -import qdrant_client -from PIL import Image -from pydantic import BaseModel -from qdrant_client.http import models as qdrant_models - -from memory.common import embedding, extract, qdrant, settings -from memory.common.collections import ( - ALL_COLLECTIONS, - MULTIMODAL_COLLECTIONS, - TEXT_COLLECTIONS, -) -from memory.common.db.connection import make_session -from memory.common.db.models import Chunk - -logger = logging.getLogger(__name__) - - -class AnnotatedChunk(BaseModel): - id: str - score: float - metadata: dict - preview: Optional[str | None] = None - - -class SourceData(BaseModel): - """Holds source item data to avoid SQLAlchemy session issues""" - - id: int - size: int | None - mime_type: str | None - filename: str | None - content: str | dict | None - content_length: int - - -class SearchResponse(BaseModel): - collection: str - results: list[dict] - - -class SearchResult(BaseModel): - id: int - size: int - mime_type: str - chunks: list[AnnotatedChunk] - content: Optional[str | dict] = None - filename: Optional[str] = None - - -class SearchFilters(TypedDict): - subject: NotRequired[str | None] - confidence: NotRequired[float] - tags: NotRequired[list[str] | None] - observation_types: NotRequired[list[str] | None] - source_ids: NotRequired[list[int] | None] - - -async def with_timeout( - call, timeout: int = 2 -) -> list[tuple[SourceData, AnnotatedChunk]]: - """ - Run a function with a timeout. 
- - Args: - call: The function to run - timeout: The timeout in seconds - """ - try: - return await asyncio.wait_for(call, timeout=timeout) - except TimeoutError: - logger.warning(f"Search timed out after {timeout}s") - return [] - except Exception as e: - logger.error(f"Search failed: {e}") - return [] - - -def annotated_chunk( - chunk: Chunk, search_result: qdrant_models.ScoredPoint, previews: bool -) -> tuple[SourceData, AnnotatedChunk]: - def serialize_item(item: bytes | str | Image.Image) -> str | None: - if not previews and not isinstance(item, str): - return None - if not previews and isinstance(item, str): - return item[:100] - - if isinstance(item, Image.Image): - buffer = io.BytesIO() - format = item.format or "PNG" - item.save(buffer, format=format) - mime_type = f"image/{format.lower()}" - return f"data:{mime_type};base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}" - elif isinstance(item, bytes): - return base64.b64encode(item).decode("utf-8") - elif isinstance(item, str): - return item - else: - raise ValueError(f"Unsupported item type: {type(item)}") - - metadata = search_result.payload or {} - metadata = { - k: v - for k, v in metadata.items() - if k not in ["content", "filename", "size", "content_type", "tags"] - } - - # Prefetch all needed source data while in session - source = chunk.source - source_data = SourceData( - id=source.id, - size=source.size, - mime_type=source.mime_type, - filename=source.filename, - content=source.display_contents, - content_length=len(source.content) if source.content else 0, - ) - - return source_data, AnnotatedChunk( - id=str(chunk.id), - score=search_result.score, - metadata=metadata, - preview=serialize_item(chunk.data[0]) if chunk.data else None, - ) - - -def group_chunks(chunks: list[tuple[SourceData, AnnotatedChunk]]) -> list[SearchResult]: - items = defaultdict(list) - source_lookup = {} - - for source, chunk in chunks: - items[source.id].append(chunk) - source_lookup[source.id] = source - - return [ - SearchResult( - id=source.id, - size=source.size or source.content_length, - mime_type=source.mime_type or "text/plain", - filename=source.filename - and source.filename.replace( - str(settings.FILE_STORAGE_DIR).lstrip("/"), "/files" - ), - content=source.content, - chunks=sorted(chunks, key=lambda x: x.score, reverse=True), - ) - for source_id, chunks in items.items() - for source in [source_lookup[source_id]] - ] - - -def query_chunks( - client: qdrant_client.QdrantClient, - upload_data: list[extract.DataChunk], - allowed_modalities: set[str], - embedder: Callable, - min_score: float = 0.0, - limit: int = 10, - filters: dict[str, Any] | None = None, -) -> dict[str, list[qdrant_models.ScoredPoint]]: - if not upload_data or not allowed_modalities: - return {} - - chunks = [chunk for chunk in upload_data if chunk.data] - if not chunks: - logger.error(f"No chunks to embed for {allowed_modalities}") - return {} - - logger.error(f"Embedding {len(chunks)} chunks for {allowed_modalities}") - for c in chunks: - logger.error(f"Chunk: {c.data}") - vectors = embedder([c.data for c in chunks], input_type="query") - - return { - collection: [ - r - for vector in vectors - for r in qdrant.search_vectors( - client=client, - collection_name=collection, - query_vector=vector, - limit=limit, - filter_params=filters, - ) - if r.score >= min_score - ] - for collection in allowed_modalities - } - - -async def search_bm25( - query: str, - modalities: list[str], - limit: int = 10, - filters: SearchFilters = SearchFilters(), -) -> 
list[tuple[SourceData, AnnotatedChunk]]: - with make_session() as db: - items_query = db.query(Chunk.id, Chunk.content).filter( - Chunk.collection_name.in_(modalities) - ) - if source_ids := filters.get("source_ids"): - items_query = items_query.filter(Chunk.source_id.in_(source_ids)) - items = items_query.all() - item_ids = { - sha256(item.content.lower().strip().encode("utf-8")).hexdigest(): item.id - for item in items - } - corpus = [item.content.lower().strip() for item in items] - - stemmer = Stemmer.Stemmer("english") - corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer) - retriever = bm25s.BM25() - retriever.index(corpus_tokens) - - query_tokens = bm25s.tokenize(query, stemmer=stemmer) - results, scores = retriever.retrieve( - query_tokens, k=min(limit, len(corpus)), corpus=corpus - ) - - item_scores = { - item_ids[sha256(doc.encode("utf-8")).hexdigest()]: score - for doc, score in zip(results[0], scores[0]) - } - - with make_session() as db: - chunks = db.query(Chunk).filter(Chunk.id.in_(item_scores.keys())).all() - results = [] - for chunk in chunks: - # Prefetch all needed source data while in session - source = chunk.source - source_data = SourceData( - id=source.id, - size=source.size, - mime_type=source.mime_type, - filename=source.filename, - content=source.display_contents, - content_length=len(source.content) if source.content else 0, - ) - - annotated = AnnotatedChunk( - id=str(chunk.id), - score=item_scores[chunk.id], - metadata=source.as_payload(), - preview=None, - ) - results.append((source_data, annotated)) - - return results - - -async def search_embeddings( - data: list[extract.DataChunk], - previews: Optional[bool] = False, - modalities: set[str] = set(), - limit: int = 10, - min_score: float = 0.3, - filters: SearchFilters = SearchFilters(), - multimodal: bool = False, -) -> list[tuple[SourceData, AnnotatedChunk]]: - """ - Search across knowledge base using text query and optional files. 
- - Parameters: - - data: List of data to search in (e.g., text, images, files) - - previews: Whether to include previews in the search results - - modalities: List of modalities to search in (e.g., "text", "photo", "doc") - - limit: Maximum number of results - - min_score: Minimum score to include in the search results - - filters: Filters to apply to the search results - - multimodal: Whether to search in multimodal collections - """ - query_filters = { - "must": [ - {"key": "confidence", "range": {"gte": filters.get("confidence", 0.5)}}, - ], - } - if tags := filters.get("tags"): - query_filters["must"] += [{"key": "tags", "match": {"any": tags}}] - if observation_types := filters.get("observation_types"): - query_filters["must"] += [ - {"key": "observation_type", "match": {"any": observation_types}} - ] - - client = qdrant.get_qdrant_client() - results = query_chunks( - client, - data, - modalities, - embedding.embed_text if not multimodal else embedding.embed_mixed, - min_score=min_score, - limit=limit, - filters=query_filters, - ) - search_results = {k: results.get(k, []) for k in modalities} - - found_chunks = { - str(r.id): r for results in search_results.values() for r in results - } - with make_session() as db: - chunks = db.query(Chunk).filter(Chunk.id.in_(found_chunks.keys())).all() - return [ - annotated_chunk(chunk, found_chunks[str(chunk.id)], previews or False) - for chunk in chunks - ] - - -async def search( - data: list[extract.DataChunk], - previews: Optional[bool] = False, - modalities: list[str] = [], - limit: int = 10, - min_text_score: float = 0.3, - min_multimodal_score: float = 0.3, - filters: SearchFilters = {}, -) -> list[SearchResult]: - """ - Search across knowledge base using text query and optional files. - - Parameters: - - query: Optional text search query - - modalities: List of modalities to search in (e.g., "text", "photo", "doc") - - files: Optional files to include in the search context - - limit: Maximum number of results per modality - - Returns: - - List of search results sorted by score - """ - allowed_modalities = set(modalities or ALL_COLLECTIONS.keys()) - - text_embeddings_results = with_timeout( - search_embeddings( - data, - previews, - allowed_modalities & TEXT_COLLECTIONS, - limit, - min_text_score, - filters, - multimodal=False, - ) - ) - multimodal_embeddings_results = with_timeout( - search_embeddings( - data, - previews, - allowed_modalities & MULTIMODAL_COLLECTIONS, - limit, - min_multimodal_score, - filters, - multimodal=True, - ) - ) - bm25_results = with_timeout( - search_bm25( - " ".join([c for chunk in data for c in chunk.data if isinstance(c, str)]), - modalities, - limit=limit, - filters=filters, - ) - ) - - results = await asyncio.gather( - text_embeddings_results, - multimodal_embeddings_results, - bm25_results, - return_exceptions=False, - ) - - results = group_chunks([c for r in results for c in r]) - return sorted(results, key=lambda x: max(c.score for c in x.chunks), reverse=True) diff --git a/src/memory/api/search/__init__.py b/src/memory/api/search/__init__.py new file mode 100644 index 0000000..f3b1cf1 --- /dev/null +++ b/src/memory/api/search/__init__.py @@ -0,0 +1,4 @@ +from .search import search +from .utils import SearchResult, SearchFilters + +__all__ = ["search", "SearchResult", "SearchFilters"] diff --git a/src/memory/api/search/bm25.py b/src/memory/api/search/bm25.py new file mode 100644 index 0000000..9acb098 --- /dev/null +++ b/src/memory/api/search/bm25.py @@ -0,0 +1,68 @@ +""" +Search endpoints for the 
knowledge base API. +""" + +from hashlib import sha256 +import logging + +import bm25s +import Stemmer +from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters + +from memory.common.db.connection import make_session +from memory.common.db.models import Chunk + +logger = logging.getLogger(__name__) + + +async def search_bm25( + query: str, + modalities: set[str], + limit: int = 10, + filters: SearchFilters = SearchFilters(), +) -> list[tuple[SourceData, AnnotatedChunk]]: + with make_session() as db: + items_query = db.query(Chunk.id, Chunk.content).filter( + Chunk.collection_name.in_(modalities) + ) + if source_ids := filters.get("source_ids"): + items_query = items_query.filter(Chunk.source_id.in_(source_ids)) + items = items_query.all() + item_ids = { + sha256(item.content.lower().strip().encode("utf-8")).hexdigest(): item.id + for item in items + } + corpus = [item.content.lower().strip() for item in items] + + stemmer = Stemmer.Stemmer("english") + corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer) + retriever = bm25s.BM25() + retriever.index(corpus_tokens) + + query_tokens = bm25s.tokenize(query, stemmer=stemmer) + results, scores = retriever.retrieve( + query_tokens, k=min(limit, len(corpus)), corpus=corpus + ) + + item_scores = { + item_ids[sha256(doc.encode("utf-8")).hexdigest()]: score + for doc, score in zip(results[0], scores[0]) + } + + with make_session() as db: + chunks = db.query(Chunk).filter(Chunk.id.in_(item_scores.keys())).all() + results = [] + for chunk in chunks: + # Prefetch all needed source data while in session + source_data = SourceData.from_chunk(chunk) + + annotated = AnnotatedChunk( + id=str(chunk.id), + score=item_scores[chunk.id], + metadata=chunk.source.as_payload(), + preview=None, + search_method="bm25", + ) + results.append((source_data, annotated)) + + return results diff --git a/src/memory/api/search/embeddings.py b/src/memory/api/search/embeddings.py new file mode 100644 index 0000000..132b6bb --- /dev/null +++ b/src/memory/api/search/embeddings.py @@ -0,0 +1,144 @@ +import base64 +import io +import logging +from typing import Any, Callable, Optional + +import qdrant_client +from PIL import Image +from qdrant_client.http import models as qdrant_models + +from memory.common import embedding, extract, qdrant +from memory.common.db.connection import make_session +from memory.common.db.models import Chunk +from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters + +logger = logging.getLogger(__name__) + + +def annotated_chunk( + chunk: Chunk, search_result: qdrant_models.ScoredPoint, previews: bool +) -> tuple[SourceData, AnnotatedChunk]: + def serialize_item(item: bytes | str | Image.Image) -> str | None: + if not previews and not isinstance(item, str): + return None + if not previews and isinstance(item, str): + return item[:100] + + if isinstance(item, Image.Image): + buffer = io.BytesIO() + format = item.format or "PNG" + item.save(buffer, format=format) + mime_type = f"image/{format.lower()}" + return f"data:{mime_type};base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}" + elif isinstance(item, bytes): + return base64.b64encode(item).decode("utf-8") + elif isinstance(item, str): + return item + else: + raise ValueError(f"Unsupported item type: {type(item)}") + + metadata = search_result.payload or {} + metadata = { + k: v + for k, v in metadata.items() + if k not in ["content", "filename", "size", "content_type", "tags"] + } + + # Prefetch all needed source data while in session 
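+    # (from_chunk copies the needed ORM fields into a plain pydantic object,
+    # so the result stays usable after the SQLAlchemy session is closed.)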
+    return SourceData.from_chunk(chunk), AnnotatedChunk(
+        id=str(chunk.id),
+        score=search_result.score,
+        metadata=metadata,
+        preview=serialize_item(chunk.data[0]) if chunk.data else None,
+        search_method="embeddings",
+    )
+
+
+def query_chunks(
+    client: qdrant_client.QdrantClient,
+    upload_data: list[extract.DataChunk],
+    allowed_modalities: set[str],
+    embedder: Callable,
+    min_score: float = 0.3,
+    limit: int = 10,
+    filters: dict[str, Any] | None = None,
+) -> dict[str, list[qdrant_models.ScoredPoint]]:
+    if not upload_data or not allowed_modalities:
+        return {}
+
+    chunks = [chunk for chunk in upload_data if chunk.data]
+    if not chunks:
+        logger.error(f"No chunks to embed for {allowed_modalities}")
+        return {}
+
+    vectors = embedder(chunks, input_type="query")
+
+    return {
+        collection: [
+            r
+            for vector in vectors
+            for r in qdrant.search_vectors(
+                client=client,
+                collection_name=collection,
+                query_vector=vector,
+                limit=limit,
+                filter_params=filters,
+            )
+            if r.score >= min_score
+        ]
+        for collection in allowed_modalities
+    }
+
+
+async def search_embeddings(
+    data: list[extract.DataChunk],
+    previews: Optional[bool] = False,
+    modalities: set[str] = set(),
+    limit: int = 10,
+    min_score: float = 0.3,
+    filters: SearchFilters = SearchFilters(),
+    multimodal: bool = False,
+) -> list[tuple[SourceData, AnnotatedChunk]]:
+    """
+    Search across the knowledge base using embedding similarity.
+
+    Parameters:
+    - data: List of data to search in (e.g., text, images, files)
+    - previews: Whether to include previews in the search results
+    - modalities: Set of modalities to search in (e.g., "text", "photo", "doc")
+    - limit: Maximum number of results
+    - min_score: Minimum score to include in the search results
+    - filters: Filters to apply to the search results
+    - multimodal: Whether to search in multimodal collections
+    """
+    query_filters: dict[str, list[dict]] = {"must": []}
+    if confidence := filters.get("confidence"):
+        query_filters["must"].append(
+            {"key": "confidence", "range": {"gte": confidence}}
+        )
+    if tags := filters.get("tags"):
+        query_filters["must"].append({"key": "tags", "match": {"any": tags}})
+    if observation_types := filters.get("observation_types"):
+        query_filters["must"].append(
+            {"key": "observation_type", "match": {"any": observation_types}}
+        )
+
+    client = qdrant.get_qdrant_client()
+    results = query_chunks(
+        client,
+        data,
+        modalities,
+        embedding.embed_text if not multimodal else embedding.embed_mixed,
+        min_score=min_score,
+        limit=limit,
+        filters=query_filters if query_filters["must"] else None,
+    )
+    search_results = {k: results.get(k, []) for k in modalities}
+
+    found_chunks = {
+        str(r.id): r for results in search_results.values() for r in results
+    }
+    with make_session() as db:
+        chunks = db.query(Chunk).filter(Chunk.id.in_(found_chunks.keys())).all()
+        return [
+            annotated_chunk(chunk, found_chunks[str(chunk.id)], previews or False)
+            for chunk in chunks
+        ]
diff --git a/src/memory/api/search/search.py b/src/memory/api/search/search.py
new file mode 100644
index 0000000..2e07d5d
--- /dev/null
+++ b/src/memory/api/search/search.py
@@ -0,0 +1,94 @@
+"""
+Search endpoints for the knowledge base API.
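+
+Combines embedding search (over text and multimodal collections) with BM25
+keyword search; BM25 hits are only appended when the embedding searches
+return fewer than `limit` results.
+
+A minimal usage sketch (the query string and modality names are illustrative,
+not part of this change):
+
+    from memory.api.search import search
+    from memory.common import extract
+
+    results = await search(
+        extract.extract_text("functional programming"),
+        modalities={"doc", "blog"},
+        limit=5,
+    )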
+""" + +import asyncio +import logging +from typing import Optional + +from memory.api.search.embeddings import search_embeddings +from memory.api.search.bm25 import search_bm25 +from memory.api.search.utils import SearchFilters, SearchResult + +from memory.api.search.utils import group_chunks, with_timeout +from memory.common import extract +from memory.common.collections import ( + ALL_COLLECTIONS, + MULTIMODAL_COLLECTIONS, + TEXT_COLLECTIONS, +) + +logger = logging.getLogger(__name__) + + +async def search( + data: list[extract.DataChunk], + previews: Optional[bool] = False, + modalities: set[str] = set(), + limit: int = 10, + min_text_score: float = 0.4, + min_multimodal_score: float = 0.25, + filters: SearchFilters = {}, + timeout: int = 2, +) -> list[SearchResult]: + """ + Search across knowledge base using text query and optional files. + + Parameters: + - query: Optional text search query + - modalities: List of modalities to search in (e.g., "text", "photo", "doc") + - files: Optional files to include in the search context + - limit: Maximum number of results per modality + + Returns: + - List of search results sorted by score + """ + allowed_modalities = modalities & ALL_COLLECTIONS.keys() + + text_embeddings_results = with_timeout( + search_embeddings( + data, + previews, + allowed_modalities & TEXT_COLLECTIONS, + limit, + min_text_score, + filters, + multimodal=False, + ), + timeout, + ) + multimodal_embeddings_results = with_timeout( + search_embeddings( + data, + previews, + allowed_modalities & MULTIMODAL_COLLECTIONS, + limit, + min_multimodal_score, + filters, + multimodal=True, + ), + timeout, + ) + bm25_results = with_timeout( + search_bm25( + " ".join([c for chunk in data for c in chunk.data if isinstance(c, str)]), + modalities, + limit=limit, + filters=filters, + ), + timeout, + ) + + results = await asyncio.gather( + text_embeddings_results, + multimodal_embeddings_results, + bm25_results, + return_exceptions=False, + ) + text_results, multi_results, bm25_results = results + all_results = text_results + multi_results + if len(all_results) < limit: + all_results += bm25_results + + results = group_chunks(all_results, previews or False) + return sorted(results, key=lambda x: max(c.score for c in x.chunks), reverse=True) diff --git a/src/memory/api/search/utils.py b/src/memory/api/search/utils.py new file mode 100644 index 0000000..cf8fe20 --- /dev/null +++ b/src/memory/api/search/utils.py @@ -0,0 +1,134 @@ +import asyncio +from datetime import datetime +import logging +from collections import defaultdict +from typing import Optional, TypedDict, NotRequired + +from pydantic import BaseModel + +from memory.common import settings +from memory.common.db.models import Chunk + +logger = logging.getLogger(__name__) + + +class AnnotatedChunk(BaseModel): + id: str + score: float + metadata: dict + preview: Optional[str | None] = None + search_method: str | None = None + + +class SourceData(BaseModel): + """Holds source item data to avoid SQLAlchemy session issues""" + + id: int + size: int | None + mime_type: str | None + filename: str | None + content_length: int + contents: dict | None + created_at: datetime | None + + @staticmethod + def from_chunk(chunk: Chunk) -> "SourceData": + source = chunk.source + display_contents = source.display_contents or {} + return SourceData( + id=source.id, + size=source.size, + mime_type=source.mime_type, + filename=source.filename, + content_length=len(source.content) if source.content else 0, + contents=display_contents, + 
created_at=source.inserted_at, + ) + + +class SearchResponse(BaseModel): + collection: str + results: list[dict] + + +class SearchResult(BaseModel): + id: int + size: int + mime_type: str + chunks: list[AnnotatedChunk] + content: Optional[str | dict] = None + filename: Optional[str] = None + tags: list[str] | None = None + metadata: dict | None = None + created_at: datetime | None = None + + +class SearchFilters(TypedDict): + subject: NotRequired[str | None] + confidence: NotRequired[float] + tags: NotRequired[list[str] | None] + observation_types: NotRequired[list[str] | None] + source_ids: NotRequired[list[int] | None] + + +async def with_timeout( + call, timeout: int = 2 +) -> list[tuple[SourceData, AnnotatedChunk]]: + """ + Run a function with a timeout. + + Args: + call: The function to run + timeout: The timeout in seconds + """ + try: + return await asyncio.wait_for(call, timeout=timeout) + except TimeoutError: + logger.warning(f"Search timed out after {timeout}s") + return [] + except Exception as e: + logger.error(f"Search failed: {e}") + return [] + + +def group_chunks( + chunks: list[tuple[SourceData, AnnotatedChunk]], preview: bool = False +) -> list[SearchResult]: + items = defaultdict(list) + source_lookup = {} + + for source, chunk in chunks: + items[source.id].append(chunk) + source_lookup[source.id] = source + + def get_content(text: str | dict | None) -> str | dict | None: + if preview or not text or not isinstance(text, str) or len(text) < 250: + return text + + return text[:250] + "..." + + def make_result(source: SourceData, chunks: list[AnnotatedChunk]) -> SearchResult: + contents = source.contents or {} + tags = contents.pop("tags", []) + content = contents.pop("content", None) + + return SearchResult( + id=source.id, + size=source.size or source.content_length, + mime_type=source.mime_type or "text/plain", + filename=source.filename + and source.filename.replace( + str(settings.FILE_STORAGE_DIR).lstrip("/"), "/files" + ), + content=get_content(content), + tags=tags, + metadata=contents, + chunks=sorted(chunks, key=lambda x: x.score, reverse=True), + created_at=source.created_at, + ) + + return [ + make_result(source, chunks) + for source_id, chunks in items.items() + for source in [source_lookup[source_id]] + ] diff --git a/src/memory/common/collections.py b/src/memory/common/collections.py index f2aee53..98fc2cc 100644 --- a/src/memory/common/collections.py +++ b/src/memory/common/collections.py @@ -102,6 +102,7 @@ TEXT_COLLECTIONS = { MULTIMODAL_COLLECTIONS = { coll for coll, params in ALL_COLLECTIONS.items() if params.get("multimodal") } +OBSERVATION_COLLECTIONS = {"semantic", "temporal"} TYPES = { "doc": ["application/pdf", "application/docx", "application/msword"], diff --git a/src/memory/common/db/models/source_item.py b/src/memory/common/db/models/source_item.py index 56e9a44..798954e 100644 --- a/src/memory/common/db/models/source_item.py +++ b/src/memory/common/db/models/source_item.py @@ -84,7 +84,7 @@ def clean_filename(filename: str) -> str: def image_filenames(chunk_id: str, images: list[Image.Image]) -> list[str]: for i, image in enumerate(images): - if not image.filename: # type: ignore + if not getattr(image, "filename", None): # type: ignore filename = settings.CHUNK_STORAGE_DIR / f"{chunk_id}_{i}.{image.format}" # type: ignore image.save(filename) image.filename = str(filename) # type: ignore @@ -100,16 +100,6 @@ def add_pics(chunk: str, images: list[Image.Image]) -> list[extract.MulitmodalCh ] -def merge_metadata(*metadata: dict[str, Any]) -> 
dict[str, Any]: - final = {} - for m in metadata: - data = m.copy() - if tags := set(data.pop("tags", [])): - final["tags"] = tags | final.get("tags", set()) - final |= data - return final - - def chunk_mixed(content: str, image_paths: Sequence[str]) -> list[extract.DataChunk]: if not content.strip(): return [] @@ -241,14 +231,11 @@ class SourceItem(Base): return [chunk.id for chunk in self.chunks] def _chunk_contents(self) -> Sequence[extract.DataChunk]: - chunks: list[extract.DataChunk] = [] content = cast(str | None, self.content) if content: - chunks = [extract.DataChunk(data=[c]) for c in chunker.chunk_text(content)] - - if content and len(content) > chunker.DEFAULT_CHUNK_TOKENS * 2: - summary, tags = summarizer.summarize(content) - chunks.append(extract.DataChunk(data=[summary], metadata={"tags": tags})) + chunks = extract.extract_text(content) + else: + chunks = [] mime_type = cast(str | None, self.mime_type) if mime_type and mime_type.startswith("image/"): @@ -272,12 +259,14 @@ class SourceItem(Base): file_paths=image_names, collection_name=modality, embedding_model=collections.collection_model(modality, text, images), - item_metadata=merge_metadata(self.as_payload(), data.metadata, metadata), + item_metadata=extract.merge_metadata( + self.as_payload(), data.metadata, metadata + ), ) return chunk def data_chunks(self, metadata: dict[str, Any] = {}) -> Sequence[Chunk]: - return [self._make_chunk(data) for data in self._chunk_contents()] + return [self._make_chunk(data, metadata) for data in self._chunk_contents()] def as_payload(self) -> dict: return { @@ -291,4 +280,7 @@ class SourceItem(Base): return { "tags": self.tags, "size": self.size, + "content": self.content, + "filename": self.filename, + "mime_type": self.mime_type, } diff --git a/src/memory/common/db/models/source_items.py b/src/memory/common/db/models/source_items.py index 985a2a6..1b1dac5 100644 --- a/src/memory/common/db/models/source_items.py +++ b/src/memory/common/db/models/source_items.py @@ -33,7 +33,6 @@ from memory.common.db.models.source_item import ( SourceItem, Chunk, clean_filename, - merge_metadata, chunk_mixed, ) @@ -326,27 +325,24 @@ class BookSection(SourceItem): } return {k: v for k, v in vals.items() if v} - def data_chunks(self, metadata: dict[str, Any] = {}) -> Sequence[Chunk]: + def _chunk_contents(self) -> Sequence[extract.DataChunk]: content = cast(str, self.content.strip()) if not content: return [] if len([p for p in self.pages if p.strip()]) == 1: - return [ - self._make_chunk( - extract.DataChunk(data=[content]), metadata | {"type": "page"} - ) - ] + chunks = extract.extract_text(content, metadata={"type": "page"}) + if len(chunks) > 1: + chunks[-1].metadata["type"] = "summary" + return chunks summary, tags = summarizer.summarize(content) return [ - self._make_chunk( - extract.DataChunk(data=[content]), - merge_metadata(metadata, {"type": "section", "tags": tags}), + extract.DataChunk( + data=[content], metadata={"type": "section", "tags": tags} ), - self._make_chunk( - extract.DataChunk(data=[summary]), - merge_metadata(metadata, {"type": "summary", "tags": tags}), + extract.DataChunk( + data=[summary], metadata={"type": "summary", "tags": tags} ), ] @@ -596,7 +592,7 @@ class AgentObservation(SourceItem): ) semantic_chunk = extract.DataChunk( data=[semantic_text], - metadata=merge_metadata(metadata, {"embedding_type": "semantic"}), + metadata=extract.merge_metadata(metadata, {"embedding_type": "semantic"}), modality="semantic", ) @@ -609,7 +605,7 @@ class AgentObservation(SourceItem): ) 
temporal_chunk = extract.DataChunk( data=[temporal_text], - metadata=merge_metadata(metadata, {"embedding_type": "temporal"}), + metadata=extract.merge_metadata(metadata, {"embedding_type": "temporal"}), modality="temporal", ) @@ -617,14 +613,14 @@ class AgentObservation(SourceItem): self._make_chunk( extract.DataChunk( data=[i], - metadata=merge_metadata(metadata, {"embedding_type": "semantic"}), + metadata=extract.merge_metadata( + metadata, {"embedding_type": "semantic"} + ), modality="semantic", ) ) for i in [ self.content, - self.subject, - self.observation_type, self.evidence.get("quote", ""), ] if i diff --git a/src/memory/common/embedding.py b/src/memory/common/embedding.py index 44ae4ea..694d04d 100644 --- a/src/memory/common/embedding.py +++ b/src/memory/common/embedding.py @@ -1,5 +1,5 @@ import logging -from typing import Iterable, Literal, cast +from typing import Literal, cast import voyageai @@ -15,12 +15,22 @@ from memory.common.db.models import Chunk, SourceItem logger = logging.getLogger(__name__) +def as_string( + chunk: extract.MulitmodalChunk | list[extract.MulitmodalChunk], +) -> str: + if isinstance(chunk, str): + return chunk.strip() + if isinstance(chunk, list): + return "\n".join(as_string(i) for i in chunk).strip() + return "" + + def embed_chunks( chunks: list[list[extract.MulitmodalChunk]], model: str = settings.TEXT_EMBEDDING_MODEL, input_type: Literal["document", "query"] = "document", ) -> list[Vector]: - logger.debug(f"Embedding chunks: {model} - {str(chunks)[:100]} {len(chunks)}") + logger.debug(f"Embedding chunks: {model} - {str(chunks)} {len(chunks)}") vo = voyageai.Client() # type: ignore if model == settings.MIXED_EMBEDDING_MODEL: return vo.multimodal_embed( @@ -29,17 +39,18 @@ def embed_chunks( input_type=input_type, ).embeddings - texts = ["\n".join(i for i in c if isinstance(i, str)) for c in chunks] + texts = [as_string(c) for c in chunks] + logger.debug(f"Embedding texts: {texts}") return cast( list[Vector], vo.embed(texts, model=model, input_type=input_type).embeddings ) def break_chunk( - chunk: list[extract.MulitmodalChunk], chunk_size: int = DEFAULT_CHUNK_TOKENS + chunk: extract.DataChunk, chunk_size: int = DEFAULT_CHUNK_TOKENS ) -> list[extract.MulitmodalChunk]: result = [] - for c in chunk: + for c in chunk.data: if isinstance(c, str): result += chunk_text(c, chunk_size, OVERLAP_TOKENS) else: @@ -48,12 +59,12 @@ def break_chunk( def embed_text( - chunks: list[list[extract.MulitmodalChunk]], + chunks: list[extract.DataChunk], model: str = settings.TEXT_EMBEDDING_MODEL, input_type: Literal["document", "query"] = "document", chunk_size: int = DEFAULT_CHUNK_TOKENS, ) -> list[Vector]: - chunked_chunks = [break_chunk(chunk, chunk_size) for chunk in chunks] + chunked_chunks = [break_chunk(chunk, chunk_size) for chunk in chunks if chunk.data] if not any(chunked_chunks): return [] @@ -61,12 +72,12 @@ def embed_text( def embed_mixed( - items: list[list[extract.MulitmodalChunk]], + items: list[extract.DataChunk], model: str = settings.MIXED_EMBEDDING_MODEL, input_type: Literal["document", "query"] = "document", chunk_size: int = DEFAULT_CHUNK_TOKENS, ) -> list[Vector]: - chunked_chunks = [break_chunk(item, chunk_size) for item in items] + chunked_chunks = [break_chunk(item, chunk_size) for item in items if item.data] return embed_chunks(chunked_chunks, model, input_type) diff --git a/src/memory/common/extract.py b/src/memory/common/extract.py index 18880eb..3df07df 100644 --- a/src/memory/common/extract.py +++ b/src/memory/common/extract.py @@ -6,7 
+6,7 @@ import tempfile from contextlib import contextmanager from typing import Any, Generator, Sequence, cast -from memory.common import chunker +from memory.common import chunker, summarizer import pymupdf # PyMuPDF import pypandoc from PIL import Image @@ -16,6 +16,16 @@ logger = logging.getLogger(__name__) MulitmodalChunk = Image.Image | str +def merge_metadata(*metadata: dict[str, Any]) -> dict[str, Any]: + final = {} + for m in metadata: + data = m.copy() + if tags := set(data.pop("tags", []) or []): + final["tags"] = tags | final.get("tags", set()) + final |= data + return final + + @dataclass class DataChunk: data: Sequence[MulitmodalChunk] @@ -109,7 +119,9 @@ def extract_image(content: bytes | str | pathlib.Path) -> list[DataChunk]: def extract_text( - content: bytes | str | pathlib.Path, chunk_size: int | None = None + content: bytes | str | pathlib.Path, + chunk_size: int | None = None, + metadata: dict[str, Any] = {}, ) -> list[DataChunk]: if isinstance(content, pathlib.Path): content = content.read_text() @@ -117,8 +129,20 @@ def extract_text( content = content.decode("utf-8") content = cast(str, content) - chunks = chunker.chunk_text(content, chunk_size or chunker.DEFAULT_CHUNK_TOKENS) - return [DataChunk(data=[c], mime_type="text/plain") for c in chunks if c.strip()] + chunks = [ + DataChunk(data=[c], modality="text", metadata=metadata) + for c in chunker.chunk_text(content, chunk_size or chunker.DEFAULT_CHUNK_TOKENS) + ] + if content and len(content) > chunker.DEFAULT_CHUNK_TOKENS * 2: + summary, tags = summarizer.summarize(content) + chunks.append( + DataChunk( + data=[summary], + metadata=merge_metadata(metadata, {"tags": tags}), + modality="text", + ) + ) + return chunks def extract_data_chunks( diff --git a/src/memory/common/qdrant.py b/src/memory/common/qdrant.py index 106ee22..5e0121e 100644 --- a/src/memory/common/qdrant.py +++ b/src/memory/common/qdrant.py @@ -3,7 +3,7 @@ from typing import Any, cast, Generator, Sequence import qdrant_client from qdrant_client.http import models as qdrant_models -from qdrant_client.http.exceptions import UnexpectedResponse +from qdrant_client.http.exceptions import UnexpectedResponse, ApiException from memory.common import settings from memory.common.collections import ALL_COLLECTIONS, Collection, DistanceType, Vector @@ -193,14 +193,18 @@ def delete_points( collection_name: Name of the collection ids: List of vector IDs to delete """ - client.delete( - collection_name=collection_name, - points_selector=qdrant_models.PointIdsList( - points=ids, # type: ignore - ), - ) + try: + client.delete( + collection_name=collection_name, + points_selector=qdrant_models.PointIdsList( + points=ids, # type: ignore + ), + ) - logger.debug(f"Deleted {len(ids)} vectors from {collection_name}") + logger.debug(f"Deleted {len(ids)} vectors from {collection_name}") + except (ApiException, UnexpectedResponse) as e: + logger.error(f"Error deleting points from {collection_name}: {e}") + raise IOError(f"Error deleting points from {collection_name}: {e}") def get_collection_info( diff --git a/src/memory/common/summarizer.py b/src/memory/common/summarizer.py index 8d6fff6..981ced6 100644 --- a/src/memory/common/summarizer.py +++ b/src/memory/common/summarizer.py @@ -1,5 +1,6 @@ import json import logging +import traceback from typing import Any from memory.common import settings, chunker @@ -131,6 +132,7 @@ def summarize(content: str, target_tokens: int | None = None) -> tuple[str, list summary = result.get("summary", "") tags = result.get("tags", []) except 
Exception as e:
+        traceback.print_exc()
         logger.error(f"Summarization failed: {e}")
 
     tokens = chunker.approx_token_count(summary)
diff --git a/src/memory/workers/tasks/ebook.py b/src/memory/workers/tasks/ebook.py
index 189f243..16cafdd 100644
--- a/src/memory/workers/tasks/ebook.py
+++ b/src/memory/workers/tasks/ebook.py
@@ -60,6 +60,7 @@ def section_processor(
             end_page=section.end_page,
             parent_section_id=None,  # Will be set after flush
             content=content,
+            filename=book.file_path,
             size=len(content),
             mime_type="text/plain",
             sha256=create_content_hash(
diff --git a/src/memory/workers/tasks/maintenance.py b/src/memory/workers/tasks/maintenance.py
index 998de9b..c831b11 100644
--- a/src/memory/workers/tasks/maintenance.py
+++ b/src/memory/workers/tasks/maintenance.py
@@ -21,6 +21,7 @@ REINGEST_MISSING_CHUNKS = f"{MAINTENANCE_ROOT}.reingest_missing_chunks"
 REINGEST_CHUNK = f"{MAINTENANCE_ROOT}.reingest_chunk"
 REINGEST_ITEM = f"{MAINTENANCE_ROOT}.reingest_item"
 REINGEST_EMPTY_SOURCE_ITEMS = f"{MAINTENANCE_ROOT}.reingest_empty_source_items"
+REINGEST_ALL_EMPTY_SOURCE_ITEMS = f"{MAINTENANCE_ROOT}.reingest_all_empty_source_items"
 UPDATE_METADATA_FOR_SOURCE_ITEMS = (
     f"{MAINTENANCE_ROOT}.update_metadata_for_source_items"
 )
@@ -76,9 +77,9 @@ def reingest_chunk(chunk_id: str, collection: str):
         data = chunk.data
 
         if collection in collections.MULTIMODAL_COLLECTIONS:
-            vector = embedding.embed_mixed(data)[0]
-        elif len(data) == 1 and isinstance(data[0], str):
-            vector = embedding.embed_text([data[0]])[0]
+            vector = embedding.embed_mixed([extract.DataChunk(data=data)])[0]
+        elif collection in collections.TEXT_COLLECTIONS:
+            vector = embedding.embed_text([extract.DataChunk(data=data)])[0]
         else:
             raise ValueError(f"Unsupported data type for collection {collection}")
 
@@ -123,7 +124,10 @@ def reingest_item(item_id: str, item_type: str):
         chunk_ids = [str(c.id) for c in item.chunks if c.id]
         if chunk_ids:
             client = qdrant.get_qdrant_client()
-            qdrant.delete_points(client, item.modality, chunk_ids)
+            try:
+                qdrant.delete_points(client, item.modality, chunk_ids)
+            except IOError as e:
+                logger.error(f"Error deleting chunks for {item_id}: {e}")
 
         for chunk in item.chunks:
             session.delete(chunk)
@@ -151,6 +155,13 @@ def reingest_empty_source_items(item_type: str):
     return {"status": "success", "items": len(item_ids)}
 
 
+@app.task(name=REINGEST_ALL_EMPTY_SOURCE_ITEMS)
+def reingest_all_empty_source_items():
+    logger.info("Reingesting all empty source items")
+    for item_type in SourceItem.registry._class_registry.keys():
+        reingest_empty_source_items.delay(item_type)  # type: ignore
+
+
 def check_batch(batch: Sequence[Chunk]) -> dict:
     client = qdrant.get_qdrant_client()
     by_collection = defaultdict(list)
diff --git a/tests/conftest.py b/tests/conftest.py
index aae9b51..e90eec2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -234,8 +234,10 @@ def mock_voyage_client():
     def embeder(chunks, *args, **kwargs):
         return Mock(embeddings=[[0.1] * 1024] * len(chunks))
 
+    real_client = voyageai.Client
     with patch.object(voyageai, "Client", autospec=True) as mock_client:
         client = mock_client()
+        client.real_client = real_client
         client.embed = embeder
         client.multimodal_embed = embeder
         yield client
@@ -251,7 +253,7 @@ def mock_openai_client():
             choices=[
                 Mock(
                     message=Mock(
-                        content='{"summary": "test", "tags": ["tag1", "tag2"]}'
+                        content='{"summary": "test summary", "tags": ["tag1", "tag2"]}'
                     )
                 )
             ]
@@ -267,7 +269,9 @@ def mock_anthropic_client():
         client.messages = Mock()
         client.messages.create = Mock(
             return_value=Mock(
-                content=[Mock(text='{"summary": "test", "tags": ["tag1", 
"tag2"]}')] + content=[ + Mock(text='{"summary": "test summary", "tags": ["tag1", "tag2"]}') + ] ) ) yield client diff --git a/tests/data/code_complexity.jpg b/tests/data/code_complexity.jpg new file mode 100644 index 0000000..0d0cb0e Binary files /dev/null and b/tests/data/code_complexity.jpg differ diff --git a/tests/data/contents.py b/tests/data/contents.py new file mode 100644 index 0000000..44ce152 --- /dev/null +++ b/tests/data/contents.py @@ -0,0 +1,249 @@ +import hashlib +import pathlib +from bs4 import BeautifulSoup +from markdownify import markdownify +from PIL import Image + +DATA_DIR = pathlib.Path(__file__).parent + +SAMPLE_HTML = f""" + + +

+<html>
+<body>
+
+<h1>The Evolution of Programming Languages</h1>
+
+<p>Programming languages have undergone tremendous evolution since the early days of computing.
+ From the machine code and assembly languages of the 1940s to the high-level, expressive languages
+ we use today, each generation has built upon the lessons learned from its predecessors. Languages
+ like FORTRAN and COBOL pioneered the concept of human-readable code, while later innovations like
+ object-oriented programming in languages such as Smalltalk and C++ revolutionized how we structure
+ and organize our programs.</p>
+
+<img src="{DATA_DIR}/lang_timeline.png" alt="Timeline of programming language evolution">
+
+<p>The rise of functional programming paradigms has brought mathematical rigor and immutability
+ to the forefront of software development. Languages like Haskell, Lisp, and more recently Rust
+ and Elm have demonstrated the power of pure functions and type systems in creating more reliable
+ and maintainable code. These paradigms emphasize the elimination of side effects and the treatment
+ of computation as the evaluation of mathematical functions.</p>
+
+<p>Modern development has also seen the emergence of domain-specific languages and the resurgence
+ of interest in memory safety. The advent of languages like Python and JavaScript has democratized
+ programming by lowering the barrier to entry, while systems languages like Rust have proven that
+ performance and safety need not be mutually exclusive. The ongoing development of WebAssembly
+ promises to bring high-performance computing to web browsers in ways previously unimaginable.</p>
+
+<img src="{DATA_DIR}/code_complexity.jpg" alt="Visual representation of code complexity over time">
+
+<p>Looking toward the future, we see emerging trends in quantum programming languages, AI-assisted
+ code generation, and the continued evolution toward more expressive type systems. The challenge
+ for tomorrow's language designers will be balancing expressiveness with simplicity, performance
+ with safety, and innovation with backward compatibility. As computing continues to permeate every
+ aspect of human life, the languages we use to command these machines will undoubtedly continue
+ to evolve and shape the digital landscape.</p>
+
+<p>The emergence of cloud computing and distributed systems has also driven new paradigms in
+ language design. Languages like Go and Elixir have been specifically crafted to excel in
+ concurrent and distributed environments, while the rise of microservices has renewed interest
+ in polyglot programming approaches. These developments reflect a broader shift toward languages
+ that are not just powerful tools for individual developers, but robust foundations for building
+ scalable, resilient systems that can handle the demands of modern internet-scale applications.</p>
+
+<p>Perhaps most intriguingly, the intersection of programming languages with artificial intelligence
+ is opening entirely new frontiers. Differentiable programming languages are enabling new forms of
+ machine learning research, while large language models are beginning to reshape how we think about
+ code generation and developer tooling. As we stand on the brink of an era where AI systems may
+ become active participants in the programming process itself, the very nature of what constitutes
+ a programming language—and who or what programs in it—may be fundamentally transformed.</p>
+
+</body>
+</html>
+"""
+
+SECOND_PAGE = """
+<div>
+
+<h2>The Impact of Open Source on Language Development</h2>
+
+<p>The open source movement has fundamentally transformed how programming languages are developed,
+ distributed, and evolved. Unlike the proprietary languages of earlier decades, modern language
+ development often occurs in public repositories where thousands of contributors can participate
+ in the design process. Languages like Python, JavaScript, and Rust have benefited enormously
+ from this collaborative approach, with their ecosystems growing rapidly through community-driven
+ package managers and extensive third-party libraries.</p>
+
+<p>This democratization of language development has led to faster innovation cycles and more
+ responsive adaptation to developer needs. When a language feature proves problematic or a new
+ paradigm emerges, open source languages can quickly incorporate changes through their community
+ governance processes. The result has been an unprecedented period of language experimentation
+ and refinement, where ideas can be tested, refined, and adopted across multiple language
+ communities simultaneously.</p>
+
+<p>Furthermore, the open source model has enabled the rise of domain-specific languages that
+ might never have been commercially viable under traditional development models. From specialized
+ query languages for databases to configuration management tools, the low barrier to entry for
+ language creation has fostered an explosion of linguistic diversity in computing, each tool
+ optimized for specific problem domains and user communities.</p>
+
+<p>The collaborative nature of open source development has also revolutionized language tooling
+ and developer experience. Modern languages benefit from rich ecosystems of editors, debuggers,
+ profilers, and static analysis tools, all developed by passionate communities who understand
+ the daily challenges faced by practitioners. This has created a virtuous cycle where better
+ tooling attracts more developers, who in turn contribute improvements that make the language
+ even more accessible and powerful.</p>
+
+<p>Version control systems like Git have enabled unprecedented transparency in language evolution,
+ allowing developers to trace the reasoning behind every design decision through detailed commit
+ histories and issue discussions. This historical record serves not only as documentation but as
+ a learning resource for future language designers, helping them understand the trade-offs and
+ considerations that shaped successful language features.</p>
+
+<p>The economic implications of open source language development cannot be overstated. By removing
+ licensing barriers and vendor lock-in, open source languages have democratized access to powerful
+ programming tools across the globe. This has enabled innovation in regions and sectors that might
+ otherwise have been excluded from the software revolution, fostering a truly global community of
+ software creators and problem solvers.</p>
+
+</div>
+""" + +CHUNKS: list[str] = [ + """The Evolution of Programming Languages +====================================== +Programming languages have undergone tremendous evolution since the early days of computing. + From the machine code and assembly languages of the 1940s to the high\\-level, expressive languages + we use today, each generation has built upon the lessons learned from its predecessors. Languages + like FORTRAN and COBOL pioneered the concept of human\\-readable code, while later innovations like + object\\-oriented programming in languages such as Smalltalk and C\\+\\+ revolutionized how we structure + and organize our programs. +![Timeline of programming language evolution](/Users/dan/code/memory/tests/data/lang_timeline.png) +The rise of functional programming paradigms has brought mathematical rigor and immutability + to the forefront of software development. Languages like Haskell, Lisp, and more recently Rust + and Elm have demonstrated the power of pure functions and type systems in creating more reliable + and maintainable code. These paradigms emphasize the elimination of side effects and the treatment + of computation as the evaluation of mathematical functions. +Modern development has also seen the emergence of domain\\-specific languages and the resurgence + of interest in memory safety. The advent of languages like Python and JavaScript has democratized + programming by lowering the barrier to entry, while systems languages like Rust have proven that + performance and safety need not be mutually exclusive. The ongoing development of WebAssembly + promises to bring high\\-performance computing to web browsers in ways previously unimaginable. +![Visual representation of code complexity over time](/Users/dan/code/memory/tests/data/code_complexity.jpg) +Looking toward the future, we see emerging trends in quantum programming languages, AI\\-assisted + code generation, and the continued evolution toward more expressive type systems. The challenge + for tomorrow's language designers will be balancing expressiveness with simplicity, performance + with safety, and innovation with backward compatibility. As computing continues to permeate every + aspect of human life, the languages we use to command these machines will undoubtedly continue + to evolve and shape the digital landscape.""", + """ +As computing continues to permeate every + aspect of human life, the languages we use to command these machines will undoubtedly continue + to evolve and shape the digital landscape. +The emergence of cloud computing and distributed systems has also driven new paradigms in + language design. Languages like Go and Elixir have been specifically crafted to excel in + concurrent and distributed environments, while the rise of microservices has renewed interest + in polyglot programming approaches. These developments reflect a broader shift toward languages + that are not just powerful tools for individual developers, but robust foundations for building + scalable, resilient systems that can handle the demands of modern internet\\-scale applications. +Perhaps most intriguingly, the intersection of programming languages with artificial intelligence + is opening entirely new frontiers. Differentiable programming languages are enabling new forms of + machine learning research, while large language models are beginning to reshape how we think about + code generation and developer tooling. 
As we stand on the brink of an era where AI systems may + become active participants in the programming process itself, the very nature of what constitutes + a programming language—and who or what programs in it—may be fundamentally transformed.""", +] +TWO_PAGE_CHUNKS: list[str] = [ + """ +The Evolution of Programming Languages +====================================== +Programming languages have undergone tremendous evolution since the early days of computing. + From the machine code and assembly languages of the 1940s to the high\-level, expressive languages + we use today, each generation has built upon the lessons learned from its predecessors. Languages + like FORTRAN and COBOL pioneered the concept of human\-readable code, while later innovations like + object\-oriented programming in languages such as Smalltalk and C\+\+ revolutionized how we structure + and organize our programs. +![Timeline of programming language evolution](/Users/dan/code/memory/tests/data/lang_timeline.png) +The rise of functional programming paradigms has brought mathematical rigor and immutability + to the forefront of software development. Languages like Haskell, Lisp, and more recently Rust + and Elm have demonstrated the power of pure functions and type systems in creating more reliable + and maintainable code. These paradigms emphasize the elimination of side effects and the treatment + of computation as the evaluation of mathematical functions. +Modern development has also seen the emergence of domain\-specific languages and the resurgence + of interest in memory safety. The advent of languages like Python and JavaScript has democratized + programming by lowering the barrier to entry, while systems languages like Rust have proven that + performance and safety need not be mutually exclusive. The ongoing development of WebAssembly + promises to bring high\-performance computing to web browsers in ways previously unimaginable. +![Visual representation of code complexity over time](/Users/dan/code/memory/tests/data/code_complexity.jpg) +Looking toward the future, we see emerging trends in quantum programming languages, AI\-assisted + code generation, and the continued evolution toward more expressive type systems. The challenge + for tomorrow's language designers will be balancing expressiveness with simplicity, performance + with safety, and innovation with backward compatibility. As computing continues to permeate every + aspect of human life, the languages we use to command these machines will undoubtedly continue + to evolve and shape the digital landscape. +""", + """ +As computing continues to permeate every + aspect of human life, the languages we use to command these machines will undoubtedly continue + to evolve and shape the digital landscape. +The emergence of cloud computing and distributed systems has also driven new paradigms in + language design. Languages like Go and Elixir have been specifically crafted to excel in + concurrent and distributed environments, while the rise of microservices has renewed interest + in polyglot programming approaches. These developments reflect a broader shift toward languages + that are not just powerful tools for individual developers, but robust foundations for building + scalable, resilient systems that can handle the demands of modern internet\-scale applications. +Perhaps most intriguingly, the intersection of programming languages with artificial intelligence + is opening entirely new frontiers. 
Differentiable programming languages are enabling new forms of + machine learning research, while large language models are beginning to reshape how we think about + code generation and developer tooling. As we stand on the brink of an era where AI systems may + become active participants in the programming process itself, the very nature of what constitutes + a programming language—and who or what programs in it—may be fundamentally transformed. +The Impact of Open Source on Language Development +------------------------------------------------- +The open source movement has fundamentally transformed how programming languages are developed, + distributed, and evolved. Unlike the proprietary languages of earlier decades, modern language + development often occurs in public repositories where thousands of contributors can participate + in the design process. Languages like Python, JavaScript, and Rust have benefited enormously + from this collaborative approach, with their ecosystems growing rapidly through community\-driven + package managers and extensive third\-party libraries. +This democratization of language development has led to faster innovation cycles and more + responsive adaptation to developer needs. When a language feature proves problematic or a new + paradigm emerges, open source languages can quickly incorporate changes through their community + governance processes. The result has been an unprecedented period of language experimentation + and refinement, where ideas can be tested, refined, and adopted across multiple language + communities simultaneously.""", + """ +The result has been an unprecedented period of language experimentation + and refinement, where ideas can be tested, refined, and adopted across multiple language + communities simultaneously. +Furthermore, the open source model has enabled the rise of domain\-specific languages that + might never have been commercially viable under traditional development models. From specialized + query languages for databases to configuration management tools, the low barrier to entry for + language creation has fostered an explosion of linguistic diversity in computing, each tool + optimized for specific problem domains and user communities. +The collaborative nature of open source development has also revolutionized language tooling + and developer experience. Modern languages benefit from rich ecosystems of editors, debuggers, + profilers, and static analysis tools, all developed by passionate communities who understand + the daily challenges faced by practitioners. This has created a virtuous cycle where better + tooling attracts more developers, who in turn contribute improvements that make the language + even more accessible and powerful. +Version control systems like Git have enabled unprecedented transparency in language evolution, + allowing developers to trace the reasoning behind every design decision through detailed commit + histories and issue discussions. This historical record serves not only as documentation but as + a learning resource for future language designers, helping them understand the trade\-offs and + considerations that shaped successful language features. +The economic implications of open source language development cannot be overstated. By removing + licensing barriers and vendor lock\-in, open source languages have democratized access to powerful + programming tools across the globe. 
This has enabled innovation in regions and sectors that might + otherwise have been excluded from the software revolution, fostering a truly global community of + software creators and problem solvers. +""", +] + +SAMPLE_MARKDOWN = markdownify(SAMPLE_HTML) +SAMPLE_TEXT = BeautifulSoup(SAMPLE_HTML, "html.parser").get_text() +SECOND_PAGE_MARKDOWN = markdownify(SECOND_PAGE) +SECOND_PAGE_TEXT = BeautifulSoup(SECOND_PAGE, "html.parser").get_text() + + +def image_hash(image: Image.Image) -> str: + return hashlib.sha256(image.tobytes()).hexdigest() + + +LANG_TIMELINE = Image.open(DATA_DIR / "lang_timeline.png") +CODE_COMPLEXITY = Image.open(DATA_DIR / "code_complexity.jpg") +LANG_TIMELINE_HASH = image_hash(LANG_TIMELINE) +CODE_COMPLEXITY_HASH = image_hash(CODE_COMPLEXITY) diff --git a/tests/data/lang_timeline.png b/tests/data/lang_timeline.png new file mode 100644 index 0000000..5201eb9 Binary files /dev/null and b/tests/data/lang_timeline.png differ diff --git a/tests/integration/test_real_queries.py b/tests/integration/test_real_queries.py new file mode 100644 index 0000000..36f0415 --- /dev/null +++ b/tests/integration/test_real_queries.py @@ -0,0 +1,1118 @@ +import hashlib +import itertools +from datetime import datetime +from unittest.mock import patch + +import pytest +import voyageai + +import memory.common.qdrant as qdrant_tools +from memory.common import extract +from memory.common.db.models.source_item import SourceItem +from memory.common.db.models.source_items import ( + AgentObservation, +) +from memory.common.embedding import embed_source_item, embed_text +from memory.workers.tasks.content_processing import push_to_qdrant +from tests.data.contents import SAMPLE_MARKDOWN + + +@pytest.fixture +def real_voyage_client(mock_voyage_client): + real_client = mock_voyage_client.real_client + with patch.object(voyageai, "Client", real_client): + yield real_client + + +def test_real_source_item_embeddings(real_voyage_client, qdrant): + item = SourceItem( + id=1, + content=SAMPLE_MARKDOWN, + mime_type="text/html", + modality="text", + sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(), + size=len(SAMPLE_MARKDOWN), + tags=["bla"], + embed_status="QUEUED", + ) + + part1, part2, summary = embed_source_item(item) + part1.id = "00000000-0000-0000-0000-000000000000" # type: ignore + part2.id = "00000000-0000-0000-0000-000000000001" # type: ignore + summary.id = "00000000-0000-0000-0000-000000000002" # type: ignore + push_to_qdrant([item]) + + queries = { + "how have programming languages changed?": [0.6756747, 0.6319432, 0.26348075], + "evolution of programming languages since 1940s": [0.690, 0.594, 0.330], + "functional programming paradigms and immutability": [0.507, 0.412, 0.276], + "memory safety in systems programming languages": [0.487, 0.458, 0.348], + "FORTRAN and COBOL pioneering human-readable code": [0.535, 0.458, 0.296], + "Rust and type systems for reliable code": [0.585, 0.506, 0.456], + "WebAssembly high-performance web computing": [0.469, 0.426, 0.296], + "object-oriented programming innovations": [0.510, 0.492, 0.333], + "cloud computing and distributed systems": [0.40005407, 0.56048, 0.37348732], + "AI-assisted code generation trends": [0.51078045, 0.5828345, 0.31309962], + "microservices and polyglot programming": [0.5072756, 0.63991153, 0.38507754], + "Python JavaScript democratizing programming": [0.524, 0.517, 0.320], + "software development methodologies": [0.454, 0.440, 0.356], + "computer science history": [0.517, 0.454, 0.299], + "programming paradigms 
comparison": [0.589, 0.525, 0.352], + "developer tools and ecosystems": [0.42297083, 0.52246743, 0.39521465], + "modern computing trends": [0.5172996, 0.5883902, 0.30886292], + "database query languages": [0.47420773, 0.48987937, 0.41980737], + "network programming protocols": [0.3547029, 0.42228842, 0.39325726], + "machine learning algorithms": [0.39660394, 0.47512275, 0.45423454], + "web browser technologies": [0.467, 0.449, 0.439], + "software architecture patterns": [0.4430701, 0.4969077, 0.3775082], + "mobile app user interface design": [0.2754, 0.332, 0.3863], + "cybersecurity threat detection": [0.3436677, 0.38349956, 0.36111486], + "project management methodologies": [0.3377, 0.34, 0.3573], + "cooking Italian pasta recipes": [0.2627, 0.2388, 0.3065], + "professional basketball statistics": [0.2811, 0.2454, 0.3411], + "gardening tips for beginners": [0.2953, 0.2848, 0.3309], + "travel destinations in Europe": [0.2595, 0.2514, 0.3039], + "classical music composers": [0.3066, 0.2838, 0.3173], + } + for query, (p1, p2, s) in queries.items(): + search_vector = embed_text( + [extract.DataChunk(data=[query])], input_type="query" + )[0] + results = qdrant_tools.search_vectors(qdrant, "text", search_vector) + expected = sorted( + [ + (part1.id, p1), + (part2.id, p2), + (summary.id, s), + ], + key=lambda x: x[1], + reverse=True, + ) + assert [(r.id, pytest.approx(r.score, abs=0.1)) for r in results] == expected + + +EXPECTED_OBSERVATION_RESULTS = { + "What does the user think about functional programming?": { + "semantic": [ + ( + 0.7104, + "The user believes functional programming leads to better code quality", + ), + (0.6792, "I prefer functional programming over OOP"), + ( + 0.6772, + "Subject: programming_philosophy | Type: belief | Observation: The user believes functional programming leads to better code quality | Quote: Functional programming produces more maintainable code", + ), + ( + 0.6677, + "Subject: programming_paradigms | Type: preference | Observation: The user prefers functional programming over OOP | Quote: I prefer functional programming over OOP", + ), + ], + "temporal": [ + ( + 0.5816, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.5246, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_paradigms | Observation: The user prefers functional programming over OOP | Confidence: 0.8", + ), + ( + 0.5214, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ( + 0.4645, + "Time: 12:00 on Wednesday (afternoon) | Subject: refactoring | Observation: The user always refactors to pure functions | Confidence: 0.8", + ), + ], + }, + "Does the user prefer functional or object-oriented programming?": { + "semantic": [ + (0.7718, "The user prefers functional programming over OOP"), + ( + 0.754, + "Subject: programming_paradigms | Type: preference | Observation: The user prefers functional programming over OOP | Quote: I prefer functional programming over OOP", + ), + (0.7454, "I prefer functional programming over OOP"), + ( + 0.6541, + "Subject: programming_philosophy | Type: belief | Observation: The user believes functional programming leads to better code quality | Quote: Functional programming produces more maintainable code", + ), + ], + "temporal": [ + ( + 0.6188, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_paradigms 
| Observation: The user prefers functional programming over OOP | Confidence: 0.8", + ), + ( + 0.5902, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.5144, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ( + 0.4989, + "Time: 12:00 on Wednesday (afternoon) | Subject: refactoring | Observation: The user always refactors to pure functions | Confidence: 0.8", + ), + ], + }, + "What are the user's beliefs about code quality?": { + "semantic": [ + (0.6925, "The user believes code reviews are essential for quality"), + ( + 0.68, + "The user believes functional programming leads to better code quality", + ), + ( + 0.6524, + "Subject: code_quality | Type: belief | Observation: The user believes code reviews are essential for quality | Quote: Code reviews catch bugs that automated testing misses", + ), + ( + 0.6466, + "Subject: programming_philosophy | Type: belief | Observation: The user believes functional programming leads to better code quality | Quote: Functional programming produces more maintainable code", + ), + ], + "temporal": [ + ( + 0.5544, + "Time: 12:00 on Wednesday (afternoon) | Subject: code_quality | Observation: The user believes code reviews are essential for quality | Confidence: 0.8", + ), + ( + 0.5397, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.4931, + "Time: 12:00 on Wednesday (afternoon) | Subject: testing_philosophy | Observation: The user believes unit tests are a waste of time for prototypes | Confidence: 0.8", + ), + ( + 0.4674, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ], + }, + "How does the user approach debugging code?": { + "semantic": [ + ( + 0.7011, + "Subject: debugging_approach | Type: behavior | Observation: The user debugs by adding print statements rather than using a debugger | Quote: When debugging, I just add console.log everywhere", + ), + ( + 0.6962, + "The user debugs by adding print statements rather than using a debugger", + ), + (0.6788, "When debugging, I just add console.log everywhere"), + ( + 0.5357, + "Subject: code_quality | Type: belief | Observation: The user believes code reviews are essential for quality | Quote: Code reviews catch bugs that automated testing misses", + ), + ], + "temporal": [ + ( + 0.6252, + "Time: 12:00 on Wednesday (afternoon) | Subject: debugging_approach | Observation: The user debugs by adding print statements rather than using a debugger | Confidence: 0.8", + ), + ( + 0.476, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ( + 0.4424, + "Time: 12:00 on Wednesday (afternoon) | Subject: version_control_style | Observation: The user prefers small, focused commits over large feature branches | Confidence: 0.8", + ), + ( + 0.4402, + "Time: 12:00 on Wednesday (afternoon) | Subject: testing_philosophy | Observation: The user believes unit tests are a waste of time for prototypes | Confidence: 0.8", + ), + ], + }, + "What are the user's git and version control habits?": { + "semantic": [ + ( + 0.6474, + "Subject: 
version_control_style | Type: preference | Observation: The user prefers small, focused commits over large feature branches | Quote: I like to commit small, logical changes frequently", + ), + (0.6424, "I like to commit small, logical changes frequently"), + ( + 0.5961, + "The user prefers small, focused commits over large feature branches", + ), + ( + 0.5806, + "Subject: git_habits | Type: behavior | Observation: The user writes commit messages in present tense | Quote: Fix bug in parser instead of Fixed bug in parser", + ), + ], + "temporal": [ + ( + 0.6174, + "Time: 12:00 on Wednesday (afternoon) | Subject: version_control_style | Observation: The user prefers small, focused commits over large feature branches | Confidence: 0.8", + ), + ( + 0.5733, + "Time: 12:00 on Wednesday (afternoon) | Subject: git_habits | Observation: The user writes commit messages in present tense | Confidence: 0.8", + ), + ( + 0.4848, + "Time: 12:00 on Wednesday (afternoon) | Subject: editor_preference | Observation: The user prefers Vim over VS Code for editing | Confidence: 0.8", + ), + ( + 0.4604, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ], + }, + "When does the user prefer to work?": { + "semantic": [ + (0.6806, "The user prefers working late at night"), + ( + 0.6792, + "Subject: work_schedule | Type: behavior | Observation: The user prefers working late at night | Quote: I do my best coding between 10pm and 2am", + ), + (0.6439, "I do my best coding between 10pm and 2am"), + (0.5528, "I use 25-minute work intervals with 5-minute breaks"), + ], + "temporal": [ + ( + 0.7023, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_schedule | Observation: The user prefers working late at night | Confidence: 0.8", + ), + ( + 0.6395, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.6375, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ( + 0.6254, + "Time: 12:00 on Wednesday (afternoon) | Subject: collaboration_preference | Observation: The user prefers pair programming for complex problems | Confidence: 0.8", + ), + ], + }, + "How does the user handle productivity and time management?": { + "semantic": [ + ( + 0.579, + "Subject: productivity_methods | Type: behavior | Observation: The user takes breaks every 25 minutes using the Pomodoro technique | Quote: I use 25-minute work intervals with 5-minute breaks", + ), + (0.5731, "I use 25-minute work intervals with 5-minute breaks"), + ( + 0.5284, + "The user takes breaks every 25 minutes using the Pomodoro technique", + ), + (0.5153, "I do my best coding between 10pm and 2am"), + ], + "temporal": [ + ( + 0.5705, + "Time: 12:00 on Wednesday (afternoon) | Subject: productivity_methods | Observation: The user takes breaks every 25 minutes using the Pomodoro technique | Confidence: 0.8", + ), + ( + 0.5023, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ( + 0.4631, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_schedule | Observation: The user prefers working late at night | Confidence: 0.8", + ), + ( + 0.4626, + "Time: 12:00 on Wednesday (afternoon) | 
Subject: documentation_habits | Observation: The user always writes documentation before implementing features | Confidence: 0.8", + ), + ], + }, + "What editor does the user prefer?": { + "semantic": [ + ( + 0.6394, + "Subject: editor_preference | Type: preference | Observation: The user prefers Vim over VS Code for editing | Quote: Vim makes me more productive than any modern editor", + ), + (0.6241, "The user prefers Vim over VS Code for editing"), + (0.5528, "Vim makes me more productive than any modern editor"), + (0.4887, "The user claims to prefer tabs but their code uses spaces"), + ], + "temporal": [ + ( + 0.5701, + "Time: 12:00 on Wednesday (afternoon) | Subject: editor_preference | Observation: The user prefers Vim over VS Code for editing | Confidence: 0.8", + ), + ( + 0.4557, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ( + 0.4322, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.4283, + "Time: 12:00 on Wednesday (afternoon) | Subject: database_preference | Observation: The user prefers PostgreSQL over MongoDB for most applications | Confidence: 0.8", + ), + ], + }, + "What databases does the user like to use?": { + "semantic": [ + ( + 0.6328, + "Subject: database_preference | Type: preference | Observation: The user prefers PostgreSQL over MongoDB for most applications | Quote: Relational databases handle complex queries better than document stores", + ), + (0.5992, "The user prefers PostgreSQL over MongoDB for most applications"), + ( + 0.5352, + "Subject: domain_preference | Type: preference | Observation: The user prefers working on backend systems over frontend UI | Quote: I find backend logic more interesting than UI work", + ), + (0.5186, "The user prefers working on backend systems over frontend UI"), + ], + "temporal": [ + ( + 0.5599, + "Time: 12:00 on Wednesday (afternoon) | Subject: database_preference | Observation: The user prefers PostgreSQL over MongoDB for most applications | Confidence: 0.8", + ), + ( + 0.4617, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.4445, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.4365, + "Time: 12:00 on Wednesday (afternoon) | Subject: editor_preference | Observation: The user prefers Vim over VS Code for editing | Confidence: 0.8", + ), + ], + }, + "What programming languages does the user work with?": { + "semantic": [ + (0.7255, "The user primarily works with Python and JavaScript"), + (0.6954, "Most of my work is in Python backend and React frontend"), + ( + 0.6874, + "Subject: primary_languages | Type: general | Observation: The user primarily works with Python and JavaScript | Quote: Most of my work is in Python backend and React frontend", + ), + (0.6098, "I'm picking up Rust on weekends"), + ], + "temporal": [ + ( + 0.5939, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.4679, + "Time: 12:00 on Wednesday (afternoon) | Subject: experience_level | Observation: The user has 8 years of professional 
programming experience | Confidence: 0.8", + ), + ( + 0.4623, + "Time: 12:00 on Wednesday (afternoon) | Subject: learning_activities | Observation: The user is currently learning Rust in their spare time | Confidence: 0.8", + ), + ( + 0.4514, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ], + }, + "What is the user's programming experience level?": { + "semantic": [ + (0.6664, "The user has 8 years of professional programming experience"), + ( + 0.6565, + "Subject: experience_level | Type: general | Observation: The user has 8 years of professional programming experience | Quote: I've been coding professionally for 8 years", + ), + (0.5949, "I've been coding professionally for 8 years"), + (0.5641, "The user is currently learning Rust in their spare time"), + ], + "temporal": [ + ( + 0.5991, + "Time: 12:00 on Wednesday (afternoon) | Subject: experience_level | Observation: The user has 8 years of professional programming experience | Confidence: 0.8", + ), + ( + 0.5041, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.4917, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.4817, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_paradigms | Observation: The user prefers functional programming over OOP | Confidence: 0.8", + ), + ], + }, + "Where did the user study computer science?": { + "semantic": [ + (0.6863, "I studied CS at Stanford"), + (0.649, "The user graduated with a Computer Science degree from Stanford"), + ( + 0.6344, + "Subject: education_background | Type: general | Observation: The user graduated with a Computer Science degree from Stanford | Quote: I studied CS at Stanford", + ), + (0.4592, "The user is currently learning Rust in their spare time"), + ], + "temporal": [ + ( + 0.5455, + "Time: 12:00 on Wednesday (afternoon) | Subject: education_background | Observation: The user graduated with a Computer Science degree from Stanford | Confidence: 0.8", + ), + ( + 0.3842, + "Time: 12:00 on Wednesday (afternoon) | Subject: experience_level | Observation: The user has 8 years of professional programming experience | Confidence: 0.8", + ), + ( + 0.3792, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.3781, + "Time: 12:00 on Wednesday (afternoon) | Subject: learning_activities | Observation: The user is currently learning Rust in their spare time | Confidence: 0.8", + ), + ], + }, + "What kind of company does the user work at?": { + "semantic": [ + (0.6308, "The user works at a mid-size startup with 50 employees"), + ( + 0.5371, + "Subject: company_size | Type: general | Observation: The user works at a mid-size startup with 50 employees | Quote: Our company has about 50 people", + ), + (0.5253, "Most of my work is in Python backend and React frontend"), + (0.4902, "I've been coding professionally for 8 years"), + ], + "temporal": [ + ( + 0.5309, + "Time: 12:00 on Wednesday (afternoon) | Subject: company_size | Observation: The user works at a mid-size startup with 50 employees | Confidence: 0.8", + ), + ( + 0.4329, + "Time: 12:00 on Wednesday (afternoon) 
| Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ( + 0.4323, + "Time: 12:00 on Wednesday (afternoon) | Subject: education_background | Observation: The user graduated with a Computer Science degree from Stanford | Confidence: 0.8", + ), + ( + 0.419, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_schedule | Observation: The user prefers working late at night | Confidence: 0.8", + ), + ], + }, + "What does the user think about AI replacing programmers?": { + "semantic": [ + ( + 0.5965, + "Subject: ai_future | Type: belief | Observation: The user thinks AI will replace most software developers within 10 years | Quote: AI will make most programmers obsolete by 2035", + ), + ( + 0.572, + "The user thinks AI will replace most software developers within 10 years", + ), + (0.5715, "AI will make most programmers obsolete by 2035"), + ( + 0.4344, + "The user believes functional programming leads to better code quality", + ), + ], + "temporal": [ + ( + 0.4629, + "Time: 12:00 on Wednesday (afternoon) | Subject: ai_future | Observation: The user thinks AI will replace most software developers within 10 years | Confidence: 0.8", + ), + ( + 0.362, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.3308, + "Time: 12:00 on Wednesday (afternoon) | Subject: testing_philosophy | Observation: The user believes unit tests are a waste of time for prototypes | Confidence: 0.8", + ), + ( + 0.328, + "Time: 12:00 on Wednesday (afternoon) | Subject: typescript_opinion | Observation: The user now says they love TypeScript but previously called it verbose | Confidence: 0.8", + ), + ], + }, + "What are the user's views on artificial intelligence?": { + "semantic": [ + ( + 0.5885, + "Subject: ai_future | Type: belief | Observation: The user thinks AI will replace most software developers within 10 years | Quote: AI will make most programmers obsolete by 2035", + ), + ( + 0.5661, + "The user thinks AI will replace most software developers within 10 years", + ), + (0.5133, "AI will make most programmers obsolete by 2035"), + (0.4927, "I find backend logic more interesting than UI work"), + ], + "temporal": [ + ( + 0.5399, + "Time: 12:00 on Wednesday (afternoon) | Subject: ai_future | Observation: The user thinks AI will replace most software developers within 10 years | Confidence: 0.8", + ), + ( + 0.4353, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.4223, + "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die. 
| Confidence: 0.8", + ), + ( + 0.4219, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ], + }, + "Has the user changed their mind about TypeScript?": { + "semantic": [ + ( + 0.6174, + "The user now says they love TypeScript but previously called it verbose", + ), + ( + 0.5757, + "Subject: typescript_opinion | Type: contradiction | Observation: The user now says they love TypeScript but previously called it verbose | Quote: TypeScript has too much boilerplate vs TypeScript makes my code so much cleaner", + ), + ( + 0.4924, + "TypeScript has too much boilerplate vs TypeScript makes my code so much cleaner", + ), + (0.4157, "The user always refactors to pure functions"), + ], + "temporal": [ + ( + 0.5631, + "Time: 12:00 on Wednesday (afternoon) | Subject: typescript_opinion | Observation: The user now says they love TypeScript but previously called it verbose | Confidence: 0.8", + ), + ( + 0.4016, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ( + 0.3827, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.3825, + "Time: 12:00 on Wednesday (afternoon) | Subject: editor_preference | Observation: The user prefers Vim over VS Code for editing | Confidence: 0.8", + ), + ], + }, + "Are there any contradictions in the user's preferences?": { + "semantic": [ + (0.536, "The user claims to prefer tabs but their code uses spaces"), + ( + 0.5353, + "Subject: indentation_preference | Type: contradiction | Observation: The user claims to prefer tabs but their code uses spaces | Quote: Tabs are better than spaces vs code consistently uses 2-space indentation", + ), + ( + 0.5321, + "Subject: pure_functions | Type: contradiction | Observation: The user said pure functions are yucky | Quote: Pure functions are yucky", + ), + ( + 0.5058, + "Subject: typescript_opinion | Type: contradiction | Observation: The user now says they love TypeScript but previously called it verbose | Quote: TypeScript has too much boilerplate vs TypeScript makes my code so much cleaner", + ), + ], + "temporal": [ + ( + 0.4763, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ( + 0.4693, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.4681, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ( + 0.4586, + "Time: 12:00 on Wednesday (afternoon) | Subject: typescript_opinion | Observation: The user now says they love TypeScript but previously called it verbose | Confidence: 0.8", + ), + ], + }, + "What does the user think about software testing?": { + "semantic": [ + ( + 0.6386, + "Subject: testing_philosophy | Type: belief | Observation: The user believes unit tests are a waste of time for prototypes | Quote: Writing tests for throwaway code slows development", + ), + (0.6222, "The user believes unit tests are a waste of time for prototypes"), + ( + 0.6152, + "Subject: code_quality | Type: belief | Observation: The user believes code reviews are essential 
for quality | Quote: Code reviews catch bugs that automated testing misses", + ), + (0.6036, "The user believes code reviews are essential for quality"), + ], + "temporal": [ + ( + 0.5881, + "Time: 12:00 on Wednesday (afternoon) | Subject: testing_philosophy | Observation: The user believes unit tests are a waste of time for prototypes | Confidence: 0.8", + ), + ( + 0.5074, + "Time: 12:00 on Wednesday (afternoon) | Subject: code_quality | Observation: The user believes code reviews are essential for quality | Confidence: 0.8", + ), + ( + 0.4863, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.4748, + "Time: 12:00 on Wednesday (afternoon) | Subject: debugging_approach | Observation: The user debugs by adding print statements rather than using a debugger | Confidence: 0.8", + ), + ], + }, + "How does the user approach documentation?": { + "semantic": [ + ( + 0.5966, + "Subject: documentation_habits | Type: behavior | Observation: The user always writes documentation before implementing features | Quote: I document the API design before writing any code", + ), + ( + 0.5473, + "The user always writes documentation before implementing features", + ), + (0.5207, "I document the API design before writing any code"), + ( + 0.4954, + "Subject: debugging_approach | Type: behavior | Observation: The user debugs by adding print statements rather than using a debugger | Quote: When debugging, I just add console.log everywhere", + ), + ], + "temporal": [ + ( + 0.4988, + "Time: 12:00 on Wednesday (afternoon) | Subject: documentation_habits | Observation: The user always writes documentation before implementing features | Confidence: 0.8", + ), + ( + 0.4335, + "Time: 12:00 on Wednesday (afternoon) | Subject: indentation_preference | Observation: The user claims to prefer tabs but their code uses spaces | Confidence: 0.8", + ), + ( + 0.4316, + "Time: 12:00 on Wednesday (afternoon) | Subject: debugging_approach | Observation: The user debugs by adding print statements rather than using a debugger | Confidence: 0.8", + ), + ( + 0.4307, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ], + }, + "What are the user's collaboration preferences?": { + "semantic": [ + ( + 0.651, + "Subject: collaboration_preference | Type: preference | Observation: The user prefers pair programming for complex problems | Quote: Two heads are better than one when solving hard problems", + ), + (0.5848, "The user prefers pair programming for complex problems"), + ( + 0.5355, + "Subject: version_control_style | Type: preference | Observation: The user prefers small, focused commits over large feature branches | Quote: I like to commit small, logical changes frequently", + ), + ( + 0.5216, + "Subject: domain_preference | Type: preference | Observation: The user prefers working on backend systems over frontend UI | Quote: I find backend logic more interesting than UI work", + ), + ], + "temporal": [ + ( + 0.6027, + "Time: 12:00 on Wednesday (afternoon) | Subject: collaboration_preference | Observation: The user prefers pair programming for complex problems | Confidence: 0.8", + ), + ( + 0.5101, + "Time: 12:00 on Wednesday (afternoon) | Subject: version_control_style | Observation: The user prefers small, focused commits over large feature branches | Confidence: 0.8", + ), + 
( + 0.482, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.4782, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ], + }, + "What does the user think about remote work?": { + "semantic": [ + (0.7063, "The user thinks remote work is more productive than office work"), + ( + 0.6583, + "Subject: work_environment | Type: belief | Observation: The user thinks remote work is more productive than office work | Quote: I get more done working from home", + ), + (0.6032, "I get more done working from home"), + (0.4997, "The user prefers working on backend systems over frontend UI"), + ], + "temporal": [ + ( + 0.5934, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ( + 0.4173, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_schedule | Observation: The user prefers working late at night | Confidence: 0.8", + ), + ( + 0.4148, + "Time: 12:00 on Wednesday (afternoon) | Subject: collaboration_preference | Observation: The user prefers pair programming for complex problems | Confidence: 0.8", + ), + ( + 0.4121, + "Time: 12:00 on Wednesday (afternoon) | Subject: testing_philosophy | Observation: The user believes unit tests are a waste of time for prototypes | Confidence: 0.8", + ), + ], + }, + "What are the user's productivity methods?": { + "semantic": [ + ( + 0.5723, + "Subject: productivity_methods | Type: behavior | Observation: The user takes breaks every 25 minutes using the Pomodoro technique | Quote: I use 25-minute work intervals with 5-minute breaks", + ), + ( + 0.5261, + "The user takes breaks every 25 minutes using the Pomodoro technique", + ), + (0.5205, "I use 25-minute work intervals with 5-minute breaks"), + (0.5107, "The user thinks remote work is more productive than office work"), + ], + "temporal": [ + ( + 0.5427, + "Time: 12:00 on Wednesday (afternoon) | Subject: productivity_methods | Observation: The user takes breaks every 25 minutes using the Pomodoro technique | Confidence: 0.8", + ), + ( + 0.4743, + "Time: 12:00 on Wednesday (afternoon) | Subject: work_environment | Observation: The user thinks remote work is more productive than office work | Confidence: 0.8", + ), + ( + 0.4299, + "Time: 12:00 on Wednesday (afternoon) | Subject: collaboration_preference | Observation: The user prefers pair programming for complex problems | Confidence: 0.8", + ), + ( + 0.4227, + "Time: 12:00 on Wednesday (afternoon) | Subject: version_control_style | Observation: The user prefers small, focused commits over large feature branches | Confidence: 0.8", + ), + ], + }, + "What technical skills is the user learning?": { + "semantic": [ + (0.5765, "The user is currently learning Rust in their spare time"), + ( + 0.5502, + "Subject: learning_activities | Type: general | Observation: The user is currently learning Rust in their spare time | Quote: I'm picking up Rust on weekends", + ), + (0.5411, "I'm picking up Rust on weekends"), + (0.5155, "The user primarily works with Python and JavaScript"), + ], + "temporal": [ + ( + 0.5301, + "Time: 12:00 on Wednesday (afternoon) | Subject: learning_activities | Observation: The user is currently learning Rust in their spare time | Confidence: 0.8", + ), + ( + 0.4913, + "Time: 
12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.481, + "Time: 12:00 on Wednesday (afternoon) | Subject: experience_level | Observation: The user has 8 years of professional programming experience | Confidence: 0.8", + ), + ( + 0.4558, + "Time: 12:00 on Wednesday (afternoon) | Subject: education_background | Observation: The user graduated with a Computer Science degree from Stanford | Confidence: 0.8", + ), + ], + }, + "What does the user think about cooking?": { + "semantic": [ + (0.4888, "I find backend logic more interesting than UI work"), + (0.4624, "The user prefers working on backend systems over frontend UI"), + ( + 0.4551, + "The user believes functional programming leads to better code quality", + ), + (0.4547, "The user said pure functions are yucky"), + ], + "temporal": [ + ( + 0.3812, + "Time: 12:00 on Wednesday (afternoon) | Subject: pure_functions | Observation: The user said pure functions are yucky | Confidence: 0.8", + ), + ( + 0.3773, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_philosophy | Observation: The user believes functional programming leads to better code quality | Confidence: 0.8", + ), + ( + 0.3686, + "Time: 12:00 on Wednesday (afternoon) | Subject: typescript_opinion | Observation: The user now says they love TypeScript but previously called it verbose | Confidence: 0.8", + ), + ( + 0.3649, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ], + }, + "What are the user's travel preferences?": { + "semantic": [ + ( + 0.522, + "Subject: domain_preference | Type: preference | Observation: The user prefers working on backend systems over frontend UI | Quote: I find backend logic more interesting than UI work", + ), + (0.5145, "The user prefers functional programming over OOP"), + (0.5079, "The user prefers working on backend systems over frontend UI"), + (0.5045, "The user prefers working late at night"), + ], + "temporal": [ + ( + 0.4849, + "Time: 12:00 on Wednesday (afternoon) | Subject: domain_preference | Observation: The user prefers working on backend systems over frontend UI | Confidence: 0.8", + ), + ( + 0.4779, + "Time: 12:00 on Wednesday (afternoon) | Subject: database_preference | Observation: The user prefers PostgreSQL over MongoDB for most applications | Confidence: 0.8", + ), + ( + 0.4659, + "Time: 12:00 on Wednesday (afternoon) | Subject: collaboration_preference | Observation: The user prefers pair programming for complex problems | Confidence: 0.8", + ), + ( + 0.4639, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_paradigms | Observation: The user prefers functional programming over OOP | Confidence: 0.8", + ), + ], + }, + "What music does the user like?": { + "semantic": [ + ( + 0.4927, + "Subject: domain_preference | Type: preference | Observation: The user prefers working on backend systems over frontend UI | Quote: I find backend logic more interesting than UI work", + ), + (0.4906, "The user prefers working late at night"), + (0.4904, "The user prefers functional programming over OOP"), + (0.4894, "The user primarily works with Python and JavaScript"), + ], + "temporal": [ + ( + 0.4674, + "Time: 12:00 on Wednesday (afternoon) | Subject: typescript_opinion | Observation: The user now says they love TypeScript but previously called it verbose | Confidence: 0.8", + ), + ( + 
0.4548, + "Time: 12:00 on Wednesday (afternoon) | Subject: primary_languages | Observation: The user primarily works with Python and JavaScript | Confidence: 0.8", + ), + ( + 0.4518, + "Time: 12:00 on Wednesday (afternoon) | Subject: programming_paradigms | Observation: The user prefers functional programming over OOP | Confidence: 0.8", + ), + ( + 0.4496, + "Time: 12:00 on Wednesday (afternoon) | Subject: editor_preference | Observation: The user prefers Vim over VS Code for editing | Confidence: 0.8", + ), + ], + }, +} + + +def test_real_observation_embeddings(real_voyage_client, qdrant): + beliefs = [ + ("The user thinks that all men must die.", "All humans are mortal.", "humans"), + ( + "The user believes functional programming leads to better code quality", + "Functional programming produces more maintainable code", + "programming_philosophy", + ), + ( + "The user thinks AI will replace most software developers within 10 years", + "AI will make most programmers obsolete by 2035", + "ai_future", + ), + ( + "The user believes code reviews are essential for quality", + "Code reviews catch bugs that automated testing misses", + "code_quality", + ), + ( + "The user thinks remote work is more productive than office work", + "I get more done working from home", + "work_environment", + ), + ( + "The user believes unit tests are a waste of time for prototypes", + "Writing tests for throwaway code slows development", + "testing_philosophy", + ), + ] + + behaviors = [ + ( + "The user always refactors to pure functions", + "I always refactor to pure functions", + "refactoring", + ), + ( + "The user writes commit messages in present tense", + "Fix bug in parser instead of Fixed bug in parser", + "git_habits", + ), + ( + "The user prefers working late at night", + "I do my best coding between 10pm and 2am", + "work_schedule", + ), + ( + "The user always writes documentation before implementing features", + "I document the API design before writing any code", + "documentation_habits", + ), + ( + "The user debugs by adding print statements rather than using a debugger", + "When debugging, I just add console.log everywhere", + "debugging_approach", + ), + ( + "The user takes breaks every 25 minutes using the Pomodoro technique", + "I use 25-minute work intervals with 5-minute breaks", + "productivity_methods", + ), + ] + + contradictions = [ + ( + "The user said pure functions are yucky", + "Pure functions are yucky", + "pure_functions", + ), + ( + "The user now says they love TypeScript but previously called it verbose", + "TypeScript has too much boilerplate vs TypeScript makes my code so much cleaner", + "typescript_opinion", + ), + ( + "The user claims to prefer tabs but their code uses spaces", + "Tabs are better than spaces vs code consistently uses 2-space indentation", + "indentation_preference", + ), + ] + + preferences = [ + ( + "The user prefers functional programming over OOP", + "I prefer functional programming over OOP", + "programming_paradigms", + ), + ( + "The user prefers Vim over VS Code for editing", + "Vim makes me more productive than any modern editor", + "editor_preference", + ), + ( + "The user prefers working on backend systems over frontend UI", + "I find backend logic more interesting than UI work", + "domain_preference", + ), + ( + "The user prefers small, focused commits over large feature branches", + "I like to commit small, logical changes frequently", + "version_control_style", + ), + ( + "The user prefers PostgreSQL over MongoDB for most applications", + "Relational 
databases handle complex queries better than document stores", + "database_preference", + ), + ( + "The user prefers pair programming for complex problems", + "Two heads are better than one when solving hard problems", + "collaboration_preference", + ), + ] + + general = [ + ("The user is a human", "The user is a human", "humans"), + ( + "The user has 8 years of professional programming experience", + "I've been coding professionally for 8 years", + "experience_level", + ), + ( + "The user primarily works with Python and JavaScript", + "Most of my work is in Python backend and React frontend", + "primary_languages", + ), + ( + "The user works at a mid-size startup with 50 employees", + "Our company has about 50 people", + "company_size", + ), + ( + "The user graduated with a Computer Science degree from Stanford", + "I studied CS at Stanford", + "education_background", + ), + ( + "The user is currently learning Rust in their spare time", + "I'm picking up Rust on weekends", + "learning_activities", + ), + ] + + ids = itertools.count(1) + items = [ + AgentObservation( + id=next(ids), + content=content, + mime_type="text/html", + modality="observation", + sha256=hashlib.sha256(content.encode("utf-8")).hexdigest(), + size=len(content), + tags=["bla"], + observation_type=observation_type, + subject=subject, + confidence=0.8, + evidence={ + "quote": quote, + "source": "https://en.wikipedia.org/wiki/Human", + }, + agent_model="gpt-4o", + inserted_at=datetime(2025, 1, 1, 12, 0, 0), + embed_status="QUEUED", + ) + for observation_type, observations in [ + ("belief", beliefs), + ("behavior", behaviors), + ("contradiction", contradictions), + ("preference", preferences), + ("general", general), + ] + for content, quote, subject in observations + ] + + for item in items: + embed_source_item(item) + push_to_qdrant(items) + + chunk_map = {str(c.id): c for item in items for c in item.chunks} + + def get_top(vector, search_type: str) -> list[tuple[float, str]]: + results = qdrant_tools.search_vectors(qdrant, search_type, vector) + return [ + (round(i.score, 4), chunk_map[str(i.id)].content) + for i in sorted(results, key=lambda x: x.score, reverse=True) + ][:4] + + for query, expected in EXPECTED_OBSERVATION_RESULTS.items(): + search_vector = embed_text( + [extract.DataChunk(data=[query])], input_type="query" + )[0] + semantic_results = get_top(search_vector, "semantic") + temporal_results = get_top(search_vector, "temporal") + assert semantic_results == expected["semantic"] + assert temporal_results == expected["temporal"] diff --git a/tests/memory/common/db/models/test_source_item.py b/tests/memory/common/db/models/test_source_item.py index 5e12a6c..3759ce2 100644 --- a/tests/memory/common/db/models/test_source_item.py +++ b/tests/memory/common/db/models/test_source_item.py @@ -1,23 +1,17 @@ from sqlalchemy.orm import Session -from unittest.mock import patch, Mock +from unittest.mock import patch from typing import cast import pytest from PIL import Image -from datetime import datetime from memory.common import settings, chunker, extract -from memory.common.db.models.sources import Book from memory.common.db.models.source_items import ( Chunk, MailMessage, - EmailAttachment, - BookSection, - BlogPost, ) from memory.common.db.models.source_item import ( SourceItem, image_filenames, add_pics, - merge_metadata, clean_filename, ) @@ -56,114 +50,6 @@ def test_clean_filename(input_filename, expected): assert clean_filename(input_filename) == expected -@pytest.mark.parametrize( - "dicts,expected", - [ - # 
Empty input - ([], {}), - # Single dict without tags - ([{"key": "value"}], {"key": "value"}), - # Single dict with tags as list - ( - [{"key": "value", "tags": ["tag1", "tag2"]}], - {"key": "value", "tags": {"tag1", "tag2"}}, - ), - # Single dict with tags as set - ( - [{"key": "value", "tags": {"tag1", "tag2"}}], - {"key": "value", "tags": {"tag1", "tag2"}}, - ), - # Multiple dicts without tags - ( - [{"key1": "value1"}, {"key2": "value2"}], - {"key1": "value1", "key2": "value2"}, - ), - # Multiple dicts with non-overlapping tags - ( - [ - {"key1": "value1", "tags": ["tag1"]}, - {"key2": "value2", "tags": ["tag2"]}, - ], - {"key1": "value1", "key2": "value2", "tags": {"tag1", "tag2"}}, - ), - # Multiple dicts with overlapping tags - ( - [ - {"key1": "value1", "tags": ["tag1", "tag2"]}, - {"key2": "value2", "tags": ["tag2", "tag3"]}, - ], - {"key1": "value1", "key2": "value2", "tags": {"tag1", "tag2", "tag3"}}, - ), - # Overlapping keys - later dict wins - ( - [ - {"key": "value1", "other": "data1"}, - {"key": "value2", "another": "data2"}, - ], - {"key": "value2", "other": "data1", "another": "data2"}, - ), - # Mixed tags types (list and set) - ( - [ - {"key1": "value1", "tags": ["tag1", "tag2"]}, - {"key2": "value2", "tags": {"tag3", "tag4"}}, - ], - { - "key1": "value1", - "key2": "value2", - "tags": {"tag1", "tag2", "tag3", "tag4"}, - }, - ), - # Empty tags - ( - [{"key": "value", "tags": []}, {"key2": "value2", "tags": []}], - {"key": "value", "key2": "value2"}, - ), - # None values - ( - [{"key1": None, "key2": "value"}, {"key3": None}], - {"key1": None, "key2": "value", "key3": None}, - ), - # Complex nested structures - ( - [ - {"nested": {"inner": "value1"}, "list": [1, 2, 3], "tags": ["tag1"]}, - {"nested": {"inner": "value2"}, "list": [4, 5], "tags": ["tag2"]}, - ], - {"nested": {"inner": "value2"}, "list": [4, 5], "tags": {"tag1", "tag2"}}, - ), - # Boolean and numeric values - ( - [ - {"bool": True, "int": 42, "float": 3.14, "tags": ["numeric"]}, - {"bool": False, "int": 100}, - ], - {"bool": False, "int": 100, "float": 3.14, "tags": {"numeric"}}, - ), - # Three or more dicts - ( - [ - {"a": 1, "tags": ["t1"]}, - {"b": 2, "tags": ["t2", "t3"]}, - {"c": 3, "a": 10, "tags": ["t3", "t4"]}, - ], - {"a": 10, "b": 2, "c": 3, "tags": {"t1", "t2", "t3", "t4"}}, - ), - # Dict with only tags - ([{"tags": ["tag1", "tag2"]}], {"tags": {"tag1", "tag2"}}), - # Empty dicts - ([{}, {}], {}), - # Mix of empty and non-empty dicts - ( - [{}, {"key": "value", "tags": ["tag"]}, {}], - {"key": "value", "tags": {"tag"}}, - ), - ], -) -def test_merge_metadata(dicts, expected): - assert merge_metadata(*dicts) == expected - - def test_image_filenames_with_existing_filenames(tmp_path): """Test image_filenames when images already have filenames""" chunk_id = "test_chunk_123" diff --git a/tests/memory/common/db/models/test_source_item_embeddings.py b/tests/memory/common/db/models/test_source_item_embeddings.py new file mode 100644 index 0000000..4fa92dd --- /dev/null +++ b/tests/memory/common/db/models/test_source_item_embeddings.py @@ -0,0 +1,626 @@ +import hashlib +from datetime import datetime +from typing import Sequence, cast +from unittest.mock import ANY, Mock, call + +import pymupdf # PyMuPDF +import pytest + +from memory.common import settings +from memory.common.db.models.source_item import Chunk, SourceItem +from memory.common.db.models.source_items import ( + AgentObservation, + BlogPost, + BookSection, + Comic, + EmailAttachment, + ForumPost, + MailMessage, +) +from 
memory.common.db.models.sources import Book
+from memory.common.embedding import embed_source_item
+from memory.common.extract import page_to_image
+from tests.data.contents import (
+    CHUNKS,
+    DATA_DIR,
+    LANG_TIMELINE_HASH,
+    SAMPLE_MARKDOWN,
+    SAMPLE_TEXT,
+    image_hash,
+)
+
+
+def compare_chunks(
+    chunks: Sequence[Chunk],
+    expected: Sequence[tuple[str | None, list[str], dict]],
+):
+    """Compare chunks by their content, image hashes and metadata."""
+    data = [
+        (c.content, [image_hash(i) for i in c.images], c.item_metadata) for c in chunks
+    ]
+    assert data == expected
+
+
+def test_base_source_item_text_embeddings(mock_voyage_client):
+    """Text-only items embed two content chunks plus a summary via the text model."""
+    item = SourceItem(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        mime_type="text/html",
+        modality="text",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (CHUNKS[0].strip(), cast(list[str], []), metadata),
+        (CHUNKS[1].strip(), cast(list[str], []), metadata),
+        ("test summary", [], metadata | {"tags": {"tag1", "tag2", "bla"}}),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+
+
+def test_base_source_item_mixed_embeddings(mock_voyage_client):
+    """Items with an image also get an image-only chunk via the multimodal model."""
+    item = SourceItem(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        filename=DATA_DIR / "lang_timeline.png",
+        mime_type="image/png",
+        modality="photo",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (CHUNKS[0].strip(), [], metadata),
+        (CHUNKS[1].strip(), [], metadata),
+        ("test summary", [], metadata | {"tags": {"tag1", "tag2", "bla"}}),
+        (None, [LANG_TIMELINE_HASH], {"size": 3465, "source_id": 1, "tags": {"bla"}}),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.embed.call_args == call(
+        [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [[ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert [
+        image_hash(i) for i in mock_voyage_client.multimodal_embed.call_args[0][0][0]
+    ] == [LANG_TIMELINE_HASH]
+
+
+def test_mail_message_embeddings(mock_voyage_client):
+    """Mail messages are chunked and embedded like plain text items."""
+    item = MailMessage(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        mime_type="text/html",
+        modality="text",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        message_id="123",
+        subject="Test Subject",
+        sender="test@example.com",
+        recipients=["test@example.com"],
+        folder="INBOX",
+        sent_at=datetime(2025, 1, 1, 12, 0, 0),
+    )
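+    # Unlike the base SourceItem tests above, the expected tags here also carry
+    # the sender address: MailMessage chunks are expected to be tagged with the
+    # sender's email alongside the item's own tags.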
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla", "test@example.com"}
+    expected = [
+        (CHUNKS[0].strip(), [], metadata),
+        (CHUNKS[1].strip(), [], metadata),
+        (
+            "test summary",
+            [],
+            metadata | {"tags": {"tag1", "tag2", "bla", "test@example.com"}},
+        ),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+
+
+def test_email_attachment_embeddings_text(mock_voyage_client):
+    item = EmailAttachment(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        mime_type="text/html",
+        modality="text",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (CHUNKS[0].strip(), cast(list[str], []), metadata),
+        (CHUNKS[1].strip(), cast(list[str], []), metadata),
+        (
+            "test summary",
+            [],
+            metadata | {"tags": {"tag1", "tag2", "bla"}},
+        ),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+
+
+def test_email_attachment_embeddings_photo(mock_voyage_client):
+    item = EmailAttachment(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        filename=DATA_DIR / "lang_timeline.png",
+        mime_type="image/png",
+        modality="photo",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (None, [LANG_TIMELINE_HASH], metadata),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 0
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [[ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert [
+        image_hash(i) for i in mock_voyage_client.multimodal_embed.call_args[0][0][0]
+    ] == [LANG_TIMELINE_HASH]
+
+
+def test_email_attachment_embeddings_pdf(mock_voyage_client):
+    item = EmailAttachment(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        filename=DATA_DIR / "regulamin.pdf",
+        mime_type="application/pdf",
+        modality="doc",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    with pymupdf.open(item.filename) as pdf:
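+        # Each PDF page is rendered to an image, so every page becomes its own
+        # image-only chunk, with the page number and dimensions recorded in the
+        # chunk metadata.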
+        expected = [
+            (
+                None,
+                [image_hash(page_to_image(page))],
+                metadata
+                | {
+                    "page": page.number,
+                    "width": page.rect.width,
+                    "height": page.rect.height,
+                },
+            )
+            for page in pdf.pages()
+        ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 0
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [[ANY], [ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert [
+        [image_hash(a) for a in i]
+        for i in mock_voyage_client.multimodal_embed.call_args[0][0]
+    ] == [images for _, images, _ in expected]
+
+
+def test_comic_embeddings(mock_voyage_client):
+    item = Comic(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        filename=DATA_DIR / "lang_timeline.png",
+        mime_type="image/png",
+        modality="comic",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        title="The Evolution of Programming Languages",
+        author="John Doe",
+        published=datetime(2025, 1, 1, 12, 0, 0),
+        volume="1",
+        issue="1",
+        page=1,
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (
+            "The Evolution of Programming Languages by John Doe",
+            [LANG_TIMELINE_HASH],
+            metadata,
+        ),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 0
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [["The Evolution of Programming Languages by John Doe", ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert (
+        image_hash(mock_voyage_client.multimodal_embed.call_args[0][0][0][1])
+        == LANG_TIMELINE_HASH
+    )
+
+
+def test_book_section_embeddings_single_page(mock_voyage_client):
+    item = BookSection(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        mime_type="text/html",
+        modality="text",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        book_id=1,
+        section_title="The Evolution of Programming Languages",
+        section_number=1,
+        section_level=1,
+        start_page=1,
+        end_page=1,
+        pages=[SAMPLE_TEXT],
+        book=Book(
+            id=1,
+            title="Programming Languages",
+            author="John Doe",
+            published=datetime(2025, 1, 1, 12, 0, 0),
+        ),
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (CHUNKS[0].strip(), cast(list[str], []), metadata | {"type": "page"}),
+        (CHUNKS[1].strip(), cast(list[str], []), metadata | {"type": "page"}),
+        (
+            "test summary",
+            [],
+            metadata | {"tags": {"tag1", "tag2", "bla"}, "type": "summary"},
+        ),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [CHUNKS[0].strip(), CHUNKS[1].strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+
+
+def test_book_section_embeddings_multiple_pages(mock_voyage_client):
+    item = BookSection(
+        id=1,
+        content=SAMPLE_MARKDOWN + "\n\n" + SECOND_PAGE,
+        mime_type="text/html",
+        modality="text",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        book_id=1,
+        section_title="The Evolution of Programming Languages",
+        section_number=1,
+        section_level=1,
+        start_page=1,
+        end_page=2,
+        pages=[SAMPLE_TEXT, SECOND_PAGE_TEXT],
+        book=Book(
+            id=1,
+            title="Programming Languages",
+            author="John Doe",
+            published=datetime(2025, 1, 1, 12, 0, 0),
+        ),
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla", "tag1", "tag2"}
+    expected = [
+        (item.content.strip(), cast(list[str], []), metadata | {"type": "section"}),
+        ("test summary", [], metadata | {"type": "summary"}),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [item.content.strip(), "test summary"],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+
+
+@pytest.mark.parametrize(
+    "class_, modality",
+    (
+        (BlogPost, "blog"),
+        (ForumPost, "forum"),
+    ),
+)
+def test_post_embeddings_single_page(mock_voyage_client, class_, modality):
+    item = class_(
+        id=1,
+        content=SAMPLE_MARKDOWN,
+        mime_type="text/html",
+        modality=modality,
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        images=[LANG_TIMELINE.filename, CODE_COMPLEXITY.filename],  # type: ignore
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla", "tag1", "tag2"}
+    expected = [
+        (item.content.strip(), [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH], metadata),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert not mock_voyage_client.embed.call_count
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [[item.content.strip(), ANY, ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert [
+        image_hash(i)
+        for i in mock_voyage_client.multimodal_embed.call_args[0][0][0][1:]
+    ] == [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH]
+
+
+@pytest.mark.parametrize(
+    "class_, modality",
+    (
+        (BlogPost, "blog"),
+        (ForumPost, "forum"),
+    ),
+)
+def test_post_embeddings_multi_page(mock_voyage_client, class_, modality):
+    item = class_(
+        id=1,
+        content=SAMPLE_MARKDOWN + "\n\n" + SECOND_PAGE_MARKDOWN,
+        mime_type="text/html",
+        modality=modality,
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN + SECOND_PAGE_MARKDOWN),
+        tags=["bla"],
+        images=[LANG_TIMELINE.filename, CODE_COMPLEXITY.filename],  # type: ignore
+    )
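+    # A multi-page post yields one chunk for the whole document plus one chunk
+    # per page; the images ride along with the full document and the first
+    # page's chunk, while the remaining pages and the summary stay text-only.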
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla", "tag1", "tag2"}
+
+    all_contents = (
+        item.content.strip(),
+        [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH],
+        metadata,
+    )
+    first_chunk = (
+        TWO_PAGE_CHUNKS[0].strip(),
+        [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH],
+        metadata,
+    )
+    second_chunk = (TWO_PAGE_CHUNKS[1].strip(), [], metadata)
+    third_chunk = (TWO_PAGE_CHUNKS[2].strip(), [], metadata)
+    summary = ("test summary", [], metadata)
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(
+        item.data_chunks(),
+        [all_contents, first_chunk, second_chunk, third_chunk, summary],
+    )
+    # embed_source_item first does text embedding, then mixed embedding,
+    # so the order of chunks is different than in data_chunks()
+    compare_chunks(
+        embed_source_item(item),
+        [
+            second_chunk,
+            third_chunk,
+            summary,
+            all_contents,
+            first_chunk,
+        ],
+    )
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert mock_voyage_client.multimodal_embed.call_count == 1
+
+    assert mock_voyage_client.embed.call_args == call(
+        [
+            TWO_PAGE_CHUNKS[1].strip(),
+            TWO_PAGE_CHUNKS[2].strip(),
+            "test summary",
+        ],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert mock_voyage_client.multimodal_embed.call_args == call(
+        [[item.content.strip(), ANY, ANY], [TWO_PAGE_CHUNKS[0].strip(), ANY, ANY]],
+        model=settings.MIXED_EMBEDDING_MODEL,
+        input_type="document",
+    )
+    assert [
+        image_hash(i)
+        for i in mock_voyage_client.multimodal_embed.call_args[0][0][0][1:]
+    ] == [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH]
+    assert [
+        image_hash(i)
+        for i in mock_voyage_client.multimodal_embed.call_args[0][0][1][1:]
+    ] == [LANG_TIMELINE_HASH, CODE_COMPLEXITY_HASH]
+
+
+def test_agent_observation_embeddings(mock_voyage_client):
+    item = AgentObservation(
+        id=1,
+        content="The user thinks that all men must die.",
+        mime_type="text/html",
+        modality="observation",
+        sha256=hashlib.sha256(SAMPLE_MARKDOWN.encode("utf-8")).hexdigest(),
+        size=len(SAMPLE_MARKDOWN),
+        tags=["bla"],
+        observation_type="belief",
+        subject="humans",
+        confidence=0.8,
+        evidence={
+            "quote": "All humans are mortal.",
+            "source": "https://en.wikipedia.org/wiki/Human",
+        },
+        agent_model="gpt-4o",
+        inserted_at=datetime(2025, 1, 1, 12, 0, 0),
+    )
+    metadata = item.as_payload()
+    metadata["tags"] = {"bla"}
+    expected = [
+        (
+            "Subject: humans | Type: belief | Observation: The user thinks that all men must die. | Quote: All humans are mortal.",
+            [],
+            metadata | {"embedding_type": "semantic"},
+        ),
+        (
+            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die. | Confidence: 0.8",
+            [],
+            metadata | {"embedding_type": "temporal"},
+        ),
+        (
+            "The user thinks that all men must die.",
+            [],
+            metadata | {"embedding_type": "semantic"},
+        ),
+        ("All humans are mortal.", [], metadata | {"embedding_type": "semantic"}),
+    ]
+
+    mock_voyage_client.embed = Mock(return_value=Mock(embeddings=[[0.1] * 1024] * 3))
+    mock_voyage_client.multimodal_embed = Mock(
+        return_value=Mock(embeddings=[[0.1] * 1024] * 3)
+    )
+    compare_chunks(item.data_chunks(), expected)
+    compare_chunks(embed_source_item(item), expected)
+
+    assert mock_voyage_client.embed.call_count == 1
+    assert not mock_voyage_client.multimodal_embed.call_count
+
+    assert mock_voyage_client.embed.call_args == call(
+        [
+            "Subject: humans | Type: belief | Observation: The user thinks that all men must die. | Quote: All humans are mortal.",
+            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die. | Confidence: 0.8",
+            "The user thinks that all men must die.",
+            "All humans are mortal.",
+        ],
+        model=settings.TEXT_EMBEDDING_MODEL,
+        input_type="document",
+    )
diff --git a/tests/memory/common/db/models/test_source_items.py b/tests/memory/common/db/models/test_source_items.py
index 1a1bf55..aacd0c9 100644
--- a/tests/memory/common/db/models/test_source_items.py
+++ b/tests/memory/common/db/models/test_source_items.py
@@ -14,7 +14,6 @@ from memory.common.db.models.source_items import (
     BlogPost,
     AgentObservation,
 )
-from memory.common.db.models.source_item import merge_metadata
 
 
 @pytest.fixture
@@ -356,7 +355,8 @@ def test_book_section_data_chunks(pages, expected_chunks):
     chunks = book_section.data_chunks()
 
     expected = [
-        (c, merge_metadata(book_section.as_payload(), m)) for c, m in expected_chunks
+        (c, extract.merge_metadata(book_section.as_payload(), m))
+        for c, m in expected_chunks
     ]
     assert [(c.content, c.item_metadata) for c in chunks] == expected
     for c in chunks: