Compare commits


No commits in common. "beb94375da6e7f81aca36b530e20064bfc542303" and "86c96da1b94c5055701da4787844bf37e7e43d2c" have entirely different histories.

8 changed files with 22 additions and 115 deletions

View File

@@ -1,61 +0,0 @@
-import logging
-
-from sqlalchemy.orm import joinedload
-
-from memory.api.MCP.tools import mcp
-from memory.common.db.connection import make_session
-from memory.common.db.models import Book, BookSection, BookSectionPayload
-
-logger = logging.getLogger(__name__)
-
-
-@mcp.tool()
-async def all_books(sections: bool = False) -> list[dict]:
-    """
-    Get all books in the database.
-
-    If sections is True, the response will include the sections for each book.
-
-    Args:
-        sections: Whether to include sections in the response. Defaults to False.
-
-    Returns:
-        List of books in the database.
-    """
-    options = []
-    if sections:
-        options = [joinedload(Book.sections)]
-
-    with make_session() as session:
-        books = session.query(Book).options(*options).all()
-        return [book.as_payload(sections=sections) for book in books]
-
-
-@mcp.tool()
-def read_book(book_id: int, sections: list[int] = []) -> list[BookSectionPayload]:
-    """
-    Read a book from the database.
-
-    If sections is provided, only the sections with the given IDs will be returned.
-
-    Args:
-        book_id: The ID of the book to read.
-        sections: The IDs of the sections to read. Defaults to all sections.
-
-    Returns:
-        List of sections in the book, with contents. In the case of nested sections, only the top-level sections are returned.
-    """
-    with make_session() as session:
-        book_sections = session.query(BookSection).filter(
-            BookSection.book_id == book_id
-        )
-        if sections:
-            book_sections = book_sections.filter(BookSection.id.in_(sections))
-
-        all_sections = book_sections.all()
-        parents = [section.parent_section_id for section in all_sections]
-        return [
-            section.as_payload()
-            for section in all_sections
-            if section.id not in parents
-        ]
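
The subtle part of the deleted `read_book` is its final filter: it drops any section whose id appears as another fetched section's `parent_section_id`, so the tool returned the sections of the fetched set that have no children within it. A standalone sketch of that logic, using plain dataclasses instead of the SQLAlchemy models (names here are illustrative):

```python
from dataclasses import dataclass


@dataclass
class Section:
    id: int
    parent_section_id: int | None


def childless_sections(fetched: list[Section]) -> list[Section]:
    # Collect every id that some fetched section points at as its parent,
    # then keep only the sections that are not themselves a parent of
    # anything in the fetched set.
    parents = {s.parent_section_id for s in fetched}
    return [s for s in fetched if s.id not in parents]


book = [Section(1, None), Section(2, 1), Section(3, 1)]
assert [s.id for s in childless_sections(book)] == [2, 3]
```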

View File

@@ -180,7 +180,6 @@ async def observe(
         session_id: UUID to group observations from same conversation
         agent_model: AI model making observations (for quality tracking)
     """
-    logger.info("MCP: Observing")
     tasks = [
         (
             observation,
@@ -238,7 +237,6 @@ async def search_observations(
         Returns: List with content, tags, created_at, metadata
         Results sorted by relevance to your query.
     """
-    logger.info("MCP: Searching observations for %s", query)
     semantic_text = observation.generate_semantic_text(
         subject=subject or "",
         observation_type="".join(observation_types or []),
@@ -299,7 +297,6 @@ async def create_note(
         confidences: Dict of scores (0.0-1.0), e.g. {"observation_accuracy": 0.9}
         tags: Organization tags for filtering and discovery
     """
-    logger.info("MCP: creating note: %s", subject)
    if filename:
         path = pathlib.Path(filename)
         if not path.is_absolute():
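
These three hunks drop the ad-hoc `logger.info` call at the top of each tool body. If per-call logging is still wanted later, one option is to centralize it in a decorator; a hypothetical sketch, not part of this change:

```python
import functools
import logging

logger = logging.getLogger(__name__)


def log_tool_call(fn):
    # Hypothetical: one decorator instead of an inline logger.info()
    # repeated at the top of every async tool body.
    @functools.wraps(fn)
    async def wrapper(*args, **kwargs):
        logger.info("MCP: %s called", fn.__name__)
        return await fn(*args, **kwargs)

    return wrapper
```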

View File

@@ -108,3 +108,10 @@ async def get_authenticated_user() -> dict:
         "client_id": access_token.client_id,
         "user": user_info,
     }
+
+
+@mcp.tool()
+async def send_response(response: str) -> dict:
+    """Send a response to the user."""
+    logger.info(f"Sending response: {response}")
+    return {"response": response}
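
The new `send_response` tool just echoes its argument back. Assuming the `@mcp.tool()` decorator returns the function unchanged, as FastMCP-style decorators typically do, a direct invocation shows the return shape:

```python
import asyncio

# Illustration only: calling the coroutine directly, outside the MCP
# transport; real clients reach it through the protocol's tool calls.
result = asyncio.run(send_response("Done."))
assert result == {"response": "Done."}
```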

View File

@@ -28,7 +28,7 @@ from sqlalchemy.dialects.postgresql import BYTEA
 from sqlalchemy.orm import Session, relationship
 from sqlalchemy.types import Numeric
 
-from memory.common import settings, tokens
+from memory.common import settings
 import memory.common.extract as extract
 import memory.common.collections as collections
 import memory.common.chunker as chunker
@@ -125,7 +125,8 @@ def chunk_mixed(content: str, image_paths: Sequence[str]) -> list[extract.DataChunk]:
     )
     chunks: list[extract.DataChunk] = [full_text]
 
-    if tokens.approx_token_count(content) > chunker.DEFAULT_CHUNK_TOKENS * 2:
+    tokens = chunker.approx_token_count(content)
+    if tokens > chunker.DEFAULT_CHUNK_TOKENS * 2:
         chunks += [
             extract.DataChunk(data=add_pics(c, images), metadata={"tags": tags})
             for c in chunker.chunk_text(content)
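
The token estimate now comes from `chunker.approx_token_count` instead of the removed `tokens` module. Its implementation is not shown in this diff; a plausible stand-in (an assumption, not the repo's code) is the usual characters-per-token heuristic:

```python
def approx_token_count(text: str) -> int:
    # Assumed implementation -- the real chunker.approx_token_count is
    # not part of this diff. English prose averages roughly four
    # characters per token, so divide and truncate.
    return len(text) // 4
```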

View File

@@ -50,9 +50,9 @@ class Book(Base):
         Index("book_title_idx", "title"),
     )
 
-    def as_payload(self, sections: bool = False) -> dict:
-        data = {
-            "id": self.id,
+    def as_payload(self) -> dict:
+        return {
+            **super().as_payload(),
             "isbn": self.isbn,
             "title": self.title,
             "author": self.author,
@@ -63,9 +63,6 @@ class Book(Base):
             "series": self.series,
             "series_number": self.series_number,
         } | (cast(dict, self.book_metadata) or {})
-        if sections:
-            data["sections"] = [section.as_payload() for section in self.sections]
-        return data
 
 
 class ArticleFeed(Base):
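
`as_payload` now starts from the base class payload and layers the book columns and `book_metadata` on top, so merge order matters: with `|`, later operands win. A standalone illustration of that precedence (plain dicts, not the model code):

```python
# Stand-ins for super().as_payload(), the Book columns, and book_metadata.
base = {"id": 1, "title": None}
columns = {"title": "Dune", "author": "Frank Herbert"}
book_metadata = {"title": "Dune (Deluxe Edition)"}

# Later operands override earlier keys: book_metadata beats the columns,
# which beat the base payload.
payload = {**base, **columns} | (book_metadata or {})
assert payload["title"] == "Dune (Deluxe Edition)"
assert payload["id"] == 1
```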

View File

@@ -3,8 +3,7 @@ from dataclasses import dataclass, field
 from typing import Any, cast
 from pathlib import Path
 
-import fitz
-from memory.common import settings
+import fitz  # PyMuPDF
 
 logger = logging.getLogger(__name__)
@@ -28,7 +27,6 @@ class Ebook:
     title: str
     author: str
     file_path: Path
-    relative_path: Path
     metadata: dict[str, Any] = field(default_factory=dict)
     sections: list[Section] = field(default_factory=list)
     full_content: str = ""
@@ -182,7 +180,6 @@ def parse_ebook(file_path: str | Path) -> Ebook:
         sections=sections,
         full_content=full_content,
         file_path=path,
-        relative_path=path.relative_to(settings.FILE_STORAGE_DIR),
         file_type=path.suffix.lower()[1:],
         n_pages=doc.page_count,
     )
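
With `relative_path` gone from the `Ebook` dataclass, callers that still need a storage-relative path have to derive it themselves. A hypothetical call-site sketch (the directory is illustrative, not the repo's actual setting):

```python
from pathlib import Path

from memory.parsers.ebook import parse_ebook

ebook = parse_ebook("/srv/memory/files/books/dune.epub")

# Derive the storage-relative path where it is needed instead of
# carrying it on the dataclass.
relative = ebook.file_path.relative_to(Path("/srv/memory/files"))
```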

View File

@@ -1,13 +1,12 @@
 import logging
 import pathlib
-from datetime import datetime
 from typing import Iterable, cast
 
 import memory.common.settings as settings
-from memory.common.celery_app import SYNC_BOOK, app
-from memory.common.db.connection import make_session
+from memory.parsers.ebook import Ebook, parse_ebook, Section
 from memory.common.db.models import Book, BookSection
-from memory.parsers.ebook import Ebook, Section, parse_ebook
+from memory.common.db.connection import make_session
+from memory.common.celery_app import app, SYNC_BOOK
 from memory.workers.tasks.content_processing import (
     check_content_exists,
     create_content_hash,
@@ -144,18 +143,7 @@ def embed_sections(all_sections: list[BookSection]) -> int:
 
 @app.task(name=SYNC_BOOK)
 @safe_task_execution
-def sync_book(
-    file_path: str,
-    tags: Iterable[str] = [],
-    title: str = "",
-    author: str = "",
-    publisher: str = "",
-    published: str = "",
-    language: str = "",
-    edition: str = "",
-    series: str = "",
-    series_number: int | None = None,
-) -> dict:
+def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
     """
     Synchronize a book from a file path.
@@ -166,13 +154,12 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
         dict: Summary of what was processed
     """
     ebook = validate_and_parse_book(file_path)
-    logger.info(f"Ebook parsed: {ebook.title}, {ebook.file_path.as_posix()}")
+    logger.info(f"Ebook parsed: {ebook.title}")
 
     with make_session() as session:
         # Check for existing book
-        logger.info(f"Checking for existing book: {ebook.relative_path.as_posix()}")
         existing_book = check_content_exists(
-            session, Book, file_path=ebook.relative_path.as_posix()
+            session, Book, file_path=ebook.file_path.as_posix()
         )
         if existing_book:
             logger.info(f"Book already exists: {existing_book.title}")
@@ -188,24 +175,6 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
         # Create book and sections with relationships
         book, all_sections = create_book_and_sections(ebook, session, tags)
 
-        if title:
-            book.title = title  # type: ignore
-        if author:
-            book.author = author  # type: ignore
-        if publisher:
-            book.publisher = publisher  # type: ignore
-        if published:
-            book.published = datetime.fromisoformat(published)  # type: ignore
-        if language:
-            book.language = language  # type: ignore
-        if edition:
-            book.edition = edition  # type: ignore
-        if series:
-            book.series = series  # type: ignore
-        if series_number:
-            book.series_number = series_number  # type: ignore
-        session.add(book)
-
         # Embed sections
         logger.info("Embedding sections")
         embedded_count = sum(embed_source_item(section) for section in all_sections)
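
After the signature change, only the file path and optional tags cross the task boundary; the title, author, and other per-field overrides no longer exist. A sketch of enqueueing the slimmed-down task, assuming a standard Celery setup (the path is illustrative):

```python
from memory.common.celery_app import SYNC_BOOK, app

# Everything else the old signature accepted (title, author, publisher,
# ...) is now taken from the parsed ebook itself.
result = app.send_task(
    SYNC_BOOK,
    args=["/srv/memory/files/books/dune.epub"],
    kwargs={"tags": ["sci-fi"]},
)
```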

View File

@@ -3,7 +3,7 @@ from unittest.mock import patch
 from typing import cast
 
 import pytest
 from PIL import Image
-from memory.common import settings, chunker, extract, tokens
+from memory.common import settings, chunker, extract
 from memory.common.db.models.source_item import (
     Chunk,
 )
@@ -610,7 +610,7 @@ def test_chunk_mixed_long_content(tmp_path):
     with (
         patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
         patch.object(chunker, "DEFAULT_CHUNK_TOKENS", 10),
-        patch.object(tokens, "approx_token_count", return_value=100),
+        patch.object(chunker, "approx_token_count", return_value=100),
     ):  # Force it to be > 2 * 10
         result = chunk_mixed(long_content, [])
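
The patch target moves with the lookup path: `chunk_mixed` now resolves `approx_token_count` through the `chunker` module, so that is where the stub must be installed. The general `unittest.mock` rule, restated:

```python
from unittest.mock import patch

import memory.common.chunker as chunker

# Patch the attribute on the module the code under test reads it from;
# a mock left on the old tokens module would never be seen.
with patch.object(chunker, "approx_token_count", return_value=100):
    ...  # the stubbed count now exceeds 2 * DEFAULT_CHUNK_TOKENS
```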