diff --git a/src/memory/api/MCP/books.py b/src/memory/api/MCP/books.py new file mode 100644 index 0000000..ad102ef --- /dev/null +++ b/src/memory/api/MCP/books.py @@ -0,0 +1,61 @@ +import logging + +from sqlalchemy.orm import joinedload + +from memory.api.MCP.tools import mcp +from memory.common.db.connection import make_session +from memory.common.db.models import Book, BookSection, BookSectionPayload + +logger = logging.getLogger(__name__) + + +@mcp.tool() +async def all_books(sections: bool = False) -> list[dict]: + """ + Get all books in the database. + + If sections is True, the response will include the sections for each book. + + Args: + sections: Whether to include sections in the response. Defaults to False. + + Returns: + List of books in the database. + """ + options = [] + if sections: + options = [joinedload(Book.sections)] + + with make_session() as session: + books = session.query(Book).options(*options).all() + return [book.as_payload(sections=sections) for book in books] + + +@mcp.tool() +def read_book(book_id: int, sections: list[int] = []) -> list[BookSectionPayload]: + """ + Read a book from the database. + + If sections is provided, only the sections with the given IDs will be returned. + + Args: + book_id: The ID of the book to read. + sections: The IDs of the sections to read. Defaults to all sections. + + Returns: + List of sections in the book, with contents. In the case of nested sections, only the top-level sections are returned. + """ + with make_session() as session: + book_sections = session.query(BookSection).filter( + BookSection.book_id == book_id + ) + if sections: + book_sections = book_sections.filter(BookSection.id.in_(sections)) + + all_sections = book_sections.all() + parents = [section.parent_section_id for section in all_sections] + return [ + section.as_payload() + for section in all_sections + if section.id not in parents + ] diff --git a/src/memory/common/db/models/sources.py b/src/memory/common/db/models/sources.py index 78e2df9..40171d2 100644 --- a/src/memory/common/db/models/sources.py +++ b/src/memory/common/db/models/sources.py @@ -50,9 +50,9 @@ class Book(Base): Index("book_title_idx", "title"), ) - def as_payload(self) -> dict: - return { - **super().as_payload(), + def as_payload(self, sections: bool = False) -> dict: + data = { + "id": self.id, "isbn": self.isbn, "title": self.title, "author": self.author, @@ -63,6 +63,9 @@ class Book(Base): "series": self.series, "series_number": self.series_number, } | (cast(dict, self.book_metadata) or {}) + if sections: + data["sections"] = [section.as_payload() for section in self.sections] + return data class ArticleFeed(Base): diff --git a/src/memory/parsers/ebook.py b/src/memory/parsers/ebook.py index 1333fa1..9b09245 100644 --- a/src/memory/parsers/ebook.py +++ b/src/memory/parsers/ebook.py @@ -3,7 +3,8 @@ from dataclasses import dataclass, field from typing import Any, cast from pathlib import Path -import fitz # PyMuPDF +import fitz +from memory.common import settings # PyMuPDF logger = logging.getLogger(__name__) @@ -27,6 +28,7 @@ class Ebook: title: str author: str file_path: Path + relative_path: Path metadata: dict[str, Any] = field(default_factory=dict) sections: list[Section] = field(default_factory=list) full_content: str = "" @@ -180,6 +182,7 @@ def parse_ebook(file_path: str | Path) -> Ebook: sections=sections, full_content=full_content, file_path=path, + relative_path=path.relative_to(settings.FILE_STORAGE_DIR), file_type=path.suffix.lower()[1:], n_pages=doc.page_count, ) diff --git a/src/memory/workers/tasks/ebook.py b/src/memory/workers/tasks/ebook.py index 3501ac1..20a62ef 100644 --- a/src/memory/workers/tasks/ebook.py +++ b/src/memory/workers/tasks/ebook.py @@ -1,12 +1,13 @@ import logging import pathlib +from datetime import datetime from typing import Iterable, cast import memory.common.settings as settings -from memory.parsers.ebook import Ebook, parse_ebook, Section -from memory.common.db.models import Book, BookSection +from memory.common.celery_app import SYNC_BOOK, app from memory.common.db.connection import make_session -from memory.common.celery_app import app, SYNC_BOOK +from memory.common.db.models import Book, BookSection +from memory.parsers.ebook import Ebook, Section, parse_ebook from memory.workers.tasks.content_processing import ( check_content_exists, create_content_hash, @@ -143,7 +144,18 @@ def embed_sections(all_sections: list[BookSection]) -> int: @app.task(name=SYNC_BOOK) @safe_task_execution -def sync_book(file_path: str, tags: Iterable[str] = []) -> dict: +def sync_book( + file_path: str, + tags: Iterable[str] = [], + title: str = "", + author: str = "", + publisher: str = "", + published: str = "", + language: str = "", + edition: str = "", + series: str = "", + series_number: int | None = None, +) -> dict: """ Synchronize a book from a file path. @@ -154,12 +166,13 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict: dict: Summary of what was processed """ ebook = validate_and_parse_book(file_path) - logger.info(f"Ebook parsed: {ebook.title}") + logger.info(f"Ebook parsed: {ebook.title}, {ebook.file_path.as_posix()}") with make_session() as session: # Check for existing book + logger.info(f"Checking for existing book: {ebook.relative_path.as_posix()}") existing_book = check_content_exists( - session, Book, file_path=ebook.file_path.as_posix() + session, Book, file_path=ebook.relative_path.as_posix() ) if existing_book: logger.info(f"Book already exists: {existing_book.title}") @@ -175,6 +188,24 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict: # Create book and sections with relationships book, all_sections = create_book_and_sections(ebook, session, tags) + if title: + book.title = title # type: ignore + if author: + book.author = author # type: ignore + if publisher: + book.publisher = publisher # type: ignore + if published: + book.published = datetime.fromisoformat(published) # type: ignore + if language: + book.language = language # type: ignore + if edition: + book.edition = edition # type: ignore + if series: + book.series = series # type: ignore + if series_number: + book.series_number = series_number # type: ignore + session.add(book) + # Embed sections logger.info("Embedding sections") embedded_count = sum(embed_source_item(section) for section in all_sections)