handle books

This commit is contained in:
EC2 Default User 2025-07-24 21:33:15 +00:00
parent 907375eee5
commit cf456c04d6
4 changed files with 108 additions and 10 deletions

View File

@ -0,0 +1,61 @@
import logging
from sqlalchemy.orm import joinedload
from memory.api.MCP.tools import mcp
from memory.common.db.connection import make_session
from memory.common.db.models import Book, BookSection, BookSectionPayload
logger = logging.getLogger(__name__)
@mcp.tool()
async def all_books(sections: bool = False) -> list[dict]:
"""
Get all books in the database.
If sections is True, the response will include the sections for each book.
Args:
sections: Whether to include sections in the response. Defaults to False.
Returns:
List of books in the database.
"""
options = []
if sections:
options = [joinedload(Book.sections)]
with make_session() as session:
books = session.query(Book).options(*options).all()
return [book.as_payload(sections=sections) for book in books]
@mcp.tool()
def read_book(book_id: int, sections: list[int] = []) -> list[BookSectionPayload]:
"""
Read a book from the database.
If sections is provided, only the sections with the given IDs will be returned.
Args:
book_id: The ID of the book to read.
sections: The IDs of the sections to read. Defaults to all sections.
Returns:
List of sections in the book, with contents. In the case of nested sections, only the top-level sections are returned.
"""
with make_session() as session:
book_sections = session.query(BookSection).filter(
BookSection.book_id == book_id
)
if sections:
book_sections = book_sections.filter(BookSection.id.in_(sections))
all_sections = book_sections.all()
parents = [section.parent_section_id for section in all_sections]
return [
section.as_payload()
for section in all_sections
if section.id not in parents
]

View File

@ -50,9 +50,9 @@ class Book(Base):
Index("book_title_idx", "title"), Index("book_title_idx", "title"),
) )
def as_payload(self) -> dict: def as_payload(self, sections: bool = False) -> dict:
return { data = {
**super().as_payload(), "id": self.id,
"isbn": self.isbn, "isbn": self.isbn,
"title": self.title, "title": self.title,
"author": self.author, "author": self.author,
@ -63,6 +63,9 @@ class Book(Base):
"series": self.series, "series": self.series,
"series_number": self.series_number, "series_number": self.series_number,
} | (cast(dict, self.book_metadata) or {}) } | (cast(dict, self.book_metadata) or {})
if sections:
data["sections"] = [section.as_payload() for section in self.sections]
return data
class ArticleFeed(Base): class ArticleFeed(Base):

View File

@ -3,7 +3,8 @@ from dataclasses import dataclass, field
from typing import Any, cast from typing import Any, cast
from pathlib import Path from pathlib import Path
import fitz # PyMuPDF import fitz
from memory.common import settings # PyMuPDF
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -27,6 +28,7 @@ class Ebook:
title: str title: str
author: str author: str
file_path: Path file_path: Path
relative_path: Path
metadata: dict[str, Any] = field(default_factory=dict) metadata: dict[str, Any] = field(default_factory=dict)
sections: list[Section] = field(default_factory=list) sections: list[Section] = field(default_factory=list)
full_content: str = "" full_content: str = ""
@ -180,6 +182,7 @@ def parse_ebook(file_path: str | Path) -> Ebook:
sections=sections, sections=sections,
full_content=full_content, full_content=full_content,
file_path=path, file_path=path,
relative_path=path.relative_to(settings.FILE_STORAGE_DIR),
file_type=path.suffix.lower()[1:], file_type=path.suffix.lower()[1:],
n_pages=doc.page_count, n_pages=doc.page_count,
) )

View File

@ -1,12 +1,13 @@
import logging import logging
import pathlib import pathlib
from datetime import datetime
from typing import Iterable, cast from typing import Iterable, cast
import memory.common.settings as settings import memory.common.settings as settings
from memory.parsers.ebook import Ebook, parse_ebook, Section from memory.common.celery_app import SYNC_BOOK, app
from memory.common.db.models import Book, BookSection
from memory.common.db.connection import make_session from memory.common.db.connection import make_session
from memory.common.celery_app import app, SYNC_BOOK from memory.common.db.models import Book, BookSection
from memory.parsers.ebook import Ebook, Section, parse_ebook
from memory.workers.tasks.content_processing import ( from memory.workers.tasks.content_processing import (
check_content_exists, check_content_exists,
create_content_hash, create_content_hash,
@ -143,7 +144,18 @@ def embed_sections(all_sections: list[BookSection]) -> int:
@app.task(name=SYNC_BOOK) @app.task(name=SYNC_BOOK)
@safe_task_execution @safe_task_execution
def sync_book(file_path: str, tags: Iterable[str] = []) -> dict: def sync_book(
file_path: str,
tags: Iterable[str] = [],
title: str = "",
author: str = "",
publisher: str = "",
published: str = "",
language: str = "",
edition: str = "",
series: str = "",
series_number: int | None = None,
) -> dict:
""" """
Synchronize a book from a file path. Synchronize a book from a file path.
@ -154,12 +166,13 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
dict: Summary of what was processed dict: Summary of what was processed
""" """
ebook = validate_and_parse_book(file_path) ebook = validate_and_parse_book(file_path)
logger.info(f"Ebook parsed: {ebook.title}") logger.info(f"Ebook parsed: {ebook.title}, {ebook.file_path.as_posix()}")
with make_session() as session: with make_session() as session:
# Check for existing book # Check for existing book
logger.info(f"Checking for existing book: {ebook.relative_path.as_posix()}")
existing_book = check_content_exists( existing_book = check_content_exists(
session, Book, file_path=ebook.file_path.as_posix() session, Book, file_path=ebook.relative_path.as_posix()
) )
if existing_book: if existing_book:
logger.info(f"Book already exists: {existing_book.title}") logger.info(f"Book already exists: {existing_book.title}")
@ -175,6 +188,24 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
# Create book and sections with relationships # Create book and sections with relationships
book, all_sections = create_book_and_sections(ebook, session, tags) book, all_sections = create_book_and_sections(ebook, session, tags)
if title:
book.title = title # type: ignore
if author:
book.author = author # type: ignore
if publisher:
book.publisher = publisher # type: ignore
if published:
book.published = datetime.fromisoformat(published) # type: ignore
if language:
book.language = language # type: ignore
if edition:
book.edition = edition # type: ignore
if series:
book.series = series # type: ignore
if series_number:
book.series_number = series_number # type: ignore
session.add(book)
# Embed sections # Embed sections
logger.info("Embedding sections") logger.info("Embedding sections")
embedded_count = sum(embed_source_item(section) for section in all_sections) embedded_count = sum(embed_source_item(section) for section in all_sections)