Compare commits


No commits in common. "beb94375da6e7f81aca36b530e20064bfc542303" and "86c96da1b94c5055701da4787844bf37e7e43d2c" have entirely different histories.

8 changed files with 22 additions and 115 deletions

View File

@@ -1,61 +0,0 @@
-import logging
-
-from sqlalchemy.orm import joinedload
-
-from memory.api.MCP.tools import mcp
-from memory.common.db.connection import make_session
-from memory.common.db.models import Book, BookSection, BookSectionPayload
-
-logger = logging.getLogger(__name__)
-
-
-@mcp.tool()
-async def all_books(sections: bool = False) -> list[dict]:
-    """
-    Get all books in the database.
-
-    If sections is True, the response will include the sections for each book.
-
-    Args:
-        sections: Whether to include sections in the response. Defaults to False.
-
-    Returns:
-        List of books in the database.
-    """
-    options = []
-    if sections:
-        options = [joinedload(Book.sections)]
-
-    with make_session() as session:
-        books = session.query(Book).options(*options).all()
-        return [book.as_payload(sections=sections) for book in books]
-
-
-@mcp.tool()
-def read_book(book_id: int, sections: list[int] = []) -> list[BookSectionPayload]:
-    """
-    Read a book from the database.
-
-    If sections is provided, only the sections with the given IDs will be returned.
-
-    Args:
-        book_id: The ID of the book to read.
-        sections: The IDs of the sections to read. Defaults to all sections.
-
-    Returns:
-        List of sections in the book, with contents. In the case of nested sections, only the top-level sections are returned.
-    """
-    with make_session() as session:
-        book_sections = session.query(BookSection).filter(
-            BookSection.book_id == book_id
-        )
-        if sections:
-            book_sections = book_sections.filter(BookSection.id.in_(sections))
-
-        all_sections = book_sections.all()
-        parents = [section.parent_section_id for section in all_sections]
-        return [
-            section.as_payload()
-            for section in all_sections
-            if section.id not in parents
-        ]
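
The subtle part of the deleted `read_book` is its final filter: it drops any section whose id appears as another fetched section's `parent_section_id`, so the tool returned the sections of the fetched set that have no children within it. A standalone sketch of that logic, using plain dataclasses instead of the SQLAlchemy models (names here are illustrative):

```python
from dataclasses import dataclass


@dataclass
class Section:
    id: int
    parent_section_id: int | None


def childless_sections(fetched: list[Section]) -> list[Section]:
    # Collect every id that some fetched section points at as its parent,
    # then keep only the sections that are not themselves a parent of
    # anything in the fetched set.
    parents = {s.parent_section_id for s in fetched}
    return [s for s in fetched if s.id not in parents]


book = [Section(1, None), Section(2, 1), Section(3, 1)]
assert [s.id for s in childless_sections(book)] == [2, 3]
```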

View File

@@ -180,7 +180,6 @@ async def observe(
         session_id: UUID to group observations from same conversation
         agent_model: AI model making observations (for quality tracking)
     """
-    logger.info("MCP: Observing")
     tasks = [
         (
             observation,
@@ -238,7 +237,6 @@ async def search_observations(
         Returns: List with content, tags, created_at, metadata
         Results sorted by relevance to your query.
     """
-    logger.info("MCP: Searching observations for %s", query)
     semantic_text = observation.generate_semantic_text(
         subject=subject or "",
         observation_type="".join(observation_types or []),
@@ -299,7 +297,6 @@ async def create_note(
         confidences: Dict of scores (0.0-1.0), e.g. {"observation_accuracy": 0.9}
         tags: Organization tags for filtering and discovery
     """
-    logger.info("MCP: creating note: %s", subject)
    if filename:
         path = pathlib.Path(filename)
         if not path.is_absolute():
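
These three hunks drop the ad-hoc `logger.info` call at the top of each tool body. If per-call logging is still wanted later, one option is to centralize it in a decorator; a hypothetical sketch, not part of this change:

```python
import functools
import logging

logger = logging.getLogger(__name__)


def log_tool_call(fn):
    # Hypothetical: one decorator instead of an inline logger.info()
    # repeated at the top of every async tool body.
    @functools.wraps(fn)
    async def wrapper(*args, **kwargs):
        logger.info("MCP: %s called", fn.__name__)
        return await fn(*args, **kwargs)

    return wrapper
```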

View File

@@ -108,3 +108,10 @@ async def get_authenticated_user() -> dict:
         "client_id": access_token.client_id,
         "user": user_info,
     }
+
+
+@mcp.tool()
+async def send_response(response: str) -> dict:
+    """Send a response to the user."""
+    logger.info(f"Sending response: {response}")
+    return {"response": response}
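
The new `send_response` tool just echoes its argument back. Assuming the `@mcp.tool()` decorator returns the function unchanged, as FastMCP-style decorators typically do, a direct invocation shows the return shape:

```python
import asyncio

# Illustration only: calling the coroutine directly, outside the MCP
# transport; real clients reach it through the protocol's tool calls.
result = asyncio.run(send_response("Done."))
assert result == {"response": "Done."}
```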

View File

@@ -28,7 +28,7 @@ from sqlalchemy.dialects.postgresql import BYTEA
 from sqlalchemy.orm import Session, relationship
 from sqlalchemy.types import Numeric
 
-from memory.common import settings, tokens
+from memory.common import settings
 import memory.common.extract as extract
 import memory.common.collections as collections
 import memory.common.chunker as chunker
@@ -125,7 +125,8 @@ def chunk_mixed(content: str, image_paths: Sequence[str]) -> list[extract.DataChunk]:
     )
     chunks: list[extract.DataChunk] = [full_text]
 
-    if tokens.approx_token_count(content) > chunker.DEFAULT_CHUNK_TOKENS * 2:
+    tokens = chunker.approx_token_count(content)
+    if tokens > chunker.DEFAULT_CHUNK_TOKENS * 2:
         chunks += [
             extract.DataChunk(data=add_pics(c, images), metadata={"tags": tags})
             for c in chunker.chunk_text(content)
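
The token estimate now comes from `chunker.approx_token_count` instead of the removed `tokens` module. Its implementation is not shown in this diff; a plausible stand-in (an assumption, not the repo's code) is the usual characters-per-token heuristic:

```python
def approx_token_count(text: str) -> int:
    # Assumed implementation -- the real chunker.approx_token_count is
    # not part of this diff. English prose averages roughly four
    # characters per token, so divide and truncate.
    return len(text) // 4
```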

View File

@@ -50,9 +50,9 @@ class Book(Base):
         Index("book_title_idx", "title"),
     )
 
-    def as_payload(self, sections: bool = False) -> dict:
-        data = {
-            "id": self.id,
+    def as_payload(self) -> dict:
+        return {
+            **super().as_payload(),
             "isbn": self.isbn,
             "title": self.title,
             "author": self.author,
@@ -63,9 +63,6 @@ class Book(Base):
             "series": self.series,
             "series_number": self.series_number,
         } | (cast(dict, self.book_metadata) or {})
-        if sections:
-            data["sections"] = [section.as_payload() for section in self.sections]
-        return data
 
 
 class ArticleFeed(Base):
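
`as_payload` now starts from the base class payload and layers the book columns and `book_metadata` on top, so merge order matters: with `|`, later operands win. A standalone illustration of that precedence (plain dicts, not the model code):

```python
# Stand-ins for super().as_payload(), the Book columns, and book_metadata.
base = {"id": 1, "title": None}
columns = {"title": "Dune", "author": "Frank Herbert"}
book_metadata = {"title": "Dune (Deluxe Edition)"}

# Later operands override earlier keys: book_metadata beats the columns,
# which beat the base payload.
payload = {**base, **columns} | (book_metadata or {})
assert payload["title"] == "Dune (Deluxe Edition)"
assert payload["id"] == 1
```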

View File

@@ -3,8 +3,7 @@ from dataclasses import dataclass, field
 from typing import Any, cast
 from pathlib import Path
 
-import fitz
-from memory.common import settings
+import fitz  # PyMuPDF
 
 logger = logging.getLogger(__name__)
@@ -28,7 +27,6 @@ class Ebook:
     title: str
     author: str
     file_path: Path
-    relative_path: Path
     metadata: dict[str, Any] = field(default_factory=dict)
     sections: list[Section] = field(default_factory=list)
     full_content: str = ""
@@ -182,7 +180,6 @@ def parse_ebook(file_path: str | Path) -> Ebook:
         sections=sections,
         full_content=full_content,
         file_path=path,
-        relative_path=path.relative_to(settings.FILE_STORAGE_DIR),
         file_type=path.suffix.lower()[1:],
         n_pages=doc.page_count,
     )
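
With `relative_path` gone from the `Ebook` dataclass, callers that still need a storage-relative path have to derive it themselves. A hypothetical call-site sketch (the directory is illustrative, not the repo's actual setting):

```python
from pathlib import Path

from memory.parsers.ebook import parse_ebook

ebook = parse_ebook("/srv/memory/files/books/dune.epub")

# Derive the storage-relative path where it is needed instead of
# carrying it on the dataclass.
relative = ebook.file_path.relative_to(Path("/srv/memory/files"))
```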

View File

@@ -1,13 +1,12 @@
 import logging
 import pathlib
-from datetime import datetime
 from typing import Iterable, cast
 
 import memory.common.settings as settings
-from memory.common.celery_app import SYNC_BOOK, app
-from memory.common.db.connection import make_session
+from memory.parsers.ebook import Ebook, parse_ebook, Section
 from memory.common.db.models import Book, BookSection
-from memory.parsers.ebook import Ebook, Section, parse_ebook
+from memory.common.db.connection import make_session
+from memory.common.celery_app import app, SYNC_BOOK
 from memory.workers.tasks.content_processing import (
     check_content_exists,
     create_content_hash,
@@ -144,18 +143,7 @@ def embed_sections(all_sections: list[BookSection]) -> int:
 
 @app.task(name=SYNC_BOOK)
 @safe_task_execution
-def sync_book(
-    file_path: str,
-    tags: Iterable[str] = [],
-    title: str = "",
-    author: str = "",
-    publisher: str = "",
-    published: str = "",
-    language: str = "",
-    edition: str = "",
-    series: str = "",
-    series_number: int | None = None,
-) -> dict:
+def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
     """
     Synchronize a book from a file path.
@@ -166,13 +154,12 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
         dict: Summary of what was processed
     """
     ebook = validate_and_parse_book(file_path)
-    logger.info(f"Ebook parsed: {ebook.title}, {ebook.file_path.as_posix()}")
+    logger.info(f"Ebook parsed: {ebook.title}")
 
     with make_session() as session:
         # Check for existing book
-        logger.info(f"Checking for existing book: {ebook.relative_path.as_posix()}")
         existing_book = check_content_exists(
-            session, Book, file_path=ebook.relative_path.as_posix()
+            session, Book, file_path=ebook.file_path.as_posix()
         )
         if existing_book:
             logger.info(f"Book already exists: {existing_book.title}")
@@ -188,24 +175,6 @@ def sync_book(file_path: str, tags: Iterable[str] = []) -> dict:
         # Create book and sections with relationships
         book, all_sections = create_book_and_sections(ebook, session, tags)
 
-        if title:
-            book.title = title  # type: ignore
-        if author:
-            book.author = author  # type: ignore
-        if publisher:
-            book.publisher = publisher  # type: ignore
-        if published:
-            book.published = datetime.fromisoformat(published)  # type: ignore
-        if language:
-            book.language = language  # type: ignore
-        if edition:
-            book.edition = edition  # type: ignore
-        if series:
-            book.series = series  # type: ignore
-        if series_number:
-            book.series_number = series_number  # type: ignore
-        session.add(book)
-
         # Embed sections
         logger.info("Embedding sections")
         embedded_count = sum(embed_source_item(section) for section in all_sections)
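
After the signature change, only the file path and optional tags cross the task boundary; the title, author, and other per-field overrides no longer exist. A sketch of enqueueing the slimmed-down task, assuming a standard Celery setup (the path is illustrative):

```python
from memory.common.celery_app import SYNC_BOOK, app

# Everything else the old signature accepted (title, author, publisher,
# ...) is now taken from the parsed ebook itself.
result = app.send_task(
    SYNC_BOOK,
    args=["/srv/memory/files/books/dune.epub"],
    kwargs={"tags": ["sci-fi"]},
)
```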

View File

@@ -3,7 +3,7 @@ from unittest.mock import patch
 from typing import cast
 
 import pytest
 from PIL import Image
-from memory.common import settings, chunker, extract, tokens
+from memory.common import settings, chunker, extract
 from memory.common.db.models.source_item import (
     Chunk,
 )
@@ -610,7 +610,7 @@ def test_chunk_mixed_long_content(tmp_path):
     with (
         patch.object(settings, "FILE_STORAGE_DIR", tmp_path),
         patch.object(chunker, "DEFAULT_CHUNK_TOKENS", 10),
-        patch.object(tokens, "approx_token_count", return_value=100),
+        patch.object(chunker, "approx_token_count", return_value=100),
     ):  # Force it to be > 2 * 10
         result = chunk_mixed(long_content, [])
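
The patch target moves with the lookup path: `chunk_mixed` now resolves `approx_token_count` through the `chunker` module, so that is where the stub must be installed. The general `unittest.mock` rule, restated:

```python
from unittest.mock import patch

import memory.common.chunker as chunker

# Patch the attribute on the module the code under test reads it from;
# a mock left on the old tokens module would never be seen.
with patch.object(chunker, "approx_token_count", return_value=100):
    ...  # the stubbed count now exceeds 2 * DEFAULT_CHUNK_TOKENS
```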