From a40e0b50fa49e302a1c90f905a6dc1e0a730e127 Mon Sep 17 00:00:00 2001 From: Daniel O'Connell Date: Mon, 2 Jun 2025 22:24:19 +0200 Subject: [PATCH] editable notes --- src/memory/api/MCP/tools.py | 109 ++++++++++++++---- src/memory/common/db/models/source_items.py | 6 +- src/memory/common/embedding.py | 3 - .../workers/tasks/content_processing.py | 2 +- src/memory/workers/tasks/notes.py | 44 ++++--- .../memory/workers/tasks/test_notes_tasks.py | 108 ++++++++++++++--- 6 files changed, 211 insertions(+), 61 deletions(-) diff --git a/src/memory/api/MCP/tools.py b/src/memory/api/MCP/tools.py index adcf122..c7c24d9 100644 --- a/src/memory/api/MCP/tools.py +++ b/src/memory/api/MCP/tools.py @@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]: AI observations (created with 'observe') and other content. Use it to understand the tag taxonomy, ensure consistency, or discover related topics. - When to use: - - Before creating new observations, to use consistent tag naming - - To explore what topics/contexts have been tracked - - To build tag filters for search operations - - To understand the user's areas of interest - - For tag autocomplete or suggestion features - Returns: Sorted list of all unique tags in the system. Tags follow patterns like: - Topics: "machine-learning", "functional-programming" @@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]: identifiers for what observations are about. Use this to understand what aspects of the user have been tracked and ensure consistency. - When to use: - - Before creating new observations, to use existing subject names - - To discover what aspects of the user have been observed - - To build subject filters for targeted searches - - To ensure consistent naming across observations - - To get an overview of the user model - Returns: Sorted list of all unique subjects. Common patterns include: - "programming_style", "programming_philosophy" @@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]: behavior, contradiction, general), this shows what's actually been used. Helpful for understanding the distribution of observation types. - When to use: - - To see what types of observations have been made - - To understand the balance of different observation types - - To check if all standard types are being utilized - - For analytics or reporting on observation patterns - Standard types: - "belief": Opinions or beliefs the user holds - "preference": Things they prefer or favor @@ -642,12 +622,14 @@ async def create_note( tags: list[str] = [], ) -> dict: """ - Create a note when the user asks for something to be noted down. + Create a note when the user asks for something to be noted down or when you think + something is important to note down. Purpose: Use this tool when the user explicitly asks to note, save, or record something for later reference. Notes don't have to be really short - long markdown docs are fine, as long as that was what was asked for. + You can also use this tool to note down things that are important to you. When to use: - User says "note down that..." or "please save this" @@ -702,3 +684,88 @@ async def create_note( "task_id": task.id, "status": "queued", } + + +@mcp.tool() +async def note_files(path: str = "/"): + """ + List all available note files in the user's note storage system. + + Purpose: + This tool provides a way to discover and browse the user's organized note + collection. Notes are stored as Markdown files and can be created either + through the 'create_note' tool or by the user directly. Use this tool to + understand what notes exist before reading or referencing them, or to help + the user navigate their note collection. + + Args: + path: Directory path to search within the notes collection. Use "/" for the + root notes directory, or specify subdirectories like "/projects" or + "/meetings". The path should start with "/" and use forward slashes. + Examples: + - "/" - List all notes in the entire collection + - "/projects" - Only notes in the projects folder + - "/meetings/2024" - Notes in a specific year's meetings folder + + Examples: + # List all notes + all_notes = await note_files("/") + # Returns: ["/notes/project_ideas.md", "/notes/meetings/daily_standup.md", ...] + + # List notes in a specific folder + project_notes = await note_files("/projects") + # Returns: ["/notes/projects/website_redesign.md", "/notes/projects/mobile_app.md"] + + # Check for meeting notes + meeting_notes = await note_files("/meetings") + # Returns: ["/notes/meetings/2024-01-15.md", "/notes/meetings/weekly_review.md"] + """ + root = settings.NOTES_STORAGE_DIR / path.lstrip("/") + return [ + f"/notes/{f.relative_to(settings.NOTES_STORAGE_DIR)}" + for f in root.rglob("*.md") + if f.is_file() + ] + + +@mcp.tool() +def fetch_file(filename: str): + """ + Retrieve the raw content of a file from the user's storage system. + + Purpose: + This tool allows you to read the actual content of files stored in the + user's file system, including notes, documents, images, and other files. + Use this when you need to access the specific content of a file that has + been referenced or when the user asks you to read/examine a particular file. + + Args: + filename: Path to the file to fetch, relative to the file storage directory. + Should start with "/" and use forward slashes. The path structure depends + on how files are organized in the storage system. + Examples: + - "/notes/project_ideas.md" - A note file + - "/documents/report.pdf" - A PDF document + - "/images/diagram.png" - An image file + - "/emails/important_thread.txt" - Saved email content + + Returns: + Raw bytes content of the file. For text files (like Markdown notes), you'll + typically want to decode this as UTF-8 to get readable text: + ```python + content_bytes = await fetch_file("/notes/my_note.md") + content_text = content_bytes.decode('utf-8') + ``` + + Raises: + FileNotFoundError: If the specified file doesn't exist at the given path. + + Security note: + This tool only accesses files within the configured storage directory, + ensuring it cannot read arbitrary system files. + """ + path = settings.FILE_STORAGE_DIR / filename.lstrip("/") + if not path.exists(): + raise FileNotFoundError(f"File not found: {filename}") + + return path.read_bytes() diff --git a/src/memory/common/db/models/source_items.py b/src/memory/common/db/models/source_items.py index 6ae6dbb..b8cf52a 100644 --- a/src/memory/common/db/models/source_items.py +++ b/src/memory/common/db/models/source_items.py @@ -537,12 +537,10 @@ class Note(SourceItem): def save_to_file(self): if not self.filename: - path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md" - else: - path = pathlib.Path(self.filename) + self.filename = f"{self.subject}.md" + path = settings.NOTES_STORAGE_DIR / self.filename path.parent.mkdir(parents=True, exist_ok=True) path.write_text(cast(str, self.content)) - self.filename = path.as_posix() @staticmethod def as_text(content: str, subject: str | None = None) -> str: diff --git a/src/memory/common/embedding.py b/src/memory/common/embedding.py index 694d04d..9a4e752 100644 --- a/src/memory/common/embedding.py +++ b/src/memory/common/embedding.py @@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]: def embed_source_item(item: SourceItem) -> list[Chunk]: chunks = list(item.data_chunks()) - logger.error( - f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}" - ) if not chunks: return [] diff --git a/src/memory/workers/tasks/content_processing.py b/src/memory/workers/tasks/content_processing.py index b026986..6378d0e 100644 --- a/src/memory/workers/tasks/content_processing.py +++ b/src/memory/workers/tasks/content_processing.py @@ -192,7 +192,7 @@ def create_task_result( """ return { f"{type(item).__name__.lower()}_id": item.id, - "title": getattr(item, "title", None), + "title": getattr(item, "title", None) or getattr(item, "subject", None), "status": status, "chunks_count": len(item.chunks), "embed_status": item.embed_status, diff --git a/src/memory/workers/tasks/notes.py b/src/memory/workers/tasks/notes.py index 98c87f8..b76c284 100644 --- a/src/memory/workers/tasks/notes.py +++ b/src/memory/workers/tasks/notes.py @@ -1,6 +1,7 @@ import logging import pathlib +from memory.common import settings from memory.common.db.connection import make_session from memory.common.db.models import Note from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES @@ -22,27 +23,15 @@ def sync_note( content: str, filename: str | None = None, note_type: str | None = None, - confidence: float = 0.5, + confidence: float | None = None, tags: list[str] = [], ): logger.info(f"Syncing note {subject}") text = Note.as_text(content, subject) sha256 = create_content_hash(text) - note = Note( - subject=subject, - content=content, - embed_status="RAW", - size=len(text.encode("utf-8")), - modality="note", - mime_type="text/markdown", - sha256=sha256, - note_type=note_type, - confidence=confidence, - tags=tags, - filename=filename, - ) - note.save_to_file() + if filename: + filename = filename.lstrip("/") with make_session() as session: existing_note = check_content_exists(session, Note, sha256=sha256) @@ -50,6 +39,31 @@ def sync_note( logger.info(f"Note already exists: {existing_note.subject}") return create_task_result(existing_note, "already_exists") + note = session.query(Note).filter(Note.filename == filename).one_or_none() + + if not note: + note = Note( + modality="note", + mime_type="text/markdown", + confidence=confidence or 0.5, + ) + else: + logger.info("Editing preexisting note") + note.content = content # type: ignore + note.subject = subject # type: ignore + note.filename = filename # type: ignore + note.embed_status = "RAW" # type: ignore + note.size = len(text.encode("utf-8")) # type: ignore + note.sha256 = sha256 # type: ignore + + if note_type: + note.note_type = note_type # type: ignore + if confidence: + note.confidence = confidence # type: ignore + if tags: + note.tags = tags # type: ignore + + note.save_to_file() return process_content_item(note, session) diff --git a/tests/memory/workers/tasks/test_notes_tasks.py b/tests/memory/workers/tasks/test_notes_tasks.py index 3212d42..7a15f2c 100644 --- a/tests/memory/workers/tasks/test_notes_tasks.py +++ b/tests/memory/workers/tasks/test_notes_tasks.py @@ -1,6 +1,5 @@ import pytest import pathlib -from decimal import Decimal from unittest.mock import Mock, patch from memory.common.db.models import Note @@ -12,11 +11,10 @@ from memory.common import settings @pytest.fixture def mock_note_data(): """Mock note data for testing.""" - test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md" return { "subject": "Test Note Subject", "content": "This is test note content with enough text to be processed and embedded.", - "filename": str(test_filename), + "filename": "test_note.md", "note_type": "observation", "confidence": 0.8, "tags": ["test", "note"], @@ -79,6 +77,7 @@ def markdown_files_in_storage(): def test_sync_note_success(mock_note_data, db_session, qdrant): """Test successful note synchronization.""" result = notes.sync_note(**mock_note_data) + db_session.commit() # Verify the Note was created in the database note = db_session.query(Note).filter_by(subject="Test Note Subject").first() @@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant): assert note.filename is not None assert note.tags == ["test", "note"] - # Verify the result - assert result["status"] == "processed" - assert result["note_id"] == note.id - assert ( - "subject" not in result - ) # create_task_result doesn't include subject for Note + # Verify the result - updated to match actual return format + assert result == { + "note_id": note.id, + "title": "Test Note Subject", + "status": "processed", + "chunks_count": 1, + "embed_status": "STORED", + "content_length": 93, + } def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant): @@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant): assert float(note.confidence) == 0.5 # Default value, convert Decimal to float assert note.tags == [] # Default empty list assert note.filename is not None and "Minimal Note.md" in note.filename - assert result["status"] == "processed" + + # Updated to match actual return format + assert result == { + "note_id": note.id, + "title": "Minimal Note", + "status": "processed", + "chunks_count": 1, + "embed_status": "STORED", + "content_length": 31, + } def test_sync_note_empty_content(mock_empty_note, db_session, qdrant): @@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant): assert note is not None assert note.subject == "Empty Note" assert note.content == "" - # Empty content with subject header "# Empty Note" still generates chunks - assert result["status"] == "processed" - assert result["chunks_count"] > 0 + + # Updated to match actual return format + assert result == { + "note_id": note.id, + "title": "Empty Note", + "status": "processed", + "chunks_count": 1, + "embed_status": "STORED", + "content_length": 14, + } def test_sync_note_already_exists(mock_note_data, db_session): @@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session): mime_type="text/markdown", size=len(text.encode("utf-8")), embed_status="RAW", - filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"), + filename="existing_note.md", ) db_session.add(existing_note) db_session.commit() result = notes.sync_note(**mock_note_data) - assert result["status"] == "already_exists" - assert result["note_id"] == existing_note.id + # Updated to match actual return format for already_exists case + assert result == { + "note_id": existing_note.id, + "title": "Existing Note", + "status": "already_exists", + "chunks_count": 0, # Existing note has no chunks + "embed_status": "RAW", # Existing note has RAW status + } # Verify no duplicate was created notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all() assert len(notes_with_hash) == 1 +def test_sync_note_edit(mock_note_data, db_session): + """Test note sync when content already exists.""" + # Create the content text the same way sync_note does + text = Note.as_text(mock_note_data["content"], mock_note_data["subject"]) + sha256 = create_content_hash(text) + + # Add existing note with same content hash but different filename to avoid file conflicts + existing_note = Note( + subject="Existing Note", + content=mock_note_data["content"], + sha256=sha256, + modality="note", + tags=["existing"], + mime_type="text/markdown", + size=len(text.encode("utf-8")), + embed_status="RAW", + filename="test_note.md", + ) + db_session.add(existing_note) + db_session.commit() + + result = notes.sync_note( + **{**mock_note_data, "content": "bla bla bla", "subject": "blee"} + ) + + assert result == { + "note_id": existing_note.id, + "status": "processed", + "chunks_count": 1, + "embed_status": "STORED", + "title": "blee", + "content_length": 19, + } + + # Verify no duplicate was created + assert len(db_session.query(Note).all()) == 1 + db_session.refresh(existing_note) + assert existing_note.content == "bla bla bla" # type: ignore + + @pytest.mark.parametrize( "note_type,confidence,tags", [ @@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant): assert note.note_type == note_type assert float(note.confidence) == confidence # Convert Decimal to float assert note.tags == tags - assert result["status"] == "processed" + + # Updated to match actual return format + text = f"# Test Note {note_type}\n\nTest content for parameter testing" + assert result == { + "note_id": note.id, + "title": f"Test Note {note_type}", + "status": "processed", + "chunks_count": 1, + "embed_status": "STORED", + "content_length": len(text.encode("utf-8")), + } def test_sync_note_content_hash_consistency(db_session):