From a40e0b50fa49e302a1c90f905a6dc1e0a730e127 Mon Sep 17 00:00:00 2001
From: Daniel O'Connell <github@ahiru.pl>
Date: Mon, 2 Jun 2025 22:24:19 +0200
Subject: [PATCH] editable notes

---
 src/memory/api/MCP/tools.py                   | 109 ++++++++++++++----
 src/memory/common/db/models/source_items.py   |   6 +-
 src/memory/common/embedding.py                |   3 -
 .../workers/tasks/content_processing.py       |   2 +-
 src/memory/workers/tasks/notes.py             |  44 ++++---
 .../memory/workers/tasks/test_notes_tasks.py  | 108 ++++++++++++++---
 6 files changed, 211 insertions(+), 61 deletions(-)

diff --git a/src/memory/api/MCP/tools.py b/src/memory/api/MCP/tools.py
index adcf122..c7c24d9 100644
--- a/src/memory/api/MCP/tools.py
+++ b/src/memory/api/MCP/tools.py
@@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]:
         AI observations (created with 'observe') and other content. Use it to
         understand the tag taxonomy, ensure consistency, or discover related topics.
 
-    When to use:
-        - Before creating new observations, to use consistent tag naming
-        - To explore what topics/contexts have been tracked
-        - To build tag filters for search operations
-        - To understand the user's areas of interest
-        - For tag autocomplete or suggestion features
-
     Returns:
         Sorted list of all unique tags in the system. Tags follow patterns like:
         - Topics: "machine-learning", "functional-programming"
@@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]:
         identifiers for what observations are about. Use this to understand
         what aspects of the user have been tracked and ensure consistency.
 
-    When to use:
-        - Before creating new observations, to use existing subject names
-        - To discover what aspects of the user have been observed
-        - To build subject filters for targeted searches
-        - To ensure consistent naming across observations
-        - To get an overview of the user model
-
     Returns:
         Sorted list of all unique subjects. Common patterns include:
         - "programming_style", "programming_philosophy"
@@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]:
         behavior, contradiction, general), this shows what's actually been used.
         Helpful for understanding the distribution of observation types.
 
-    When to use:
-        - To see what types of observations have been made
-        - To understand the balance of different observation types
-        - To check if all standard types are being utilized
-        - For analytics or reporting on observation patterns
-
     Standard types:
         - "belief": Opinions or beliefs the user holds
         - "preference": Things they prefer or favor
@@ -642,12 +622,14 @@ async def create_note(
     tags: list[str] = [],
 ) -> dict:
     """
-    Create a note when the user asks for something to be noted down.
+    Create a note when the user asks for something to be noted down or when you think
+    something is important to note down.
 
     Purpose:
         Use this tool when the user explicitly asks to note, save, or record
         something for later reference. Notes don't have to be really short - long
         markdown docs are fine, as long as that was what was asked for.
+        You can also use this tool to note down things that are important to you.
 
     When to use:
         - User says "note down that..." or "please save this"
@@ -702,3 +684,88 @@ async def create_note(
         "task_id": task.id,
         "status": "queued",
     }
+
+
+@mcp.tool()
+async def note_files(path: str = "/") -> list[str]:
+    """
+    List all available note files in the user's note storage system.
+
+    Purpose:
+        This tool provides a way to discover and browse the user's organized note
+        collection. Notes are stored as Markdown files and can be created either
+        through the 'create_note' tool or by the user directly. Use this tool to
+        understand what notes exist before reading or referencing them, or to help
+        the user navigate their note collection.
+
+    Args:
+        path: Directory path to search within the notes collection. Use "/" for the
+            root notes directory, or specify subdirectories like "/projects" or
+            "/meetings". The path should start with "/" and use forward slashes.
+            Examples:
+            - "/" - List all notes in the entire collection
+            - "/projects" - Only notes in the projects folder
+            - "/meetings/2024" - Notes in a specific year's meetings folder
+
+    Returns:
+        Sorted list of "/notes/..." paths for every Markdown file found recursively
+        under `path`; the list is empty when no notes are found.
+
+    Raises:
+        ValueError: If `path` resolves outside the notes storage directory.
+
+    Examples:
+        project_notes = await note_files("/projects")
+        # Returns: ["/notes/projects/mobile_app.md", "/notes/projects/website_redesign.md"]
+    """
+    base = settings.NOTES_STORAGE_DIR.resolve()
+    # Resolve before checking so ".." segments cannot walk out of the notes dir.
+    root = (base / path.lstrip("/")).resolve()
+    if not root.is_relative_to(base):
+        raise ValueError(f"Path escapes the notes directory: {path}")
+    return sorted(
+        f"/notes/{f.relative_to(base)}" for f in root.rglob("*.md") if f.is_file()
+    )
+
+
+@mcp.tool()
+def fetch_file(filename: str):
+    """
+    Retrieve the raw content of a file from the user's storage system.
+
+    Purpose:
+        This tool allows you to read the actual content of files stored in the
+        user's file system, including notes, documents, images, and other files.
+        Use this when you need to access the specific content of a file that has
+        been referenced or when the user asks you to read/examine a particular file.
+
+    Args:
+        filename: Path to the file to fetch, relative to the file storage directory.
+            Should start with "/" and use forward slashes. The path structure depends
+            on how files are organized in the storage system.
+            Examples:
+            - "/notes/project_ideas.md" - A note file
+            - "/documents/report.pdf" - A PDF document
+            - "/images/diagram.png" - An image file
+
+    Returns:
+        Raw bytes content of the file. For text files (like Markdown notes), you'll
+        typically want to decode this as UTF-8, e.g.
+        `fetch_file("/notes/my_note.md").decode("utf-8")`.
+
+    Raises:
+        ValueError: If the path resolves outside the file storage directory.
+        FileNotFoundError: If no regular file exists at the given path.
+
+    Security note:
+        The path is resolved and checked against the storage directory, so ".."
+        segments and symlinks cannot be used to read arbitrary system files.
+    """
+    root = settings.FILE_STORAGE_DIR.resolve()
+    # Resolve first so ".." segments and symlinks are collapsed before the check.
+    path = (root / filename.lstrip("/")).resolve()
+    if not path.is_relative_to(root):
+        raise ValueError(f"Path escapes the storage directory: {filename}")
+    if not path.is_file():
+        raise FileNotFoundError(f"File not found: {filename}")
+    return path.read_bytes()
diff --git a/src/memory/common/db/models/source_items.py b/src/memory/common/db/models/source_items.py
index 6ae6dbb..b8cf52a 100644
--- a/src/memory/common/db/models/source_items.py
+++ b/src/memory/common/db/models/source_items.py
@@ -537,12 +537,10 @@ class Note(SourceItem):
 
     def save_to_file(self):
         if not self.filename:
-            path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md"
-        else:
-            path = pathlib.Path(self.filename)
+            self.filename = f"{self.subject}.md"
+        path = settings.NOTES_STORAGE_DIR / self.filename
         path.parent.mkdir(parents=True, exist_ok=True)
         path.write_text(cast(str, self.content))
-        self.filename = path.as_posix()
 
     @staticmethod
     def as_text(content: str, subject: str | None = None) -> str:
diff --git a/src/memory/common/embedding.py b/src/memory/common/embedding.py
index 694d04d..9a4e752 100644
--- a/src/memory/common/embedding.py
+++ b/src/memory/common/embedding.py
@@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:
 
 def embed_source_item(item: SourceItem) -> list[Chunk]:
     chunks = list(item.data_chunks())
-    logger.error(
-        f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
-    )
     if not chunks:
         return []
 
diff --git a/src/memory/workers/tasks/content_processing.py b/src/memory/workers/tasks/content_processing.py
index b026986..6378d0e 100644
--- a/src/memory/workers/tasks/content_processing.py
+++ b/src/memory/workers/tasks/content_processing.py
@@ -192,7 +192,7 @@ def create_task_result(
     """
     return {
         f"{type(item).__name__.lower()}_id": item.id,
-        "title": getattr(item, "title", None),
+        "title": getattr(item, "title", None) or getattr(item, "subject", None),
         "status": status,
         "chunks_count": len(item.chunks),
         "embed_status": item.embed_status,
diff --git a/src/memory/workers/tasks/notes.py b/src/memory/workers/tasks/notes.py
index 98c87f8..b76c284 100644
--- a/src/memory/workers/tasks/notes.py
+++ b/src/memory/workers/tasks/notes.py
@@ -1,6 +1,7 @@
 import logging
 import pathlib
 
+from memory.common import settings
 from memory.common.db.connection import make_session
 from memory.common.db.models import Note
 from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES
@@ -22,27 +23,15 @@ def sync_note(
     content: str,
     filename: str | None = None,
     note_type: str | None = None,
-    confidence: float = 0.5,
+    confidence: float | None = None,
     tags: list[str] = [],
 ):
     logger.info(f"Syncing note {subject}")
     text = Note.as_text(content, subject)
     sha256 = create_content_hash(text)
 
-    note = Note(
-        subject=subject,
-        content=content,
-        embed_status="RAW",
-        size=len(text.encode("utf-8")),
-        modality="note",
-        mime_type="text/markdown",
-        sha256=sha256,
-        note_type=note_type,
-        confidence=confidence,
-        tags=tags,
-        filename=filename,
-    )
-    note.save_to_file()
+    if filename:
+        filename = filename.lstrip("/")
 
     with make_session() as session:
         existing_note = check_content_exists(session, Note, sha256=sha256)
@@ -50,6 +39,31 @@ def sync_note(
             logger.info(f"Note already exists: {existing_note.subject}")
             return create_task_result(existing_note, "already_exists")
 
+        # Only look up an editable note when a filename was supplied: comparing
+        # against None would match unrelated rows whose filename is NULL.
+        query = session.query(Note).filter(Note.filename == filename)
+        note = query.one_or_none() if filename else None
+        if not note:
+            # The default confidence applies to new notes only.
+            note = Note(modality="note", mime_type="text/markdown", confidence=0.5)
+        else:
+            logger.info("Editing preexisting note")
+        note.content = content  # type: ignore
+        note.subject = subject  # type: ignore
+        note.filename = filename  # type: ignore
+        note.embed_status = "RAW"  # type: ignore
+        note.size = len(text.encode("utf-8"))  # type: ignore
+        note.sha256 = sha256  # type: ignore
+
+        if note_type:
+            note.note_type = note_type  # type: ignore
+        # Compare against None so an explicit confidence of 0.0 is not dropped.
+        if confidence is not None:
+            note.confidence = confidence  # type: ignore
+        if tags:
+            note.tags = tags  # type: ignore
+
+        note.save_to_file()
         return process_content_item(note, session)
 
 
diff --git a/tests/memory/workers/tasks/test_notes_tasks.py b/tests/memory/workers/tasks/test_notes_tasks.py
index 3212d42..7a15f2c 100644
--- a/tests/memory/workers/tasks/test_notes_tasks.py
+++ b/tests/memory/workers/tasks/test_notes_tasks.py
@@ -1,6 +1,5 @@
 import pytest
 import pathlib
-from decimal import Decimal
 from unittest.mock import Mock, patch
 
 from memory.common.db.models import Note
@@ -12,11 +11,10 @@ from memory.common import settings
 @pytest.fixture
 def mock_note_data():
     """Mock note data for testing."""
-    test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
     return {
         "subject": "Test Note Subject",
         "content": "This is test note content with enough text to be processed and embedded.",
-        "filename": str(test_filename),
+        "filename": "test_note.md",
         "note_type": "observation",
         "confidence": 0.8,
         "tags": ["test", "note"],
@@ -79,6 +77,7 @@ def markdown_files_in_storage():
 def test_sync_note_success(mock_note_data, db_session, qdrant):
     """Test successful note synchronization."""
     result = notes.sync_note(**mock_note_data)
+    db_session.commit()
 
     # Verify the Note was created in the database
     note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
     assert note.filename is not None
     assert note.tags == ["test", "note"]
 
-    # Verify the result
-    assert result["status"] == "processed"
-    assert result["note_id"] == note.id
-    assert (
-        "subject" not in result
-    )  # create_task_result doesn't include subject for Note
+    # Verify the complete task result payload
+    assert result == {
+        "note_id": note.id,
+        "title": "Test Note Subject",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 93,
+    }
 
 
 def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
     assert float(note.confidence) == 0.5  # Default value, convert Decimal to float
     assert note.tags == []  # Default empty list
     assert note.filename is not None and "Minimal Note.md" in note.filename
-    assert result["status"] == "processed"
+
+    # Verify the complete task result payload
+    assert result == {
+        "note_id": note.id,
+        "title": "Minimal Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 31,
+    }
 
 
 def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
     assert note is not None
     assert note.subject == "Empty Note"
     assert note.content == ""
-    # Empty content with subject header "# Empty Note" still generates chunks
-    assert result["status"] == "processed"
-    assert result["chunks_count"] > 0
+
+    # Verify the complete task result payload
+    assert result == {
+        "note_id": note.id,
+        "title": "Empty Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 14,
+    }
 
 
 def test_sync_note_already_exists(mock_note_data, db_session):
@@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session):
         mime_type="text/markdown",
         size=len(text.encode("utf-8")),
         embed_status="RAW",
-        filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"),
+        filename="existing_note.md",
     )
     db_session.add(existing_note)
     db_session.commit()
 
     result = notes.sync_note(**mock_note_data)
 
-    assert result["status"] == "already_exists"
-    assert result["note_id"] == existing_note.id
+    # The already_exists result describes the stored note, not the incoming one
+    assert result == {
+        "note_id": existing_note.id,
+        "title": "Existing Note",
+        "status": "already_exists",
+        "chunks_count": 0,  # Existing note has no chunks
+        "embed_status": "RAW",  # Existing note has RAW status
+    }
 
     # Verify no duplicate was created
     notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
     assert len(notes_with_hash) == 1
 
 
+def test_sync_note_edit(mock_note_data, db_session):
+    """Test that syncing new content to an existing filename edits that note in place."""
+    # Create the content text the same way sync_note does
+    text = Note.as_text(mock_note_data["content"], mock_note_data["subject"])
+    sha256 = create_content_hash(text)
+
+    # Pre-existing note stored under the same filename ("test_note.md") that sync_note will target
+    existing_note = Note(
+        subject="Existing Note",
+        content=mock_note_data["content"],
+        sha256=sha256,
+        modality="note",
+        tags=["existing"],
+        mime_type="text/markdown",
+        size=len(text.encode("utf-8")),
+        embed_status="RAW",
+        filename="test_note.md",
+    )
+    db_session.add(existing_note)
+    db_session.commit()
+
+    result = notes.sync_note(
+        **{**mock_note_data, "content": "bla bla bla", "subject": "blee"}
+    )
+
+    assert result == {
+        "note_id": existing_note.id,
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "title": "blee",
+        "content_length": 19,
+    }
+
+    # Verify the existing row was updated rather than a second note being created
+    assert len(db_session.query(Note).all()) == 1
+    db_session.refresh(existing_note)
+    assert existing_note.content == "bla bla bla"  # type: ignore
+
+
 @pytest.mark.parametrize(
     "note_type,confidence,tags",
     [
@@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
     assert note.note_type == note_type
     assert float(note.confidence) == confidence  # Convert Decimal to float
     assert note.tags == tags
-    assert result["status"] == "processed"
+
+    # Verify the complete task result payload
+    text = f"# Test Note {note_type}\n\nTest content for parameter testing"
+    assert result == {
+        "note_id": note.id,
+        "title": f"Test Note {note_type}",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": len(text.encode("utf-8")),
+    }
 
 
 def test_sync_note_content_hash_consistency(db_session):