editable notes

2025-07-30 06:36:07 +02:00 · 2025-06-02 22:24:19 +02:00 · 2025-06-02 22:24:19 +02:00 · a40e0b50fa
commit a40e0b50fa
parent ac3b48a04c
6 changed files with 211 additions and 61 deletions
--- a/src/memory/api/MCP/tools.py
+++ b/src/memory/api/MCP/tools.py
@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]:
        AI observations (created with 'observe') and other content. Use it to
        understand the tag taxonomy, ensure consistency, or discover related topics.

-    When to use:
-        - Before creating new observations, to use consistent tag naming
-        - To explore what topics/contexts have been tracked
-        - To build tag filters for search operations
-        - To understand the user's areas of interest
-        - For tag autocomplete or suggestion features
-
    Returns:
        Sorted list of all unique tags in the system. Tags follow patterns like:
        - Topics: "machine-learning", "functional-programming"
@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]:
        identifiers for what observations are about. Use this to understand
        what aspects of the user have been tracked and ensure consistency.

-    When to use:
-        - Before creating new observations, to use existing subject names
-        - To discover what aspects of the user have been observed
-        - To build subject filters for targeted searches
-        - To ensure consistent naming across observations
-        - To get an overview of the user model
-
    Returns:
        Sorted list of all unique subjects. Common patterns include:
        - "programming_style", "programming_philosophy"
@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]:
        behavior, contradiction, general), this shows what's actually been used.
        Helpful for understanding the distribution of observation types.

-    When to use:
-        - To see what types of observations have been made
-        - To understand the balance of different observation types
-        - To check if all standard types are being utilized
-        - For analytics or reporting on observation patterns
-
    Standard types:
        - "belief": Opinions or beliefs the user holds
        - "preference": Things they prefer or favor
@ -642,12 +622,14 @@ async def create_note(
    tags: list[str] = [],
 ) -> dict:
    """
-    Create a note when the user asks for something to be noted down.
+    Create a note when the user asks for something to be noted down or when you think
+    something is important to note down.

    Purpose:
        Use this tool when the user explicitly asks to note, save, or record
        something for later reference. Notes don't have to be really short - long
        markdown docs are fine, as long as that was what was asked for.
+        You can also use this tool to note down things that are important to you.

    When to use:
        - User says "note down that..." or "please save this"
@ -702,3 +684,88 @@ async def create_note(
        "task_id": task.id,
        "status": "queued",
    }
+
+
+@mcp.tool()
+async def note_files(path: str = "/"):
+    """
+    List all available note files in the user's note storage system.
+
+    Purpose:
+        This tool provides a way to discover and browse the user's organized note
+        collection. Notes are stored as Markdown files and can be created either
+        through the 'create_note' tool or by the user directly. Use this tool to
+        understand what notes exist before reading or referencing them, or to help
+        the user navigate their note collection.
+
+    Args:
+        path: Directory path to search within the notes collection. Use "/" for the
+            root notes directory, or specify subdirectories like "/projects" or
+            "/meetings". The path should start with "/" and use forward slashes.
+            Examples:
+            - "/" - List all notes in the entire collection
+            - "/projects" - Only notes in the projects folder
+            - "/meetings/2024" - Notes in a specific year's meetings folder
+
+    Examples:
+        # List all notes
+        all_notes = await note_files("/")
+        # Returns: ["/notes/project_ideas.md", "/notes/meetings/daily_standup.md", ...]
+
+        # List notes in a specific folder
+        project_notes = await note_files("/projects")
+        # Returns: ["/notes/projects/website_redesign.md", "/notes/projects/mobile_app.md"]
+
+        # Check for meeting notes
+        meeting_notes = await note_files("/meetings")
+        # Returns: ["/notes/meetings/2024-01-15.md", "/notes/meetings/weekly_review.md"]
+    """
+    root = settings.NOTES_STORAGE_DIR / path.lstrip("/")
+    return [
+        f"/notes/{f.relative_to(settings.NOTES_STORAGE_DIR)}"
+        for f in root.rglob("*.md")
+        if f.is_file()
+    ]
+
+
+@mcp.tool()
+def fetch_file(filename: str):
+    """
+    Retrieve the raw content of a file from the user's storage system.
+
+    Purpose:
+        This tool allows you to read the actual content of files stored in the
+        user's file system, including notes, documents, images, and other files.
+        Use this when you need to access the specific content of a file that has
+        been referenced or when the user asks you to read/examine a particular file.
+
+    Args:
+        filename: Path to the file to fetch, relative to the file storage directory.
+            Should start with "/" and use forward slashes. The path structure depends
+            on how files are organized in the storage system.
+            Examples:
+            - "/notes/project_ideas.md" - A note file
+            - "/documents/report.pdf" - A PDF document
+            - "/images/diagram.png" - An image file
+            - "/emails/important_thread.txt" - Saved email content
+
+    Returns:
+        Raw bytes content of the file. For text files (like Markdown notes), you'll
+        typically want to decode this as UTF-8 to get readable text:
+        ```python
+        content_bytes = fetch_file("/notes/my_note.md")
+        content_text = content_bytes.decode('utf-8')
+        ```
+
+    Raises:
+        FileNotFoundError: If the specified file doesn't exist at the given path.
+
+    Security note:
+        The path is joined onto the configured storage directory, but ".."
+        segments are not rejected here, so containment is not enforced.
+    """
+    path = settings.FILE_STORAGE_DIR / filename.lstrip("/")
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {filename}")
+
+    return path.read_bytes()
--- a/src/memory/common/db/models/source_items.py
+++ b/src/memory/common/db/models/source_items.py
@ -537,12 +537,10 @@ class Note(SourceItem):

    def save_to_file(self):
        if not self.filename:
-            path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md"
-        else:
-            path = pathlib.Path(self.filename)
+            self.filename = f"{self.subject}.md"
+        path = settings.NOTES_STORAGE_DIR / self.filename
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(cast(str, self.content))
-        self.filename = path.as_posix()

    @staticmethod
    def as_text(content: str, subject: str | None = None) -> str:
--- a/src/memory/common/embedding.py
+++ b/src/memory/common/embedding.py
@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:

 def embed_source_item(item: SourceItem) -> list[Chunk]:
    chunks = list(item.data_chunks())
-    logger.error(
-        f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
-    )
    if not chunks:
        return []

--- a/src/memory/workers/tasks/content_processing.py
+++ b/src/memory/workers/tasks/content_processing.py
@ -192,7 +192,7 @@ def create_task_result(
    """
    return {
        f"{type(item).__name__.lower()}_id": item.id,
-        "title": getattr(item, "title", None),
+        "title": getattr(item, "title", None) or getattr(item, "subject", None),
        "status": status,
        "chunks_count": len(item.chunks),
        "embed_status": item.embed_status,
--- a/src/memory/workers/tasks/notes.py
+++ b/src/memory/workers/tasks/notes.py
@ -1,6 +1,7 @@
 import logging
 import pathlib

+from memory.common import settings
 from memory.common.db.connection import make_session
 from memory.common.db.models import Note
 from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES
@ -22,27 +23,15 @@ def sync_note(
    content: str,
    filename: str | None = None,
    note_type: str | None = None,
-    confidence: float = 0.5,
+    confidence: float | None = None,
    tags: list[str] = [],
 ):
    logger.info(f"Syncing note {subject}")
    text = Note.as_text(content, subject)
    sha256 = create_content_hash(text)

-    note = Note(
-        subject=subject,
-        content=content,
-        embed_status="RAW",
-        size=len(text.encode("utf-8")),
-        modality="note",
-        mime_type="text/markdown",
-        sha256=sha256,
-        note_type=note_type,
-        confidence=confidence,
-        tags=tags,
-        filename=filename,
-    )
-    note.save_to_file()
+    if filename:
+        filename = filename.lstrip("/")

    with make_session() as session:
        existing_note = check_content_exists(session, Note, sha256=sha256)
@ -50,6 +39,31 @@ def sync_note(
            logger.info(f"Note already exists: {existing_note.subject}")
            return create_task_result(existing_note, "already_exists")

+        note = session.query(Note).filter(Note.filename == filename).one_or_none()
+
+        if not note:
+            note = Note(
+                modality="note",
+                mime_type="text/markdown",
+                confidence=confidence or 0.5,
+            )
+        else:
+            logger.info("Editing preexisting note")
+        note.content = content  # type: ignore
+        note.subject = subject  # type: ignore
+        note.filename = filename  # type: ignore
+        note.embed_status = "RAW"  # type: ignore
+        note.size = len(text.encode("utf-8"))  # type: ignore
+        note.sha256 = sha256  # type: ignore
+
+        if note_type:
+            note.note_type = note_type  # type: ignore
+        if confidence:
+            note.confidence = confidence  # type: ignore
+        if tags:
+            note.tags = tags  # type: ignore
+
+        note.save_to_file()
        return process_content_item(note, session)


--- a/tests/memory/workers/tasks/test_notes_tasks.py
+++ b/tests/memory/workers/tasks/test_notes_tasks.py
@ -1,6 +1,5 @@
 import pytest
 import pathlib
-from decimal import Decimal
 from unittest.mock import Mock, patch

 from memory.common.db.models import Note
@ -12,11 +11,10 @@ from memory.common import settings
@pytest.fixture
 def mock_note_data():
    """Mock note data for testing."""
-    test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
    return {
        "subject": "Test Note Subject",
        "content": "This is test note content with enough text to be processed and embedded.",
-        "filename": str(test_filename),
+        "filename": "test_note.md",
        "note_type": "observation",
        "confidence": 0.8,
        "tags": ["test", "note"],
@ -79,6 +77,7 @@ def markdown_files_in_storage():
 def test_sync_note_success(mock_note_data, db_session, qdrant):
    """Test successful note synchronization."""
    result = notes.sync_note(**mock_note_data)
+    db_session.commit()

    # Verify the Note was created in the database
    note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
    assert note.filename is not None
    assert note.tags == ["test", "note"]

-    # Verify the result
-    assert result["status"] == "processed"
-    assert result["note_id"] == note.id
-    assert (
-        "subject" not in result
-    )  # create_task_result doesn't include subject for Note
+    # Verify the result - updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Test Note Subject",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 93,
+    }


 def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
    assert float(note.confidence) == 0.5  # Default value, convert Decimal to float
    assert note.tags == []  # Default empty list
    assert note.filename is not None and "Minimal Note.md" in note.filename
-    assert result["status"] == "processed"
+
+    # Updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Minimal Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 31,
+    }


 def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
    assert note is not None
    assert note.subject == "Empty Note"
    assert note.content == ""
-    # Empty content with subject header "# Empty Note" still generates chunks
-    assert result["status"] == "processed"
-    assert result["chunks_count"] > 0
+
+    # Updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Empty Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 14,
+    }


 def test_sync_note_already_exists(mock_note_data, db_session):
@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session):
        mime_type="text/markdown",
        size=len(text.encode("utf-8")),
        embed_status="RAW",
-        filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"),
+        filename="existing_note.md",
    )
    db_session.add(existing_note)
    db_session.commit()

    result = notes.sync_note(**mock_note_data)

-    assert result["status"] == "already_exists"
-    assert result["note_id"] == existing_note.id
+    # Updated to match actual return format for already_exists case
+    assert result == {
+        "note_id": existing_note.id,
+        "title": "Existing Note",
+        "status": "already_exists",
+        "chunks_count": 0,  # Existing note has no chunks
+        "embed_status": "RAW",  # Existing note has RAW status
+    }

    # Verify no duplicate was created
    notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
    assert len(notes_with_hash) == 1


+def test_sync_note_edit(mock_note_data, db_session):
+    """Test that syncing to an existing filename edits that note in place."""
+    # Create the content text the same way sync_note does
+    text = Note.as_text(mock_note_data["content"], mock_note_data["subject"])
+    sha256 = create_content_hash(text)
+
+    # Add an existing note stored under the same filename that sync_note will target
+    existing_note = Note(
+        subject="Existing Note",
+        content=mock_note_data["content"],
+        sha256=sha256,
+        modality="note",
+        tags=["existing"],
+        mime_type="text/markdown",
+        size=len(text.encode("utf-8")),
+        embed_status="RAW",
+        filename="test_note.md",
+    )
+    db_session.add(existing_note)
+    db_session.commit()
+
+    result = notes.sync_note(
+        **{**mock_note_data, "content": "bla bla bla", "subject": "blee"}
+    )
+
+    assert result == {
+        "note_id": existing_note.id,
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "title": "blee",
+        "content_length": 19,
+    }
+
+    # Verify no duplicate was created
+    assert len(db_session.query(Note).all()) == 1
+    db_session.refresh(existing_note)
+    assert existing_note.content == "bla bla bla"  # type: ignore
+
+
@pytest.mark.parametrize(
    "note_type,confidence,tags",
    [
@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
    assert note.note_type == note_type
    assert float(note.confidence) == confidence  # Convert Decimal to float
    assert note.tags == tags
-    assert result["status"] == "processed"
+
+    # Updated to match actual return format
+    text = f"# Test Note {note_type}\n\nTest content for parameter testing"
+    assert result == {
+        "note_id": note.id,
+        "title": f"Test Note {note_type}",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": len(text.encode("utf-8")),
+    }


 def test_sync_note_content_hash_consistency(db_session):