editable notes

2025-07-30 22:56:08 +02:00 · 2025-06-02 22:24:19 +02:00 · 2025-06-02 22:24:19 +02:00 · a40e0b50fa
commit a40e0b50fa
parent ac3b48a04c
6 changed files with 211 additions and 61 deletions
--- a/src/memory/api/MCP/tools.py
+++ b/src/memory/api/MCP/tools.py
@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]:
        AI observations (created with 'observe') and other content. Use it to
        understand the tag taxonomy, ensure consistency, or discover related topics.
    When to use:
        - Before creating new observations, to use consistent tag naming
        - To explore what topics/contexts have been tracked
        - To build tag filters for search operations
        - To understand the user's areas of interest
        - For tag autocomplete or suggestion features
    Returns:
        Sorted list of all unique tags in the system. Tags follow patterns like:
        - Topics: "machine-learning", "functional-programming"
@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]:
        identifiers for what observations are about. Use this to understand
        what aspects of the user have been tracked and ensure consistency.
    When to use:
        - Before creating new observations, to use existing subject names
        - To discover what aspects of the user have been observed
        - To build subject filters for targeted searches
        - To ensure consistent naming across observations
        - To get an overview of the user model
    Returns:
        Sorted list of all unique subjects. Common patterns include:
        - "programming_style", "programming_philosophy"
@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]:
        behavior, contradiction, general), this shows what's actually been used.
        Helpful for understanding the distribution of observation types.
    When to use:
        - To see what types of observations have been made
        - To understand the balance of different observation types
        - To check if all standard types are being utilized
        - For analytics or reporting on observation patterns
    Standard types:
        - "belief": Opinions or beliefs the user holds
        - "preference": Things they prefer or favor
@ -642,12 +622,14 @@ async def create_note(
    tags: list[str] = [],
 ) -> dict:
    """
-    Create a note when the user asks for something to be noted down.
+    Create a note when the user asks for something to be noted down or when you think
    something is important to note down.
    Purpose:
        Use this tool when the user explicitly asks to note, save, or record
        something for later reference. Notes don't have to be really short - long
        markdown docs are fine, as long as that was what was asked for.
        You can also use this tool to note down things that are important to you.
    When to use:
        - User says "note down that..." or "please save this"
@ -702,3 +684,88 @@ async def create_note(
        "task_id": task.id,
        "status": "queued",
    }
@mcp.tool()
async def note_files(path: str = "/"):
    """
    List all available note files in the user's note storage system.
    Purpose:
        This tool provides a way to discover and browse the user's organized note
        collection. Notes are stored as Markdown files and can be created either
        through the 'create_note' tool or by the user directly. Use this tool to
        understand what notes exist before reading or referencing them, or to help
        the user navigate their note collection.
    Args:
        path: Directory path to search within the notes collection. Use "/" for the
            root notes directory, or specify subdirectories like "/projects" or
            "/meetings". The path should start with "/" and use forward slashes.
            Examples:
            - "/" - List all notes in the entire collection
            - "/projects" - Only notes in the projects folder
            - "/meetings/2024" - Notes in a specific year's meetings folder
    Returns:
        Sorted list of note paths, each prefixed with "/notes/" and expressed
        relative to the notes storage directory. The search is recursive and only
        includes regular files ending in ".md". An empty list is returned when
        nothing matches, or when `path` resolves to a location outside the notes
        storage directory (for example via ".." segments).
    Examples:
        # List all notes
        all_notes = await note_files("/")
        # Returns: ["/notes/project_ideas.md", "/notes/meetings/daily_standup.md", ...]
        # List notes in a specific folder
        project_notes = await note_files("/projects")
        # Returns: ["/notes/projects/website_redesign.md", "/notes/projects/mobile_app.md"]
        # Check for meeting notes
        meeting_notes = await note_files("/meetings")
        # Returns: ["/notes/meetings/2024-01-15.md", "/notes/meetings/weekly_review.md"]
    """
    notes_dir = settings.NOTES_STORAGE_DIR
    # Strip leading slashes so the join stays relative to the notes directory
    # instead of replacing it with an absolute path.
    root = notes_dir / path.lstrip("/")

    # Containment check: resolve both sides so ".." segments (or symlinks) in the
    # caller-supplied path cannot be used to enumerate files outside the notes.
    try:
        root.resolve().relative_to(notes_dir.resolve())
    except ValueError:
        return []

    # Sort so repeated calls return a stable listing regardless of the order in
    # which the filesystem yields entries.
    return sorted(
        f"/notes/{f.relative_to(notes_dir)}"
        for f in root.rglob("*.md")
        if f.is_file()
    )
@mcp.tool()
def fetch_file(filename: str):
    """
    Retrieve the raw content of a file from the user's storage system.
    Purpose:
        This tool allows you to read the actual content of files stored in the
        user's file system, including notes, documents, images, and other files.
        Use this when you need to access the specific content of a file that has
        been referenced or when the user asks you to read/examine a particular file.
    Args:
        filename: Path to the file to fetch, relative to the file storage directory.
            Should start with "/" and use forward slashes. The path structure depends
            on how files are organized in the storage system.
            Examples:
            - "/notes/project_ideas.md" - A note file
            - "/documents/report.pdf" - A PDF document
            - "/images/diagram.png" - An image file
            - "/emails/important_thread.txt" - Saved email content
    Returns:
        Raw bytes content of the file. For text files (like Markdown notes), you'll
        typically want to decode this as UTF-8 to get readable text:
        ```python
        content_bytes = await fetch_file("/notes/my_note.md")
        content_text = content_bytes.decode('utf-8')
        ```
    Raises:
        FileNotFoundError: If the path does not point to an existing regular file,
            or if it resolves to a location outside the file storage directory
            (for example via ".." segments or a symlink leading elsewhere).
    Security note:
        This tool only accesses files within the configured storage directory,
        ensuring it cannot read arbitrary system files. The requested path is
        resolved and verified to be inside that directory before anything is read.
    """
    storage_root = settings.FILE_STORAGE_DIR.resolve()
    # Resolve the joined path before validating it, so ".." segments and symlinks
    # are collapsed and cannot be used to step outside the storage directory.
    target = (storage_root / filename.lstrip("/")).resolve()
    try:
        target.relative_to(storage_root)
    except ValueError:
        # Report escapes exactly like a missing file: callers keep handling a
        # single exception type, and nothing is revealed about outside paths.
        raise FileNotFoundError(f"File not found: {filename}") from None

    # is_file() rather than exists(): a directory would pass exists() and then
    # fail inside read_bytes() with an error the contract does not document.
    if not target.is_file():
        raise FileNotFoundError(f"File not found: {filename}")
    return target.read_bytes()
--- a/src/memory/common/db/models/source_items.py
+++ b/src/memory/common/db/models/source_items.py
@ -537,12 +537,10 @@ class Note(SourceItem):
    def save_to_file(self):
        if not self.filename:
-            path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md"
+            self.filename = f"{self.subject}.md"
-        else:
+        path = settings.NOTES_STORAGE_DIR / self.filename
            path = pathlib.Path(self.filename)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(cast(str, self.content))
        self.filename = path.as_posix()
    @staticmethod
    def as_text(content: str, subject: str | None = None) -> str:
--- a/src/memory/common/embedding.py
+++ b/src/memory/common/embedding.py
@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:
 def embed_source_item(item: SourceItem) -> list[Chunk]:
    chunks = list(item.data_chunks())
    logger.error(
        f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
    )
    if not chunks:
        return []
--- a/src/memory/workers/tasks/content_processing.py
+++ b/src/memory/workers/tasks/content_processing.py
@ -192,7 +192,7 @@ def create_task_result(
    """
    return {
        f"{type(item).__name__.lower()}_id": item.id,
-        "title": getattr(item, "title", None),
+        "title": getattr(item, "title", None) or getattr(item, "subject", None),
        "status": status,
        "chunks_count": len(item.chunks),
        "embed_status": item.embed_status,
--- a/src/memory/workers/tasks/notes.py
+++ b/src/memory/workers/tasks/notes.py
@ -1,6 +1,7 @@
 import logging
 import pathlib
 from memory.common import settings
 from memory.common.db.connection import make_session
 from memory.common.db.models import Note
 from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES
@ -22,27 +23,15 @@ def sync_note(
    content: str,
    filename: str | None = None,
    note_type: str | None = None,
-    confidence: float = 0.5,
+    confidence: float | None = None,
    tags: list[str] = [],
 ):
    logger.info(f"Syncing note {subject}")
    text = Note.as_text(content, subject)
    sha256 = create_content_hash(text)
-    note = Note(
+    if filename:
-        subject=subject,
+        filename = filename.lstrip("/")
        content=content,
        embed_status="RAW",
        size=len(text.encode("utf-8")),
        modality="note",
        mime_type="text/markdown",
        sha256=sha256,
        note_type=note_type,
        confidence=confidence,
        tags=tags,
        filename=filename,
    )
    note.save_to_file()
    with make_session() as session:
        existing_note = check_content_exists(session, Note, sha256=sha256)
@ -50,6 +39,31 @@ def sync_note(
            logger.info(f"Note already exists: {existing_note.subject}")
            return create_task_result(existing_note, "already_exists")
        note = session.query(Note).filter(Note.filename == filename).one_or_none()
        if not note:
            note = Note(
                modality="note",
                mime_type="text/markdown",
                confidence=confidence or 0.5,
            )
        else:
            logger.info("Editing preexisting note")
        note.content = content  # type: ignore
        note.subject = subject  # type: ignore
        note.filename = filename  # type: ignore
        note.embed_status = "RAW"  # type: ignore
        note.size = len(text.encode("utf-8"))  # type: ignore
        note.sha256 = sha256  # type: ignore
        if note_type:
            note.note_type = note_type  # type: ignore
        if confidence:
            note.confidence = confidence  # type: ignore
        if tags:
            note.tags = tags  # type: ignore
        note.save_to_file()
        return process_content_item(note, session)
--- a/tests/memory/workers/tasks/test_notes_tasks.py
+++ b/tests/memory/workers/tasks/test_notes_tasks.py
@ -1,6 +1,5 @@
 import pytest
 import pathlib
 from decimal import Decimal
 from unittest.mock import Mock, patch
 from memory.common.db.models import Note
@ -12,11 +11,10 @@ from memory.common import settings
@pytest.fixture
 def mock_note_data():
    """Mock note data for testing."""
    test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
    return {
        "subject": "Test Note Subject",
        "content": "This is test note content with enough text to be processed and embedded.",
-        "filename": str(test_filename),
+        "filename": "test_note.md",
        "note_type": "observation",
        "confidence": 0.8,
        "tags": ["test", "note"],
@ -79,6 +77,7 @@ def markdown_files_in_storage():
 def test_sync_note_success(mock_note_data, db_session, qdrant):
    """Test successful note synchronization."""
    result = notes.sync_note(**mock_note_data)
    db_session.commit()
    # Verify the Note was created in the database
    note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
    assert note.filename is not None
    assert note.tags == ["test", "note"]
-    # Verify the result
+    # Verify the result - updated to match actual return format
-    assert result["status"] == "processed"
+    assert result == {
-    assert result["note_id"] == note.id
+        "note_id": note.id,
-    assert (
+        "title": "Test Note Subject",
-        "subject" not in result
+        "status": "processed",
-    )  # create_task_result doesn't include subject for Note
+        "chunks_count": 1,
        "embed_status": "STORED",
        "content_length": 93,
    }
 def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
    assert float(note.confidence) == 0.5  # Default value, convert Decimal to float
    assert note.tags == []  # Default empty list
    assert note.filename is not None and "Minimal Note.md" in note.filename
-    assert result["status"] == "processed"
+
    # Updated to match actual return format
    assert result == {
        "note_id": note.id,
        "title": "Minimal Note",
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "content_length": 31,
    }
 def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
    assert note is not None
    assert note.subject == "Empty Note"
    assert note.content == ""
-    # Empty content with subject header "# Empty Note" still generates chunks
+
-    assert result["status"] == "processed"
+    # Updated to match actual return format
-    assert result["chunks_count"] > 0
+    assert result == {
        "note_id": note.id,
        "title": "Empty Note",
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "content_length": 14,
    }
 def test_sync_note_already_exists(mock_note_data, db_session):
@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session):
        mime_type="text/markdown",
        size=len(text.encode("utf-8")),
        embed_status="RAW",
-        filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"),
+        filename="existing_note.md",
    )
    db_session.add(existing_note)
    db_session.commit()
    result = notes.sync_note(**mock_note_data)
-    assert result["status"] == "already_exists"
+    # Updated to match actual return format for already_exists case
-    assert result["note_id"] == existing_note.id
+    assert result == {
        "note_id": existing_note.id,
        "title": "Existing Note",
        "status": "already_exists",
        "chunks_count": 0,  # Existing note has no chunks
        "embed_status": "RAW",  # Existing note has RAW status
    }
    # Verify no duplicate was created
    notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
    assert len(notes_with_hash) == 1
 def test_sync_note_edit(mock_note_data, db_session):
    """Test that syncing to the filename of an existing note edits that note in place.

    A note is stored under "test_note.md", then sync_note is called with the same
    filename but a different subject and content. The result must refer to the
    existing row (same id) and no second Note may be created.
    """
    # Hash of the ORIGINAL fixture content; the sync below sends different content,
    # so its hash differs from this one.
    text = Note.as_text(mock_note_data["content"], mock_note_data["subject"])
    sha256 = create_content_hash(text)
    # Pre-existing note stored under the same filename the sync call below passes
    # ("test_note.md"), so the call is expected to update this row, not add one.
    existing_note = Note(
        subject="Existing Note",
        content=mock_note_data["content"],
        sha256=sha256,
        modality="note",
        tags=["existing"],
        mime_type="text/markdown",
        size=len(text.encode("utf-8")),
        embed_status="RAW",
        filename="test_note.md",
    )
    db_session.add(existing_note)
    db_session.commit()
    # Same fixture data (including filename) with only content and subject replaced.
    result = notes.sync_note(
        **{**mock_note_data, "content": "bla bla bla", "subject": "blee"}
    )
    # The result reports the existing note's id together with the new subject as title.
    assert result == {
        "note_id": existing_note.id,
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "title": "blee",
        "content_length": 19,
    }
    # Verify no duplicate was created
    assert len(db_session.query(Note).all()) == 1
    # Reload from the database to observe the values written by sync_note.
    db_session.refresh(existing_note)
    assert existing_note.content == "bla bla bla"  # type: ignore
@pytest.mark.parametrize(
    "note_type,confidence,tags",
    [
@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
    assert note.note_type == note_type
    assert float(note.confidence) == confidence  # Convert Decimal to float
    assert note.tags == tags
-    assert result["status"] == "processed"
+
    # Updated to match actual return format
    text = f"# Test Note {note_type}\n\nTest content for parameter testing"
    assert result == {
        "note_id": note.id,
        "title": f"Test Note {note_type}",
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "content_length": len(text.encode("utf-8")),
    }
 def test_sync_note_content_hash_consistency(db_session):