editable notes

This commit is contained in:
Daniel O'Connell 2025-06-02 22:24:19 +02:00
parent ac3b48a04c
commit a40e0b50fa
6 changed files with 211 additions and 61 deletions

View File

@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]:
AI observations (created with 'observe') and other content. Use it to AI observations (created with 'observe') and other content. Use it to
understand the tag taxonomy, ensure consistency, or discover related topics. understand the tag taxonomy, ensure consistency, or discover related topics.
When to use:
- Before creating new observations, to use consistent tag naming
- To explore what topics/contexts have been tracked
- To build tag filters for search operations
- To understand the user's areas of interest
- For tag autocomplete or suggestion features
Returns: Returns:
Sorted list of all unique tags in the system. Tags follow patterns like: Sorted list of all unique tags in the system. Tags follow patterns like:
- Topics: "machine-learning", "functional-programming" - Topics: "machine-learning", "functional-programming"
@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]:
identifiers for what observations are about. Use this to understand identifiers for what observations are about. Use this to understand
what aspects of the user have been tracked and ensure consistency. what aspects of the user have been tracked and ensure consistency.
When to use:
- Before creating new observations, to use existing subject names
- To discover what aspects of the user have been observed
- To build subject filters for targeted searches
- To ensure consistent naming across observations
- To get an overview of the user model
Returns: Returns:
Sorted list of all unique subjects. Common patterns include: Sorted list of all unique subjects. Common patterns include:
- "programming_style", "programming_philosophy" - "programming_style", "programming_philosophy"
@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]:
behavior, contradiction, general), this shows what's actually been used. behavior, contradiction, general), this shows what's actually been used.
Helpful for understanding the distribution of observation types. Helpful for understanding the distribution of observation types.
When to use:
- To see what types of observations have been made
- To understand the balance of different observation types
- To check if all standard types are being utilized
- For analytics or reporting on observation patterns
Standard types: Standard types:
- "belief": Opinions or beliefs the user holds - "belief": Opinions or beliefs the user holds
- "preference": Things they prefer or favor - "preference": Things they prefer or favor
@ -642,12 +622,14 @@ async def create_note(
tags: list[str] = [], tags: list[str] = [],
) -> dict: ) -> dict:
""" """
Create a note when the user asks for something to be noted down. Create a note when the user asks for something to be noted down or when you think
something is important to note down.
Purpose: Purpose:
Use this tool when the user explicitly asks to note, save, or record Use this tool when the user explicitly asks to note, save, or record
something for later reference. Notes don't have to be really short - long something for later reference. Notes don't have to be really short - long
markdown docs are fine, as long as that was what was asked for. markdown docs are fine, as long as that was what was asked for.
You can also use this tool to note down things that are important to you.
When to use: When to use:
- User says "note down that..." or "please save this" - User says "note down that..." or "please save this"
@ -702,3 +684,88 @@ async def create_note(
"task_id": task.id, "task_id": task.id,
"status": "queued", "status": "queued",
} }
@mcp.tool()
async def note_files(path: str = "/"):
    """
    List all available note files in the user's note storage system.

    Purpose:
        This tool provides a way to discover and browse the user's organized note
        collection. Notes are stored as Markdown files and can be created either
        through the 'create_note' tool or by the user directly. Use this tool to
        understand what notes exist before reading or referencing them, or to help
        the user navigate their note collection.

    Args:
        path: Directory path to search within the notes collection. Use "/" for the
            root notes directory, or specify subdirectories like "/projects" or
            "/meetings". The path should start with "/" and use forward slashes.
            Examples:
            - "/" - List all notes in the entire collection
            - "/projects" - Only notes in the projects folder
            - "/meetings/2024" - Notes in a specific year's meetings folder

    Returns:
        A list of "/notes/..."-prefixed paths, one per Markdown file found
        (recursively) under the requested directory. The list is empty when the
        directory contains no Markdown files.

    Raises:
        ValueError: If `path` resolves to a location outside the notes directory
            (for example via ".." segments).

    Examples:
        # List all notes
        all_notes = await note_files("/")
        # Returns: ["/notes/project_ideas.md", "/notes/meetings/daily_standup.md", ...]

        # List notes in a specific folder
        project_notes = await note_files("/projects")
        # Returns: ["/notes/projects/website_redesign.md", "/notes/projects/mobile_app.md"]

        # Check for meeting notes
        meeting_notes = await note_files("/meetings")
        # Returns: ["/notes/meetings/2024-01-15.md", "/notes/meetings/weekly_review.md"]
    """
    notes_root = settings.NOTES_STORAGE_DIR.resolve()
    # Resolve before searching so ".." segments or symlinks cannot move the
    # search root outside the notes directory.
    root = (notes_root / path.lstrip("/")).resolve()
    if not root.is_relative_to(notes_root):
        raise ValueError(f"Path is outside the notes directory: {path}")

    return [
        f"/notes/{f.relative_to(notes_root)}"
        for f in root.rglob("*.md")
        if f.is_file()
    ]
@mcp.tool()
def fetch_file(filename: str):
    """
    Retrieve the raw content of a file from the user's storage system.

    Purpose:
        This tool allows you to read the actual content of files stored in the
        user's file system, including notes, documents, images, and other files.
        Use this when you need to access the specific content of a file that has
        been referenced or when the user asks you to read/examine a particular file.

    Args:
        filename: Path to the file to fetch, relative to the file storage directory.
            Should start with "/" and use forward slashes. The path structure depends
            on how files are organized in the storage system.
            Examples:
            - "/notes/project_ideas.md" - A note file
            - "/documents/report.pdf" - A PDF document
            - "/images/diagram.png" - An image file
            - "/emails/important_thread.txt" - Saved email content

    Returns:
        Raw bytes content of the file. For text files (like Markdown notes), you'll
        typically want to decode this as UTF-8 to get readable text:
        ```python
        content_bytes = await fetch_file("/notes/my_note.md")
        content_text = content_bytes.decode('utf-8')
        ```

    Raises:
        FileNotFoundError: If the path does not name a regular file inside the
            storage directory. This covers missing files, directories, and paths
            that resolve outside the storage directory.

    Security note:
        The requested path is resolved (following ".." segments and symlinks) and
        must remain inside the configured storage directory; anything else is
        rejected, so this tool cannot read arbitrary system files.
    """
    storage_root = settings.FILE_STORAGE_DIR.resolve()
    path = (storage_root / filename.lstrip("/")).resolve()
    # Escaping paths are reported as "not found" rather than with a distinct
    # error, so callers cannot probe for files outside the storage directory.
    if not path.is_relative_to(storage_root) or not path.is_file():
        raise FileNotFoundError(f"File not found: {filename}")
    return path.read_bytes()

View File

@ -537,12 +537,10 @@ class Note(SourceItem):
def save_to_file(self): def save_to_file(self):
if not self.filename: if not self.filename:
path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md" self.filename = f"{self.subject}.md"
else: path = settings.NOTES_STORAGE_DIR / self.filename
path = pathlib.Path(self.filename)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(cast(str, self.content)) path.write_text(cast(str, self.content))
self.filename = path.as_posix()
@staticmethod @staticmethod
def as_text(content: str, subject: str | None = None) -> str: def as_text(content: str, subject: str | None = None) -> str:

View File

@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:
def embed_source_item(item: SourceItem) -> list[Chunk]: def embed_source_item(item: SourceItem) -> list[Chunk]:
chunks = list(item.data_chunks()) chunks = list(item.data_chunks())
logger.error(
f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
)
if not chunks: if not chunks:
return [] return []

View File

@ -192,7 +192,7 @@ def create_task_result(
""" """
return { return {
f"{type(item).__name__.lower()}_id": item.id, f"{type(item).__name__.lower()}_id": item.id,
"title": getattr(item, "title", None), "title": getattr(item, "title", None) or getattr(item, "subject", None),
"status": status, "status": status,
"chunks_count": len(item.chunks), "chunks_count": len(item.chunks),
"embed_status": item.embed_status, "embed_status": item.embed_status,

View File

@ -1,6 +1,7 @@
import logging import logging
import pathlib import pathlib
from memory.common import settings
from memory.common.db.connection import make_session from memory.common.db.connection import make_session
from memory.common.db.models import Note from memory.common.db.models import Note
from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES
@ -22,27 +23,15 @@ def sync_note(
content: str, content: str,
filename: str | None = None, filename: str | None = None,
note_type: str | None = None, note_type: str | None = None,
confidence: float = 0.5, confidence: float | None = None,
tags: list[str] = [], tags: list[str] = [],
): ):
logger.info(f"Syncing note {subject}") logger.info(f"Syncing note {subject}")
text = Note.as_text(content, subject) text = Note.as_text(content, subject)
sha256 = create_content_hash(text) sha256 = create_content_hash(text)
note = Note( if filename:
subject=subject, filename = filename.lstrip("/")
content=content,
embed_status="RAW",
size=len(text.encode("utf-8")),
modality="note",
mime_type="text/markdown",
sha256=sha256,
note_type=note_type,
confidence=confidence,
tags=tags,
filename=filename,
)
note.save_to_file()
with make_session() as session: with make_session() as session:
existing_note = check_content_exists(session, Note, sha256=sha256) existing_note = check_content_exists(session, Note, sha256=sha256)
@ -50,6 +39,31 @@ def sync_note(
logger.info(f"Note already exists: {existing_note.subject}") logger.info(f"Note already exists: {existing_note.subject}")
return create_task_result(existing_note, "already_exists") return create_task_result(existing_note, "already_exists")
note = session.query(Note).filter(Note.filename == filename).one_or_none()
if not note:
note = Note(
modality="note",
mime_type="text/markdown",
confidence=confidence or 0.5,
)
else:
logger.info("Editing preexisting note")
note.content = content # type: ignore
note.subject = subject # type: ignore
note.filename = filename # type: ignore
note.embed_status = "RAW" # type: ignore
note.size = len(text.encode("utf-8")) # type: ignore
note.sha256 = sha256 # type: ignore
if note_type:
note.note_type = note_type # type: ignore
if confidence:
note.confidence = confidence # type: ignore
if tags:
note.tags = tags # type: ignore
note.save_to_file()
return process_content_item(note, session) return process_content_item(note, session)

View File

@ -1,6 +1,5 @@
import pytest import pytest
import pathlib import pathlib
from decimal import Decimal
from unittest.mock import Mock, patch from unittest.mock import Mock, patch
from memory.common.db.models import Note from memory.common.db.models import Note
@ -12,11 +11,10 @@ from memory.common import settings
@pytest.fixture @pytest.fixture
def mock_note_data(): def mock_note_data():
"""Mock note data for testing.""" """Mock note data for testing."""
test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
return { return {
"subject": "Test Note Subject", "subject": "Test Note Subject",
"content": "This is test note content with enough text to be processed and embedded.", "content": "This is test note content with enough text to be processed and embedded.",
"filename": str(test_filename), "filename": "test_note.md",
"note_type": "observation", "note_type": "observation",
"confidence": 0.8, "confidence": 0.8,
"tags": ["test", "note"], "tags": ["test", "note"],
@ -79,6 +77,7 @@ def markdown_files_in_storage():
def test_sync_note_success(mock_note_data, db_session, qdrant): def test_sync_note_success(mock_note_data, db_session, qdrant):
"""Test successful note synchronization.""" """Test successful note synchronization."""
result = notes.sync_note(**mock_note_data) result = notes.sync_note(**mock_note_data)
db_session.commit()
# Verify the Note was created in the database # Verify the Note was created in the database
note = db_session.query(Note).filter_by(subject="Test Note Subject").first() note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
assert note.filename is not None assert note.filename is not None
assert note.tags == ["test", "note"] assert note.tags == ["test", "note"]
# Verify the result # Verify the result - updated to match actual return format
assert result["status"] == "processed" assert result == {
assert result["note_id"] == note.id "note_id": note.id,
assert ( "title": "Test Note Subject",
"subject" not in result "status": "processed",
) # create_task_result doesn't include subject for Note "chunks_count": 1,
"embed_status": "STORED",
"content_length": 93,
}
def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant): def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
assert float(note.confidence) == 0.5 # Default value, convert Decimal to float assert float(note.confidence) == 0.5 # Default value, convert Decimal to float
assert note.tags == [] # Default empty list assert note.tags == [] # Default empty list
assert note.filename is not None and "Minimal Note.md" in note.filename assert note.filename is not None and "Minimal Note.md" in note.filename
assert result["status"] == "processed"
# Updated to match actual return format
assert result == {
"note_id": note.id,
"title": "Minimal Note",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": 31,
}
def test_sync_note_empty_content(mock_empty_note, db_session, qdrant): def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
assert note is not None assert note is not None
assert note.subject == "Empty Note" assert note.subject == "Empty Note"
assert note.content == "" assert note.content == ""
# Empty content with subject header "# Empty Note" still generates chunks
assert result["status"] == "processed" # Updated to match actual return format
assert result["chunks_count"] > 0 assert result == {
"note_id": note.id,
"title": "Empty Note",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": 14,
}
def test_sync_note_already_exists(mock_note_data, db_session): def test_sync_note_already_exists(mock_note_data, db_session):
@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session):
mime_type="text/markdown", mime_type="text/markdown",
size=len(text.encode("utf-8")), size=len(text.encode("utf-8")),
embed_status="RAW", embed_status="RAW",
filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"), filename="existing_note.md",
) )
db_session.add(existing_note) db_session.add(existing_note)
db_session.commit() db_session.commit()
result = notes.sync_note(**mock_note_data) result = notes.sync_note(**mock_note_data)
assert result["status"] == "already_exists" # Updated to match actual return format for already_exists case
assert result["note_id"] == existing_note.id assert result == {
"note_id": existing_note.id,
"title": "Existing Note",
"status": "already_exists",
"chunks_count": 0, # Existing note has no chunks
"embed_status": "RAW", # Existing note has RAW status
}
# Verify no duplicate was created # Verify no duplicate was created
notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all() notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
assert len(notes_with_hash) == 1 assert len(notes_with_hash) == 1
def test_sync_note_edit(mock_note_data, db_session):
    """Test that syncing new content to an existing note's filename edits it in place."""
    # Hash of the fixture's original content, built the same way sync_note does.
    # The sync below sends different content, so the duplicate-content check
    # does not match this note.
    text = Note.as_text(mock_note_data["content"], mock_note_data["subject"])
    sha256 = create_content_hash(text)

    # Pre-existing note stored under the same filename as mock_note_data
    # ("test_note.md"), so sync_note's filename lookup finds this row
    existing_note = Note(
        subject="Existing Note",
        content=mock_note_data["content"],
        sha256=sha256,
        modality="note",
        tags=["existing"],
        mime_type="text/markdown",
        size=len(text.encode("utf-8")),
        embed_status="RAW",
        filename="test_note.md",
    )
    db_session.add(existing_note)
    db_session.commit()

    # Same filename, new subject and content
    result = notes.sync_note(
        **{**mock_note_data, "content": "bla bla bla", "subject": "blee"}
    )

    # The result refers to the pre-existing row; content_length 19 is the
    # UTF-8 size of "# blee\n\nbla bla bla"
    assert result == {
        "note_id": existing_note.id,
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "title": "blee",
        "content_length": 19,
    }

    # Verify no duplicate was created
    assert len(db_session.query(Note).all()) == 1
    db_session.refresh(existing_note)
    assert existing_note.content == "bla bla bla"  # type: ignore
@pytest.mark.parametrize( @pytest.mark.parametrize(
"note_type,confidence,tags", "note_type,confidence,tags",
[ [
@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
assert note.note_type == note_type assert note.note_type == note_type
assert float(note.confidence) == confidence # Convert Decimal to float assert float(note.confidence) == confidence # Convert Decimal to float
assert note.tags == tags assert note.tags == tags
assert result["status"] == "processed"
# Updated to match actual return format
text = f"# Test Note {note_type}\n\nTest content for parameter testing"
assert result == {
"note_id": note.id,
"title": f"Test Note {note_type}",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": len(text.encode("utf-8")),
}
def test_sync_note_content_hash_consistency(db_session): def test_sync_note_content_hash_consistency(db_session):