editable notes

This commit is contained in:
Daniel O'Connell 2025-06-02 22:24:19 +02:00
parent ac3b48a04c
commit a40e0b50fa
6 changed files with 211 additions and 61 deletions

View File

@ -80,13 +80,6 @@ async def get_all_tags() -> list[str]:
AI observations (created with 'observe') and other content. Use it to
understand the tag taxonomy, ensure consistency, or discover related topics.
When to use:
- Before creating new observations, to use consistent tag naming
- To explore what topics/contexts have been tracked
- To build tag filters for search operations
- To understand the user's areas of interest
- For tag autocomplete or suggestion features
Returns:
Sorted list of all unique tags in the system. Tags follow patterns like:
- Topics: "machine-learning", "functional-programming"
@ -110,13 +103,6 @@ async def get_all_subjects() -> list[str]:
identifiers for what observations are about. Use this to understand
what aspects of the user have been tracked and ensure consistency.
When to use:
- Before creating new observations, to use existing subject names
- To discover what aspects of the user have been observed
- To build subject filters for targeted searches
- To ensure consistent naming across observations
- To get an overview of the user model
Returns:
Sorted list of all unique subjects. Common patterns include:
- "programming_style", "programming_philosophy"
@ -142,12 +128,6 @@ async def get_all_observation_types() -> list[str]:
behavior, contradiction, general), this shows what's actually been used.
Helpful for understanding the distribution of observation types.
When to use:
- To see what types of observations have been made
- To understand the balance of different observation types
- To check if all standard types are being utilized
- For analytics or reporting on observation patterns
Standard types:
- "belief": Opinions or beliefs the user holds
- "preference": Things they prefer or favor
@ -642,12 +622,14 @@ async def create_note(
tags: list[str] = [],
) -> dict:
"""
Create a note when the user asks for something to be noted down.
Create a note when the user asks for something to be noted down or when you think
something is important to note down.
Purpose:
Use this tool when the user explicitly asks to note, save, or record
something for later reference. Notes don't have to be really short - long
markdown docs are fine, as long as that was what was asked for.
You can also use this tool to note down things that are important to you.
When to use:
- User says "note down that..." or "please save this"
@ -702,3 +684,88 @@ async def create_note(
"task_id": task.id,
"status": "queued",
}
@mcp.tool()
async def note_files(path: str = "/") -> list[str]:
    """
    List all available note files in the user's note storage system.

    Purpose:
        This tool provides a way to discover and browse the user's organized note
        collection. Notes are stored as Markdown files and can be created either
        through the 'create_note' tool or by the user directly. Use this tool to
        understand what notes exist before reading or referencing them, or to help
        the user navigate their note collection.

    Args:
        path: Directory path to search within the notes collection. Use "/" for the
            root notes directory, or specify subdirectories like "/projects" or
            "/meetings". The path should start with "/" and use forward slashes.
            For example:
            - "/" - List all notes in the entire collection
            - "/projects" - Only notes in the projects folder
            - "/meetings/2024" - Notes in a specific year's meetings folder

    Returns:
        Sorted list of note paths, each prefixed with "/notes/". The search is
        recursive and only includes "*.md" files. A directory that does not
        exist yields an empty list.

    Raises:
        ValueError: If the path resolves to a location outside the notes
            directory (for example by using ".." segments).

    Examples:
        # List all notes
        all_notes = await note_files("/")
        # Returns: ["/notes/meetings/daily_standup.md", "/notes/project_ideas.md", ...]

        # List notes in a specific folder
        project_notes = await note_files("/projects")
        # Returns: ["/notes/projects/mobile_app.md", "/notes/projects/website_redesign.md"]

        # Check for meeting notes
        meeting_notes = await note_files("/meetings")
        # Returns: ["/notes/meetings/2024-01-15.md", "/notes/meetings/weekly_review.md"]
    """
    notes_dir = settings.NOTES_STORAGE_DIR.resolve()
    root = (notes_dir / path.lstrip("/")).resolve()

    # Resolving first collapses ".." segments and follows symlinks, so this
    # check compares real locations rather than the caller-supplied spelling.
    if not root.is_relative_to(notes_dir):
        raise ValueError(f"Path is outside the notes directory: {path}")

    # rglob yields entries in filesystem order; sort for a stable listing.
    return sorted(
        f"/notes/{f.relative_to(notes_dir).as_posix()}"
        for f in root.rglob("*.md")
        if f.is_file()
    )
@mcp.tool()
def fetch_file(filename: str):
    """
    Retrieve the raw content of a file from the user's storage system.

    Purpose:
        This tool allows you to read the actual content of files stored in the
        user's file system, including notes, documents, images, and other files.
        Use this when you need to access the specific content of a file that has
        been referenced or when the user asks you to read/examine a particular file.

    Args:
        filename: Path to the file to fetch, relative to the file storage directory.
            Should start with "/" and use forward slashes. The path structure depends
            on how files are organized in the storage system.
            For example:
            - "/notes/project_ideas.md" - A note file
            - "/documents/report.pdf" - A PDF document
            - "/images/diagram.png" - An image file
            - "/emails/important_thread.txt" - Saved email content

    Returns:
        Raw bytes content of the file. For text files (like Markdown notes), you'll
        typically want to decode this as UTF-8 to get readable text:

        ```python
        content_bytes = fetch_file("/notes/my_note.md")
        content_text = content_bytes.decode('utf-8')
        ```

    Raises:
        FileNotFoundError: If the specified file doesn't exist at the given path,
            is not a regular file, or resolves to a location outside the
            storage directory.

    Security note:
        The requested path is resolved (collapsing ".." segments and following
        symlinks) and must remain inside the configured storage directory, so
        this tool cannot be used to read arbitrary system files.
    """
    storage_dir = settings.FILE_STORAGE_DIR.resolve()
    path = (storage_dir / filename.lstrip("/")).resolve()

    # Out-of-tree paths are reported exactly like missing files so the tool
    # does not reveal whether something exists outside the storage directory.
    # is_file() (rather than exists()) also rejects directories up front
    # instead of letting read_bytes() fail with IsADirectoryError.
    if not path.is_relative_to(storage_dir) or not path.is_file():
        raise FileNotFoundError(f"File not found: {filename}")

    return path.read_bytes()

View File

@ -537,12 +537,10 @@ class Note(SourceItem):
def save_to_file(self):
if not self.filename:
path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md"
else:
path = pathlib.Path(self.filename)
self.filename = f"{self.subject}.md"
path = settings.NOTES_STORAGE_DIR / self.filename
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(cast(str, self.content))
self.filename = path.as_posix()
@staticmethod
def as_text(content: str, subject: str | None = None) -> str:

View File

@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:
def embed_source_item(item: SourceItem) -> list[Chunk]:
chunks = list(item.data_chunks())
logger.error(
f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
)
if not chunks:
return []

View File

@ -192,7 +192,7 @@ def create_task_result(
"""
return {
f"{type(item).__name__.lower()}_id": item.id,
"title": getattr(item, "title", None),
"title": getattr(item, "title", None) or getattr(item, "subject", None),
"status": status,
"chunks_count": len(item.chunks),
"embed_status": item.embed_status,

View File

@ -1,6 +1,7 @@
import logging
import pathlib
from memory.common import settings
from memory.common.db.connection import make_session
from memory.common.db.models import Note
from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES
@ -22,27 +23,15 @@ def sync_note(
content: str,
filename: str | None = None,
note_type: str | None = None,
confidence: float = 0.5,
confidence: float | None = None,
tags: list[str] = [],
):
logger.info(f"Syncing note {subject}")
text = Note.as_text(content, subject)
sha256 = create_content_hash(text)
note = Note(
subject=subject,
content=content,
embed_status="RAW",
size=len(text.encode("utf-8")),
modality="note",
mime_type="text/markdown",
sha256=sha256,
note_type=note_type,
confidence=confidence,
tags=tags,
filename=filename,
)
note.save_to_file()
if filename:
filename = filename.lstrip("/")
with make_session() as session:
existing_note = check_content_exists(session, Note, sha256=sha256)
@ -50,6 +39,31 @@ def sync_note(
logger.info(f"Note already exists: {existing_note.subject}")
return create_task_result(existing_note, "already_exists")
note = session.query(Note).filter(Note.filename == filename).one_or_none()
if not note:
note = Note(
modality="note",
mime_type="text/markdown",
confidence=confidence or 0.5,
)
else:
logger.info("Editing preexisting note")
note.content = content # type: ignore
note.subject = subject # type: ignore
note.filename = filename # type: ignore
note.embed_status = "RAW" # type: ignore
note.size = len(text.encode("utf-8")) # type: ignore
note.sha256 = sha256 # type: ignore
if note_type:
note.note_type = note_type # type: ignore
if confidence:
note.confidence = confidence # type: ignore
if tags:
note.tags = tags # type: ignore
note.save_to_file()
return process_content_item(note, session)

View File

@ -1,6 +1,5 @@
import pytest
import pathlib
from decimal import Decimal
from unittest.mock import Mock, patch
from memory.common.db.models import Note
@ -12,11 +11,10 @@ from memory.common import settings
@pytest.fixture
def mock_note_data():
"""Mock note data for testing."""
test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
return {
"subject": "Test Note Subject",
"content": "This is test note content with enough text to be processed and embedded.",
"filename": str(test_filename),
"filename": "test_note.md",
"note_type": "observation",
"confidence": 0.8,
"tags": ["test", "note"],
@ -79,6 +77,7 @@ def markdown_files_in_storage():
def test_sync_note_success(mock_note_data, db_session, qdrant):
"""Test successful note synchronization."""
result = notes.sync_note(**mock_note_data)
db_session.commit()
# Verify the Note was created in the database
note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@ -95,12 +94,15 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
assert note.filename is not None
assert note.tags == ["test", "note"]
# Verify the result
assert result["status"] == "processed"
assert result["note_id"] == note.id
assert (
"subject" not in result
) # create_task_result doesn't include subject for Note
# Verify the result - updated to match actual return format
assert result == {
"note_id": note.id,
"title": "Test Note Subject",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": 93,
}
def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@ -115,7 +117,16 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
assert float(note.confidence) == 0.5 # Default value, convert Decimal to float
assert note.tags == [] # Default empty list
assert note.filename is not None and "Minimal Note.md" in note.filename
assert result["status"] == "processed"
# Updated to match actual return format
assert result == {
"note_id": note.id,
"title": "Minimal Note",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": 31,
}
def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
assert note is not None
assert note.subject == "Empty Note"
assert note.content == ""
# Empty content with subject header "# Empty Note" still generates chunks
assert result["status"] == "processed"
assert result["chunks_count"] > 0
# Updated to match actual return format
assert result == {
"note_id": note.id,
"title": "Empty Note",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": 14,
}
def test_sync_note_already_exists(mock_note_data, db_session):
@ -148,21 +166,67 @@ def test_sync_note_already_exists(mock_note_data, db_session):
mime_type="text/markdown",
size=len(text.encode("utf-8")),
embed_status="RAW",
filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"),
filename="existing_note.md",
)
db_session.add(existing_note)
db_session.commit()
result = notes.sync_note(**mock_note_data)
assert result["status"] == "already_exists"
assert result["note_id"] == existing_note.id
# Updated to match actual return format for already_exists case
assert result == {
"note_id": existing_note.id,
"title": "Existing Note",
"status": "already_exists",
"chunks_count": 0, # Existing note has no chunks
"embed_status": "RAW", # Existing note has RAW status
}
# Verify no duplicate was created
notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
assert len(notes_with_hash) == 1
def test_sync_note_edit(mock_note_data, db_session):
    """Test that syncing new content to an already-stored filename edits that note."""
    # Hash and size describe the note as stored *before* the edit.
    original_text = Note.as_text(
        mock_note_data["content"], mock_note_data["subject"]
    )
    original_hash = create_content_hash(original_text)

    # Seed a note under the filename the sync call below will pass in, so the
    # call is expected to update this row rather than insert a second one.
    stored = Note(
        subject="Existing Note",
        content=mock_note_data["content"],
        sha256=original_hash,
        modality="note",
        tags=["existing"],
        mime_type="text/markdown",
        size=len(original_text.encode("utf-8")),
        embed_status="RAW",
        filename="test_note.md",
    )
    db_session.add(stored)
    db_session.commit()

    # Same payload as the fixture, but with different content and subject.
    edited_payload = dict(mock_note_data, content="bla bla bla", subject="blee")
    result = notes.sync_note(**edited_payload)

    expected = {
        "note_id": stored.id,
        "status": "processed",
        "chunks_count": 1,
        "embed_status": "STORED",
        "title": "blee",
        "content_length": 19,
    }
    assert result == expected

    # The edit must reuse the seeded row, not add another one.
    all_notes = db_session.query(Note).all()
    assert len(all_notes) == 1

    # Reload from the database to observe what sync_note persisted.
    db_session.refresh(stored)
    assert stored.content == "bla bla bla"  # type: ignore
@pytest.mark.parametrize(
"note_type,confidence,tags",
[
@ -187,7 +251,17 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
assert note.note_type == note_type
assert float(note.confidence) == confidence # Convert Decimal to float
assert note.tags == tags
assert result["status"] == "processed"
# Updated to match actual return format
text = f"# Test Note {note_type}\n\nTest content for parameter testing"
assert result == {
"note_id": note.id,
"title": f"Test Note {note_type}",
"status": "processed",
"chunks_count": 1,
"embed_status": "STORED",
"content_length": len(text.encode("utf-8")),
}
def test_sync_note_content_hash_consistency(db_session):