synch people

This commit is contained in:
mruwnik 2025-12-24 14:38:14 +00:00
parent 47629fc5fb
commit 5d79fa349e
9 changed files with 738 additions and 19 deletions

View File

@ -71,6 +71,7 @@ SYNC_GITHUB_ITEM = f"{GITHUB_ROOT}.sync_github_item"
# People tasks # People tasks
SYNC_PERSON = f"{PEOPLE_ROOT}.sync_person" SYNC_PERSON = f"{PEOPLE_ROOT}.sync_person"
UPDATE_PERSON = f"{PEOPLE_ROOT}.update_person" UPDATE_PERSON = f"{PEOPLE_ROOT}.update_person"
SYNC_PROFILE_FROM_FILE = f"{PEOPLE_ROOT}.sync_profile_from_file"
def get_broker_url() -> str: def get_broker_url() -> str:

View File

@ -2,8 +2,10 @@
Database models for tracking people. Database models for tracking people.
""" """
import re
from typing import Annotated, Sequence, cast from typing import Annotated, Sequence, cast
import yaml
from sqlalchemy import ( from sqlalchemy import (
ARRAY, ARRAY,
BigInteger, BigInteger,
@ -15,6 +17,7 @@ from sqlalchemy import (
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
import memory.common.extract as extract import memory.common.extract as extract
from memory.common import settings
from memory.common.db.models.source_item import ( from memory.common.db.models.source_item import (
SourceItem, SourceItem,
@ -93,3 +96,70 @@ class Person(SourceItem):
@classmethod @classmethod
def get_collections(cls) -> list[str]: def get_collections(cls) -> list[str]:
return ["person"] return ["person"]
def to_profile_markdown(self) -> str:
    """Render this person as markdown with a YAML frontmatter header.

    The header always carries ``identifier`` and ``display_name``;
    ``aliases``, ``contact_info`` and ``tags`` are included only when
    they are non-empty. When ``content`` is set it follows the header,
    separated from it by one blank line.
    """
    meta = {
        "identifier": self.identifier,
        "display_name": self.display_name,
    }
    # (frontmatter key, current value, copier into a plain container)
    optional_fields = (
        ("aliases", self.aliases, list),
        ("contact_info", self.contact_info, dict),
        ("tags", self.tags, list),
    )
    for key, value, to_plain in optional_fields:
        if value:
            meta[key] = to_plain(value)

    header = yaml.dump(meta, default_flow_style=False, allow_unicode=True).strip()
    lines = ["---", header, "---"]
    if self.content:
        lines.extend(["", self.content])
    return "\n".join(lines)
@classmethod
def from_profile_markdown(cls, content: str) -> dict:
    """Parse profile markdown with YAML frontmatter into Person fields.

    Args:
        content: Full text of a profile note.

    Returns:
        A dict holding whichever of ``identifier``, ``display_name``,
        ``aliases``, ``contact_info`` and ``tags`` appear in the
        frontmatter, plus ``notes`` with the stripped body when the body
        is non-empty. If there is no frontmatter, the frontmatter is not
        valid YAML, or it does not parse to a mapping, the whole stripped
        text is returned as ``{"notes": ...}`` (``None`` when blank).
    """
    whole_text = content.strip()
    notes_only = {"notes": whole_text if whole_text else None}

    # Match YAML frontmatter between --- delimiters
    frontmatter_pattern = r"^---\s*\n(.*?)\n---\s*\n?"
    match = re.match(frontmatter_pattern, content, re.DOTALL)
    if not match:
        return notes_only

    try:
        data = yaml.safe_load(match.group(1)) or {}
    except yaml.YAMLError:
        return notes_only

    # safe_load may return a scalar or a list; on a string, `"key" in data`
    # would be a substring test and indexing it would raise TypeError.
    if not isinstance(data, dict):
        return notes_only

    known_fields = ("identifier", "display_name", "aliases", "contact_info", "tags")
    result = {field: data[field] for field in known_fields if field in data}

    body = content[match.end() :].strip()
    if body:
        result["notes"] = body
    return result
def get_profile_path(self) -> str:
    """Return the profile note path, relative to the notes storage root.

    The path is ``<settings.PROFILES_FOLDER>/<identifier>.md``.
    """
    folder = settings.PROFILES_FOLDER
    return f"{folder}/{self.identifier}.md"
def save_profile_note(self) -> None:
    """Write this person's profile markdown to the notes storage directory.

    The target is ``settings.NOTES_STORAGE_DIR`` joined with
    ``get_profile_path()``. Missing parent directories are created and
    the file is written with ``write_text``.
    """
    path = settings.NOTES_STORAGE_DIR / self.get_profile_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # The frontmatter is dumped with allow_unicode=True, so non-ASCII text
    # is emitted verbatim; pin the encoding rather than use the locale's.
    path.write_text(self.to_profile_markdown(), encoding="utf-8")

View File

@ -76,6 +76,7 @@ WEBPAGE_STORAGE_DIR = pathlib.Path(
NOTES_STORAGE_DIR = pathlib.Path( NOTES_STORAGE_DIR = pathlib.Path(
os.getenv("NOTES_STORAGE_DIR", FILE_STORAGE_DIR / "notes") os.getenv("NOTES_STORAGE_DIR", FILE_STORAGE_DIR / "notes")
) )
PROFILES_FOLDER = os.getenv("PROFILES_FOLDER", "profiles")
DISCORD_STORAGE_DIR = pathlib.Path( DISCORD_STORAGE_DIR = pathlib.Path(
os.getenv("DISCORD_STORAGE_DIR", FILE_STORAGE_DIR / "discord") os.getenv("DISCORD_STORAGE_DIR", FILE_STORAGE_DIR / "discord")
) )

View File

@ -142,6 +142,10 @@ def _needs_reindex(existing: GithubItem, new_data: GithubIssueData) -> bool:
if existing_fields != new_fields: if existing_fields != new_fields:
return True return True
# Check if PR is missing pr_data (needs backfill)
if new_data["kind"] == "pr" and new_data.get("pr_data") and not existing.pr_data:
return True
return False return False
@ -431,8 +435,12 @@ def sync_github_repo(repo_id: int, force_full: bool = False) -> dict[str, Any]:
@app.task(name=SYNC_ALL_GITHUB_REPOS) @app.task(name=SYNC_ALL_GITHUB_REPOS)
def sync_all_github_repos() -> list[dict[str, Any]]: def sync_all_github_repos(force_full: bool = False) -> list[dict[str, Any]]:
"""Trigger sync for all active GitHub repos.""" """Trigger sync for all active GitHub repos.
Args:
force_full: If True, re-sync all items instead of incremental sync.
"""
with make_session() as session: with make_session() as session:
active_repos = ( active_repos = (
session.query(GithubRepo) session.query(GithubRepo)
@ -445,9 +453,11 @@ def sync_all_github_repos() -> list[dict[str, Any]]:
{ {
"repo_id": repo.id, "repo_id": repo.id,
"repo_path": repo.repo_path, "repo_path": repo.repo_path,
"task_id": sync_github_repo.delay(repo.id).id, "task_id": sync_github_repo.delay(repo.id, force_full=force_full).id,
} }
for repo in active_repos for repo in active_repos
] ]
logger.info(f"Scheduled sync for {len(results)} active GitHub repos") logger.info(
f"Scheduled {'full' if force_full else 'incremental'} sync for {len(results)} active GitHub repos"
)
return results return results

View File

@ -13,6 +13,7 @@ from memory.common.celery_app import (
SYNC_NOTES, SYNC_NOTES,
SETUP_GIT_NOTES, SETUP_GIT_NOTES,
TRACK_GIT_CHANGES, TRACK_GIT_CHANGES,
SYNC_PROFILE_FROM_FILE,
) )
from memory.workers.tasks.content_processing import ( from memory.workers.tasks.content_processing import (
check_content_exists, check_content_exists,
@ -149,20 +150,44 @@ def sync_notes(folder: str):
logger.info(f"Syncing notes from {folder}") logger.info(f"Syncing notes from {folder}")
new_notes = 0 new_notes = 0
new_profiles = 0
all_files = list(path.rglob("*.md")) all_files = list(path.rglob("*.md"))
# Import here to avoid circular imports
from memory.common.db.models import Person
from memory.workers.tasks.people import sync_profile_from_file
with make_session() as session: with make_session() as session:
for filename in all_files: for filename in all_files:
if not check_content_exists(session, Note, filename=filename.as_posix()): relative_path = filename.relative_to(path).as_posix()
new_notes += 1
sync_note.delay( # Check if this is a profile file
subject=filename.stem, if relative_path.startswith(f"{settings.PROFILES_FOLDER}/"):
content=filename.read_text(), # Check if person already exists
filename=filename.relative_to(path).as_posix(), identifier = filename.stem
existing = (
session.query(Person)
.filter(Person.identifier == identifier)
.first()
) )
if not existing:
new_profiles += 1
sync_profile_from_file.delay(relative_path)
else:
if not check_content_exists(
session, Note, filename=filename.as_posix()
):
new_notes += 1
sync_note.delay(
subject=filename.stem,
content=filename.read_text(),
filename=relative_path,
)
return { return {
"notes_num": len(all_files), "notes_num": len(all_files),
"new_notes": new_notes, "new_notes": new_notes,
"new_profiles": new_profiles,
} }
@ -233,12 +258,20 @@ def track_git_changes():
if not file.exists(): if not file.exists():
logger.warning(f"File not found: {filename}") logger.warning(f"File not found: {filename}")
continue continue
sync_note.delay(
subject=file.stem, # Check if this is a profile file
content=file.read_text(), if filename.startswith(f"{settings.PROFILES_FOLDER}/"):
filename=filename, # Import here to avoid circular imports
save_to_file=False, from memory.workers.tasks.people import sync_profile_from_file
)
sync_profile_from_file.delay(filename)
else:
sync_note.delay(
subject=file.stem,
content=file.read_text(),
filename=filename,
save_to_file=False,
)
return { return {
"status": "success", "status": "success",

View File

@ -4,9 +4,10 @@ Celery tasks for tracking people.
import logging import logging
from memory.common import settings
from memory.common.db.connection import make_session from memory.common.db.connection import make_session
from memory.common.db.models import Person from memory.common.db.models import Person
from memory.common.celery_app import app, SYNC_PERSON, UPDATE_PERSON from memory.common.celery_app import app, SYNC_PERSON, UPDATE_PERSON, SYNC_PROFILE_FROM_FILE
from memory.workers.tasks.content_processing import ( from memory.workers.tasks.content_processing import (
check_content_exists, check_content_exists,
create_content_hash, create_content_hash,
@ -14,6 +15,7 @@ from memory.workers.tasks.content_processing import (
process_content_item, process_content_item,
safe_task_execution, safe_task_execution,
) )
from memory.workers.tasks.notes import git_tracking
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -29,6 +31,25 @@ def _deep_merge(base: dict, updates: dict) -> dict:
return result return result
def _save_profile_note(person_id: int | None, save_to_file: bool = True) -> None:
    """Save person data to profile note file with git tracking.

    Args:
        person_id: Primary key of the Person to export. Callers pass the
            value of ``result.get("person_id")``, so it may be None.
        save_to_file: When False the function does nothing.
    """
    if not save_to_file:
        return

    if person_id is None:
        # Nothing to look up; skip the query with a NULL primary key.
        logger.warning(f"Person not found for profile save: {person_id}")
        return

    with make_session() as session:
        person = session.get(Person, person_id)
        if not person:
            logger.warning(f"Person not found for profile save: {person_id}")
            return

        profile_path = person.get_profile_path()
        commit_message = f"Sync profile {profile_path}: {person.display_name}"
        with git_tracking(settings.NOTES_STORAGE_DIR, commit_message):
            person.save_profile_note()
@app.task(name=SYNC_PERSON) @app.task(name=SYNC_PERSON)
@safe_task_execution @safe_task_execution
def sync_person( def sync_person(
@ -38,6 +59,7 @@ def sync_person(
contact_info: dict | None = None, contact_info: dict | None = None,
tags: list[str] | None = None, tags: list[str] | None = None,
notes: str | None = None, notes: str | None = None,
save_to_file: bool = True,
): ):
""" """
Create or update a person in the knowledge base. Create or update a person in the knowledge base.
@ -49,6 +71,7 @@ def sync_person(
contact_info: Contact information dict contact_info: Contact information dict
tags: Categorization tags tags: Categorization tags
notes: Free-form notes about the person notes: Free-form notes about the person
save_to_file: Whether to save to profile note file (default True)
""" """
logger.info(f"Syncing person: {identifier}") logger.info(f"Syncing person: {identifier}")
@ -82,7 +105,13 @@ def sync_person(
size=len(notes or ""), size=len(notes or ""),
) )
return process_content_item(person, session) result = process_content_item(person, session)
# Save profile note outside transaction (git operations are slow)
if result.get("status") == "processed":
_save_profile_note(result.get("person_id"), save_to_file)
return result
@app.task(name=UPDATE_PERSON) @app.task(name=UPDATE_PERSON)
@ -95,6 +124,7 @@ def update_person(
tags: list[str] | None = None, tags: list[str] | None = None,
notes: str | None = None, notes: str | None = None,
replace_notes: bool = False, replace_notes: bool = False,
save_to_file: bool = True,
): ):
""" """
Update a person with merge semantics. Update a person with merge semantics.
@ -105,6 +135,9 @@ def update_person(
- contact_info: Deep merge with existing - contact_info: Deep merge with existing
- tags: Union with existing - tags: Union with existing
- notes: Append to existing (or replace if replace_notes=True) - notes: Append to existing (or replace if replace_notes=True)
Args:
save_to_file: Whether to save to profile note file (default True)
""" """
logger.info(f"Updating person: {identifier}") logger.info(f"Updating person: {identifier}")
@ -142,4 +175,89 @@ def update_person(
person.size = len(person.content or "") person.size = len(person.content or "")
person.embed_status = "RAW" # Re-embed with updated content person.embed_status = "RAW" # Re-embed with updated content
return process_content_item(person, session) result = process_content_item(person, session)
# Save profile note outside transaction (git operations are slow)
if result.get("status") == "processed":
_save_profile_note(result.get("person_id"), save_to_file)
return result
def _profile_list(value) -> list:
    """Normalize a frontmatter value to a list (None -> [], scalar -> [scalar])."""
    if value is None:
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def _merge_unique(existing, incoming) -> list:
    """Union two sequences, keeping first-seen order and dropping duplicates."""
    return list(dict.fromkeys([*(existing or []), *incoming]))


@app.task(name=SYNC_PROFILE_FROM_FILE)
@safe_task_execution
def sync_profile_from_file(filename: str):
    """
    Sync a profile note file to a Person record.

    Reads a markdown file with YAML frontmatter and creates/updates
    the corresponding Person record. Does NOT save back to file
    to avoid infinite loops.

    Merge rules when the person already exists:
    - display_name: replaced only when the file provides one
    - aliases / tags: union with existing, order preserved
    - contact_info: deep merge with existing
    - notes: replaced by the file body when the file has one

    Args:
        filename: Relative path to the profile file (e.g., "profiles/john_doe.md")

    Returns:
        ``{"status": "not_found", "filename": filename}`` when the file is
        missing, otherwise the result of ``process_content_item``.
    """
    file_path = settings.NOTES_STORAGE_DIR / filename
    if not file_path.exists():
        logger.warning(f"Profile file not found: {filename}")
        return {"status": "not_found", "filename": filename}

    data = Person.from_profile_markdown(file_path.read_text(encoding="utf-8"))

    # Fall back to the file stem (e.g. "john_doe" from "profiles/john_doe.md");
    # coerce to str because YAML may parse a bare value as int/bool.
    identifier = str(data.get("identifier") or file_path.stem)
    display_name = data.get("display_name")
    if display_name is not None:
        display_name = str(display_name)

    # Keys left empty in YAML ("aliases:") parse to None; treat as no values.
    aliases = _profile_list(data.get("aliases"))
    tags = _profile_list(data.get("tags"))
    contact_info = data.get("contact_info")
    if not isinstance(contact_info, dict):
        contact_info = {}
    notes = data.get("notes")

    logger.info(f"Syncing profile from file: {filename} -> {identifier}")

    with make_session() as session:
        person = session.query(Person).filter(Person.identifier == identifier).first()

        if person:
            # Update existing person with merge semantics. An inferred name
            # must not overwrite a stored one, so only use a name from the file.
            if display_name:
                person.display_name = display_name
            if aliases:
                person.aliases = _merge_unique(person.aliases, aliases)
            if contact_info:
                person.contact_info = _deep_merge(
                    dict(person.contact_info or {}), contact_info
                )
            if tags:
                person.tags = _merge_unique(person.tags, tags)
            if "notes" in data:
                # Replace notes from file (file is source of truth)
                person.content = notes

            person.sha256 = create_content_hash(f"person:{identifier}")
            person.size = len(person.content or "")
            person.embed_status = "RAW"

            return process_content_item(person, session)

        # Create new person; infer a display name only here.
        person = Person(
            identifier=identifier,
            display_name=display_name or identifier.replace("_", " ").title(),
            aliases=aliases,
            contact_info=contact_info,
            tags=tags,
            content=notes,
            modality="person",
            mime_type="text/plain",
            sha256=create_content_hash(f"person:{identifier}"),
            size=len(notes or ""),
        )
        return process_content_item(person, session)

View File

@ -247,3 +247,185 @@ def test_person_unique_identifier(db_session, qdrant):
with pytest.raises(Exception): # Should raise IntegrityError with pytest.raises(Exception): # Should raise IntegrityError
db_session.commit() db_session.commit()
def test_person_to_profile_markdown(person_data):
    """Serialized profile holds the frontmatter fields and the notes body."""
    content_hash = create_content_hash(f"person:{person_data['identifier']}")
    rendered = Person(
        **person_data, sha256=content_hash, size=100
    ).to_profile_markdown()

    # Document opens with the YAML frontmatter delimiter
    assert rendered.startswith("---")

    expected_fragments = (
        "identifier: alice_chen",
        "display_name: Alice Chen",
        "aliases:",
        "contact_info:",
        "email: alice@example.com",
        "tags:",
        "- work",
        # Notes body follows the frontmatter
        "Tech lead on Platform team",
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    # Accept the alias either quoted or unquoted
    assert any(form in rendered for form in ("- '@alice_c'", "- @alice_c"))
def test_person_to_profile_markdown_minimal(minimal_person_data):
    """A minimal Person serializes to frontmatter without empty collections."""
    content_hash = create_content_hash(
        f"person:{minimal_person_data['identifier']}"
    )
    rendered = Person(
        **minimal_person_data, sha256=content_hash, size=0
    ).to_profile_markdown()

    assert rendered.startswith("---")
    for fragment in ("identifier: bob_smith", "display_name: Bob Smith"):
        assert fragment in rendered
    # An empty alias list must not be emitted
    assert "aliases: []" not in rendered
def test_person_from_profile_markdown():
    """Test parsing profile markdown back to Person fields."""
    # The contact_info entries must be indented under their key; without
    # the indentation YAML parses contact_info as null.
    markdown = """---
identifier: john_doe
display_name: John Doe
aliases:
- "@johnd"
- john.doe@work.com
contact_info:
  email: john@example.com
  phone: "555-9876"
tags:
- friend
- climbing
---
Met John at the climbing gym. Great belayer."""

    data = Person.from_profile_markdown(markdown)

    assert data["identifier"] == "john_doe"
    assert data["display_name"] == "John Doe"
    assert data["aliases"] == ["@johnd", "john.doe@work.com"]
    assert data["contact_info"]["email"] == "john@example.com"
    assert data["contact_info"]["phone"] == "555-9876"
    assert data["tags"] == ["friend", "climbing"]
    assert "Met John at the climbing gym" in data["notes"]
def test_person_from_profile_markdown_no_frontmatter():
    """Text without frontmatter is returned purely as notes."""
    parsed = Person.from_profile_markdown("Just some notes about a person.")

    assert "identifier" not in parsed
    assert parsed["notes"] == "Just some notes about a person."
def test_person_from_profile_markdown_empty_body():
    """Frontmatter with no body yields the fields and no notes."""
    source = """---
identifier: jane_smith
display_name: Jane Smith
---
"""
    parsed = Person.from_profile_markdown(source)

    expected = {"identifier": "jane_smith", "display_name": "Jane Smith"}
    for key, value in expected.items():
        assert parsed[key] == value
    # Either the key is absent or it is explicitly None
    assert parsed.get("notes") is None
def test_person_profile_roundtrip(person_data):
    """Person -> markdown -> dict keeps every profile field."""
    content_hash = create_content_hash(f"person:{person_data['identifier']}")
    original = Person(**person_data, sha256=content_hash, size=100)

    parsed = Person.from_profile_markdown(original.to_profile_markdown())

    # Scalar and mapping fields must match exactly
    assert parsed["identifier"] == original.identifier
    assert parsed["display_name"] == original.display_name
    assert parsed["contact_info"] == original.contact_info
    assert parsed["notes"] == original.content
    # List fields are compared without regard to order
    assert set(parsed["aliases"]) == set(original.aliases)
    assert set(parsed["tags"]) == set(original.tags)
def test_person_get_profile_path():
    """Profile path is a folder-qualified markdown file named by identifier."""
    fields = {
        "identifier": "test_user",
        "display_name": "Test User",
        "modality": "person",
        "sha256": create_content_hash("person:test_user"),
        "size": 0,
    }
    profile_path = Person(**fields).get_profile_path()

    # Should be in profiles folder with .md extension
    assert profile_path.endswith(".md")
    assert "test_user" in profile_path
    assert "/" in profile_path  # Should have folder separator
def test_person_save_profile_note(tmp_path):
    """save_profile_note writes frontmatter and notes under the notes dir."""
    from unittest.mock import patch

    subject = Person(
        identifier="file_test_user",
        display_name="File Test User",
        aliases=["@filetest"],
        contact_info={"email": "filetest@example.com"},
        tags=["test"],
        content="Test notes content.",
        modality="person",
        sha256=create_content_hash("person:file_test_user"),
        size=20,
    )

    with patch("memory.common.settings.NOTES_STORAGE_DIR", tmp_path):
        subject.save_profile_note()

    # The file lands in the profiles folder, named after the identifier
    written_file = tmp_path / "profiles" / "file_test_user.md"
    assert written_file.exists()

    written_text = written_file.read_text()
    expected_fragments = (
        "identifier: file_test_user",
        "display_name: File Test User",
        "@filetest",
        "email: filetest@example.com",
        "Test notes content.",
    )
    for fragment in expected_fragments:
        assert fragment in written_text
def test_person_save_profile_note_creates_directory(tmp_path):
    """save_profile_note creates the profiles folder when it is missing."""
    from unittest.mock import patch

    target_dir = tmp_path / "profiles"
    # Precondition: the profiles directory doesn't exist yet
    assert not target_dir.exists()

    subject = Person(
        identifier="dir_test_user",
        display_name="Dir Test User",
        modality="person",
        sha256=create_content_hash("person:dir_test_user"),
        size=0,
    )

    with patch("memory.common.settings.NOTES_STORAGE_DIR", tmp_path):
        subject.save_profile_note()

    # Both the directory and the note must now be present
    assert target_dir.exists()
    assert (target_dir / "dir_test_user.md").exists()

View File

@ -980,3 +980,172 @@ def test_track_git_changes_logging(
# Verify logging for changes scenario # Verify logging for changes scenario
mock_logger.info.assert_any_call("Tracking git changes") mock_logger.info.assert_any_call("Tracking git changes")
mock_logger.info.assert_any_call("Changed files: ['test.md']") mock_logger.info.assert_any_call("Changed files: ['test.md']")
# Profile handling tests
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.people.sync_profile_from_file")
def test_sync_notes_routes_profiles_to_sync_profile_from_file(
    mock_sync_profile, mock_sync_note, db_session, tmp_path
):
    """sync_notes sends profile files to sync_profile_from_file, others to sync_note."""
    from unittest.mock import Mock

    # Layout: one regular note at the root, one profile under profiles/
    root = tmp_path / "notes"
    root.mkdir(parents=True, exist_ok=True)
    (root / "regular_note.md").write_text("Regular note content")

    profile_folder = root / "profiles"
    profile_folder.mkdir(exist_ok=True)
    (profile_folder / "john_doe.md").write_text(
        """---
identifier: john_doe
display_name: John Doe
---
Profile notes."""
    )

    mock_sync_note.delay.return_value = Mock(id="task-note")
    mock_sync_profile.delay.return_value = Mock(id="task-profile")

    storage_patch = patch("memory.common.settings.NOTES_STORAGE_DIR", root)
    folder_patch = patch("memory.common.settings.PROFILES_FOLDER", "profiles")
    with storage_patch, folder_patch:
        outcome = notes.sync_notes(str(root))

    # Both markdown files are counted
    assert outcome["notes_num"] == 2

    # The regular note is dispatched to sync_note by keyword
    assert mock_sync_note.delay.call_count == 1
    _, note_kwargs = mock_sync_note.delay.call_args
    assert note_kwargs["subject"] == "regular_note"

    # The profile is dispatched to sync_profile_from_file positionally
    assert mock_sync_profile.delay.call_count == 1
    profile_args, _ = mock_sync_profile.delay.call_args
    assert "profiles/john_doe.md" in profile_args[0]
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.people.sync_profile_from_file")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
def test_track_git_changes_routes_profiles_to_sync_profile_from_file(
    mock_check_git, mock_git_command, mock_sync_profile, mock_sync_note, tmp_path
):
    """track_git_changes sends changed profile files to sync_profile_from_file."""
    from unittest.mock import Mock

    # Fake repository holding one regular note and one profile
    root = tmp_path / "notes"
    root.mkdir(parents=True, exist_ok=True)
    (root / ".git").mkdir()  # Fake git repo
    (root / "regular_note.md").write_text("Regular note content")

    profile_folder = root / "profiles"
    profile_folder.mkdir(exist_ok=True)
    (profile_folder / "jane_doe.md").write_text(
        """---
identifier: jane_doe
display_name: Jane Doe
---
Jane's notes."""
    )

    # Scripted git answers, consumed in call order
    mock_check_git.side_effect = [
        "main",  # current branch
        "abc123",  # current commit
        None,  # fetch origin
        "def456",  # latest commit
    ]
    diff_result = Mock(returncode=0, stdout="regular_note.md\nprofiles/jane_doe.md\n")
    mock_git_command.side_effect = [
        Mock(),  # pull command
        diff_result,  # diff command
    ]

    mock_sync_note.delay.return_value = Mock(id="task-note")
    mock_sync_profile.delay.return_value = Mock(id="task-profile")

    storage_patch = patch("memory.common.settings.NOTES_STORAGE_DIR", root)
    folder_patch = patch("memory.common.settings.PROFILES_FOLDER", "profiles")
    with storage_patch, folder_patch:
        outcome = notes.track_git_changes()

    assert outcome["status"] == "success"
    for changed in ("regular_note.md", "profiles/jane_doe.md"):
        assert changed in outcome["changed_files"]

    # The regular note goes to sync_note without writing back to disk
    assert mock_sync_note.delay.call_count == 1
    _, note_kwargs = mock_sync_note.delay.call_args
    assert note_kwargs["subject"] == "regular_note"
    assert note_kwargs["save_to_file"] is False

    # The profile goes to sync_profile_from_file with its relative path
    assert mock_sync_profile.delay.call_count == 1
    profile_args, _ = mock_sync_profile.delay.call_args
    assert profile_args[0] == "profiles/jane_doe.md"
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.people.sync_profile_from_file")
def test_sync_notes_skips_existing_profiles(
    mock_sync_profile, mock_sync_note, db_session, tmp_path
):
    """sync_notes does not schedule a profile whose Person already exists."""
    from contextlib import contextmanager
    from unittest.mock import Mock

    from memory.common.db.models import Person
    from memory.workers.tasks.content_processing import create_content_hash

    # A profile file on disk ...
    root = tmp_path / "notes"
    profile_folder = root / "profiles"
    profile_folder.mkdir(parents=True, exist_ok=True)
    (profile_folder / "existing_person.md").write_text("Profile content")

    # ... and a matching Person row already in the database
    db_session.add(
        Person(
            identifier="existing_person",
            display_name="Existing Person",
            modality="person",
            mime_type="text/plain",
            sha256=create_content_hash("person:existing_person"),
            size=0,
        )
    )
    db_session.commit()

    mock_sync_profile.delay.return_value = Mock(id="task-profile")

    @contextmanager
    def _session_stub():
        yield db_session

    session_patch = patch("memory.workers.tasks.notes.make_session", _session_stub)
    storage_patch = patch("memory.common.settings.NOTES_STORAGE_DIR", root)
    folder_patch = patch("memory.common.settings.PROFILES_FOLDER", "profiles")
    with session_patch, storage_patch, folder_patch:
        outcome = notes.sync_notes(str(root))

    # Nothing is scheduled for a person that is already known
    assert mock_sync_profile.delay.call_count == 0
    assert outcome["new_profiles"] == 0

View File

@ -402,3 +402,138 @@ def test_update_person_first_notes(mock_make_session, qdrant):
assert person.content == "First notes!" assert person.content == "First notes!"
# Should not have separator when there were no previous notes # Should not have separator when there were no previous notes
assert "---" not in person.content assert "---" not in person.content
@pytest.fixture
def mock_make_session_with_file(db_session, tmp_path):
    """Patch session, embedding and qdrant, and point notes storage at tmp_path.

    Yields a ``(db_session, tmp_path)`` pair; ``tmp_path/profiles`` exists
    by the time the test body runs.
    """

    @contextmanager
    def _session_stub():
        yield db_session

    session_patch = patch("memory.workers.tasks.people.make_session", _session_stub)
    embed_patch = patch(
        "memory.common.embedding.embed_source_item",
        side_effect=lambda item: [_make_mock_chunk(item.id or 1)],
    )
    qdrant_patch = patch("memory.workers.tasks.content_processing.push_to_qdrant")
    storage_patch = patch("memory.common.settings.NOTES_STORAGE_DIR", tmp_path)

    with session_patch, embed_patch, qdrant_patch, storage_patch:
        # Create profiles directory
        (tmp_path / "profiles").mkdir(exist_ok=True)
        yield db_session, tmp_path
def test_sync_profile_from_file_new_person(mock_make_session_with_file, qdrant):
    """Test syncing a new person from a profile file."""
    db_session, tmp_path = mock_make_session_with_file

    # Create a profile file. The contact_info entry must be indented under
    # its key; without the indentation YAML parses contact_info as null.
    profile_content = """---
identifier: new_profile_person
display_name: New Profile Person
aliases:
- "@newprofile"
contact_info:
  email: new@example.com
tags:
- test
---
Notes from the profile file."""
    profile_path = tmp_path / "profiles" / "new_profile_person.md"
    profile_path.write_text(profile_content)

    result = people.sync_profile_from_file("profiles/new_profile_person.md")

    assert result["status"] == "processed"

    person = db_session.query(Person).filter_by(identifier="new_profile_person").first()
    assert person is not None
    assert person.display_name == "New Profile Person"
    assert "@newprofile" in person.aliases
    assert person.contact_info["email"] == "new@example.com"
    assert "test" in person.tags
    assert "Notes from the profile file" in person.content
def test_sync_profile_from_file_update_existing(mock_make_session_with_file, qdrant):
    """Test syncing updates to an existing person from a profile file."""
    db_session, tmp_path = mock_make_session_with_file

    # Create person first; save_to_file=False keeps this setup from
    # writing a profile note of its own.
    people.sync_person(
        identifier="existing_profile_person",
        display_name="Old Name",
        aliases=["@old_alias"],
        tags=["old_tag"],
        notes="Old notes.",
        save_to_file=False,
    )

    # Create updated profile file. The contact_info entry must be indented
    # under its key; without the indentation YAML parses contact_info as null.
    profile_content = """---
identifier: existing_profile_person
display_name: New Name
aliases:
- "@new_alias"
contact_info:
  twitter: "@updated"
tags:
- new_tag
---
New notes from file."""
    profile_path = tmp_path / "profiles" / "existing_profile_person.md"
    profile_path.write_text(profile_content)

    result = people.sync_profile_from_file("profiles/existing_profile_person.md")

    assert result["status"] == "processed"

    person = db_session.query(Person).filter_by(identifier="existing_profile_person").first()
    assert person.display_name == "New Name"
    # Aliases should be merged
    assert "@old_alias" in person.aliases
    assert "@new_alias" in person.aliases
    # Tags should be merged
    assert "old_tag" in person.tags
    assert "new_tag" in person.tags
    # Contact info should be merged
    assert person.contact_info["twitter"] == "@updated"
    # Notes should be replaced (file is source of truth)
    assert person.content == "New notes from file."
def test_sync_profile_from_file_not_found(mock_make_session_with_file, qdrant):
    """A missing profile file is reported with status "not_found"."""
    _session, _storage_root = mock_make_session_with_file

    outcome = people.sync_profile_from_file("profiles/nonexistent.md")

    assert outcome["status"] == "not_found"
def test_sync_profile_from_file_infer_identifier(mock_make_session_with_file, qdrant):
    """The file stem becomes the identifier when frontmatter omits it."""
    session, storage_root = mock_make_session_with_file

    # Frontmatter deliberately carries no identifier
    (storage_root / "profiles" / "inferred_person.md").write_text(
        """---
display_name: Inferred Person
---
Notes."""
    )

    outcome = people.sync_profile_from_file("profiles/inferred_person.md")
    assert outcome["status"] == "processed"

    stored = session.query(Person).filter_by(identifier="inferred_person").first()
    assert stored is not None
    assert stored.display_name == "Inferred Person"