Compare commits

...

2 Commits

Author SHA1 Message Date
Daniel O'Connell
f241110d38 proper notes path 2025-07-05 22:38:41 +00:00
Daniel O'Connell
288c2995e5 synch notes 2025-07-05 23:58:47 +02:00
5 changed files with 604 additions and 6 deletions

View File

@ -15,6 +15,7 @@ OBSERVATIONS_ROOT = "memory.workers.tasks.observations"
# Task-name constants: each joins a task-module root (e.g. NOTES_ROOT) with the
# name of a task. They are passed to `@app.task(name=...)` by the worker
# modules and used as the "task" value of beat schedule entries.
SYNC_NOTES = f"{NOTES_ROOT}.sync_notes"
SYNC_NOTE = f"{NOTES_ROOT}.sync_note"
SETUP_GIT_NOTES = f"{NOTES_ROOT}.setup_git_notes"
# Name of the task that pulls the notes git repository and re-syncs the files
# that changed.
TRACK_GIT_CHANGES = f"{NOTES_ROOT}.track_git_changes"
SYNC_OBSERVATION = f"{OBSERVATIONS_ROOT}.sync_observation"
SYNC_ALL_COMICS = f"{COMIC_ROOT}.sync_all_comics"
SYNC_SMBC = f"{COMIC_ROOT}.sync_smbc"

View File

@ -105,6 +105,7 @@ COMIC_SYNC_INTERVAL = int(os.getenv("COMIC_SYNC_INTERVAL", 60 * 60 * 24))
# Scheduling intervals; each can be overridden through the environment variable
# of the same name and is coerced to int.
# NOTE(review): the defaults look like seconds (e.g. 30 * 60) — confirm against
# how the beat schedule consumes them.
ARTICLE_FEED_SYNC_INTERVAL = int(os.getenv("ARTICLE_FEED_SYNC_INTERVAL", 30 * 60))
CLEAN_COLLECTION_INTERVAL = int(os.getenv("CLEAN_COLLECTION_INTERVAL", 24 * 60 * 60))
CHUNK_REINGEST_INTERVAL = int(os.getenv("CHUNK_REINGEST_INTERVAL", 60 * 60))
# Schedule value for the beat entry that runs the TRACK_GIT_CHANGES task
# (default 15 * 60).
NOTES_SYNC_INTERVAL = int(os.getenv("NOTES_SYNC_INTERVAL", 15 * 60))
# Unlike the intervals above, this one is named in minutes (default 60 * 24).
CHUNK_REINGEST_SINCE_MINUTES = int(os.getenv("CHUNK_REINGEST_SINCE_MINUTES", 60 * 24))

View File

@ -7,6 +7,7 @@ from memory.common.celery_app import (
REINGEST_MISSING_CHUNKS,
SYNC_ALL_COMICS,
SYNC_ALL_ARTICLE_FEEDS,
TRACK_GIT_CHANGES,
)
logger = logging.getLogger(__name__)
@ -33,4 +34,8 @@ app.conf.beat_schedule = {
"task": SYNC_ALL_ARTICLE_FEEDS,
"schedule": settings.ARTICLE_FEED_SYNC_INTERVAL,
},
"sync-notes-changes": {
"task": TRACK_GIT_CHANGES,
"schedule": settings.NOTES_SYNC_INTERVAL,
},
}

View File

@ -7,7 +7,13 @@ import shlex
from memory.common import settings
from memory.common.db.connection import make_session
from memory.common.db.models import Note
from memory.common.celery_app import app, SYNC_NOTE, SYNC_NOTES, SETUP_GIT_NOTES
from memory.common.celery_app import (
app,
SYNC_NOTE,
SYNC_NOTES,
SETUP_GIT_NOTES,
TRACK_GIT_CHANGES,
)
from memory.workers.tasks.content_processing import (
check_content_exists,
create_content_hash,
@ -41,6 +47,22 @@ def git_command(repo_root: pathlib.Path, *args: str, force: bool = False):
return res
def check_git_command(repo_root: pathlib.Path, *args: str, force: bool = False):
    """Run a git command through ``git_command`` and return its stripped stdout.

    Args:
        repo_root: Repository directory handed to ``git_command``.
        *args: The git sub-command and its arguments.
        force: Forwarded unchanged to ``git_command``.

    Returns:
        The command's stdout with surrounding whitespace removed.

    Raises:
        RuntimeError: If ``git_command`` returns a falsy result, or the
            command exits with a non-zero return code (in which case the
            return code, stderr and any stdout are logged first).
    """
    command = " ".join(args)
    completed = git_command(repo_root, *args, force=force)
    if not completed:
        raise RuntimeError(f"`{command}` failed")

    if completed.returncode == 0:
        return completed.stdout.strip()

    # Failure path: record everything git reported before raising.
    logger.error(f"Git command failed: {completed.returncode}")
    logger.error(f"stderr: {completed.stderr}")
    if completed.stdout:
        logger.error(f"stdout: {completed.stdout}")
    raise RuntimeError(
        f"`{command}` failed with return code {completed.returncode}"
    )
@contextlib.contextmanager
def git_tracking(repo_root: pathlib.Path, commit_message: str = "Sync note"):
git_command(repo_root, "fetch")
@ -63,6 +85,7 @@ def sync_note(
note_type: str | None = None,
confidences: dict[str, float] = {},
tags: list[str] = [],
save_to_file: bool = True,
):
logger.info(f"Syncing note {subject}")
text = Note.as_text(content, subject)
@ -101,10 +124,11 @@ def sync_note(
note.tags = tags # type: ignore
note.update_confidences(confidences)
with git_tracking(
settings.NOTES_STORAGE_DIR, f"Sync note {filename}: {subject}"
):
note.save_to_file()
if save_to_file:
with git_tracking(
settings.NOTES_STORAGE_DIR, f"Sync note {filename}: {subject}"
):
note.save_to_file()
return process_content_item(note, session)
@ -123,7 +147,7 @@ def sync_notes(folder: str):
sync_note.delay(
subject=filename.stem,
content=filename.read_text(),
filename=filename.as_posix(),
filename=filename.relative_to(path).as_posix(),
)
return {
@ -148,3 +172,65 @@ def setup_git_notes(origin: str, email: str, name: str):
git_command(settings.NOTES_STORAGE_DIR, "commit", "-m", "Initial commit")
git_command(settings.NOTES_STORAGE_DIR, "push", "-u", "origin", "main")
return {"status": "success"}
@app.task(name=TRACK_GIT_CHANGES)
@safe_task_execution
def track_git_changes():
    """Pull the notes repository and queue a sync for every changed file.

    Records the current HEAD, fetches and pulls the current branch from
    ``origin``, then diffs the old HEAD against ``origin/<branch>``. Every
    changed file that still exists in the working tree is handed to
    ``sync_note`` with ``save_to_file=False``.

    Returns:
        A dict whose ``"status"`` is one of:

        * ``"no_git_repo"`` - the notes directory has no ``.git`` entry.
        * ``"no_changes"`` - HEAD already equals ``origin/<branch>``.
        * ``"error"`` - the list of changed files could not be obtained.
        * ``"success"`` - changed files were queued; the dict also carries
          ``current_commit``, ``latest_commit`` and ``changed_files``.

    Raises:
        RuntimeError: Propagated from ``check_git_command`` when resolving
            the branch/commits or fetching fails.
    """
    logger.info("Tracking git changes")

    repo_root = settings.NOTES_STORAGE_DIR
    if not (repo_root / ".git").exists():
        logger.warning("Git repository not found")
        return {"status": "no_git_repo"}

    current_branch = check_git_command(repo_root, "rev-parse", "--abbrev-ref", "HEAD")
    current_commit = check_git_command(repo_root, "rev-parse", "HEAD")
    check_git_command(repo_root, "fetch", "origin")

    # The pull stays best-effort, but a failure is no longer silent: if it did
    # not succeed the working tree may still hold the old file contents.
    pull_result = git_command(repo_root, "pull", "origin", current_branch)
    if not pull_result or pull_result.returncode != 0:
        logger.warning(
            f"git pull origin {current_branch} did not succeed; "
            "working tree may be stale"
        )

    latest_commit = check_git_command(
        repo_root, "rev-parse", f"origin/{current_branch}"
    )

    # Check if there are any changes
    if current_commit == latest_commit:
        logger.info("No new changes")
        return {
            "status": "no_changes",
            "current_commit": current_commit,
            "latest_commit": latest_commit,
            "changed_files": [],
        }

    # Get list of changed files between current and latest commit.
    # core.quotePath=false stops git from C-quoting paths that contain
    # non-ASCII bytes (e.g. "caf\303\251.md"); quoted names would never match
    # a file on disk and those notes would be skipped as "not found".
    diff_result = git_command(
        repo_root,
        "-c",
        "core.quotePath=false",
        "diff",
        "--name-only",
        f"{current_commit}..{latest_commit}",
    )
    if not diff_result or diff_result.returncode != 0:
        logger.error("Failed to get changed files")
        return {"status": "error", "error": "Failed to get changed files"}

    changed_files = [
        f.strip() for f in diff_result.stdout.strip().split("\n") if f.strip()
    ]
    logger.info(f"Changed files: {changed_files}")

    for filename in changed_files:
        file = repo_root / filename
        # Files deleted upstream still appear in the diff but are gone locally.
        if not file.exists():
            logger.warning(f"File not found: {filename}")
            continue

        # The content already came from git, so it must not be written back
        # (and committed) again.
        sync_note.delay(
            subject=file.stem,
            content=file.read_text(),
            filename=filename,
            save_to_file=False,
        )

    return {
        "status": "success",
        "current_commit": current_commit,
        "latest_commit": latest_commit,
        "changed_files": changed_files,
    }

View File

@ -475,3 +475,508 @@ def test_sync_notes_handles_file_read_errors(mock_sync_note, db_session):
# Should catch the error and return error status
assert result["status"] == "error"
assert "File read error" in result["error"]
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_success(mock_git_command):
    """A zero exit status yields the command's stdout, stripped of whitespace."""
    # Padded stdout proves the helper strips before returning.
    mock_git_command.return_value = Mock(returncode=0, stdout=" main \n", stderr="")
    repo = pathlib.Path("/test/repo")

    output = notes.check_git_command(repo, "branch", "--show-current")

    assert output == "main"
    mock_git_command.assert_called_once_with(
        repo, "branch", "--show-current", force=False
    )
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_with_force(mock_git_command):
    """The ``force`` flag is forwarded unchanged to ``git_command``."""
    mock_git_command.return_value = Mock(returncode=0, stdout="output", stderr="")
    repo = pathlib.Path("/test/repo")

    output = notes.check_git_command(repo, "status", force=True)

    assert output == "output"
    mock_git_command.assert_called_once_with(repo, "status", force=True)
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_no_git_repo(mock_git_command):
    """A ``None`` result from ``git_command`` is reported as a RuntimeError."""
    repo = pathlib.Path("/test/repo")
    mock_git_command.return_value = None

    expected_message = r"`status` failed"
    with pytest.raises(RuntimeError, match=expected_message):
        notes.check_git_command(repo, "status")

    mock_git_command.assert_called_once_with(repo, "status", force=False)
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_git_failure(mock_git_command):
    """A non-zero return code raises RuntimeError naming the command and code."""
    mock_git_command.return_value = Mock(
        returncode=1,
        stdout="fatal: not a git repository",
        stderr="error: unknown command",
    )
    repo = pathlib.Path("/test/repo")

    expected_message = r"`branch --invalid` failed with return code 1"
    with pytest.raises(RuntimeError, match=expected_message):
        notes.check_git_command(repo, "branch", "--invalid")

    mock_git_command.assert_called_once_with(
        repo, "branch", "--invalid", force=False
    )
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_multiple_args(mock_git_command):
    """All positional git arguments are passed through in their given order."""
    mock_git_command.return_value = Mock(
        returncode=0, stdout="commit-hash", stderr=""
    )
    repo = pathlib.Path("/test/repo")
    git_args = ("rev-parse", "--short", "HEAD")

    output = notes.check_git_command(repo, *git_args)

    assert output == "commit-hash"
    mock_git_command.assert_called_once_with(repo, *git_args, force=False)
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_empty_stdout(mock_git_command):
    """A successful command with no output returns an empty string."""
    mock_git_command.return_value = Mock(returncode=0, stdout="", stderr="")
    repo = pathlib.Path("/test/repo")

    output = notes.check_git_command(repo, "diff", "--exit-code")

    assert output == ""
    mock_git_command.assert_called_once_with(
        repo, "diff", "--exit-code", force=False
    )
@patch("memory.workers.tasks.notes.git_command")
def test_check_git_command_whitespace_handling(mock_git_command):
    """Leading and trailing newlines/spaces are stripped; inner spaces stay."""
    mock_git_command.return_value = Mock(
        returncode=0,
        stdout="\n\n some output with spaces \n\n",
        stderr="",
    )
    repo = pathlib.Path("/test/repo")

    output = notes.check_git_command(repo, "log", "--oneline", "-1")

    assert output == "some output with spaces"
    mock_git_command.assert_called_once_with(
        repo, "log", "--oneline", "-1", force=False
    )
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.logger")
def test_check_git_command_logs_errors(mock_logger, mock_git_command):
    """On failure the return code, stderr and stdout are each logged as errors."""
    mock_git_command.return_value = Mock(
        returncode=128,
        stdout="some output",
        stderr="fatal: repository not found",
    )
    repo = pathlib.Path("/test/repo")

    with pytest.raises(RuntimeError):
        notes.check_git_command(repo, "clone", "invalid-url")

    # Every piece of diagnostic output must have been logged.
    expected_messages = (
        "Git command failed: 128",
        "stderr: fatal: repository not found",
        "stdout: some output",
    )
    for message in expected_messages:
        mock_logger.error.assert_any_call(message)
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.logger")
def test_check_git_command_logs_errors_no_stdout(mock_logger, mock_git_command):
    """With empty stdout only the return code and stderr are logged."""
    mock_git_command.return_value = Mock(
        returncode=1, stdout="", stderr="error: command failed"
    )
    repo = pathlib.Path("/test/repo")

    with pytest.raises(RuntimeError):
        notes.check_git_command(repo, "invalid-command")

    for message in ("Git command failed: 1", "stderr: error: command failed"):
        mock_logger.error.assert_any_call(message)

    # Nothing mentioning stdout may be logged when stdout was empty.
    assert not any(
        "stdout:" in str(logged) for logged in mock_logger.error.call_args_list
    )
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_no_git_repo(mock_settings):
    """Without a ``.git`` entry the task reports ``no_git_repo``."""
    # Whatever path is derived from the repo root claims not to exist.
    missing_path = Mock()
    missing_path.exists.return_value = False
    repo = Mock()
    repo.__truediv__ = Mock(return_value=missing_path)
    mock_settings.NOTES_STORAGE_DIR = repo

    outcome = notes.track_git_changes()

    assert outcome == {"status": "no_git_repo"}
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_no_changes(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """When HEAD already matches origin nothing is diffed or queued."""
    # Any path derived from the repo root exists, so the ``.git`` check passes.
    existing_path = Mock()
    existing_path.exists.return_value = True
    repo = Mock()
    repo.__truediv__ = Mock(return_value=existing_path)
    mock_settings.NOTES_STORAGE_DIR = repo

    # check_git_command results in call order:
    # current branch, current commit, fetch (result unused), origin commit.
    mock_check_git.side_effect = ["main", "abc123", None, "abc123"]
    mock_git_command.return_value = Mock()  # result of the pull

    outcome = notes.track_git_changes()

    expected = {
        "status": "no_changes",
        "current_commit": "abc123",
        "latest_commit": "abc123",
        "changed_files": [],
    }
    assert outcome == expected
    mock_sync_note.delay.assert_not_called()
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_diff_failure(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """A non-zero exit from the diff produces an error status and no syncs."""
    existing_path = Mock()
    existing_path.exists.return_value = True
    repo = Mock()
    repo.__truediv__ = Mock(return_value=existing_path)
    mock_settings.NOTES_STORAGE_DIR = repo

    # check_git_command results in call order:
    # current branch, current commit, fetch (result unused), origin commit.
    mock_check_git.side_effect = ["main", "abc123", None, "def456"]

    # git_command results in call order: the pull, then the failing diff.
    pull_result = Mock()
    failed_diff = Mock(returncode=1, stdout="", stderr="diff failed")
    mock_git_command.side_effect = [pull_result, failed_diff]

    outcome = notes.track_git_changes()

    expected = {"status": "error", "error": "Failed to get changed files"}
    assert outcome == expected
    mock_sync_note.delay.assert_not_called()
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_diff_returns_none(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """A ``None`` diff result produces an error status and no syncs."""
    existing_path = Mock()
    existing_path.exists.return_value = True
    repo = Mock()
    repo.__truediv__ = Mock(return_value=existing_path)
    mock_settings.NOTES_STORAGE_DIR = repo

    # check_git_command results in call order:
    # current branch, current commit, fetch (result unused), origin commit.
    mock_check_git.side_effect = ["main", "abc123", None, "def456"]

    # git_command results in call order: the pull, then a diff yielding None.
    pull_result = Mock()
    mock_git_command.side_effect = [pull_result, None]

    outcome = notes.track_git_changes()

    expected = {"status": "error", "error": "Failed to get changed files"}
    assert outcome == expected
    mock_sync_note.delay.assert_not_called()
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_empty_diff(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """Differing commits with an empty diff succeed with no changed files."""
    existing_path = Mock()
    existing_path.exists.return_value = True
    repo = Mock()
    repo.__truediv__ = Mock(return_value=existing_path)
    mock_settings.NOTES_STORAGE_DIR = repo

    # check_git_command results in call order:
    # current branch, current commit, fetch (result unused), origin commit.
    mock_check_git.side_effect = ["main", "abc123", None, "def456"]

    # git_command results in call order: the pull, then a diff with no output.
    pull_result = Mock()
    empty_diff = Mock(returncode=0, stdout="")
    mock_git_command.side_effect = [pull_result, empty_diff]

    outcome = notes.track_git_changes()

    expected = {
        "status": "success",
        "current_commit": "abc123",
        "latest_commit": "def456",
        "changed_files": [],
    }
    assert outcome == expected
    mock_sync_note.delay.assert_not_called()
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_whitespace_in_filenames(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """Padding and blank lines in the diff output are stripped from filenames.

    The task builds file paths with ``settings.NOTES_STORAGE_DIR / filename``,
    so the per-file mocks are served through the repo root's ``__truediv__``
    (patching ``pathlib.Path`` would never be exercised). Looking up an
    unstripped name fails, so the test also proves the names were cleaned
    before the files were resolved.
    """

    def make_path(stem=None, text=None):
        # Minimal stand-in for an existing path inside the repository.
        path = Mock()
        path.exists.return_value = True
        path.stem = stem
        path.read_text.return_value = text
        return path

    paths = {
        ".git": make_path(),
        "file1.md": make_path("file1", "Content 1"),
        "file2.md": make_path("file2", "Content 2"),
    }
    mock_repo_root = Mock()
    # The right-hand operand is always the last positional argument.
    mock_repo_root.__truediv__ = Mock(side_effect=lambda *parts: paths[parts[-1]])
    mock_settings.NOTES_STORAGE_DIR = mock_repo_root

    # Mock git commands
    mock_check_git.side_effect = [
        "main",  # current branch
        "abc123",  # current commit
        None,  # fetch origin
        "def456",  # latest commit (different from current)
    ]

    # Mock diff with whitespace and empty lines
    mock_git_command.side_effect = [
        Mock(),  # pull command
        Mock(
            returncode=0, stdout=" file1.md \n\n file2.md \n\n"
        ),  # diff with whitespace
    ]

    result = notes.track_git_changes()

    assert result == {
        "status": "success",
        "current_commit": "abc123",
        "latest_commit": "def456",
        "changed_files": ["file1.md", "file2.md"],
    }

    # Each non-empty filename is queued exactly once, with the cleaned name
    # and the content read from the working tree.
    assert mock_sync_note.delay.call_count == 2
    mock_sync_note.delay.assert_any_call(
        subject="file1",
        content="Content 1",
        filename="file1.md",
        save_to_file=False,
    )
    mock_sync_note.delay.assert_any_call(
        subject="file2",
        content="Content 2",
        filename="file2.md",
        save_to_file=False,
    )
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
def test_track_git_changes_feature_branch(
    mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """The current branch name is used for both the pull and the origin ref.

    The changed file is served through the repo root's ``__truediv__``, which
    is how the task builds paths, so the arguments handed to
    ``sync_note.delay`` can be verified as well.
    """
    git_dir = Mock()
    git_dir.exists.return_value = True

    feature_file = Mock()
    feature_file.exists.return_value = True
    feature_file.stem = "feature_file"
    feature_file.read_text.return_value = "Feature content"

    paths = {".git": git_dir, "feature_file.md": feature_file}
    mock_repo_root = Mock()
    # The right-hand operand is always the last positional argument.
    mock_repo_root.__truediv__ = Mock(side_effect=lambda *parts: paths[parts[-1]])
    mock_settings.NOTES_STORAGE_DIR = mock_repo_root

    # Mock git commands for feature branch
    mock_check_git.side_effect = [
        "feature/notes-sync",  # current branch
        "abc123",  # current commit
        None,  # fetch origin
        "def456",  # latest commit from origin/feature/notes-sync
    ]
    mock_git_command.side_effect = [
        Mock(),  # pull origin feature/notes-sync
        Mock(returncode=0, stdout="feature_file.md\n"),  # diff command
    ]

    result = notes.track_git_changes()

    assert result == {
        "status": "success",
        "current_commit": "abc123",
        "latest_commit": "def456",
        "changed_files": ["feature_file.md"],
    }

    # Verify correct branch was used in git commands
    mock_git_command.assert_any_call(
        mock_repo_root, "pull", "origin", "feature/notes-sync"
    )
    mock_check_git.assert_any_call(
        mock_repo_root, "rev-parse", "origin/feature/notes-sync"
    )

    # The changed file is queued without being written back to disk.
    mock_sync_note.delay.assert_called_once_with(
        subject="feature_file",
        content="Feature content",
        filename="feature_file.md",
        save_to_file=False,
    )
@patch("memory.workers.tasks.notes.sync_note")
@patch("memory.workers.tasks.notes.git_command")
@patch("memory.workers.tasks.notes.check_git_command")
@patch("memory.workers.tasks.notes.settings")
@patch("memory.workers.tasks.notes.logger")
def test_track_git_changes_logging(
    mock_logger, mock_settings, mock_check_git, mock_git_command, mock_sync_note
):
    """The task logs its start plus either "No new changes" or the file list.

    Paths are built from ``settings.NOTES_STORAGE_DIR``, so the mocked repo
    root is the only filesystem stand-in needed; ``pathlib.Path`` is left
    untouched.
    """
    # Mock git repo exists
    mock_repo_root = Mock()
    mock_repo_root.__truediv__ = Mock(return_value=Mock())
    mock_repo_root.__truediv__.return_value.exists.return_value = True
    mock_settings.NOTES_STORAGE_DIR = mock_repo_root

    # Scenario 1: no changes
    mock_check_git.side_effect = [
        "main",  # current branch
        "abc123",  # current commit
        None,  # fetch origin
        "abc123",  # latest commit (same as current)
    ]
    mock_git_command.return_value = Mock()  # pull command

    notes.track_git_changes()

    mock_logger.info.assert_any_call("Tracking git changes")
    mock_logger.info.assert_any_call("No new changes")

    # Scenario 2: one changed file; start from a clean logger
    mock_logger.reset_mock()
    mock_check_git.side_effect = [
        "main",  # current branch
        "abc123",  # current commit
        None,  # fetch origin
        "def456",  # latest commit (different)
    ]
    mock_git_command.side_effect = [
        Mock(),  # pull command
        Mock(returncode=0, stdout="test.md\n"),  # diff command
    ]

    notes.track_git_changes()

    mock_logger.info.assert_any_call("Tracking git changes")
    mock_logger.info.assert_any_call("Changed files: ['test.md']")