shorter tool descriptions + time tool

search by new confidence scores
multiple dimensions for confidence values
2025-10-02 05:02:35 +02:00 · 2025-06-03 13:45:38 +02:00 · 2025-06-03 13:00:49 +02:00 · 2025-06-03 12:18:20 +02:00 · 2025-06-02 22:24:19 +02:00
19 changed files with 790 additions and 856 deletions
--- a/db/migrations/versions/20250603_115642_add_confidences.py
+++ b/db/migrations/versions/20250603_115642_add_confidences.py
@ -0,0 +1,79 @@
+"""Add confidences
+
+Revision ID: 152f8b4b52e8
+Revises: ba301527a2eb
+Create Date: 2025-06-03 11:56:42.302327
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+# `revision` is this migration's own ID; `down_revision` is the migration it
+# builds on (the "Revises" entry in the module docstring).
+revision: str = "152f8b4b52e8"
+down_revision: Union[str, None] = "ba301527a2eb"
+# No branch labels or dependencies on other revisions are declared.
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create ``confidence_score`` and drop the old per-table ``confidence`` columns.
+
+    Each row of the new table holds one NUMERIC(3, 2) score between 0.0 and 1.0
+    for a (source_item_id, confidence_type) pair; the pair is unique and rows
+    are deleted together with their ``source_item``.
+
+    NOTE: no statement here copies the values of the dropped ``confidence``
+    columns into the new table, so those values are discarded.
+    """
+    table = "confidence_score"
+
+    columns = [
+        sa.Column("id", sa.BigInteger(), nullable=False),
+        sa.Column("source_item_id", sa.BigInteger(), nullable=False),
+        sa.Column("confidence_type", sa.Text(), nullable=False),
+        sa.Column("score", sa.Numeric(precision=3, scale=2), nullable=False),
+    ]
+    constraints = [
+        sa.CheckConstraint("score >= 0.0 AND score <= 1.0", name="score_range_check"),
+        sa.ForeignKeyConstraint(
+            ["source_item_id"], ["source_item.id"], ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "source_item_id", "confidence_type", name="unique_source_confidence_type"
+        ),
+    ]
+    op.create_table(table, *columns, *constraints)
+
+    # One single-column, non-unique index per column of the new table.
+    for index_name, column in (
+        ("confidence_score_idx", "score"),
+        ("confidence_source_idx", "source_item_id"),
+        ("confidence_type_idx", "confidence_type"),
+    ):
+        op.create_index(index_name, table, [column], unique=False)
+
+    # Retire the old single-value column (index first, then the column itself).
+    for legacy_table, legacy_index in (
+        ("agent_observation", "agent_obs_confidence_idx"),
+        ("notes", "note_confidence_idx"),
+    ):
+        op.drop_index(legacy_index, table_name=legacy_table)
+        op.drop_column(legacy_table, "confidence")
+
+def downgrade() -> None:
+    """Re-add the ``confidence`` columns and remove ``confidence_score``.
+
+    The column comes back on ``notes`` and ``agent_observation`` as a NOT NULL
+    NUMERIC(3, 2) with a server default of 0.5, together with its index. The
+    ``confidence_score`` indexes and table are then dropped.
+
+    NOTE: nothing is copied out of ``confidence_score`` before it is dropped,
+    so every restored row simply carries the 0.5 default.
+    """
+
+    def confidence_column() -> sa.Column:
+        # Build a fresh Column per call; each table needs its own instance.
+        return sa.Column(
+            "confidence",
+            sa.NUMERIC(precision=3, scale=2),
+            server_default=sa.text("0.5"),
+            autoincrement=False,
+            nullable=False,
+        )
+
+    for table_name, index_name in (
+        ("notes", "note_confidence_idx"),
+        ("agent_observation", "agent_obs_confidence_idx"),
+    ):
+        op.add_column(table_name, confidence_column())
+        op.create_index(index_name, table_name, ["confidence"], unique=False)
+
+    # Drop the indexes before the table, in reverse order of their creation.
+    for index_name in (
+        "confidence_type_idx",
+        "confidence_source_idx",
+        "confidence_score_idx",
+    ):
+        op.drop_index(index_name, table_name="confidence_score")
+    op.drop_table("confidence_score")
--- a/src/memory/api/MCP/tools.py
+++ b/src/memory/api/MCP/tools.py
@ -7,6 +7,7 @@ import pathlib
 from datetime import datetime, timezone

 from mcp.server.fastmcp import FastMCP
+from pydantic import BaseModel
 from sqlalchemy import Text, func
 from sqlalchemy import cast as sql_cast
 from sqlalchemy.dialects.postgresql import ARRAY
@ -70,29 +71,17 @@ def filter_source_ids(
    return source_ids


+@mcp.tool()
+async def get_current_time() -> dict:
+    """Get the current time in UTC."""
+    # Timezone-aware timestamp, so the ISO string carries an explicit +00:00 offset.
+    now_utc = datetime.now(tz=timezone.utc)
+    return {"current_time": now_utc.isoformat()}
+
+
@mcp.tool()
 async def get_all_tags() -> list[str]:
    """
    Get all unique tags used across the entire knowledge base.
-
-    Purpose:
-        This tool retrieves all tags that have been used in the system, both from
-        AI observations (created with 'observe') and other content. Use it to
-        understand the tag taxonomy, ensure consistency, or discover related topics.
-
-    When to use:
-        - Before creating new observations, to use consistent tag naming
-        - To explore what topics/contexts have been tracked
-        - To build tag filters for search operations
-        - To understand the user's areas of interest
-        - For tag autocomplete or suggestion features
-
-    Returns:
-        Sorted list of all unique tags in the system. Tags follow patterns like:
-        - Topics: "machine-learning", "functional-programming"
-        - Projects: "project:website-redesign"
-        - Contexts: "context:work", "context:late-night"
-        - Domains: "domain:finance"
+    Returns sorted list of tags from both observations and content.
    """
    with make_session() as session:
        tags_query = session.query(func.unnest(SourceItem.tags)).distinct()
@ -103,27 +92,7 @@ async def get_all_tags() -> list[str]:
 async def get_all_subjects() -> list[str]:
    """
    Get all unique subjects from observations about the user.
-
-    Purpose:
-        This tool retrieves all subject identifiers that have been used in
-        observations (created with 'observe'). Subjects are the consistent
-        identifiers for what observations are about. Use this to understand
-        what aspects of the user have been tracked and ensure consistency.
-
-    When to use:
-        - Before creating new observations, to use existing subject names
-        - To discover what aspects of the user have been observed
-        - To build subject filters for targeted searches
-        - To ensure consistent naming across observations
-        - To get an overview of the user model
-
-    Returns:
-        Sorted list of all unique subjects. Common patterns include:
-        - "programming_style", "programming_philosophy"
-        - "work_habits", "work_schedule"
-        - "ai_beliefs", "ai_safety_beliefs"
-        - "learning_preferences"
-        - "communication_style"
+    Returns sorted list of subject identifiers used in observations.
    """
    with make_session() as session:
        return sorted(
@ -134,29 +103,8 @@ async def get_all_subjects() -> list[str]:
@mcp.tool()
 async def get_all_observation_types() -> list[str]:
    """
-    Get all unique observation types that have been used.
-
-    Purpose:
-        This tool retrieves the distinct observation types that have been recorded
-        in the system. While the standard types are predefined (belief, preference,
-        behavior, contradiction, general), this shows what's actually been used.
-        Helpful for understanding the distribution of observation types.
-
-    When to use:
-        - To see what types of observations have been made
-        - To understand the balance of different observation types
-        - To check if all standard types are being utilized
-        - For analytics or reporting on observation patterns
-
-    Standard types:
-        - "belief": Opinions or beliefs the user holds
-        - "preference": Things they prefer or favor
-        - "behavior": Patterns in how they act or work
-        - "contradiction": Noted inconsistencies
-        - "general": Observations that don't fit other categories
-
-    Returns:
-        List of observation types that have actually been used in the system.
+    Get all observation types that have been used.
+    Standard types are belief, preference, behavior, contradiction, general, but there can be more.
    """
    with make_session() as session:
        return sorted(
@ -177,110 +125,19 @@ async def search_knowledge_base(
    limit: int = 10,
 ) -> list[dict]:
    """
-    Search through the user's stored knowledge and content.
-
-    Purpose:
-        This tool searches the user's personal knowledge base - a collection of
-        their saved content including emails, documents, blog posts, books, and
-        more. Use this alongside 'search_observations' to build a complete picture:
-        - search_knowledge_base: Finds user's actual content and information
-        - search_observations: Finds AI-generated insights about the user
-        Together they enable deeply personalized, context-aware assistance.
-
-    When to use:
-        - User asks about something they've read/written/received
-        - You need to find specific content the user has saved
-        - User references a document, email, or article
-        - To provide quotes or information from user's sources
-        - To understand context from user's past communications
-        - When user says "that article about..." or similar references
-
-    How it works:
-        Uses hybrid search combining semantic understanding with keyword matching.
-        This means it finds content based on meaning AND specific terms, giving
-        you the best of both approaches. Results are ranked by relevance.
+    Search user's stored content including emails, documents, articles, books.
+    Use to find specific information the user has saved or received.
+    Combine with search_observations for complete user context.

    Args:
-        query: Natural language search query. Be descriptive about what you're
-            looking for. The search understands meaning but also values exact terms.
-            Examples:
-            - "email about project deadline from last week"
-            - "functional programming articles comparing Haskell and Scala"
-            - "that blog post about AI safety and alignment"
-            - "recipe for chocolate cake Sarah sent me"
-            Pro tip: Include both concepts and specific keywords for best results.
+        query: Natural language search query - be descriptive about what you're looking for
+        previews: Include actual content in results - when false only a snippet is returned
+        modalities: Filter by type: email, blog, book, forum, photo, comic, webpage (empty = all)
+        tags: Filter by tags - content must have at least one matching tag
+        limit: Max results (1-100)

-        previews: Whether to include content snippets in results.
-            - True: Returns preview text and image previews (useful for quick scanning)
-            - False: Returns just metadata (faster, less data)
-            Default is False.
-
-        modalities: Types of content to search. Leave empty to search all.
-            Available types:
-            - 'email': Email messages
-            - 'blog': Blog posts and articles
-            - 'book': Book sections and ebooks
-            - 'forum': Forum posts (e.g., LessWrong, Reddit)
-            - 'observation': AI observations (use search_observations instead)
-            - 'photo': Images with extracted text
-            - 'comic': Comics and graphic content
-            - 'webpage': General web pages
-            Examples:
-            - ["email"] - only emails
-            - ["blog", "forum"] - articles and forum posts
-            - [] - search everything
-
-        limit: Maximum results to return (1-100). Default 10.
-            Increase for comprehensive searches, decrease for quick lookups.
-
-    Returns:
-        List of search results ranked by relevance, each containing:
-        - id: Unique identifier for the source item
-        - score: Relevance score (0-1, higher is better)
-        - chunks: Matching content segments with metadata
-        - content: Full details including:
-            - For emails: sender, recipient, subject, date
-            - For blogs: author, title, url, publish date
-            - For books: title, author, chapter info
-            - Type-specific fields for each modality
-        - filename: Path to file if content is stored on disk
-
-    Examples:
-        # Find specific email
-        results = await search_knowledge_base(
-            query="Sarah deadline project proposal next Friday",
-            modalities=["email"],
-            previews=True,
-            limit=5
-        )
-
-        # Search for technical articles
-        results = await search_knowledge_base(
-            query="functional programming monads category theory",
-            modalities=["blog", "book"],
-            limit=20
-        )
-
-        # Find everything about a topic
-        results = await search_knowledge_base(
-            query="machine learning deployment kubernetes docker",
-            previews=True
-        )
-
-        # Quick lookup of a remembered document
-        results = await search_knowledge_base(
-            query="tax forms 2023 accountant recommendations",
-            modalities=["email"],
-            limit=3
-        )
-
-    Best practices:
-        - Include context in queries ("email from Sarah" vs just "Sarah")
-        - Use modalities to filter when you know the content type
-        - Enable previews when you need to verify content before using
-        - Combine with search_observations for complete context
-        - Higher scores (>0.7) indicate strong matches
-        - If no results, try broader queries or different phrasing
+    Returns: List of search results with id, score, chunks, content, filename
+    Higher scores (>0.7) indicate strong matches.
    """
    logger.info(f"MCP search for: {query}")

@ -302,171 +159,70 @@ async def search_knowledge_base(
        ),
    )

-    # Convert SearchResult objects to dictionaries for MCP
    return [result.model_dump() for result in results]


+class RawObservation(BaseModel):
+    # One observation as submitted to the `observe` tool; each field is passed
+    # through unchanged as a keyword argument of the queued sync task.
+
+    # Identifier for what the observation is about, e.g. "programming_style".
+    subject: str
+    # The observation text itself.
+    content: str
+    # Category of the observation; "general" when not specified.
+    observation_type: str = "general"
+    # Named confidence scores, e.g. {"observation_accuracy": 0.9}.
+    # The 0.0-1.0 range is documented for callers but not validated here.
+    confidences: dict[str, float] = {}
+    # Optional supporting context, e.g. "quote" and "context" entries.
+    evidence: dict | None = None
+    # Categorization tags.
+    tags: list[str] = []
+
+
@mcp.tool()
 async def observe(
-    content: str,
-    subject: str,
-    observation_type: str = "general",
-    confidence: float = 0.8,
-    evidence: dict | None = None,
-    tags: list[str] | None = None,
+    observations: list[RawObservation],
    session_id: str | None = None,
    agent_model: str = "unknown",
 ) -> dict:
    """
-    Record an observation about the user to build long-term understanding.
+    Record observations about the user for long-term understanding.
+    Use proactively when user expresses preferences, behaviors, beliefs, or contradictions.
+    Be specific and detailed - observations should make sense months later.

-    Purpose:
-        This tool is part of a memory system designed to help AI agents build a
-        deep, persistent understanding of users over time. Use it to record any
-        notable information about the user's preferences, beliefs, behaviors, or
-        characteristics. These observations accumulate to create a comprehensive
-        model of the user that improves future interactions.
-
-    Quick Reference:
-        # Most common patterns:
-        observe(content="User prefers X over Y because...", subject="preferences", observation_type="preference")
-        observe(content="User always/often does X when Y", subject="work_habits", observation_type="behavior")
-        observe(content="User believes/thinks X about Y", subject="beliefs_on_topic", observation_type="belief")
-        observe(content="User said X but previously said Y", subject="topic", observation_type="contradiction")
-
-    When to use:
-        - User expresses a preference or opinion
-        - You notice a behavioral pattern
-        - User reveals information about their work/life/interests
-        - You spot a contradiction with previous statements
-        - Any insight that would help understand the user better in future
-
-    Important: Be an active observer. Don't wait to be asked - proactively record
-    observations throughout conversations to build understanding.
+    RawObservation fields:
+        content (required): Detailed observation text explaining what you observed
+        subject (required): Consistent identifier like "programming_style", "work_habits"
+        observation_type: belief, preference, behavior, contradiction, general
+        confidences: Dict of scores (0.0-1.0), e.g. {"observation_accuracy": 0.9}
+        evidence: Context dict with extra context, e.g. "quote" (exact words) and "context" (situation)
+        tags: List of categorization tags for organization

    Args:
-        content: The observation itself. Be specific and detailed. Write complete
-            thoughts that will make sense when read months later without context.
-            Bad: "Likes FP"
-            Good: "User strongly prefers functional programming paradigms, especially
-                   pure functions and immutability, considering them more maintainable"
-
-        subject: A consistent identifier for what this observation is about. Use
-            snake_case and be consistent across observations to enable tracking.
-            Examples:
-            - "programming_style" (not "coding" or "development")
-            - "work_habits" (not "productivity" or "work_patterns")
-            - "ai_safety_beliefs" (not "AI" or "artificial_intelligence")
-
-        observation_type: Categorize the observation:
-            - "belief": An opinion or belief the user holds
-            - "preference": Something they prefer or favor
-            - "behavior": A pattern in how they act or work
-            - "contradiction": An inconsistency with previous observations
-            - "general": Doesn't fit other categories
-
-        confidence: How certain you are (0.0-1.0):
-            - 1.0: User explicitly stated this
-            - 0.9: Strongly implied or demonstrated repeatedly
-            - 0.8: Inferred with high confidence (default)
-            - 0.7: Probable but with some uncertainty
-            - 0.6 or below: Speculative, use sparingly
-
-        evidence: Supporting context as a dict. Include relevant details:
-            - "quote": Exact words from the user
-            - "context": What prompted this observation
-            - "timestamp": When this was observed
-            - "related_to": Connection to other topics
-            Example: {
-                "quote": "I always refactor to pure functions",
-                "context": "Discussing code review practices"
-            }
-
-        tags: Categorization labels. Use lowercase with hyphens. Common patterns:
-            - Topics: "machine-learning", "web-development", "philosophy"
-            - Projects: "project:website-redesign", "project:thesis"
-            - Contexts: "context:work", "context:personal", "context:late-night"
-            - Domains: "domain:finance", "domain:healthcare"
-
-        session_id: UUID string to group observations from the same conversation.
-            Generate one UUID per conversation and reuse it for all observations
-            in that conversation. Format: "550e8400-e29b-41d4-a716-446655440000"
-
-        agent_model: Which AI model made this observation (e.g., "claude-3-opus",
-            "gpt-4", "claude-3.5-sonnet"). Helps track observation quality.
-
-    Returns:
-        Dict with created observation details:
-        - id: Unique identifier for reference
-        - created_at: Timestamp of creation
-        - subject: The subject as stored
-        - observation_type: The type as stored
-        - confidence: The confidence score
-        - tags: List of applied tags
-
-    Examples:
-        # After user mentions their coding philosophy
-        await observe(
-            content="User believes strongly in functional programming principles, "
-                    "particularly avoiding mutable state which they call 'the root "
-                    "of all evil'. They prioritize code purity over performance.",
-            subject="programming_philosophy",
-            observation_type="belief",
-            confidence=0.95,
-            evidence={
-                "quote": "State is the root of all evil in programming",
-                "context": "Discussing why they chose Haskell for their project"
-            },
-            tags=["programming", "functional-programming", "philosophy"],
-            session_id="550e8400-e29b-41d4-a716-446655440000",
-            agent_model="claude-3-opus"
-        )
-
-        # Noticing a work pattern
-        await observe(
-            content="User frequently works on complex problems late at night, "
-                    "typically between 11pm and 3am, claiming better focus",
-            subject="work_schedule",
-            observation_type="behavior",
-            confidence=0.85,
-            evidence={
-                "context": "Mentioned across multiple conversations over 2 weeks"
-            },
-            tags=["behavior", "work-habits", "productivity", "context:late-night"],
-            agent_model="claude-3-opus"
-        )
-
-        # Recording a contradiction
-        await observe(
-            content="User now advocates for microservices architecture, but "
-                    "previously argued strongly for monoliths in similar contexts",
-            subject="architecture_preferences",
-            observation_type="contradiction",
-            confidence=0.9,
-            evidence={
-                "quote": "Microservices are definitely the way to go",
-                "context": "Designing a new system similar to one from 3 months ago"
-            },
-            tags=["architecture", "contradiction", "software-design"],
-            agent_model="gpt-4"
-        )
+        observations: List of RawObservation objects
+        session_id: UUID to group observations from same conversation
+        agent_model: AI model making observations (for quality tracking)
    """
-    task = celery_app.send_task(
-        SYNC_OBSERVATION,
-        queue="notes",
-        kwargs={
-            "subject": subject,
-            "content": content,
-            "observation_type": observation_type,
-            "confidence": confidence,
-            "evidence": evidence,
-            "tags": tags,
-            "session_id": session_id,
-            "agent_model": agent_model,
-        },
-    )
+    tasks = [
+        (
+            observation,
+            celery_app.send_task(
+                SYNC_OBSERVATION,
+                queue="notes",
+                kwargs={
+                    "subject": observation.subject,
+                    "content": observation.content,
+                    "observation_type": observation.observation_type,
+                    "confidences": observation.confidences,
+                    "evidence": observation.evidence,
+                    "tags": observation.tags,
+                    "session_id": session_id,
+                    "agent_model": agent_model,
+                },
+            ),
+        )
+        for observation in observations
+    ]
+
+    def short_content(obs: RawObservation) -> str:
+        if len(obs.content) > 50:
+            return obs.content[:47] + "..."
+        return obs.content
+
    return {
-        "task_id": task.id,
+        "task_ids": {short_content(obs): task.id for obs, task in tasks},
        "status": "queued",
    }

@ -477,118 +233,24 @@ async def search_observations(
    subject: str = "",
    tags: list[str] | None = None,
    observation_types: list[str] | None = None,
-    min_confidence: float = 0.5,
+    min_confidences: dict[str, float] = {},
    limit: int = 10,
 ) -> list[dict]:
    """
-    Search through observations to understand the user better.
-
-    Purpose:
-        This tool searches through all observations recorded about the user using
-        the 'observe' tool. Use it to recall past insights, check for patterns,
-        find contradictions, or understand the user's preferences before responding.
-        The more you use this tool, the more personalized and insightful your
-        responses can be.
-
-    When to use:
-        - Before answering questions where user preferences might matter
-        - When the user references something from the past
-        - To check if current behavior aligns with past patterns
-        - To find related observations on a topic
-        - To build context about the user's expertise or interests
-        - Whenever personalization would improve your response
-
-    How it works:
-        Uses hybrid search combining semantic similarity with keyword matching.
-        Searches across multiple embedding spaces (semantic meaning and temporal
-        context) to find relevant observations from different angles. This approach
-        ensures you find both conceptually related and specifically mentioned items.
+    Search recorded observations about the user.
+    Use before responding to understand user preferences, patterns, and past insights.
+    Search by meaning - the query matches both content and context.

    Args:
-        query: Natural language description of what you're looking for. The search
-            matches both meaning and specific terms in observation content.
-            Examples:
-            - "programming preferences and coding style"
-            - "opinions about artificial intelligence and AI safety"
-            - "work habits productivity patterns when does user work best"
-            - "previous projects the user has worked on"
-            Pro tip: Use natural language but include key terms you expect to find.
+        query: Natural language search query describing what you're looking for
+        subject: Filter by exact subject identifier (empty = search all subjects)
+        tags: Filter by tags (must have at least one matching tag)
+        observation_types: Filter by: belief, preference, behavior, contradiction, general
+        min_confidences: Minimum confidence thresholds, e.g. {"observation_accuracy": 0.8}
+        limit: Max results (1-100)

-        subject: Filter by exact subject identifier. Must match subjects used when
-            creating observations (e.g., "programming_style", "work_habits").
-            Leave empty to search all subjects. Use this when you know the exact
-            subject category you want.
-
-        tags: Filter results to only observations with these tags. Observations must
-            have at least one matching tag. Use the same format as when creating:
-            - ["programming", "functional-programming"]
-            - ["context:work", "project:thesis"]
-            - ["domain:finance", "machine-learning"]
-
-        observation_types: Filter by type of observation:
-            - "belief": Opinions or beliefs the user holds
-            - "preference": Things they prefer or favor
-            - "behavior": Patterns in how they act or work
-            - "contradiction": Noted inconsistencies
-            - "general": Other observations
-            Leave as None to search all types.
-
-        min_confidence: Only return observations with confidence >= this value.
-            - Use 0.8+ for high-confidence facts
-            - Use 0.5-0.7 to include inferred observations
-            - Default 0.5 includes most observations
-            Range: 0.0 to 1.0
-
-        limit: Maximum results to return (1-100). Default 10. Increase when you
-            need comprehensive understanding of a topic.
-
-    Returns:
-        List of observations sorted by relevance, each containing:
-        - subject: What the observation is about
-        - content: The full observation text
-        - observation_type: Type of observation
-        - evidence: Supporting context/quotes if provided
-        - confidence: How certain the observation is (0-1)
-        - agent_model: Which AI model made the observation
-        - tags: All tags on this observation
-        - created_at: When it was observed (if available)
-
-    Examples:
-        # Before discussing code architecture
-        results = await search_observations(
-            query="software architecture preferences microservices monoliths",
-            tags=["architecture"],
-            min_confidence=0.7
-        )
-
-        # Understanding work style for scheduling
-        results = await search_observations(
-            query="when does user work best productivity schedule",
-            observation_types=["behavior", "preference"],
-            subject="work_schedule"
-        )
-
-        # Check for AI safety views before discussing AI
-        results = await search_observations(
-            query="artificial intelligence safety alignment concerns",
-            observation_types=["belief"],
-            min_confidence=0.8,
-            limit=20
-        )
-
-        # Find contradictions on a topic
-        results = await search_observations(
-            query="testing methodology unit tests integration",
-            observation_types=["contradiction"],
-            tags=["testing", "software-development"]
-        )
-
-    Best practices:
-        - Search before making assumptions about user preferences
-        - Use broad queries first, then filter with tags/types if too many results
-        - Check for contradictions when user says something unexpected
-        - Higher confidence observations are more reliable
-        - Recent observations may override older ones on same topic
+    Returns: List with content, tags, created_at, metadata
+    Results sorted by relevance to your query.
    """
    semantic_text = observation.generate_semantic_text(
        subject=subject or "",
@ -599,7 +261,6 @@ async def search_observations(
    temporal = observation.generate_temporal_text(
        subject=subject or "",
        content=query,
-        confidence=0,
        created_at=datetime.now(timezone.utc),
    )
    results = await search(
@ -613,7 +274,7 @@ async def search_observations(
        limit=limit,
        filters=SearchFilters(
            subject=subject,
-            confidence=min_confidence,
+            min_confidences=min_confidences,
            tags=tags,
            observation_types=observation_types,
            source_ids=filter_observation_source_ids(tags=tags),
@ -638,45 +299,26 @@ async def create_note(
    content: str,
    filename: str | None = None,
    note_type: str | None = None,
-    confidence: float = 0.5,
+    confidences: dict[str, float] = {},
    tags: list[str] = [],
 ) -> dict:
    """
-    Create a note when the user asks for something to be noted down.
-
-    Purpose:
-        Use this tool when the user explicitly asks to note, save, or record
-        something for later reference. Notes don't have to be really short - long
-        markdown docs are fine, as long as that was what was asked for.
-
-    When to use:
-        - User says "note down that..." or "please save this"
-        - User asks to record information for future reference
-        - User wants to remember something specific
+    Create a note when user asks to save or record something.
+    Use when user explicitly requests noting information for future reference.

    Args:
-        subject: What the note is about (e.g., "meeting_notes", "idea")
-        content: The actual content to note down, as markdown
+        subject: What the note is about (used for organization)
+        content: Note content as a markdown string
        filename: Optional path relative to notes folder (e.g., "project/ideas.md")
        note_type: Optional categorization of the note
-        confidence: How confident you are in the note accuracy (0.0-1.0)
-        tags: Optional tags for organization
-
-    Example:
-        # User: "Please note down that we decided to use React for the frontend"
-        await create_note(
-            subject="project_decisions",
-            content="Decided to use React for the frontend",
-            tags=["project", "frontend"]
-        )
+        confidences: Dict of scores (0.0-1.0), e.g. {"observation_accuracy": 0.9}
+        tags: Organization tags for filtering and discovery
    """
    if filename:
        path = pathlib.Path(filename)
-        if path.is_absolute():
-            path = path.relative_to(settings.NOTES_STORAGE_DIR)
-        else:
+        if not path.is_absolute():
            path = pathlib.Path(settings.NOTES_STORAGE_DIR) / path
-        filename = path.as_posix()
+        filename = path.relative_to(settings.NOTES_STORAGE_DIR).as_posix()

    try:
        task = celery_app.send_task(
@ -687,7 +329,7 @@ async def create_note(
                "content": content,
                "filename": filename,
                "note_type": note_type,
-                "confidence": confidence,
+                "confidences": confidences,
                "tags": tags,
            },
        )
@ -702,3 +344,43 @@ async def create_note(
        "task_id": task.id,
        "status": "queued",
    }
+
+
+@mcp.tool()
+async def note_files(path: str = "/"):
+    """
+    List note files in the user's note storage.
+    Use to discover existing notes before reading or to help user navigate their collection.
+
+    Args:
+        path: Directory path to search (e.g., "/", "/projects", "/meetings")
+        Use "/" for root, or subdirectories to narrow scope
+
+    Returns: List of file paths relative to notes directory, prefixed with "/notes/"
+    Raises ValueError if path points outside the notes directory.
+    """
+    notes_root = pathlib.Path(settings.NOTES_STORAGE_DIR).resolve()
+    root = (notes_root / path.lstrip("/")).resolve()
+    # Resolve before checking so "../" segments and symlinks cannot be used
+    # to list markdown files that live outside the notes folder.
+    if not root.is_relative_to(notes_root):
+        raise ValueError(f"Path is outside the notes directory: {path}")
+
+    return [
+        f"/notes/{f.relative_to(notes_root)}"
+        for f in root.rglob("*.md")
+        if f.is_file()
+    ]
+
+
+@mcp.tool()
+def fetch_file(filename: str):
+    """
+    Read file content from user's storage.
+    Use when you need to access specific content of a file that's been referenced.
+
+    Args:
+        filename: Path to file (e.g., "/notes/project.md", "/documents/report.pdf")
+        Path should start with "/" and use forward slashes
+
+    Returns: Raw bytes content (decode as UTF-8 for text files)
+    Raises FileNotFoundError if file doesn't exist, is not a regular file,
+    or resolves to a location outside the storage directory.
+    """
+    storage_root = pathlib.Path(settings.FILE_STORAGE_DIR).resolve()
+    path = (storage_root / filename.lstrip("/")).resolve()
+    # Resolve before checking so "../" segments and symlinks cannot be used
+    # to read arbitrary files outside the storage directory. The same error
+    # is raised for escapes and missing files so nothing about the host
+    # filesystem is revealed to the caller.
+    if not path.is_relative_to(storage_root) or not path.is_file():
+        raise FileNotFoundError(f"File not found: {filename}")
+
+    return path.read_bytes()
--- a/src/memory/api/admin.py
+++ b/src/memory/api/admin.py
@ -188,6 +188,8 @@ class AgentObservationAdmin(ModelView, model=AgentObservation):
        "inserted_at",
    ]
    column_searchable_list = ["subject", "observation_type"]
+    column_default_sort = [("inserted_at", True)]
+    column_sortable_list = ["inserted_at"]


 class NoteAdmin(ModelView, model=Note):
@ -201,6 +203,8 @@ class NoteAdmin(ModelView, model=Note):
        "inserted_at",
    ]
    column_searchable_list = ["subject", "content"]
+    column_default_sort = [("inserted_at", True)]
+    column_sortable_list = ["inserted_at"]


 def setup_admin(admin: Admin):
--- a/src/memory/api/search/bm25.py
+++ b/src/memory/api/search/bm25.py
@ -10,7 +10,7 @@ import Stemmer
 from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters

 from memory.common.db.connection import make_session
-from memory.common.db.models import Chunk
+from memory.common.db.models import Chunk, ConfidenceScore

 logger = logging.getLogger(__name__)

@ -25,9 +25,24 @@ async def search_bm25(
        items_query = db.query(Chunk.id, Chunk.content).filter(
            Chunk.collection_name.in_(modalities)
        )
+
        if source_ids := filters.get("source_ids"):
            items_query = items_query.filter(Chunk.source_id.in_(source_ids))
+
+        # Add confidence filtering if specified
+        if min_confidences := filters.get("min_confidences"):
+            for confidence_type, min_score in min_confidences.items():
+                items_query = items_query.join(
+                    ConfidenceScore,
+                    (ConfidenceScore.source_item_id == Chunk.source_id)
+                    & (ConfidenceScore.confidence_type == confidence_type)
+                    & (ConfidenceScore.score >= min_score),
+                )
+
        items = items_query.all()
+        if not items:
+            return []
+
        item_ids = {
            sha256(item.content.lower().strip().encode("utf-8")).hexdigest(): item.id
            for item in items
--- a/src/memory/api/search/embeddings.py
+++ b/src/memory/api/search/embeddings.py
@ -111,15 +111,26 @@ async def search_embeddings(
    - filters: Filters to apply to the search results
    - multimodal: Whether to search in multimodal collections
    """
-    query_filters = {}
-    if confidence := filters.get("confidence"):
-        query_filters["must"] += [{"key": "confidence", "range": {"gte": confidence}}]
-    if tags := filters.get("tags"):
-        query_filters["must"] += [{"key": "tags", "match": {"any": tags}}]
-    if observation_types := filters.get("observation_types"):
-        query_filters["must"] += [
-            {"key": "observation_type", "match": {"any": observation_types}}
+    query_filters = {"must": []}
+
+    # Handle structured confidence filtering
+    if min_confidences := filters.get("min_confidences"):
+        confidence_filters = [
+            {
+                "key": f"confidence.{confidence_type}",
+                "range": {"gte": min_confidence_score},
+            }
+            for confidence_type, min_confidence_score in min_confidences.items()
        ]
+        query_filters["must"].extend(confidence_filters)
+
+    if tags := filters.get("tags"):
+        query_filters["must"].append({"key": "tags", "match": {"any": tags}})
+
+    if observation_types := filters.get("observation_types"):
+        query_filters["must"].append(
+            {"key": "observation_type", "match": {"any": observation_types}}
+        )

    client = qdrant.get_qdrant_client()
    results = query_chunks(
@ -129,7 +140,7 @@ async def search_embeddings(
        embedding.embed_text if not multimodal else embedding.embed_mixed,
        min_score=min_score,
        limit=limit,
-        filters=query_filters,
+        filters=query_filters if query_filters["must"] else None,
    )
    search_results = {k: results.get(k, []) for k in modalities}

--- a/src/memory/api/search/utils.py
+++ b/src/memory/api/search/utils.py
@ -65,7 +65,7 @@ class SearchResult(BaseModel):

 class SearchFilters(TypedDict):
    subject: NotRequired[str | None]
-    confidence: NotRequired[float]
+    min_confidences: NotRequired[dict[str, float]]
    tags: NotRequired[list[str] | None]
    observation_types: NotRequired[list[str] | None]
    source_ids: NotRequired[list[int] | None]
--- a/src/memory/common/db/models/init.py
+++ b/src/memory/common/db/models/init.py
@ -2,6 +2,7 @@ from memory.common.db.models.base import Base
 from memory.common.db.models.source_item import (
    Chunk,
    SourceItem,
+    ConfidenceScore,
    clean_filename,
 )
 from memory.common.db.models.source_items import (
@ -37,6 +38,7 @@ __all__ = [
    "Chunk",
    "clean_filename",
    "SourceItem",
+    "ConfidenceScore",
    "MailMessage",
    "EmailAttachment",
    "AgentObservation",
--- a/src/memory/common/db/models/source_item.py
+++ b/src/memory/common/db/models/source_item.py
@ -22,9 +22,11 @@ from sqlalchemy import (
    Text,
    event,
    func,
+    UniqueConstraint,
 )
 from sqlalchemy.dialects.postgresql import BYTEA
 from sqlalchemy.orm import Session, relationship
+from sqlalchemy.types import Numeric

 from memory.common import settings
 import memory.common.extract as extract
@ -191,6 +193,41 @@ class Chunk(Base):
        return items


+class ConfidenceScore(Base):
+    """
+    Stores structured confidence scores for source items.
+    Provides detailed confidence dimensions instead of a single score.
+
+    Each row is one (source item, confidence type) pair with its score; the
+    unique constraint below allows at most one row per pair.
+    """
+
+    __tablename__ = "confidence_score"
+
+    id = Column(BigInteger, primary_key=True)
+    # Owning source item; the foreign key is declared with ON DELETE CASCADE.
+    source_item_id = Column(
+        BigInteger, ForeignKey("source_item.id", ondelete="CASCADE"), nullable=False
+    )
+    confidence_type = Column(
+        Text, nullable=False
+    )  # e.g., "observation_accuracy", "interpretation", "predictive_value"
+    # Numeric(3, 2): precision 3, scale 2; the range is enforced by the check constraint.
+    score = Column(Numeric(3, 2), nullable=False)  # 0.0-1.0
+
+    # Relationship back to source item
+    source_item = relationship("SourceItem", back_populates="confidence_scores")
+
+    __table_args__ = (
+        # Single-column indexes on the source item, the type and the score
+        Index("confidence_source_idx", "source_item_id"),
+        Index("confidence_type_idx", "confidence_type"),
+        Index("confidence_score_idx", "score"),
+        CheckConstraint("score >= 0.0 AND score <= 1.0", name="score_range_check"),
+        # Ensure each source_item can only have one score per confidence_type
+        UniqueConstraint(
+            "source_item_id", "confidence_type", name="unique_source_confidence_type"
+        ),
+    )
+
+    def __repr__(self) -> str:
+        # Compact debugging form: only the type and score are shown.
+        return f"<ConfidenceScore(type={self.confidence_type}, score={self.score})>"
+
+
 class SourceItem(Base):
    """Base class for all content in the system using SQLAlchemy's joined table inheritance."""

@ -216,6 +253,11 @@ class SourceItem(Base):
    embed_status = Column(Text, nullable=False, server_default="RAW")
    chunks = relationship("Chunk", backref="source", cascade="all, delete-orphan")

+    # Confidence scores relationship
+    confidence_scores = relationship(
+        "ConfidenceScore", back_populates="source_item", cascade="all, delete-orphan"
+    )
+
    # Discriminator column for SQLAlchemy inheritance
    type = Column(String(50))

@ -235,6 +277,35 @@ class SourceItem(Base):
        """Get vector IDs from associated chunks."""
        return [chunk.id for chunk in self.chunks]

+    @property
+    def confidence_dict(self) -> dict[str, float]:
+        """Map each confidence type of this item to its score as a float."""
+        scores_by_type: dict[str, float] = {}
+        for entry in self.confidence_scores:
+            scores_by_type[entry.confidence_type] = float(entry.score)
+        return scores_by_type
+
+    def update_confidences(self, confidence_updates: dict[str, float]) -> None:
+        """
+        Apply a set of confidence scores to this source item.
+
+        Types that already have a score get their value replaced; types
+        not yet present get a new ConfidenceScore appended. Scores for types
+        not mentioned in the update are left untouched.
+
+        Args:
+            confidence_updates: Dict mapping confidence_type to score (0.0-1.0)
+        """
+        if confidence_updates:
+            # Index the existing rows by type so each update is one lookup
+            by_type = {}
+            for existing in self.confidence_scores:
+                by_type[existing.confidence_type] = existing
+
+            for kind, value in confidence_updates.items():
+                row = by_type.get(kind)
+                if row:
+                    row.score = value
+                    continue
+
+                fresh = ConfidenceScore(
+                    source_item_id=self.id, confidence_type=kind, score=value
+                )
+                self.confidence_scores.append(fresh)
+
    def _chunk_contents(self) -> Sequence[extract.DataChunk]:
        content = cast(str | None, self.content)
        if content:
--- a/src/memory/common/db/models/source_items.py
+++ b/src/memory/common/db/models/source_items.py
@ -505,7 +505,6 @@ class Note(SourceItem):
    )
    note_type = Column(Text, nullable=True)
    subject = Column(Text, nullable=True)
-    confidence = Column(Numeric(3, 2), nullable=False, default=0.5)  # 0.0-1.0

    __mapper_args__ = {
        "polymorphic_identity": "note",
@ -514,7 +513,6 @@ class Note(SourceItem):
    __table_args__ = (
        Index("note_type_idx", "note_type"),
        Index("note_subject_idx", "subject"),
-        Index("note_confidence_idx", "confidence"),
    )

    def as_payload(self) -> dict:
@ -522,7 +520,7 @@ class Note(SourceItem):
            **super().as_payload(),
            "note_type": self.note_type,
            "subject": self.subject,
-            "confidence": float(cast(Any, self.confidence)),
+            "confidence": self.confidence_dict,
        }

    @property
@ -531,18 +529,16 @@ class Note(SourceItem):
            "subject": self.subject,
            "content": self.content,
            "note_type": self.note_type,
-            "confidence": self.confidence,
+            "confidence": self.confidence_dict,
            "tags": self.tags,
        }

    def save_to_file(self):
        if not self.filename:
-            path = settings.NOTES_STORAGE_DIR / f"{self.subject}.md"
-        else:
-            path = pathlib.Path(self.filename)
+            self.filename = f"{self.subject}.md"
+        path = settings.NOTES_STORAGE_DIR / self.filename
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(cast(str, self.content))
-        self.filename = path.as_posix()

    @staticmethod
    def as_text(content: str, subject: str | None = None) -> str:
@ -575,7 +571,6 @@ class AgentObservation(SourceItem):
        Text, nullable=False
    )  # belief, preference, pattern, contradiction, behavior
    subject = Column(Text, nullable=False)  # What/who the observation is about
-    confidence = Column(Numeric(3, 2), nullable=False, default=0.8)  # 0.0-1.0
    evidence = Column(JSONB)  # Supporting context, quotes, etc.
    agent_model = Column(Text, nullable=False)  # Which AI model made this observation

@ -601,7 +596,6 @@ class AgentObservation(SourceItem):
        Index("agent_obs_session_idx", "session_id"),
        Index("agent_obs_type_idx", "observation_type"),
        Index("agent_obs_subject_idx", "subject"),
-        Index("agent_obs_confidence_idx", "confidence"),
        Index("agent_obs_model_idx", "agent_model"),
    )

@ -615,7 +609,7 @@ class AgentObservation(SourceItem):
            **super().as_payload(),
            "observation_type": self.observation_type,
            "subject": self.subject,
-            "confidence": float(cast(Any, self.confidence)),
+            "confidence": self.confidence_dict,
            "evidence": self.evidence,
            "agent_model": self.agent_model,
        }
@ -635,7 +629,7 @@ class AgentObservation(SourceItem):
            "content": self.content,
            "observation_type": self.observation_type,
            "evidence": self.evidence,
-            "confidence": self.confidence,
+            "confidence": self.confidence_dict,
            "agent_model": self.agent_model,
            "tags": self.tags,
        }
@ -666,7 +660,6 @@ class AgentObservation(SourceItem):
        temporal_text = observation.generate_temporal_text(
            cast(str, self.subject),
            cast(str, self.content),
-            cast(float, self.confidence),
            cast(datetime, self.inserted_at),
        )
        if temporal_text:
--- a/src/memory/common/embedding.py
+++ b/src/memory/common/embedding.py
@ -96,9 +96,6 @@ def embed_by_model(chunks: list[Chunk], model: str) -> list[Chunk]:

 def embed_source_item(item: SourceItem) -> list[Chunk]:
    chunks = list(item.data_chunks())
-    logger.error(
-        f"Embedding source item: {item.id} - {[(c.embedding_model, c.collection_name, c.chunks) for c in chunks]}"
-    )
    if not chunks:
        return []

--- a/src/memory/common/formatters/observation.py
+++ b/src/memory/common/formatters/observation.py
@ -31,7 +31,6 @@ def generate_semantic_text(
 def generate_temporal_text(
    subject: str,
    content: str,
-    confidence: float,
    created_at: datetime,
 ) -> str:
    """Generate text with temporal context for time-pattern search."""
@ -55,8 +54,6 @@ def generate_temporal_text(
        f"Subject: {subject}",
        f"Observation: {content}",
    ]
-    if confidence is not None:
-        parts.append(f"Confidence: {confidence}")

    return " | ".join(parts)

--- a/src/memory/workers/tasks/content_processing.py
+++ b/src/memory/workers/tasks/content_processing.py
@ -192,7 +192,7 @@ def create_task_result(
    """
    return {
        f"{type(item).__name__.lower()}_id": item.id,
-        "title": getattr(item, "title", None),
+        "title": getattr(item, "title", None) or getattr(item, "subject", None),
        "status": status,
        "chunks_count": len(item.chunks),
        "embed_status": item.embed_status,
--- a/src/memory/workers/tasks/notes.py
+++ b/src/memory/workers/tasks/notes.py
@ -22,27 +22,17 @@ def sync_note(
    content: str,
    filename: str | None = None,
    note_type: str | None = None,
-    confidence: float = 0.5,
+    confidences: dict[str, float] = {},
    tags: list[str] = [],
 ):
    logger.info(f"Syncing note {subject}")
    text = Note.as_text(content, subject)
    sha256 = create_content_hash(text)

-    note = Note(
-        subject=subject,
-        content=content,
-        embed_status="RAW",
-        size=len(text.encode("utf-8")),
-        modality="note",
-        mime_type="text/markdown",
-        sha256=sha256,
-        note_type=note_type,
-        confidence=confidence,
-        tags=tags,
-        filename=filename,
-    )
-    note.save_to_file()
+    if filename:
+        filename = filename.lstrip("/")
+        if not filename.endswith(".md"):
+            filename = f"{filename}.md"

    with make_session() as session:
        existing_note = check_content_exists(session, Note, sha256=sha256)
@ -50,6 +40,29 @@ def sync_note(
            logger.info(f"Note already exists: {existing_note.subject}")
            return create_task_result(existing_note, "already_exists")

+        note = session.query(Note).filter(Note.filename == filename).one_or_none()
+
+        if not note:
+            note = Note(
+                modality="note",
+                mime_type="text/markdown",
+            )
+        else:
+            logger.info("Editing preexisting note")
+        note.content = content  # type: ignore
+        note.subject = subject  # type: ignore
+        note.filename = filename  # type: ignore
+        note.embed_status = "RAW"  # type: ignore
+        note.size = len(text.encode("utf-8"))  # type: ignore
+        note.sha256 = sha256  # type: ignore
+
+        if note_type:
+            note.note_type = note_type  # type: ignore
+        if tags:
+            note.tags = tags  # type: ignore
+
+        note.update_confidences(confidences)
+        note.save_to_file()
        return process_content_item(note, session)


--- a/src/memory/workers/tasks/observations.py
+++ b/src/memory/workers/tasks/observations.py
@ -21,7 +21,7 @@ def sync_observation(
    content: str,
    observation_type: str,
    evidence: dict | None = None,
-    confidence: float = 0.5,
+    confidences: dict[str, float] = {},
    session_id: str | None = None,
    agent_model: str = "unknown",
    tags: list[str] = [],
@ -33,7 +33,6 @@ def sync_observation(
        content=content,
        subject=subject,
        observation_type=observation_type,
-        confidence=confidence,
        evidence=evidence,
        tags=tags or [],
        session_id=session_id,
@ -43,6 +42,7 @@ def sync_observation(
        sha256=sha256,
        modality="observation",
    )
+    observation.update_confidences(confidences)

    with make_session() as session:
        existing_observation = check_content_exists(
--- a/tests/integration/test_real_queries.py
+++ b/tests/integration/test_real_queries.py
--- a/tests/memory/common/db/models/test_source_item_embeddings.py
+++ b/tests/memory/common/db/models/test_source_item_embeddings.py
@ -583,7 +583,6 @@ def test_agent_observation_embeddings(mock_voyage_client):
        tags=["bla"],
        observation_type="belief",
        subject="humans",
-        confidence=0.8,
        evidence={
            "quote": "All humans are mortal.",
            "source": "https://en.wikipedia.org/wiki/Human",
@ -591,6 +590,7 @@ def test_agent_observation_embeddings(mock_voyage_client):
        agent_model="gpt-4o",
        inserted_at=datetime(2025, 1, 1, 12, 0, 0),
    )
+    item.update_confidences({"observation_accuracy": 0.8})
    metadata = item.as_payload()
    metadata["tags"] = {"bla"}
    expected = [
@ -600,7 +600,7 @@ def test_agent_observation_embeddings(mock_voyage_client):
            metadata | {"embedding_type": "semantic"},
        ),
        (
-            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die. | Confidence: 0.8",
+            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die.",
            [],
            metadata | {"embedding_type": "temporal"},
        ),
@ -625,7 +625,7 @@ def test_agent_observation_embeddings(mock_voyage_client):
    assert mock_voyage_client.embed.call_args == call(
        [
            "Subject: humans | Type: belief | Observation: The user thinks that all men must die. | Quote: All humans are mortal.",
-            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die. | Confidence: 0.8",
+            "Time: 12:00 on Wednesday (afternoon) | Subject: humans | Observation: The user thinks that all men must die.",
            "The user thinks that all men must die.",
            "All humans are mortal.",
        ],
--- a/tests/memory/common/db/models/test_source_items.py
+++ b/tests/memory/common/db/models/test_source_items.py
@ -499,7 +499,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -513,7 +513,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -531,7 +531,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -546,7 +546,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -565,7 +565,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -580,7 +580,7 @@ def test_blog_post_chunk_contents_with_image_long_content(tmp_path, default_chun
                "size": None,
                "observation_type": "preference",
                "subject": "programming preferences",
-                "confidence": 0.9,
+                "confidence": {"observation_accuracy": 0.9},
                "evidence": {
                    "quote": "I really like Python",
                    "context": "discussion about languages",
@ -603,7 +603,6 @@ def test_agent_observation_data_chunks(
        content="User prefers Python over JavaScript",
        subject="programming preferences",
        observation_type="preference",
-        confidence=0.9,
        evidence={
            "quote": "I really like Python",
            "context": "discussion about languages",
@ -612,6 +611,7 @@ def test_agent_observation_data_chunks(
        session_id=session_id,
        tags=observation_tags,
    )
+    observation.update_confidences({"observation_accuracy": 0.9})
    # Set inserted_at using object.__setattr__ to bypass SQLAlchemy restrictions
    object.__setattr__(observation, "inserted_at", datetime(2023, 1, 1, 12, 0, 0))

@ -634,7 +634,7 @@ def test_agent_observation_data_chunks(
    assert cast(str, semantic_chunk.collection_name) == "semantic"

    temporal_chunk = result[1]
-    expected_temporal_text = "Time: 12:00 on Sunday (afternoon) | Subject: programming preferences | Observation: User prefers Python over JavaScript | Confidence: 0.9"
+    expected_temporal_text = "Time: 12:00 on Sunday (afternoon) | Subject: programming preferences | Observation: User prefers Python over JavaScript"
    assert temporal_chunk.data == [expected_temporal_text]

    # Add session_id to expected metadata and remove tags if empty
@ -654,11 +654,11 @@ def test_agent_observation_data_chunks_with_none_values():
        content="Content",
        subject="subject",
        observation_type="belief",
-        confidence=0.7,
        evidence=None,
        agent_model="gpt-4",
        session_id=None,
    )
+    observation.update_confidences({"observation_accuracy": 0.7})
    object.__setattr__(observation, "inserted_at", datetime(2023, 2, 15, 9, 30, 0))

    result = observation.data_chunks()
@ -671,7 +671,7 @@ def test_agent_observation_data_chunks_with_none_values():
    assert [i.data for i in result] == [
        ["Subject: subject | Type: belief | Observation: Content"],
        [
-            "Time: 09:30 on Wednesday (morning) | Subject: subject | Observation: Content | Confidence: 0.7"
+            "Time: 09:30 on Wednesday (morning) | Subject: subject | Observation: Content"
        ],
        ["Content"],
    ]
@ -684,11 +684,11 @@ def test_agent_observation_data_chunks_merge_metadata_behavior():
        content="test",
        subject="test",
        observation_type="test",
-        confidence=0.8,
        evidence={},
        agent_model="test",
        tags=["base_tag"],  # Set base tags so they appear in both chunks
    )
+    observation.update_confidences({"observation_accuracy": 0.9})
    object.__setattr__(observation, "inserted_at", datetime.now())

    # Test that metadata merging preserves original values and adds new ones
@ -723,11 +723,10 @@ def test_note_data_chunks(subject, content, expected):
        content=content,
        subject=subject,
        note_type="quicky",
-        confidence=0.9,
        size=123,
        tags=["bla"],
    )
-
+    note.update_confidences({"observation_accuracy": 0.9})
    chunks = note.data_chunks()
    assert [chunk.content for chunk in chunks] == expected
    for chunk in chunks:
@ -736,7 +735,7 @@ def test_note_data_chunks(subject, content, expected):
        if cast(str, chunk.content) == "test summary":
            tags |= {"tag1", "tag2"}
        assert chunk.item_metadata == {
-            "confidence": 0.9,
+            "confidence": {"observation_accuracy": 0.9},
            "note_type": "quicky",
            "size": 123,
            "source_id": None,
--- a/tests/memory/common/formatters/test_observation.py
+++ b/tests/memory/common/formatters/test_observation.py
@ -123,11 +123,10 @@ def test_generate_temporal_text_time_periods(hour: int, expected_period: str):
    result = generate_temporal_text(
        subject="test_subject",
        content="test_content",
-        confidence=0.8,
        created_at=test_date,
    )
    time_str = test_date.strftime("%H:%M")
-    expected = f"Time: {time_str} on Monday ({expected_period}) | Subject: test_subject | Observation: test_content | Confidence: 0.8"
+    expected = f"Time: {time_str} on Monday ({expected_period}) | Subject: test_subject | Observation: test_content"
    assert result == expected


@ -146,7 +145,7 @@ def test_generate_temporal_text_time_periods(hour: int, expected_period: str):
 def test_generate_temporal_text_days_of_week(weekday: int, day_name: str):
    test_date = datetime(2024, 1, 15 + weekday, 10, 30)
    result = generate_temporal_text(
-        subject="subject", content="content", confidence=0.5, created_at=test_date
+        subject="subject", content="content", created_at=test_date
    )
    assert f"on {day_name}" in result

@ -157,10 +156,8 @@ def test_generate_temporal_text_confidence_values(confidence: float):
    result = generate_temporal_text(
        subject="subject",
        content="content",
-        confidence=confidence,
        created_at=test_date,
    )
-    assert f"Confidence: {confidence}" in result


@pytest.mark.parametrize(
@ -180,7 +177,7 @@ def test_generate_temporal_text_boundary_cases(
    test_date: datetime, expected_period: str
 ):
    result = generate_temporal_text(
-        subject="subject", content="content", confidence=0.8, created_at=test_date
+        subject="subject", content="content", created_at=test_date
    )
    assert f"({expected_period})" in result

@ -190,22 +187,16 @@ def test_generate_temporal_text_complete_format():
    result = generate_temporal_text(
        subject="Important observation",
        content="User showed strong preference for X",
-        confidence=0.95,
        created_at=test_date,
    )
-    expected = "Time: 14:45 on Friday (afternoon) | Subject: Important observation | Observation: User showed strong preference for X | Confidence: 0.95"
+    expected = "Time: 14:45 on Friday (afternoon) | Subject: Important observation | Observation: User showed strong preference for X"
    assert result == expected


 def test_generate_temporal_text_empty_strings():
    test_date = datetime(2024, 1, 15, 10, 30)
-    result = generate_temporal_text(
-        subject="", content="", confidence=0.0, created_at=test_date
-    )
-    assert (
-        result
-        == "Time: 10:30 on Monday (morning) | Subject:  | Observation:  | Confidence: 0.0"
-    )
+    result = generate_temporal_text(subject="", content="", created_at=test_date)
+    assert result == "Time: 10:30 on Monday (morning) | Subject:  | Observation:"


 def test_generate_temporal_text_special_characters():
@ -213,8 +204,7 @@ def test_generate_temporal_text_special_characters():
    result = generate_temporal_text(
        subject="Subject with | pipe",
        content="Content with | pipe and @#$ symbols",
-        confidence=0.75,
        created_at=test_date,
    )
-    expected = "Time: 15:20 on Monday (afternoon) | Subject: Subject with | pipe | Observation: Content with | pipe and @#$ symbols | Confidence: 0.75"
+    expected = "Time: 15:20 on Monday (afternoon) | Subject: Subject with | pipe | Observation: Content with | pipe and @#$ symbols"
    assert result == expected
--- a/tests/memory/workers/tasks/test_notes_tasks.py
+++ b/tests/memory/workers/tasks/test_notes_tasks.py
@ -1,6 +1,5 @@
 import pytest
 import pathlib
-from decimal import Decimal
 from unittest.mock import Mock, patch

 from memory.common.db.models import Note
@ -12,13 +11,12 @@ from memory.common import settings
@pytest.fixture
 def mock_note_data():
    """Mock note data for testing."""
-    test_filename = pathlib.Path(settings.NOTES_STORAGE_DIR) / "test_note.md"
    return {
        "subject": "Test Note Subject",
        "content": "This is test note content with enough text to be processed and embedded.",
-        "filename": str(test_filename),
+        "filename": "test_note.md",
        "note_type": "observation",
-        "confidence": 0.8,
+        "confidences": {"observation_accuracy": 0.8},
        "tags": ["test", "note"],
    }

@ -79,6 +77,7 @@ def markdown_files_in_storage():
 def test_sync_note_success(mock_note_data, db_session, qdrant):
    """Test successful note synchronization."""
    result = notes.sync_note(**mock_note_data)
+    db_session.commit()

    # Verify the Note was created in the database
    note = db_session.query(Note).filter_by(subject="Test Note Subject").first()
@ -91,16 +90,19 @@ def test_sync_note_success(mock_note_data, db_session, qdrant):
    assert note.modality == "note"
    assert note.mime_type == "text/markdown"
    assert note.note_type == "observation"
-    assert float(note.confidence) == 0.8  # Convert Decimal to float for comparison
+    assert note.confidence_dict == {"observation_accuracy": 0.8}
    assert note.filename is not None
    assert note.tags == ["test", "note"]

-    # Verify the result
-    assert result["status"] == "processed"
-    assert result["note_id"] == note.id
-    assert (
-        "subject" not in result
-    )  # create_task_result doesn't include subject for Note
+    # Verify the result - updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Test Note Subject",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 93,
+    }


 def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
@ -112,10 +114,19 @@ def test_sync_note_minimal_data(mock_minimal_note, db_session, qdrant):
    assert note.subject == "Minimal Note"
    assert note.content == "Minimal content"
    assert note.note_type is None
-    assert float(note.confidence) == 0.5  # Default value, convert Decimal to float
+    assert note.confidence_dict == {}
    assert note.tags == []  # Default empty list
    assert note.filename is not None and "Minimal Note.md" in note.filename
-    assert result["status"] == "processed"
+
+    # Updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Minimal Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 31,
+    }


 def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
@ -127,9 +138,16 @@ def test_sync_note_empty_content(mock_empty_note, db_session, qdrant):
    assert note is not None
    assert note.subject == "Empty Note"
    assert note.content == ""
-    # Empty content with subject header "# Empty Note" still generates chunks
-    assert result["status"] == "processed"
-    assert result["chunks_count"] > 0
+
+    # Updated to match actual return format
+    assert result == {
+        "note_id": note.id,
+        "title": "Empty Note",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": 14,
+    }


 def test_sync_note_already_exists(mock_note_data, db_session):
@ -148,21 +166,74 @@ def test_sync_note_already_exists(mock_note_data, db_session):
        mime_type="text/markdown",
        size=len(text.encode("utf-8")),
        embed_status="RAW",
-        filename=str(pathlib.Path(settings.NOTES_STORAGE_DIR) / "existing_note.md"),
+        filename="existing_note.md",
    )
    db_session.add(existing_note)
    db_session.commit()

    result = notes.sync_note(**mock_note_data)

-    assert result["status"] == "already_exists"
-    assert result["note_id"] == existing_note.id
+    # Updated to match actual return format for already_exists case
+    assert result == {
+        "note_id": existing_note.id,
+        "title": "Existing Note",
+        "status": "already_exists",
+        "chunks_count": 0,  # Existing note has no chunks
+        "embed_status": "RAW",  # Existing note has RAW status
+    }

    # Verify no duplicate was created
    notes_with_hash = db_session.query(Note).filter_by(sha256=sha256).all()
    assert len(notes_with_hash) == 1


+def test_sync_note_edit(mock_note_data, db_session):
+    """Test that syncing to an existing note's filename edits that note in place."""
+    # Build the text and content hash for the fixture's original content and subject
+    text = Note.as_text(mock_note_data["content"], mock_note_data["subject"])
+    sha256 = create_content_hash(text)
+
+    # Add an existing note that reuses the fixture's filename ("test_note.md") with lower confidences
+    existing_note = Note(
+        subject="Existing Note",
+        content=mock_note_data["content"],
+        sha256=sha256,
+        modality="note",
+        tags=["existing"],
+        mime_type="text/markdown",
+        size=len(text.encode("utf-8")),
+        embed_status="RAW",
+        filename="test_note.md",
+    )
+    existing_note.update_confidences(
+        {"observation_accuracy": 0.2, "predictive_value": 0.3}
+    )
+    db_session.add(existing_note)
+    db_session.commit()
+
+    result = notes.sync_note(
+        **{**mock_note_data, "content": "bla bla bla", "subject": "blee"}
+    )
+
+    assert result == {
+        "note_id": existing_note.id,
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "title": "blee",
+        "content_length": 19,
+    }
+
+    # Verify no duplicate was created, content was replaced, and confidences were merged
+    assert len(db_session.query(Note).all()) == 1
+    db_session.refresh(existing_note)
+    assert existing_note.content == "bla bla bla"  # type: ignore
+    assert existing_note.confidence_dict == {
+        "observation_accuracy": 0.8,
+        "predictive_value": 0.3,
+    }
+
+
+
@pytest.mark.parametrize(
    "note_type,confidence,tags",
    [
@ -178,16 +249,26 @@ def test_sync_note_parameters(note_type, confidence, tags, db_session, qdrant):
        subject=f"Test Note {note_type}",
        content="Test content for parameter testing",
        note_type=note_type,
-        confidence=confidence,
+        confidences={"observation_accuracy": confidence},
        tags=tags,
    )

    note = db_session.query(Note).filter_by(subject=f"Test Note {note_type}").first()
    assert note is not None
    assert note.note_type == note_type
-    assert float(note.confidence) == confidence  # Convert Decimal to float
+    assert note.confidence_dict == {"observation_accuracy": confidence}
    assert note.tags == tags
-    assert result["status"] == "processed"
+
+    # Updated to match actual return format
+    text = f"# Test Note {note_type}\n\nTest content for parameter testing"
+    assert result == {
+        "note_id": note.id,
+        "title": f"Test Note {note_type}",
+        "status": "processed",
+        "chunks_count": 1,
+        "embed_status": "STORED",
+        "content_length": len(text.encode("utf-8")),
+    }


 def test_sync_note_content_hash_consistency(db_session):
Author	SHA1	Message	Date
Daniel O'Connell	0551ddd30c	shorter tool descriptions + time tool	2025-06-03 13:45:38 +02:00
Daniel O'Connell	79567b19f2	search my new confidence scores	2025-06-03 13:00:49 +02:00
Daniel O'Connell	e5da3714de	muliple dimemnsions for confidence values	2025-06-03 12:18:20 +02:00
Daniel O'Connell	a40e0b50fa	editable notes	2025-06-02 22:24:19 +02:00