search my new confidence scores

This commit is contained in:
Daniel O'Connell 2025-06-03 13:00:49 +02:00
parent e5da3714de
commit 79567b19f2
5 changed files with 56 additions and 30 deletions

View File

@ -291,7 +291,7 @@ async def observe(
content: str,
subject: str,
observation_type: str = "general",
confidence: float = 0.8,
confidences: dict[str, float] = {},
evidence: dict | None = None,
tags: list[str] | None = None,
session_id: str | None = None,
@ -345,12 +345,16 @@ async def observe(
- "contradiction": An inconsistency with previous observations
- "general": Doesn't fit other categories
confidence: How certain you are (0.0-1.0):
confidences: How certain you are (0.0-1.0) in a given aspect of the observation:
- 1.0: User explicitly stated this
- 0.9: Strongly implied or demonstrated repeatedly
- 0.8: Inferred with high confidence (default)
- 0.7: Probable but with some uncertainty
- 0.6 or below: Speculative, use sparingly
Provided as a mapping of <aspect>: <confidence>
Examples:
- {"observation_accuracy": 0.95}
- {"observation_accuracy": 0.8, "interpretation": 0.5}
evidence: Supporting context as a dict. Include relevant details:
- "quote": Exact words from the user
@ -392,7 +396,7 @@ async def observe(
"of all evil'. They prioritize code purity over performance.",
subject="programming_philosophy",
observation_type="belief",
confidence=0.95,
confidences={"observation_accuracy": 0.95},
evidence={
"quote": "State is the root of all evil in programming",
"context": "Discussing why they chose Haskell for their project"
@ -408,7 +412,7 @@ async def observe(
"typically between 11pm and 3am, claiming better focus",
subject="work_schedule",
observation_type="behavior",
confidence=0.85,
confidences={"observation_accuracy": 0.85},
evidence={
"context": "Mentioned across multiple conversations over 2 weeks"
},
@ -422,7 +426,7 @@ async def observe(
"previously argued strongly for monoliths in similar contexts",
subject="architecture_preferences",
observation_type="contradiction",
confidence=0.9,
confidences={"observation_accuracy": 0.9},
evidence={
"quote": "Microservices are definitely the way to go",
"context": "Designing a new system similar to one from 3 months ago"
@ -438,7 +442,7 @@ async def observe(
"subject": subject,
"content": content,
"observation_type": observation_type,
"confidence": confidence,
"confidences": confidences,
"evidence": evidence,
"tags": tags,
"session_id": session_id,
@ -457,7 +461,7 @@ async def search_observations(
subject: str = "",
tags: list[str] | None = None,
observation_types: list[str] | None = None,
min_confidence: float = 0.5,
min_confidences: dict[str, float] = {},
limit: int = 10,
) -> list[dict]:
"""
@ -478,12 +482,6 @@ async def search_observations(
- To build context about the user's expertise or interests
- Whenever personalization would improve your response
How it works:
Uses hybrid search combining semantic similarity with keyword matching.
Searches across multiple embedding spaces (semantic meaning and temporal
context) to find relevant observations from different angles. This approach
ensures you find both conceptually related and specifically mentioned items.
Args:
query: Natural language description of what you're looking for. The search
matches both meaning and specific terms in observation content.
@ -513,10 +511,9 @@ async def search_observations(
- "general": Other observations
Leave as None to search all types.
min_confidence: Only return observations with confidence >= this value.
- Use 0.8+ for high-confidence facts
- Use 0.5-0.7 to include inferred observations
- Default 0.5 includes most observations
min_confidences: Only return observations whose confidence scores meet the given
per-aspect thresholds, e.g. {"observation_accuracy": 0.8, "interpretation": 0.5}
returns facts where you were confident you observed the fact but are not
necessarily sure about the interpretation.
Range: 0.0 to 1.0
limit: Maximum results to return (1-100). Default 10. Increase when you
@ -579,7 +576,6 @@ async def search_observations(
temporal = observation.generate_temporal_text(
subject=subject or "",
content=query,
confidence=0,
created_at=datetime.now(timezone.utc),
)
results = await search(
@ -593,7 +589,7 @@ async def search_observations(
limit=limit,
filters=SearchFilters(
subject=subject,
confidence=min_confidence,
min_confidences=min_confidences,
tags=tags,
observation_types=observation_types,
source_ids=filter_observation_source_ids(tags=tags),

View File

@ -188,6 +188,8 @@ class AgentObservationAdmin(ModelView, model=AgentObservation):
"inserted_at",
]
column_searchable_list = ["subject", "observation_type"]
column_default_sort = [("inserted_at", True)]
column_sortable_list = ["inserted_at"]
class NoteAdmin(ModelView, model=Note):
@ -201,6 +203,8 @@ class NoteAdmin(ModelView, model=Note):
"inserted_at",
]
column_searchable_list = ["subject", "content"]
column_default_sort = [("inserted_at", True)]
column_sortable_list = ["inserted_at"]
def setup_admin(admin: Admin):

View File

@ -10,7 +10,7 @@ import Stemmer
from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters
from memory.common.db.connection import make_session
from memory.common.db.models import Chunk
from memory.common.db.models import Chunk, ConfidenceScore
logger = logging.getLogger(__name__)
@ -25,9 +25,24 @@ async def search_bm25(
items_query = db.query(Chunk.id, Chunk.content).filter(
Chunk.collection_name.in_(modalities)
)
if source_ids := filters.get("source_ids"):
items_query = items_query.filter(Chunk.source_id.in_(source_ids))
# Add confidence filtering if specified
if min_confidences := filters.get("min_confidences"):
for confidence_type, min_score in min_confidences.items():
items_query = items_query.join(
ConfidenceScore,
(ConfidenceScore.source_item_id == Chunk.source_id)
& (ConfidenceScore.confidence_type == confidence_type)
& (ConfidenceScore.score >= min_score),
)
items = items_query.all()
if not items:
return []
item_ids = {
sha256(item.content.lower().strip().encode("utf-8")).hexdigest(): item.id
for item in items

View File

@ -111,15 +111,26 @@ async def search_embeddings(
- filters: Filters to apply to the search results
- multimodal: Whether to search in multimodal collections
"""
query_filters = {}
if confidence := filters.get("confidence"):
query_filters["must"] += [{"key": "confidence", "range": {"gte": confidence}}]
if tags := filters.get("tags"):
query_filters["must"] += [{"key": "tags", "match": {"any": tags}}]
if observation_types := filters.get("observation_types"):
query_filters["must"] += [
{"key": "observation_type", "match": {"any": observation_types}}
query_filters = {"must": []}
# Handle structured confidence filtering
if min_confidences := filters.get("min_confidences"):
confidence_filters = [
{
"key": f"confidence.{confidence_type}",
"range": {"gte": min_confidence_score},
}
for confidence_type, min_confidence_score in min_confidences.items()
]
query_filters["must"].extend(confidence_filters)
if tags := filters.get("tags"):
query_filters["must"].append({"key": "tags", "match": {"any": tags}})
if observation_types := filters.get("observation_types"):
query_filters["must"].append(
{"key": "observation_type", "match": {"any": observation_types}}
)
client = qdrant.get_qdrant_client()
results = query_chunks(
@ -129,7 +140,7 @@ async def search_embeddings(
embedding.embed_text if not multimodal else embedding.embed_mixed,
min_score=min_score,
limit=limit,
filters=query_filters,
filters=query_filters if query_filters["must"] else None,
)
search_results = {k: results.get(k, []) for k in modalities}

View File

@ -65,7 +65,7 @@ class SearchResult(BaseModel):
class SearchFilters(TypedDict):
subject: NotRequired[str | None]
confidence: NotRequired[float]
min_confidences: NotRequired[dict[str, float]]
tags: NotRequired[list[str] | None]
observation_types: NotRequired[list[str] | None]
source_ids: NotRequired[list[int] | None]