diff --git a/src/memory/api/search/bm25.py b/src/memory/api/search/bm25.py
index 351b898..a1ee228 100644
--- a/src/memory/api/search/bm25.py
+++ b/src/memory/api/search/bm25.py
@@ -170,11 +170,41 @@ async def search_bm25_chunks(
     """
     Search chunks using PostgreSQL full-text search.

+    Runs a separate search for each data chunk and merges the results,
+    similar to how embedding search handles multiple query variants.
+
     Returns:
         - Dictionary mapping chunk IDs to their normalized scores (0-1 range)
     """
-    query = " ".join([c for chunk in data for c in chunk.data if isinstance(c, str)])
-    return await asyncio.wait_for(
-        search_bm25(query, modalities, limit, filters),
-        timeout,
-    )
+    # Extract query strings from each data chunk
+    queries = [
+        " ".join(c for c in chunk.data if isinstance(c, str))
+        for chunk in data
+    ]
+    queries = [q.strip() for q in queries if q.strip()]
+
+    if not queries:
+        return {}
+
+    # Run separate searches for each query in parallel
+    async def run_search(query: str) -> dict[str, float]:
+        return await search_bm25(query, modalities, limit, filters)
+
+    try:
+        results = await asyncio.wait_for(
+            asyncio.gather(*[run_search(q) for q in queries], return_exceptions=True),
+            timeout,
+        )
+    except asyncio.TimeoutError:
+        return {}
+
+    # Merge results - take the max score for each chunk across all queries
+    merged: dict[str, float] = {}
+    for result in results:
+        if isinstance(result, Exception):
+            continue
+        for chunk_id, score in result.items():
+            if chunk_id not in merged or score > merged[chunk_id]:
+                merged[chunk_id] = score
+
+    return merged
diff --git a/src/memory/api/search/constants.py b/src/memory/api/search/constants.py
index 1de5830..06bb981 100644
--- a/src/memory/api/search/constants.py
+++ b/src/memory/api/search/constants.py
@@ -22,6 +22,11 @@ QUERY_TERM_BOOST = 0.005
 # Bonus when query terms match the source title (stronger signal)
 TITLE_MATCH_BOOST = 0.01

+# Bonus when the source title matches LLM-recalled content (substring match)
+# This is larger than the regular title boost because it's a strong signal
+# that the user is looking for specific known content
+RECALLED_TITLE_BOOST = 0.05
+
 # Bonus multiplier for popularity (applied as: score * (1 + POPULARITY_BOOST * (popularity - 1)))
 # This gives a small boost to popular items without dominating relevance
 POPULARITY_BOOST = 0.02
diff --git a/src/memory/api/search/query_analysis.py b/src/memory/api/search/query_analysis.py
index 4e64424..f4da869 100644
--- a/src/memory/api/search/query_analysis.py
+++ b/src/memory/api/search/query_analysis.py
@@ -6,6 +6,7 @@ Uses a fast LLM (Haiku) to analyze natural language queries and extract:
 - Source hints: author names, domains, or specific sources
 - Cleaned query: the actual search terms with meta-language removed
 - Query variants: alternative phrasings to search
+- Recalled content: specific titles/essays the LLM recalls that match the query

 This runs in parallel with HyDE for maximum efficiency.
 """
@@ -206,13 +207,16 @@ def _build_prompt() -> str:
   "modalities": [], // From: {modality_names} (empty = search all)
   "sources": [], // Specific sources/authors mentioned
   "cleaned_query": "", // Query with meta-language removed
-  "query_variants": [] // 1-3 alternative phrasings
+  "query_variants": [], // 1-3 alternative phrasings
+  "recalled_content": [] // Specific titles/essays/concepts you recall that match
 }}

 Guidelines:
-  - "on lesswrong" -> forum, "comic about" -> comic, etc.
-  - Remove "there was something about", "I remember reading", etc.
-  - Generate useful query variants
+  - Only restrict modalities when VERY confident about content type
+  - When unsure, return empty modalities to search all
+  - Remove meta-language like "there was something about", "I remember reading"
+  - For recalled_content: if you recognize the topic, suggest specific titles/essays
+    that might be relevant (e.g., "predetermined conclusions" -> "The Bottom Line")

 Return ONLY valid JSON.
 """)
@@ -232,6 +236,7 @@ class QueryAnalysis:
     sources: list[str] = field(default_factory=list)
     cleaned_query: str = ""
     query_variants: list[str] = field(default_factory=list)
+    recalled_content: list[str] = field(default_factory=list)  # Titles/essays LLM recalls
     success: bool = False
@@ -300,12 +305,14 @@ async def analyze_query(
         result.sources = data.get("sources", [])
         result.cleaned_query = data.get("cleaned_query", query)
         result.query_variants = data.get("query_variants", [])
+        result.recalled_content = data.get("recalled_content", [])
         result.success = True

         logger.debug(
             f"Query analysis: '{query[:40]}...' -> "
             f"modalities={result.modalities}, "
-            f"cleaned='{result.cleaned_query[:30]}...'"
+            f"cleaned='{result.cleaned_query[:30]}...', "
+            f"recalled={result.recalled_content}"
         )
     except json.JSONDecodeError as e:
diff --git a/src/memory/api/search/search.py b/src/memory/api/search/search.py
index 06c931c..6b8fb0f 100644
--- a/src/memory/api/search/search.py
+++ b/src/memory/api/search/search.py
@@ -19,6 +19,7 @@ from memory.api.search.constants import (
     RERANK_CANDIDATE_MULTIPLIER,
     QUERY_TERM_BOOST,
     TITLE_MATCH_BOOST,
+    RECALLED_TITLE_BOOST,
     POPULARITY_BOOST,
     RECENCY_BOOST_MAX,
     RECENCY_HALF_LIFE_DAYS,
@@ -93,17 +94,22 @@ def deduplicate_by_source(chunks: list[Chunk]) -> list[Chunk]:
 def apply_source_boosts(
     chunks: list[Chunk],
     query_terms: set[str],
+    recalled_titles: list[str] | None = None,
 ) -> None:
     """
     Apply title, popularity, and recency boosts to chunks in a single DB query.

     - Title boost: chunks get boosted when query terms appear in source title
+    - Recalled title boost: chunks get a large boost if the source title matches recalled content
     - Popularity boost: chunks get boosted based on source karma/popularity
     - Recency boost: newer content gets a small boost that decays over time
     """
     if not chunks:
         return

+    # Normalize recalled titles for matching
+    recalled_lower = [t.lower() for t in (recalled_titles or [])]
+
     source_ids = list({chunk.source_id for chunk in chunks})
     now = datetime.now(timezone.utc)

@@ -124,12 +130,19 @@
         score = chunk.relevance_score or 0

         # Apply title boost if query terms match
-        if query_terms:
-            title = source_data.get("title", "")
-            if title:
-                matches = sum(1 for term in query_terms if term in title)
-                if matches > 0:
-                    score += TITLE_MATCH_BOOST * (matches / len(query_terms))
+        title = source_data.get("title", "")
+        if query_terms and title:
+            matches = sum(1 for term in query_terms if term in title)
+            if matches > 0:
+                score += TITLE_MATCH_BOOST * (matches / len(query_terms))
+
+        # Apply recalled title boost - large boost if the title matches LLM-recalled content
+        if recalled_lower and title:
+            title_lower = title.lower()
+            for recalled in recalled_lower:
+                if recalled in title_lower or title_lower in recalled:
+                    score += RECALLED_TITLE_BOOST
+                    break

         # Apply popularity boost
         popularity = source_data.get("popularity", 1.0)
@@ -254,22 +266,23 @@ def _apply_query_analysis(
     query_text: str,
     data: list[extract.DataChunk],
     modalities: set[str],
-) -> tuple[str, list[extract.DataChunk], set[str], list[str]]:
+) -> tuple[str, list[extract.DataChunk], set[str], list[str], list[str]]:
     """
-    Apply query analysis results to modify query, data, and modalities.
+    Apply query analysis results to modify the query and data.

     Returns:
-        (updated_query_text, updated_data, updated_modalities, query_variants)
+        (updated_query_text, updated_data, modalities, query_variants, recalled_content)
     """
     query_variants: list[str] = []
+    recalled_content: list[str] = []

     if not (analysis_result and analysis_result.success):
-        return query_text, data, modalities, query_variants
+        return query_text, data, modalities, query_variants, recalled_content

-    # Use detected modalities if any
+    # Log detected modalities but don't restrict - content may exist in multiple modalities
+    # (e.g., "the sequences" is both forum posts AND a book compilation)
     if analysis_result.modalities:
-        modalities = analysis_result.modalities
-        logger.debug(f"Query analysis modalities: {modalities}")
+        logger.debug(f"Query analysis detected modalities: {analysis_result.modalities}")

     # Use cleaned query
     if analysis_result.cleaned_query and analysis_result.cleaned_query != query_text:
@@ -282,19 +295,27 @@
     # Collect query variants
     query_variants.extend(analysis_result.query_variants)

-    return query_text, data, modalities, query_variants
+    # Collect recalled content (titles/essays the LLM remembers)
+    recalled_content.extend(analysis_result.recalled_content)
+    if recalled_content:
+        logger.debug(f"Query analysis recalled: {recalled_content}")
+
+    return query_text, data, modalities, query_variants, recalled_content


 def _build_search_data(
     data: list[extract.DataChunk],
     hyde_doc: str | None,
     query_variants: list[str],
+    recalled_content: list[str],
     query_text: str,
 ) -> list[extract.DataChunk]:
     """
     Build the list of data chunks to search with.

-    Includes original query, HyDE expansion, and query variants.
+    Includes original query, HyDE expansion, query variants, and recalled content.
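+    Each recalled title is added as its own chunk so both search passes can
+    match it verbatim.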
""" search_data = list(data) @@ -307,9 +326,67 @@ def _build_search_data( for variant in query_variants[:3]: search_data.append(extract.DataChunk(data=[variant])) + # Add recalled content (titles/essays the LLM remembers that match the query) + for title in recalled_content[:3]: + search_data.append(extract.DataChunk(data=[title])) + return search_data +def _fetch_chunks_by_title( + titles: list[str], + modalities: set[str], + limit_per_title: int = 5, +) -> dict[str, float]: + """ + Fetch chunks from sources whose titles match the given titles. + + This ensures recalled content from LLM makes it into the candidate pool + even if BM25/embedding search doesn't rank it highly. + """ + if not titles or not modalities: + return {} + + # Normalize titles for matching + titles_lower = [t.lower() for t in titles[:5]] + + with make_session() as db: + # Query sources in requested modalities + # We need to fetch the polymorphic models to get their title attributes + sources = ( + db.query(SourceItem) + .filter(SourceItem.modality.in_(modalities)) + .limit(500) # Reasonable limit for title scanning + .all() + ) + + # Filter sources whose titles match any of the recalled titles + matching_source_ids = [] + for source in sources: + title = getattr(source, "title", None) + if title: + title_lower = title.lower() + for recalled in titles_lower: + if recalled in title_lower or title_lower in recalled: + matching_source_ids.append(source.id) + break + + if not matching_source_ids: + return {} + + # Fetch chunks for matching sources + chunks = ( + db.query(Chunk.id) + .filter(Chunk.source_id.in_(matching_source_ids[:limit_per_title * len(titles)])) + .limit(limit_per_title * len(matching_source_ids)) + .all() + ) + + # Give these chunks a baseline score so they're included in fusion + # The actual boost will be applied later by apply_source_boosts + return {str(c.id): 0.5 for c in chunks} + + async def _run_searches( search_data: list[extract.DataChunk], data: list[extract.DataChunk], @@ -318,6 +395,7 @@ async def _run_searches( filters: SearchFilters, timeout: int, use_bm25: bool, + recalled_titles: list[str] | None = None, ) -> dict[str, float]: """ Run embedding and optionally BM25 searches in parallel, returning fused scores. @@ -329,9 +407,10 @@ async def _run_searches( if use_bm25: # Run both searches in parallel + # Note: BM25 uses search_data to include query variants and recalled content results = await asyncio.gather( embedding_task, - search_bm25_chunks(data, modalities, internal_limit, filters, timeout), + search_bm25_chunks(search_data, modalities, internal_limit, filters, timeout), return_exceptions=True, ) @@ -347,7 +426,17 @@ async def _run_searches( bm25_scores = {} # Fuse scores from both methods using Reciprocal Rank Fusion - return fuse_scores_rrf(embedding_scores, bm25_scores) + fused = fuse_scores_rrf(embedding_scores, bm25_scores) + + # Add chunks from recalled titles (direct title match) + # This ensures LLM-recalled content makes it into the candidate pool + if recalled_titles: + title_chunks = _fetch_chunks_by_title(recalled_titles, modalities) + for chunk_id, score in title_chunks.items(): + if chunk_id not in fused: + fused[chunk_id] = score + + return fused def _fetch_chunks( @@ -391,6 +480,7 @@ def _fetch_chunks( def _apply_boosts( chunks: list[Chunk], data: list[extract.DataChunk], + recalled_content: list[str] | None = None, ) -> None: """ Apply query term, title, popularity, and recency boosts to chunks. 
@@ -407,10 +497,10 @@
         query_terms = extract_query_terms(query_text)
         apply_query_term_boost(chunks, query_terms)
         # Apply title + popularity boosts (single DB query)
-        apply_source_boosts(chunks, query_terms)
+        apply_source_boosts(chunks, query_terms, recalled_content)
     else:
-        # No query terms, just apply popularity boost
-        apply_source_boosts(chunks, set())
+        # No query terms - just apply popularity, recency, and recalled-title boosts
+        apply_source_boosts(chunks, set(), recalled_content)


 async def _apply_reranking(
@@ -486,23 +576,24 @@
     )

     # Apply query analysis results
-    query_text, data, modalities, query_variants = _apply_query_analysis(
+    query_text, data, modalities, query_variants, recalled_content = _apply_query_analysis(
         analysis_result, query_text, data, modalities
     )

-    # Build search data with HyDE and variants
-    search_data = _build_search_data(data, hyde_doc, query_variants, query_text)
+    # Build search data with HyDE, variants, and recalled content
+    search_data = _build_search_data(data, hyde_doc, query_variants, recalled_content, query_text)

     # Run searches and fuse scores
     fused_scores = await _run_searches(
-        search_data, data, modalities, internal_limit, filters, timeout, use_bm25
+        search_data, data, modalities, internal_limit, filters, timeout, use_bm25,
+        recalled_titles=recalled_content,
     )

     # Fetch chunks from database
     chunks = _fetch_chunks(fused_scores, limit, use_reranking)

-    # Apply various boosts
-    _apply_boosts(chunks, data)
+    # Apply various boosts, including recalled-content title matching
+    _apply_boosts(chunks, data, recalled_content)

     # Apply reranking if enabled
     chunks = await _apply_reranking(chunks, query_text, limit, use_reranking)
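Below is a standalone sketch (not part of the patch) of the two heuristics it leans on: merging per-variant BM25 results by keeping each chunk's best score, and the bidirectional, case-insensitive substring rule used for recalled-title matching. The helper names `merge_max` and `titles_match` are illustrative, not identifiers from this codebase.

```python
def merge_max(results: list[dict[str, float]]) -> dict[str, float]:
    """Keep the best score each chunk achieved across all query variants."""
    merged: dict[str, float] = {}
    for result in results:
        for chunk_id, score in result.items():
            if chunk_id not in merged or score > merged[chunk_id]:
                merged[chunk_id] = score
    return merged


def titles_match(recalled: str, title: str) -> bool:
    """Match when either normalized string contains the other."""
    a, b = recalled.lower(), title.lower()
    return a in b or b in a


# A chunk keeps its best score across variants, so one strong phrasing is
# enough to surface it in the candidate pool.
assert merge_max([{"c1": 0.2, "c2": 0.9}, {"c1": 0.7}]) == {"c1": 0.7, "c2": 0.9}

# Containment in either direction tolerates subtitles and suffixes on the
# stored title, at the cost of also matching very short recalled strings.
assert titles_match("The Bottom Line", "The Bottom Line - LessWrong")
```

Max-merge keeps the candidate stage recall-oriented: a chunk only needs to hit on one phrasing, while RRF fusion and the later boosts handle precision.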