Add LLM-recalled content for half-remembered searches

- Query analysis now extracts recalled_content (titles/essays the LLM
  recognizes as relevant, e.g., "predetermined conclusions" → "The Bottom Line")
- Added title-based chunk fetching to ensure recalled content appears in
  results even when BM25/embedding search doesn't rank it highly
- Fixed BM25 to run separate searches for each query variant and merge the
  results (previously all variants were concatenated into a single AND query;
  see the toy sketch below)
- Added RECALLED_TITLE_BOOST (0.05) for sources matching recalled titles
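Why the old single AND query hurt recall, as a toy model (documents and term sets invented; real BM25 ranking is more nuanced than set containment):

doc_terms = {
    "doc1": {"bottom", "line", "conclusions"},
    "doc2": {"predetermined", "conclusions", "evidence"},
}
variants = [{"bottom", "line"}, {"predetermined", "conclusions"}]

and_all = set.union(*variants)   # old: one concatenated query, every term required
old_hits = {d for d, terms in doc_terms.items() if and_all <= terms}

new_hits = {d for d, terms in doc_terms.items()
            if any(v <= terms for v in variants)}

assert old_hits == set()                 # no document contains all four terms
assert new_hits == {"doc1", "doc2"}      # each variant matches its own document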

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
mruwnik 2025-12-21 17:48:42 +00:00
parent 7e1770f384
commit 0cc1d7f6c7
4 changed files with 168 additions and 35 deletions

View File

@@ -170,11 +170,41 @@ async def search_bm25_chunks(
     """
     Search chunks using PostgreSQL full-text search.

+    Runs separate searches for each data chunk and merges results,
+    similar to how embedding search handles multiple query variants.
+
     Returns:
     - Dictionary mapping chunk IDs to their normalized scores (0-1 range)
     """
-    query = " ".join([c for chunk in data for c in chunk.data if isinstance(c, str)])
-    return await asyncio.wait_for(
-        search_bm25(query, modalities, limit, filters),
-        timeout,
-    )
+    # Extract query strings from each data chunk
+    queries = [
+        " ".join(c for c in chunk.data if isinstance(c, str))
+        for chunk in data
+    ]
+    queries = [q.strip() for q in queries if q.strip()]
+    if not queries:
+        return {}
+
+    # Run separate searches for each query in parallel
+    async def run_search(query: str) -> dict[str, float]:
+        return await search_bm25(query, modalities, limit, filters)
+
+    try:
+        results = await asyncio.wait_for(
+            asyncio.gather(*[run_search(q) for q in queries], return_exceptions=True),
+            timeout,
+        )
+    except asyncio.TimeoutError:
+        return {}
+
+    # Merge results - take max score for each chunk across all queries
+    merged: dict[str, float] = {}
+    for result in results:
+        if isinstance(result, Exception):
+            continue
+        for chunk_id, score in result.items():
+            if chunk_id not in merged or score > merged[chunk_id]:
+                merged[chunk_id] = score
+    return merged
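The merge keeps the best score each chunk achieved across variants, which preserves the 0-1 normalization the docstring promises (summing would not). A toy run with made-up scores:

per_query_results = [
    {"chunk-a": 0.9, "chunk-b": 0.4},    # hypothetical scores for variant 1
    {"chunk-b": 0.7, "chunk-c": 0.5},    # hypothetical scores for variant 2
]

merged: dict[str, float] = {}
for result in per_query_results:
    for chunk_id, score in result.items():
        if chunk_id not in merged or score > merged[chunk_id]:
            merged[chunk_id] = score

assert merged == {"chunk-a": 0.9, "chunk-b": 0.7, "chunk-c": 0.5}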

View File

@@ -22,6 +22,11 @@ QUERY_TERM_BOOST = 0.005
 # Bonus when query terms match the source title (stronger signal)
 TITLE_MATCH_BOOST = 0.01

+# Bonus when source title matches LLM-recalled content exactly
+# This is larger than regular title boost because it's a strong signal
+# that the user is looking for specific known content
+RECALLED_TITLE_BOOST = 0.05
+
 # Bonus multiplier for popularity (applied as: score * (1 + POPULARITY_BOOST * (popularity - 1)))
 # This gives a small boost to popular items without dominating relevance
 POPULARITY_BOOST = 0.02
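Back-of-envelope arithmetic for how the constants stack (the base score, match counts, and popularity value are invented, and the exact ordering of additive vs. multiplicative boosts lives in apply_source_boosts):

TITLE_MATCH_BOOST = 0.01
RECALLED_TITLE_BOOST = 0.05
POPULARITY_BOOST = 0.02

score = 0.62                                  # hypothetical fused relevance score
score += TITLE_MATCH_BOOST * (2 / 4)          # 2 of 4 query terms appear in the title
score += RECALLED_TITLE_BOOST                 # title matches an LLM-recalled essay
score *= 1 + POPULARITY_BOOST * (3.0 - 1)     # popularity 3.0, per the formula above
assert round(score, 4) == 0.702

At these values a recalled-title hit is worth five full title matches, consistent with the commit's framing of recall as the stronger signal.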

View File

@@ -6,6 +6,7 @@ Uses a fast LLM (Haiku) to analyze natural language queries and extract:
 - Source hints: author names, domains, or specific sources
 - Cleaned query: the actual search terms with meta-language removed
 - Query variants: alternative phrasings to search
+- Recalled content: specific titles/essays the LLM recalls that match the query

 This runs in parallel with HyDE for maximum efficiency.
 """
@@ -206,13 +207,16 @@ def _build_prompt() -> str:
     "modalities": [], // From: {modality_names} (empty = search all)
     "sources": [], // Specific sources/authors mentioned
     "cleaned_query": "", // Query with meta-language removed
-    "query_variants": [] // 1-3 alternative phrasings
+    "query_variants": [], // 1-3 alternative phrasings
+    "recalled_content": [] // Specific titles/essays/concepts you recall that match
 }}

 Guidelines:
-- "on lesswrong" -> forum, "comic about" -> comic, etc.
-- Remove "there was something about", "I remember reading", etc.
-- Generate useful query variants
+- Only restrict modalities when VERY confident about content type
+- When unsure, return empty modalities to search all
+- Remove meta-language like "there was something about", "I remember reading"
+- For recalled_content: if you recognize the topic, suggest specific titles/essays
+  that might be relevant (e.g., "predetermined conclusions" -> "The Bottom Line")

 Return ONLY valid JSON.
 """)
@@ -232,6 +236,7 @@ class QueryAnalysis:
     sources: list[str] = field(default_factory=list)
     cleaned_query: str = ""
     query_variants: list[str] = field(default_factory=list)
+    recalled_content: list[str] = field(default_factory=list)  # Titles/essays LLM recalls
     success: bool = False
@@ -300,12 +305,14 @@ async def analyze_query(
         result.sources = data.get("sources", [])
         result.cleaned_query = data.get("cleaned_query", query)
         result.query_variants = data.get("query_variants", [])
+        result.recalled_content = data.get("recalled_content", [])
         result.success = True

         logger.debug(
             f"Query analysis: '{query[:40]}...' -> "
             f"modalities={result.modalities}, "
-            f"cleaned='{result.cleaned_query[:30]}...'"
+            f"cleaned='{result.cleaned_query[:30]}...', "
+            f"recalled={result.recalled_content}"
         )
     except json.JSONDecodeError as e:
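Since every field is read with dict.get and a default, a response missing the new key (an older prompt, or a model that drops it) still parses cleanly. A minimal sketch:

import json

response = '{"cleaned_query": "predetermined conclusions", "query_variants": []}'
data = json.loads(response)

recalled = data.get("recalled_content", [])   # absent key -> empty list
assert recalled == []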

View File

@@ -19,6 +19,7 @@ from memory.api.search.constants import (
     RERANK_CANDIDATE_MULTIPLIER,
     QUERY_TERM_BOOST,
     TITLE_MATCH_BOOST,
+    RECALLED_TITLE_BOOST,
     POPULARITY_BOOST,
     RECENCY_BOOST_MAX,
     RECENCY_HALF_LIFE_DAYS,
@@ -93,17 +94,22 @@ def deduplicate_by_source(chunks: list[Chunk]) -> list[Chunk]:
 def apply_source_boosts(
     chunks: list[Chunk],
     query_terms: set[str],
+    recalled_titles: list[str] | None = None,
 ) -> None:
     """
     Apply title, popularity, and recency boosts to chunks in a single DB query.

     - Title boost: chunks get boosted when query terms appear in source title
+    - Recalled title boost: chunks get large boost if source title matches recalled content
     - Popularity boost: chunks get boosted based on source karma/popularity
     - Recency boost: newer content gets a small boost that decays over time
     """
     if not chunks:
         return

+    # Normalize recalled titles for matching
+    recalled_lower = [t.lower() for t in (recalled_titles or [])]
+
     source_ids = list({chunk.source_id for chunk in chunks})
     now = datetime.now(timezone.utc)
@@ -124,12 +130,18 @@ def apply_source_boosts(
         score = chunk.relevance_score or 0

         # Apply title boost if query terms match
-        if query_terms:
-            title = source_data.get("title", "")
-            if title:
-                matches = sum(1 for term in query_terms if term in title)
-                if matches > 0:
-                    score += TITLE_MATCH_BOOST * (matches / len(query_terms))
+        title = source_data.get("title", "")
+        if query_terms and title:
+            matches = sum(1 for term in query_terms if term in title)
+            if matches > 0:
+                score += TITLE_MATCH_BOOST * (matches / len(query_terms))
+
+        # Apply recalled title boost - large boost if title matches LLM-recalled content
+        if recalled_lower and title:
+            for recalled in recalled_lower:
+                if recalled in title or title in recalled:
+                    score += RECALLED_TITLE_BOOST
+                    break

         # Apply popularity boost
         popularity = source_data.get("popularity", 1.0)
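The recalled-title check is a bidirectional substring test, so a bare essay name matches a longer stored title and vice versa. A standalone sketch of the predicate (the helper name is invented; it lowercases the title before comparing, whereas the hunk above compares the stored title directly, so it assumes titles are already normalized):

def matches_recalled(title: str, recalled_lower: list[str]) -> bool:
    # Either string may contain the other
    title_lower = title.lower()
    return any(r in title_lower or title_lower in r for r in recalled_lower)

assert matches_recalled("The Bottom Line - LessWrong", ["the bottom line"])
assert matches_recalled("Bottom Line", ["the bottom line"])  # title inside recall
assert not matches_recalled("Unrelated Post", ["the bottom line"])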
@@ -254,22 +266,23 @@ def _apply_query_analysis(
     query_text: str,
     data: list[extract.DataChunk],
     modalities: set[str],
-) -> tuple[str, list[extract.DataChunk], set[str], list[str]]:
+) -> tuple[str, list[extract.DataChunk], set[str], list[str], list[str]]:
     """
     Apply query analysis results to modify query, data, and modalities.

     Returns:
-    (updated_query_text, updated_data, updated_modalities, query_variants)
+    (updated_query_text, updated_data, updated_modalities, query_variants, recalled_content)
     """
     query_variants: list[str] = []
+    recalled_content: list[str] = []

     if not (analysis_result and analysis_result.success):
-        return query_text, data, modalities, query_variants
+        return query_text, data, modalities, query_variants, recalled_content

-    # Use detected modalities if any
+    # Log detected modalities but don't restrict - content may exist in multiple modalities
+    # (e.g., "the sequences" is both forum posts AND a book compilation)
     if analysis_result.modalities:
-        modalities = analysis_result.modalities
-        logger.debug(f"Query analysis modalities: {modalities}")
+        logger.debug(f"Query analysis detected modalities: {analysis_result.modalities}")

     # Use cleaned query
     if analysis_result.cleaned_query and analysis_result.cleaned_query != query_text:
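The subtle change in this hunk: detected modalities used to replace the caller's set and now only get logged. A toy illustration of the difference (values invented):

modalities = {"forum", "book", "blog"}       # caller's full search space
detected = {"forum"}                         # analyzer's guess for "the sequences"

old_search_space = detected                  # old: detection replaced the set
new_search_space = modalities                # new: detection is only logged

assert "book" not in old_search_space        # book compilation was unreachable
assert "book" in new_search_space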
@@ -282,19 +295,25 @@ def _apply_query_analysis(
     # Collect query variants
     query_variants.extend(analysis_result.query_variants)

-    return query_text, data, modalities, query_variants
+    # Collect recalled content (titles/essays the LLM remembers)
+    recalled_content.extend(analysis_result.recalled_content)
+    if recalled_content:
+        logger.debug(f"Query analysis recalled: {recalled_content}")
+
+    return query_text, data, modalities, query_variants, recalled_content


 def _build_search_data(
     data: list[extract.DataChunk],
     hyde_doc: str | None,
     query_variants: list[str],
+    recalled_content: list[str],
     query_text: str,
 ) -> list[extract.DataChunk]:
     """
     Build the list of data chunks to search with.

-    Includes original query, HyDE expansion, and query variants.
+    Includes original query, HyDE expansion, query variants, and recalled content.
     """
     search_data = list(data)
@@ -307,9 +326,67 @@ def _build_search_data(
     for variant in query_variants[:3]:
         search_data.append(extract.DataChunk(data=[variant]))

+    # Add recalled content (titles/essays the LLM remembers that match the query)
+    for title in recalled_content[:3]:
+        search_data.append(extract.DataChunk(data=[title]))
+
     return search_data


+def _fetch_chunks_by_title(
+    titles: list[str],
+    modalities: set[str],
+    limit_per_title: int = 5,
+) -> dict[str, float]:
+    """
+    Fetch chunks from sources whose titles match the given titles.
+
+    This ensures recalled content from LLM makes it into the candidate pool
+    even if BM25/embedding search doesn't rank it highly.
+    """
+    if not titles or not modalities:
+        return {}
+
+    # Normalize titles for matching
+    titles_lower = [t.lower() for t in titles[:5]]
+
+    with make_session() as db:
+        # Query sources in requested modalities
+        # We need to fetch the polymorphic models to get their title attributes
+        sources = (
+            db.query(SourceItem)
+            .filter(SourceItem.modality.in_(modalities))
+            .limit(500)  # Reasonable limit for title scanning
+            .all()
+        )
+
+        # Filter sources whose titles match any of the recalled titles
+        matching_source_ids = []
+        for source in sources:
+            title = getattr(source, "title", None)
+            if title:
+                title_lower = title.lower()
+                for recalled in titles_lower:
+                    if recalled in title_lower or title_lower in recalled:
+                        matching_source_ids.append(source.id)
+                        break
+
+        if not matching_source_ids:
+            return {}
+
+        # Fetch chunks for matching sources
+        chunks = (
+            db.query(Chunk.id)
+            .filter(Chunk.source_id.in_(matching_source_ids[:limit_per_title * len(titles)]))
+            .limit(limit_per_title * len(matching_source_ids))
+            .all()
+        )
+
+        # Give these chunks a baseline score so they're included in fusion
+        # The actual boost will be applied later by apply_source_boosts
+        return {str(c.id): 0.5 for c in chunks}
+
+
 async def _run_searches(
     search_data: list[extract.DataChunk],
     data: list[extract.DataChunk],
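A hypothetical call (modality names invented): every hit comes back with the flat 0.5 baseline, whose only job is to get the chunk into the fused candidate pool; the ranking signal is added later by apply_source_boosts.

scores = _fetch_chunks_by_title(
    ["The Bottom Line", "Guessing the Teacher's Password"],
    modalities={"blog", "forum"},
)
assert all(score == 0.5 for score in scores.values())

Note the in-Python title scan is capped at 500 sources; if the corpus outgrows that, pushing the substring test into SQL (e.g. an ILIKE filter) would avoid silently missing matches.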
@@ -318,6 +395,7 @@ async def _run_searches(
     filters: SearchFilters,
     timeout: int,
     use_bm25: bool,
+    recalled_titles: list[str] | None = None,
 ) -> dict[str, float]:
     """
     Run embedding and optionally BM25 searches in parallel, returning fused scores.
@@ -329,9 +407,10 @@ async def _run_searches(
     if use_bm25:
         # Run both searches in parallel
+        # Note: BM25 uses search_data to include query variants and recalled content
         results = await asyncio.gather(
             embedding_task,
-            search_bm25_chunks(data, modalities, internal_limit, filters, timeout),
+            search_bm25_chunks(search_data, modalities, internal_limit, filters, timeout),
             return_exceptions=True,
         )
@@ -347,7 +426,17 @@ async def _run_searches(
             bm25_scores = {}

     # Fuse scores from both methods using Reciprocal Rank Fusion
-    return fuse_scores_rrf(embedding_scores, bm25_scores)
+    fused = fuse_scores_rrf(embedding_scores, bm25_scores)
+
+    # Add chunks from recalled titles (direct title match)
+    # This ensures LLM-recalled content makes it into the candidate pool
+    if recalled_titles:
+        title_chunks = _fetch_chunks_by_title(recalled_titles, modalities)
+        for chunk_id, score in title_chunks.items():
+            if chunk_id not in fused:
+                fused[chunk_id] = score
+
+    return fused


 def _fetch_chunks(
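The `if chunk_id not in fused` guard makes title injection gap-filling only, never overriding a fused score (dict.setdefault would do the same). And if fuse_scores_rrf uses the conventional 1/(k + rank) with k = 60, fused scores top out around 0.03, so the 0.5 baseline effectively front-loads recalled chunks until boosts and reranking reorder them:

fused = {"c1": 0.031, "c2": 0.016}       # hypothetical RRF output
title_chunks = {"c2": 0.5, "c9": 0.5}    # baseline-scored recalled-title chunks

for chunk_id, score in title_chunks.items():
    if chunk_id not in fused:
        fused[chunk_id] = score

assert fused == {"c1": 0.031, "c2": 0.016, "c9": 0.5}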
@@ -391,6 +480,7 @@ def _fetch_chunks(
 def _apply_boosts(
     chunks: list[Chunk],
     data: list[extract.DataChunk],
+    recalled_content: list[str] | None = None,
 ) -> None:
     """
     Apply query term, title, popularity, and recency boosts to chunks.
@@ -407,10 +497,10 @@ def _apply_boosts(
         query_terms = extract_query_terms(query_text)
         apply_query_term_boost(chunks, query_terms)

         # Apply title + popularity boosts (single DB query)
-        apply_source_boosts(chunks, query_terms)
+        apply_source_boosts(chunks, query_terms, recalled_content)
     else:
-        # No query terms, just apply popularity boost
-        apply_source_boosts(chunks, set())
+        # No query terms, just apply popularity and recalled title boosts
+        apply_source_boosts(chunks, set(), recalled_content)


 async def _apply_reranking(
@@ -486,23 +576,24 @@ async def search_chunks(
     )

     # Apply query analysis results
-    query_text, data, modalities, query_variants = _apply_query_analysis(
+    query_text, data, modalities, query_variants, recalled_content = _apply_query_analysis(
         analysis_result, query_text, data, modalities
     )

-    # Build search data with HyDE and variants
-    search_data = _build_search_data(data, hyde_doc, query_variants, query_text)
+    # Build search data with HyDE, variants, and recalled content
+    search_data = _build_search_data(data, hyde_doc, query_variants, recalled_content, query_text)

     # Run searches and fuse scores
     fused_scores = await _run_searches(
-        search_data, data, modalities, internal_limit, filters, timeout, use_bm25
+        search_data, data, modalities, internal_limit, filters, timeout, use_bm25,
+        recalled_titles=recalled_content,
     )

     # Fetch chunks from database
     chunks = _fetch_chunks(fused_scores, limit, use_reranking)

-    # Apply various boosts
-    _apply_boosts(chunks, data)
+    # Apply various boosts including recalled content title matching
+    _apply_boosts(chunks, data, recalled_content)

     # Apply reranking if enabled
     chunks = await _apply_reranking(chunks, query_text, limit, use_reranking)
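Condensed, the recalled-content path touches three stages; a runnable trace with invented values:

recalled_content = ["The Bottom Line"]

# 1. _build_search_data: recalled titles become extra search inputs
search_inputs = ["predetermined conclusions rationalization", *recalled_content[:3]]

# 2. _run_searches: chunks from title-matched sources are injected into the pool
fused = {"c1": 0.031}
fused.setdefault("c7", 0.5)              # chunk from a title-matched source

# 3. _apply_boosts: RECALLED_TITLE_BOOST lifts sources matching recalled titles
fused["c7"] += 0.05

assert sorted(fused, key=fused.get, reverse=True)[0] == "c7"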