mirror of
https://github.com/mruwnik/memory.git
synced 2026-01-02 09:12:58 +01:00
Add LLM-recalled content for half-remembered searches
- Query analysis now extracts recalled_content (titles/essays the LLM recognizes as relevant, e.g., "predetermined conclusions" → "The Bottom Line")
- Added title-based chunk fetching to ensure recalled content appears in results even when BM25/embedding search doesn't rank it highly
- Fixed BM25 to run a separate search for each query variant and merge the results (previously all variants were concatenated into a single AND query)
- Added RECALLED_TITLE_BOOST (0.05) for sources matching recalled titles

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7e1770f384
commit
0cc1d7f6c7
@ -170,11 +170,41 @@ async def search_bm25_chunks(
|
|||||||
"""
|
"""
|
||||||
Search chunks using PostgreSQL full-text search.
|
Search chunks using PostgreSQL full-text search.
|
||||||
|
|
||||||
|
Runs separate searches for each data chunk and merges results,
|
||||||
|
similar to how embedding search handles multiple query variants.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
- Dictionary mapping chunk IDs to their normalized scores (0-1 range)
|
- Dictionary mapping chunk IDs to their normalized scores (0-1 range)
|
||||||
"""
|
"""
|
||||||
query = " ".join([c for chunk in data for c in chunk.data if isinstance(c, str)])
|
# Extract query strings from each data chunk
|
||||||
return await asyncio.wait_for(
|
queries = [
|
||||||
search_bm25(query, modalities, limit, filters),
|
" ".join(c for c in chunk.data if isinstance(c, str))
|
||||||
timeout,
|
for chunk in data
|
||||||
)
|
]
|
||||||
|
queries = [q.strip() for q in queries if q.strip()]
|
||||||
|
|
||||||
|
if not queries:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Run separate searches for each query in parallel
|
||||||
|
async def run_search(query: str) -> dict[str, float]:
|
||||||
|
return await search_bm25(query, modalities, limit, filters)
|
||||||
|
|
||||||
|
try:
|
||||||
|
results = await asyncio.wait_for(
|
||||||
|
asyncio.gather(*[run_search(q) for q in queries], return_exceptions=True),
|
||||||
|
timeout,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Merge results - take max score for each chunk across all queries
|
||||||
|
merged: dict[str, float] = {}
|
||||||
|
for result in results:
|
||||||
|
if isinstance(result, Exception):
|
||||||
|
continue
|
||||||
|
for chunk_id, score in result.items():
|
||||||
|
if chunk_id not in merged or score > merged[chunk_id]:
|
||||||
|
merged[chunk_id] = score
|
||||||
|
|
||||||
|
return merged
|
||||||
|
|||||||
@ -22,6 +22,11 @@ QUERY_TERM_BOOST = 0.005
|
|||||||
# Bonus when query terms match the source title (stronger signal)
|
# Bonus when query terms match the source title (stronger signal)
|
||||||
TITLE_MATCH_BOOST = 0.01
|
TITLE_MATCH_BOOST = 0.01
|
||||||
|
|
||||||
|
# Bonus when source title matches LLM-recalled content (substring match in either direction, not strict equality)
|
||||||
|
# This is larger than regular title boost because it's a strong signal
|
||||||
|
# that the user is looking for specific known content
|
||||||
|
RECALLED_TITLE_BOOST = 0.05
|
||||||
|
|
||||||
# Bonus multiplier for popularity (applied as: score * (1 + POPULARITY_BOOST * (popularity - 1)))
|
# Bonus multiplier for popularity (applied as: score * (1 + POPULARITY_BOOST * (popularity - 1)))
|
||||||
# This gives a small boost to popular items without dominating relevance
|
# This gives a small boost to popular items without dominating relevance
|
||||||
POPULARITY_BOOST = 0.02
|
POPULARITY_BOOST = 0.02
|
||||||
|
|||||||
@ -6,6 +6,7 @@ Uses a fast LLM (Haiku) to analyze natural language queries and extract:
|
|||||||
- Source hints: author names, domains, or specific sources
|
- Source hints: author names, domains, or specific sources
|
||||||
- Cleaned query: the actual search terms with meta-language removed
|
- Cleaned query: the actual search terms with meta-language removed
|
||||||
- Query variants: alternative phrasings to search
|
- Query variants: alternative phrasings to search
|
||||||
|
- Recalled content: specific titles/essays the LLM recalls that match the query
|
||||||
|
|
||||||
This runs in parallel with HyDE for maximum efficiency.
|
This runs in parallel with HyDE for maximum efficiency.
|
||||||
"""
|
"""
|
||||||
@ -206,13 +207,16 @@ def _build_prompt() -> str:
|
|||||||
"modalities": [], // From: {modality_names} (empty = search all)
|
"modalities": [], // From: {modality_names} (empty = search all)
|
||||||
"sources": [], // Specific sources/authors mentioned
|
"sources": [], // Specific sources/authors mentioned
|
||||||
"cleaned_query": "", // Query with meta-language removed
|
"cleaned_query": "", // Query with meta-language removed
|
||||||
"query_variants": [] // 1-3 alternative phrasings
|
"query_variants": [], // 1-3 alternative phrasings
|
||||||
|
"recalled_content": [] // Specific titles/essays/concepts you recall that match
|
||||||
}}
|
}}
|
||||||
|
|
||||||
Guidelines:
|
Guidelines:
|
||||||
- "on lesswrong" -> forum, "comic about" -> comic, etc.
|
- Only restrict modalities when VERY confident about content type
|
||||||
- Remove "there was something about", "I remember reading", etc.
|
- When unsure, return empty modalities to search all
|
||||||
- Generate useful query variants
|
- Remove meta-language like "there was something about", "I remember reading"
|
||||||
|
- For recalled_content: if you recognize the topic, suggest specific titles/essays
|
||||||
|
that might be relevant (e.g., "predetermined conclusions" -> "The Bottom Line")
|
||||||
|
|
||||||
Return ONLY valid JSON.
|
Return ONLY valid JSON.
|
||||||
""")
|
""")
|
||||||
@ -232,6 +236,7 @@ class QueryAnalysis:
|
|||||||
sources: list[str] = field(default_factory=list)
|
sources: list[str] = field(default_factory=list)
|
||||||
cleaned_query: str = ""
|
cleaned_query: str = ""
|
||||||
query_variants: list[str] = field(default_factory=list)
|
query_variants: list[str] = field(default_factory=list)
|
||||||
|
recalled_content: list[str] = field(default_factory=list) # Titles/essays LLM recalls
|
||||||
success: bool = False
|
success: bool = False
|
||||||
|
|
||||||
|
|
||||||
@ -300,12 +305,14 @@ async def analyze_query(
|
|||||||
result.sources = data.get("sources", [])
|
result.sources = data.get("sources", [])
|
||||||
result.cleaned_query = data.get("cleaned_query", query)
|
result.cleaned_query = data.get("cleaned_query", query)
|
||||||
result.query_variants = data.get("query_variants", [])
|
result.query_variants = data.get("query_variants", [])
|
||||||
|
result.recalled_content = data.get("recalled_content", [])
|
||||||
result.success = True
|
result.success = True
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Query analysis: '{query[:40]}...' -> "
|
f"Query analysis: '{query[:40]}...' -> "
|
||||||
f"modalities={result.modalities}, "
|
f"modalities={result.modalities}, "
|
||||||
f"cleaned='{result.cleaned_query[:30]}...'"
|
f"cleaned='{result.cleaned_query[:30]}...', "
|
||||||
|
f"recalled={result.recalled_content}"
|
||||||
)
|
)
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
|
|||||||
@ -19,6 +19,7 @@ from memory.api.search.constants import (
|
|||||||
RERANK_CANDIDATE_MULTIPLIER,
|
RERANK_CANDIDATE_MULTIPLIER,
|
||||||
QUERY_TERM_BOOST,
|
QUERY_TERM_BOOST,
|
||||||
TITLE_MATCH_BOOST,
|
TITLE_MATCH_BOOST,
|
||||||
|
RECALLED_TITLE_BOOST,
|
||||||
POPULARITY_BOOST,
|
POPULARITY_BOOST,
|
||||||
RECENCY_BOOST_MAX,
|
RECENCY_BOOST_MAX,
|
||||||
RECENCY_HALF_LIFE_DAYS,
|
RECENCY_HALF_LIFE_DAYS,
|
||||||
@ -93,17 +94,22 @@ def deduplicate_by_source(chunks: list[Chunk]) -> list[Chunk]:
|
|||||||
def apply_source_boosts(
|
def apply_source_boosts(
|
||||||
chunks: list[Chunk],
|
chunks: list[Chunk],
|
||||||
query_terms: set[str],
|
query_terms: set[str],
|
||||||
|
recalled_titles: list[str] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Apply title, popularity, and recency boosts to chunks in a single DB query.
|
Apply title, popularity, and recency boosts to chunks in a single DB query.
|
||||||
|
|
||||||
- Title boost: chunks get boosted when query terms appear in source title
|
- Title boost: chunks get boosted when query terms appear in source title
|
||||||
|
- Recalled title boost: chunks get large boost if source title matches recalled content
|
||||||
- Popularity boost: chunks get boosted based on source karma/popularity
|
- Popularity boost: chunks get boosted based on source karma/popularity
|
||||||
- Recency boost: newer content gets a small boost that decays over time
|
- Recency boost: newer content gets a small boost that decays over time
|
||||||
"""
|
"""
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Normalize recalled titles for matching
|
||||||
|
recalled_lower = [t.lower() for t in (recalled_titles or [])]
|
||||||
|
|
||||||
source_ids = list({chunk.source_id for chunk in chunks})
|
source_ids = list({chunk.source_id for chunk in chunks})
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
|
|
||||||
@ -124,12 +130,18 @@ def apply_source_boosts(
|
|||||||
score = chunk.relevance_score or 0
|
score = chunk.relevance_score or 0
|
||||||
|
|
||||||
# Apply title boost if query terms match
|
# Apply title boost if query terms match
|
||||||
if query_terms:
|
title = source_data.get("title", "")
|
||||||
title = source_data.get("title", "")
|
if query_terms and title:
|
||||||
if title:
|
matches = sum(1 for term in query_terms if term in title)
|
||||||
matches = sum(1 for term in query_terms if term in title)
|
if matches > 0:
|
||||||
if matches > 0:
|
score += TITLE_MATCH_BOOST * (matches / len(query_terms))
|
||||||
score += TITLE_MATCH_BOOST * (matches / len(query_terms))
|
|
||||||
|
# Apply recalled title boost - large boost if title matches LLM-recalled content
|
||||||
|
if recalled_lower and title:
|
||||||
|
for recalled in recalled_lower:
|
||||||
|
if recalled in title or title in recalled:
|
||||||
|
score += RECALLED_TITLE_BOOST
|
||||||
|
break
|
||||||
|
|
||||||
# Apply popularity boost
|
# Apply popularity boost
|
||||||
popularity = source_data.get("popularity", 1.0)
|
popularity = source_data.get("popularity", 1.0)
|
||||||
@ -254,22 +266,23 @@ def _apply_query_analysis(
|
|||||||
query_text: str,
|
query_text: str,
|
||||||
data: list[extract.DataChunk],
|
data: list[extract.DataChunk],
|
||||||
modalities: set[str],
|
modalities: set[str],
|
||||||
) -> tuple[str, list[extract.DataChunk], set[str], list[str]]:
|
) -> tuple[str, list[extract.DataChunk], set[str], list[str], list[str]]:
|
||||||
"""
|
"""
|
||||||
Apply query analysis results to modify query, data, and modalities.
|
Apply query analysis results to modify query, data, and modalities.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(updated_query_text, updated_data, updated_modalities, query_variants)
|
(updated_query_text, updated_data, updated_modalities, query_variants, recalled_content)
|
||||||
"""
|
"""
|
||||||
query_variants: list[str] = []
|
query_variants: list[str] = []
|
||||||
|
recalled_content: list[str] = []
|
||||||
|
|
||||||
if not (analysis_result and analysis_result.success):
|
if not (analysis_result and analysis_result.success):
|
||||||
return query_text, data, modalities, query_variants
|
return query_text, data, modalities, query_variants, recalled_content
|
||||||
|
|
||||||
# Use detected modalities if any
|
# Log detected modalities but don't restrict - content may exist in multiple modalities
|
||||||
|
# (e.g., "the sequences" is both forum posts AND a book compilation)
|
||||||
if analysis_result.modalities:
|
if analysis_result.modalities:
|
||||||
modalities = analysis_result.modalities
|
logger.debug(f"Query analysis detected modalities: {analysis_result.modalities}")
|
||||||
logger.debug(f"Query analysis modalities: {modalities}")
|
|
||||||
|
|
||||||
# Use cleaned query
|
# Use cleaned query
|
||||||
if analysis_result.cleaned_query and analysis_result.cleaned_query != query_text:
|
if analysis_result.cleaned_query and analysis_result.cleaned_query != query_text:
|
||||||
@ -282,19 +295,25 @@ def _apply_query_analysis(
|
|||||||
# Collect query variants
|
# Collect query variants
|
||||||
query_variants.extend(analysis_result.query_variants)
|
query_variants.extend(analysis_result.query_variants)
|
||||||
|
|
||||||
return query_text, data, modalities, query_variants
|
# Collect recalled content (titles/essays the LLM remembers)
|
||||||
|
recalled_content.extend(analysis_result.recalled_content)
|
||||||
|
if recalled_content:
|
||||||
|
logger.debug(f"Query analysis recalled: {recalled_content}")
|
||||||
|
|
||||||
|
return query_text, data, modalities, query_variants, recalled_content
|
||||||
|
|
||||||
|
|
||||||
def _build_search_data(
|
def _build_search_data(
|
||||||
data: list[extract.DataChunk],
|
data: list[extract.DataChunk],
|
||||||
hyde_doc: str | None,
|
hyde_doc: str | None,
|
||||||
query_variants: list[str],
|
query_variants: list[str],
|
||||||
|
recalled_content: list[str],
|
||||||
query_text: str,
|
query_text: str,
|
||||||
) -> list[extract.DataChunk]:
|
) -> list[extract.DataChunk]:
|
||||||
"""
|
"""
|
||||||
Build the list of data chunks to search with.
|
Build the list of data chunks to search with.
|
||||||
|
|
||||||
Includes original query, HyDE expansion, and query variants.
|
Includes original query, HyDE expansion, query variants, and recalled content.
|
||||||
"""
|
"""
|
||||||
search_data = list(data)
|
search_data = list(data)
|
||||||
|
|
||||||
@ -307,9 +326,67 @@ def _build_search_data(
|
|||||||
for variant in query_variants[:3]:
|
for variant in query_variants[:3]:
|
||||||
search_data.append(extract.DataChunk(data=[variant]))
|
search_data.append(extract.DataChunk(data=[variant]))
|
||||||
|
|
||||||
|
# Add recalled content (titles/essays the LLM remembers that match the query)
|
||||||
|
for title in recalled_content[:3]:
|
||||||
|
search_data.append(extract.DataChunk(data=[title]))
|
||||||
|
|
||||||
return search_data
|
return search_data
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_chunks_by_title(
|
||||||
|
titles: list[str],
|
||||||
|
modalities: set[str],
|
||||||
|
limit_per_title: int = 5,
|
||||||
|
) -> dict[str, float]:
|
||||||
|
"""
|
||||||
|
Fetch chunks from sources whose titles match the given titles.
|
||||||
|
|
||||||
|
This ensures recalled content from LLM makes it into the candidate pool
|
||||||
|
even if BM25/embedding search doesn't rank it highly.
|
||||||
|
"""
|
||||||
|
if not titles or not modalities:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Normalize titles for matching
|
||||||
|
titles_lower = [t.lower() for t in titles[:5]]
|
||||||
|
|
||||||
|
with make_session() as db:
|
||||||
|
# Query sources in requested modalities
|
||||||
|
# We need to fetch the polymorphic models to get their title attributes
|
||||||
|
sources = (
|
||||||
|
db.query(SourceItem)
|
||||||
|
.filter(SourceItem.modality.in_(modalities))
|
||||||
|
.limit(500) # Reasonable limit for title scanning
|
||||||
|
.all()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter sources whose titles match any of the recalled titles
|
||||||
|
matching_source_ids = []
|
||||||
|
for source in sources:
|
||||||
|
title = getattr(source, "title", None)
|
||||||
|
if title:
|
||||||
|
title_lower = title.lower()
|
||||||
|
for recalled in titles_lower:
|
||||||
|
if recalled in title_lower or title_lower in recalled:
|
||||||
|
matching_source_ids.append(source.id)
|
||||||
|
break
|
||||||
|
|
||||||
|
if not matching_source_ids:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Fetch chunks for matching sources
|
||||||
|
chunks = (
|
||||||
|
db.query(Chunk.id)
|
||||||
|
.filter(Chunk.source_id.in_(matching_source_ids[:limit_per_title * len(titles)]))
|
||||||
|
.limit(limit_per_title * len(matching_source_ids))
|
||||||
|
.all()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Give these chunks a baseline score so they're included in fusion
|
||||||
|
# The actual boost will be applied later by apply_source_boosts
|
||||||
|
return {str(c.id): 0.5 for c in chunks}
|
||||||
|
|
||||||
|
|
||||||
async def _run_searches(
|
async def _run_searches(
|
||||||
search_data: list[extract.DataChunk],
|
search_data: list[extract.DataChunk],
|
||||||
data: list[extract.DataChunk],
|
data: list[extract.DataChunk],
|
||||||
@ -318,6 +395,7 @@ async def _run_searches(
|
|||||||
filters: SearchFilters,
|
filters: SearchFilters,
|
||||||
timeout: int,
|
timeout: int,
|
||||||
use_bm25: bool,
|
use_bm25: bool,
|
||||||
|
recalled_titles: list[str] | None = None,
|
||||||
) -> dict[str, float]:
|
) -> dict[str, float]:
|
||||||
"""
|
"""
|
||||||
Run embedding and optionally BM25 searches in parallel, returning fused scores.
|
Run embedding and optionally BM25 searches in parallel, returning fused scores.
|
||||||
@ -329,9 +407,10 @@ async def _run_searches(
|
|||||||
|
|
||||||
if use_bm25:
|
if use_bm25:
|
||||||
# Run both searches in parallel
|
# Run both searches in parallel
|
||||||
|
# Note: BM25 uses search_data to include query variants and recalled content
|
||||||
results = await asyncio.gather(
|
results = await asyncio.gather(
|
||||||
embedding_task,
|
embedding_task,
|
||||||
search_bm25_chunks(data, modalities, internal_limit, filters, timeout),
|
search_bm25_chunks(search_data, modalities, internal_limit, filters, timeout),
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -347,7 +426,17 @@ async def _run_searches(
|
|||||||
bm25_scores = {}
|
bm25_scores = {}
|
||||||
|
|
||||||
# Fuse scores from both methods using Reciprocal Rank Fusion
|
# Fuse scores from both methods using Reciprocal Rank Fusion
|
||||||
return fuse_scores_rrf(embedding_scores, bm25_scores)
|
fused = fuse_scores_rrf(embedding_scores, bm25_scores)
|
||||||
|
|
||||||
|
# Add chunks from recalled titles (direct title match)
|
||||||
|
# This ensures LLM-recalled content makes it into the candidate pool
|
||||||
|
if recalled_titles:
|
||||||
|
title_chunks = _fetch_chunks_by_title(recalled_titles, modalities)
|
||||||
|
for chunk_id, score in title_chunks.items():
|
||||||
|
if chunk_id not in fused:
|
||||||
|
fused[chunk_id] = score
|
||||||
|
|
||||||
|
return fused
|
||||||
|
|
||||||
|
|
||||||
def _fetch_chunks(
|
def _fetch_chunks(
|
||||||
@ -391,6 +480,7 @@ def _fetch_chunks(
|
|||||||
def _apply_boosts(
|
def _apply_boosts(
|
||||||
chunks: list[Chunk],
|
chunks: list[Chunk],
|
||||||
data: list[extract.DataChunk],
|
data: list[extract.DataChunk],
|
||||||
|
recalled_content: list[str] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Apply query term, title, popularity, and recency boosts to chunks.
|
Apply query term, title, popularity, and recency boosts to chunks.
|
||||||
@ -407,10 +497,10 @@ def _apply_boosts(
|
|||||||
query_terms = extract_query_terms(query_text)
|
query_terms = extract_query_terms(query_text)
|
||||||
apply_query_term_boost(chunks, query_terms)
|
apply_query_term_boost(chunks, query_terms)
|
||||||
# Apply title + popularity boosts (single DB query)
|
# Apply title + popularity boosts (single DB query)
|
||||||
apply_source_boosts(chunks, query_terms)
|
apply_source_boosts(chunks, query_terms, recalled_content)
|
||||||
else:
|
else:
|
||||||
# No query terms, just apply popularity boost
|
# No query terms, just apply popularity and recalled title boosts
|
||||||
apply_source_boosts(chunks, set())
|
apply_source_boosts(chunks, set(), recalled_content)
|
||||||
|
|
||||||
|
|
||||||
async def _apply_reranking(
|
async def _apply_reranking(
|
||||||
@ -486,23 +576,24 @@ async def search_chunks(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Apply query analysis results
|
# Apply query analysis results
|
||||||
query_text, data, modalities, query_variants = _apply_query_analysis(
|
query_text, data, modalities, query_variants, recalled_content = _apply_query_analysis(
|
||||||
analysis_result, query_text, data, modalities
|
analysis_result, query_text, data, modalities
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build search data with HyDE and variants
|
# Build search data with HyDE, variants, and recalled content
|
||||||
search_data = _build_search_data(data, hyde_doc, query_variants, query_text)
|
search_data = _build_search_data(data, hyde_doc, query_variants, recalled_content, query_text)
|
||||||
|
|
||||||
# Run searches and fuse scores
|
# Run searches and fuse scores
|
||||||
fused_scores = await _run_searches(
|
fused_scores = await _run_searches(
|
||||||
search_data, data, modalities, internal_limit, filters, timeout, use_bm25
|
search_data, data, modalities, internal_limit, filters, timeout, use_bm25,
|
||||||
|
recalled_titles=recalled_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fetch chunks from database
|
# Fetch chunks from database
|
||||||
chunks = _fetch_chunks(fused_scores, limit, use_reranking)
|
chunks = _fetch_chunks(fused_scores, limit, use_reranking)
|
||||||
|
|
||||||
# Apply various boosts
|
# Apply various boosts including recalled content title matching
|
||||||
_apply_boosts(chunks, data)
|
_apply_boosts(chunks, data, recalled_content)
|
||||||
|
|
||||||
# Apply reranking if enabled
|
# Apply reranking if enabled
|
||||||
chunks = await _apply_reranking(chunks, query_text, limit, use_reranking)
|
chunks = await _apply_reranking(chunks, query_text, limit, use_reranking)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user