"""
|
|
Tests for search module functions including RRF fusion, query term boosting,
|
|
title boosting, and source deduplication.
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
from memory.api.search.search import (
|
|
extract_query_terms,
|
|
apply_query_term_boost,
|
|
deduplicate_by_source,
|
|
apply_source_boosts,
|
|
fuse_scores_rrf,
|
|
)
|
|
from memory.api.search.constants import (
|
|
STOPWORDS,
|
|
QUERY_TERM_BOOST,
|
|
TITLE_MATCH_BOOST,
|
|
POPULARITY_BOOST,
|
|
RECENCY_BOOST_MAX,
|
|
RECENCY_HALF_LIFE_DAYS,
|
|
RRF_K,
|
|
)


# ============================================================================
# extract_query_terms tests
# ============================================================================


@pytest.mark.parametrize(
    "query,expected",
    [
        ("machine learning algorithms", {"machine", "learning", "algorithms"}),
        ("MACHINE Learning ALGORITHMS", {"machine", "learning", "algorithms"}),
        ("", set()),
        ("the is a an of to", set()),  # Only stopwords
    ],
)
def test_extract_query_terms_basic(query, expected):
    """Should extract meaningful terms, lowercase them, and filter stopwords."""
    assert extract_query_terms(query) == expected


@pytest.mark.parametrize(
    "query,must_include,must_exclude",
    [
        (
            "the quick brown fox jumps with the lazy dog",
            {"quick", "brown", "jumps", "lazy", "fox", "dog"},
            {"the", "with"},
        ),
        (
            "what is the best approach for neural networks",
            {"best", "approach", "neural", "networks"},
            {"what", "the", "for"},
        ),
    ],
)
def test_extract_query_terms_filtering(query, must_include, must_exclude):
    """Should filter stopwords while keeping meaningful terms."""
    terms = extract_query_terms(query)
    for term in must_include:
        assert term in terms, f"'{term}' should be in terms"
    for term in must_exclude:
        assert term not in terms, f"'{term}' should not be in terms"


@pytest.mark.parametrize(
    "query,included,excluded",
    [
        # 2-letter terms like "ai", "ml" should be INCLUDED (important acronyms);
        # 1-letter words like "a" and stopwords like "is" should be excluded
        ("AI is a new ML model", {"ai", "ml", "new", "model"}, {"is", "a"}),
    ],
)
def test_extract_query_terms_short_words(query, included, excluded):
    """Should include 2-letter words but filter 1-letter words and stopwords."""
    terms = extract_query_terms(query)
    for term in included:
        assert term in terms, f"'{term}' should be in terms"
    for term in excluded:
        assert term not in terms, f"'{term}' should not be in terms"


@pytest.mark.parametrize(
    "word",
    ["the", "is", "are", "was", "were", "be", "been", "have", "has", "had",
     "do", "does", "did", "to", "of", "in", "for", "on", "with", "at", "by"],
)
def test_common_stopwords_in_set(word):
    """Verify common stopwords are in the STOPWORDS set."""
    assert word in STOPWORDS
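

# For orientation, a minimal sketch of the extraction rule the tests above
# encode: lowercase, split into words, keep terms of at least 2 characters
# that are not stopwords. Illustrative only; the real implementation is
# memory.api.search.search.extract_query_terms and likely also handles
# punctuation. This hypothetical helper is not used by the tests.
def _extract_query_terms_sketch(query: str) -> set[str]:
    words = query.lower().split()
    return {word for word in words if len(word) >= 2 and word not in STOPWORDS}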


# ============================================================================
# apply_query_term_boost tests
# ============================================================================


def _make_chunk(content: str, source_id: int = 1, score: float = 0.5):
    """Create a mock chunk with given content and score."""
    chunk = MagicMock()
    chunk.content = content
    chunk.source_id = source_id
    chunk.relevance_score = score
    return chunk


@pytest.mark.parametrize(
    "content,query_terms,initial_score,expected_boost_fraction",
    [
        ("machine learning is powerful", {"machine", "learning"}, 0.5, 1.0),  # Both match
        ("machine vision systems", {"machine", "learning"}, 0.5, 0.5),  # One of two
        ("deep neural networks", {"machine", "learning"}, 0.5, 0.0),  # No match
        ("MACHINE Learning AlGoRiThMs", {"machine", "learning", "algorithms"}, 0.5, 1.0),  # Case insensitive
    ],
)
def test_apply_query_term_boost(content, query_terms, initial_score, expected_boost_fraction):
    """Should boost chunks based on query term matches."""
    chunks = [_make_chunk(content, score=initial_score)]
    apply_query_term_boost(chunks, query_terms)
    expected = initial_score + QUERY_TERM_BOOST * expected_boost_fraction
    assert chunks[0].relevance_score == pytest.approx(expected)


def test_apply_query_term_boost_empty_inputs():
    """Should handle empty query_terms or chunks."""
    chunks = [_make_chunk("machine learning", score=0.5)]
    apply_query_term_boost(chunks, set())
    assert chunks[0].relevance_score == 0.5

    apply_query_term_boost([], {"machine"})  # Should not raise


def test_apply_query_term_boost_none_values():
    """Should handle None content and relevance_score."""
    chunk_none_content = MagicMock()
    chunk_none_content.content = None
    chunk_none_content.relevance_score = 0.5
    apply_query_term_boost([chunk_none_content], {"machine"})
    assert chunk_none_content.relevance_score == 0.5

    chunk_none_score = MagicMock()
    chunk_none_score.content = "machine learning"
    chunk_none_score.relevance_score = None
    apply_query_term_boost([chunk_none_score], {"machine", "learning"})
    assert chunk_none_score.relevance_score == pytest.approx(QUERY_TERM_BOOST)


def test_apply_query_term_boost_multiple_chunks():
    """Should boost each chunk independently."""
    chunks = [
        _make_chunk("machine learning", score=0.5),
        _make_chunk("deep networks", score=0.6),
        _make_chunk("machine vision", score=0.4),
    ]
    query_terms = {"machine", "learning"}
    apply_query_term_boost(chunks, query_terms)

    assert chunks[0].relevance_score == pytest.approx(0.5 + QUERY_TERM_BOOST)
    assert chunks[1].relevance_score == 0.6  # No match
    assert chunks[2].relevance_score == pytest.approx(0.4 + QUERY_TERM_BOOST * 0.5)
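

# The boost rule asserted above, restated: add QUERY_TERM_BOOST scaled by the
# fraction of query terms that appear in the chunk content, treating None
# content as no match and a None score as 0. A hypothetical sketch; whether
# the real apply_query_term_boost matches substrings or whole tokens is not
# pinned down by these tests.
def _query_term_boost_sketch(score, content, query_terms: set) -> float:
    if not content or not query_terms:
        return score or 0.0
    matched = sum(1 for term in query_terms if term in content.lower())
    return (score or 0.0) + QUERY_TERM_BOOST * matched / len(query_terms)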


# ============================================================================
# deduplicate_by_source tests
# ============================================================================


def _make_source_chunk(source_id: int, score: float):
    """Create a mock chunk with given source_id and score."""
    chunk = MagicMock()
    chunk.source_id = source_id
    chunk.relevance_score = score
    return chunk


@pytest.mark.parametrize(
    "chunks_data,expected_count,expected_scores",
    [
        # Multiple chunks per source - keep highest
        ([(1, 0.5), (1, 0.8), (1, 0.3), (2, 0.6)], 2, {1: 0.8, 2: 0.6}),
        # Single chunk per source - keep all
        ([(1, 0.5), (2, 0.6), (3, 0.7)], 3, {1: 0.5, 2: 0.6, 3: 0.7}),
        # Empty list
        ([], 0, {}),
    ],
)
def test_deduplicate_by_source(chunks_data, expected_count, expected_scores):
    """Should keep only the highest scoring chunk per source."""
    chunks = [_make_source_chunk(sid, score) for sid, score in chunks_data]
    result = deduplicate_by_source(chunks)

    assert len(result) == expected_count
    for chunk in result:
        assert chunk.relevance_score == expected_scores[chunk.source_id]


def test_deduplicate_by_source_preserves_objects():
    """Should return the actual chunk objects, not copies."""
    chunk1 = _make_source_chunk(1, 0.5)
    chunk2 = _make_source_chunk(1, 0.8)
    result = deduplicate_by_source([chunk1, chunk2])
    assert result[0] is chunk2


def test_deduplicate_by_source_none_scores():
    """Should handle None relevance_score as 0."""
    chunk1 = _make_source_chunk(1, None)
    chunk2 = _make_source_chunk(1, 0.5)
    result = deduplicate_by_source([chunk1, chunk2])
    assert result[0].relevance_score == 0.5
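

# A minimal sketch of the dedup rule asserted above: group chunks by
# source_id and keep the highest-scoring chunk per source, treating a None
# score as 0 and preserving object identity. Illustrative only; the tests
# exercise the real deduplicate_by_source.
def _deduplicate_by_source_sketch(chunks: list) -> list:
    best: dict = {}
    for chunk in chunks:
        score = chunk.relevance_score or 0.0
        kept = best.get(chunk.source_id)
        if kept is None or score > (kept.relevance_score or 0.0):
            best[chunk.source_id] = chunk
    return list(best.values())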


# ============================================================================
# apply_source_boosts tests (title + popularity + recency)
# ============================================================================


def _make_boost_chunk(source_id: int, score: float = 0.5):
    """Create a mock chunk for boost tests."""
    chunk = MagicMock()
    chunk.source_id = source_id
    chunk.relevance_score = score
    return chunk


@pytest.mark.parametrize(
    "title,query_terms,initial_score,expected_boost_fraction",
    [
        ("Machine Learning Tutorial", {"machine", "learning"}, 0.5, 1.0),
        ("Machine Vision Systems", {"machine", "learning"}, 0.5, 0.5),
        ("Deep Neural Networks", {"machine", "learning"}, 0.5, 0.0),
        ("MACHINE LEARNING Tutorial", {"machine", "learning"}, 0.5, 1.0),  # Case insensitive
    ],
)
@patch("memory.api.search.search.make_session")
def test_apply_source_boosts_title(mock_make_session, title, query_terms, initial_score, expected_boost_fraction):
    """Should boost chunks when title matches query terms."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = title
    mock_source.popularity = 1.0  # Default popularity, no boost
    mock_source.inserted_at = None  # No recency boost
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_boost_chunk(1, initial_score)]
    apply_source_boosts(chunks, query_terms)

    expected = initial_score + TITLE_MATCH_BOOST * expected_boost_fraction
    assert chunks[0].relevance_score == pytest.approx(expected)


def test_apply_source_boosts_empty_inputs():
    """Should not modify chunks if query_terms or chunks is empty."""
    chunks = [_make_boost_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())
    assert chunks[0].relevance_score == 0.5

    apply_source_boosts([], {"machine"})  # Should not raise


@patch("memory.api.search.search.make_session")
def test_apply_source_boosts_none_title(mock_make_session):
    """Should handle sources with None or missing title."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    # Source with None title
    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0  # Default popularity, no boost
    mock_source.inserted_at = None  # No recency boost
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_boost_chunk(1, 0.5)]
    apply_source_boosts(chunks, {"machine"})
    assert chunks[0].relevance_score == 0.5


@pytest.mark.parametrize(
    "popularity,initial_score,expected_multiplier",
    [
        (1.0, 0.5, 1.0),  # Default popularity, no change
        (2.0, 0.5, 1.0 + POPULARITY_BOOST),  # High popularity
        (0.5, 0.5, 1.0 - POPULARITY_BOOST * 0.5),  # Low popularity
        (1.5, 1.0, 1.0 + POPULARITY_BOOST * 0.5),  # Moderate popularity
    ],
)
@patch("memory.api.search.search.make_session")
def test_apply_source_boosts_popularity(mock_make_session, popularity, initial_score, expected_multiplier):
    """Should boost chunks based on source popularity."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.popularity = popularity
    mock_source.inserted_at = None  # No recency boost
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_boost_chunk(1, initial_score)]
    apply_source_boosts(chunks, set())  # No query terms, just popularity

    expected = initial_score * expected_multiplier
    assert chunks[0].relevance_score == pytest.approx(expected)


def test_apply_source_boosts_empty_chunks():
    """Should handle empty chunks list."""
    apply_source_boosts([], set())  # Should not raise


@patch("memory.api.search.search.make_session")
def test_apply_source_boosts_multiple_sources(mock_make_session):
    """Should apply different boosts per source."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    source1 = MagicMock()
    source1.id = 1
    source1.popularity = 2.0  # High karma
    source1.inserted_at = None  # No recency boost
    source2 = MagicMock()
    source2.id = 2
    source2.popularity = 1.0  # Default
    source2.inserted_at = None  # No recency boost
    mock_session.query.return_value.filter.return_value.all.return_value = [source1, source2]

    chunks = [_make_boost_chunk(1, 0.5), _make_boost_chunk(2, 0.5)]
    apply_source_boosts(chunks, set())

    # Source 1 should be boosted
    assert chunks[0].relevance_score == pytest.approx(0.5 * (1.0 + POPULARITY_BOOST))
    # Source 2 should be unchanged (popularity = 1.0)
    assert chunks[1].relevance_score == 0.5
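

# The source-level boosts the cases above pin down, restated as arithmetic:
# popularity multiplies the score by 1 + POPULARITY_BOOST * (popularity - 1),
# so the default popularity of 1.0 is a no-op, and a matching title adds
# TITLE_MATCH_BOOST scaled by the fraction of query terms found in it. A
# hypothetical sketch, not the real apply_source_boosts (which also loads
# sources from the database and applies the recency boost tested below).
def _source_boost_sketch(score: float, popularity: float, title, query_terms: set) -> float:
    boosted = score * (1.0 + POPULARITY_BOOST * (popularity - 1.0))
    if title and query_terms:
        matched = sum(1 for term in query_terms if term in title.lower())
        boosted += TITLE_MATCH_BOOST * matched / len(query_terms)
    return boosted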


# ============================================================================
# fuse_scores_rrf tests
# ============================================================================


@pytest.mark.parametrize(
    "embedding_scores,bm25_scores,expected_key,expected_score",
    [
        # Both sources have same ranking
        ({"a": 0.9, "b": 0.7}, {"a": 0.8, "b": 0.6}, "a", 2 / (RRF_K + 1)),
        # Item only in embeddings
        ({"a": 0.9, "b": 0.7}, {"a": 0.8}, "b", 1 / (RRF_K + 2)),
        # Item only in BM25
        ({"a": 0.9}, {"a": 0.8, "b": 0.7}, "b", 1 / (RRF_K + 2)),
        # Single item in both
        ({"a": 0.9}, {"a": 0.8}, "a", 2 / (RRF_K + 1)),
    ],
)
def test_fuse_scores_rrf_basic(embedding_scores, bm25_scores, expected_key, expected_score):
    """Should compute RRF scores correctly."""
    result = fuse_scores_rrf(embedding_scores, bm25_scores)
    assert result[expected_key] == pytest.approx(expected_score)


def test_fuse_scores_rrf_different_rankings():
    """Should handle items ranked differently in each source."""
    embedding_scores = {"a": 0.9, "b": 0.5}  # a=1, b=2
    bm25_scores = {"a": 0.3, "b": 0.8}  # b=1, a=2

    result = fuse_scores_rrf(embedding_scores, bm25_scores)

    # Both should have the same RRF score: 1/(RRF_K+1) + 1/(RRF_K+2)
    expected = 1 / (RRF_K + 1) + 1 / (RRF_K + 2)
    assert result["a"] == pytest.approx(expected)
    assert result["b"] == pytest.approx(expected)


@pytest.mark.parametrize(
    "embedding_scores,bm25_scores,expected_len",
    [
        ({}, {}, 0),
        ({}, {"a": 0.8, "b": 0.6}, 2),
        ({"a": 0.9, "b": 0.7}, {}, 2),
    ],
)
def test_fuse_scores_rrf_empty_inputs(embedding_scores, bm25_scores, expected_len):
    """Should handle empty inputs gracefully."""
    result = fuse_scores_rrf(embedding_scores, bm25_scores)
    assert len(result) == expected_len


def test_fuse_scores_rrf_many_items():
    """Should handle many items correctly."""
    embedding_scores = {str(i): 1.0 - i * 0.01 for i in range(100)}
    bm25_scores = {str(i): 1.0 - i * 0.01 for i in range(100)}

    result = fuse_scores_rrf(embedding_scores, bm25_scores)

    assert len(result) == 100
    assert result["0"] > result["99"]  # First should have the highest score


def test_fuse_scores_rrf_only_ranks_matter():
    """RRF should only care about ranks, not score magnitudes."""
    # Same ranking, different score scales
    result1 = fuse_scores_rrf(
        {"a": 0.99, "b": 0.98, "c": 0.97},
        {"a": 100, "b": 50, "c": 1},
    )
    result2 = fuse_scores_rrf(
        {"a": 0.5, "b": 0.4, "c": 0.3},
        {"a": 0.9, "b": 0.8, "c": 0.7},
    )

    # RRF scores should be identical since the rankings are the same
    assert result1["a"] == pytest.approx(result2["a"])
    assert result1["b"] == pytest.approx(result2["b"])
    assert result1["c"] == pytest.approx(result2["c"])


# ============================================================================
# apply_source_boosts recency tests
# ============================================================================


def _make_recency_chunk(source_id: int, score: float = 0.5):
    """Create a mock chunk for recency boost tests."""
    chunk = MagicMock()
    chunk.source_id = source_id
    chunk.relevance_score = score
    return chunk


@patch("memory.api.search.search.make_session")
def test_recency_boost_new_content(mock_make_session):
    """Brand new content should get full recency boost."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    now = datetime.now(timezone.utc)
    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0
    mock_source.inserted_at = now  # Just inserted
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_recency_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())

    # Should get nearly the full recency boost
    expected = 0.5 + RECENCY_BOOST_MAX
    assert chunks[0].relevance_score == pytest.approx(expected, rel=0.01)


@patch("memory.api.search.search.make_session")
def test_recency_boost_half_life_decay(mock_make_session):
    """Content at half-life age should get half the boost."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    now = datetime.now(timezone.utc)
    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0
    mock_source.inserted_at = now - timedelta(days=RECENCY_HALF_LIFE_DAYS)
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_recency_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())

    # Should get half the recency boost
    expected = 0.5 + RECENCY_BOOST_MAX * 0.5
    assert chunks[0].relevance_score == pytest.approx(expected, rel=0.01)


@patch("memory.api.search.search.make_session")
def test_recency_boost_old_content(mock_make_session):
    """Very old content should get minimal recency boost."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    now = datetime.now(timezone.utc)
    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0
    mock_source.inserted_at = now - timedelta(days=365)  # 1 year old
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_recency_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())

    # Should get very little boost (about 0.5^4 ≈ 0.0625 of max)
    assert chunks[0].relevance_score > 0.5
    assert chunks[0].relevance_score < 0.5 + RECENCY_BOOST_MAX * 0.1


@patch("memory.api.search.search.make_session")
def test_recency_boost_none_timestamp(mock_make_session):
    """Should handle None inserted_at gracefully."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0
    mock_source.inserted_at = None
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_recency_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())

    # No recency boost applied
    assert chunks[0].relevance_score == 0.5


@patch("memory.api.search.search.make_session")
def test_recency_boost_timezone_naive(mock_make_session):
    """Should handle timezone-naive timestamps."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    # Timezone-naive timestamp (datetime.now() carries no tzinfo)
    naive_dt = datetime.now()
    mock_source = MagicMock()
    mock_source.id = 1
    mock_source.title = None
    mock_source.popularity = 1.0
    mock_source.inserted_at = naive_dt
    mock_session.query.return_value.filter.return_value.all.return_value = [mock_source]

    chunks = [_make_recency_chunk(1, 0.5)]
    apply_source_boosts(chunks, set())  # Should not raise

    # Should get nearly the full boost since it's very recent
    assert chunks[0].relevance_score > 0.5


@patch("memory.api.search.search.make_session")
def test_recency_boost_ordering(mock_make_session):
    """Newer content should rank higher than older content."""
    mock_session = MagicMock()
    mock_make_session.return_value.__enter__ = MagicMock(return_value=mock_session)
    mock_make_session.return_value.__exit__ = MagicMock(return_value=None)

    now = datetime.now(timezone.utc)
    source_new = MagicMock()
    source_new.id = 1
    source_new.title = None
    source_new.popularity = 1.0
    source_new.inserted_at = now - timedelta(days=1)

    source_old = MagicMock()
    source_old.id = 2
    source_old.title = None
    source_old.popularity = 1.0
    source_old.inserted_at = now - timedelta(days=180)

    mock_session.query.return_value.filter.return_value.all.return_value = [source_new, source_old]

    chunks = [_make_recency_chunk(1, 0.5), _make_recency_chunk(2, 0.5)]
    apply_source_boosts(chunks, set())

    # Newer content should have a higher score
    assert chunks[0].relevance_score > chunks[1].relevance_score
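

# The decay curve the recency tests encode: the boost starts at
# RECENCY_BOOST_MAX for brand-new content, halves every
# RECENCY_HALF_LIFE_DAYS, and is skipped entirely when inserted_at is None.
# A hypothetical restatement of the asserted arithmetic; how the real code
# normalizes timezone-naive timestamps is an assumption here.
def _recency_boost_sketch(score: float, inserted_at) -> float:
    if inserted_at is None:
        return score
    if inserted_at.tzinfo is None:
        # Assume UTC for naive timestamps (assumption, not confirmed above).
        inserted_at = inserted_at.replace(tzinfo=timezone.utc)
    age_days = (datetime.now(timezone.utc) - inserted_at).total_seconds() / 86400
    return score + RECENCY_BOOST_MAX * 0.5 ** (age_days / RECENCY_HALF_LIFE_DAYS)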