diff --git a/src/memory/api/search/hyde.py b/src/memory/api/search/hyde.py
new file mode 100644
index 0000000..ed523a2
--- /dev/null
+++ b/src/memory/api/search/hyde.py
@@ -0,0 +1,151 @@
+"""
+HyDE (Hypothetical Document Embeddings) for query expansion.
+
+When users search with vague or conceptual queries like "that article about not using
+specific words", the query embedding may not match the actual document well. HyDE
+generates a hypothetical document that would answer the query, then embeds that
+instead. This bridges the gap between query terminology and document terminology.
+
+Example:
+- Query: "saying what you mean not using specific words"
+- HyDE generates: "An article discussing the technique of 'tabooing' words - replacing
+  specific terms with their definitions to clarify thinking and avoid confused debates..."
+- The hypothetical document embeds closer to the actual "Taboo Your Words" article.
+
+Reference: https://arxiv.org/abs/2212.10496
+"""
+
+import asyncio
+import logging
+from typing import Optional
+
+from memory.common import settings
+from memory.common.llms import create_provider, LLMSettings, Message
+
+logger = logging.getLogger(__name__)
+
+# System prompt for generating hypothetical documents
+HYDE_SYSTEM_PROMPT = """You are a search assistant helping to find documents in a knowledge base.
+Given a user's search query, write a short passage (2-3 sentences) that would appear in a
+document that answers their query. Write as if you are excerpting from an actual article.
+
+Do NOT:
+- Ask clarifying questions
+- Say "I don't know" or "I'm not sure"
+- Include meta-commentary like "This article discusses..."
+- Use phrases like "The document might say..."
+
+DO:
+- Write in the style of the target document (article, blog post, book excerpt)
+- Use specific terminology that would appear in such a document
+- Be concise and direct
+- Include key concepts and vocabulary related to the query"""
+
+# Cache for recent HyDE expansions (simple in-memory cache)
+_hyde_cache: dict[str, str] = {}
+_CACHE_MAX_SIZE = 100
+
+
+async def expand_query_hyde(
+    query: str,
+    model: Optional[str] = None,
+    timeout: float = 5.0,
+) -> Optional[str]:
+    """
+    Expand a query using HyDE (Hypothetical Document Embeddings).
+
+    Generates a hypothetical document passage that would answer the query,
+    which can then be embedded for better semantic matching.
+
+    Args:
+        query: The user's search query
+        model: LLM model to use (defaults to SUMMARIZER_MODEL)
+        timeout: Maximum time to wait for the LLM response
+
+    Returns:
+        A hypothetical document passage, or None if generation fails/times out
+    """
+    # Check cache first
+    cache_key = query.lower().strip()
+    if cache_key in _hyde_cache:
+        logger.debug(f"HyDE cache hit for: {query[:50]}...")
+        return _hyde_cache[cache_key]
+
+    try:
+        provider = create_provider(model=model or settings.SUMMARIZER_MODEL)
+
+        messages = [
+            Message.user(text=f"Search query: {query}")
+        ]
+
+        llm_settings = LLMSettings(
+            temperature=0.3,  # Lower temperature for more focused output
+            max_tokens=200,  # Short passages only
+        )
+
+        # Run with timeout
+        hypothetical_doc = await asyncio.wait_for(
+            provider.agenerate(
+                messages=messages,
+                system_prompt=HYDE_SYSTEM_PROMPT,
+                settings=llm_settings,
+            ),
+            timeout=timeout,
+        )
+
+        if hypothetical_doc:
+            hypothetical_doc = hypothetical_doc.strip()
+
+            # Cache the result
+            if len(_hyde_cache) >= _CACHE_MAX_SIZE:
+                # Simple eviction: clear half the cache
+                keys_to_remove = list(_hyde_cache.keys())[:_CACHE_MAX_SIZE // 2]
+                for key in keys_to_remove:
+                    del _hyde_cache[key]
+            _hyde_cache[cache_key] = hypothetical_doc
+
+            logger.debug(f"HyDE expansion: '{query[:30]}...' -> '{hypothetical_doc[:50]}...'")
+            return hypothetical_doc
+
+    except asyncio.TimeoutError:
+        logger.warning(f"HyDE expansion timed out for: {query[:50]}...")
+    except Exception as e:
+        logger.error(f"HyDE expansion failed: {e}")
+
+    return None
+
+
+async def get_hyde_chunks(
+    query: str,
+    model: Optional[str] = None,
+    timeout: float = 5.0,
+) -> list[str]:
+    """
+    Get both the original query and the HyDE-expanded version for embedding.
+
+    Returns a list containing:
+    1. The original query (always)
+    2. The HyDE-expanded hypothetical document (if generation succeeds)
+
+    This allows the search to match on both the literal query terms
+    and the expanded semantic meaning.
+
+    Args:
+        query: The user's search query
+        model: LLM model to use for HyDE expansion
+        timeout: Maximum time to wait for HyDE generation
+
+    Returns:
+        List of strings to embed (original query + optional HyDE expansion)
+    """
+    chunks = [query]
+
+    # Only expand queries that are vague/conceptual (more than a few words)
+    # Short specific queries like "Taboo Your Words" don't need expansion
+    word_count = len(query.split())
+    if word_count >= 4:
+        hyde_doc = await expand_query_hyde(query, model, timeout)
+        if hyde_doc:
+            chunks.append(hyde_doc)
+
+    return chunks
diff --git a/src/memory/api/search/search.py b/src/memory/api/search/search.py
index 00dcb8f..ba7f796 100644
--- a/src/memory/api/search/search.py
+++ b/src/memory/api/search/search.py
@@ -17,6 +17,9 @@ from memory.api.search import scorer
 if settings.ENABLE_BM25_SEARCH:
     from memory.api.search.bm25 import search_bm25_chunks
 
+if settings.ENABLE_HYDE_EXPANSION:
+    from memory.api.search.hyde import expand_query_hyde
+
 from memory.api.search.types import SearchConfig, SearchFilters, SearchResult
 
 logger = logging.getLogger(__name__)
@@ -87,14 +90,35 @@ async def search_chunks(
     Combines results using weighted score fusion, giving bonus to documents
     that match both semantically and lexically.
+
+    If HyDE is enabled, also generates a hypothetical document from the query
+    and includes it in the embedding search for better semantic matching.
""" # Search for more candidates than requested, fuse scores, then return top N # This helps find results that rank well in one method but not the other internal_limit = limit * CANDIDATE_MULTIPLIER + # Extract query text for HyDE expansion + search_data = list(data) # Copy to avoid modifying original + if settings.ENABLE_HYDE_EXPANSION: + query_text = " ".join( + c for chunk in data for c in chunk.data if isinstance(c, str) + ) + # Only expand queries with 4+ words (short queries are usually specific enough) + if len(query_text.split()) >= 4: + try: + hyde_doc = await expand_query_hyde( + query_text, timeout=settings.HYDE_TIMEOUT + ) + if hyde_doc: + logger.debug(f"HyDE expansion: '{query_text[:30]}...' -> '{hyde_doc[:50]}...'") + search_data.append(extract.DataChunk(data=[hyde_doc])) + except Exception as e: + logger.warning(f"HyDE expansion failed, using original query: {e}") + # Run embedding search embedding_scores = await search_chunks_embeddings( - data, modalities, internal_limit, filters, timeout + search_data, modalities, internal_limit, filters, timeout ) # Run BM25 search if enabled diff --git a/src/memory/common/settings.py b/src/memory/common/settings.py index dce52f0..0b4f457 100644 --- a/src/memory/common/settings.py +++ b/src/memory/common/settings.py @@ -176,6 +176,8 @@ LLM_USAGE_REDIS_PREFIX = os.getenv("LLM_USAGE_REDIS_PREFIX", "llm_usage") ENABLE_EMBEDDING_SEARCH = boolean_env("ENABLE_EMBEDDING_SEARCH", True) ENABLE_BM25_SEARCH = boolean_env("ENABLE_BM25_SEARCH", True) ENABLE_SEARCH_SCORING = boolean_env("ENABLE_SEARCH_SCORING", True) +ENABLE_HYDE_EXPANSION = boolean_env("ENABLE_HYDE_EXPANSION", True) +HYDE_TIMEOUT = float(os.getenv("HYDE_TIMEOUT", "3.0")) MAX_PREVIEW_LENGTH = int(os.getenv("MAX_PREVIEW_LENGTH", DEFAULT_CHUNK_TOKENS * 16)) MAX_NON_PREVIEW_LENGTH = int(os.getenv("MAX_NON_PREVIEW_LENGTH", 2000))