tweaks for search scoring

This commit is contained in:
EC2 Default User 2025-06-28 19:22:03 +00:00
parent 8eb6374cac
commit 96c2f22b16
3 changed files with 21 additions and 10 deletions

View File

@@ -156,7 +156,9 @@ services:
STATIC_DIR: "/app/static" STATIC_DIR: "/app/static"
VOYAGE_API_KEY: ${VOYAGE_API_KEY} VOYAGE_API_KEY: ${VOYAGE_API_KEY}
ENABLE_BM25_SEARCH: false ENABLE_BM25_SEARCH: false
secrets: [postgres_password] OPENAI_API_KEY_FILE: /run/secrets/openai_key
ANTHROPIC_API_KEY_FILE: /run/secrets/anthropic_key
secrets: [postgres_password, openai_key, anthropic_key]
volumes: volumes:
- ./memory_files:/app/memory_files:rw - ./memory_files:/app/memory_files:rw
healthcheck: healthcheck:

View File

@@ -29,10 +29,16 @@ Please always return a summary of any images provided.
async def score_chunk(query: str, chunk: Chunk) -> Chunk: async def score_chunk(query: str, chunk: Chunk) -> Chunk:
try:
data = chunk.data data = chunk.data
except Exception as e:
print(f"Error getting chunk data: {e}, {type(e)}")
return chunk
chunk_text = "\n".join(text for text in data if isinstance(text, str)) chunk_text = "\n".join(text for text in data if isinstance(text, str))
images = [image for image in data if isinstance(image, Image.Image)] images = [image for image in data if isinstance(image, Image.Image)]
prompt = SCORE_CHUNK_PROMPT.format(query=query, chunk=chunk_text) prompt = SCORE_CHUNK_PROMPT.format(query=query, chunk=chunk_text)
try:
response = await asyncio.to_thread( response = await asyncio.to_thread(
llms.call, llms.call,
prompt, prompt,
@@ -40,6 +46,9 @@ async def score_chunk(query: str, chunk: Chunk) -> Chunk:
images=images, images=images,
system_prompt=SCORE_CHUNK_SYSTEM_PROMPT, system_prompt=SCORE_CHUNK_SYSTEM_PROMPT,
) )
except Exception as e:
print(f"Error scoring chunk: {e}, {type(e)}")
return chunk
soup = BeautifulSoup(response, "html.parser") soup = BeautifulSoup(response, "html.parser")
if not (score := soup.find("score")): if not (score := soup.find("score")):

View File

@@ -77,7 +77,7 @@ async def search(
modalities: set[str] = set(), modalities: set[str] = set(),
limit: int = 10, limit: int = 10,
filters: SearchFilters = {}, filters: SearchFilters = {},
timeout: int = 2, timeout: int = 20,
) -> list[SearchResult]: ) -> list[SearchResult]:
""" """
Search across knowledge base using text query and optional files. Search across knowledge base using text query and optional files.