mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-28 23:24:43 +02:00
better search
This commit is contained in:
parent
5e836337e2
commit
8855e8715a
@ -1,6 +1,7 @@
|
||||
import { useState, useEffect } from 'react'
|
||||
import ReactMarkdown from 'react-markdown'
|
||||
import { useMCP } from '@/hooks/useMCP'
|
||||
import { SERVER_URL } from '@/hooks/useAuth'
|
||||
|
||||
export type SearchItem = {
|
||||
filename: string
|
||||
@ -24,10 +25,10 @@ export const Tag = ({ tags }: { tags: string[] }) => {
|
||||
export const TextResult = ({ filename, content, chunks, tags, metadata }: SearchItem) => {
|
||||
return (
|
||||
<div className="search-result-card">
|
||||
<h4>{filename || 'Untitled'}</h4>
|
||||
<h4>{filename || metadata?.title || metadata?.url || 'Untitled'}</h4>
|
||||
<Tag tags={tags} />
|
||||
<Metadata metadata={metadata} />
|
||||
<p className="result-content">{content || 'No content available'}</p>
|
||||
{content && <p className="result-content">{content}</p>}
|
||||
{chunks && chunks.length > 0 && (
|
||||
<details className="result-chunks">
|
||||
<summary>Relevant sections:</summary>
|
||||
@ -46,12 +47,14 @@ export const TextResult = ({ filename, content, chunks, tags, metadata }: Search
|
||||
export const MarkdownResult = ({ filename, content, chunks, tags, metadata }: SearchItem) => {
|
||||
return (
|
||||
<div className="search-result-card">
|
||||
<h4>{filename || 'Untitled'}</h4>
|
||||
<h4>{filename || metadata?.title || metadata?.url || 'Untitled'}</h4>
|
||||
<Tag tags={tags} />
|
||||
<Metadata metadata={metadata} />
|
||||
{content && (
|
||||
<div className="markdown-content">
|
||||
<ReactMarkdown>{content || 'No content available'}</ReactMarkdown>
|
||||
<ReactMarkdown>{content}</ReactMarkdown>
|
||||
</div>
|
||||
)}
|
||||
{chunks && chunks.length > 0 && (
|
||||
<details className="result-chunks">
|
||||
<summary>Relevant sections:</summary>
|
||||
@ -76,7 +79,7 @@ export const ImageResult = ({ filename, tags, metadata }: SearchItem) => {
|
||||
const [content, setContent] = useState<string>()
|
||||
useEffect(() => {
|
||||
const fetchImage = async () => {
|
||||
const files = await fetchFile(filename.replace('/app/memory_files/', ''))
|
||||
const files = await fetchFile(filename)
|
||||
const {mime_type, content} = files[0]
|
||||
setMimeType(mime_type)
|
||||
setContent(content)
|
||||
@ -94,13 +97,26 @@ export const ImageResult = ({ filename, tags, metadata }: SearchItem) => {
|
||||
)
|
||||
}
|
||||
|
||||
const MetadataItem = ({ item, value }: { item: string, value: any }) => {
|
||||
if (item === "url") {
|
||||
return <li><a href={value}>{value}</a></li>
|
||||
}
|
||||
if (item === "filename") {
|
||||
return <li><a href={`${SERVER_URL}/files/${value}`}>{value}</a></li>
|
||||
}
|
||||
if (typeof value === 'string') {
|
||||
return <li>{item}: {value}</li>
|
||||
}
|
||||
return <li>{item}: {JSON.stringify(value)}</li>
|
||||
}
|
||||
|
||||
export const Metadata = ({ metadata }: { metadata: any }) => {
|
||||
if (!metadata) return null
|
||||
return (
|
||||
<div className="metadata">
|
||||
<ul>
|
||||
{Object.entries(metadata).map(([key, value]) => (
|
||||
<li key={key}>{key}: {typeof value === 'string' ? value : JSON.stringify(value)}</li>
|
||||
<MetadataItem key={key} item={key} value={value} />
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
@ -112,7 +128,7 @@ export const PDFResult = ({ filename, content, tags, metadata }: SearchItem) =>
|
||||
<div className="search-result-card">
|
||||
<h4>{filename || 'Untitled'}</h4>
|
||||
<Tag tags={tags} />
|
||||
<a href={`http://localhost:8000/files/${filename}`}>View PDF</a>
|
||||
<a href={`${SERVER_URL}/files/${filename}`}>View PDF</a>
|
||||
<Metadata metadata={metadata} />
|
||||
{content && <div className="markdown-content">
|
||||
<details>
|
||||
|
@ -1,7 +1,7 @@
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
|
||||
const SERVER_URL = import.meta.env.VITE_SERVER_URL || 'http://localhost:8000'
|
||||
const SESSION_COOKIE_NAME = import.meta.env.VITE_SESSION_COOKIE_NAME || 'session_id'
|
||||
export const SERVER_URL = import.meta.env.VITE_SERVER_URL || 'http://localhost:8000'
|
||||
export const SESSION_COOKIE_NAME = import.meta.env.VITE_SESSION_COOKIE_NAME || 'session_id'
|
||||
|
||||
// Cookie utilities
|
||||
const getCookie = (name: string) => {
|
||||
|
@ -96,6 +96,7 @@ class EmailAttachmentAdmin(ModelView, model=EmailAttachment):
|
||||
column_searchable_list = [
|
||||
"filename",
|
||||
"mime_type",
|
||||
"id",
|
||||
]
|
||||
|
||||
|
||||
@ -103,7 +104,7 @@ class BlogPostAdmin(ModelView, model=BlogPost):
|
||||
column_list = source_columns(
|
||||
BlogPost, "title", "author", "url", "published", "domain"
|
||||
)
|
||||
column_searchable_list = ["title", "author", "domain"]
|
||||
column_searchable_list = ["title", "author", "domain", "id", "url"]
|
||||
|
||||
|
||||
class ForumPostAdmin(ModelView, model=ForumPost):
|
||||
@ -118,7 +119,7 @@ class ForumPostAdmin(ModelView, model=ForumPost):
|
||||
"comments",
|
||||
"score",
|
||||
)
|
||||
column_searchable_list = ["title", "authors"]
|
||||
column_searchable_list = ["title", "authors", "id"]
|
||||
|
||||
|
||||
class PhotoAdmin(ModelView, model=Photo):
|
||||
@ -127,7 +128,7 @@ class PhotoAdmin(ModelView, model=Photo):
|
||||
|
||||
class ComicAdmin(ModelView, model=Comic):
|
||||
column_list = source_columns(Comic, "title", "author", "published", "volume")
|
||||
column_searchable_list = ["title", "author"]
|
||||
column_searchable_list = ["title", "author", "id"]
|
||||
|
||||
|
||||
class BookSectionAdmin(ModelView, model=BookSection):
|
||||
@ -139,12 +140,12 @@ class BookSectionAdmin(ModelView, model=BookSection):
|
||||
"start_page",
|
||||
"end_page",
|
||||
)
|
||||
column_searchable_list = ["section_title"]
|
||||
column_searchable_list = ["section_title", "id"]
|
||||
|
||||
|
||||
class MiscDocAdmin(ModelView, model=MiscDoc):
|
||||
column_list = source_columns(MiscDoc, "path")
|
||||
column_searchable_list = ["path"]
|
||||
column_searchable_list = ["path", "id"]
|
||||
|
||||
|
||||
class BookAdmin(ModelView, model=Book):
|
||||
@ -156,7 +157,7 @@ class BookAdmin(ModelView, model=Book):
|
||||
"series_number",
|
||||
"published",
|
||||
]
|
||||
column_searchable_list = ["title", "author"]
|
||||
column_searchable_list = ["title", "author", "id"]
|
||||
|
||||
|
||||
class ArticleFeedAdmin(ModelView, model=ArticleFeed):
|
||||
@ -170,7 +171,7 @@ class ArticleFeedAdmin(ModelView, model=ArticleFeed):
|
||||
"created_at",
|
||||
"updated_at",
|
||||
]
|
||||
column_searchable_list = ["title", "url"]
|
||||
column_searchable_list = ["title", "url", "id"]
|
||||
|
||||
|
||||
class EmailAccountAdmin(ModelView, model=EmailAccount):
|
||||
@ -186,7 +187,7 @@ class EmailAccountAdmin(ModelView, model=EmailAccount):
|
||||
"created_at",
|
||||
"updated_at",
|
||||
]
|
||||
column_searchable_list = ["name", "email_address"]
|
||||
column_searchable_list = ["name", "email_address", "id"]
|
||||
|
||||
|
||||
class AgentObservationAdmin(ModelView, model=AgentObservation):
|
||||
@ -199,7 +200,7 @@ class AgentObservationAdmin(ModelView, model=AgentObservation):
|
||||
"evidence",
|
||||
"inserted_at",
|
||||
]
|
||||
column_searchable_list = ["subject", "observation_type"]
|
||||
column_searchable_list = ["subject", "observation_type", "id"]
|
||||
column_default_sort = [("inserted_at", True)]
|
||||
column_sortable_list = ["inserted_at"]
|
||||
|
||||
@ -214,7 +215,7 @@ class NoteAdmin(ModelView, model=Note):
|
||||
"tags",
|
||||
"inserted_at",
|
||||
]
|
||||
column_searchable_list = ["subject", "content"]
|
||||
column_searchable_list = ["subject", "content", "id"]
|
||||
column_default_sort = [("inserted_at", True)]
|
||||
column_sortable_list = ["inserted_at"]
|
||||
|
||||
|
@ -5,6 +5,7 @@ FastAPI application for the knowledge base.
|
||||
import contextlib
|
||||
import os
|
||||
import logging
|
||||
import mimetypes
|
||||
|
||||
from fastapi import FastAPI, UploadFile, Request, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
@ -55,7 +56,12 @@ async def serve_file(path: str):
|
||||
file_path = settings.FILE_STORAGE_DIR / path
|
||||
if not file_path.is_file():
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
return FileResponse(file_path)
|
||||
|
||||
mime_type, _ = mimetypes.guess_type(str(file_path))
|
||||
if mime_type is None:
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
return FileResponse(file_path, media_type=mime_type)
|
||||
|
||||
|
||||
async def input_type(item: str | UploadFile) -> list[extract.DataChunk]:
|
||||
@ -71,13 +77,6 @@ async def input_type(item: str | UploadFile) -> list[extract.DataChunk]:
|
||||
# SQLAdmin setup with OAuth protection
|
||||
engine = get_engine()
|
||||
admin = Admin(app, engine)
|
||||
admin.app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # [settings.SERVER_URL],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Setup admin with OAuth protection using existing OAuth provider
|
||||
setup_admin(admin)
|
||||
|
@ -8,7 +8,7 @@ import qdrant_client
|
||||
from PIL import Image
|
||||
from qdrant_client.http import models as qdrant_models
|
||||
|
||||
from memory.common import embedding, extract, qdrant
|
||||
from memory.common import embedding, extract, qdrant, settings
|
||||
from memory.common.db.connection import make_session
|
||||
from memory.common.db.models import Chunk
|
||||
from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters
|
||||
@ -22,9 +22,16 @@ def annotated_chunk(
|
||||
def serialize_item(item: bytes | str | Image.Image) -> str | None:
|
||||
if not previews and not isinstance(item, str):
|
||||
return None
|
||||
if not previews and isinstance(item, str):
|
||||
return item[:100]
|
||||
|
||||
if (
|
||||
not previews
|
||||
and isinstance(item, str)
|
||||
and len(item) > settings.MAX_NON_PREVIEW_LENGTH
|
||||
):
|
||||
return item[: settings.MAX_NON_PREVIEW_LENGTH] + "..."
|
||||
elif isinstance(item, str):
|
||||
if len(item) > settings.MAX_PREVIEW_LENGTH:
|
||||
return None
|
||||
return item
|
||||
if isinstance(item, Image.Image):
|
||||
buffer = io.BytesIO()
|
||||
format = item.format or "PNG"
|
||||
@ -33,8 +40,6 @@ def annotated_chunk(
|
||||
return f"data:{mime_type};base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"
|
||||
elif isinstance(item, bytes):
|
||||
return base64.b64encode(item).decode("utf-8")
|
||||
elif isinstance(item, str):
|
||||
return item
|
||||
else:
|
||||
raise ValueError(f"Unsupported item type: {type(item)}")
|
||||
|
||||
|
@ -42,7 +42,7 @@ class SourceData(BaseModel):
|
||||
mime_type=source.mime_type,
|
||||
filename=source.filename,
|
||||
content_length=len(source.content) if source.content else 0,
|
||||
contents=display_contents,
|
||||
contents={k: v for k, v in display_contents.items() if v is not None},
|
||||
created_at=source.inserted_at,
|
||||
)
|
||||
|
||||
@ -104,11 +104,10 @@ def group_chunks(
|
||||
source_lookup[source.id] = source
|
||||
|
||||
def get_content(text: str | dict | None) -> str | dict | None:
|
||||
if preview or not text or not isinstance(text, str) or len(text) < 250:
|
||||
if isinstance(text, str) and len(text) > settings.MAX_PREVIEW_LENGTH:
|
||||
return None
|
||||
return text
|
||||
|
||||
return text[:250] + "..."
|
||||
|
||||
def make_result(source: SourceData, chunks: list[AnnotatedChunk]) -> SearchResult:
|
||||
contents = source.contents or {}
|
||||
tags = []
|
||||
|
@ -369,9 +369,11 @@ class SourceItem(Base):
|
||||
|
||||
@property
|
||||
def display_contents(self) -> str | dict | None:
|
||||
payload = self.as_payload()
|
||||
payload.pop("id", None) # type: ignore
|
||||
return {
|
||||
**payload,
|
||||
"tags": self.tags,
|
||||
"size": self.size,
|
||||
"content": self.content,
|
||||
"filename": self.filename,
|
||||
"mime_type": self.mime_type,
|
||||
|
@ -135,6 +135,8 @@ SUMMARIZER_MODEL = os.getenv("SUMMARIZER_MODEL", "anthropic/claude-3-haiku-20240
|
||||
# Search settings
|
||||
ENABLE_EMBEDDING_SEARCH = boolean_env("ENABLE_EMBEDDING_SEARCH", True)
|
||||
ENABLE_BM25_SEARCH = boolean_env("ENABLE_BM25_SEARCH", True)
|
||||
MAX_PREVIEW_LENGTH = int(os.getenv("MAX_PREVIEW_LENGTH", DEFAULT_CHUNK_TOKENS * 8))
|
||||
MAX_NON_PREVIEW_LENGTH = int(os.getenv("MAX_NON_PREVIEW_LENGTH", 2000))
|
||||
|
||||
# API settings
|
||||
SERVER_URL = os.getenv("SERVER_URL", "http://localhost:8000")
|
||||
|
@ -30,7 +30,7 @@ def create_book_from_ebook(ebook, tags: Iterable[str] = []) -> Book:
|
||||
publisher=ebook.metadata.get("creator"),
|
||||
language=ebook.metadata.get("language"),
|
||||
total_pages=ebook.n_pages,
|
||||
file_path=ebook.file_path.as_posix(),
|
||||
file_path=ebook.file_path.relative_to(settings.FILE_STORAGE_DIR).as_posix(),
|
||||
book_metadata=ebook.metadata,
|
||||
tags=tags,
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user