diff --git a/frontend/src/components/search/results.tsx b/frontend/src/components/search/results.tsx index 34b1634..79ab3ba 100644 --- a/frontend/src/components/search/results.tsx +++ b/frontend/src/components/search/results.tsx @@ -1,6 +1,7 @@ import { useState, useEffect } from 'react' import ReactMarkdown from 'react-markdown' import { useMCP } from '@/hooks/useMCP' +import { SERVER_URL } from '@/hooks/useAuth' export type SearchItem = { filename: string @@ -24,10 +25,10 @@ export const Tag = ({ tags }: { tags: string[] }) => { export const TextResult = ({ filename, content, chunks, tags, metadata }: SearchItem) => { return (
-

{filename || 'Untitled'}

+

{filename || metadata?.title || metadata?.url || 'Untitled'}

-

{content || 'No content available'}

+ {content &&

{content}

} {chunks && chunks.length > 0 && (
Relevant sections: @@ -46,12 +47,14 @@ export const TextResult = ({ filename, content, chunks, tags, metadata }: Search export const MarkdownResult = ({ filename, content, chunks, tags, metadata }: SearchItem) => { return (
-

{filename || 'Untitled'}

+

{filename || metadata?.title || metadata?.url || 'Untitled'}

-
- {content || 'No content available'} -
+ {content && ( +
+ {content} +
+ )} {chunks && chunks.length > 0 && (
Relevant sections: @@ -76,7 +79,7 @@ export const ImageResult = ({ filename, tags, metadata }: SearchItem) => { const [content, setContent] = useState() useEffect(() => { const fetchImage = async () => { - const files = await fetchFile(filename.replace('/app/memory_files/', '')) + const files = await fetchFile(filename) const {mime_type, content} = files[0] setMimeType(mime_type) setContent(content) @@ -94,13 +97,26 @@ export const ImageResult = ({ filename, tags, metadata }: SearchItem) => { ) } +const MetadataItem = ({ item, value }: { item: string, value: any }) => { + if (item === "url") { + return
  • {value}
  • + } + if (item === "filename") { + return
  • {value}
  • + } + if (typeof value === 'string') { + return
  • {item}: {value}
  • + } + return
  • {item}: {JSON.stringify(value)}
  • +} + export const Metadata = ({ metadata }: { metadata: any }) => { if (!metadata) return null return (
      {Object.entries(metadata).map(([key, value]) => ( -
    • {key}: {typeof value === 'string' ? value : JSON.stringify(value)}
    • + ))}
    @@ -112,7 +128,7 @@ export const PDFResult = ({ filename, content, tags, metadata }: SearchItem) =>

    {filename || 'Untitled'}

    - View PDF + View PDF {content &&
    diff --git a/frontend/src/hooks/useAuth.ts b/frontend/src/hooks/useAuth.ts index 557689f..70bce09 100644 --- a/frontend/src/hooks/useAuth.ts +++ b/frontend/src/hooks/useAuth.ts @@ -1,7 +1,7 @@ import { useState, useEffect, useCallback } from 'react' -const SERVER_URL = import.meta.env.VITE_SERVER_URL || 'http://localhost:8000' -const SESSION_COOKIE_NAME = import.meta.env.VITE_SESSION_COOKIE_NAME || 'session_id' +export const SERVER_URL = import.meta.env.VITE_SERVER_URL || 'http://localhost:8000' +export const SESSION_COOKIE_NAME = import.meta.env.VITE_SESSION_COOKIE_NAME || 'session_id' // Cookie utilities const getCookie = (name: string) => { diff --git a/src/memory/api/admin.py b/src/memory/api/admin.py index 9d9bc08..9d0d031 100644 --- a/src/memory/api/admin.py +++ b/src/memory/api/admin.py @@ -96,6 +96,7 @@ class EmailAttachmentAdmin(ModelView, model=EmailAttachment): column_searchable_list = [ "filename", "mime_type", + "id", ] @@ -103,7 +104,7 @@ class BlogPostAdmin(ModelView, model=BlogPost): column_list = source_columns( BlogPost, "title", "author", "url", "published", "domain" ) - column_searchable_list = ["title", "author", "domain"] + column_searchable_list = ["title", "author", "domain", "id", "url"] class ForumPostAdmin(ModelView, model=ForumPost): @@ -118,7 +119,7 @@ class ForumPostAdmin(ModelView, model=ForumPost): "comments", "score", ) - column_searchable_list = ["title", "authors"] + column_searchable_list = ["title", "authors", "id"] class PhotoAdmin(ModelView, model=Photo): @@ -127,7 +128,7 @@ class PhotoAdmin(ModelView, model=Photo): class ComicAdmin(ModelView, model=Comic): column_list = source_columns(Comic, "title", "author", "published", "volume") - column_searchable_list = ["title", "author"] + column_searchable_list = ["title", "author", "id"] class BookSectionAdmin(ModelView, model=BookSection): @@ -139,12 +140,12 @@ class BookSectionAdmin(ModelView, model=BookSection): "start_page", "end_page", ) - column_searchable_list = ["section_title"] + column_searchable_list = ["section_title", "id"] class MiscDocAdmin(ModelView, model=MiscDoc): column_list = source_columns(MiscDoc, "path") - column_searchable_list = ["path"] + column_searchable_list = ["path", "id"] class BookAdmin(ModelView, model=Book): @@ -156,7 +157,7 @@ class BookAdmin(ModelView, model=Book): "series_number", "published", ] - column_searchable_list = ["title", "author"] + column_searchable_list = ["title", "author", "id"] class ArticleFeedAdmin(ModelView, model=ArticleFeed): @@ -170,7 +171,7 @@ class ArticleFeedAdmin(ModelView, model=ArticleFeed): "created_at", "updated_at", ] - column_searchable_list = ["title", "url"] + column_searchable_list = ["title", "url", "id"] class EmailAccountAdmin(ModelView, model=EmailAccount): @@ -186,7 +187,7 @@ class EmailAccountAdmin(ModelView, model=EmailAccount): "created_at", "updated_at", ] - column_searchable_list = ["name", "email_address"] + column_searchable_list = ["name", "email_address", "id"] class AgentObservationAdmin(ModelView, model=AgentObservation): @@ -199,7 +200,7 @@ class AgentObservationAdmin(ModelView, model=AgentObservation): "evidence", "inserted_at", ] - column_searchable_list = ["subject", "observation_type"] + column_searchable_list = ["subject", "observation_type", "id"] column_default_sort = [("inserted_at", True)] column_sortable_list = ["inserted_at"] @@ -214,7 +215,7 @@ class NoteAdmin(ModelView, model=Note): "tags", "inserted_at", ] - column_searchable_list = ["subject", "content"] + column_searchable_list = ["subject", "content", "id"] column_default_sort = [("inserted_at", True)] column_sortable_list = ["inserted_at"] diff --git a/src/memory/api/app.py b/src/memory/api/app.py index 2f1974c..3b7d939 100644 --- a/src/memory/api/app.py +++ b/src/memory/api/app.py @@ -5,6 +5,7 @@ FastAPI application for the knowledge base. import contextlib import os import logging +import mimetypes from fastapi import FastAPI, UploadFile, Request, HTTPException from fastapi.responses import FileResponse @@ -55,7 +56,12 @@ async def serve_file(path: str): file_path = settings.FILE_STORAGE_DIR / path if not file_path.is_file(): raise HTTPException(status_code=404, detail="File not found") - return FileResponse(file_path) + + mime_type, _ = mimetypes.guess_type(str(file_path)) + if mime_type is None: + mime_type = "application/octet-stream" + + return FileResponse(file_path, media_type=mime_type) async def input_type(item: str | UploadFile) -> list[extract.DataChunk]: @@ -71,13 +77,6 @@ async def input_type(item: str | UploadFile) -> list[extract.DataChunk]: # SQLAdmin setup with OAuth protection engine = get_engine() admin = Admin(app, engine) -admin.app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # [settings.SERVER_URL], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) # Setup admin with OAuth protection using existing OAuth provider setup_admin(admin) diff --git a/src/memory/api/search/embeddings.py b/src/memory/api/search/embeddings.py index ea4682d..717c0b3 100644 --- a/src/memory/api/search/embeddings.py +++ b/src/memory/api/search/embeddings.py @@ -8,7 +8,7 @@ import qdrant_client from PIL import Image from qdrant_client.http import models as qdrant_models -from memory.common import embedding, extract, qdrant +from memory.common import embedding, extract, qdrant, settings from memory.common.db.connection import make_session from memory.common.db.models import Chunk from memory.api.search.utils import SourceData, AnnotatedChunk, SearchFilters @@ -22,9 +22,16 @@ def annotated_chunk( def serialize_item(item: bytes | str | Image.Image) -> str | None: if not previews and not isinstance(item, str): return None - if not previews and isinstance(item, str): - return item[:100] - + if ( + not previews + and isinstance(item, str) + and len(item) > settings.MAX_NON_PREVIEW_LENGTH + ): + return item[: settings.MAX_NON_PREVIEW_LENGTH] + "..." + elif isinstance(item, str): + if len(item) > settings.MAX_PREVIEW_LENGTH: + return None + return item if isinstance(item, Image.Image): buffer = io.BytesIO() format = item.format or "PNG" @@ -33,8 +40,6 @@ def annotated_chunk( return f"data:{mime_type};base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}" elif isinstance(item, bytes): return base64.b64encode(item).decode("utf-8") - elif isinstance(item, str): - return item else: raise ValueError(f"Unsupported item type: {type(item)}") diff --git a/src/memory/api/search/utils.py b/src/memory/api/search/utils.py index 1833235..98ae1c1 100644 --- a/src/memory/api/search/utils.py +++ b/src/memory/api/search/utils.py @@ -42,7 +42,7 @@ class SourceData(BaseModel): mime_type=source.mime_type, filename=source.filename, content_length=len(source.content) if source.content else 0, - contents=display_contents, + contents={k: v for k, v in display_contents.items() if v is not None}, created_at=source.inserted_at, ) @@ -104,10 +104,9 @@ def group_chunks( source_lookup[source.id] = source def get_content(text: str | dict | None) -> str | dict | None: - if preview or not text or not isinstance(text, str) or len(text) < 250: - return text - - return text[:250] + "..." + if isinstance(text, str) and len(text) > settings.MAX_PREVIEW_LENGTH: + return None + return text def make_result(source: SourceData, chunks: list[AnnotatedChunk]) -> SearchResult: contents = source.contents or {} diff --git a/src/memory/common/db/models/source_item.py b/src/memory/common/db/models/source_item.py index d6e68a5..1f52aad 100644 --- a/src/memory/common/db/models/source_item.py +++ b/src/memory/common/db/models/source_item.py @@ -369,9 +369,11 @@ class SourceItem(Base): @property def display_contents(self) -> str | dict | None: + payload = self.as_payload() + payload.pop("id", None) # type: ignore return { + **payload, "tags": self.tags, - "size": self.size, "content": self.content, "filename": self.filename, "mime_type": self.mime_type, diff --git a/src/memory/common/settings.py b/src/memory/common/settings.py index 652faf0..4033351 100644 --- a/src/memory/common/settings.py +++ b/src/memory/common/settings.py @@ -135,6 +135,8 @@ SUMMARIZER_MODEL = os.getenv("SUMMARIZER_MODEL", "anthropic/claude-3-haiku-20240 # Search settings ENABLE_EMBEDDING_SEARCH = boolean_env("ENABLE_EMBEDDING_SEARCH", True) ENABLE_BM25_SEARCH = boolean_env("ENABLE_BM25_SEARCH", True) +MAX_PREVIEW_LENGTH = int(os.getenv("MAX_PREVIEW_LENGTH", DEFAULT_CHUNK_TOKENS * 8)) +MAX_NON_PREVIEW_LENGTH = int(os.getenv("MAX_NON_PREVIEW_LENGTH", 2000)) # API settings SERVER_URL = os.getenv("SERVER_URL", "http://localhost:8000") diff --git a/src/memory/workers/tasks/ebook.py b/src/memory/workers/tasks/ebook.py index 23bd4b8..3501ac1 100644 --- a/src/memory/workers/tasks/ebook.py +++ b/src/memory/workers/tasks/ebook.py @@ -30,7 +30,7 @@ def create_book_from_ebook(ebook, tags: Iterable[str] = []) -> Book: publisher=ebook.metadata.get("creator"), language=ebook.metadata.get("language"), total_pages=ebook.n_pages, - file_path=ebook.file_path.as_posix(), + file_path=ebook.file_path.relative_to(settings.FILE_STORAGE_DIR).as_posix(), book_metadata=ebook.metadata, tags=tags, )