Compare commits

..

No commits in common. "a3daea883b774c3fe1b9329a4bc4332a12ed1ce9" and "8855e8715acedb1f1ba6c2187ffef78ffe5aba3f" have entirely different histories.

11 changed files with 29 additions and 94 deletions

4
.gitignore vendored
View File

@ -1,8 +1,4 @@
Books
CLAUDE.md
memory_files
venv
.env
.DS_Store
secrets/

View File

@ -141,9 +141,6 @@ services:
build:
context: .
dockerfile: docker/api/Dockerfile
args:
SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
SESSION_COOKIE_NAME: "${SESSION_COOKIE_NAME:-session_id}"
restart: unless-stopped
networks: [kbnet]
depends_on: [postgres, rabbitmq, qdrant]
@ -155,7 +152,6 @@ services:
VITE_SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
STATIC_DIR: "/app/static"
VOYAGE_API_KEY: ${VOYAGE_API_KEY}
ENABLE_BM25_SEARCH: false
secrets: [postgres_password]
volumes:
- ./memory_files:/app/memory_files:rw

View File

@ -1,5 +1,14 @@
# Backend base stage
FROM python:3.11-slim AS backend-base
# Frontend build stage
FROM node:18-alpine AS frontend-builder
WORKDIR /frontend
COPY frontend/package*.json ./
RUN npm install
COPY frontend/ ./
RUN npm run build
# Backend build stage
FROM python:3.11-slim
WORKDIR /app
@ -10,37 +19,18 @@ RUN apt-get update && apt-get install -y \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy and install Python requirements
# Copy requirements files and setup
COPY requirements ./requirements/
RUN mkdir src
COPY setup.py ./
# Do an initial install to get the dependencies cached
RUN pip install -e ".[api]"
# Frontend build stage
FROM node:18-alpine AS frontend-builder
WORKDIR /frontend
COPY frontend/package*.json ./
RUN npm install
COPY frontend/ ./
# Set Vite environment variables for build from build args
ARG SERVER_URL
ARG SESSION_COOKIE_NAME
ENV VITE_SERVER_URL=${SERVER_URL}
ENV VITE_SESSION_COOKIE_NAME=${SESSION_COOKIE_NAME}
RUN npm run build
# Final stage
FROM backend-base
# Install the package with Python source code
# Install the package with common dependencies
COPY src/ ./src/
RUN pip install -e ".[api]"
# Copy frontend build output from frontend stage
# Copy frontend build output from previous stage
COPY --from=frontend-builder /frontend/dist ./static/
# Run as non-root user

View File

@ -6,23 +6,18 @@ import asyncio
import logging
from typing import Optional
from memory.common import extract, settings
from memory.api.search.embeddings import search_embeddings
from memory.api.search.bm25 import search_bm25
from memory.api.search.utils import SearchFilters, SearchResult
from memory.api.search.utils import group_chunks, with_timeout
from memory.common import extract
from memory.common.collections import (
ALL_COLLECTIONS,
MULTIMODAL_COLLECTIONS,
TEXT_COLLECTIONS,
)
from memory.api.search.embeddings import search_embeddings
if settings.ENABLE_BM25_SEARCH:
from memory.api.search.bm25 import search_bm25
from memory.api.search.utils import (
SearchFilters,
SearchResult,
group_chunks,
with_timeout,
)
from memory.common import settings
logger = logging.getLogger(__name__)

View File

@ -370,7 +370,7 @@ class SourceItem(Base):
@property
def display_contents(self) -> str | dict | None:
payload = self.as_payload()
payload.pop("source_id", None) # type: ignore
payload.pop("id", None) # type: ignore
return {
**payload,
"tags": self.tags,

View File

@ -21,7 +21,7 @@ from sqlalchemy import (
Text,
func,
)
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR, UUID
from sqlalchemy.orm import relationship
from memory.common import settings
@ -31,6 +31,7 @@ import memory.common.formatters.observation as observation
from memory.common.db.models.source_item import (
SourceItem,
Chunk,
SourceItemPayload,
clean_filename,
chunk_mixed,
@ -91,9 +92,9 @@ class MailMessage(SourceItem):
def as_payload(self) -> MailMessagePayload:
base_payload = super().as_payload() | {
"tags": (cast(list[str], self.tags) or [])
"tags": cast(list[str], self.tags)
+ [cast(str, self.sender)]
+ (cast(list[str], self.recipients) or [])
+ cast(list[str], self.recipients)
}
return MailMessagePayload(
**cast(dict, base_payload),
@ -575,11 +576,6 @@ class ForumPost(SourceItem):
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
return chunk_mixed(cast(str, self.content), cast(list[str], self.images))
@classmethod
def get_collections(cls) -> list[str]:
# Very sad that I didn't keep the names consistent... Qdrant doesn't allow renaming collections
return ["forum"]
class MiscDoc(SourceItem):
__tablename__ = "misc_doc"

View File

@ -532,36 +532,6 @@ class NadiaXyzParser(BaseHTMLParser):
]
class SlateStarCodexParser(BaseHTMLParser):
"""Parser for slatestarcodex.com (Scott Alexander's blog)."""
article_selector = ".post, .hentry, [id^='post-']"
title_selector = "h1.pjgm-posttitle, h1"
author_selector = ".author.vcard a, .url.fn.n"
date_selector = ".entry-date"
date_format = "%B %d, %Y" # "January 21, 2021" format
content_selector = ".pjgm-postcontent"
author = "Scott Alexander"
remove_selectors = BaseHTMLParser.remove_selectors + [
".pjgm-postmeta",
".pjgm-postutility",
".pjgm-navigation",
"#pjgm-navbelow",
"#comments",
".commentlist",
".widget-area",
"#left-sidebar",
"#primary",
".sidebar-toggle",
".aar_div", # Advertisement divs
".pjgm-header",
".pjgm-footer",
"#pjgm-menubar",
"#pjgm-bigtitle",
]
class BloombergParser(BaseHTMLParser):
"""Parser for bloomberg.com."""
@ -608,7 +578,6 @@ PARSER_REGISTRY = {
r"theredhandfiles\.com": TheRedHandFilesParser,
r"rachelbythebay\.com": RachelByTheBayParser,
r"nadia\.xyz": NadiaXyzParser,
r"slatestarcodex\.com": SlateStarCodexParser,
}

View File

@ -4,10 +4,8 @@ from typing import cast
import pytest
from PIL import Image
from memory.common import settings, chunker, extract
from memory.common.db.models.source_item import (
Chunk,
)
from memory.common.db.models.source_items import (
Chunk,
MailMessage,
)
from memory.common.db.models.source_item import (

View File

@ -203,8 +203,6 @@ Test Body Content"""
"sender": "sender@example.com",
"recipients": ["recipient@example.com"],
"tags": None,
"folder": None,
"message_id": "<test@example.com>",
}

View File

@ -3,7 +3,6 @@ from pathlib import Path
from unittest.mock import patch, Mock
from memory.common.db.models import Book, BookSection
from memory.common import settings
from memory.parsers.ebook import Ebook, Section
from memory.workers.tasks import ebook
@ -47,7 +46,7 @@ def mock_ebook():
end_page=20,
),
],
file_path=settings.FILE_STORAGE_DIR / "test/book.epub",
file_path=Path("/test/book.epub"),
n_pages=20,
)
@ -71,7 +70,7 @@ def test_create_book_from_ebook(mock_ebook):
assert book.author == "Test Author" # type: ignore
assert book.publisher == "Test Publisher" # type: ignore
assert book.language == "en" # type: ignore
assert book.file_path == "test/book.epub" # type: ignore
assert book.file_path == "/test/book.epub" # type: ignore
assert book.total_pages == 20 # type: ignore
assert book.book_metadata == { # type: ignore
"language": "en",

View File

@ -257,8 +257,6 @@ def test_create_mail_message(db_session):
"recipients": ["recipient@example.com"],
"date": "2023-01-01T12:00:00+00:00",
"mime_type": "message/rfc822",
"folder": "INBOX",
"message_id": "321",
"size": 412,
"tags": ["test"],
"filename": None,