mirror of
https://github.com/mruwnik/memory.git
synced 2025-07-29 14:16:09 +02:00
Compare commits
6 Commits
8855e8715a
...
a3daea883b
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a3daea883b | ||
![]() |
80020e2a61 | ||
![]() |
a424c5f4d0 | ||
![]() |
f0d441ffe9 | ||
![]() |
1538d38bf6 | ||
![]() |
4049cf15b4 |
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +1,8 @@
|
||||
|
||||
Books
|
||||
CLAUDE.md
|
||||
memory_files
|
||||
venv
|
||||
.env
|
||||
.DS_Store
|
||||
secrets/
|
||||
|
@ -141,6 +141,9 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/api/Dockerfile
|
||||
args:
|
||||
SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
|
||||
SESSION_COOKIE_NAME: "${SESSION_COOKIE_NAME:-session_id}"
|
||||
restart: unless-stopped
|
||||
networks: [kbnet]
|
||||
depends_on: [postgres, rabbitmq, qdrant]
|
||||
@ -152,6 +155,7 @@ services:
|
||||
VITE_SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
|
||||
STATIC_DIR: "/app/static"
|
||||
VOYAGE_API_KEY: ${VOYAGE_API_KEY}
|
||||
ENABLE_BM25_SEARCH: false
|
||||
secrets: [postgres_password]
|
||||
volumes:
|
||||
- ./memory_files:/app/memory_files:rw
|
||||
|
@ -1,14 +1,5 @@
|
||||
# Frontend build stage
|
||||
FROM node:18-alpine AS frontend-builder
|
||||
|
||||
WORKDIR /frontend
|
||||
COPY frontend/package*.json ./
|
||||
RUN npm install
|
||||
COPY frontend/ ./
|
||||
RUN npm run build
|
||||
|
||||
# Backend build stage
|
||||
FROM python:3.11-slim
|
||||
# Backend base stage
|
||||
FROM python:3.11-slim AS backend-base
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@ -19,18 +10,37 @@ RUN apt-get update && apt-get install -y \
|
||||
python3-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements files and setup
|
||||
# Copy and install Python requirements
|
||||
COPY requirements ./requirements/
|
||||
RUN mkdir src
|
||||
COPY setup.py ./
|
||||
# Do an initial install to get the dependencies cached
|
||||
RUN pip install -e ".[api]"
|
||||
|
||||
# Install the package with common dependencies
|
||||
# Frontend build stage
|
||||
FROM node:18-alpine AS frontend-builder
|
||||
|
||||
WORKDIR /frontend
|
||||
COPY frontend/package*.json ./
|
||||
RUN npm install
|
||||
COPY frontend/ ./
|
||||
|
||||
# Set Vite environment variables for build from build args
|
||||
ARG SERVER_URL
|
||||
ARG SESSION_COOKIE_NAME
|
||||
ENV VITE_SERVER_URL=${SERVER_URL}
|
||||
ENV VITE_SESSION_COOKIE_NAME=${SESSION_COOKIE_NAME}
|
||||
|
||||
RUN npm run build
|
||||
|
||||
# Final stage
|
||||
FROM backend-base
|
||||
|
||||
# Install the package with Python source code
|
||||
COPY src/ ./src/
|
||||
RUN pip install -e ".[api]"
|
||||
|
||||
# Copy frontend build output from previous stage
|
||||
# Copy frontend build output from frontend stage
|
||||
COPY --from=frontend-builder /frontend/dist ./static/
|
||||
|
||||
# Run as non-root user
|
||||
|
@ -6,18 +6,23 @@ import asyncio
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from memory.api.search.embeddings import search_embeddings
|
||||
from memory.api.search.bm25 import search_bm25
|
||||
from memory.api.search.utils import SearchFilters, SearchResult
|
||||
|
||||
from memory.api.search.utils import group_chunks, with_timeout
|
||||
from memory.common import extract
|
||||
from memory.common import extract, settings
|
||||
from memory.common.collections import (
|
||||
ALL_COLLECTIONS,
|
||||
MULTIMODAL_COLLECTIONS,
|
||||
TEXT_COLLECTIONS,
|
||||
)
|
||||
from memory.common import settings
|
||||
from memory.api.search.embeddings import search_embeddings
|
||||
|
||||
if settings.ENABLE_BM25_SEARCH:
|
||||
from memory.api.search.bm25 import search_bm25
|
||||
|
||||
from memory.api.search.utils import (
|
||||
SearchFilters,
|
||||
SearchResult,
|
||||
group_chunks,
|
||||
with_timeout,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -370,7 +370,7 @@ class SourceItem(Base):
|
||||
@property
|
||||
def display_contents(self) -> str | dict | None:
|
||||
payload = self.as_payload()
|
||||
payload.pop("id", None) # type: ignore
|
||||
payload.pop("source_id", None) # type: ignore
|
||||
return {
|
||||
**payload,
|
||||
"tags": self.tags,
|
||||
|
@ -21,7 +21,7 @@ from sqlalchemy import (
|
||||
Text,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR, UUID
|
||||
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from memory.common import settings
|
||||
@ -31,7 +31,6 @@ import memory.common.formatters.observation as observation
|
||||
|
||||
from memory.common.db.models.source_item import (
|
||||
SourceItem,
|
||||
Chunk,
|
||||
SourceItemPayload,
|
||||
clean_filename,
|
||||
chunk_mixed,
|
||||
@ -92,9 +91,9 @@ class MailMessage(SourceItem):
|
||||
|
||||
def as_payload(self) -> MailMessagePayload:
|
||||
base_payload = super().as_payload() | {
|
||||
"tags": cast(list[str], self.tags)
|
||||
"tags": (cast(list[str], self.tags) or [])
|
||||
+ [cast(str, self.sender)]
|
||||
+ cast(list[str], self.recipients)
|
||||
+ (cast(list[str], self.recipients) or [])
|
||||
}
|
||||
return MailMessagePayload(
|
||||
**cast(dict, base_payload),
|
||||
@ -576,6 +575,11 @@ class ForumPost(SourceItem):
|
||||
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
|
||||
return chunk_mixed(cast(str, self.content), cast(list[str], self.images))
|
||||
|
||||
@classmethod
|
||||
def get_collections(cls) -> list[str]:
|
||||
# Very sad that I didn't keep the names consistent... Qdrant doesn't allow renaming collections
|
||||
return ["forum"]
|
||||
|
||||
|
||||
class MiscDoc(SourceItem):
|
||||
__tablename__ = "misc_doc"
|
||||
|
@ -532,6 +532,36 @@ class NadiaXyzParser(BaseHTMLParser):
|
||||
]
|
||||
|
||||
|
||||
class SlateStarCodexParser(BaseHTMLParser):
|
||||
"""Parser for slatestarcodex.com (Scott Alexander's blog)."""
|
||||
|
||||
article_selector = ".post, .hentry, [id^='post-']"
|
||||
title_selector = "h1.pjgm-posttitle, h1"
|
||||
author_selector = ".author.vcard a, .url.fn.n"
|
||||
date_selector = ".entry-date"
|
||||
date_format = "%B %d, %Y" # "January 21, 2021" format
|
||||
content_selector = ".pjgm-postcontent"
|
||||
author = "Scott Alexander"
|
||||
|
||||
remove_selectors = BaseHTMLParser.remove_selectors + [
|
||||
".pjgm-postmeta",
|
||||
".pjgm-postutility",
|
||||
".pjgm-navigation",
|
||||
"#pjgm-navbelow",
|
||||
"#comments",
|
||||
".commentlist",
|
||||
".widget-area",
|
||||
"#left-sidebar",
|
||||
"#primary",
|
||||
".sidebar-toggle",
|
||||
".aar_div", # Advertisement divs
|
||||
".pjgm-header",
|
||||
".pjgm-footer",
|
||||
"#pjgm-menubar",
|
||||
"#pjgm-bigtitle",
|
||||
]
|
||||
|
||||
|
||||
class BloombergParser(BaseHTMLParser):
|
||||
"""Parser for bloomberg.com."""
|
||||
|
||||
@ -578,6 +608,7 @@ PARSER_REGISTRY = {
|
||||
r"theredhandfiles\.com": TheRedHandFilesParser,
|
||||
r"rachelbythebay\.com": RachelByTheBayParser,
|
||||
r"nadia\.xyz": NadiaXyzParser,
|
||||
r"slatestarcodex\.com": SlateStarCodexParser,
|
||||
}
|
||||
|
||||
|
||||
|
@ -4,8 +4,10 @@ from typing import cast
|
||||
import pytest
|
||||
from PIL import Image
|
||||
from memory.common import settings, chunker, extract
|
||||
from memory.common.db.models.source_items import (
|
||||
from memory.common.db.models.source_item import (
|
||||
Chunk,
|
||||
)
|
||||
from memory.common.db.models.source_items import (
|
||||
MailMessage,
|
||||
)
|
||||
from memory.common.db.models.source_item import (
|
||||
|
@ -203,6 +203,8 @@ Test Body Content"""
|
||||
"sender": "sender@example.com",
|
||||
"recipients": ["recipient@example.com"],
|
||||
"tags": None,
|
||||
"folder": None,
|
||||
"message_id": "<test@example.com>",
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,6 +3,7 @@ from pathlib import Path
|
||||
from unittest.mock import patch, Mock
|
||||
|
||||
from memory.common.db.models import Book, BookSection
|
||||
from memory.common import settings
|
||||
from memory.parsers.ebook import Ebook, Section
|
||||
from memory.workers.tasks import ebook
|
||||
|
||||
@ -46,7 +47,7 @@ def mock_ebook():
|
||||
end_page=20,
|
||||
),
|
||||
],
|
||||
file_path=Path("/test/book.epub"),
|
||||
file_path=settings.FILE_STORAGE_DIR / "test/book.epub",
|
||||
n_pages=20,
|
||||
)
|
||||
|
||||
@ -70,7 +71,7 @@ def test_create_book_from_ebook(mock_ebook):
|
||||
assert book.author == "Test Author" # type: ignore
|
||||
assert book.publisher == "Test Publisher" # type: ignore
|
||||
assert book.language == "en" # type: ignore
|
||||
assert book.file_path == "/test/book.epub" # type: ignore
|
||||
assert book.file_path == "test/book.epub" # type: ignore
|
||||
assert book.total_pages == 20 # type: ignore
|
||||
assert book.book_metadata == { # type: ignore
|
||||
"language": "en",
|
||||
|
@ -257,6 +257,8 @@ def test_create_mail_message(db_session):
|
||||
"recipients": ["recipient@example.com"],
|
||||
"date": "2023-01-01T12:00:00+00:00",
|
||||
"mime_type": "message/rfc822",
|
||||
"folder": "INBOX",
|
||||
"message_id": "321",
|
||||
"size": 412,
|
||||
"tags": ["test"],
|
||||
"filename": None,
|
||||
|
Loading…
x
Reference in New Issue
Block a user