mirror of
https://github.com/mruwnik/memory.git
synced 2025-07-30 06:36:07 +02:00
Compare commits
6 Commits
8855e8715a
...
a3daea883b
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a3daea883b | ||
![]() |
80020e2a61 | ||
![]() |
a424c5f4d0 | ||
![]() |
f0d441ffe9 | ||
![]() |
1538d38bf6 | ||
![]() |
4049cf15b4 |
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +1,8 @@
|
|||||||
|
|
||||||
|
Books
|
||||||
|
CLAUDE.md
|
||||||
memory_files
|
memory_files
|
||||||
|
venv
|
||||||
.env
|
.env
|
||||||
.DS_Store
|
.DS_Store
|
||||||
secrets/
|
secrets/
|
||||||
|
@ -141,6 +141,9 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: docker/api/Dockerfile
|
dockerfile: docker/api/Dockerfile
|
||||||
|
args:
|
||||||
|
SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
|
||||||
|
SESSION_COOKIE_NAME: "${SESSION_COOKIE_NAME:-session_id}"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
networks: [kbnet]
|
networks: [kbnet]
|
||||||
depends_on: [postgres, rabbitmq, qdrant]
|
depends_on: [postgres, rabbitmq, qdrant]
|
||||||
@ -152,6 +155,7 @@ services:
|
|||||||
VITE_SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
|
VITE_SERVER_URL: "${SERVER_URL:-http://localhost:8000}"
|
||||||
STATIC_DIR: "/app/static"
|
STATIC_DIR: "/app/static"
|
||||||
VOYAGE_API_KEY: ${VOYAGE_API_KEY}
|
VOYAGE_API_KEY: ${VOYAGE_API_KEY}
|
||||||
|
ENABLE_BM25_SEARCH: false
|
||||||
secrets: [postgres_password]
|
secrets: [postgres_password]
|
||||||
volumes:
|
volumes:
|
||||||
- ./memory_files:/app/memory_files:rw
|
- ./memory_files:/app/memory_files:rw
|
||||||
|
@ -1,14 +1,5 @@
|
|||||||
# Frontend build stage
|
# Backend base stage
|
||||||
FROM node:18-alpine AS frontend-builder
|
FROM python:3.11-slim AS backend-base
|
||||||
|
|
||||||
WORKDIR /frontend
|
|
||||||
COPY frontend/package*.json ./
|
|
||||||
RUN npm install
|
|
||||||
COPY frontend/ ./
|
|
||||||
RUN npm run build
|
|
||||||
|
|
||||||
# Backend build stage
|
|
||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@ -19,18 +10,37 @@ RUN apt-get update && apt-get install -y \
|
|||||||
python3-dev \
|
python3-dev \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Copy requirements files and setup
|
# Copy and install Python requirements
|
||||||
COPY requirements ./requirements/
|
COPY requirements ./requirements/
|
||||||
RUN mkdir src
|
RUN mkdir src
|
||||||
COPY setup.py ./
|
COPY setup.py ./
|
||||||
# Do an initial install to get the dependencies cached
|
# Do an initial install to get the dependencies cached
|
||||||
RUN pip install -e ".[api]"
|
RUN pip install -e ".[api]"
|
||||||
|
|
||||||
# Install the package with common dependencies
|
# Frontend build stage
|
||||||
|
FROM node:18-alpine AS frontend-builder
|
||||||
|
|
||||||
|
WORKDIR /frontend
|
||||||
|
COPY frontend/package*.json ./
|
||||||
|
RUN npm install
|
||||||
|
COPY frontend/ ./
|
||||||
|
|
||||||
|
# Set Vite environment variables for build from build args
|
||||||
|
ARG SERVER_URL
|
||||||
|
ARG SESSION_COOKIE_NAME
|
||||||
|
ENV VITE_SERVER_URL=${SERVER_URL}
|
||||||
|
ENV VITE_SESSION_COOKIE_NAME=${SESSION_COOKIE_NAME}
|
||||||
|
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Final stage
|
||||||
|
FROM backend-base
|
||||||
|
|
||||||
|
# Install the package with Python source code
|
||||||
COPY src/ ./src/
|
COPY src/ ./src/
|
||||||
RUN pip install -e ".[api]"
|
RUN pip install -e ".[api]"
|
||||||
|
|
||||||
# Copy frontend build output from previous stage
|
# Copy frontend build output from frontend stage
|
||||||
COPY --from=frontend-builder /frontend/dist ./static/
|
COPY --from=frontend-builder /frontend/dist ./static/
|
||||||
|
|
||||||
# Run as non-root user
|
# Run as non-root user
|
||||||
|
@ -6,18 +6,23 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from memory.api.search.embeddings import search_embeddings
|
from memory.common import extract, settings
|
||||||
from memory.api.search.bm25 import search_bm25
|
|
||||||
from memory.api.search.utils import SearchFilters, SearchResult
|
|
||||||
|
|
||||||
from memory.api.search.utils import group_chunks, with_timeout
|
|
||||||
from memory.common import extract
|
|
||||||
from memory.common.collections import (
|
from memory.common.collections import (
|
||||||
ALL_COLLECTIONS,
|
ALL_COLLECTIONS,
|
||||||
MULTIMODAL_COLLECTIONS,
|
MULTIMODAL_COLLECTIONS,
|
||||||
TEXT_COLLECTIONS,
|
TEXT_COLLECTIONS,
|
||||||
)
|
)
|
||||||
from memory.common import settings
|
from memory.api.search.embeddings import search_embeddings
|
||||||
|
|
||||||
|
if settings.ENABLE_BM25_SEARCH:
|
||||||
|
from memory.api.search.bm25 import search_bm25
|
||||||
|
|
||||||
|
from memory.api.search.utils import (
|
||||||
|
SearchFilters,
|
||||||
|
SearchResult,
|
||||||
|
group_chunks,
|
||||||
|
with_timeout,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -370,7 +370,7 @@ class SourceItem(Base):
|
|||||||
@property
|
@property
|
||||||
def display_contents(self) -> str | dict | None:
|
def display_contents(self) -> str | dict | None:
|
||||||
payload = self.as_payload()
|
payload = self.as_payload()
|
||||||
payload.pop("id", None) # type: ignore
|
payload.pop("source_id", None) # type: ignore
|
||||||
return {
|
return {
|
||||||
**payload,
|
**payload,
|
||||||
"tags": self.tags,
|
"tags": self.tags,
|
||||||
|
@ -21,7 +21,7 @@ from sqlalchemy import (
|
|||||||
Text,
|
Text,
|
||||||
func,
|
func,
|
||||||
)
|
)
|
||||||
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR, UUID
|
from sqlalchemy.dialects.postgresql import JSONB, TSVECTOR
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
|
|
||||||
from memory.common import settings
|
from memory.common import settings
|
||||||
@ -31,7 +31,6 @@ import memory.common.formatters.observation as observation
|
|||||||
|
|
||||||
from memory.common.db.models.source_item import (
|
from memory.common.db.models.source_item import (
|
||||||
SourceItem,
|
SourceItem,
|
||||||
Chunk,
|
|
||||||
SourceItemPayload,
|
SourceItemPayload,
|
||||||
clean_filename,
|
clean_filename,
|
||||||
chunk_mixed,
|
chunk_mixed,
|
||||||
@ -92,9 +91,9 @@ class MailMessage(SourceItem):
|
|||||||
|
|
||||||
def as_payload(self) -> MailMessagePayload:
|
def as_payload(self) -> MailMessagePayload:
|
||||||
base_payload = super().as_payload() | {
|
base_payload = super().as_payload() | {
|
||||||
"tags": cast(list[str], self.tags)
|
"tags": (cast(list[str], self.tags) or [])
|
||||||
+ [cast(str, self.sender)]
|
+ [cast(str, self.sender)]
|
||||||
+ cast(list[str], self.recipients)
|
+ (cast(list[str], self.recipients) or [])
|
||||||
}
|
}
|
||||||
return MailMessagePayload(
|
return MailMessagePayload(
|
||||||
**cast(dict, base_payload),
|
**cast(dict, base_payload),
|
||||||
@ -576,6 +575,11 @@ class ForumPost(SourceItem):
|
|||||||
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
|
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
|
||||||
return chunk_mixed(cast(str, self.content), cast(list[str], self.images))
|
return chunk_mixed(cast(str, self.content), cast(list[str], self.images))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_collections(cls) -> list[str]:
|
||||||
|
# Very sad that I didn't keep the names consistent... Qdrant doesn't allow renaming collections
|
||||||
|
return ["forum"]
|
||||||
|
|
||||||
|
|
||||||
class MiscDoc(SourceItem):
|
class MiscDoc(SourceItem):
|
||||||
__tablename__ = "misc_doc"
|
__tablename__ = "misc_doc"
|
||||||
|
@ -532,6 +532,36 @@ class NadiaXyzParser(BaseHTMLParser):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class SlateStarCodexParser(BaseHTMLParser):
|
||||||
|
"""Parser for slatestarcodex.com (Scott Alexander's blog)."""
|
||||||
|
|
||||||
|
article_selector = ".post, .hentry, [id^='post-']"
|
||||||
|
title_selector = "h1.pjgm-posttitle, h1"
|
||||||
|
author_selector = ".author.vcard a, .url.fn.n"
|
||||||
|
date_selector = ".entry-date"
|
||||||
|
date_format = "%B %d, %Y" # "January 21, 2021" format
|
||||||
|
content_selector = ".pjgm-postcontent"
|
||||||
|
author = "Scott Alexander"
|
||||||
|
|
||||||
|
remove_selectors = BaseHTMLParser.remove_selectors + [
|
||||||
|
".pjgm-postmeta",
|
||||||
|
".pjgm-postutility",
|
||||||
|
".pjgm-navigation",
|
||||||
|
"#pjgm-navbelow",
|
||||||
|
"#comments",
|
||||||
|
".commentlist",
|
||||||
|
".widget-area",
|
||||||
|
"#left-sidebar",
|
||||||
|
"#primary",
|
||||||
|
".sidebar-toggle",
|
||||||
|
".aar_div", # Advertisement divs
|
||||||
|
".pjgm-header",
|
||||||
|
".pjgm-footer",
|
||||||
|
"#pjgm-menubar",
|
||||||
|
"#pjgm-bigtitle",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class BloombergParser(BaseHTMLParser):
|
class BloombergParser(BaseHTMLParser):
|
||||||
"""Parser for bloomberg.com."""
|
"""Parser for bloomberg.com."""
|
||||||
|
|
||||||
@ -578,6 +608,7 @@ PARSER_REGISTRY = {
|
|||||||
r"theredhandfiles\.com": TheRedHandFilesParser,
|
r"theredhandfiles\.com": TheRedHandFilesParser,
|
||||||
r"rachelbythebay\.com": RachelByTheBayParser,
|
r"rachelbythebay\.com": RachelByTheBayParser,
|
||||||
r"nadia\.xyz": NadiaXyzParser,
|
r"nadia\.xyz": NadiaXyzParser,
|
||||||
|
r"slatestarcodex\.com": SlateStarCodexParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,8 +4,10 @@ from typing import cast
|
|||||||
import pytest
|
import pytest
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from memory.common import settings, chunker, extract
|
from memory.common import settings, chunker, extract
|
||||||
from memory.common.db.models.source_items import (
|
from memory.common.db.models.source_item import (
|
||||||
Chunk,
|
Chunk,
|
||||||
|
)
|
||||||
|
from memory.common.db.models.source_items import (
|
||||||
MailMessage,
|
MailMessage,
|
||||||
)
|
)
|
||||||
from memory.common.db.models.source_item import (
|
from memory.common.db.models.source_item import (
|
||||||
|
@ -203,6 +203,8 @@ Test Body Content"""
|
|||||||
"sender": "sender@example.com",
|
"sender": "sender@example.com",
|
||||||
"recipients": ["recipient@example.com"],
|
"recipients": ["recipient@example.com"],
|
||||||
"tags": None,
|
"tags": None,
|
||||||
|
"folder": None,
|
||||||
|
"message_id": "<test@example.com>",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ from pathlib import Path
|
|||||||
from unittest.mock import patch, Mock
|
from unittest.mock import patch, Mock
|
||||||
|
|
||||||
from memory.common.db.models import Book, BookSection
|
from memory.common.db.models import Book, BookSection
|
||||||
|
from memory.common import settings
|
||||||
from memory.parsers.ebook import Ebook, Section
|
from memory.parsers.ebook import Ebook, Section
|
||||||
from memory.workers.tasks import ebook
|
from memory.workers.tasks import ebook
|
||||||
|
|
||||||
@ -46,7 +47,7 @@ def mock_ebook():
|
|||||||
end_page=20,
|
end_page=20,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
file_path=Path("/test/book.epub"),
|
file_path=settings.FILE_STORAGE_DIR / "test/book.epub",
|
||||||
n_pages=20,
|
n_pages=20,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -70,7 +71,7 @@ def test_create_book_from_ebook(mock_ebook):
|
|||||||
assert book.author == "Test Author" # type: ignore
|
assert book.author == "Test Author" # type: ignore
|
||||||
assert book.publisher == "Test Publisher" # type: ignore
|
assert book.publisher == "Test Publisher" # type: ignore
|
||||||
assert book.language == "en" # type: ignore
|
assert book.language == "en" # type: ignore
|
||||||
assert book.file_path == "/test/book.epub" # type: ignore
|
assert book.file_path == "test/book.epub" # type: ignore
|
||||||
assert book.total_pages == 20 # type: ignore
|
assert book.total_pages == 20 # type: ignore
|
||||||
assert book.book_metadata == { # type: ignore
|
assert book.book_metadata == { # type: ignore
|
||||||
"language": "en",
|
"language": "en",
|
||||||
|
@ -257,6 +257,8 @@ def test_create_mail_message(db_session):
|
|||||||
"recipients": ["recipient@example.com"],
|
"recipients": ["recipient@example.com"],
|
||||||
"date": "2023-01-01T12:00:00+00:00",
|
"date": "2023-01-01T12:00:00+00:00",
|
||||||
"mime_type": "message/rfc822",
|
"mime_type": "message/rfc822",
|
||||||
|
"folder": "INBOX",
|
||||||
|
"message_id": "321",
|
||||||
"size": 412,
|
"size": 412,
|
||||||
"tags": ["test"],
|
"tags": ["test"],
|
||||||
"filename": None,
|
"filename": None,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user