Mirror of https://github.com/mruwnik/memory.git (synced 2025-06-08 13:24:41 +02:00)

Commit c6cd809eb7 ("proper chunks path")
Parent: 44de394eb1

.gitignore (vendored), 1 line added:

@@ -1,3 +1,4 @@
+memory_files
 .env
 .DS_Store
 secrets/

Docker Compose file:

@@ -2,21 +2,21 @@ version: "3.9"

 # --------------------------------------------------------------------- networks
 networks:
-  kbnet: # internal overlay – NOT exposed
+  kbnet:
+    # internal overlay – NOT exposed
     driver: bridge

 # --------------------------------------------------------------------- secrets
 secrets:
-  postgres_password: {file: ./secrets/postgres_password.txt}
-  jwt_secret: {file: ./secrets/jwt_secret.txt}
-  openai_key: {file: ./secrets/openai_key.txt}
+  postgres_password: { file: ./secrets/postgres_password.txt }
+  jwt_secret: { file: ./secrets/jwt_secret.txt }
+  openai_key: { file: ./secrets/openai_key.txt }

 # --------------------------------------------------------------------- volumes
 volumes:
   db_data: {} # Postgres
   qdrant_data: {} # Qdrant
   rabbitmq_data: {} # RabbitMQ
-  file_storage: {} # File storage

 # ------------------------------ X-templates ----------------------------
 x-common-env: &env

@@ -26,60 +26,60 @@ x-common-env: &env
   FILE_STORAGE_DIR: /app/memory_files
   TZ: "Etc/UTC"


 x-worker-base: &worker-base
   build:
     context: .
     dockerfile: docker/workers/Dockerfile
   restart: unless-stopped
-  networks: [kbnet]
-  security_opt: ["no-new-privileges=true"]
-  depends_on: [postgres, rabbitmq, qdrant]
-  env_file: [.env]
+  networks: [ kbnet ]
+  security_opt: [ "no-new-privileges=true" ]
+  depends_on: [ postgres, rabbitmq, qdrant ]
+  env_file: [ .env ]
   environment: &worker-env
     <<: *env
     POSTGRES_PASSWORD_FILE: /run/secrets/postgres_password
     # DSNs are built in worker entrypoint from user + pw files
     QDRANT_URL: http://qdrant:6333
     OPENAI_API_KEY_FILE: /run/secrets/openai_key
-  secrets: [postgres_password, openai_key]
+  secrets: [ postgres_password, openai_key ]
   read_only: true
-  tmpfs: [/tmp,/var/tmp]
-  cap_drop: [ALL]
+  tmpfs: [ /tmp, /var/tmp ]
+  cap_drop: [ ALL ]
   volumes:
-    - file_storage:/app/memory_files:rw
+    - ./memory_files:/app/memory_files:rw
   logging:
-    options: {max-size: "10m", max-file: "3"}
+    options: { max-size: "10m", max-file: "3" }
+  user: kb

 # ================================ SERVICES ============================

 services:
   # ----------------------------------------------------------------- data layer
   postgres:
     image: postgres:15
     restart: unless-stopped
-    networks: [kbnet]
+    networks: [ kbnet ]
     environment:
       <<: *env
       POSTGRES_USER: kb
       POSTGRES_PASSWORD_FILE: /run/secrets/postgres_password
       POSTGRES_DB: kb
-    secrets: [postgres_password]
+    secrets: [ postgres_password ]
     volumes:
       - db_data:/var/lib/postgresql/data:rw
     healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U kb"]
+      test: [ "CMD-SHELL", "pg_isready -U kb" ]
       interval: 10s
       timeout: 5s
       retries: 5
     mem_limit: 4g
     cpus: "1.5"
-    security_opt: ["no-new-privileges=true"]
+    security_opt: [ "no-new-privileges=true" ]

   rabbitmq:
     image: rabbitmq:3.13-management
     restart: unless-stopped
-    networks: [kbnet]
+    networks: [ kbnet ]
     environment:
       <<: *env
       RABBITMQ_DEFAULT_USER: "kb"

@@ -87,20 +87,21 @@ services:
     volumes:
       - rabbitmq_data:/var/lib/rabbitmq:rw
     healthcheck:
-      test: ["CMD", "rabbitmq-diagnostics", "ping"]
+      test: [ "CMD", "rabbitmq-diagnostics", "ping" ]
       interval: 15s
       timeout: 5s
       retries: 5
     mem_limit: 512m
     cpus: "0.5"
-    security_opt: ["no-new-privileges=true"]
-    ports: # UI only on localhost
+    security_opt: [ "no-new-privileges=true" ]
+    ports:
+      # UI only on localhost
       - "127.0.0.1:15672:15672"

   qdrant:
     image: qdrant/qdrant:v1.14.0
     restart: unless-stopped
-    networks: [kbnet]
+    networks: [ kbnet ]
     volumes:
       - qdrant_data:/qdrant/storage:rw
     tmpfs:

@@ -108,16 +109,16 @@ services:
       - /var/tmp
       - /qdrant/snapshots:rw
     healthcheck:
-      test: ["CMD", "wget", "-q", "-T", "2", "-O", "-", "localhost:6333/ready"]
+      test: [ "CMD", "wget", "-q", "-T", "2", "-O", "-", "localhost:6333/ready" ]
       interval: 15s
       timeout: 5s
       retries: 5
     mem_limit: 4g
     cpus: "2"
-    security_opt: ["no-new-privileges=true"]
-    cap_drop: [ALL]
+    security_opt: [ "no-new-privileges=true" ]
+    cap_drop: [ ALL ]

   # ------------------------------------------------------------ API / gateway
   # api:
   #   build:
   #     context: .

@@ -148,7 +149,7 @@ services:
   traefik:
     image: traefik:v3.0
     restart: unless-stopped
-    networks: [kbnet]
+    networks: [ kbnet ]
     command:
       - "--providers.docker=true"
       - "--providers.docker.network=kbnet"

@@ -166,55 +167,55 @@ services:
       - /var/run/docker.sock:/var/run/docker.sock:ro
       # - ./acme.json:/acme.json:rw

   # ------------------------------------------------------------ Celery workers
   worker-email:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "email"
-    deploy: {resources: {limits: {cpus: "2", memory: 3g}}}
+    deploy: { resources: { limits: { cpus: "2", memory: 3g } } }

   worker-text:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "medium_embed"
-    deploy: {resources: {limits: {cpus: "2", memory: 3g}}}
+    deploy: { resources: { limits: { cpus: "2", memory: 3g } } }

   worker-photo:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "photo_embed"
-    deploy: {resources: {limits: {cpus: "4", memory: 4g}}}
+    deploy: { resources: { limits: { cpus: "4", memory: 4g } } }

   worker-ocr:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "low_ocr"
-    deploy: {resources: {limits: {cpus: "4", memory: 4g}}}
+    deploy: { resources: { limits: { cpus: "4", memory: 4g } } }

   worker-git:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "git_summary"
-    deploy: {resources: {limits: {cpus: "1", memory: 1g}}}
+    deploy: { resources: { limits: { cpus: "1", memory: 1g } } }

   worker-rss:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "rss"
-    deploy: {resources: {limits: {cpus: "0.5", memory: 512m}}}
+    deploy: { resources: { limits: { cpus: "0.5", memory: 512m } } }

   worker-docs:
     <<: *worker-base
     environment:
       <<: *worker-env
       QUEUES: "docs"
-    deploy: {resources: {limits: {cpus: "1", memory: 1g}}}
+    deploy: { resources: { limits: { cpus: "1", memory: 1g } } }

   ingest-hub:
     <<: *worker-base

@@ -224,21 +225,21 @@ services:
     environment:
       <<: *worker-env
     volumes:
-      - file_storage:/app/memory_files:rw
+      - ./memory_files:/app/memory_files:rw
     tmpfs:
       - /tmp
       - /var/tmp
       - /var/log/supervisor
       - /var/run/supervisor
-    deploy: {resources: {limits: {cpus: "0.5", memory: 512m}}}
+    deploy: { resources: { limits: { cpus: "0.5", memory: 512m } } }

   # ------------------------------------------------------------ watchtower (auto-update)
   watchtower:
     image: containrrr/watchtower
     restart: unless-stopped
-    command: ["--schedule", "0 0 4 * * *", "--cleanup"]
-    volumes: ["/var/run/docker.sock:/var/run/docker.sock:ro"]
-    networks: [kbnet]
+    command: [ "--schedule", "0 0 4 * * *", "--cleanup" ]
+    volumes: [ "/var/run/docker.sock:/var/run/docker.sock:ro" ]
+    networks: [ kbnet ]

   # ------------------------------------------------------------------- profiles: observability (opt-in)
   # services:
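
The storage change above replaces the named file_storage volume with a ./memory_files bind mount and runs the worker template as user kb, which only works if that host directory is writable by the container user. A minimal, hypothetical startup check along those lines (not part of the repo; the env var name and mount path come from the compose file, everything else is illustrative):

    import os
    import pathlib
    import sys

    # Illustrative check: fail fast if the bind-mounted storage directory
    # is not writable by the user the container runs as ("kb" in the compose file).
    storage = pathlib.Path(os.getenv("FILE_STORAGE_DIR", "/app/memory_files"))
    try:
        storage.mkdir(parents=True, exist_ok=True)
        probe = storage / ".write_probe"
        probe.write_text("ok")
        probe.unlink()
    except OSError as exc:
        sys.exit(f"{storage} is not writable by uid {os.getuid()}: {exc}")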

Dockerfile for the supervisor-based image:

@@ -21,8 +21,11 @@ RUN chmod +x entry.sh
 # Create required tmpfs directories for supervisor
 RUN mkdir -p /var/log/supervisor /var/run/supervisor

+# Create storage directory
+RUN mkdir -p /app/memory_files
+
 # Create user and set permissions
-RUN useradd -m kb && chown -R kb /app /var/log/supervisor /var/run/supervisor
+RUN useradd -m kb && chown -R kb /app /var/log/supervisor /var/run/supervisor /app/memory_files
 USER kb

 # Default queues to process

Worker Dockerfile (docker/workers/Dockerfile):

@@ -17,6 +17,8 @@ RUN apt-get update && apt-get install -y \
 COPY docker/workers/entry.sh ./entry.sh
 RUN chmod +x entry.sh

+RUN mkdir -p /app/memory_files
+
 # Create user and set permissions
 RUN useradd -m kb && chown -R kb /app
 USER kb
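
Both images now create /app/memory_files before the useradd/chown step, so inside the image the directory belongs to kb; when the compose bind mount shadows it at runtime, ownership comes from the host instead. A small, hypothetical way to confirm which case applies from inside a running container (only the path and user name come from the diff):

    import pathlib
    import pwd

    storage = pathlib.Path("/app/memory_files")
    owner = pwd.getpwuid(storage.stat().st_uid).pw_name
    # Expected "kb" when the baked-in directory is used; a bind mount will
    # show whatever uid owns ./memory_files on the host.
    print(f"{storage} is owned by {owner}")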

Embedding module (memory.common.embedding):

@@ -128,13 +128,13 @@ def embed_page(page: dict[str, Any]) -> list[Vector]:

 def write_to_file(chunk_id: str, item: extract.MulitmodalChunk) -> pathlib.Path:
     if isinstance(item, str):
-        filename = settings.FILE_STORAGE_DIR / f"{chunk_id}.txt"
+        filename = settings.CHUNK_STORAGE_DIR / f"{chunk_id}.txt"
         filename.write_text(item)
     elif isinstance(item, bytes):
-        filename = settings.FILE_STORAGE_DIR / f"{chunk_id}.bin"
+        filename = settings.CHUNK_STORAGE_DIR / f"{chunk_id}.bin"
         filename.write_bytes(item)
     elif isinstance(item, Image.Image):
-        filename = settings.FILE_STORAGE_DIR / f"{chunk_id}.png"
+        filename = settings.CHUNK_STORAGE_DIR / f"{chunk_id}.png"
         item.save(filename)
     else:
         raise ValueError(f"Unsupported content type: {type(item)}")

@@ -156,13 +156,13 @@ def make_chunk(
         content = "\n\n".join(contents)
         model = settings.TEXT_EMBEDDING_MODEL
     elif len(contents) == 1:
-        filename = (write_to_file(chunk_id, contents[0]),)
+        filename = write_to_file(chunk_id, contents[0]).absolute().as_posix()
         model = settings.MIXED_EMBEDDING_MODEL
     else:
         for i, item in enumerate(contents):
             write_to_file(f"{chunk_id}_{i}", item)
         model = settings.MIXED_EMBEDDING_MODEL
-        filename = (settings.FILE_STORAGE_DIR / f"{chunk_id}_*",)
+        filename = (settings.CHUNK_STORAGE_DIR / f"{chunk_id}_*").absolute().as_posix()

     return Chunk(
         id=chunk_id,
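
With this change, single-item chunks store an absolute POSIX path and multi-item chunks keep the "{chunk_id}_*" glob form, both rooted in CHUNK_STORAGE_DIR. A hedged sketch of how a consumer could resolve either form back to concrete files (the helper below is not in the diff, only the path conventions are):

    import glob
    import pathlib

    def resolve_chunk_files(file_path: str) -> list[pathlib.Path]:
        # Mixed chunks are written as "<chunk_id>_<i>.<ext>" by
        # write_to_file(f"{chunk_id}_{i}", item) and referenced via a glob.
        if "*" in file_path:
            return [pathlib.Path(p) for p in sorted(glob.glob(file_path))]
        # Single-item chunks reference one concrete file.
        return [pathlib.Path(file_path)]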

Settings module (memory.common.settings):

@@ -4,9 +4,11 @@ from dotenv import load_dotenv

 load_dotenv()

+
 def boolean_env(key: str, default: bool = False) -> bool:
     return os.getenv(key, "0").lower() in ("1", "true", "yes")

+
 # Database settings
 DB_USER = os.getenv("DB_USER", "kb")
 if password_file := os.getenv("POSTGRES_PASSWORD_FILE"):

@@ -18,16 +20,26 @@ DB_HOST = os.getenv("DB_HOST", "postgres")
 DB_PORT = os.getenv("DB_PORT", "5432")
 DB_NAME = os.getenv("DB_NAME", "kb")

-def make_db_url(user=DB_USER, password=DB_PASSWORD, host=DB_HOST, port=DB_PORT, db=DB_NAME):
+def make_db_url(
+    user=DB_USER, password=DB_PASSWORD, host=DB_HOST, port=DB_PORT, db=DB_NAME
+):
     return f"postgresql://{user}:{password}@{host}:{port}/{db}"


 DB_URL = os.getenv("DATABASE_URL", make_db_url())


 FILE_STORAGE_DIR = pathlib.Path(os.getenv("FILE_STORAGE_DIR", "/tmp/memory_files"))
 FILE_STORAGE_DIR.mkdir(parents=True, exist_ok=True)
+CHUNK_STORAGE_DIR = pathlib.Path(
+    os.getenv("CHUNK_STORAGE_DIR", FILE_STORAGE_DIR / "chunks")
+)
+CHUNK_STORAGE_DIR.mkdir(parents=True, exist_ok=True)
 # Maximum attachment size to store directly in the database (10MB)
-MAX_INLINE_ATTACHMENT_SIZE = int(os.getenv("MAX_INLINE_ATTACHMENT_SIZE", 1 * 1024 * 1024))
+MAX_INLINE_ATTACHMENT_SIZE = int(
+    os.getenv("MAX_INLINE_ATTACHMENT_SIZE", 1 * 1024 * 1024)
+)

 # Qdrant settings
 QDRANT_HOST = os.getenv("QDRANT_HOST", "qdrant")
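
CHUNK_STORAGE_DIR resolves to an explicit env var if set, and otherwise to a chunks/ subdirectory of FILE_STORAGE_DIR; both directories are created at import time. A quick standalone sketch of the resolution order (same env var names and defaults as above):

    import os
    import pathlib

    file_storage = pathlib.Path(os.getenv("FILE_STORAGE_DIR", "/tmp/memory_files"))
    chunk_storage = pathlib.Path(os.getenv("CHUNK_STORAGE_DIR", file_storage / "chunks"))

    # No env vars set:              /tmp/memory_files/chunks
    # FILE_STORAGE_DIR=/data:       /data/chunks
    # CHUNK_STORAGE_DIR=/elsewhere: /elsewhere (independent of FILE_STORAGE_DIR)
    print(chunk_storage)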

Pytest fixtures (conftest):

@@ -199,8 +199,11 @@ def email_provider():

 @pytest.fixture(autouse=True)
 def mock_file_storage(tmp_path: Path):
+    chunk_storage_dir = tmp_path / "chunks"
+    chunk_storage_dir.mkdir(parents=True, exist_ok=True)
     with patch("memory.common.settings.FILE_STORAGE_DIR", tmp_path):
-        yield
+        with patch("memory.common.settings.CHUNK_STORAGE_DIR", chunk_storage_dir):
+            yield


 @pytest.fixture
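
Because the fixture is autouse, every test gets a fresh FILE_STORAGE_DIR (tmp_path) with a separate chunks/ subdirectory patched in as CHUNK_STORAGE_DIR. A hypothetical test, not part of the diff, that spells out what the fixture guarantees:

    from memory.common import settings


    def test_storage_dirs_are_isolated(mock_file_storage, tmp_path):
        # The autouse fixture patches both settings for the duration of the test.
        assert settings.FILE_STORAGE_DIR == tmp_path
        assert settings.CHUNK_STORAGE_DIR == tmp_path / "chunks"
        assert settings.CHUNK_STORAGE_DIR.exists()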

Embedding tests:

@@ -1,18 +1,20 @@
-import uuid
-import pytest
-from unittest.mock import Mock, patch
-from PIL import Image
 import pathlib
+import uuid
+from unittest.mock import Mock, patch
+
+import pytest
+from PIL import Image
+
 from memory.common import settings
 from memory.common.embedding import (
-    get_modality,
-    embed_text,
+    embed,
     embed_file,
     embed_mixed,
     embed_page,
-    embed,
-    write_to_file,
+    embed_text,
+    get_modality,
     make_chunk,
+    write_to_file,
 )


@@ -116,7 +118,7 @@ def test_write_to_file_text(mock_file_storage):

     file_path = write_to_file(chunk_id, content)

-    assert file_path == settings.FILE_STORAGE_DIR / f"{chunk_id}.txt"
+    assert file_path == settings.CHUNK_STORAGE_DIR / f"{chunk_id}.txt"
     assert file_path.exists()
     assert file_path.read_text() == content

@@ -128,7 +130,7 @@ def test_write_to_file_bytes(mock_file_storage):

     file_path = write_to_file(chunk_id, content)

-    assert file_path == settings.FILE_STORAGE_DIR / f"{chunk_id}.bin"
+    assert file_path == settings.CHUNK_STORAGE_DIR / f"{chunk_id}.bin"
     assert file_path.exists()
     assert file_path.read_bytes() == content

@@ -140,7 +142,7 @@ def test_write_to_file_image(mock_file_storage):

     file_path = write_to_file(chunk_id, img)

-    assert file_path == settings.FILE_STORAGE_DIR / f"{chunk_id}.png"
+    assert file_path == settings.CHUNK_STORAGE_DIR / f"{chunk_id}.png"
     assert file_path.exists()
     # Verify it's a valid image file by opening it
     image = Image.open(file_path)

@@ -192,8 +194,8 @@ def test_make_chunk_single_image(mock_file_storage, db_session):

     assert chunk.id == "00000000-0000-0000-0000-000000000002"
     assert chunk.content is None
-    assert chunk.file_path == (
-        settings.FILE_STORAGE_DIR / "00000000-0000-0000-0000-000000000002.png",
+    assert chunk.file_path == str(
+        settings.CHUNK_STORAGE_DIR / "00000000-0000-0000-0000-000000000002.png",
     )
     assert chunk.embedding_model == settings.MIXED_EMBEDDING_MODEL
     assert chunk.vector == vector

@@ -217,8 +219,8 @@ def test_make_chunk_mixed_content(mock_file_storage, db_session):

     assert chunk.id == "00000000-0000-0000-0000-000000000003"
     assert chunk.content is None
-    assert chunk.file_path == (
-        settings.FILE_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_*",
+    assert chunk.file_path == str(
+        settings.CHUNK_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_*",
     )
     assert chunk.embedding_model == settings.MIXED_EMBEDDING_MODEL
     assert chunk.vector == vector

@@ -226,8 +228,8 @@ def test_make_chunk_mixed_content(mock_file_storage, db_session):

     # Verify the files exist
     assert (
-        settings.FILE_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_0.txt"
+        settings.CHUNK_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_0.txt"
     ).exists()
     assert (
-        settings.FILE_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_1.png"
+        settings.CHUNK_STORAGE_DIR / "00000000-0000-0000-0000-000000000003_1.png"
     ).exists()