mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-08 13:24:41 +02:00
120 lines
3.9 KiB
Python
120 lines
3.9 KiB
Python
import os
|
|
import pathlib
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
|
|
def boolean_env(key: str, default: bool = False) -> bool:
    """Read an environment variable as a boolean flag.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is not set at all.

    Returns:
        True when the variable is set to "1", "true" or "yes"
        (case-insensitive), False for any other value that is set, and
        ``default`` when the variable is unset.
    """
    raw = os.getenv(key)
    if raw is None:
        # Previously the default was ignored and an unset variable was
        # always treated as False.
        return default
    return raw.lower() in ("1", "true", "yes")
|
|
|
|
|
|
# Database settings
DB_USER = os.getenv("DB_USER", "kb")
# If POSTGRES_PASSWORD_FILE is set, the password is read from that file
# (surrounding whitespace stripped); otherwise DB_PASSWORD is used.
if password_file := os.getenv("POSTGRES_PASSWORD_FILE"):
    DB_PASSWORD = pathlib.Path(password_file).read_text().strip()
else:
    DB_PASSWORD = os.getenv("DB_PASSWORD", "kb")

DB_HOST = os.getenv("DB_HOST", "postgres")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "kb")


def make_db_url(
    user=DB_USER, password=DB_PASSWORD, host=DB_HOST, port=DB_PORT, db=DB_NAME
):
    """Build a ``postgresql://`` connection URL.

    The user name and password are percent-encoded so that characters with
    a meaning inside a URL (``@``, ``:``, ``/``, ``#``, spaces, ...) cannot
    break the URL apart. Values made only of unreserved characters are
    emitted unchanged.

    Args:
        user: Database user name.
        password: Plain (not already percent-encoded) password.
        host: Database host name.
        port: Database port.
        db: Database name.

    Returns:
        The connection URL as a string.

    Note:
        The defaults are bound to the module-level ``DB_*`` values when the
        function is defined.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # safe="" so that "/" is encoded as well; quote() rather than
    # quote_plus() so a space becomes "%20" instead of "+".
    encoded_user = quote(str(user), safe="")
    encoded_password = quote(str(password), safe="")
    return f"postgresql://{encoded_user}:{encoded_password}@{host}:{port}/{db}"


# An explicit DATABASE_URL takes precedence over the URL assembled from parts.
DB_URL = os.getenv("DATABASE_URL", make_db_url())
|
|
|
|
# Celery settings
# RabbitMQ connection details; each value falls back to a built-in default.
RABBITMQ_USER = os.getenv("RABBITMQ_USER", "kb")
RABBITMQ_PASSWORD = os.getenv("RABBITMQ_PASSWORD", "kb")
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq")

# Unless CELERY_RESULT_BACKEND is set, the result backend is the database URL
# prefixed with "db+".
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "db+" + DB_URL)
|
|
|
|
|
|
# File storage settings
# FILE_STORAGE_DIR is the root; every other directory defaults to a
# sub-directory of it and can be overridden through its own variable.
FILE_STORAGE_DIR = pathlib.Path(os.getenv("FILE_STORAGE_DIR", "/tmp/memory_files"))
EBOOK_STORAGE_DIR = pathlib.Path(
    os.getenv("EBOOK_STORAGE_DIR", FILE_STORAGE_DIR / "ebooks")
)
EMAIL_STORAGE_DIR = pathlib.Path(
    os.getenv("EMAIL_STORAGE_DIR", FILE_STORAGE_DIR / "emails")
)
CHUNK_STORAGE_DIR = pathlib.Path(
    os.getenv("CHUNK_STORAGE_DIR", FILE_STORAGE_DIR / "chunks")
)
COMIC_STORAGE_DIR = pathlib.Path(
    os.getenv("COMIC_STORAGE_DIR", FILE_STORAGE_DIR / "comics")
)
PHOTO_STORAGE_DIR = pathlib.Path(
    os.getenv("PHOTO_STORAGE_DIR", FILE_STORAGE_DIR / "photos")
)
WEBPAGE_STORAGE_DIR = pathlib.Path(
    os.getenv("WEBPAGE_STORAGE_DIR", FILE_STORAGE_DIR / "webpages")
)

storage_dirs = [
    FILE_STORAGE_DIR,
    EBOOK_STORAGE_DIR,
    EMAIL_STORAGE_DIR,
    CHUNK_STORAGE_DIR,
    COMIC_STORAGE_DIR,
    PHOTO_STORAGE_DIR,
    WEBPAGE_STORAGE_DIR,
]
# Create every storage directory (and missing parents) when the module is
# imported. The loop variable is underscore-prefixed so it does not shadow
# the builtin `dir` and is not exported by `from ... import *`.
for _storage_dir in storage_dirs:
    _storage_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Maximum attachment size, in bytes, to store directly in the database.
# Defaults to 1 MiB (1024 * 1024 bytes) when the variable is unset.
# NOTE(review): this comment previously said "10MB" while the default in code
# has been 1 MiB; the value is left unchanged - confirm which was intended.
MAX_INLINE_ATTACHMENT_SIZE = int(os.getenv("MAX_INLINE_ATTACHMENT_SIZE", 1024 * 1024))
|
|
|
|
# Qdrant settings
QDRANT_HOST = os.getenv("QDRANT_HOST", "qdrant")
# Ports and the timeout are converted to int; defaults apply when unset.
QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
QDRANT_GRPC_PORT = int(os.getenv("QDRANT_GRPC_PORT", 6334))
# None when no API key is configured.
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_PREFER_GRPC = boolean_env("QDRANT_PREFER_GRPC", default=False)
QDRANT_TIMEOUT = int(os.getenv("QDRANT_TIMEOUT", 60))
|
|
|
|
|
|
# Worker settings
# All *_INTERVAL values are expressed in seconds.
EMAIL_SYNC_INTERVAL = int(os.getenv("EMAIL_SYNC_INTERVAL", 3_600))  # 1 hour
COMIC_SYNC_INTERVAL = int(os.getenv("COMIC_SYNC_INTERVAL", 3_600))  # 1 hour
ARTICLE_FEED_SYNC_INTERVAL = int(
    os.getenv("ARTICLE_FEED_SYNC_INTERVAL", 1_800)  # 30 minutes
)
CLEAN_COLLECTION_INTERVAL = int(
    os.getenv("CLEAN_COLLECTION_INTERVAL", 86_400)  # 24 hours
)
CHUNK_REINGEST_INTERVAL = int(os.getenv("CHUNK_REINGEST_INTERVAL", 3_600))  # 1 hour

# Unlike the intervals above, this value is in minutes (default: 24 hours).
CHUNK_REINGEST_SINCE_MINUTES = int(os.getenv("CHUNK_REINGEST_SINCE_MINUTES", 1_440))
|
|
|
|
# Embedding settings
# Model names are plain strings; the numeric settings are converted to int.
TEXT_EMBEDDING_MODEL = os.getenv("TEXT_EMBEDDING_MODEL", "voyage-3-large")
MIXED_EMBEDDING_MODEL = os.getenv("MIXED_EMBEDDING_MODEL", "voyage-multimodal-3")
EMBEDDING_MAX_WORKERS = int(os.getenv("EMBEDDING_MAX_WORKERS", "50"))

# VoyageAI max context window
EMBEDDING_MAX_TOKENS = int(os.getenv("EMBEDDING_MAX_TOKENS", "32000"))
# Optimal chunk size for semantic search
DEFAULT_CHUNK_TOKENS = int(os.getenv("DEFAULT_CHUNK_TOKENS", "512"))
OVERLAP_TOKENS = int(os.getenv("OVERLAP_TOKENS", "50"))
|
|
|
|
|
|
# LLM settings
# For each provider, a key file named by *_API_KEY_FILE takes precedence; its
# contents are whitespace-stripped. Otherwise the plain *_API_KEY variable is
# used, defaulting to an empty string.
openai_key_file = os.getenv("OPENAI_API_KEY_FILE")
OPENAI_API_KEY = (
    pathlib.Path(openai_key_file).read_text().strip()
    if openai_key_file
    else os.getenv("OPENAI_API_KEY", "")
)

anthropic_key_file = os.getenv("ANTHROPIC_API_KEY_FILE")
ANTHROPIC_API_KEY = (
    pathlib.Path(anthropic_key_file).read_text().strip()
    if anthropic_key_file
    else os.getenv("ANTHROPIC_API_KEY", "")
)

SUMMARIZER_MODEL = os.getenv("SUMMARIZER_MODEL", "anthropic/claude-3-haiku-20240307")
|