diff --git a/src/memory/common/db/models/source_item.py b/src/memory/common/db/models/source_item.py index 2062605..8ec4a43 100644 --- a/src/memory/common/db/models/source_item.py +++ b/src/memory/common/db/models/source_item.py @@ -81,7 +81,7 @@ def handle_duplicate_sha256(session, flush_context, instances): def clean_filename(filename: str) -> str: - return re.sub(r"[^a-zA-Z0-9_]", "_", filename).strip("_") + return re.sub(r"[^a-zA-Z0-9_]", "_", filename).strip("_")[:30] def image_filenames(chunk_id: str, images: list[Image.Image]) -> list[str]: diff --git a/src/memory/common/settings.py b/src/memory/common/settings.py index 871ad41..8a84a9d 100644 --- a/src/memory/common/settings.py +++ b/src/memory/common/settings.py @@ -99,7 +99,7 @@ QDRANT_TIMEOUT = int(os.getenv("QDRANT_TIMEOUT", "60")) # Worker settings # Intervals are in seconds EMAIL_SYNC_INTERVAL = int(os.getenv("EMAIL_SYNC_INTERVAL", 60 * 60)) -COMIC_SYNC_INTERVAL = int(os.getenv("COMIC_SYNC_INTERVAL", 60 * 60)) +COMIC_SYNC_INTERVAL = int(os.getenv("COMIC_SYNC_INTERVAL", 60 * 60 * 24)) ARTICLE_FEED_SYNC_INTERVAL = int(os.getenv("ARTICLE_FEED_SYNC_INTERVAL", 30 * 60)) CLEAN_COLLECTION_INTERVAL = int(os.getenv("CLEAN_COLLECTION_INTERVAL", 24 * 60 * 60)) CHUNK_REINGEST_INTERVAL = int(os.getenv("CHUNK_REINGEST_INTERVAL", 60 * 60)) diff --git a/src/memory/workers/ingest.py b/src/memory/workers/ingest.py index 06b092a..da59d7d 100644 --- a/src/memory/workers/ingest.py +++ b/src/memory/workers/ingest.py @@ -5,6 +5,8 @@ from memory.common.celery_app import ( app, CLEAN_ALL_COLLECTIONS, REINGEST_MISSING_CHUNKS, + SYNC_ALL_COMICS, + SYNC_ALL_ARTICLE_FEEDS, ) logger = logging.getLogger(__name__) @@ -24,11 +26,11 @@ app.conf.beat_schedule = { "schedule": settings.EMAIL_SYNC_INTERVAL, }, "sync-all-comics": { - "task": "memory.workers.tasks.comic.sync_all_comics", + "task": SYNC_ALL_COMICS, "schedule": settings.COMIC_SYNC_INTERVAL, }, "sync-all-article-feeds": { - "task": "memory.workers.tasks.blogs.sync_all_article_feeds", + "task": SYNC_ALL_ARTICLE_FEEDS, "schedule": settings.ARTICLE_FEED_SYNC_INTERVAL, }, } diff --git a/src/memory/workers/tasks/comic.py b/src/memory/workers/tasks/comic.py index de75ff3..4bebd17 100644 --- a/src/memory/workers/tasks/comic.py +++ b/src/memory/workers/tasks/comic.py @@ -91,7 +91,9 @@ def sync_comic( file_type = image_url.split(".")[-1] mime_type = f"image/{file_type}" filename = ( - settings.COMIC_STORAGE_DIR / clean_filename(author) / f"{title}.{file_type}" + settings.COMIC_STORAGE_DIR + / clean_filename(author) + / f"{clean_filename(title)}.{file_type}" ) filename.parent.mkdir(parents=True, exist_ok=True)