mirror of
https://github.com/mruwnik/memory.git
synced 2025-10-23 15:16:35 +02:00
command to add blog
This commit is contained in:
parent
b68e15d3ab
commit
a2d107fad7
@ -41,6 +41,7 @@ UPDATE_METADATA_FOR_ITEM = f"{MAINTENANCE_ROOT}.update_metadata_for_item"
|
|||||||
SYNC_WEBPAGE = f"{BLOGS_ROOT}.sync_webpage"
|
SYNC_WEBPAGE = f"{BLOGS_ROOT}.sync_webpage"
|
||||||
SYNC_ARTICLE_FEED = f"{BLOGS_ROOT}.sync_article_feed"
|
SYNC_ARTICLE_FEED = f"{BLOGS_ROOT}.sync_article_feed"
|
||||||
SYNC_ALL_ARTICLE_FEEDS = f"{BLOGS_ROOT}.sync_all_article_feeds"
|
SYNC_ALL_ARTICLE_FEEDS = f"{BLOGS_ROOT}.sync_all_article_feeds"
|
||||||
|
ADD_ARTICLE_FEED = f"{BLOGS_ROOT}.add_article_feed"
|
||||||
SYNC_WEBSITE_ARCHIVE = f"{BLOGS_ROOT}.sync_website_archive"
|
SYNC_WEBSITE_ARCHIVE = f"{BLOGS_ROOT}.sync_website_archive"
|
||||||
|
|
||||||
|
|
||||||
|
@ -264,35 +264,35 @@ def get_archive_fetcher(url: str) -> ArchiveFetcher | None:
|
|||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
# "https://archive.ph/o/IQUoT/https://www.bloomberg.com/opinion/authors/ARbTQlRLRjE/matthew-s-levine",
|
"https://archive.ph/o/IQUoT/https://www.bloomberg.com/opinion/authors/ARbTQlRLRjE/matthew-s-levine",
|
||||||
# "https://www.rifters.com/crawl/",
|
"https://www.rifters.com/crawl/",
|
||||||
# "https://rachelbythebay.com/w/",
|
"https://rachelbythebay.com/w/",
|
||||||
# "https://danluu.com/",
|
"https://danluu.com/",
|
||||||
# "https://guzey.com",
|
"https://guzey.com",
|
||||||
# "https://aphyr.com/",
|
"https://aphyr.com/",
|
||||||
# "https://www.applieddivinitystudies.com/",
|
"https://www.applieddivinitystudies.com/",
|
||||||
# "https://www.imightbewrong.org/",
|
"https://www.imightbewrong.org/",
|
||||||
# "https://www.kvetch.au/",
|
"https://www.kvetch.au/",
|
||||||
# "https://www.overcomingbias.com/",
|
"https://www.overcomingbias.com/",
|
||||||
# "https://samkriss.substack.com/",
|
"https://samkriss.substack.com/",
|
||||||
# "https://www.richardhanania.com/",
|
"https://www.richardhanania.com/",
|
||||||
# "https://skunkledger.substack.com/",
|
"https://skunkledger.substack.com/",
|
||||||
# "https://taipology.substack.com/",
|
"https://taipology.substack.com/",
|
||||||
# "https://putanumonit.com/",
|
"https://putanumonit.com/",
|
||||||
# "https://www.flyingmachinestudios.com/",
|
"https://www.flyingmachinestudios.com/",
|
||||||
# "https://www.theintrinsicperspective.com/",
|
"https://www.theintrinsicperspective.com/",
|
||||||
# "https://www.strangeloopcanon.com/",
|
"https://www.strangeloopcanon.com/",
|
||||||
# "https://slimemoldtimemold.com/",
|
"https://slimemoldtimemold.com/",
|
||||||
# "https://zeroinputagriculture.substack.com/",
|
"https://zeroinputagriculture.substack.com/",
|
||||||
# "https://nayafia.substack.com",
|
"https://nayafia.substack.com",
|
||||||
# "https://www.paulgraham.com/articles.html",
|
"https://www.paulgraham.com/articles.html",
|
||||||
# "https://mcfunley.com/writing",
|
"https://mcfunley.com/writing",
|
||||||
# "https://www.bitsaboutmoney.com/",
|
"https://www.bitsaboutmoney.com/",
|
||||||
# "https://akarlin.com",
|
"https://akarlin.com",
|
||||||
# "https://www.exurbe.com/",
|
"https://www.exurbe.com/",
|
||||||
# "https://acoup.blog/",
|
"https://acoup.blog/",
|
||||||
# "https://www.theredhandfiles.com/",
|
"https://www.theredhandfiles.com/",
|
||||||
# "https://karlin.blog/",
|
"https://karlin.blog/",
|
||||||
"https://slatestarcodex.com/",
|
"https://slatestarcodex.com/",
|
||||||
"https://www.astralcodexten.com/",
|
"https://www.astralcodexten.com/",
|
||||||
"https://nayafia.substack.com",
|
"https://nayafia.substack.com",
|
||||||
|
@ -12,6 +12,7 @@ from memory.common.celery_app import (
|
|||||||
SYNC_WEBPAGE,
|
SYNC_WEBPAGE,
|
||||||
SYNC_ARTICLE_FEED,
|
SYNC_ARTICLE_FEED,
|
||||||
SYNC_ALL_ARTICLE_FEEDS,
|
SYNC_ALL_ARTICLE_FEEDS,
|
||||||
|
ADD_ARTICLE_FEED,
|
||||||
SYNC_WEBSITE_ARCHIVE,
|
SYNC_WEBSITE_ARCHIVE,
|
||||||
)
|
)
|
||||||
from memory.workers.tasks.content_processing import (
|
from memory.workers.tasks.content_processing import (
|
||||||
@ -169,10 +170,52 @@ def sync_all_article_feeds() -> list[dict]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
@app.task(name=ADD_ARTICLE_FEED)
def add_article_feed(
    url: str,
    title: str | None = None,
    description: str | None = None,
    tags: Iterable[str] = (),
    active: bool = True,
    check_interval: int = 60 * 24,  # 24 hours
) -> dict:
    """
    Add a new ArticleFeed.

    Args:
        url: URL of the feed
        title: Title of the feed; the URL is used when no title is given
        description: Description of the feed (optional)
        tags: Tags to apply to the feed; surrounding whitespace is stripped
            and blank entries are dropped
        active: Whether the feed is active
        check_interval: Interval in minutes to check the feed

    Returns:
        dict: ``{"status": "success", "feed_id": <id>}`` when the feed was
        created, or ``{"status": "error", "error": "Feed already exists"}``
        when a feed with the same URL is already stored.
    """
    # Normalise the tags into a concrete list before handing them to the
    # model: a caller that splits an empty comma-separated string passes
    # [""], and a generator would otherwise be stored as-is.
    stripped_tags = (tag.strip() for tag in tags or ())
    clean_tags = [tag for tag in stripped_tags if tag]

    with make_session() as session:
        # Feeds are looked up by URL, so a second add for the same URL is
        # reported as an error instead of creating a duplicate row.
        feed = session.query(ArticleFeed).filter(ArticleFeed.url == url).first()
        if feed:
            logger.info(f"Feed already exists: {url}")
            return {"status": "error", "error": "Feed already exists"}

        feed = ArticleFeed(
            url=url,
            title=title or url,
            description=description,
            active=active,
            check_interval=check_interval,
            tags=clean_tags,
        )
        session.add(feed)
        session.commit()
        # Read the id while the session is still open.
        return {"status": "success", "feed_id": feed.id}
|
||||||
|
|
||||||
|
|
||||||
@app.task(name=SYNC_WEBSITE_ARCHIVE)
|
@app.task(name=SYNC_WEBSITE_ARCHIVE)
|
||||||
@safe_task_execution
|
@safe_task_execution
|
||||||
def sync_website_archive(
|
def sync_website_archive(
|
||||||
url: str, tags: Iterable[str] = [], max_pages: int = 100
|
url: str, tags: Iterable[str] = [], max_pages: int = 100, add_feed: bool = True
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Synchronize all articles from a website's archive.
|
Synchronize all articles from a website's archive.
|
||||||
@ -187,6 +230,16 @@ def sync_website_archive(
|
|||||||
"""
|
"""
|
||||||
logger.info(f"Starting archive sync for: {url}")
|
logger.info(f"Starting archive sync for: {url}")
|
||||||
|
|
||||||
|
if add_feed:
|
||||||
|
with make_session() as session:
|
||||||
|
feed = session.query(ArticleFeed).filter(ArticleFeed.url == url).first()
|
||||||
|
if not feed:
|
||||||
|
feed = ArticleFeed(
|
||||||
|
url=url,
|
||||||
|
title=url,
|
||||||
|
active=True,
|
||||||
|
)
|
||||||
|
|
||||||
# Get archive fetcher for the website
|
# Get archive fetcher for the website
|
||||||
fetcher = get_archive_fetcher(url)
|
fetcher = get_archive_fetcher(url)
|
||||||
if not fetcher:
|
if not fetcher:
|
||||||
@ -200,10 +253,10 @@ def sync_website_archive(
|
|||||||
new_articles = 0
|
new_articles = 0
|
||||||
task_ids = []
|
task_ids = []
|
||||||
|
|
||||||
|
with make_session() as session:
|
||||||
for feed_item in fetcher.fetch_all_items():
|
for feed_item in fetcher.fetch_all_items():
|
||||||
articles_found += 1
|
articles_found += 1
|
||||||
|
|
||||||
with make_session() as session:
|
|
||||||
existing = check_content_exists(session, BlogPost, url=feed_item.url)
|
existing = check_content_exists(session, BlogPost, url=feed_item.url)
|
||||||
if existing:
|
if existing:
|
||||||
continue
|
continue
|
||||||
|
@ -27,6 +27,7 @@ from memory.common import settings
|
|||||||
from memory.common.celery_app import (
|
from memory.common.celery_app import (
|
||||||
SYNC_ALL_ARTICLE_FEEDS,
|
SYNC_ALL_ARTICLE_FEEDS,
|
||||||
SYNC_ARTICLE_FEED,
|
SYNC_ARTICLE_FEED,
|
||||||
|
ADD_ARTICLE_FEED,
|
||||||
SYNC_WEBPAGE,
|
SYNC_WEBPAGE,
|
||||||
SYNC_WEBSITE_ARCHIVE,
|
SYNC_WEBSITE_ARCHIVE,
|
||||||
SYNC_ALL_COMICS,
|
SYNC_ALL_COMICS,
|
||||||
@ -49,6 +50,7 @@ from memory.common.celery_app import (
|
|||||||
UPDATE_METADATA_FOR_ITEM,
|
UPDATE_METADATA_FOR_ITEM,
|
||||||
UPDATE_METADATA_FOR_SOURCE_ITEMS,
|
UPDATE_METADATA_FOR_SOURCE_ITEMS,
|
||||||
SETUP_GIT_NOTES,
|
SETUP_GIT_NOTES,
|
||||||
|
TRACK_GIT_CHANGES,
|
||||||
app,
|
app,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -78,6 +80,7 @@ TASK_MAPPINGS = {
|
|||||||
"sync_article_feed": SYNC_ARTICLE_FEED,
|
"sync_article_feed": SYNC_ARTICLE_FEED,
|
||||||
"sync_all_article_feeds": SYNC_ALL_ARTICLE_FEEDS,
|
"sync_all_article_feeds": SYNC_ALL_ARTICLE_FEEDS,
|
||||||
"sync_website_archive": SYNC_WEBSITE_ARCHIVE,
|
"sync_website_archive": SYNC_WEBSITE_ARCHIVE,
|
||||||
|
"add_article_feed": ADD_ARTICLE_FEED,
|
||||||
},
|
},
|
||||||
"comic": {
|
"comic": {
|
||||||
"sync_all_comics": SYNC_ALL_COMICS,
|
"sync_all_comics": SYNC_ALL_COMICS,
|
||||||
@ -92,6 +95,7 @@ TASK_MAPPINGS = {
|
|||||||
},
|
},
|
||||||
"notes": {
|
"notes": {
|
||||||
"setup_git_notes": SETUP_GIT_NOTES,
|
"setup_git_notes": SETUP_GIT_NOTES,
|
||||||
|
"track_git_changes": TRACK_GIT_CHANGES,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
QUEUE_MAPPINGS = {
|
QUEUE_MAPPINGS = {
|
||||||
@ -249,6 +253,13 @@ def notes_setup_git_notes(ctx, origin, email, name):
|
|||||||
execute_task(ctx, "notes", "setup_git_notes", origin=origin, email=email, name=name)
|
execute_task(ctx, "notes", "setup_git_notes", origin=origin, email=email, name=name)
|
||||||
|
|
||||||
|
|
||||||
|
@notes.command("track-git-changes")
@click.pass_context
def notes_track_git_changes(ctx):
    """Track git changes."""
    # This command declares no options, so only the task category and the
    # task key are forwarded to execute_task.
    category, task_name = "notes", "track_git_changes"
    execute_task(ctx, category, task_name)
|
||||||
|
|
||||||
|
|
||||||
@cli.group()
|
@cli.group()
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def maintenance(ctx):
|
def maintenance(ctx):
|
||||||
@ -376,6 +387,34 @@ def blogs_sync_website_archive(ctx, url):
|
|||||||
execute_task(ctx, "blogs", "sync_website_archive", url=url)
|
execute_task(ctx, "blogs", "sync_website_archive", url=url)
|
||||||
|
|
||||||
|
|
||||||
|
@blogs.command("add-article-feed")
@click.option("--url", required=True, help="URL of the feed")
@click.option("--title", help="Title of the feed")
@click.option("--description", help="Description of the feed")
@click.option("--tags", help="Comma-separated tags to apply to the feed", default="")
# A bare is_flag option defaults to False, which made every feed added from
# the CLI inactive unless --active was passed. Use an on/off pair that
# defaults to active; --active is still accepted.
@click.option(
    "--active/--inactive",
    default=True,
    help="Whether the feed is active",
)
@click.option(
    "--check-interval",
    type=int,
    help="Interval in minutes to check the feed",
    default=60 * 24,  # 24 hours
)
@click.pass_context
def blogs_add_article_feed(ctx, url, title, description, tags, active, check_interval):
    """Add a new article feed."""
    # "".split(",") yields [""], so drop blank entries (and stray whitespace)
    # rather than sending an empty-string tag to the task.
    tag_list = [tag.strip() for tag in tags.split(",") if tag.strip()]
    execute_task(
        ctx,
        "blogs",
        "add_article_feed",
        url=url,
        title=title,
        description=description,
        tags=tag_list,
        active=active,
        check_interval=check_interval,
    )
|
||||||
|
|
||||||
|
|
||||||
@cli.group()
|
@cli.group()
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def comic(ctx):
|
def comic(ctx):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user