Mirror of https://github.com/mruwnik/memory.git (synced 2025-10-02 05:02:35 +02:00)

Commit b68e15d3ab ("add blogs"), parent 862251fedb.

The commit comments out most of the hand-curated blog feed list, adds three Substack feeds (nayafia, homosabiens, usefulfictions) to the feeds list and to PARSER_REGISTRY, and wires up a scheduled LessWrong sync: a new LESSWRONG_SYNC_INTERVAL setting, an "until" cutoff in fetch_lesswrong_posts and sync_lesswrong, a Celery beat entry, and matching test updates.
@@ -106,6 +106,7 @@ ARTICLE_FEED_SYNC_INTERVAL = int(os.getenv("ARTICLE_FEED_SYNC_INTERVAL", 30 * 60
 CLEAN_COLLECTION_INTERVAL = int(os.getenv("CLEAN_COLLECTION_INTERVAL", 24 * 60 * 60))
 CHUNK_REINGEST_INTERVAL = int(os.getenv("CHUNK_REINGEST_INTERVAL", 60 * 60))
 NOTES_SYNC_INTERVAL = int(os.getenv("NOTES_SYNC_INTERVAL", 15 * 60))
+LESSWRONG_SYNC_INTERVAL = int(os.getenv("LESSWRONG_SYNC_INTERVAL", 60 * 60 * 24))
 CHUNK_REINGEST_SINCE_MINUTES = int(os.getenv("CHUNK_REINGEST_SINCE_MINUTES", 60 * 24))
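The new setting follows the same env-override pattern as the surrounding intervals: the default of 60 * 60 * 24 seconds means the LessWrong sync is scheduled once a day unless the environment says otherwise. A minimal sketch of that pattern (the override value is only an illustration, not something in the commit):

    import os

    # Env var wins when present, otherwise the literal default
    # (86400 seconds, i.e. once a day).
    LESSWRONG_SYNC_INTERVAL = int(os.getenv("LESSWRONG_SYNC_INTERVAL", 60 * 60 * 24))

    # e.g. exporting LESSWRONG_SYNC_INTERVAL=43200 before starting the worker
    # would run the sync twice a day instead.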
@@ -264,35 +264,38 @@ def get_archive_fetcher(url: str) -> ArchiveFetcher | None:
 feeds = [
-    "https://archive.ph/o/IQUoT/https://www.bloomberg.com/opinion/authors/ARbTQlRLRjE/matthew-s-levine",
+    # "https://archive.ph/o/IQUoT/https://www.bloomberg.com/opinion/authors/ARbTQlRLRjE/matthew-s-levine",
-    "https://www.rifters.com/crawl/",
+    # "https://www.rifters.com/crawl/",
-    "https://rachelbythebay.com/w/",
+    # "https://rachelbythebay.com/w/",
-    "https://danluu.com/",
+    # "https://danluu.com/",
-    "https://guzey.com",
+    # "https://guzey.com",
-    "https://aphyr.com/",
+    # "https://aphyr.com/",
-    "https://www.applieddivinitystudies.com/",
+    # "https://www.applieddivinitystudies.com/",
-    "https://www.imightbewrong.org/",
+    # "https://www.imightbewrong.org/",
-    "https://www.kvetch.au/",
+    # "https://www.kvetch.au/",
-    "https://www.overcomingbias.com/",
+    # "https://www.overcomingbias.com/",
-    "https://samkriss.substack.com/",
+    # "https://samkriss.substack.com/",
-    "https://www.richardhanania.com/",
+    # "https://www.richardhanania.com/",
-    "https://skunkledger.substack.com/",
+    # "https://skunkledger.substack.com/",
-    "https://taipology.substack.com/",
+    # "https://taipology.substack.com/",
-    "https://putanumonit.com/",
+    # "https://putanumonit.com/",
-    "https://www.flyingmachinestudios.com/",
+    # "https://www.flyingmachinestudios.com/",
-    "https://www.theintrinsicperspective.com/",
+    # "https://www.theintrinsicperspective.com/",
-    "https://www.strangeloopcanon.com/",
+    # "https://www.strangeloopcanon.com/",
-    "https://slimemoldtimemold.com/",
+    # "https://slimemoldtimemold.com/",
-    "https://zeroinputagriculture.substack.com/",
+    # "https://zeroinputagriculture.substack.com/",
-    "https://nayafia.substack.com",
+    # "https://nayafia.substack.com",
-    "https://www.paulgraham.com/articles.html",
+    # "https://www.paulgraham.com/articles.html",
-    "https://mcfunley.com/writing",
+    # "https://mcfunley.com/writing",
-    "https://www.bitsaboutmoney.com/",
+    # "https://www.bitsaboutmoney.com/",
-    "https://akarlin.com",
+    # "https://akarlin.com",
-    "https://www.exurbe.com/",
+    # "https://www.exurbe.com/",
-    "https://acoup.blog/",
+    # "https://acoup.blog/",
-    "https://www.theredhandfiles.com/",
+    # "https://www.theredhandfiles.com/",
-    "https://karlin.blog/",
+    # "https://karlin.blog/",
     "https://slatestarcodex.com/",
     "https://www.astralcodexten.com/",
+    "https://nayafia.substack.com",
+    "https://homosabiens.substack.com",
+    "https://usefulfictions.substack.com",
 ]
@@ -609,6 +609,9 @@ PARSER_REGISTRY = {
     r"rachelbythebay\.com": RachelByTheBayParser,
     r"nadia\.xyz": NadiaXyzParser,
     r"slatestarcodex\.com": SlateStarCodexParser,
+    r"nayafia\.substack\.com": SubstackParser,
+    r"homosabiens\.substack\.com": SubstackParser,
+    r"usefulfictions\.substack\.com": SubstackParser,
 }
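The registry keys are regular expressions, so the three new Substack domains should be picked up by whatever helper resolves a feed URL to a parser class. That helper is not part of this diff; the sketch below is only an assumption about how a regex-keyed registry like this is typically consumed (the get_parser name is made up):

    import re

    def get_parser(url: str):
        # Hypothetical lookup, not from the repo: first pattern that matches wins.
        # PARSER_REGISTRY is the dict shown in the hunk above.
        for pattern, parser_cls in PARSER_REGISTRY.items():
            if re.search(pattern, url):
                return parser_cls()
        return None

    # get_parser("https://usefulfictions.substack.com/p/some-post") -> SubstackParser()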
@@ -237,6 +237,7 @@ def fetch_lesswrong(
 def fetch_lesswrong_posts(
     since: datetime | None = None,
+    until: datetime | None = None,
     min_karma: int = 10,
     limit: int = 50,
     cooldown: float = 0.5,
@@ -280,6 +281,9 @@ def fetch_lesswrong_posts(
             break

         for post in page_posts:
+            published_at = post.get("published_at")
+            if published_at and until and published_at > until:
+                break
             yield post

         last_item = page_posts[-1]
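With the cutoff in place, callers can bound the fetch window on both ends: posts are yielded lazily and the new check breaks out of the page loop once a post falls past the until cutoff. A hedged usage sketch, assuming fetch_lesswrong_posts is imported from its module; field names other than published_at are assumptions:

    from datetime import datetime, timedelta

    start = datetime.now() - timedelta(days=7)
    end = datetime.now() - timedelta(hours=8)

    # Iterate posts in the [start, end] window without pulling newer ones.
    for post in fetch_lesswrong_posts(since=start, until=end, min_karma=10):
        print(post.get("published_at"), post.get("title"))  # "title" key is assumed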
@@ -8,6 +8,7 @@ from memory.common.celery_app import (
     SYNC_ALL_COMICS,
     SYNC_ALL_ARTICLE_FEEDS,
     TRACK_GIT_CHANGES,
+    SYNC_LESSWRONG,
 )

 logger = logging.getLogger(__name__)
@@ -38,4 +39,8 @@ app.conf.beat_schedule = {
         "task": TRACK_GIT_CHANGES,
         "schedule": settings.NOTES_SYNC_INTERVAL,
     },
+    "sync-lesswrong": {
+        "task": SYNC_LESSWRONG,
+        "schedule": settings.LESSWRONG_SYNC_INTERVAL,
+    },
 }
@@ -49,7 +49,8 @@ def sync_lesswrong_post(
 @app.task(name=SYNC_LESSWRONG)
 @safe_task_execution
 def sync_lesswrong(
-    since: str = (datetime.now() - timedelta(days=30)).isoformat(),
+    since: str | None = None,
+    until: str | None = None,
     min_karma: int = 10,
     limit: int = 50,
     cooldown: float = 0.5,
@@ -57,9 +58,27 @@ def sync_lesswrong(
     af: bool = False,
     tags: list[str] = [],
 ):
+    if until:
+        end_date = datetime.fromisoformat(until)
+    else:
+        end_date = datetime.now() - timedelta(hours=8)
+
     logger.info(f"Syncing LessWrong posts since {since}")

-    start_date = datetime.fromisoformat(since)
-    posts = fetch_lesswrong_posts(start_date, min_karma, limit, cooldown, max_items, af)
+    if since:
+        start_date = datetime.fromisoformat(since)
+    else:
+        start_date = end_date - timedelta(days=30)
+
+    posts = fetch_lesswrong_posts(
+        since=start_date,
+        until=end_date,
+        min_karma=min_karma,
+        limit=limit,
+        cooldown=cooldown,
+        max_items=max_items,
+        af=af,
+    )

     posts_num, new_posts = 0, 0
     with make_session() as session:
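Read as a whole, the new branches give the task an explicit date window: until falls back to eight hours ago (presumably to avoid ingesting very fresh posts), and since falls back to thirty days before that end point. A consolidated sketch of just the window computation; the resolve_window helper is illustrative and does not exist in the repo:

    from datetime import datetime, timedelta

    def resolve_window(since: str | None, until: str | None) -> tuple[datetime, datetime]:
        # Mirrors the defaulting logic added to sync_lesswrong in this commit.
        end_date = datetime.fromisoformat(until) if until else datetime.now() - timedelta(hours=8)
        start_date = datetime.fromisoformat(since) if since else end_date - timedelta(days=30)
        return start_date, end_date

    # resolve_window(None, None)                  -> (now - 8h - 30d, now - 8h)
    # resolve_window("2024-01-01T00:00:00", None) -> (2024-01-01 00:00, now - 8h)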
@@ -1,5 +1,5 @@
 import pytest
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone
 from unittest.mock import Mock, patch

 from memory.common.db.models import ForumPost
@@ -228,17 +228,19 @@ def test_sync_lesswrong_success(mock_fetch, mock_lesswrong_post, db_session):
     assert result["since"] == "2024-01-01T00:00:00"
     assert result["min_karma"] == 10
     assert result["max_items"] == 100
-    assert result["af"] == False
+    assert not result["af"]

-    # Verify fetch_lesswrong_posts was called with correct arguments
-    mock_fetch.assert_called_once_with(
-        datetime.fromisoformat("2024-01-01T00:00:00"),
-        10,  # min_karma
-        50,  # limit
-        0.1,  # cooldown
-        100,  # max_items
-        False,  # af
-    )
+    # Verify fetch_lesswrong_posts was called with correct arguments (kwargs)
+    mock_fetch.assert_called_once()
+    kwargs = mock_fetch.call_args.kwargs
+    assert kwargs["since"] == datetime.fromisoformat("2024-01-01T00:00:00")
+    assert kwargs["min_karma"] == 10
+    assert kwargs["limit"] == 50
+    assert kwargs["cooldown"] == 0.1
+    assert kwargs["max_items"] == 100
+    assert kwargs["af"] is False
+    assert "until" in kwargs
+    assert isinstance(kwargs["until"], datetime)

     # Verify sync_lesswrong_post was called for the new post
     mock_sync_post.delay.assert_called_once_with(mock_lesswrong_post, ["test"])
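Because sync_lesswrong now passes everything to the fetcher as keyword arguments, the assertions move from positional call_args[0] indexing to call_args.kwargs. That attribute is standard unittest.mock behaviour (Python 3.8+), shown here as a self-contained reminder rather than project code:

    from unittest.mock import Mock

    m = Mock()
    m(since="2024-01-01T00:00:00", min_karma=10)

    assert m.call_args.args == ()                 # nothing was passed positionally
    assert m.call_args.kwargs["min_karma"] == 10  # keyword arguments looked up by name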
@@ -343,11 +345,14 @@ def test_sync_lesswrong_since_parameter(mock_fetch, db_session):
     forums.sync_lesswrong(since="2024-01-01T00:00:00")
     expected_since = datetime.fromisoformat("2024-01-01T00:00:00")

-    # Verify fetch was called with correct since date
-    call_args = mock_fetch.call_args[0]
-    actual_since = call_args[0]
+    # Verify fetch was called with correct since date (kwargs)
+    kwargs = mock_fetch.call_args.kwargs
+    actual_since = kwargs["since"]

     assert actual_since == expected_since
+    assert "until" in kwargs
+    assert isinstance(kwargs["until"], datetime)
+    assert kwargs["until"] >= actual_since


 @pytest.mark.parametrize(
@@ -373,14 +378,14 @@ def test_sync_lesswrong_parameters(
         max_items=500,
     )

-    # Verify fetch was called with correct parameters
-    call_args = mock_fetch.call_args[0]
+    # Verify fetch was called with correct parameters (kwargs)
+    kwargs = mock_fetch.call_args.kwargs

-    assert call_args[1] == min_karma  # min_karma
-    assert call_args[2] == limit  # limit
-    assert call_args[3] == cooldown  # cooldown
-    assert call_args[4] == 500  # max_items
-    assert call_args[5] == af_value  # af
+    assert kwargs["min_karma"] == min_karma
+    assert kwargs["limit"] == limit
+    assert kwargs["cooldown"] == cooldown
+    assert kwargs["max_items"] == 500
+    assert kwargs["af"] == af_value

     assert result["min_karma"] == min_karma
     assert result["af"] == af_value