minor fixes

This commit is contained in:
EC2 Default User 2025-06-04 00:13:35 +00:00
parent ac9bdb1dfc
commit 3d9f8ae55f
5 changed files with 42 additions and 17 deletions

View File

@ -269,7 +269,7 @@ class Comic(SourceItem):
return {k: v for k, v in payload.items() if v is not None}
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
    """Build the single chunk for this comic: its image plus a text caption.

    Returns:
        A one-element list holding a ``DataChunk`` whose data is the opened
        image followed by the string ``"<title> by <author>"``.
    """
    # Open the image exactly once, resolved against the storage root. A
    # second open of the bare filename would only create an image object
    # that is immediately overwritten (and never closed).
    image = Image.open(settings.FILE_STORAGE_DIR / cast(str, self.filename))
    description = f"{self.title} by {self.author}"
    return [extract.DataChunk(data=[image, description])]

View File

@ -1,8 +1,9 @@
import json
import logging
import traceback
from typing import Any
from bs4 import BeautifulSoup
from memory.common import settings, chunker
logger = logging.getLogger(__name__)
@ -12,11 +13,13 @@ The following text is already concise. Please identify 3-5 relevant tags that ca
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
Return your response as JSON with this format:
{{
"summary": "{summary}",
"tags": ["tag1", "tag2", "tag3"]
}}
Return your response as XML with this format:
<summary>{summary}</summary>
<tags>
<tag>tag1</tag>
<tag>tag2</tag>
<tag>tag3</tag>
</tags>
Text:
{content}
@ -28,17 +31,28 @@ Also provide 3-5 relevant tags that capture the main topics or themes.
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
Return your response as JSON with this format:
{{
"summary": "your summary here",
"tags": ["tag1", "tag2", "tag3"]
}}
Return your response as XML with this format:
<summary>your summary here</summary>
<tags>
<tag>tag1</tag>
<tag>tag2</tag>
<tag>tag3</tag>
</tags>
Text to summarize:
{content}
"""
def parse_response(response: str) -> dict[str, Any]:
    """Parse the summarizer's XML reply into a summary and a list of tags.

    Args:
        response: Raw model output, expected to contain a ``<summary>``
            element and zero or more ``<tag>`` elements.

    Returns:
        A dict with ``"summary"`` (the text of the first ``<summary>``
        element, or ``""`` when there is none) and ``"tags"`` (the text of
        every ``<tag>`` element, in document order).
    """
    soup = BeautifulSoup(response, "xml")
    # ``find`` returns None when the element is absent (e.g. an empty reply);
    # fall back to an empty summary rather than failing on ``None.text``.
    summary_node = soup.find("summary")
    summary = summary_node.text if summary_node is not None else ""
    tags = [tag.text for tag in soup.find_all("tag")]
    return {"summary": summary, "tags": tags}
def _call_openai(prompt: str) -> dict[str, Any]:
"""Call OpenAI API for summarization."""
import openai
@ -58,7 +72,7 @@ def _call_openai(prompt: str) -> dict[str, Any]:
temperature=0.3,
max_tokens=2048,
)
return json.loads(response.choices[0].message.content or "{}")
return parse_response(response.choices[0].message.content or "")
except Exception as e:
logger.error(f"OpenAI API error: {e}")
raise
@ -73,13 +87,14 @@ def _call_anthropic(prompt: str) -> dict[str, Any]:
response = client.messages.create(
model=settings.SUMMARIZER_MODEL.split("/")[1],
messages=[{"role": "user", "content": prompt}],
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid JSON.",
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid XML.",
temperature=0.3,
max_tokens=2048,
)
return json.loads(response.content[0].text)
return parse_response(response.content[0].text)
except Exception as e:
logger.error(f"Anthropic API error: {e}")
logger.error(response.content[0].text)
raise

View File

@ -294,4 +294,5 @@ feeds = [
"https://www.theredhandfiles.com/",
"https://karlin.blog/",
"https://slatestarcodex.com/",
"https://www.astralcodexten.com/",
]

View File

@ -75,6 +75,7 @@ def sync_comic(
published_date: datetime | None = None,
):
"""Synchronize a comic from a URL."""
logger.info(f"syncing comic {url}")
with make_session() as session:
existing_comic = check_content_exists(session, Comic, url=url)
if existing_comic:
@ -101,7 +102,7 @@ def sync_comic(
url=url,
published=published_date,
author=author,
filename=filename.resolve().as_posix(),
filename=filename.resolve().relative_to(settings.FILE_STORAGE_DIR).as_posix(),
mime_type=mime_type,
size=len(response.content),
sha256=create_content_hash(f"{image_url}{published_date}"),

View File

@ -84,6 +84,7 @@ TASK_MAPPINGS = {
"sync_smbc": SYNC_SMBC,
"sync_xkcd": SYNC_XKCD,
"sync_comic": SYNC_COMIC,
"full_sync_comics": "memory.workers.tasks.comic.full_sync_comic",
},
"forums": {
"sync_lesswrong": SYNC_LESSWRONG,
@ -422,6 +423,13 @@ def comic_sync_comic(ctx, image_url, title, author, published_date):
)
@comic.command("full-sync-comics")
@click.pass_context
def comic_full_sync_comics(ctx):
    """Full sync comics."""
    # Name the task lookup pair explicitly; no extra keyword arguments are
    # forwarded to the task.
    category, task_name = "comic", "full_sync_comics"
    execute_task(ctx, category, task_name)
@cli.group()
@click.pass_context
def forums(ctx):
@ -442,7 +450,7 @@ def forums_sync_lesswrong(ctx, since_date, min_karma, limit, cooldown, max_items
ctx,
"forums",
"sync_lesswrong",
since_date=since_date,
since=since_date,
min_karma=min_karma,
limit=limit,
cooldown=cooldown,