minor fixes

This commit is contained in:
EC2 Default User 2025-06-04 00:13:35 +00:00
parent ac9bdb1dfc
commit 3d9f8ae55f
5 changed files with 42 additions and 17 deletions

View File

@ -269,7 +269,7 @@ class Comic(SourceItem):
return {k: v for k, v in payload.items() if v is not None} return {k: v for k, v in payload.items() if v is not None}
def _chunk_contents(self) -> Sequence[extract.DataChunk]: def _chunk_contents(self) -> Sequence[extract.DataChunk]:
image = Image.open(pathlib.Path(cast(str, self.filename))) image = Image.open(settings.FILE_STORAGE_DIR / cast(str, self.filename))
description = f"{self.title} by {self.author}" description = f"{self.title} by {self.author}"
return [extract.DataChunk(data=[image, description])] return [extract.DataChunk(data=[image, description])]

View File

@ -1,8 +1,9 @@
import json
import logging import logging
import traceback import traceback
from typing import Any from typing import Any
from bs4 import BeautifulSoup
from memory.common import settings, chunker from memory.common import settings, chunker
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -12,11 +13,13 @@ The following text is already concise. Please identify 3-5 relevant tags that ca
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning". Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
Return your response as JSON with this format: Return your response as XML with this format:
{{ <summary>{summary}</summary>
"summary": "{summary}", <tags>
"tags": ["tag1", "tag2", "tag3"] <tag>tag1</tag>
}} <tag>tag2</tag>
<tag>tag3</tag>
</tags>
Text: Text:
{content} {content}
@ -28,17 +31,28 @@ Also provide 3-5 relevant tags that capture the main topics or themes.
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning". Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
Return your response as JSON with this format: Return your response as XML with this format:
{{
"summary": "your summary here", <summary>your summary here</summary>
"tags": ["tag1", "tag2", "tag3"] <tags>
}} <tag>tag1</tag>
<tag>tag2</tag>
<tag>tag3</tag>
</tags>
Text to summarize: Text to summarize:
{content} {content}
""" """
def parse_response(response: str) -> dict[str, Any]:
    """Parse the summarizer's XML reply into a summary and a list of tags.

    The prompts ask the model for a ``<summary>`` element followed by a
    ``<tags>`` element that holds ``<tag>`` children.

    Args:
        response: Raw text returned by the model.

    Returns:
        A dict with ``"summary"`` (text of the first ``<summary>`` element,
        or ``""`` when none is present) and ``"tags"`` (text of every
        ``<tag>`` element, in document order; empty when there are none).
    """
    # The requested format has two sibling top-level elements, which is not a
    # single-rooted XML document; wrap it in one root so the XML parser keeps
    # <tags> as well as <summary>.
    soup = BeautifulSoup(f"<response>{response}</response>", "xml")
    summary_node = soup.find("summary")
    # An empty or malformed reply has no <summary>; find() then returns None,
    # so guard instead of failing with AttributeError on `.text`.
    summary = summary_node.text if summary_node is not None else ""
    tags = [tag.text for tag in soup.find_all("tag")]
    return {"summary": summary, "tags": tags}
def _call_openai(prompt: str) -> dict[str, Any]: def _call_openai(prompt: str) -> dict[str, Any]:
"""Call OpenAI API for summarization.""" """Call OpenAI API for summarization."""
import openai import openai
@ -58,7 +72,7 @@ def _call_openai(prompt: str) -> dict[str, Any]:
temperature=0.3, temperature=0.3,
max_tokens=2048, max_tokens=2048,
) )
return json.loads(response.choices[0].message.content or "{}") return parse_response(response.choices[0].message.content or "")
except Exception as e: except Exception as e:
logger.error(f"OpenAI API error: {e}") logger.error(f"OpenAI API error: {e}")
raise raise
@ -73,13 +87,14 @@ def _call_anthropic(prompt: str) -> dict[str, Any]:
response = client.messages.create( response = client.messages.create(
model=settings.SUMMARIZER_MODEL.split("/")[1], model=settings.SUMMARIZER_MODEL.split("/")[1],
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid JSON.", system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid XML.",
temperature=0.3, temperature=0.3,
max_tokens=2048, max_tokens=2048,
) )
return json.loads(response.content[0].text) return parse_response(response.content[0].text)
except Exception as e: except Exception as e:
logger.error(f"Anthropic API error: {e}") logger.error(f"Anthropic API error: {e}")
logger.error(response.content[0].text)
raise raise

View File

@ -294,4 +294,5 @@ feeds = [
"https://www.theredhandfiles.com/", "https://www.theredhandfiles.com/",
"https://karlin.blog/", "https://karlin.blog/",
"https://slatestarcodex.com/", "https://slatestarcodex.com/",
"https://www.astralcodexten.com/",
] ]

View File

@ -75,6 +75,7 @@ def sync_comic(
published_date: datetime | None = None, published_date: datetime | None = None,
): ):
"""Synchronize a comic from a URL.""" """Synchronize a comic from a URL."""
logger.info(f"syncing comic {url}")
with make_session() as session: with make_session() as session:
existing_comic = check_content_exists(session, Comic, url=url) existing_comic = check_content_exists(session, Comic, url=url)
if existing_comic: if existing_comic:
@ -101,7 +102,7 @@ def sync_comic(
url=url, url=url,
published=published_date, published=published_date,
author=author, author=author,
filename=filename.resolve().as_posix(), filename=filename.resolve().relative_to(settings.FILE_STORAGE_DIR).as_posix(),
mime_type=mime_type, mime_type=mime_type,
size=len(response.content), size=len(response.content),
sha256=create_content_hash(f"{image_url}{published_date}"), sha256=create_content_hash(f"{image_url}{published_date}"),

View File

@ -84,6 +84,7 @@ TASK_MAPPINGS = {
"sync_smbc": SYNC_SMBC, "sync_smbc": SYNC_SMBC,
"sync_xkcd": SYNC_XKCD, "sync_xkcd": SYNC_XKCD,
"sync_comic": SYNC_COMIC, "sync_comic": SYNC_COMIC,
"full_sync_comics": "memory.workers.tasks.comic.full_sync_comic",
}, },
"forums": { "forums": {
"sync_lesswrong": SYNC_LESSWRONG, "sync_lesswrong": SYNC_LESSWRONG,
@ -422,6 +423,13 @@ def comic_sync_comic(ctx, image_url, title, author, published_date):
) )
@comic.command("full-sync-comics")
@click.pass_context
def comic_full_sync_comics(ctx):
    """Full sync comics."""
    # Hand the click context to execute_task together with the category and
    # task key identifying the full comic sync.
    category = "comic"
    task_name = "full_sync_comics"
    execute_task(ctx, category, task_name)
@cli.group() @cli.group()
@click.pass_context @click.pass_context
def forums(ctx): def forums(ctx):
@ -442,7 +450,7 @@ def forums_sync_lesswrong(ctx, since_date, min_karma, limit, cooldown, max_items
ctx, ctx,
"forums", "forums",
"sync_lesswrong", "sync_lesswrong",
since_date=since_date, since=since_date,
min_karma=min_karma, min_karma=min_karma,
limit=limit, limit=limit,
cooldown=cooldown, cooldown=cooldown,