mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-28 15:14:45 +02:00
minor fixes
This commit is contained in:
parent
ac9bdb1dfc
commit
3d9f8ae55f
@ -269,7 +269,7 @@ class Comic(SourceItem):
|
||||
return {k: v for k, v in payload.items() if v is not None}
|
||||
|
||||
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
    """Return the single chunk for this comic: its image plus a caption.

    Returns:
        A one-element list holding a ``DataChunk`` whose data is
        ``[image, "<title> by <author>"]``.
    """
    # Open the image exactly once, resolved against the storage root.
    # sync_comic stores ``filename`` relative to settings.FILE_STORAGE_DIR,
    # so opening the bare filename would resolve against the current working
    # directory instead.
    image = Image.open(settings.FILE_STORAGE_DIR / cast(str, self.filename))
    description = f"{self.title} by {self.author}"
    return [extract.DataChunk(data=[image, description])]
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from typing import Any
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from memory.common import settings, chunker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -12,11 +13,13 @@ The following text is already concise. Please identify 3-5 relevant tags that ca
|
||||
|
||||
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
||||
|
||||
Return your response as JSON with this format:
|
||||
{{
|
||||
"summary": "{summary}",
|
||||
"tags": ["tag1", "tag2", "tag3"]
|
||||
}}
|
||||
Return your response as XML with this format:
|
||||
<summary>{summary}</summary>
|
||||
<tags>
|
||||
<tag>tag1</tag>
|
||||
<tag>tag2</tag>
|
||||
<tag>tag3</tag>
|
||||
</tags>
|
||||
|
||||
Text:
|
||||
{content}
|
||||
@ -28,17 +31,28 @@ Also provide 3-5 relevant tags that capture the main topics or themes.
|
||||
|
||||
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
||||
|
||||
Return your response as JSON with this format:
|
||||
{{
|
||||
"summary": "your summary here",
|
||||
"tags": ["tag1", "tag2", "tag3"]
|
||||
}}
|
||||
Return your response as XML with this format:
|
||||
|
||||
<summary>your summary here</summary>
|
||||
<tags>
|
||||
<tag>tag1</tag>
|
||||
<tag>tag2</tag>
|
||||
<tag>tag3</tag>
|
||||
</tags>
|
||||
|
||||
Text to summarize:
|
||||
{content}
|
||||
"""
|
||||
|
||||
|
||||
def parse_response(response: str) -> dict[str, Any]:
    """Parse the summarizer's XML reply into a summary and a list of tags.

    The reply is expected to contain a ``<summary>`` element and zero or more
    ``<tag>`` elements, as requested by the prompts in this module.

    Args:
        response: Raw text returned by the model.

    Returns:
        A dict with two keys: ``"summary"`` (the text of the first
        ``<summary>`` element, or ``""`` if there is none) and ``"tags"``
        (the text of every ``<tag>`` element, possibly an empty list).
    """
    # A blank reply has nothing to parse; _call_openai passes "" when the API
    # returned no content.
    if not response or not response.strip():
        return {"summary": "", "tags": []}

    soup = BeautifulSoup(response, "xml")

    # find() returns None when the element is absent, so guard before
    # reading .text instead of raising AttributeError.
    summary_node = soup.find("summary")
    summary = summary_node.text if summary_node is not None else ""

    tags = [tag.text for tag in soup.find_all("tag")]
    return {"summary": summary, "tags": tags}
|
||||
|
||||
|
||||
def _call_openai(prompt: str) -> dict[str, Any]:
|
||||
"""Call OpenAI API for summarization."""
|
||||
import openai
|
||||
@ -58,7 +72,7 @@ def _call_openai(prompt: str) -> dict[str, Any]:
|
||||
temperature=0.3,
|
||||
max_tokens=2048,
|
||||
)
|
||||
return json.loads(response.choices[0].message.content or "{}")
|
||||
return parse_response(response.choices[0].message.content or "")
|
||||
except Exception as e:
|
||||
logger.error(f"OpenAI API error: {e}")
|
||||
raise
|
||||
@ -73,13 +87,14 @@ def _call_anthropic(prompt: str) -> dict[str, Any]:
|
||||
response = client.messages.create(
|
||||
model=settings.SUMMARIZER_MODEL.split("/")[1],
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid JSON.",
|
||||
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid XML.",
|
||||
temperature=0.3,
|
||||
max_tokens=2048,
|
||||
)
|
||||
return json.loads(response.content[0].text)
|
||||
return parse_response(response.content[0].text)
|
||||
except Exception as e:
|
||||
logger.error(f"Anthropic API error: {e}")
|
||||
logger.error(response.content[0].text)
|
||||
raise
|
||||
|
||||
|
||||
|
@ -294,4 +294,5 @@ feeds = [
|
||||
"https://www.theredhandfiles.com/",
|
||||
"https://karlin.blog/",
|
||||
"https://slatestarcodex.com/",
|
||||
"https://www.astralcodexten.com/",
|
||||
]
|
||||
|
@ -75,6 +75,7 @@ def sync_comic(
|
||||
published_date: datetime | None = None,
|
||||
):
|
||||
"""Synchronize a comic from a URL."""
|
||||
logger.info(f"syncing comic {url}")
|
||||
with make_session() as session:
|
||||
existing_comic = check_content_exists(session, Comic, url=url)
|
||||
if existing_comic:
|
||||
@ -101,7 +102,7 @@ def sync_comic(
|
||||
url=url,
|
||||
published=published_date,
|
||||
author=author,
|
||||
filename=filename.resolve().as_posix(),
|
||||
filename=filename.resolve().relative_to(settings.FILE_STORAGE_DIR).as_posix(),
|
||||
mime_type=mime_type,
|
||||
size=len(response.content),
|
||||
sha256=create_content_hash(f"{image_url}{published_date}"),
|
||||
|
@ -84,6 +84,7 @@ TASK_MAPPINGS = {
|
||||
"sync_smbc": SYNC_SMBC,
|
||||
"sync_xkcd": SYNC_XKCD,
|
||||
"sync_comic": SYNC_COMIC,
|
||||
"full_sync_comics": "memory.workers.tasks.comic.full_sync_comic",
|
||||
},
|
||||
"forums": {
|
||||
"sync_lesswrong": SYNC_LESSWRONG,
|
||||
@ -422,6 +423,13 @@ def comic_sync_comic(ctx, image_url, title, author, published_date):
|
||||
)
|
||||
|
||||
|
||||
@comic.command("full-sync-comics")
@click.pass_context
def comic_full_sync_comics(ctx):
    """Full sync comics."""
    # Dispatch the task registered under the "comic" category with the
    # "full_sync_comics" key.
    category, task_name = "comic", "full_sync_comics"
    execute_task(ctx, category, task_name)
|
||||
|
||||
|
||||
@cli.group()
|
||||
@click.pass_context
|
||||
def forums(ctx):
|
||||
@ -442,7 +450,7 @@ def forums_sync_lesswrong(ctx, since_date, min_karma, limit, cooldown, max_items
|
||||
ctx,
|
||||
"forums",
|
||||
"sync_lesswrong",
|
||||
since_date=since_date,
|
||||
since=since_date,
|
||||
min_karma=min_karma,
|
||||
limit=limit,
|
||||
cooldown=cooldown,
|
||||
|
Loading…
x
Reference in New Issue
Block a user