mirror of
https://github.com/mruwnik/memory.git
synced 2025-06-28 15:14:45 +02:00
minor fixes
This commit is contained in:
parent
ac9bdb1dfc
commit
3d9f8ae55f
@ -269,7 +269,7 @@ class Comic(SourceItem):
|
|||||||
return {k: v for k, v in payload.items() if v is not None}
|
return {k: v for k, v in payload.items() if v is not None}
|
||||||
|
|
||||||
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
|
def _chunk_contents(self) -> Sequence[extract.DataChunk]:
|
||||||
image = Image.open(pathlib.Path(cast(str, self.filename)))
|
image = Image.open(settings.FILE_STORAGE_DIR / cast(str, self.filename))
|
||||||
description = f"{self.title} by {self.author}"
|
description = f"{self.title} by {self.author}"
|
||||||
return [extract.DataChunk(data=[image, description])]
|
return [extract.DataChunk(data=[image, description])]
|
||||||
|
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from memory.common import settings, chunker
|
from memory.common import settings, chunker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -12,11 +13,13 @@ The following text is already concise. Please identify 3-5 relevant tags that ca
|
|||||||
|
|
||||||
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
||||||
|
|
||||||
Return your response as JSON with this format:
|
Return your response as XML with this format:
|
||||||
{{
|
<summary>{summary}</summary>
|
||||||
"summary": "{summary}",
|
<tags>
|
||||||
"tags": ["tag1", "tag2", "tag3"]
|
<tag>tag1</tag>
|
||||||
}}
|
<tag>tag2</tag>
|
||||||
|
<tag>tag3</tag>
|
||||||
|
</tags>
|
||||||
|
|
||||||
Text:
|
Text:
|
||||||
{content}
|
{content}
|
||||||
@ -28,17 +31,28 @@ Also provide 3-5 relevant tags that capture the main topics or themes.
|
|||||||
|
|
||||||
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
Tags should be lowercase and use hyphens instead of spaces, e.g. "machine-learning" instead of "Machine Learning".
|
||||||
|
|
||||||
Return your response as JSON with this format:
|
Return your response as XML with this format:
|
||||||
{{
|
|
||||||
"summary": "your summary here",
|
<summary>your summary here</summary>
|
||||||
"tags": ["tag1", "tag2", "tag3"]
|
<tags>
|
||||||
}}
|
<tag>tag1</tag>
|
||||||
|
<tag>tag2</tag>
|
||||||
|
<tag>tag3</tag>
|
||||||
|
</tags>
|
||||||
|
|
||||||
Text to summarize:
|
Text to summarize:
|
||||||
{content}
|
{content}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_response(response: str) -> dict[str, Any]:
    """Parse the summarizer's XML reply into a summary and a list of tags.

    Args:
        response: Raw model output, expected to contain a ``<summary>``
            element and zero or more ``<tag>`` elements.

    Returns:
        A dict with two keys: ``"summary"`` (the text of the first
        ``<summary>`` element, or ``""`` when the reply is empty or has no
        such element) and ``"tags"`` (the text of every ``<tag>`` element,
        possibly an empty list).
    """
    # NOTE(review): the prompt format asks for two top-level elements
    # (<summary> and <tags>). Strict XML allows a single root, so confirm
    # that the "xml" parser keeps the <tags> element for such replies.
    soup = BeautifulSoup(response, "xml")

    # find() returns None when the element is absent (e.g. an empty reply);
    # fall back to an empty summary instead of failing on `None.text`.
    summary_node = soup.find("summary")
    summary = summary_node.text if summary_node is not None else ""

    tags = [tag.text for tag in soup.find_all("tag")]
    return {"summary": summary, "tags": tags}
|
||||||
|
|
||||||
|
|
||||||
def _call_openai(prompt: str) -> dict[str, Any]:
|
def _call_openai(prompt: str) -> dict[str, Any]:
|
||||||
"""Call OpenAI API for summarization."""
|
"""Call OpenAI API for summarization."""
|
||||||
import openai
|
import openai
|
||||||
@ -58,7 +72,7 @@ def _call_openai(prompt: str) -> dict[str, Any]:
|
|||||||
temperature=0.3,
|
temperature=0.3,
|
||||||
max_tokens=2048,
|
max_tokens=2048,
|
||||||
)
|
)
|
||||||
return json.loads(response.choices[0].message.content or "{}")
|
return parse_response(response.choices[0].message.content or "")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"OpenAI API error: {e}")
|
logger.error(f"OpenAI API error: {e}")
|
||||||
raise
|
raise
|
||||||
@ -73,13 +87,14 @@ def _call_anthropic(prompt: str) -> dict[str, Any]:
|
|||||||
response = client.messages.create(
|
response = client.messages.create(
|
||||||
model=settings.SUMMARIZER_MODEL.split("/")[1],
|
model=settings.SUMMARIZER_MODEL.split("/")[1],
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid JSON.",
|
system="You are a helpful assistant that creates concise summaries and identifies key topics. Always respond with valid XML.",
|
||||||
temperature=0.3,
|
temperature=0.3,
|
||||||
max_tokens=2048,
|
max_tokens=2048,
|
||||||
)
|
)
|
||||||
return json.loads(response.content[0].text)
|
return parse_response(response.content[0].text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Anthropic API error: {e}")
|
logger.error(f"Anthropic API error: {e}")
|
||||||
|
logger.error(response.content[0].text)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@ -294,4 +294,5 @@ feeds = [
|
|||||||
"https://www.theredhandfiles.com/",
|
"https://www.theredhandfiles.com/",
|
||||||
"https://karlin.blog/",
|
"https://karlin.blog/",
|
||||||
"https://slatestarcodex.com/",
|
"https://slatestarcodex.com/",
|
||||||
|
"https://www.astralcodexten.com/",
|
||||||
]
|
]
|
||||||
|
@ -75,6 +75,7 @@ def sync_comic(
|
|||||||
published_date: datetime | None = None,
|
published_date: datetime | None = None,
|
||||||
):
|
):
|
||||||
"""Synchronize a comic from a URL."""
|
"""Synchronize a comic from a URL."""
|
||||||
|
logger.info(f"syncing comic {url}")
|
||||||
with make_session() as session:
|
with make_session() as session:
|
||||||
existing_comic = check_content_exists(session, Comic, url=url)
|
existing_comic = check_content_exists(session, Comic, url=url)
|
||||||
if existing_comic:
|
if existing_comic:
|
||||||
@ -101,7 +102,7 @@ def sync_comic(
|
|||||||
url=url,
|
url=url,
|
||||||
published=published_date,
|
published=published_date,
|
||||||
author=author,
|
author=author,
|
||||||
filename=filename.resolve().as_posix(),
|
filename=filename.resolve().relative_to(settings.FILE_STORAGE_DIR).as_posix(),
|
||||||
mime_type=mime_type,
|
mime_type=mime_type,
|
||||||
size=len(response.content),
|
size=len(response.content),
|
||||||
sha256=create_content_hash(f"{image_url}{published_date}"),
|
sha256=create_content_hash(f"{image_url}{published_date}"),
|
||||||
|
@ -84,6 +84,7 @@ TASK_MAPPINGS = {
|
|||||||
"sync_smbc": SYNC_SMBC,
|
"sync_smbc": SYNC_SMBC,
|
||||||
"sync_xkcd": SYNC_XKCD,
|
"sync_xkcd": SYNC_XKCD,
|
||||||
"sync_comic": SYNC_COMIC,
|
"sync_comic": SYNC_COMIC,
|
||||||
|
"full_sync_comics": "memory.workers.tasks.comic.full_sync_comic",
|
||||||
},
|
},
|
||||||
"forums": {
|
"forums": {
|
||||||
"sync_lesswrong": SYNC_LESSWRONG,
|
"sync_lesswrong": SYNC_LESSWRONG,
|
||||||
@ -422,6 +423,13 @@ def comic_sync_comic(ctx, image_url, title, author, published_date):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@comic.command("full-sync-comics")
@click.pass_context
def comic_full_sync_comics(ctx):
    """Full sync comics."""
    # Delegates to execute_task; the two strings select which task to run
    # (presumably a TASK_MAPPINGS category and key — confirm against
    # execute_task's definition).
    category, task_name = "comic", "full_sync_comics"
    execute_task(ctx, category, task_name)
|
||||||
|
|
||||||
|
|
||||||
@cli.group()
|
@cli.group()
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def forums(ctx):
|
def forums(ctx):
|
||||||
@ -442,7 +450,7 @@ def forums_sync_lesswrong(ctx, since_date, min_karma, limit, cooldown, max_items
|
|||||||
ctx,
|
ctx,
|
||||||
"forums",
|
"forums",
|
||||||
"sync_lesswrong",
|
"sync_lesswrong",
|
||||||
since_date=since_date,
|
since=since_date,
|
||||||
min_karma=min_karma,
|
min_karma=min_karma,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
cooldown=cooldown,
|
cooldown=cooldown,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user