run discord collector

This commit is contained in:
Daniel O'Connell 2025-10-12 23:43:44 +02:00
parent e086b4a3a6
commit 08d17c28dd
10 changed files with 47 additions and 11 deletions

View File

@ -183,6 +183,10 @@ services:
dockerfile: docker/ingest_hub/Dockerfile dockerfile: docker/ingest_hub/Dockerfile
environment: environment:
<<: *worker-env <<: *worker-env
DISCORD_API_PORT: 8000
DISCORD_BOT_TOKEN: ${DISCORD_BOT_TOKEN}
DISCORD_NOTIFICATIONS_ENABLED: true
DISCORD_COLLECTOR_ENABLED: true
volumes: volumes:
- ./memory_files:/app/memory_files:rw - ./memory_files:/app/memory_files:rw
tmpfs: tmpfs:

View File

@ -11,10 +11,10 @@ RUN apt-get update && apt-get install -y \
COPY requirements ./requirements/ COPY requirements ./requirements/
COPY setup.py ./ COPY setup.py ./
RUN mkdir src RUN mkdir src
RUN pip install -e ".[common]" RUN pip install -e ".[ingesters]"
COPY src/ ./src/ COPY src/ ./src/
RUN pip install -e ".[common]" RUN pip install -e ".[ingesters]"
# Create and copy entrypoint script # Create and copy entrypoint script
COPY docker/workers/entry.sh ./entry.sh COPY docker/workers/entry.sh ./entry.sh

View File

@ -14,3 +14,12 @@ stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0 stderr_logfile_maxbytes=0
autorestart=true autorestart=true
startsecs=10 startsecs=10
[program:discord-api]
command=uvicorn memory.discord.api:app --host 0.0.0.0 --port %(ENV_DISCORD_API_PORT)s
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autorestart=true
startsecs=10

View File

@ -4,5 +4,4 @@ python-jose==3.3.0
python-multipart==0.0.9 python-multipart==0.0.9
sqladmin==0.20.1 sqladmin==0.20.1
mcp==1.10.0 mcp==1.10.0
bm25s[full]==0.2.13 bm25s[full]==0.2.13
discord.py==2.3.2

View File

@ -0,0 +1,3 @@
discord.py==2.3.2
uvicorn==0.29.0
fastapi==0.112.2

View File

@ -17,6 +17,7 @@ common_requires = read_requirements("requirements-common.txt")
parsers_requires = read_requirements("requirements-parsers.txt") parsers_requires = read_requirements("requirements-parsers.txt")
api_requires = read_requirements("requirements-api.txt") api_requires = read_requirements("requirements-api.txt")
dev_requires = read_requirements("requirements-dev.txt") dev_requires = read_requirements("requirements-dev.txt")
ingesters_requires = read_requirements("requirements-ingesters.txt")
setup( setup(
name="memory", name="memory",
@ -28,6 +29,11 @@ setup(
"api": api_requires + common_requires + parsers_requires, "api": api_requires + common_requires + parsers_requires,
"common": common_requires + parsers_requires, "common": common_requires + parsers_requires,
"dev": dev_requires, "dev": dev_requires,
"all": api_requires + common_requires + dev_requires + parsers_requires, "ingesters": common_requires + parsers_requires + ingesters_requires,
"all": api_requires
+ common_requires
+ dev_requires
+ parsers_requires
+ ingesters_requires,
}, },
) )

View File

@ -177,6 +177,6 @@ DISCORD_NOTIFICATIONS_ENABLED = bool(
DISCORD_COLLECTOR_ENABLED = boolean_env("DISCORD_COLLECTOR_ENABLED", True) DISCORD_COLLECTOR_ENABLED = boolean_env("DISCORD_COLLECTOR_ENABLED", True)
DISCORD_COLLECT_DMS = boolean_env("DISCORD_COLLECT_DMS", True) DISCORD_COLLECT_DMS = boolean_env("DISCORD_COLLECT_DMS", True)
DISCORD_COLLECT_BOTS = boolean_env("DISCORD_COLLECT_BOTS", True) DISCORD_COLLECT_BOTS = boolean_env("DISCORD_COLLECT_BOTS", True)
DISCORD_COLLECTOR_PORT = int(os.getenv("DISCORD_COLLECTOR_PORT", 8001)) DISCORD_COLLECTOR_PORT = int(os.getenv("DISCORD_COLLECTOR_PORT", 8000))
DISCORD_COLLECTOR_SERVER_URL = os.getenv("DISCORD_COLLECTOR_SERVER_URL", "127.0.0.1") DISCORD_COLLECTOR_SERVER_URL = os.getenv("DISCORD_COLLECTOR_SERVER_URL", "127.0.0.1")
DISCORD_CONTEXT_WINDOW = int(os.getenv("DISCORD_CONTEXT_WINDOW", 10)) DISCORD_CONTEXT_WINDOW = int(os.getenv("DISCORD_CONTEXT_WINDOW", 10))

View File

@ -14,7 +14,7 @@ from pydantic import BaseModel
import uvicorn import uvicorn
from memory.common import settings from memory.common import settings
from memory.workers.discord.collector import MessageCollector from memory.discord.collector import MessageCollector
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@ -25,9 +25,12 @@ logger = logging.getLogger(__name__)
# Pure functions for Discord entity creation/updates # Pure functions for Discord entity creation/updates
def create_or_update_server( def create_or_update_server(
session: Session | scoped_session, guild: discord.Guild session: Session | scoped_session, guild: discord.Guild | None
) -> DiscordServer: ) -> DiscordServer | None:
"""Get or create DiscordServer record (pure DB operation)""" """Get or create DiscordServer record (pure DB operation)"""
if not guild:
return None
server = session.query(DiscordServer).get(guild.id) server = session.query(DiscordServer).get(guild.id)
if not server: if not server:
@ -53,7 +56,10 @@ def create_or_update_server(
def determine_channel_metadata(channel) -> tuple[str, int | None, str]: def determine_channel_metadata(channel) -> tuple[str, int | None, str]:
"""Pure function to determine channel type, server_id, and name""" """Pure function to determine channel type, server_id, and name"""
if isinstance(channel, discord.DMChannel): if isinstance(channel, discord.DMChannel):
return "dm", None, f"DM with {channel.recipient.name}" desc = (
f"DM with {channel.recipient.name}" if channel.recipient else "Unknown DM"
)
return ("dm", None, desc)
elif isinstance(channel, discord.GroupChannel): elif isinstance(channel, discord.GroupChannel):
return "group_dm", None, channel.name or "Group DM" return "group_dm", None, channel.name or "Group DM"
elif isinstance( elif isinstance(
@ -73,8 +79,11 @@ def determine_channel_metadata(channel) -> tuple[str, int | None, str]:
def create_or_update_channel( def create_or_update_channel(
session: Session | scoped_session, channel session: Session | scoped_session, channel
) -> DiscordChannel: ) -> DiscordChannel | None:
"""Get or create DiscordChannel record (pure DB operation)""" """Get or create DiscordChannel record (pure DB operation)"""
if not channel:
return None
discord_channel = session.query(DiscordChannel).get(channel.id) discord_channel = session.query(DiscordChannel).get(channel.id)
if not discord_channel: if not discord_channel:
@ -98,6 +107,9 @@ def create_or_update_user(
session: Session | scoped_session, user: discord.User | discord.Member session: Session | scoped_session, user: discord.User | discord.Member
) -> DiscordUser: ) -> DiscordUser:
"""Get or create DiscordUser record (pure DB operation)""" """Get or create DiscordUser record (pure DB operation)"""
if not user:
return None
discord_user = session.query(DiscordUser).get(user.id) discord_user = session.query(DiscordUser).get(user.id)
if not discord_user: if not discord_user:

View File

@ -115,6 +115,9 @@ def sync_article_feed(feed_id: int) -> dict:
try: try:
for feed_item in parser.parse_feed(): for feed_item in parser.parse_feed():
if not feed_item.url:
continue
articles_found += 1 articles_found += 1
existing = check_content_exists(session, BlogPost, url=feed_item.url) existing = check_content_exists(session, BlogPost, url=feed_item.url)