diff --git a/docker-compose.yaml b/docker-compose.yaml index 18af07e..dee38e3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -183,6 +183,10 @@ services: dockerfile: docker/ingest_hub/Dockerfile environment: <<: *worker-env + DISCORD_API_PORT: 8000 + DISCORD_BOT_TOKEN: ${DISCORD_BOT_TOKEN} + DISCORD_NOTIFICATIONS_ENABLED: "true" + DISCORD_COLLECTOR_ENABLED: "true" volumes: - ./memory_files:/app/memory_files:rw tmpfs: diff --git a/docker/ingest_hub/Dockerfile b/docker/ingest_hub/Dockerfile index 801c1e6..5cbb5d3 100644 --- a/docker/ingest_hub/Dockerfile +++ b/docker/ingest_hub/Dockerfile @@ -11,10 +11,10 @@ RUN apt-get update && apt-get install -y \ COPY requirements ./requirements/ COPY setup.py ./ RUN mkdir src -RUN pip install -e ".[common]" +RUN pip install -e ".[ingesters]" COPY src/ ./src/ -RUN pip install -e ".[common]" +RUN pip install -e ".[ingesters]" # Create and copy entrypoint script COPY docker/workers/entry.sh ./entry.sh diff --git a/docker/ingest_hub/supervisor.conf b/docker/ingest_hub/supervisor.conf index b6e9872..0f53377 100644 --- a/docker/ingest_hub/supervisor.conf +++ b/docker/ingest_hub/supervisor.conf @@ -14,3 +14,12 @@ stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 autorestart=true startsecs=10 + +[program:discord-api] +command=uvicorn memory.discord.api:app --host 0.0.0.0 --port %(ENV_DISCORD_API_PORT)s +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=true +startsecs=10 diff --git a/requirements/requirements-api.txt b/requirements/requirements-api.txt index e96bea8..9b01a9b 100644 --- a/requirements/requirements-api.txt +++ b/requirements/requirements-api.txt @@ -4,5 +4,4 @@ python-jose==3.3.0 python-multipart==0.0.9 sqladmin==0.20.1 mcp==1.10.0 -bm25s[full]==0.2.13 -discord.py==2.3.2 \ No newline at end of file +bm25s[full]==0.2.13 \ No newline at end of file diff --git a/requirements/requirements-ingesters.txt 
b/requirements/requirements-ingesters.txt new file mode 100644 index 0000000..a079c9b --- /dev/null +++ b/requirements/requirements-ingesters.txt @@ -0,0 +1,3 @@ +discord.py==2.3.2 +uvicorn==0.29.0 +fastapi==0.112.2 \ No newline at end of file diff --git a/setup.py b/setup.py index 31a47bb..7e60b99 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ common_requires = read_requirements("requirements-common.txt") parsers_requires = read_requirements("requirements-parsers.txt") api_requires = read_requirements("requirements-api.txt") dev_requires = read_requirements("requirements-dev.txt") +ingesters_requires = read_requirements("requirements-ingesters.txt") setup( name="memory", @@ -28,6 +29,11 @@ setup( "api": api_requires + common_requires + parsers_requires, "common": common_requires + parsers_requires, "dev": dev_requires, - "all": api_requires + common_requires + dev_requires + parsers_requires, + "ingesters": common_requires + parsers_requires + ingesters_requires, + "all": api_requires + + common_requires + + dev_requires + + parsers_requires + + ingesters_requires, }, ) diff --git a/src/memory/common/settings.py b/src/memory/common/settings.py index 86d2490..793e698 100644 --- a/src/memory/common/settings.py +++ b/src/memory/common/settings.py @@ -177,6 +177,6 @@ DISCORD_NOTIFICATIONS_ENABLED = bool( DISCORD_COLLECTOR_ENABLED = boolean_env("DISCORD_COLLECTOR_ENABLED", True) DISCORD_COLLECT_DMS = boolean_env("DISCORD_COLLECT_DMS", True) DISCORD_COLLECT_BOTS = boolean_env("DISCORD_COLLECT_BOTS", True) -DISCORD_COLLECTOR_PORT = int(os.getenv("DISCORD_COLLECTOR_PORT", 8001)) +DISCORD_COLLECTOR_PORT = int(os.getenv("DISCORD_COLLECTOR_PORT", 8000)) DISCORD_COLLECTOR_SERVER_URL = os.getenv("DISCORD_COLLECTOR_SERVER_URL", "127.0.0.1") DISCORD_CONTEXT_WINDOW = int(os.getenv("DISCORD_CONTEXT_WINDOW", 10)) diff --git a/src/memory/workers/discord/api.py b/src/memory/discord/api.py similarity index 98% rename from src/memory/workers/discord/api.py rename to 
src/memory/discord/api.py index 346ebb7..129e96e 100644 --- a/src/memory/workers/discord/api.py +++ b/src/memory/discord/api.py @@ -14,7 +14,7 @@ from pydantic import BaseModel import uvicorn from memory.common import settings -from memory.workers.discord.collector import MessageCollector +from memory.discord.collector import MessageCollector logger = logging.getLogger(__name__) diff --git a/src/memory/workers/discord/collector.py b/src/memory/discord/collector.py similarity index 97% rename from src/memory/workers/discord/collector.py rename to src/memory/discord/collector.py index bd92425..f42ff76 100644 --- a/src/memory/workers/discord/collector.py +++ b/src/memory/discord/collector.py @@ -25,9 +25,12 @@ logger = logging.getLogger(__name__) # Pure functions for Discord entity creation/updates def create_or_update_server( - session: Session | scoped_session, guild: discord.Guild -) -> DiscordServer: + session: Session | scoped_session, guild: discord.Guild | None +) -> DiscordServer | None: """Get or create DiscordServer record (pure DB operation)""" + if not guild: + return None + server = session.query(DiscordServer).get(guild.id) if not server: @@ -53,7 +56,10 @@ def create_or_update_server( def determine_channel_metadata(channel) -> tuple[str, int | None, str]: """Pure function to determine channel type, server_id, and name""" if isinstance(channel, discord.DMChannel): - return "dm", None, f"DM with {channel.recipient.name}" + desc = ( + f"DM with {channel.recipient.name}" if channel.recipient else "Unknown DM" + ) + return ("dm", None, desc) elif isinstance(channel, discord.GroupChannel): return "group_dm", None, channel.name or "Group DM" elif isinstance( @@ -73,8 +79,11 @@ def determine_channel_metadata(channel) -> tuple[str, int | None, str]: def create_or_update_channel( session: Session | scoped_session, channel -) -> DiscordChannel: +) -> DiscordChannel | None: """Get or create DiscordChannel record (pure DB operation)""" + if not channel: + return 
None + discord_channel = session.query(DiscordChannel).get(channel.id) if not discord_channel: @@ -98,6 +107,9 @@ def create_or_update_user( - session: Session | scoped_session, user: discord.User | discord.Member -) -> DiscordUser: + session: Session | scoped_session, user: discord.User | discord.Member | None +) -> DiscordUser | None: """Get or create DiscordUser record (pure DB operation)""" + if not user: + return None + discord_user = session.query(DiscordUser).get(user.id) if not discord_user: diff --git a/src/memory/workers/tasks/blogs.py b/src/memory/workers/tasks/blogs.py index b1d6337..ca0ca14 100644 --- a/src/memory/workers/tasks/blogs.py +++ b/src/memory/workers/tasks/blogs.py @@ -115,6 +115,9 @@ def sync_article_feed(feed_id: int) -> dict: try: for feed_item in parser.parse_feed(): + if not feed_item.url: + continue + articles_found += 1 existing = check_content_exists(session, BlogPost, url=feed_item.url)