diff --git a/Dockerfile b/Dockerfile index 8e1ade8..9062041 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,4 +15,4 @@ COPY . . EXPOSE 8000 # Runs when the container is started -CMD honcho start +CMD ["gunicorn", "web.app:app", "--bind", "0.0.0.0:8000", "--workers", "4"] diff --git a/docker-compose.yml b/docker-compose.yml index 5f3a83a..1145ff0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,19 @@ services: - app: - image: richardr1126/cosmere-feed-bsky + firehose: + container_name: firehose + image: richardr1126/firehose + build: + context: ./firehose + env_file: + - .env + volumes: + - feeddata:/var/data + stdin_open: true + tty: true + + web: + container_name: web + image: richardr1126/cosmere-feed build: context: . env_file: diff --git a/firehose/Dockerfile b/firehose/Dockerfile new file mode 100644 index 0000000..198cc8f --- /dev/null +++ b/firehose/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.12.8-slim-bookworm + +# Create a volume for the SQLite database so that it persists between container restarts. +# Needs persistent storage attached to the server. +VOLUME /var/data/ +WORKDIR /usr/src/app/ + +# Copy the dependency manifest (relative to the ./firehose build context) and install dependencies +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code +COPY . . 
+ +# Runs when the container is started; exec form runs python directly (no /bin/sh wrapper) so it receives stop signals +CMD ["python", "start_stream.py"] diff --git a/firehose/data_filter.py b/firehose/data_filter.py index 4954a04..e81de55 100644 --- a/firehose/data_filter.py +++ b/firehose/data_filter.py @@ -3,7 +3,7 @@ from collections import defaultdict from atproto import models, Client, IdResolver from utils.logger import logger -from firehose.database import db, Post, init_client +from database import db, Post, init_client PHRASES = [ '17th shard', diff --git a/firehose/data_stream.py b/firehose/data_stream.py index b28982a..c395254 100644 --- a/firehose/data_stream.py +++ b/firehose/data_stream.py @@ -10,7 +10,7 @@ ) from atproto.exceptions import FirehoseError -from firehose.database import SubscriptionState, db +from database import SubscriptionState, db from utils.logger import logger # Define the types of records we're interested in and their corresponding namespace IDs @@ -123,7 +123,7 @@ def _run(name, operations_callback, stream_stop_event=None): SubscriptionState.create(service=name, cursor=0) # Initialize the firehose client w/o a cursor for now - client = FirehoseSubscribeReposClient(params) + client = FirehoseSubscribeReposClient() def on_message_handler(message: firehose_models.MessageFrame) -> None: """ @@ -156,10 +156,10 @@ def on_message_handler(message: firehose_models.MessageFrame) -> None: # Update the client's parameters with the new cursor client.update_params(models.ComAtprotoSyncSubscribeRepos.Params(cursor=commit.seq)) # Persist the new cursor in the database - try: - SubscriptionState.update(cursor=commit.seq).where(SubscriptionState.service == name).execute() - except Exception as e: - logger.error(f"Failed to update cursor in database: {e}") + # try: + # SubscriptionState.update(cursor=commit.seq).where(SubscriptionState.service == name).execute() + # except Exception as e: + # logger.error(f"Failed to update cursor in database: {e}") if not commit.blocks: # Skip if there are no blocks to process diff --git 
a/firehose/requirements.txt b/firehose/requirements.txt new file mode 100644 index 0000000..430bf42 --- /dev/null +++ b/firehose/requirements.txt @@ -0,0 +1,4 @@ +atproto +peewee +python-dotenv +apscheduler diff --git a/start_stream.py b/firehose/start_stream.py similarity index 89% rename from start_stream.py rename to firehose/start_stream.py index 21314b4..f0628c8 100644 --- a/start_stream.py +++ b/firehose/start_stream.py @@ -4,8 +4,8 @@ from utils import config from utils.logger import logger -import firehose.data_stream as data_stream -from firehose.data_filter import operations_callback +import data_stream +from data_filter import operations_callback def main(): stream_stop_event = threading.Event() diff --git a/firehose/utils/config.py b/firehose/utils/config.py new file mode 100644 index 0000000..bdf74dd --- /dev/null +++ b/firehose/utils/config.py @@ -0,0 +1,27 @@ +import os +from utils.logger import logger + +SERVICE_DID = os.environ.get('SERVICE_DID', None) +HOSTNAME = os.environ.get('HOSTNAME', None) +HANDLE = os.environ.get('HANDLE', None) +PASSWORD = os.environ.get('PASSWORD', None) + +if HOSTNAME is None: + raise RuntimeError('You should set "HOSTNAME" environment variable first.') + +if SERVICE_DID is None: + SERVICE_DID = f'did:web:{HOSTNAME}' + + +CHRONOLOGICAL_TRENDING_URI = os.environ.get('CHRONOLOGICAL_TRENDING_URI') +if CHRONOLOGICAL_TRENDING_URI is None: + raise RuntimeError('Publish your feed first (run publish_feed.py) to obtain Feed URI. 
' + 'Set this URI to "CHRONOLOGICAL_TRENDING_URI" environment variable.') +# NOTE: never log HANDLE or PASSWORD; they are account credentials. +# Both are required, so fail fast at import time when either is unset. +if HANDLE is None: + raise RuntimeError('You should set "HANDLE" environment variable first.') + +if PASSWORD is None: + raise RuntimeError('You should set "PASSWORD" environment variable first.') diff --git a/firehose/utils/logger.py b/firehose/utils/logger.py new file mode 100644 index 0000000..c146410 --- /dev/null +++ b/firehose/utils/logger.py @@ -0,0 +1,4 @@ +import logging + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO)