poller docker
Some checks failed
Build and Push Agent Docker Image / build (push) Failing after 1m5s
Build and Push Poller Docker Image / build (push) Failing after 43s
Build and Push Web Docker Image / build (push) Successful in 3m35s

This commit is contained in:
2025-12-12 17:52:44 +01:00
parent feabb681e8
commit 4ce3eb0abd
7 changed files with 124 additions and 13 deletions

View File

@@ -0,0 +1,30 @@
# Builds the poller image from ./poller and pushes it to the Gitea container
# registry whenever main is updated.
name: Build and Push Poller Docker Image

on:
  push:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Authenticate as the user who triggered the run, using the push token secret.
      - name: Login to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: git.seaturtle.pw
          username: ${{ gitea.actor }}
          password: ${{ secrets.ACTIONS_PUSH_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # The build context is the poller directory, so poller/Dockerfile is used.
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: ./poller
          push: true
          tags: git.seaturtle.pw/cavepedia/cavepediav2-poller:latest

8
poller/.dockerignore Normal file
View File

@@ -0,0 +1,8 @@
# Local virtualenv and Python bytecode
.venv/
__pycache__/
*.pyc
*.pyo

# Local environment file: holds API keys and passwords, must never reach the build context
.env

# Version-control metadata and files the image does not need
.git
.gitignore
README.md
.python-version

View File

@@ -1 +0,0 @@
3.11

19
poller/Dockerfile Normal file
View File

@@ -0,0 +1,19 @@
# syntax=docker/dockerfile:1

# Image for the Cavepedia v2 poller: installs the locked dependencies with uv
# and runs main.py from the resulting virtual environment.

# Pin uv to a release line instead of :latest so rebuilds are reproducible;
# bump this tag deliberately when upgrading uv.
FROM ghcr.io/astral-sh/uv:0.9 AS uv

FROM python:3.14-slim

WORKDIR /app

# UV_COMPILE_BYTECODE: compile .pyc files at build time, since the runtime
#   user cannot write __pycache__ under /app.
# UV_PYTHON_DOWNLOADS: always use the base image's interpreter instead of
#   letting uv fetch its own copy, which the runtime user might not be able to read.
# PATH: put the project virtualenv first so plain `python` resolves to it.
ENV UV_COMPILE_BYTECODE=1 \
    UV_PYTHON_DOWNLOADS=never \
    PATH="/app/.venv/bin:$PATH"

# Unprivileged account for running the poller; fixed IDs stay stable across rebuilds.
RUN groupadd --system --gid 10001 poller \
    && useradd --system --uid 10001 --gid poller --no-create-home poller

# Install uv for fast dependency management
COPY --from=uv /uv /uvx /bin/

# Copy dependency files first so the install layer stays cached until they change
COPY pyproject.toml uv.lock ./

# Install locked runtime dependencies only; --no-cache keeps uv's download
# cache out of the image layer.
RUN uv sync --frozen --no-dev --no-install-project --no-cache

# Copy application code
COPY main.py ./

# Drop root before the process starts.
# NOTE(review): assumes main.py does not write files under /app - confirm.
USER poller

# Run straight from the virtualenv: `uv run` would re-sync the environment on
# every container start, which this image has already done at build time.
CMD ["python", "main.py"]

View File

@@ -12,3 +12,47 @@ Every 5 minutes, this polls for new documents as follows:
* A temporary public S3 file link is generated using a presigned S3 URL.
5. Checks the `embeddings` table for any rows that have been OCR'd, but do not have embeddings generated, then generates embeddings with cohere.
* No batching is used with cohere.
## Environment Variables
| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `COHERE_API_KEY` | Yes | - | Cohere API key for embeddings |
| `S3_ACCESS_KEY` | Yes | - | S3/MinIO access key |
| `S3_SECRET_KEY` | Yes | - | S3/MinIO secret key |
| `DB_PASSWORD` | Yes | - | PostgreSQL password |
| `ANTHROPIC_API_KEY` | Yes | - | Claude API key for OCR |
| `DB_HOST` | No | localhost | PostgreSQL host |
| `DB_PORT` | No | 5432 | PostgreSQL port |
| `DB_NAME` | No | cavepediav2_db | PostgreSQL database name |
| `DB_USER` | No | cavepediav2_user | PostgreSQL username |
| `S3_ENDPOINT` | No | https://s3.bigcavemaps.com | S3 endpoint URL |
| `S3_REGION` | No | eu | S3 region |
## Development
```bash
# Create .env file with required variables
cp .env.example .env
# Install dependencies
uv sync
# Run
uv run main.py
```
## Deployment
The poller is automatically built and pushed to `git.seaturtle.pw/cavepedia/cavepediav2-poller:latest` on push to main.
```bash
docker run \
-e COHERE_API_KEY="xxx" \
-e S3_ACCESS_KEY="xxx" \
-e S3_SECRET_KEY="xxx" \
-e DB_PASSWORD="xxx" \
-e DB_HOST="postgres" \
-e ANTHROPIC_API_KEY="xxx" \
git.seaturtle.pw/cavepedia/cavepediav2-poller:latest
```

View File

@@ -27,26 +27,37 @@ logger.addHandler(logHandler)
##### #####
# Load .env file if it exists (for local dev)
dotenv.load_dotenv()

# Required environment variables: a missing one raises KeyError at import
# time instead of surfacing later as a None credential.
COHERE_API_KEY = os.environ['COHERE_API_KEY']
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']

# Optional S3 settings with their defaults
S3_ENDPOINT = os.environ.get('S3_ENDPOINT', 'https://s3.bigcavemaps.com')
S3_REGION = os.environ.get('S3_REGION', 'eu')

# Database config; only the password is required, and no credential is
# hard-coded in the source.
DB_HOST = os.environ.get('DB_HOST', 'localhost')
DB_PORT = int(os.environ.get('DB_PORT', '5432'))  # env values are strings; port is passed as an int
DB_NAME = os.environ.get('DB_NAME', 'cavepediav2_db')
DB_USER = os.environ.get('DB_USER', 'cavepediav2_user')
DB_PASSWORD = os.environ['DB_PASSWORD']

# S3 client pointed at the configured endpoint and region
s3 = boto3.client(
    's3',
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY,
    endpoint_url=S3_ENDPOINT,
    region_name=S3_REGION,
)

# Cohere client
co = cohere.ClientV2(api_key=COHERE_API_KEY)

# PostgreSQL connection; dict_row is used as the row factory
conn = psycopg.connect(
    host=DB_HOST,
    port=DB_PORT,
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
    row_factory=dict_row,
)

View File

@@ -3,7 +3,7 @@ name = "poller"
version = "1.0.0" version = "1.0.0"
description = "Cavepedia v2 Poller" description = "Cavepedia v2 Poller"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.14"
dependencies = [ dependencies = [
"anthropic>=0.52.0", "anthropic>=0.52.0",
"boto3>=1.42.4", "boto3>=1.42.4",