poller docker
This commit is contained in:
30
.gitea/workflows/build-push-poller.yaml
Normal file
30
.gitea/workflows/build-push-poller.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
name: Build and Push Poller Docker Image
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Login to Gitea Container Registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: git.seaturtle.pw
|
||||||
|
username: ${{ gitea.actor }}
|
||||||
|
password: ${{ secrets.ACTIONS_PUSH_TOKEN }}
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: ./poller
|
||||||
|
push: true
|
||||||
|
tags: git.seaturtle.pw/cavepedia/cavepediav2-poller:latest
|
||||||
8
poller/.dockerignore
Normal file
8
poller/.dockerignore
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
.venv/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
README.md
|
||||||
|
.python-version
|
||||||
@@ -1 +0,0 @@
|
|||||||
3.11
|
|
||||||
19
poller/Dockerfile
Normal file
19
poller/Dockerfile
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# syntax=docker/dockerfile:1
|
||||||
|
|
||||||
|
FROM python:3.14-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install uv for fast dependency management
|
||||||
|
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||||
|
|
||||||
|
# Copy dependency files
|
||||||
|
COPY pyproject.toml uv.lock ./
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN uv sync --frozen --no-dev --no-install-project
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY main.py ./
|
||||||
|
|
||||||
|
CMD ["uv", "run", "main.py"]
|
||||||
@@ -12,3 +12,47 @@ Every 5 minutes, this polls for new documents as follows:
|
|||||||
* A temporary public S3 file link is generated using a presigned S3 URL.
|
* A temporary public S3 file link is generated using a presigned S3 URL.
|
||||||
5. Checks the `embeddings` table for any rows that have been OCR'd but do not yet have embeddings generated, then generates them with Cohere.
|
5. Checks the `embeddings` table for any rows that have been OCR'd but do not yet have embeddings generated, then generates them with Cohere.
|
||||||
* No batching is used with Cohere.
|
* No batching is used with Cohere.
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
| Variable | Required | Default | Description |
|
||||||
|
|----------|----------|---------|-------------|
|
||||||
|
| `COHERE_API_KEY` | Yes | - | Cohere API key for embeddings |
|
||||||
|
| `S3_ACCESS_KEY` | Yes | - | S3/MinIO access key |
|
||||||
|
| `S3_SECRET_KEY` | Yes | - | S3/MinIO secret key |
|
||||||
|
| `DB_PASSWORD` | Yes | - | PostgreSQL password |
|
||||||
|
| `ANTHROPIC_API_KEY` | Yes | - | Claude API key for OCR |
|
||||||
|
| `DB_HOST` | No | localhost | PostgreSQL host |
|
||||||
|
| `DB_PORT` | No | 5432 | PostgreSQL port |
|
||||||
|
| `DB_NAME` | No | cavepediav2_db | PostgreSQL database name |
|
||||||
|
| `DB_USER` | No | cavepediav2_user | PostgreSQL username |
|
||||||
|
| `S3_ENDPOINT` | No | https://s3.bigcavemaps.com | S3 endpoint URL |
|
||||||
|
| `S3_REGION` | No | eu | S3 region |
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create .env file with required variables
|
||||||
|
cp .env.example .env
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
uv sync
|
||||||
|
|
||||||
|
# Run
|
||||||
|
uv run main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
The poller image is automatically built and pushed to `git.seaturtle.pw/cavepedia/cavepediav2-poller:latest` on every push to the `main` branch.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run \
|
||||||
|
-e COHERE_API_KEY="xxx" \
|
||||||
|
-e S3_ACCESS_KEY="xxx" \
|
||||||
|
-e S3_SECRET_KEY="xxx" \
|
||||||
|
-e DB_PASSWORD="xxx" \
|
||||||
|
-e DB_HOST="postgres" \
|
||||||
|
-e ANTHROPIC_API_KEY="xxx" \
|
||||||
|
git.seaturtle.pw/cavepedia/cavepediav2-poller:latest
|
||||||
|
```
|
||||||
|
|||||||
@@ -27,26 +27,37 @@ logger.addHandler(logHandler)
|
|||||||
|
|
||||||
#####
|
#####
|
||||||
|
|
||||||
dotenv.load_dotenv('/home/pew/scripts-private/loser/cavepedia-v2/poller.env')
|
# Load .env file if it exists (for local dev)
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
COHERE_API_KEY = os.getenv('COHERE_API_KEY')
|
# Required environment variables
|
||||||
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY')
|
COHERE_API_KEY = os.environ['COHERE_API_KEY']
|
||||||
S3_SECRET_KEY = os.getenv('S3_SECRET_KEY')
|
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
|
||||||
|
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']
|
||||||
|
S3_ENDPOINT = os.environ.get('S3_ENDPOINT', 'https://s3.bigcavemaps.com')
|
||||||
|
S3_REGION = os.environ.get('S3_REGION', 'eu')
|
||||||
|
|
||||||
|
# Database config
|
||||||
|
DB_HOST = os.environ.get('DB_HOST', 'localhost')
|
||||||
|
DB_PORT = int(os.environ.get('DB_PORT', '5432'))
|
||||||
|
DB_NAME = os.environ.get('DB_NAME', 'cavepediav2_db')
|
||||||
|
DB_USER = os.environ.get('DB_USER', 'cavepediav2_user')
|
||||||
|
DB_PASSWORD = os.environ['DB_PASSWORD']
|
||||||
|
|
||||||
s3 = boto3.client(
|
s3 = boto3.client(
|
||||||
's3',
|
's3',
|
||||||
aws_access_key_id=S3_ACCESS_KEY,
|
aws_access_key_id=S3_ACCESS_KEY,
|
||||||
aws_secret_access_key=S3_SECRET_KEY,
|
aws_secret_access_key=S3_SECRET_KEY,
|
||||||
endpoint_url='https://s3.bigcavemaps.com',
|
endpoint_url=S3_ENDPOINT,
|
||||||
region_name='eu',
|
region_name=S3_REGION,
|
||||||
)
|
)
|
||||||
co = cohere.ClientV2(api_key=COHERE_API_KEY)
|
co = cohere.ClientV2(api_key=COHERE_API_KEY)
|
||||||
conn = psycopg.connect(
|
conn = psycopg.connect(
|
||||||
host='::1',
|
host=DB_HOST,
|
||||||
port=9030,
|
port=DB_PORT,
|
||||||
dbname='cavepediav2_db',
|
dbname=DB_NAME,
|
||||||
user='cavepediav2_user',
|
user=DB_USER,
|
||||||
password='cavepediav2_pw',
|
password=DB_PASSWORD,
|
||||||
row_factory=dict_row,
|
row_factory=dict_row,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "poller"
|
|||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
description = "Cavepedia v2 Poller"
|
description = "Cavepedia v2 Poller"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anthropic>=0.52.0",
|
"anthropic>=0.52.0",
|
||||||
"boto3>=1.42.4",
|
"boto3>=1.42.4",
|
||||||
|
|||||||
Reference in New Issue
Block a user