From 7578c9aab0cc3ca0251ac02432932d285e87aa8b Mon Sep 17 00:00:00 2001
From: Paul Walko
Date: Tue, 16 Dec 2025 04:21:28 +0100
Subject: [PATCH] file list

---
Review note: line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Confirm the indentation of the context
lines against poller/main.py and web/agent/src/agent.py before applying.
The index hash for poller/main.py predates the ContentType charset change.

 poller/main.py         | 24 ++++++++++++++++++++++++
 web/agent/src/agent.py |  3 ++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/poller/main.py b/poller/main.py
index bbc48cb..4ff2b81 100644
--- a/poller/main.py
+++ b/poller/main.py
@@ -355,6 +355,29 @@ def fix_pages():
         i -= 1
 
 
+def upload_file_list():
+    """Upload the list of split file keys to S3 as ``files.txt``.
+
+    Selects ``key`` from every ``metadata`` row where ``split = true``,
+    ordered by key, and uploads the keys joined with newlines.
+    """
+    BUCKET_PUBLIC = "cavepediav2-public"
+
+    rows = conn.execute("SELECT key FROM metadata WHERE split = true ORDER BY key")
+    files = [row["key"] for row in rows]
+
+    content = "\n".join(files)
+    s3.put_object(
+        Bucket=BUCKET_PUBLIC,
+        Key="files.txt",
+        Body=content.encode("utf-8"),
+        # The body is UTF-8; say so, or clients may guess another charset
+        # and garble non-ASCII keys.
+        ContentType="text/plain; charset=utf-8",
+    )
+    logger.info(f"Uploaded file list with {len(files)} files to s3://{BUCKET_PUBLIC}/files.txt")
+
+
 if __name__ == "__main__":
     create_tables()
 
@@ -364,6 +387,7 @@ if __name__ == "__main__":
         check_batches()
         ocr_main()
         embeddings_main()
+        upload_file_list()
 
         logger.info("sleeping 5 minutes")
         time.sleep(5 * 60)
diff --git a/web/agent/src/agent.py b/web/agent/src/agent.py
index fc7076b..072e06d 100644
--- a/web/agent/src/agent.py
+++ b/web/agent/src/agent.py
@@ -46,7 +46,8 @@ IMPORTANT RULES:
 1. Always cite your sources at the end of each response when possible.
 2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts.
 3. Provide accurate, helpful, and safety-conscious information.
-4. You specialize in creating ascii art diagrams or maps."""
+4. You specialize in creating ascii art diagrams or maps.
+5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional."""
 
 
 def create_agent(user_roles: list[str] | None = None):