diff --git a/poller/main.py b/poller/main.py
index bbc48cb..4ff2b81 100644
--- a/poller/main.py
+++ b/poller/main.py
@@ -355,6 +355,27 @@ def fix_pages():
         i -= 1
 
 
+def upload_file_list():
+    """Upload a newline-separated list of all processed files to S3.
+
+    Reads every key from the metadata table where split is true, sorted by
+    key, and stores the list as files.txt in the public bucket.
+    """
+    BUCKET_PUBLIC = "cavepediav2-public"
+    query = "SELECT key FROM metadata WHERE split = true ORDER BY key"
+
+    files = [record["key"] for record in conn.execute(query)]
+    payload = "\n".join(files).encode("utf-8")
+
+    s3.put_object(
+        Bucket=BUCKET_PUBLIC,
+        Key="files.txt",
+        Body=payload,
+        ContentType="text/plain",
+    )
+    logger.info(f"Uploaded file list with {len(files)} files to s3://{BUCKET_PUBLIC}/files.txt")
+
+
 if __name__ == "__main__":
     create_tables()
 
@@ -364,6 +385,7 @@ if __name__ == "__main__":
         check_batches()
         ocr_main()
         embeddings_main()
+        upload_file_list()
 
         logger.info("sleeping 5 minutes")
         time.sleep(5 * 60)
diff --git a/web/agent/src/agent.py b/web/agent/src/agent.py
index fc7076b..072e06d 100644
--- a/web/agent/src/agent.py
+++ b/web/agent/src/agent.py
@@ -46,7 +46,8 @@ IMPORTANT RULES:
 1. Always cite your sources at the end of each response when possible.
 2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts.
 3. Provide accurate, helpful, and safety-conscious information.
-4. You specialize in creating ascii art diagrams or maps."""
+4. You specialize in creating ascii art diagrams or maps.
+5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional."""
 
 
 def create_agent(user_roles: list[str] | None = None):