file list
All checks were successful
Build and Push Agent Docker Image / build (push) Successful in 1m29s
Build and Push Poller Docker Image / lint (push) Successful in 31s
Build and Push Poller Docker Image / build (push) Successful in 1m0s

This commit is contained in:
2025-12-16 04:21:28 +01:00
parent 00fc4e367f
commit 7578c9aab0
2 changed files with 20 additions and 1 deletions

View File

@@ -355,6 +355,23 @@ def fix_pages():
i -= 1
def upload_file_list():
    """Upload a newline-separated list of all processed files to S3.

    Selects every ``key`` from the ``metadata`` table whose ``split`` flag is
    true, ordered by key, joins them with ``"\\n"`` (no trailing newline) and
    stores the result as ``files.txt`` in the ``cavepediav2-public`` bucket.

    Relies on the ``conn``, ``s3`` and ``logger`` objects defined elsewhere
    in this module.
    """
    bucket = "cavepediav2-public"
    object_key = "files.txt"

    rows = conn.execute("SELECT key FROM metadata WHERE split = true ORDER BY key")
    # Rows are indexed by column name, so ``conn`` is assumed to be configured
    # with a mapping-style row factory -- TODO confirm where it is created.
    files = [row["key"] for row in rows]

    s3.put_object(
        Bucket=bucket,
        Key=object_key,
        Body="\n".join(files).encode("utf-8"),
        # Declare the charset explicitly: the body is UTF-8, and a bare
        # "text/plain" leaves clients to guess the encoding, which can
        # garble non-ASCII file names.
        ContentType="text/plain; charset=utf-8",
    )
    logger.info(
        "Uploaded file list with %d files to s3://%s/%s",
        len(files),
        bucket,
        object_key,
    )
if __name__ == "__main__":
create_tables()
@@ -364,6 +381,7 @@ if __name__ == "__main__":
check_batches()
ocr_main()
embeddings_main()
upload_file_list()
logger.info("sleeping 5 minutes")
time.sleep(5 * 60)