file list
This commit is contained in:
@@ -355,6 +355,23 @@ def fix_pages():
|
||||
i -= 1
|
||||
|
||||
|
||||
def upload_file_list():
    """Publish the keys of all split documents to S3 as a plain-text listing.

    Selects every ``key`` from the ``metadata`` table whose ``split`` flag is
    true (ordered by key), joins them with newlines, and stores the result as
    ``files.txt`` in the public bucket. Logs the number of keys uploaded.
    """
    BUCKET_PUBLIC = "cavepediav2-public"

    # Collect the keys in the order the database returns them (ORDER BY key).
    cursor = conn.execute("SELECT key FROM metadata WHERE split = true ORDER BY key")
    files = []
    for record in cursor:
        files.append(record["key"])

    # One key per line, encoded once right before the upload.
    payload = "\n".join(files).encode("utf-8")
    s3.put_object(
        Bucket=BUCKET_PUBLIC,
        Key="files.txt",
        Body=payload,
        ContentType="text/plain",
    )

    logger.info(f"Uploaded file list with {len(files)} files to s3://{BUCKET_PUBLIC}/files.txt")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
create_tables()
|
||||
|
||||
@@ -364,6 +381,7 @@ if __name__ == "__main__":
|
||||
check_batches()
|
||||
ocr_main()
|
||||
embeddings_main()
|
||||
upload_file_list()
|
||||
|
||||
logger.info("sleeping 5 minutes")
|
||||
time.sleep(5 * 60)
|
||||
|
||||
@@ -46,7 +46,8 @@ IMPORTANT RULES:
|
||||
1. Always cite your sources at the end of each response when possible.
|
||||
2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts.
|
||||
3. Provide accurate, helpful, and safety-conscious information.
|
||||
4. You specialize in creating ascii art diagrams or maps."""
|
||||
4. You specialize in creating ascii art diagrams or maps.
|
||||
5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional."""
|
||||
|
||||
|
||||
def create_agent(user_roles: list[str] | None = None):
|
||||
|
||||
Reference in New Issue
Block a user