54 lines
1.3 KiB
Python
54 lines
1.3 KiB
Python
from pgvector.psycopg import register_vector, Bit
|
|
from psycopg.rows import dict_row
|
|
from urllib.parse import unquote
|
|
from pypdf import PdfReader, PdfWriter
|
|
import anthropic
|
|
import cohere
|
|
import dotenv
|
|
import datetime
|
|
import io
|
|
import json
|
|
import minio
|
|
import numpy as np
|
|
import os
|
|
import psycopg
|
|
import time
|
|
import logging
|
|
from pythonjsonlogger.json import JsonFormatter
|
|
|
|
# Module-level logger that writes JSON-formatted records to a stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Each record is rendered with the `asctime` and `message` fields
# ("{"-style format string).
formatter = JsonFormatter("{asctime}{message}", style="{")

# logging.StreamHandler() with no argument writes to sys.stderr.
logHandler = logging.StreamHandler()
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
|
|
|
|
#####
|
|
|
|
# Load settings from the poller's env file into the process environment
# before any of the lookups below run.
dotenv.load_dotenv('/home/paul/scripts-private/lech/cavepedia-v2/poller.env')

# Each value is None when the corresponding variable is not set.
COHERE_API_KEY = os.environ.get('COHERE_API_KEY')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY')

# Shared MinIO (S3-compatible) client used by the functions in this module.
s3 = minio.Minio(
    endpoint='s3.bigcavemaps.com',
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    region='kansascity',
)
|
|
|
|
def getobject(
    bucket='cavepedia-v2',
    key='public/var/fyi/VAR-FYI 1982-01.pdf',
    dest='/tmp/file.pdf',
    chunk_size=64 * 1024,
):
    """Download one object from the module-level ``s3`` client to a local file.

    Called with no arguments this fetches the same object to the same path
    as before; the parameters only make the previously hard-coded values
    overridable.

    Args:
        bucket: Name of the bucket to read from.
        key: Object name (key) inside ``bucket``.
        dest: Local filesystem path the object's bytes are written to.
            The file is opened in ``'wb'`` mode, so existing content is
            overwritten.
        chunk_size: Maximum number of bytes requested per ``read`` call.

    Raises:
        Whatever ``s3.get_object`` or the local file operations raise;
        nothing is caught here.
    """
    # Request the object first so a failed lookup does not create or
    # truncate the destination file.
    response = s3.get_object(bucket, key)
    try:
        with open(dest, 'wb') as f:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        # The MinIO SDK documentation asks callers to both close the
        # response and release its connection once done with it.
        response.close()
        response.release_conn()
|
|
|
|
if __name__ == '__main__':
    # Run as a script: perform the download with getobject()'s defaults.
    getobject()
|