add mcp, batching

This commit is contained in:
2025-12-07 04:35:21 +01:00
parent 30f68a9d04
commit d6bc34d138
14 changed files with 1973 additions and 138 deletions

1
mcp/.python-version Normal file
View File

@@ -0,0 +1 @@
3.11

5
mcp/README.md Normal file
View File

@@ -0,0 +1,5 @@
# cavepedia-v2 mcp
# todo
- signout endpoint
- auth

Binary file not shown.

Binary file not shown.

13
mcp/pyproject.toml Normal file
View File

@@ -0,0 +1,13 @@
# Packaging metadata for the cavepedia-v2 MCP server.
[project]
name = "cavepediav2-mcp"
version = "0.1.0"
description = "MCP for cavepediav2"
readme = "README.md"
requires-python = ">=3.11"
# NOTE(review): mcp/search.py also imports pgvector, anthropic, minio and numpy,
# none of which are declared below - confirm whether they should be added here
# or the imports dropped from that script.
dependencies = [
# NOTE(review): asyncio ships with the Python standard library; confirm this PyPI pin is needed.
"asyncio>=4.0.0",
"cohere>=5.20.0",
# NOTE(review): the code does `import dotenv`; confirm "dotenv" (rather than "python-dotenv") is the intended distribution.
"dotenv>=0.9.9",
"fastmcp>=2.13.3",
"psycopg[binary]>=3.3.2",
]

48
mcp/search.py Normal file
View File

@@ -0,0 +1,48 @@
from pgvector.psycopg import register_vector, Bit
from psycopg.rows import dict_row
from urllib.parse import unquote
import anthropic
import cohere
import dotenv
import datetime
import json
import minio
import numpy as np
import os
import psycopg
import time
# Module-level setup. Everything below runs at import time.
# Load environment variables from an env file at an absolute, machine-specific path.
dotenv.load_dotenv('/home/paul/scripts-private/lech/cavepedia-v2/poller.env')
COHERE_API_KEY = os.getenv('COHERE_API_KEY')  # None when the variable is not set
# Shared Cohere v2 client; embed() sends its requests through it.
co = cohere.ClientV2(COHERE_API_KEY)
# Shared PostgreSQL connection; dict_row makes every fetched row a dict keyed by column name.
# NOTE(review): host, port and credentials are hard-coded - consider reading them from the environment.
conn = psycopg.connect(
host='127.0.0.1',
port=4010,
dbname='cavepediav2_db',
user='cavepediav2_user',
password='cavepediav2_pw',
row_factory=dict_row,
)
def embed(text, input_type):
    """Embed one string with the Cohere ``embed-v4.0`` model.

    Args:
        text: The string to embed; it is sent as a one-element batch.
        input_type: Passed through unchanged as Cohere's ``input_type``
            (the caller in this file uses ``'search_query'``).

    Returns:
        The first entry of the response's float embeddings, i.e. the vector
        for ``text``.
    """
    response = co.embed(
        model='embed-v4.0',
        texts=[text],
        input_type=input_type,
        embedding_types=['float'],
    )
    float_vectors = response.embeddings.float
    # Exactly one text was submitted, so its vector is the first entry.
    return float_vectors[0]
def search(query='links trip with not more than 2 people'):
    """Print the bucket and key of the five embeddings nearest to ``query``.

    Args:
        query: Free-text search string. It defaults to the sample query this
            script was previously hard-coded with, so a bare ``search()``
            call behaves as before.

    The query is embedded with input type ``'search_query'`` and rows of the
    ``embeddings`` table are ordered by the ``<=>`` operator against it.
    """
    query_embedding = embed(query, 'search_query')
    # Skip rows that have no embedding, the same filter mcp/server.py applies.
    rows = conn.execute(
        'SELECT * FROM embeddings WHERE embedding IS NOT NULL '
        'ORDER BY embedding <=> %s::vector LIMIT 5',
        (query_embedding,),
    ).fetchall()
    for row in rows:
        print(row['bucket'])
        print(row['key'])


if __name__ == '__main__':
    search()

79
mcp/server.py Normal file
View File

@@ -0,0 +1,79 @@
from fastmcp import FastMCP
from fastmcp.server.auth.providers.auth0 import Auth0Provider
from psycopg.rows import dict_row
import cohere
import dotenv
import psycopg
import os
# Module-level setup. Everything below runs at import time.
# Load environment variables from an env file at an absolute, machine-specific path.
dotenv.load_dotenv('/home/pew/scripts-private/loser/cavepedia-v2/poller.env')
COHERE_API_KEY = os.getenv('COHERE_API_KEY')  # None when the variable is not set
# Shared Cohere v2 client; embed() sends its requests through it.
co = cohere.ClientV2(COHERE_API_KEY)
# Single PostgreSQL connection shared by every tool call; dict_row makes each
# fetched row a dict keyed by column name.
# NOTE(review): host, port and credentials are hard-coded - consider reading them from the environment.
conn = psycopg.connect(
host='::1',
port=9030,
dbname='cavepediav2_db',
user='cavepediav2_user',
password='cavepediav2_pw',
row_factory=dict_row,
)
# The Auth0Provider utilizes Auth0 OIDC configuration
# NOTE(review): client_id and client_secret are committed in source - move them
# to the environment / a secret store and rotate the secret.
auth_provider = Auth0Provider(
config_url="https://dev-jao4so0av61ny4mr.us.auth0.com/.well-known/openid-configuration",
client_id="oONcxma5PNFwYLhrDC4o0PUuAmqDekzM",
client_secret="4Z7Wl12ALEtDmNAoERQe7lK2YD9x6jz7H25FiMxRp518dnag-IS2NLLScnmbe4-b",
audience="https://dev-jao4so0av61ny4mr.us.auth0.com/me/",
base_url="https://mcp.caving.dev",
# redirect_path="/auth/callback" # Default value, customize if needed
)
# NOTE(review): auth_provider is constructed above but is not passed to FastMCP
# here - confirm whether authentication is meant to be enabled
# (e.g. FastMCP("Cavepedia MCP", auth=auth_provider)).
mcp = FastMCP("Cavepedia MCP")
def embed(text, input_type):
    """Return the Cohere ``embed-v4.0`` float embedding for a single string.

    Args:
        text: The string to embed; it is submitted as a one-element batch.
        input_type: Forwarded unchanged as Cohere's ``input_type``
            (``search`` in this file passes ``'search_query'``).

    Returns:
        The first (and only requested) float vector from the response.
    """
    cohere_response = co.embed(
        embedding_types=['float'],
        input_type=input_type,
        model='embed-v4.0',
        texts=[text],
    )
    vectors = cohere_response.embeddings.float
    # One input text means the vector we want is at index 0.
    return vectors[0]
def search(query) -> list[dict]:
    """Return the five stored documents whose embeddings are nearest to ``query``.

    Args:
        query: Free-text search string; it is embedded with input type
            ``'search_query'`` before being compared against the table.

    Returns:
        Up to five dicts, each with the ``'key'`` and ``'content'`` of a
        matching row, in the order produced by the ``<=>`` operator
        (nearest first).
    """
    query_embedding = embed(query, 'search_query')
    # Select only the two columns that are returned: SELECT * would also send
    # back the stored embedding vector of every hit, which is never used.
    rows = conn.execute(
        'SELECT key, content FROM embeddings WHERE embedding IS NOT NULL '
        'ORDER BY embedding <=> %s::vector LIMIT 5',
        (query_embedding,),
    ).fetchall()
    return [{'key': row['key'], 'content': row['content']} for row in rows]
@mcp.tool
def get_cave_location(cave: str, state: str, county: str) -> list[dict]:
    """Lookup cave location as coordinates. Returns up to 5 matches, ordered by most to least relevant."""
    # The docstring above is kept verbatim: it doubles as the tool description.
    # Phrase the lookup as a natural-language sentence and let search() embed
    # it and rank the stored documents against it.
    location_query = f'{cave} Location, latitude, Longitude. Located in {state} and {county} county.'
    return search(location_query)
@mcp.tool
def general_caving_information(query: str) -> list[dict]:
    """General purpose endpoint for any topic related to caves. Returns up to 5 matches, ordered by most to least relevant."""
    # The query is passed to search() unchanged; search() embeds and ranks it.
    return search(query)
# Add a protected tool to test authentication
@mcp.tool
async def get_token_info() -> dict:
    """Returns information about the Auth0 token.

    Returns:
        A dict with the ``iss``, ``aud`` and ``scope`` claims of the current
        access token under the keys ``issuer``, ``audience`` and ``scope``
        (each is None when the claim is absent).

    Raises:
        RuntimeError: If no access token is available for the current request.
    """
    # Imported locally, as in the original, so the module's top-level imports
    # stay untouched.
    from fastmcp.server.dependencies import get_access_token

    token = get_access_token()
    if token is None:
        # Without this guard an unauthenticated call fails with an opaque
        # AttributeError on `token.claims`.
        raise RuntimeError(
            "No access token available: the request is not authenticated."
        )

    claims = token.claims
    return {
        "issuer": claims.get("iss"),
        "audience": claims.get("aud"),
        "scope": claims.get("scope"),
    }
# Script entry point: serve the registered tools over HTTP on the IPv6
# loopback address.
if __name__ == "__main__":
    mcp.run(
        transport='http',
        host='::1',
        port=9031,
    )

27
mcp/test/client.py Normal file
View File

@@ -0,0 +1,27 @@
import asyncio
from fastmcp import Client
# FastMCP client shared by both test helpers below, pointed at the /mcp
# endpoint on the IPv6 loopback address.
# NOTE(review): this targets port 8031, while mcp/server.py calls
# mcp.run(..., port=9031) - confirm which port is intended (or whether a proxy
# sits in between).
client = Client("http://[::1]:8031/mcp")
async def test_get_cave_location(cave: str, state: str, county: str):
    """Call the ``get_cave_location`` tool and print every returned match.

    Prints a blank line, then the cave name, then one line per item found
    under the ``'result'`` key of the response's structured content.
    """
    arguments = {"cave": cave, "state": state, "county": county}
    async with client:
        outcome = await client.call_tool("get_cave_location", arguments)
        print()
        print(cave)
        matches = outcome.structured_content['result']
        for match in matches:
            print(match)
async def test_general_caving_information(query: str):
    """Call the ``general_caving_information`` tool and print every match.

    Prints a blank line, then the query, then one line per item found under
    the ``'result'`` key of the response's structured content.
    """
    arguments = {"query": query}
    async with client:
        outcome = await client.call_tool("general_caving_information", arguments)
        print()
        print(query)
        matches = outcome.structured_content['result']
        for match in matches:
            print(match)
# Manual smoke tests: each asyncio.run() call performs one tool invocation
# through the shared client and prints the results. The commented-out lines
# are further sample queries that are currently disabled.
asyncio.run(test_get_cave_location("Nellies Cave", "VA", "Montgomery"))
asyncio.run(test_get_cave_location("links cave", "VA", "Giles"))
#asyncio.run(test_get_cave_location("new river", "VA", "Giles"))
#asyncio.run(test_get_cave_location("tawneys", "VA", "Giles"))
#asyncio.run(test_get_cave_location("staty fork", "WV", "Pocahontas"))
#asyncio.run(test_general_caving_information("broken sunnto"))

1483
mcp/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff