Switch agent to Claude Sonnet and reduce token usage

This commit is contained in:
2025-12-17 20:40:31 +01:00
parent 6e7d071dea
commit 5e66859246
4 changed files with 43 additions and 26 deletions

View File

@@ -59,7 +59,7 @@ def embed(text, input_type):
assert resp.embeddings.float_ is not None assert resp.embeddings.float_ is not None
return resp.embeddings.float_[0] return resp.embeddings.float_[0]
def search(query, roles: list[str]) -> list[dict]: def search(query, roles: list[str], limit: int = 5) -> list[dict]:
query_embedding = embed(query, 'search_query') query_embedding = embed(query, 'search_query')
if not roles: if not roles:
@@ -67,34 +67,31 @@ def search(query, roles: list[str]) -> list[dict]:
return [] return []
rows = conn.execute( rows = conn.execute(
'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT 5', 'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
(roles, query_embedding) (roles, query_embedding, limit)
).fetchall() ).fetchall()
docs = []
for row in rows: return [{'key': row['key'], 'content': row['content']} for row in rows]
docs.append({ 'key': row['key'], 'content': row['content']})
return docs
@mcp.tool @mcp.tool
def get_cave_location(cave: str, state: str, county: str) -> list[dict]: def get_cave_location(cave: str, state: str, county: str) -> list[dict]:
"""Lookup cave location as coordinates. Returns up to 5 matches, ordered by most to least relevant.""" """Lookup cave location as coordinates."""
roles = get_user_roles() roles = get_user_roles()
return search(f'{cave} Location, latitude, Longitude. Located in {state} and {county} county.', roles) return search(f'{cave} Location, latitude, Longitude. Located in {state} and {county} county.', roles)
@mcp.tool @mcp.tool
def general_caving_information(query: str) -> list[dict]: def general_caving_information(query: str) -> list[dict]:
"""General purpose endpoint for any topic related to caves. Returns up to 5 matches, ordered by most to least relevant.""" """General purpose search for any topic related to caves."""
roles = get_user_roles() roles = get_user_roles()
return search(query, roles) return search(query, roles)
@mcp.tool @mcp.tool
def get_document_page(document: str, page: int) -> dict: def get_document_page(key: str) -> dict:
"""Lookup a specific page of a document by its path and page number. Document should be the path like 'nss/compasstape/issue_20.pdf'.""" """Fetch full content for a document page. Pass the exact 'key' value from search results."""
roles = get_user_roles() roles = get_user_roles()
if not roles: if not roles:
return {"error": "No roles assigned"} return {"error": "No roles assigned"}
key = f"{document}/page-{page}.pdf"
row = conn.execute( row = conn.execute(
'SELECT key, content FROM embeddings WHERE key = %s AND role = ANY(%s)', 'SELECT key, content FROM embeddings WHERE key = %s AND role = ANY(%s)',
(key, roles) (key, roles)

View File

@@ -6,7 +6,8 @@ import os
import logging import logging
import httpx import httpx
from pydantic_ai import Agent from pydantic_ai import Agent, ModelMessage, RunContext
from pydantic_ai.settings import ModelSettings
# Set up logging based on environment # Set up logging based on environment
log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO
@@ -20,6 +21,12 @@ CAVE_MCP_URL = os.getenv("CAVE_MCP_URL", "https://mcp.caving.dev/mcp")
logger.info("Initializing Cavepedia agent...") logger.info("Initializing Cavepedia agent...")
# Target number of trailing messages to keep; the window may grow past this
# so that it never starts in the middle of a turn (see limit_history).
_MAX_HISTORY_MESSAGES = 4


def _opens_user_turn(message) -> bool:
    """Return True if *message* is a request carrying a user prompt and no tool return."""
    # Duck-typed on the `kind` / `part_kind` discriminator fields so no extra
    # imports are needed in this module.
    if getattr(message, "kind", None) != "request":
        return False
    part_kinds = {getattr(part, "part_kind", None) for part in getattr(message, "parts", ())}
    return "user-prompt" in part_kinds and "tool-return" not in part_kinds


def limit_history(ctx: RunContext[None], messages: list[ModelMessage]) -> list[ModelMessage]:
    """Limit conversation history to manage token usage and request size.

    Keeps the last ``_MAX_HISTORY_MESSAGES`` messages to avoid large requests
    hitting Cloudflare limits. A blind tail slice can start on a model response
    or on a tool-return request whose matching tool call was cut off, and can
    drop the user's prompt entirely in the middle of a multi-tool run. To avoid
    that, the window is widened backwards until it begins on a request that
    opens a user turn.

    Args:
        ctx: Run context supplied by the agent (unused).
        messages: Full message history, oldest first.

    Returns:
        A new list holding the trailing messages, beginning at a user-turn
        boundary whenever one exists at or before the cut point. If none
        exists, the whole history is returned.
    """
    start = max(0, len(messages) - _MAX_HISTORY_MESSAGES)
    # Walk back to the nearest turn boundary so tool calls stay paired with
    # their returns and the prompt being answered is not lost.
    while start > 0 and not _opens_user_turn(messages[start]):
        start -= 1
    return messages[start:]
def check_mcp_available(url: str, timeout: float = 5.0) -> bool: def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
"""Check if MCP server is reachable via health endpoint.""" """Check if MCP server is reachable via health endpoint."""
try: try:
@@ -38,14 +45,16 @@ def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
MCP_AVAILABLE = check_mcp_available(CAVE_MCP_URL) MCP_AVAILABLE = check_mcp_available(CAVE_MCP_URL)
logger.info(f"MCP server available: {MCP_AVAILABLE}") logger.info(f"MCP server available: {MCP_AVAILABLE}")
AGENT_INSTRUCTIONS = """You are a helpful caving assistant. Help users with all aspects of caving including cave exploration, safety, surveying techniques, cave locations, geology, equipment, history, conservation, and any other caving-related topics. AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveying, locations, geology, equipment, history, conservation.
IMPORTANT RULES: Rules:
1. Always cite your sources at the end of each response when possible. 1. Cite sources when possible.
2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts. 2. Say when uncertain. Never hallucinate.
3. Provide accurate, helpful, and safety-conscious information. 3. Be safety-conscious.
4. You specialize in creating ascii art diagrams or maps. 4. Can create ascii diagrams/maps.
5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional.""" 5. Be direct—no sycophantic phrases.
6. Keep responses concise.
7. Use tools sparingly—one search usually suffices. Answer from your knowledge when possible."""
def create_agent(user_roles: list[str] | None = None): def create_agent(user_roles: list[str] | None = None):
@@ -75,9 +84,11 @@ def create_agent(user_roles: list[str] | None = None):
logger.info("MCP server unavailable - running without MCP tools") logger.info("MCP server unavailable - running without MCP tools")
return Agent( return Agent(
model="openrouter:google/gemini-3-pro-preview", model="anthropic:claude-sonnet-4-5",
toolsets=toolsets if toolsets else None, toolsets=toolsets if toolsets else None,
instructions=AGENT_INSTRUCTIONS, instructions=AGENT_INSTRUCTIONS,
history_processors=[limit_history],
model_settings=ModelSettings(max_tokens=4096),
) )

View File

@@ -7,6 +7,8 @@ import sys
import json import json
import logging import logging
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic_ai.usage import UsageLimits
from pydantic_ai.settings import ModelSettings
# Load environment variables BEFORE importing agent # Load environment variables BEFORE importing agent
load_dotenv() load_dotenv()
@@ -20,8 +22,8 @@ logging.basicConfig(
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Validate required environment variables # Validate required environment variables
if not os.getenv("OPENROUTER_API_KEY"): if not os.getenv("ANTHROPIC_API_KEY"):
logger.error("OPENROUTER_API_KEY environment variable is required") logger.error("ANTHROPIC_API_KEY environment variable is required")
sys.exit(1) sys.exit(1)
import uvicorn import uvicorn
@@ -57,8 +59,16 @@ async def handle_agent_request(request: Request) -> Response:
# Create agent with the user's roles # Create agent with the user's roles
agent = create_agent(user_roles) agent = create_agent(user_roles)
# Dispatch the request using AGUIAdapter # Dispatch the request using AGUIAdapter with usage limits
return await AGUIAdapter.dispatch_request(request, agent=agent) return await AGUIAdapter.dispatch_request(
request,
agent=agent,
usage_limits=UsageLimits(
request_limit=5, # Max 5 LLM requests per query
tool_calls_limit=3, # Max 3 tool calls per query
),
model_settings=ModelSettings(max_tokens=4096),
)
async def health(request: Request) -> Response: async def health(request: Request) -> Response:

View File

@@ -114,7 +114,6 @@ export default function CopilotKitPage() {
<div className="flex-1 flex justify-center py-8 px-2 overflow-hidden relative"> <div className="flex-1 flex justify-center py-8 px-2 overflow-hidden relative">
<div className="h-full w-full max-w-5xl flex flex-col"> <div className="h-full w-full max-w-5xl flex flex-col">
<CopilotChat <CopilotChat
instructions={"You are a knowledgeable caving assistant. Help users with all aspects of caving including cave exploration, safety, surveying techniques, cave locations, geology, equipment, history, conservation, and any other caving-related topics. Provide accurate, helpful, and safety-conscious information. CRITICAL: Always cite sources at the end of each response."}
labels={{ labels={{
title: "AI Cartwright", title: "AI Cartwright",
initial: "Hello! I'm here to help with anything related to caving. Ask me about caves, techniques, safety, equipment, or anything else caving-related!", initial: "Hello! I'm here to help with anything related to caving. Ask me about caves, techniques, safety, equipment, or anything else caving-related!",