diff --git a/mcp/server.py b/mcp/server.py index e1911f2..b067027 100644 --- a/mcp/server.py +++ b/mcp/server.py @@ -59,7 +59,7 @@ def embed(text, input_type): assert resp.embeddings.float_ is not None return resp.embeddings.float_[0] -def search(query, roles: list[str]) -> list[dict]: +def search(query, roles: list[str], limit: int = 5) -> list[dict]: query_embedding = embed(query, 'search_query') if not roles: @@ -67,34 +67,31 @@ def search(query, roles: list[str]) -> list[dict]: return [] rows = conn.execute( - 'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT 5', - (roles, query_embedding) + 'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s', + (roles, query_embedding, limit) ).fetchall() - docs = [] - for row in rows: - docs.append({ 'key': row['key'], 'content': row['content']}) - return docs + + return [{'key': row['key'], 'content': row['content']} for row in rows] @mcp.tool def get_cave_location(cave: str, state: str, county: str) -> list[dict]: - """Lookup cave location as coordinates. Returns up to 5 matches, ordered by most to least relevant.""" + """Lookup cave location as coordinates.""" roles = get_user_roles() return search(f'{cave} Location, latitude, Longitude. Located in {state} and {county} county.', roles) @mcp.tool def general_caving_information(query: str) -> list[dict]: - """General purpose endpoint for any topic related to caves. Returns up to 5 matches, ordered by most to least relevant.""" + """General purpose search for any topic related to caves.""" roles = get_user_roles() return search(query, roles) @mcp.tool -def get_document_page(document: str, page: int) -> dict: - """Lookup a specific page of a document by its path and page number. Document should be the path like 'nss/compasstape/issue_20.pdf'.""" +def get_document_page(key: str) -> dict: + """Fetch full content for a document page. Pass the exact 'key' value from search results.""" roles = get_user_roles() if not roles: return {"error": "No roles assigned"} - key = f"{document}/page-{page}.pdf" row = conn.execute( 'SELECT key, content FROM embeddings WHERE key = %s AND role = ANY(%s)', (key, roles) diff --git a/web/agent/src/agent.py b/web/agent/src/agent.py index c63167a..8a8a112 100644 --- a/web/agent/src/agent.py +++ b/web/agent/src/agent.py @@ -6,7 +6,8 @@ import os import logging import httpx -from pydantic_ai import Agent +from pydantic_ai import Agent, ModelMessage, RunContext +from pydantic_ai.settings import ModelSettings # Set up logging based on environment log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO @@ -20,6 +21,12 @@ CAVE_MCP_URL = os.getenv("CAVE_MCP_URL", "https://mcp.caving.dev/mcp") logger.info("Initializing Cavepedia agent...") + +def limit_history(ctx: RunContext[None], messages: list[ModelMessage]) -> list[ModelMessage]: + """Limit conversation history to manage token usage and request size.""" + # Keep only the last few messages to avoid large requests hitting Cloudflare limits + return messages[-4:] + def check_mcp_available(url: str, timeout: float = 5.0) -> bool: """Check if MCP server is reachable via health endpoint.""" try: @@ -38,14 +45,16 @@ def check_mcp_available(url: str, timeout: float = 5.0) -> bool: MCP_AVAILABLE = check_mcp_available(CAVE_MCP_URL) logger.info(f"MCP server available: {MCP_AVAILABLE}") -AGENT_INSTRUCTIONS = """You are a helpful caving assistant. Help users with all aspects of caving including cave exploration, safety, surveying techniques, cave locations, geology, equipment, history, conservation, and any other caving-related topics. +AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveying, locations, geology, equipment, history, conservation. -IMPORTANT RULES: -1. Always cite your sources at the end of each response when possible. -2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts. -3. Provide accurate, helpful, and safety-conscious information. -4. You specialize in creating ascii art diagrams or maps. -5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional.""" +Rules: +1. Cite sources when possible. +2. Say when uncertain. Never hallucinate. +3. Be safety-conscious. +4. Can create ascii diagrams/maps. +5. Be direct—no sycophantic phrases. +6. Keep responses concise. +7. Use tools sparingly—one search usually suffices. Answer from your knowledge when possible.""" def create_agent(user_roles: list[str] | None = None): @@ -75,9 +84,11 @@ def create_agent(user_roles: list[str] | None = None): logger.info("MCP server unavailable - running without MCP tools") return Agent( - model="openrouter:google/gemini-3-pro-preview", + model="anthropic:claude-sonnet-4-5", toolsets=toolsets if toolsets else None, instructions=AGENT_INSTRUCTIONS, + history_processors=[limit_history], + model_settings=ModelSettings(max_tokens=4096), ) diff --git a/web/agent/src/main.py b/web/agent/src/main.py index dfcb908..de95c98 100644 --- a/web/agent/src/main.py +++ b/web/agent/src/main.py @@ -7,6 +7,8 @@ import sys import json import logging from dotenv import load_dotenv +from pydantic_ai.usage import UsageLimits +from pydantic_ai.settings import ModelSettings # Load environment variables BEFORE importing agent load_dotenv() @@ -20,8 +22,8 @@ logging.basicConfig( logger = logging.getLogger(__name__) # Validate required environment variables -if not os.getenv("OPENROUTER_API_KEY"): - logger.error("OPENROUTER_API_KEY environment variable is required") +if not os.getenv("ANTHROPIC_API_KEY"): + logger.error("ANTHROPIC_API_KEY environment variable is required") sys.exit(1) import uvicorn @@ -57,8 +59,16 @@ async def handle_agent_request(request: Request) -> Response: # Create agent with the user's roles agent = create_agent(user_roles) - # Dispatch the request using AGUIAdapter - return await AGUIAdapter.dispatch_request(request, agent=agent) + # Dispatch the request using AGUIAdapter with usage limits + return await AGUIAdapter.dispatch_request( + request, + agent=agent, + usage_limits=UsageLimits( + request_limit=5, # Max 5 LLM requests per query + tool_calls_limit=3, # Max 3 tool calls per query + ), + model_settings=ModelSettings(max_tokens=4096), + ) async def health(request: Request) -> Response: diff --git a/web/src/app/page.tsx b/web/src/app/page.tsx index f05c5a3..43726f6 100644 --- a/web/src/app/page.tsx +++ b/web/src/app/page.tsx @@ -114,7 +114,6 @@ export default function CopilotKitPage() {