sonnet, optimize some

2025-12-17 20:40:31 +01:00
parent 6e7d071dea
commit 5e66859246
4 changed files with 43 additions and 26 deletions
--- a/mcp/server.py
+++ b/mcp/server.py
@@ -59,7 +59,7 @@ def embed(text, input_type):
    assert resp.embeddings.float_ is not None
    return resp.embeddings.float_[0]
-def search(query, roles: list[str]) -> list[dict]:
+def search(query, roles: list[str], limit: int = 5) -> list[dict]:
    query_embedding = embed(query, 'search_query')
    if not roles:
@@ -67,34 +67,31 @@ def search(query, roles: list[str]) -> list[dict]:
        return []
    rows = conn.execute(
-        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT 5',
+        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
-        (roles, query_embedding)
+        (roles, query_embedding, limit)
    ).fetchall()
-    docs = []
+
-    for row in rows:
+    return [{'key': row['key'], 'content': row['content']} for row in rows]
        docs.append({ 'key': row['key'], 'content': row['content']})
    return docs
@mcp.tool
 def get_cave_location(cave: str, state: str, county: str) -> list[dict]:
-    """Lookup cave location as coordinates. Returns up to 5 matches, ordered by most to least relevant."""
+    """Lookup cave location as coordinates."""
    roles = get_user_roles()
    return search(f'{cave} Location, latitude, Longitude. Located in {state} and {county} county.', roles)
@mcp.tool
 def general_caving_information(query: str) -> list[dict]:
-    """General purpose endpoint for any topic related to caves. Returns up to 5 matches, ordered by most to least relevant."""
+    """General purpose search for any topic related to caves."""
    roles = get_user_roles()
    return search(query, roles)
@mcp.tool
-def get_document_page(document: str, page: int) -> dict:
+def get_document_page(key: str) -> dict:
-    """Lookup a specific page of a document by its path and page number. Document should be the path like 'nss/compasstape/issue_20.pdf'."""
+    """Fetch full content for a document page. Pass the exact 'key' value from search results."""
    roles = get_user_roles()
    if not roles:
        return {"error": "No roles assigned"}
    key = f"{document}/page-{page}.pdf"
    row = conn.execute(
        'SELECT key, content FROM embeddings WHERE key = %s AND role = ANY(%s)',
        (key, roles)
--- a/web/agent/src/agent.py
+++ b/web/agent/src/agent.py
@@ -6,7 +6,8 @@ import os
 import logging
 import httpx
-from pydantic_ai import Agent
+from pydantic_ai import Agent, ModelMessage, RunContext
 from pydantic_ai.settings import ModelSettings
 # Set up logging based on environment
 log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO
@@ -20,6 +21,12 @@ CAVE_MCP_URL = os.getenv("CAVE_MCP_URL", "https://mcp.caving.dev/mcp")
 logger.info("Initializing Cavepedia agent...")
 def limit_history(ctx: RunContext[None], messages: list[ModelMessage]) -> list[ModelMessage]:
    """Limit conversation history to manage token usage and request size.

    Keeps roughly the last four messages so requests stay small enough to
    avoid hitting Cloudflare limits. The cut point is moved back to the
    nearest request that carries no tool output, so the returned window never
    opens on a model response or on a tool return whose matching tool call
    was sliced away.

    Args:
        ctx: Run context supplied by the agent; not used here.
        messages: Full message history, oldest first.

    Returns:
        The trailing slice of ``messages`` starting at a clean request
        boundary, or the whole list when no such boundary exists.
    """
    start = max(len(messages) - 4, 0)
    # A plain slice can orphan tool returns (their tool call falls outside the
    # window) or drop the user's prompt mid-run; widen the window instead.
    while start > 0:
        first = messages[start]
        carries_tool_output = any(
            getattr(part, 'part_kind', None) in ('tool-return', 'retry-prompt')
            for part in getattr(first, 'parts', ())
        )
        if getattr(first, 'kind', None) == 'request' and not carries_tool_output:
            break
        start -= 1
    return messages[start:]
 def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
    """Check if MCP server is reachable via health endpoint."""
    try:
@@ -38,14 +45,16 @@ def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
 MCP_AVAILABLE = check_mcp_available(CAVE_MCP_URL)
 logger.info(f"MCP server available: {MCP_AVAILABLE}")
-AGENT_INSTRUCTIONS = """You are a helpful caving assistant. Help users with all aspects of caving including cave exploration, safety, surveying techniques, cave locations, geology, equipment, history, conservation, and any other caving-related topics.
+AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveying, locations, geology, equipment, history, conservation.
-IMPORTANT RULES:
+Rules:
-1. Always cite your sources at the end of each response when possible.
+1. Cite sources when possible.
-2. If you're not certain about information, say so clearly. You may infer some information, but NOT make up information or hallucinate facts.
+2. Say when uncertain. Never hallucinate.
-3. Provide accurate, helpful, and safety-conscious information.
+3. Be safety-conscious.
-4. You specialize in creating ascii art diagrams or maps.
+4. Can create ascii diagrams/maps.
-5. Never use sycophantic phrases like "you're absolutely right", "great question", or excessive praise. Be direct and professional."""
+5. Be direct—no sycophantic phrases.
 6. Keep responses concise.
 7. Use tools sparingly—one search usually suffices. Answer from your knowledge when possible."""
 def create_agent(user_roles: list[str] | None = None):
@@ -75,9 +84,11 @@ def create_agent(user_roles: list[str] | None = None):
        logger.info("MCP server unavailable - running without MCP tools")
    return Agent(
-        model="openrouter:google/gemini-3-pro-preview",
+        model="anthropic:claude-sonnet-4-5",
        toolsets=toolsets if toolsets else None,
        instructions=AGENT_INSTRUCTIONS,
        history_processors=[limit_history],
        model_settings=ModelSettings(max_tokens=4096),
    )
--- a/web/agent/src/main.py
+++ b/web/agent/src/main.py
@@ -7,6 +7,8 @@ import sys
 import json
 import logging
 from dotenv import load_dotenv
 from pydantic_ai.usage import UsageLimits
 from pydantic_ai.settings import ModelSettings
 # Load environment variables BEFORE importing agent
 load_dotenv()
@@ -20,8 +22,8 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 # Validate required environment variables
-if not os.getenv("OPENROUTER_API_KEY"):
+if not os.getenv("ANTHROPIC_API_KEY"):
-    logger.error("OPENROUTER_API_KEY environment variable is required")
+    logger.error("ANTHROPIC_API_KEY environment variable is required")
    sys.exit(1)
 import uvicorn
@@ -57,8 +59,16 @@ async def handle_agent_request(request: Request) -> Response:
    # Create agent with the user's roles
    agent = create_agent(user_roles)
-    # Dispatch the request using AGUIAdapter
+    # Dispatch the request using AGUIAdapter with usage limits
-    return await AGUIAdapter.dispatch_request(request, agent=agent)
+    return await AGUIAdapter.dispatch_request(
        request,
        agent=agent,
        usage_limits=UsageLimits(
            request_limit=5,      # Max 5 LLM requests per query
            tool_calls_limit=3,   # Max 3 tool calls per query
        ),
        model_settings=ModelSettings(max_tokens=4096),
    )
 async def health(request: Request) -> Response:
--- a/web/src/app/page.tsx
+++ b/web/src/app/page.tsx
@@ -114,7 +114,6 @@ export default function CopilotKitPage() {
      <div className="flex-1 flex justify-center py-8 px-2 overflow-hidden relative">
        <div className="h-full w-full max-w-5xl flex flex-col">
          <CopilotChat
            instructions={"You are a knowledgeable caving assistant. Help users with all aspects of caving including cave exploration, safety, surveying techniques, cave locations, geology, equipment, history, conservation, and any other caving-related topics. Provide accurate, helpful, and safety-conscious information. CRITICAL: Always cite sources at the end of each response."}
            labels={{
              title: "AI Cartwright",
              initial: "Hello! I'm here to help with anything related to caving. Ask me about caves, techniques, safety, equipment, or anything else caving-related!",