rerank

human readable source
tell user when tool limit is reached
2025-12-24 22:49:34 +01:00 · 2025-12-24 22:32:09 +01:00 · 2025-12-24 21:47:41 +01:00 · 2025-12-24 21:30:31 +01:00 · 2025-12-24 00:26:20 +01:00 · 2025-12-23 23:57:17 +01:00
6 changed files with 109 additions and 31 deletions
--- a/mcp/server.py
+++ b/mcp/server.py
@@ -59,23 +59,38 @@ def embed(text, input_type):
    assert resp.embeddings.float_ is not None
    return resp.embeddings.float_[0]
-def search(query, roles: list[str], limit: int = 3, max_content_length: int = 1500) -> list[dict]:
+def search(query, roles: list[str], top_n: int = 3, max_content_length: int = 1500) -> list[dict]:
    """Search with vector similarity, then rerank with Cohere for better relevance."""
    query_embedding = embed(query, 'search_query')
    if not roles:
        return []
    # Fetch more candidates for reranking
    candidate_limit = top_n * 4
    rows = conn.execute(
        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
-        (roles, query_embedding, limit)
+        (roles, query_embedding, candidate_limit)
    ).fetchall()
    if not rows:
        return []
    # Rerank with Cohere for better relevance
    rerank_resp = co.rerank(
        query=query,
        documents=[row['content'] or '' for row in rows],
        model='rerank-v3.5',
        top_n=top_n,
    )
    docs = []
-    for row in rows:
+    for result in rerank_resp.results:
        row = rows[result.index]
        content = row['content'] or ''
        if len(content) > max_content_length:
            content = content[:max_content_length] + '...[truncated, use get_document_page for full text]'
-        docs.append({'key': row['key'], 'content': content})
+        docs.append({'key': row['key'], 'content': content, 'relevance': round(result.relevance_score, 3)})
    return docs
@mcp.tool
--- a/web/agent/pyproject.toml
+++ b/web/agent/pyproject.toml
@@ -12,4 +12,6 @@ dependencies = [
    "ag-ui-protocol",
    "python-dotenv",
    "httpx",
    "logfire>=4.16.0",
    "python-json-logger>=4.0.0",
 ]
--- a/web/agent/src/agent.py
+++ b/web/agent/src/agent.py
@@ -5,18 +5,32 @@ PydanticAI agent with MCP tools from Cavepedia server.
 import os
 import logging
 import httpx
 import logfire
-from pydantic_ai import Agent, ModelMessage, RunContext
+# Set up logging BEFORE logfire (otherwise basicConfig is ignored)
-from pydantic_ai.settings import ModelSettings
+from pythonjsonlogger import jsonlogger
-# Set up logging based on environment
+log_level = os.getenv("LOG_LEVEL", "INFO").upper()
-log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO
+handler = logging.StreamHandler()
 handler.setFormatter(jsonlogger.JsonFormatter("%(asctime)s %(name)s %(levelname)s %(message)s"))
 logging.basicConfig(
-    level=log_level,
+    level=getattr(logging, log_level, logging.INFO),
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    handlers=[handler],
 )
 logger = logging.getLogger(__name__)
 # Configure Logfire for observability
 logfire.configure(
    environment=os.getenv('ENVIRONMENT', 'development'),
 )
 logfire.instrument_pydantic_ai()
 logfire.instrument_httpx()
 from typing import Any
 from pydantic_ai import Agent, ModelMessage, RunContext
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.mcp import CallToolFunc
 CAVE_MCP_URL = os.getenv("CAVE_MCP_URL", "https://mcp.caving.dev/mcp")
 logger.info(f"Initializing Cavepedia agent with CAVE_MCP_URL={CAVE_MCP_URL}")
@@ -64,13 +78,36 @@ def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
 AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveying, locations, geology, equipment, history, conservation.
 Rules:
-1. ALWAYS cite sources at the end of every reply. Use the 'key' from search results (e.g., "Source: vpi/trog/2021-trog.pdf/page-19.pdf").
+1. ALWAYS cite sources in a bulleted list at the end of every reply, even if there's only one. Format them human-readably (e.g., "- The Trog 2021, page 19" not "vpi/trog/2021-trog.pdf/page-19.pdf").
 2. Say when uncertain. Never hallucinate.
 3. Be safety-conscious.
 4. Can create ascii diagrams/maps.
 5. Be direct—no sycophantic phrases.
 6. Keep responses concise.
-7. Use tools sparingly—one search usually suffices."""
+7. Use tools sparingly—one search usually suffices.
 8. If you hit the search limit, end your reply with an italicized note: *Your question may be too broad. Try asking something more specific.* Do NOT mention "tools" or "tool limits"—the user doesn't know what those are."""
 def create_tool_call_limiter(max_calls: int = 3):
    """Build a ``process_tool_call`` hook that caps how many tool calls run.

    The returned coroutine counts every invocation. The first ``max_calls``
    invocations are forwarded to ``call_tool``; every later one returns a
    fixed instruction string instead of calling the tool.
    """
    calls_made = 0  # shared by every invocation of the hook returned below
    async def process_tool_call(
        ctx: RunContext,
        call_tool: CallToolFunc,
        name: str,
        tool_args: dict[str, Any],
    ):
        nonlocal calls_made
        calls_made += 1
        # Still within budget: pass the call straight through.
        if calls_made <= max_calls:
            return await call_tool(name, tool_args)
        # Over budget: hand back instructions in place of a tool result.
        limit_notice = (
            f"SEARCH LIMIT REACHED: You have made {max_calls} searches. "
            "Stop searching and answer now with what you have. "
            "End your reply with: *Your question may be too broad. Try asking something more specific.*"
        )
        return limit_notice
    return process_tool_call
 def create_agent(user_roles: list[str] | None = None):
@@ -92,6 +129,7 @@ def create_agent(user_roles: list[str] | None = None):
                url=CAVE_MCP_URL,
                headers={"x-user-roles": roles_header},
                timeout=30.0,
                process_tool_call=create_tool_call_limiter(max_calls=3),
            )
            toolsets.append(mcp_server)
            logger.info(f"MCP server configured with roles: {user_roles}")
--- a/web/agent/src/main.py
+++ b/web/agent/src/main.py
@@ -14,13 +14,27 @@ from pydantic_ai.settings import ModelSettings
 load_dotenv()
 # Set up logging based on environment
-log_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO
+from pythonjsonlogger import jsonlogger
 log_level = os.getenv("LOG_LEVEL", "INFO").upper()
 json_formatter = jsonlogger.JsonFormatter("%(asctime)s %(name)s %(levelname)s %(message)s")
 # Configure root logger with JSON
 handler = logging.StreamHandler()
 handler.setFormatter(json_formatter)
 logging.basicConfig(
-    level=log_level,
+    level=getattr(logging, log_level, logging.INFO),
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    handlers=[handler],
 )
 logger = logging.getLogger(__name__)
 # Apply JSON formatter to uvicorn loggers (works even when run via `uvicorn src.main:app`)
 for uvicorn_logger_name in ("uvicorn", "uvicorn.error", "uvicorn.access"):
    uvicorn_logger = logging.getLogger(uvicorn_logger_name)
    uvicorn_logger.handlers = [handler]
    uvicorn_logger.setLevel(getattr(logging, log_level, logging.INFO))
    uvicorn_logger.propagate = False
 # Validate required environment variables
 if not os.getenv("ANTHROPIC_API_KEY"):
    logger.error("ANTHROPIC_API_KEY environment variable is required")
@@ -41,12 +55,9 @@ logger.info("Creating AG-UI app...")
 async def handle_agent_request(request: Request) -> Response:
    """Handle incoming AG-UI requests with dynamic role-based MCP configuration."""
    # Debug: log all incoming headers
    logger.info(f"DEBUG: All request headers: {dict(request.headers)}")
    # Extract user roles from request headers
    roles_header = request.headers.get("x-user-roles", "")
    logger.info(f"DEBUG: x-user-roles header value: '{roles_header}'")
    user_roles = []
    if roles_header:
@@ -59,13 +70,12 @@ async def handle_agent_request(request: Request) -> Response:
    # Create agent with the user's roles
    agent = create_agent(user_roles)
-    # Dispatch the request using AGUIAdapter with usage limits
+    # Dispatch the request - tool limits handled by create_tool_call_limiter() in agent.py
    return await AGUIAdapter.dispatch_request(
        request,
        agent=agent,
        usage_limits=UsageLimits(
-            request_limit=5,      # Max 5 LLM requests per query
+            request_limit=10,     # Safety net for runaway requests
            tool_calls_limit=3,   # Max 3 tool calls per query
        ),
        model_settings=ModelSettings(max_tokens=4096),
    )
--- a/web/agent/uv.lock
+++ b/web/agent/uv.lock
@@ -231,10 +231,12 @@ source = { virtual = "." }
 dependencies = [
    { name = "ag-ui-protocol" },
    { name = "httpx" },
    { name = "logfire" },
    { name = "mcp" },
    { name = "openai" },
    { name = "pydantic-ai" },
    { name = "python-dotenv" },
    { name = "python-json-logger" },
    { name = "starlette" },
    { name = "uvicorn" },
 ]
@@ -243,10 +245,12 @@ dependencies = [
 requires-dist = [
    { name = "ag-ui-protocol" },
    { name = "httpx" },
    { name = "logfire", specifier = ">=4.16.0" },
    { name = "mcp" },
    { name = "openai" },
    { name = "pydantic-ai" },
    { name = "python-dotenv" },
    { name = "python-json-logger", specifier = ">=4.0.0" },
    { name = "starlette" },
    { name = "uvicorn" },
 ]
--- a/web/src/app/page.tsx
+++ b/web/src/app/page.tsx
@@ -7,20 +7,29 @@ import { useUser } from "@auth0/nextjs-auth0/client";
 import LoginButton from "@/components/LoginButton";
 import LogoutButton from "@/components/LogoutButton";
-// Separate component to safely use useCopilotChat hook
+// Block input and show indicator while agent is processing
-function ThinkingIndicator() {
+function LoadingOverlay() {
  try {
    const { isLoading } = useCopilotChat();
    if (!isLoading) return null;
    return (
-      <div className="absolute bottom-24 left-1/2 transform -translate-x-1/2 bg-white shadow-lg rounded-full px-4 py-2 flex items-center gap-2 z-50">
+      <>
-        <div className="flex gap-1">
+        {/* Overlay to block input area */}
-          <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "0ms" }}></span>
+        <div
-          <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "150ms" }}></span>
+          className="absolute bottom-0 left-0 right-0 h-24 z-40"
-          <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "300ms" }}></span>
+          style={{ pointerEvents: 'all' }}
          onClick={(e) => e.stopPropagation()}
        />
        {/* Thinking indicator */}
        <div className="absolute bottom-24 left-1/2 transform -translate-x-1/2 bg-white shadow-lg rounded-full px-4 py-2 flex items-center gap-2 z-50">
          <div className="flex gap-1">
            <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "0ms" }}></span>
            <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "150ms" }}></span>
            <span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "300ms" }}></span>
          </div>
          <span className="text-sm text-gray-600">Thinking...</span>
        </div>
-        <span className="text-sm text-gray-600">Thinking...</span>
+      </>
      </div>
    );
  } catch {
    return null;
@@ -121,7 +130,7 @@ export default function CopilotKitPage() {
            className="h-full w-full"
          />
        </div>
-        <ThinkingIndicator />
+        <LoadingOverlay />
      </div>
    </main>
  );
Author	SHA1	Message	Date
Paul Walko	29b111080f	rerank All checks were successful Build and Push Agent Docker Image / build (push) Successful in 2m47s Details Build and Push Web Docker Image / build (push) Successful in 8m6s Details	2025-12-24 22:49:34 +01:00
Paul Walko	f869381283	human readable source	2025-12-24 22:32:09 +01:00
Paul Walko	bc1dc8a11a	tell user when tool limit is reached	2025-12-24 21:47:41 +01:00
Paul Walko	4ac0389ce2	bot isnt shitting itself due to tool limits anymore	2025-12-24 21:30:31 +01:00
Paul Walko	6654496379	json logging	2025-12-24 00:26:20 +01:00
Paul Walko	e2c18b07a5	logfire	2025-12-23 23:57:17 +01:00
Paul Walko	31a9e868e9	attempt to fix chats not returning	2025-12-23 23:27:11 +01:00