Compare commits

...

7 Commits

Author SHA1 Message Date
29b111080f rerank
All checks were successful
Build and Push Agent Docker Image / build (push) Successful in 2m47s
Build and Push Web Docker Image / build (push) Successful in 8m6s
2025-12-24 22:49:34 +01:00
f869381283 human readable source 2025-12-24 22:32:09 +01:00
bc1dc8a11a tell user when tool limit is reached 2025-12-24 21:47:41 +01:00
4ac0389ce2 bot no longer fails due to tool limits 2025-12-24 21:30:31 +01:00
6654496379 json logging 2025-12-24 00:26:20 +01:00
e2c18b07a5 logfire 2025-12-23 23:57:17 +01:00
31a9e868e9 attempt to fix chats not returning 2025-12-23 23:27:11 +01:00
6 changed files with 109 additions and 31 deletions

View File

@@ -59,23 +59,38 @@ def embed(text, input_type):
assert resp.embeddings.float_ is not None
return resp.embeddings.float_[0]
def search(query, roles: list[str], top_n: int = 3, max_content_length: int = 1500) -> list[dict]:
    """Search with vector similarity, then rerank with Cohere for better relevance.

    Args:
        query: Free-text search query.
        roles: Roles the user holds; results are restricted to these roles.
        top_n: Number of reranked results to return.
        max_content_length: Content longer than this is truncated with a hint
            to fetch the full page via get_document_page.

    Returns:
        List of dicts with 'key', 'content', and 'relevance' (rerank score
        rounded to 3 decimals), ordered by reranker relevance.
    """
    # No roles means no accessible documents — bail out before paying for an
    # embedding API call.
    if not roles:
        return []
    query_embedding = embed(query, 'search_query')
    # Fetch more candidates than requested so the reranker has a real pool
    # to choose from.
    candidate_limit = top_n * 4
    rows = conn.execute(
        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
        (roles, query_embedding, candidate_limit)
    ).fetchall()
    if not rows:
        return []
    # Rerank with Cohere for better relevance
    rerank_resp = co.rerank(
        query=query,
        documents=[row['content'] or '' for row in rows],
        model='rerank-v3.5',
        top_n=top_n,
    )
    docs = []
    for result in rerank_resp.results:
        # result.index points back into the candidate rows.
        row = rows[result.index]
        content = row['content'] or ''
        if len(content) > max_content_length:
            content = content[:max_content_length] + '...[truncated, use get_document_page for full text]'
        docs.append({'key': row['key'], 'content': content, 'relevance': round(result.relevance_score, 3)})
    return docs
@mcp.tool

View File

@@ -12,4 +12,6 @@ dependencies = [
"ag-ui-protocol",
"python-dotenv",
"httpx",
"logfire>=4.16.0",
"python-json-logger>=4.0.0",
]

View File

@@ -5,18 +5,32 @@ PydanticAI agent with MCP tools from Cavepedia server.
import os
import logging
import httpx
import logfire
from pydantic_ai import Agent, ModelMessage, RunContext
from pydantic_ai.settings import ModelSettings
# Set up logging BEFORE logfire (otherwise basicConfig is ignored)
from pythonjsonlogger import jsonlogger

# LOG_LEVEL is a level name ("DEBUG", "INFO", ...); fall back to INFO on junk.
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
handler = logging.StreamHandler()
handler.setFormatter(jsonlogger.JsonFormatter("%(asctime)s %(name)s %(levelname)s %(message)s"))
logging.basicConfig(
    level=getattr(logging, log_level, logging.INFO),
    handlers=[handler],
)
logger = logging.getLogger(__name__)

# Configure Logfire for observability
logfire.configure(
    environment=os.getenv('ENVIRONMENT', 'development'),
)
logfire.instrument_pydantic_ai()
logfire.instrument_httpx()
from typing import Any
from pydantic_ai import Agent, ModelMessage, RunContext
from pydantic_ai.settings import ModelSettings
from pydantic_ai.mcp import CallToolFunc
# MCP endpoint serving Cavepedia tools; overridable for local development.
CAVE_MCP_URL = os.getenv("CAVE_MCP_URL", "https://mcp.caving.dev/mcp")
# Lazy %-formatting: the message is only built if INFO is enabled.
logger.info("Initializing Cavepedia agent with CAVE_MCP_URL=%s", CAVE_MCP_URL)
@@ -64,13 +78,36 @@ def check_mcp_available(url: str, timeout: float = 5.0) -> bool:
# System prompt for the caving agent; rule 8 pairs with the tool-call limiter,
# which injects the matching "search limit" message.
AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveying, locations, geology, equipment, history, conservation.
Rules:
1. ALWAYS cite sources in a bulleted list at the end of every reply, even if there's only one. Format them human-readably (e.g., "- The Trog 2021, page 19" not "vpi/trog/2021-trog.pdf/page-19.pdf").
2. Say when uncertain. Never hallucinate.
3. Be safety-conscious.
4. Can create ascii diagrams/maps.
5. Be direct—no sycophantic phrases.
6. Keep responses concise.
7. Use tools sparingly—one search usually suffices.
8. If you hit the search limit, end your reply with an italicized note: *Your question may be too broad. Try asking something more specific.* Do NOT mention "tools" or "tool limits"—the user doesn't know what those are."""
def create_tool_call_limiter(max_calls: int = 3):
    """Create a process_tool_call callback that limits tool calls.

    Args:
        max_calls: Maximum tool calls allowed per agent run before the
            limiter starts short-circuiting.

    Returns:
        An async callback suitable for an MCP server's ``process_tool_call``
        hook. Calls beyond ``max_calls`` are not forwarded; instead the model
        receives a string telling it to stop searching and answer.
    """
    # Counter lives in the closure; shared across every call this limiter sees.
    calls_made = 0

    async def process_tool_call(
        ctx: RunContext,
        call_tool: CallToolFunc,
        name: str,
        tool_args: dict[str, Any],
    ):
        nonlocal calls_made
        calls_made += 1
        if calls_made <= max_calls:
            # Within budget: forward the call to the real tool.
            return await call_tool(name, tool_args)
        # Over budget: return an instruction instead of executing the tool,
        # matching rule 8 of AGENT_INSTRUCTIONS.
        return (
            f"SEARCH LIMIT REACHED: You have made {max_calls} searches. "
            "Stop searching and answer now with what you have. "
            "End your reply with: *Your question may be too broad. Try asking something more specific.*"
        )

    return process_tool_call
def create_agent(user_roles: list[str] | None = None):
@@ -92,6 +129,7 @@ def create_agent(user_roles: list[str] | None = None):
url=CAVE_MCP_URL,
headers={"x-user-roles": roles_header},
timeout=30.0,
process_tool_call=create_tool_call_limiter(max_calls=3),
)
toolsets.append(mcp_server)
logger.info(f"MCP server configured with roles: {user_roles}")

View File

@@ -14,13 +14,27 @@ from pydantic_ai.settings import ModelSettings
load_dotenv()
# Set up structured JSON logging based on environment
from pythonjsonlogger import jsonlogger

# LOG_LEVEL is a level name ("DEBUG", "INFO", ...); fall back to INFO on junk.
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
json_formatter = jsonlogger.JsonFormatter("%(asctime)s %(name)s %(levelname)s %(message)s")

# Configure root logger with JSON
handler = logging.StreamHandler()
handler.setFormatter(json_formatter)
logging.basicConfig(
    level=getattr(logging, log_level, logging.INFO),
    handlers=[handler],
)
logger = logging.getLogger(__name__)

# Apply JSON formatter to uvicorn loggers (works even when run via `uvicorn src.main:app`)
for uvicorn_logger_name in ("uvicorn", "uvicorn.error", "uvicorn.access"):
    uvicorn_logger = logging.getLogger(uvicorn_logger_name)
    uvicorn_logger.handlers = [handler]
    uvicorn_logger.setLevel(getattr(logging, log_level, logging.INFO))
    # Avoid duplicate records: our handler is attached directly, so don't
    # also propagate to the root logger.
    uvicorn_logger.propagate = False
# Validate required environment variables
if not os.getenv("ANTHROPIC_API_KEY"):
logger.error("ANTHROPIC_API_KEY environment variable is required")
@@ -41,12 +55,9 @@ logger.info("Creating AG-UI app...")
async def handle_agent_request(request: Request) -> Response:
"""Handle incoming AG-UI requests with dynamic role-based MCP configuration."""
# Debug: log all incoming headers
logger.info(f"DEBUG: All request headers: {dict(request.headers)}")
# Extract user roles from request headers
roles_header = request.headers.get("x-user-roles", "")
logger.info(f"DEBUG: x-user-roles header value: '{roles_header}'")
user_roles = []
if roles_header:
@@ -59,13 +70,12 @@ async def handle_agent_request(request: Request) -> Response:
# Create agent with the user's roles
agent = create_agent(user_roles)
# Dispatch the request using AGUIAdapter with usage limits
# Dispatch the request - tool limits handled by ToolCallLimiter in agent.py
return await AGUIAdapter.dispatch_request(
request,
agent=agent,
usage_limits=UsageLimits(
request_limit=5, # Max 5 LLM requests per query
tool_calls_limit=3, # Max 3 tool calls per query
request_limit=10, # Safety net for runaway requests
),
model_settings=ModelSettings(max_tokens=4096),
)

4
web/agent/uv.lock generated
View File

@@ -231,10 +231,12 @@ source = { virtual = "." }
dependencies = [
{ name = "ag-ui-protocol" },
{ name = "httpx" },
{ name = "logfire" },
{ name = "mcp" },
{ name = "openai" },
{ name = "pydantic-ai" },
{ name = "python-dotenv" },
{ name = "python-json-logger" },
{ name = "starlette" },
{ name = "uvicorn" },
]
@@ -243,10 +245,12 @@ dependencies = [
requires-dist = [
{ name = "ag-ui-protocol" },
{ name = "httpx" },
{ name = "logfire", specifier = ">=4.16.0" },
{ name = "mcp" },
{ name = "openai" },
{ name = "pydantic-ai" },
{ name = "python-dotenv" },
{ name = "python-json-logger", specifier = ">=4.0.0" },
{ name = "starlette" },
{ name = "uvicorn" },
]

View File

@@ -7,20 +7,29 @@ import { useUser } from "@auth0/nextjs-auth0/client";
import LoginButton from "@/components/LoginButton";
import LogoutButton from "@/components/LogoutButton";
// Separate component to safely use useCopilotChat hook
function ThinkingIndicator() {
// Block input and show indicator while agent is processing
function LoadingOverlay() {
try {
const { isLoading } = useCopilotChat();
if (!isLoading) return null;
return (
<div className="absolute bottom-24 left-1/2 transform -translate-x-1/2 bg-white shadow-lg rounded-full px-4 py-2 flex items-center gap-2 z-50">
<div className="flex gap-1">
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "0ms" }}></span>
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "150ms" }}></span>
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "300ms" }}></span>
<>
{/* Overlay to block input area */}
<div
className="absolute bottom-0 left-0 right-0 h-24 z-40"
style={{ pointerEvents: 'all' }}
onClick={(e) => e.stopPropagation()}
/>
{/* Thinking indicator */}
<div className="absolute bottom-24 left-1/2 transform -translate-x-1/2 bg-white shadow-lg rounded-full px-4 py-2 flex items-center gap-2 z-50">
<div className="flex gap-1">
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "0ms" }}></span>
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "150ms" }}></span>
<span className="w-2 h-2 bg-indigo-500 rounded-full animate-bounce" style={{ animationDelay: "300ms" }}></span>
</div>
<span className="text-sm text-gray-600">Thinking...</span>
</div>
<span className="text-sm text-gray-600">Thinking...</span>
</div>
</>
);
} catch {
return null;
@@ -121,7 +130,7 @@ export default function CopilotKitPage() {
className="h-full w-full"
/>
</div>
<ThinkingIndicator />
<LoadingOverlay />
</div>
</main>
);