Compare commits

...

5 Commits

Author SHA1 Message Date
f6891d231f 3 -> 2 candidates
All checks were successful
Build and Push Agent Docker Image / build (push) Successful in 2m26s
Build and Push Web Docker Image / build (push) Successful in 9m15s
2025-12-26 02:22:34 +01:00
337540496a simplify mcp 2025-12-26 02:18:00 +01:00
383e452322 allow prioritizing prefixes 2025-12-25 03:18:12 +01:00
a91fdb315c more concise 2025-12-25 02:52:50 +01:00
b156094691 sources only 2025-12-25 02:51:49 +01:00
6 changed files with 144 additions and 114 deletions

View File

@@ -59,14 +59,22 @@ def embed(text, input_type):
assert resp.embeddings.float_ is not None
return resp.embeddings.float_[0]
def search(query, roles: list[str], top_n: int = 3, max_content_length: int = 1500) -> list[dict]:
"""Search with vector similarity, then rerank with Cohere for better relevance."""
@mcp.tool
def search_caving_documents(query: str, priority_prefixes: list[str] | None = None) -> dict:
"""Search caving documents for information about caves, techniques, safety, accidents, history, and more.
Args:
query: Search query
priority_prefixes: Optional list of key prefixes to prioritize (e.g., ['nss/aca'] for rescue topics)
"""
roles = get_user_roles()
if not roles:
return {"results": [], "note": "No results. Answer based on your knowledge."}
query_embedding = embed(query, 'search_query')
if not roles:
return []
# Fetch more candidates for reranking
top_n = 2
candidate_limit = top_n * 4
rows = conn.execute(
'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
@@ -74,52 +82,37 @@ def search(query, roles: list[str], top_n: int = 3, max_content_length: int = 15
).fetchall()
if not rows:
return []
return {"results": [], "note": "No results found. Answer based on your knowledge."}
# Rerank with Cohere for better relevance
rerank_resp = co.rerank(
query=query,
documents=[row['content'] or '' for row in rows],
model='rerank-v3.5',
top_n=top_n,
top_n=min(top_n * 2, len(rows)),
)
# Build results with optional priority boost
docs = []
for result in rerank_resp.results:
row = rows[result.index]
score = result.relevance_score
# Boost score if key starts with any priority prefix (e.g., 'nss/aca')
if priority_prefixes:
key = row['key'] or ''
if any(key.startswith(prefix) for prefix in priority_prefixes):
score = min(1.0, score * 1.3)
content = row['content'] or ''
if len(content) > max_content_length:
content = content[:max_content_length] + '...[truncated, use get_document_page for full text]'
docs.append({'key': row['key'], 'content': content, 'relevance': round(result.relevance_score, 3)})
return docs
docs.append({'key': row['key'], 'content': content, 'relevance': round(score, 3)})
@mcp.tool
def get_cave_location(cave: str, state: str, county: str) -> list[dict]:
    """Lookup cave location as coordinates."""
    # NOTE(review): the docstring above is presumably surfaced to the model as
    # the tool description by @mcp.tool, so it is kept verbatim - confirm
    # before rewording it.
    #
    # Roles are resolved first so the lookup is scoped to the current caller.
    caller_roles = get_user_roles()
    # Phrase the lookup as a natural-language query naming the cave, state and
    # county, then delegate to the shared `search` helper.
    location_query = (
        f'{cave} Location, latitude, Longitude. '
        f'Located in {state} and {county} county.'
    )
    return search(location_query, caller_roles)
@mcp.tool
def general_caving_information(query: str) -> list[dict]:
    """General purpose search for any topic related to caves."""
    # NOTE(review): the docstring above is presumably surfaced to the model as
    # the tool description by @mcp.tool, so it is kept verbatim - confirm
    # before rewording it.
    #
    # Thin wrapper: forward the query unchanged to `search`, scoped to the
    # roles of the current caller.
    return search(query, get_user_roles())
@mcp.tool
def get_document_page(key: str) -> dict:
    """Fetch full content for a document page. Pass the exact 'key' value from search results."""
    # NOTE(review): the docstring above is presumably surfaced to the model as
    # the tool description by @mcp.tool, so it is kept verbatim - confirm
    # before rewording it.
    #
    # Returns {"key", "content"} on success, or {"error": ...} when the caller
    # has no roles or no matching row exists.
    caller_roles = get_user_roles()
    if not caller_roles:
        return {"error": "No roles assigned"}

    # The role filter means a key that exists but belongs to a role the caller
    # lacks is reported the same way as a missing key.
    lookup_sql = 'SELECT key, content FROM embeddings WHERE key = %s AND role = ANY(%s)'
    page = conn.execute(lookup_sql, (key, caller_roles)).fetchone()
    if not page:
        return {"error": f"Page not found: {key}"}
    return {"key": page["key"], "content": page["content"]}
# Re-sort by boosted score and return top_n
docs.sort(key=lambda x: x['relevance'], reverse=True)
return {
"results": docs[:top_n],
"note": "These are ALL available results. Do NOT search again - answer using these results now."
}
@mcp.tool
def get_user_info() -> dict:

View File

@@ -26,7 +26,6 @@ logfire.configure(
logfire.instrument_pydantic_ai()
logfire.instrument_httpx()
from typing import Any
from pydantic_ai import Agent, ModelMessage, RunContext
from pydantic_ai.settings import ModelSettings
from pydantic_ai.mcp import CallToolFunc
@@ -43,8 +42,8 @@ def limit_history(ctx: RunContext[None], messages: list[ModelMessage]) -> list[M
if not messages:
return messages
# Keep only the last 4 messages
messages = messages[-4:]
# Keep last 10 messages
messages = messages[-10:]
# Check if the last message is an assistant response with a tool call
# If so, remove it - it's orphaned (no tool result followed)
@@ -80,37 +79,34 @@ AGENT_INSTRUCTIONS = """Caving assistant. Help with exploration, safety, surveyi
Rules:
1. ALWAYS cite sources in a bulleted list at the end of every reply, even if there's only one. Format them human-readably (e.g., "- The Trog 2021, page 19" not "vpi/trog/2021-trog.pdf/page-19.pdf").
2. Say when uncertain. Never hallucinate.
3. Be safety-conscious.
4. Can create ascii diagrams/maps.
5. Be direct—no sycophantic phrases.
6. Keep responses concise.
7. Use tools sparingly—one search usually suffices.
8. If you hit the search limit, end your reply with an italicized note: *Your question may be too broad. Try asking something more specific.* Do NOT mention "tools" or "tool limits"—the user doesn't know what those are."""
3. Be direct—no sycophantic phrases.
4. Keep responses concise.
5. SEARCH EXACTLY ONCE. After searching, IMMEDIATELY answer using those results. NEVER search again - additional searches are blocked and waste resources.
6. For rescue, accident, or emergency-related queries, use priority_prefixes=['nss/aca'] when searching to prioritize official accident reports."""
# Extra prompt text for sources-only mode: create_agent() places it ahead of
# AGENT_INSTRUCTIONS (separated by a blank line) when sources_only is True.
SOURCES_ONLY_INSTRUCTIONS = """SOURCES ONLY MODE: Give exactly ONE sentence summary. Then list sources with specific page numbers (e.g., "- The Trog 2021, page 19"). No explanations."""
def create_tool_call_limiter(max_calls: int = 3):
"""Create a process_tool_call callback that limits tool calls."""
call_count = [0] # Mutable container for closure
def create_search_limiter():
"""Block searches after the first one."""
searched = [False]
async def process_tool_call(
ctx: RunContext,
call_tool: CallToolFunc,
name: str,
tool_args: dict[str, Any],
tool_args: dict,
):
call_count[0] += 1
if call_count[0] > max_calls:
return (
f"SEARCH LIMIT REACHED: You have made {max_calls} searches. "
"Stop searching and answer now with what you have. "
"End your reply with: *Your question may be too broad. Try asking something more specific.*"
)
if name == "search_caving_documents":
if searched[0]:
return "You have already searched. Use the results you have."
searched[0] = True
return await call_tool(name, tool_args)
return process_tool_call
def create_agent(user_roles: list[str] | None = None):
def create_agent(user_roles: list[str] | None = None, sources_only: bool = False):
"""Create an agent with MCP tools configured for the given user roles."""
toolsets = []
@@ -129,7 +125,7 @@ def create_agent(user_roles: list[str] | None = None):
url=CAVE_MCP_URL,
headers={"x-user-roles": roles_header},
timeout=30.0,
process_tool_call=create_tool_call_limiter(max_calls=3),
process_tool_call=create_search_limiter(),
)
toolsets.append(mcp_server)
logger.info(f"MCP server configured with roles: {user_roles}")
@@ -140,10 +136,15 @@ def create_agent(user_roles: list[str] | None = None):
else:
logger.info("MCP server unavailable - running without MCP tools")
# Build instructions based on mode
instructions = AGENT_INSTRUCTIONS
if sources_only:
instructions = f"{SOURCES_ONLY_INSTRUCTIONS}\n\n{AGENT_INSTRUCTIONS}"
return Agent(
model="anthropic:claude-sonnet-4-5",
toolsets=toolsets if toolsets else None,
instructions=AGENT_INSTRUCTIONS,
instructions=instructions,
history_processors=[limit_history],
model_settings=ModelSettings(max_tokens=4096),
)

View File

@@ -67,8 +67,13 @@ async def handle_agent_request(request: Request) -> Response:
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse x-user-roles header: {e}")
# Create agent with the user's roles
agent = create_agent(user_roles)
# Extract sources-only mode from header
sources_only = request.headers.get("x-sources-only", "false") == "true"
if sources_only:
logger.info("Sources-only mode enabled")
# Create agent with the user's roles and mode
agent = create_agent(user_roles, sources_only=sources_only)
# Dispatch the request - search limiting is handled by create_search_limiter in agent.py
return await AGUIAdapter.dispatch_request(

View File

@@ -15,13 +15,19 @@ export const POST = async (req: NextRequest) => {
const session = await auth0.getSession();
const userRoles = (session?.user?.roles as string[]) || [];
console.log("DEBUG: User roles from session:", userRoles);
// Get sources-only mode from query param
const url = new URL(req.url);
const sourcesOnly = url.searchParams.get("sourcesOnly") === "true";
// Create HttpAgent with user roles header
console.log("DEBUG: User roles from session:", userRoles);
console.log("DEBUG: Sources only mode:", sourcesOnly);
// Create HttpAgent with user roles and sources-only headers
const agent = new HttpAgent({
url: process.env.AGENT_URL || "http://localhost:8000/",
headers: {
"x-user-roles": JSON.stringify(userRoles),
"x-sources-only": sourcesOnly ? "true" : "false",
},
});

View File

@@ -1,6 +1,5 @@
import type { Metadata } from "next";
import { CopilotKit } from "@copilotkit/react-core";
import { Auth0Provider } from "@auth0/nextjs-auth0/client";
import "./globals.css";
import "@copilotkit/react-ui/styles.css";
@@ -19,9 +18,7 @@ export default function RootLayout({
<html lang="en">
<body className={"antialiased"}>
<Auth0Provider>
<CopilotKit runtimeUrl="/api/copilotkit" agent="vpi_1000">
{children}
</CopilotKit>
</Auth0Provider>
</body>
</html>

View File

@@ -1,6 +1,6 @@
"use client";
import { useCopilotAction, useCopilotChat } from "@copilotkit/react-core";
import { CopilotKit, useCopilotAction, useCopilotChat } from "@copilotkit/react-core";
import { CopilotKitCSSProperties, CopilotChat } from "@copilotkit/react-ui";
import { useState } from "react";
import { useUser } from "@auth0/nextjs-auth0/client";
@@ -36,10 +36,8 @@ function LoadingOverlay() {
}
}
export default function CopilotKitPage() {
const [themeColor, setThemeColor] = useState("#6366f1");
const { user, isLoading: authLoading } = useUser();
// Chat content with CopilotKit hooks - must be inside CopilotKit provider
function ChatContent({ themeColor, setThemeColor }: { themeColor: string; setThemeColor: (color: string) => void }) {
useCopilotAction({
name: "setThemeColor",
parameters: [{
@@ -52,6 +50,35 @@ export default function CopilotKitPage() {
},
});
return (
<div
className="flex-1 flex justify-center py-8 px-2 overflow-hidden relative"
style={{ "--copilot-kit-primary-color": themeColor } as CopilotKitCSSProperties}
>
<div className="h-full w-full max-w-5xl flex flex-col">
<CopilotChat
labels={{
title: "AI Cartwright",
initial: "Hello! I'm here to help with anything related to caving. Ask me about caves, techniques, safety, equipment, or anything else caving-related!",
}}
className="h-full w-full"
/>
</div>
<LoadingOverlay />
</div>
);
}
export default function CopilotKitPage() {
const [themeColor, setThemeColor] = useState("#6366f1");
const [sourcesOnlyMode, setSourcesOnlyMode] = useState(false);
const { user, isLoading: authLoading } = useUser();
// Dynamic runtime URL based on sources-only mode
const runtimeUrl = sourcesOnlyMode
? "/api/copilotkit?sourcesOnly=true"
: "/api/copilotkit";
// Show loading state while checking authentication
if (authLoading) {
return (
@@ -88,15 +115,26 @@ export default function CopilotKitPage() {
// If authenticated, show the CopilotKit chat with user profile
return (
<main
style={{ "--copilot-kit-primary-color": themeColor } as CopilotKitCSSProperties}
className="h-screen w-screen flex flex-col bg-gray-50"
<CopilotKit
runtimeUrl={runtimeUrl}
agent="vpi_1000"
key={sourcesOnlyMode ? "sources" : "normal"}
>
<main className="h-screen w-screen flex flex-col bg-gray-50">
{/* Header with user profile and logout */}
<div className="w-full bg-white shadow-sm border-b border-gray-200 px-4 py-3">
<div className="max-w-7xl mx-auto flex justify-between items-center">
<div className="flex items-center gap-4">
<h1 className="text-xl font-semibold text-gray-900">Cavepedia</h1>
<label className="flex items-center gap-2 text-sm text-gray-600 cursor-pointer">
<input
type="checkbox"
checked={sourcesOnlyMode}
onChange={(e) => setSourcesOnlyMode(e.target.checked)}
className="w-4 h-4 rounded border-gray-300 text-indigo-600 focus:ring-indigo-500"
/>
Sources only
</label>
</div>
<div className="flex items-center gap-4">
{user.picture && (
@@ -120,18 +158,8 @@ export default function CopilotKitPage() {
</div>
{/* CopilotKit Chat */}
<div className="flex-1 flex justify-center py-8 px-2 overflow-hidden relative">
<div className="h-full w-full max-w-5xl flex flex-col">
<CopilotChat
labels={{
title: "AI Cartwright",
initial: "Hello! I'm here to help with anything related to caving. Ask me about caves, techniques, safety, equipment, or anything else caving-related!",
}}
className="h-full w-full"
/>
</div>
<LoadingOverlay />
</div>
<ChatContent themeColor={themeColor} setThemeColor={setThemeColor} />
</main>
</CopilotKit>
);
}