Skip content of 100 chars or fewer

2025-12-26 03:36:28 +01:00
parent 499a3cb170
commit b6f01f9b3f
1 changed files with 2 additions and 2 deletions
--- a/mcp/server.py
+++ b/mcp/server.py
@@ -76,7 +76,7 @@ def search_caving_documents(query: str, priority_prefixes: list[str] | None = No
    top_n = 2
    candidate_limit = top_n * 4
    rows = conn.execute(
-        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
+        'SELECT * FROM embeddings WHERE embedding IS NOT NULL AND LENGTH(content) > 100 AND role = ANY(%s) ORDER BY embedding <=> %s::vector LIMIT %s',
        (roles, query_embedding, candidate_limit)
    ).fetchall()

@@ -96,6 +96,7 @@ def search_caving_documents(query: str, priority_prefixes: list[str] | None = No
    sources_only = is_sources_only()
    for result in rerank_resp.results:
        row = rows[result.index]
+        content = row['content'] or ''
        score = result.relevance_score

        # Boost score if key starts with any priority prefix (e.g., 'nss/aca')
@@ -107,7 +108,6 @@ def search_caving_documents(query: str, priority_prefixes: list[str] | None = No
        if sources_only:
            docs.append({'key': row['key'], 'relevance': round(score, 3)})
        else:
-            content = row['content'] or ''
            docs.append({'key': row['key'], 'content': content, 'relevance': round(score, 3)})

    # Re-sort by boosted score and return top_n