diff --git a/src-tauri/src/commands/chat.rs b/src-tauri/src/commands/chat.rs index a25ddad..2800106 100644 --- a/src-tauri/src/commands/chat.rs +++ b/src-tauri/src/commands/chat.rs @@ -15,7 +15,7 @@ use serde_json::Value; use std::sync::Arc; use tauri::{AppHandle, State}; -const MAX_HOPS: usize = 8; +const MAX_HOPS: usize = 10; /// Number of MOST RECENT run_query tool_results that get full sample-rows in /// LLM history. Older ones are reduced to a marker so very long threads stay /// within model context budget. @@ -246,12 +246,14 @@ WORKFLOW RULES - Use ONLY identifiers visible to you (overview / list_tables / get_columns output). Don't pluralize, translate, or guess. + - **After get_columns, your next run_query must use ONLY column names that appear verbatim in that output.** Do not assume conventions like `name`, `id`, `title`. If get_columns shows a table has `company_name` and `legal_name` but no `name`, then `name` does NOT exist on that table. - LIMIT on ad-hoc SELECTs unless aggregating. - When run_query fails, READ the error carefully — especially any `HINT:` line, which often spells out the fix. Common PostgreSQL fixes: * `operator does not exist: X = Y` (e.g. `character varying = uuid`) → cast one side, e.g. `a.id::uuid = b.id` or `a.id = b.id::text`. If unsure of types, call get_columns on both tables. - * `column "X" does not exist` → call get_columns on the table you're querying; the column is named differently. + * `column "X" does not exist` → call get_columns on the table you're querying; the column is named differently. The error message lists which alias the column was attached to (e.g. `column le.name` means it's missing on the table aliased as `le`). * `relation "X" does not exist` → check the OVERVIEW table list; the table may be in a different schema or database. - On SQL error retry at most ONCE with a corrected query. On the second consecutive failure, STOP and respond with `final` explaining what's missing — do not loop. The harness will force-stop after 2 consecutive errors regardless. + - You have a hop budget of 10 tool calls per user turn. Spend them deliberately: don't burn hops re-running the same query — investigate (get_columns) when in doubt. - `remember` is for durable facts, not transient observations. Don't memorise query results — only insights about the schema/data model that aren't already in the OVERVIEW. ═══════════════════════════════════════════════════════════════ @@ -678,12 +680,20 @@ pub async fn chat_send( } } - let msg = ChatMessage::Assistant { - id: new_id("asst"), - text: format!( - "Stopped after {} tool calls without a final answer. Try rephrasing or simplifying the question.", + // Last-chance synthesis: model is out of tool calls but may have collected + // enough data on the last hop to answer. One extra LLM call, no JSON + // protocol, just plain text. + let synthesis = force_final_synthesis(&app, &state, &working).await; + let text = match synthesis { + Some(t) => format!("{}\n\n_(Tool-call limit reached; answer synthesised from collected results.)_", t), + None => format!( + "Stopped after {} tool calls without a final answer. Try rephrasing the question, splitting it into smaller parts, or running the SQL manually in Advanced mode.", MAX_HOPS ), + }; + let msg = ChatMessage::Assistant { + id: new_id("asst"), + text, created_at: now_ms(), }; new_messages.push(msg.clone()); @@ -834,6 +844,55 @@ fn last_run_query_error(messages: &[ChatMessage]) -> Option { None } +/// Last-chance LLM call after MAX_HOPS is exhausted: nudge the model to +/// produce a `final` answer based on whatever data and tool results the +/// thread already contains. Without this, a thread that succeeded on its +/// last run_query would end with "Stopped after N tool calls" and waste +/// the result. Returns None if the LLM call fails. +async fn force_final_synthesis( + app: &AppHandle, + state: &AppState, + working: &[ChatMessage], +) -> Option { + if working.is_empty() { + return None; + } + let convo = render_thread_for_summary(working); + let system = "The agent loop has reached the tool-call limit. The user is waiting for an answer right now. \ + Based ONLY on the conversation below, write a SHORT plain-text answer for the user. \ + Reply in the SAME language the user used. \ + If a query produced results, summarise what those results show. \ + If queries kept failing, explain what went wrong and what the user could do (provide the missing piece, switch to Advanced mode, etc.). \ + Be concrete with numbers and identifiers from the results.\n\ + \n\ + OUTPUT FORMAT: PLAIN TEXT ONLY. \ + DO NOT output JSON, markdown fences, or field names. \ + DO NOT call any tools. DO NOT use the action protocol. \ + Just the answer text."; + + let llm_messages = vec![ + OllamaChatMessage { + role: "system".to_string(), + content: system.to_string(), + }, + OllamaChatMessage { + role: "user".to_string(), + content: convo, + }, + ]; + match call_ollama_chat_messages(app, state, llm_messages, None).await { + Ok(s) => { + let cleaned = clean_summary(&s); + if cleaned.trim().is_empty() { + None + } else { + Some(cleaned) + } + } + Err(_) => None, + } +} + /// Strip JSON envelopes, markdown fences, and known field-extraction patterns /// that the agent-trained model tends to emit even for non-agent prompts. /// Returns the underlying summary text.