diff --git a/src-tauri/src/commands/chat.rs b/src-tauri/src/commands/chat.rs index bfaba97..a25ddad 100644 --- a/src-tauri/src/commands/chat.rs +++ b/src-tauri/src/commands/chat.rs @@ -30,6 +30,10 @@ const TEXT_TOOL_CHAR_CAP: usize = 10_000; /// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens). /// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content. const CONTEXT_BUDGET_CHARS: u64 = 24_000; +/// Stop the loop when the model fails the same SQL hurdle this many times in a +/// row. Beyond this, additional hops almost always burn the rest of the budget +/// on identical retries; a definitive `final` with the error is more useful. +const MAX_CONSECUTIVE_QUERY_ERRORS: usize = 2; // --------------------------------------------------------------------------- // Action protocol @@ -243,7 +247,11 @@ WORKFLOW RULES - Use ONLY identifiers visible to you (overview / list_tables / get_columns output). Don't pluralize, translate, or guess. - LIMIT on ad-hoc SELECTs unless aggregating. - - On SQL error retry once with a fix; on the second failure respond with `final` explaining what's missing. + - When run_query fails, READ the error carefully — especially any `HINT:` line, which often spells out the fix. Common PostgreSQL fixes: + * `operator does not exist: X = Y` (e.g. `character varying = uuid`) → cast one side, e.g. `a.id::uuid = b.id` or `a.id = b.id::text`. If unsure of types, call get_columns on both tables. + * `column "X" does not exist` → call get_columns on the table you're querying; the column is named differently. + * `relation "X" does not exist` → check the OVERVIEW table list; the table may be in a different schema or database. + - On SQL error retry at most ONCE with a corrected query. On the second consecutive failure, STOP and respond with `final` explaining what's missing — do not loop. The harness will force-stop after 2 consecutive errors regardless. - `remember` is for durable facts, not transient observations. Don't memorise query results — only insights about the schema/data model that aren't already in the OVERVIEW. ═══════════════════════════════════════════════════════════════ @@ -456,8 +464,31 @@ pub async fn chat_send( ) -> TuskResult { let mut new_messages: Vec = Vec::new(); let mut working: Vec = messages; + let mut consecutive_query_errors: usize = 0; for _hop in 0..MAX_HOPS { + // Hard guard: stop once the model has hit the same SQL hurdle multiple + // times in a row. Beyond this point further hops virtually always + // re-run the same broken query against the model's better judgment. + if consecutive_query_errors >= MAX_CONSECUTIVE_QUERY_ERRORS { + let last_err = last_run_query_error(&working).unwrap_or_default(); + let msg = ChatMessage::Assistant { + id: new_id("asst"), + text: format!( + "I tried {} times and kept hitting the same SQL error:\n\n{}\n\nThis usually means a schema mismatch — wrong column type, missing cast (often `::uuid` or `::text`), or a join key that doesn't exist as I assumed. Could you double-check the question, or open the table in the sidebar to verify column types? You can also write the SQL manually in Advanced mode.", + consecutive_query_errors, last_err + ), + created_at: now_ms(), + }; + new_messages.push(msg.clone()); + working.push(msg); + let usage = compute_usage(&state, &app, &connection_id, &working).await; + return Ok(ChatTurnResult { + messages: new_messages, + usage, + }); + } + // Overview is rebuilt per turn — cheap (cached) and reflects the active DB // even if the user (or the agent) just switched it. let overview_text = build_overview_context(&state, &connection_id) @@ -493,6 +524,8 @@ pub async fn chat_send( } }; + let is_run_query = matches!(&action, AgentAction::RunQuery { .. }); + match action { AgentAction::Final { text } => { let msg = ChatMessage::Assistant { @@ -516,24 +549,30 @@ pub async fn chat_send( serde_json::json!({ "sql": sql }).to_string(), ); let result = match execute_query_core(&state, &connection_id, &sql).await { - Ok(qr) => ChatMessage::ToolResult { - id: new_id("res"), - tool: "run_query".to_string(), - is_error: false, - text: None, - result: Some(qr), - created_at: now_ms(), - }, + Ok(qr) => { + consecutive_query_errors = 0; + ChatMessage::ToolResult { + id: new_id("res"), + tool: "run_query".to_string(), + is_error: false, + text: None, + result: Some(qr), + created_at: now_ms(), + } + } Err(e) => { - let hint = match e { - TuskError::ReadOnly => "\n\nRead-only mode is on. Toggle it off in the toolbar to allow writes.", + consecutive_query_errors += 1; + let suffix = match &e { + TuskError::ReadOnly => { + "\n\nRead-only mode is on. Toggle it off in the toolbar to allow writes." + } _ => "", }; ChatMessage::ToolResult { id: new_id("res"), tool: "run_query".to_string(), is_error: true, - text: Some(format!("{}{}", e, hint)), + text: Some(format!("{}{}", format_db_error(&e), suffix)), result: None, created_at: now_ms(), } @@ -631,6 +670,12 @@ pub async fn chat_send( push_tool_result(&mut new_messages, &mut working, result); } } + + // Any non-RunQuery, non-Final action means the model is investigating + // (e.g. get_columns to verify a type). Give it a fresh error budget. + if !is_run_query { + consecutive_query_errors = 0; + } } let msg = ChatMessage::Assistant { @@ -748,6 +793,47 @@ fn render_thread_for_summary(messages: &[ChatMessage]) -> String { out } +/// Format a database error with PostgreSQL HINT/DETAIL extracted when present. +/// Plain `e.to_string()` shows just the bare error message, which makes +/// errors like `operator does not exist: character varying = uuid` impossible +/// to act on. PG's own HINT (`You might need to add explicit type casts`) is +/// often the difference between the agent finding the fix on retry and +/// looping forever. +fn format_db_error(e: &TuskError) -> String { + if let TuskError::Database(sqlx::Error::Database(db_err)) = e { + let mut out = format!("Database error: {}", db_err.message()); + if let Some(pg) = db_err.try_downcast_ref::() { + if let Some(detail) = pg.detail() { + out.push_str(&format!("\nDETAIL: {}", detail)); + } + if let Some(hint) = pg.hint() { + out.push_str(&format!("\nHINT: {}", hint)); + } + } + return out; + } + e.to_string() +} + +/// Pull the most recent run_query error text from the working thread, so the +/// post-loop "I gave up" summary can quote concrete errors back to the user. +fn last_run_query_error(messages: &[ChatMessage]) -> Option { + for m in messages.iter().rev() { + if let ChatMessage::ToolResult { + tool, + is_error: true, + text, + .. + } = m + { + if tool == "run_query" { + return text.clone(); + } + } + } + None +} + /// Strip JSON envelopes, markdown fences, and known field-extraction patterns /// that the agent-trained model tends to emit even for non-agent prompts. /// Returns the underlying summary text. @@ -1107,6 +1193,61 @@ mod tests { assert_eq!(clean_summary(s), s); } + #[test] + fn format_db_error_falls_back_to_to_string_for_non_db_errors() { + let e = TuskError::Custom("oops".into()); + assert_eq!(format_db_error(&e), e.to_string()); + } + + #[test] + fn last_run_query_error_finds_most_recent() { + let msgs = vec![ + ChatMessage::ToolResult { + id: "r1".into(), + tool: "run_query".into(), + is_error: true, + text: Some("first error".into()), + result: None, + created_at: 1, + }, + ChatMessage::ToolResult { + id: "r2".into(), + tool: "get_columns".into(), + is_error: true, + text: Some("not run_query".into()), + result: None, + created_at: 2, + }, + ChatMessage::ToolResult { + id: "r3".into(), + tool: "run_query".into(), + is_error: true, + text: Some("second error".into()), + result: None, + created_at: 3, + }, + ]; + assert_eq!( + last_run_query_error(&msgs).as_deref(), + Some("second error") + ); + } + + #[test] + fn last_run_query_error_none_for_empty_thread() { + assert!(last_run_query_error(&[]).is_none()); + } + + #[test] + fn last_run_query_error_none_when_no_errors() { + let msgs = vec![ChatMessage::User { + id: "u1".into(), + text: "hi".into(), + created_at: 1, + }]; + assert!(last_run_query_error(&msgs).is_none()); + } + #[test] fn render_thread_for_summary_includes_roles_and_skips_rows() { let msgs = vec![