From 93e526af72af4d094cbd3ca27551a22a8d406ea5 Mon Sep 17 00:00:00 2001 From: Aleksey Shakhmatov Date: Thu, 7 May 2026 00:12:37 +0300 Subject: [PATCH] fix: cap overview table list and surface friendly prompt-overflow message Two changes that together kill the "prompt is too long: 1991577 tokens" crash on large Greenplum catalogs. 1. Overview table cap (build_overview_postgres): The agent re-injects the full overview into the system prompt every turn. On a multi-thousand-table catalog (typical for GP data lakes) this was megabytes per call and quickly overran even Kimi-K2's 256K window. Now we list at most 400 tables and replace the rest with per-schema counts plus a hint to call list_tables for specifics. 2. Friendly Fireworks-400 rewrite: Detect "prompt is too long" / "maximum context length" in the Fireworks 400 body and turn it into an actionable message asking the user to /compact, instead of leaking raw API JSON to the chat. --- src-tauri/src/commands/ai.rs | 44 ++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index 49619bb..8edb499 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -325,6 +325,19 @@ async fn call_fireworks( if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); + // Detect the prompt-overflow case and rewrite it as actionable + // user-facing guidance instead of leaking raw API JSON. + let lc = body.to_lowercase(); + if status.as_u16() == 400 + && (lc.contains("prompt is too long") + || lc.contains("maximum context length")) + { + return Err(TuskError::Ai( + "Context exceeded the model's window. Click `Compact` (or send `/compact`) \ + to summarize the conversation, then retry. If this keeps happening, \ + start a new chat for unrelated questions.".to_string(), + )); + } return Err(TuskError::Ai(format!( "Fireworks error ({}): {}", status, body @@ -598,12 +611,39 @@ async fn build_overview_postgres(state: &AppState, connection_id: &str) -> TuskR out.push(String::new()); } + // Cap the inline table list so a multi-thousand-table catalog (typical on + // Greenplum data-lake clusters) doesn't blow past the model's context + // window when the overview is re-injected into every turn's system prompt. + // Beyond the cap, we summarize per-schema counts so the agent still knows + // which schemas exist and can call list_tables for the missing ones. + const OVERVIEW_TABLE_CAP: usize = 400; out.push(format!( "TABLES IN ACTIVE DATABASE ({}):", tables.len() )); - for (schema, name) in &tables { - out.push(format!(" {}.{}", schema, name)); + if tables.len() <= OVERVIEW_TABLE_CAP { + for (schema, name) in &tables { + out.push(format!(" {}.{}", schema, name)); + } + } else { + for (schema, name) in tables.iter().take(OVERVIEW_TABLE_CAP) { + out.push(format!(" {}.{}", schema, name)); + } + out.push(format!( + " ... + {} more tables (truncated). Per-schema counts of remaining tables:", + tables.len() - OVERVIEW_TABLE_CAP + )); + let mut remaining: BTreeMap<&str, usize> = BTreeMap::new(); + for (schema, _) in tables.iter().skip(OVERVIEW_TABLE_CAP) { + *remaining.entry(schema.as_str()).or_default() += 1; + } + for (schema, count) in &remaining { + out.push(format!(" {} ({} tables)", schema, count)); + } + out.push( + " → call list_tables({\"schema\":\"\"}) to enumerate a specific schema." + .to_string(), + ); } out.push(String::new()); out.push(