fix: cap overview table list and surface friendly prompt-overflow message

Two changes that together eliminate the "prompt is too long: 1991577 tokens" crash on large Greenplum catalogs. 1. Overview table cap (build_overview_postgres): The agent re-injects the full overview into the system prompt on every turn. On a multi-thousand-table catalog (typical for GP data lakes) the overview ran to megabytes per call and quickly overran even Kimi-K2's 256K-token context window. Now we list at most 400 tables inline and summarize the rest as per-schema counts, plus a hint to call list_tables for specifics. 2. Friendly Fireworks-400 rewrite: Detect "prompt is too long" / "maximum context length" in the body of a Fireworks HTTP 400 response and turn it into an actionable message asking the user to /compact, instead of leaking raw API JSON to the chat.
2026-05-07 00:12:37 +03:00
parent 5c5d256cee
commit 93e526af72
1 changed files with 42 additions and 2 deletions
--- a/src-tauri/src/commands/ai.rs
+++ b/src-tauri/src/commands/ai.rs
@@ -325,6 +325,19 @@ async fn call_fireworks(
            if !resp.status().is_success() {
                let status = resp.status();
                let body = resp.text().await.unwrap_or_default();
+                // Detect the prompt-overflow case and rewrite it as actionable
+                // user-facing guidance instead of leaking raw API JSON.
+                let lc = body.to_lowercase();
+                if status.as_u16() == 400
+                    && (lc.contains("prompt is too long")
+                        || lc.contains("maximum context length"))
+                {
+                    return Err(TuskError::Ai(
+                        "Context exceeded the model's window. Click `Compact` (or send `/compact`) \
+                         to summarize the conversation, then retry. If this keeps happening, \
+                         start a new chat for unrelated questions.".to_string(),
+                    ));
+                }
                return Err(TuskError::Ai(format!(
                    "Fireworks error ({}): {}",
                    status, body
@@ -598,12 +611,39 @@ async fn build_overview_postgres(state: &AppState, connection_id: &str) -> TuskR
        out.push(String::new());
    }

+    // Cap the inline table list so a multi-thousand-table catalog (typical on
+    // Greenplum data-lake clusters) doesn't blow past the model's context
+    // window when the overview is re-injected into every turn's system prompt.
+    // Beyond the cap, we summarize per-schema counts so the agent still knows
+    // which schemas exist and can call list_tables for the missing ones.
+    const OVERVIEW_TABLE_CAP: usize = 400;
    out.push(format!(
        "TABLES IN ACTIVE DATABASE ({}):",
        tables.len()
    ));
-    for (schema, name) in &tables {
-        out.push(format!("  {}.{}", schema, name));
+    if tables.len() <= OVERVIEW_TABLE_CAP {
+        for (schema, name) in &tables {
+            out.push(format!("  {}.{}", schema, name));
+        }
+    } else {
+        for (schema, name) in tables.iter().take(OVERVIEW_TABLE_CAP) {
+            out.push(format!("  {}.{}", schema, name));
+        }
+        out.push(format!(
+            "  ... + {} more tables (truncated). Per-schema counts of remaining tables:",
+            tables.len() - OVERVIEW_TABLE_CAP
+        ));
+        let mut remaining: BTreeMap<&str, usize> = BTreeMap::new();
+        for (schema, _) in tables.iter().skip(OVERVIEW_TABLE_CAP) {
+            *remaining.entry(schema.as_str()).or_default() += 1;
+        }
+        for (schema, count) in &remaining {
+            out.push(format!("    {} ({} tables)", schema, count));
+        }
+        out.push(
+            "  → call list_tables({\"schema\":\"<schema>\"}) to enumerate a specific schema."
+                .to_string(),
+        );
    }
    out.push(String::new());
    out.push(