fix: cap overview table list and surface friendly prompt-overflow message
Two changes that together kill the "prompt is too long: 1991577 tokens" crash on large Greenplum catalogs.

1. Overview table cap (build_overview_postgres): The agent re-injects the full overview into the system prompt every turn. On a multi-thousand-table catalog (typical for Greenplum data lakes) this amounted to megabytes per call and quickly overran even Kimi-K2's 256K window. Now we list at most 400 tables and replace the rest with per-schema counts plus a hint to call list_tables for specifics.

2. Friendly Fireworks-400 rewrite: Detect "prompt is too long" / "maximum context length" in the Fireworks 400 body and turn it into an actionable message asking the user to /compact, instead of leaking raw API JSON to the chat.
This commit is contained in:
@@ -325,6 +325,19 @@ async fn call_fireworks(
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
// Detect the prompt-overflow case and rewrite it as actionable
|
||||
// user-facing guidance instead of leaking raw API JSON.
|
||||
let lc = body.to_lowercase();
|
||||
if status.as_u16() == 400
|
||||
&& (lc.contains("prompt is too long")
|
||||
|| lc.contains("maximum context length"))
|
||||
{
|
||||
return Err(TuskError::Ai(
|
||||
"Context exceeded the model's window. Click `Compact` (or send `/compact`) \
|
||||
to summarize the conversation, then retry. If this keeps happening, \
|
||||
start a new chat for unrelated questions.".to_string(),
|
||||
));
|
||||
}
|
||||
return Err(TuskError::Ai(format!(
|
||||
"Fireworks error ({}): {}",
|
||||
status, body
|
||||
@@ -598,12 +611,39 @@ async fn build_overview_postgres(state: &AppState, connection_id: &str) -> TuskR
|
||||
out.push(String::new());
|
||||
}
|
||||
|
||||
// Cap the inline table list so a multi-thousand-table catalog (typical on
|
||||
// Greenplum data-lake clusters) doesn't blow past the model's context
|
||||
// window when the overview is re-injected into every turn's system prompt.
|
||||
// Beyond the cap, we summarize per-schema counts so the agent still knows
|
||||
// which schemas exist and can call list_tables for the missing ones.
|
||||
const OVERVIEW_TABLE_CAP: usize = 400;
|
||||
out.push(format!(
|
||||
"TABLES IN ACTIVE DATABASE ({}):",
|
||||
tables.len()
|
||||
));
|
||||
for (schema, name) in &tables {
|
||||
out.push(format!(" {}.{}", schema, name));
|
||||
if tables.len() <= OVERVIEW_TABLE_CAP {
|
||||
for (schema, name) in &tables {
|
||||
out.push(format!(" {}.{}", schema, name));
|
||||
}
|
||||
} else {
|
||||
for (schema, name) in tables.iter().take(OVERVIEW_TABLE_CAP) {
|
||||
out.push(format!(" {}.{}", schema, name));
|
||||
}
|
||||
out.push(format!(
|
||||
" ... + {} more tables (truncated). Per-schema counts of remaining tables:",
|
||||
tables.len() - OVERVIEW_TABLE_CAP
|
||||
));
|
||||
let mut remaining: BTreeMap<&str, usize> = BTreeMap::new();
|
||||
for (schema, _) in tables.iter().skip(OVERVIEW_TABLE_CAP) {
|
||||
*remaining.entry(schema.as_str()).or_default() += 1;
|
||||
}
|
||||
for (schema, count) in &remaining {
|
||||
out.push(format!(" {} ({} tables)", schema, count));
|
||||
}
|
||||
out.push(
|
||||
" → call list_tables({\"schema\":\"<schema>\"}) to enumerate a specific schema."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
out.push(String::new());
|
||||
out.push(
|
||||
|
||||
Reference in New Issue
Block a user