fix: cap overview table list and surface friendly prompt-overflow message
Two changes that together eliminate the "prompt is too long: 1991577 tokens" crash on large Greenplum catalogs.

1. Overview table cap (build_overview_postgres): The agent re-injects the full overview into the system prompt on every turn. On a multi-thousand-table catalog (typical for Greenplum data lakes), this amounted to megabytes per call and quickly overran even Kimi-K2's 256K context window. We now list at most 400 tables and summarize the rest as per-schema counts, plus a hint to call list_tables for specifics.

2. Friendly Fireworks-400 rewrite: Detect "prompt is too long" / "maximum context length" in the body of a Fireworks HTTP 400 response and turn it into an actionable message asking the user to /compact, instead of leaking raw API JSON to the chat.
This commit is contained in:
@@ -325,6 +325,19 @@ async fn call_fireworks(
|
|||||||
if !resp.status().is_success() {
|
if !resp.status().is_success() {
|
||||||
let status = resp.status();
|
let status = resp.status();
|
||||||
let body = resp.text().await.unwrap_or_default();
|
let body = resp.text().await.unwrap_or_default();
|
||||||
|
// Detect the prompt-overflow case and rewrite it as actionable
|
||||||
|
// user-facing guidance instead of leaking raw API JSON.
|
||||||
|
let lc = body.to_lowercase();
|
||||||
|
if status.as_u16() == 400
|
||||||
|
&& (lc.contains("prompt is too long")
|
||||||
|
|| lc.contains("maximum context length"))
|
||||||
|
{
|
||||||
|
return Err(TuskError::Ai(
|
||||||
|
"Context exceeded the model's window. Click `Compact` (or send `/compact`) \
|
||||||
|
to summarize the conversation, then retry. If this keeps happening, \
|
||||||
|
start a new chat for unrelated questions.".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
return Err(TuskError::Ai(format!(
|
return Err(TuskError::Ai(format!(
|
||||||
"Fireworks error ({}): {}",
|
"Fireworks error ({}): {}",
|
||||||
status, body
|
status, body
|
||||||
@@ -598,13 +611,40 @@ async fn build_overview_postgres(state: &AppState, connection_id: &str) -> TuskR
|
|||||||
out.push(String::new());
|
out.push(String::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cap the inline table list so a multi-thousand-table catalog (typical on
|
||||||
|
// Greenplum data-lake clusters) doesn't blow past the model's context
|
||||||
|
// window when the overview is re-injected into every turn's system prompt.
|
||||||
|
// Beyond the cap, we summarize per-schema counts so the agent still knows
|
||||||
|
// which schemas exist and can call list_tables for the missing ones.
|
||||||
|
const OVERVIEW_TABLE_CAP: usize = 400;
|
||||||
out.push(format!(
|
out.push(format!(
|
||||||
"TABLES IN ACTIVE DATABASE ({}):",
|
"TABLES IN ACTIVE DATABASE ({}):",
|
||||||
tables.len()
|
tables.len()
|
||||||
));
|
));
|
||||||
|
if tables.len() <= OVERVIEW_TABLE_CAP {
|
||||||
for (schema, name) in &tables {
|
for (schema, name) in &tables {
|
||||||
out.push(format!(" {}.{}", schema, name));
|
out.push(format!(" {}.{}", schema, name));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
for (schema, name) in tables.iter().take(OVERVIEW_TABLE_CAP) {
|
||||||
|
out.push(format!(" {}.{}", schema, name));
|
||||||
|
}
|
||||||
|
out.push(format!(
|
||||||
|
" ... + {} more tables (truncated). Per-schema counts of remaining tables:",
|
||||||
|
tables.len() - OVERVIEW_TABLE_CAP
|
||||||
|
));
|
||||||
|
let mut remaining: BTreeMap<&str, usize> = BTreeMap::new();
|
||||||
|
for (schema, _) in tables.iter().skip(OVERVIEW_TABLE_CAP) {
|
||||||
|
*remaining.entry(schema.as_str()).or_default() += 1;
|
||||||
|
}
|
||||||
|
for (schema, count) in &remaining {
|
||||||
|
out.push(format!(" {} ({} tables)", schema, count));
|
||||||
|
}
|
||||||
|
out.push(
|
||||||
|
" → call list_tables({\"schema\":\"<schema>\"}) to enumerate a specific schema."
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
out.push(String::new());
|
out.push(String::new());
|
||||||
out.push(
|
out.push(
|
||||||
"NOTE: Tables of other databases are not enumerated here. \
|
"NOTE: Tables of other databases are not enumerated here. \
|
||||||
|
|||||||
Reference in New Issue
Block a user