fix: use provider-aware context budget so Fireworks doesn't show 150% on small threads

The chat usage badge was hardcoded to ~8K-token Ollama defaults
(`CONTEXT_BUDGET_CHARS = 24_000`), which made every Fireworks session
look 150%+ full after a few hops even though models like Kimi-K2 carry
256K context windows. Now the budget is selected per-provider:

- Ollama → 24K chars (~8K tok), unchanged
- Fireworks → 384K chars (~128K tok), a safe floor for the smallest
  Fireworks chat models (qwen2.5-coder 32K) while not stuffing the bar
  for the larger ones

Auto-compact thresholds and the % badge both read this back from the
backend, so they now scale correctly when the user switches providers.
This commit is contained in:
2026-05-06 23:11:56 +03:00
parent 96a54edcd0
commit 9a424dcd34

View File

@@ -1,4 +1,4 @@
use crate::commands::ai::{build_overview_context, call_chat_messages};
use crate::commands::ai::{build_overview_context, call_chat_messages, load_ai_settings};
use crate::commands::chat_tools::{
find_queries_tool, get_columns_tool, list_databases_tool, list_tables_tool, save_query_tool,
switch_database_tool,
@@ -27,9 +27,14 @@ const CELL_CHAR_CAP: usize = 200;
/// Per text-tool-result character cap (list_tables, get_columns, etc).
const TEXT_TOOL_CHAR_CAP: usize = 10_000;
/// Soft cap on serialized history+system prompt characters before the user
/// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens).
/// is nudged to /compact. Tuned for Ollama defaults (~8K tokens at num_ctx=8192).
/// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content.
const CONTEXT_BUDGET_CHARS: u64 = 24_000;
const CONTEXT_BUDGET_CHARS_OLLAMA: u64 = 24_000;
/// Conservative default for managed providers (Fireworks). Most chat-capable
/// Fireworks models ship with 32K–256K context windows; 384K chars (~128K tok)
/// is a safe floor that won't trigger false /compact nags on normal sessions
/// while still flagging genuinely runaway threads.
const CONTEXT_BUDGET_CHARS_FIREWORKS: u64 = 384_000;
/// Stop the loop when the model fails the same SQL hurdle this many times in a
/// row. Beyond this, additional hops almost always burn the rest of the budget
/// on identical retries; a definitive `final` with the error is more useful.
@@ -506,7 +511,20 @@ async fn compute_usage(
.sum();
ContextUsage {
used_chars: used,
budget_chars: CONTEXT_BUDGET_CHARS,
budget_chars: provider_budget_chars(state, app).await,
}
}
/// Picks the soft context budget (in characters) for the LLM provider that is
/// currently configured in the AI settings.
///
/// Yields `CONTEXT_BUDGET_CHARS_FIREWORKS` only when the settings load
/// successfully and name Fireworks as the provider. Every other provider, as
/// well as any failure to load the settings, gets `CONTEXT_BUDGET_CHARS_OLLAMA`.
async fn provider_budget_chars(state: &AppState, app: &AppHandle) -> u64 {
    use crate::models::ai::AiProvider;

    // A load error is deliberately folded into "not Fireworks" so the usage
    // badge always has a budget to report.
    let is_fireworks = load_ai_settings(app, state)
        .await
        .map(|settings| matches!(settings.provider, AiProvider::Fireworks))
        .unwrap_or(false);

    if is_fireworks {
        CONTEXT_BUDGET_CHARS_FIREWORKS
    } else {
        CONTEXT_BUDGET_CHARS_OLLAMA
    }
}