fix: use provider-aware context budget so Fireworks doesn't show 150% on small threads
The chat usage badge was hardcoded to ~8K-token Ollama defaults (`CONTEXT_BUDGET_CHARS = 24_000`), which made every Fireworks session look 150%+ full after a few hops even though models like Kimi-K2 carry 256K context windows.

Now the budget is selected per-provider:
- Ollama → 24K chars (~8K tok), unchanged
- Fireworks → 384K chars (~128K tok), a safe floor for the smallest Fireworks chat models (qwen2.5-coder 32K) while not stuffing the bar for the larger ones

Auto-compact thresholds and the % badge both read this back from the backend, so they now scale correctly when the user switches providers.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use crate::commands::ai::{build_overview_context, call_chat_messages};
|
||||
use crate::commands::ai::{build_overview_context, call_chat_messages, load_ai_settings};
|
||||
use crate::commands::chat_tools::{
|
||||
find_queries_tool, get_columns_tool, list_databases_tool, list_tables_tool, save_query_tool,
|
||||
switch_database_tool,
|
||||
@@ -27,9 +27,14 @@ const CELL_CHAR_CAP: usize = 200;
|
||||
/// Per text-tool-result character cap (list_tables, get_columns, etc).
|
||||
const TEXT_TOOL_CHAR_CAP: usize = 10_000;
|
||||
/// Soft cap on serialized history+system prompt characters before the user
|
||||
/// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens).
|
||||
/// is nudged to /compact. Tuned for Ollama defaults (~8K tokens at num_ctx=8192).
|
||||
/// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content.
|
||||
const CONTEXT_BUDGET_CHARS: u64 = 24_000;
|
||||
const CONTEXT_BUDGET_CHARS_OLLAMA: u64 = 24_000;
|
||||
/// Conservative default for managed providers (Fireworks). Most chat-capable
|
||||
/// Fireworks models ship with 32K–256K context windows; 384K chars (~128K tok)
|
||||
/// is a safe floor that won't trigger false /compact nags on normal sessions
|
||||
/// while still flagging genuinely runaway threads.
|
||||
const CONTEXT_BUDGET_CHARS_FIREWORKS: u64 = 384_000;
|
||||
/// Stop the loop when the model fails the same SQL hurdle this many times in a
|
||||
/// row. Beyond this, additional hops almost always burn the rest of the budget
|
||||
/// on identical retries; a definitive `final` with the error is more useful.
|
||||
@@ -506,7 +511,20 @@ async fn compute_usage(
|
||||
.sum();
|
||||
ContextUsage {
|
||||
used_chars: used,
|
||||
budget_chars: CONTEXT_BUDGET_CHARS,
|
||||
budget_chars: provider_budget_chars(state, app).await,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the soft context budget appropriate for the currently-configured
|
||||
/// LLM provider. Falls back to the Ollama default if settings can't be loaded.
|
||||
async fn provider_budget_chars(state: &AppState, app: &AppHandle) -> u64 {
|
||||
use crate::models::ai::AiProvider;
|
||||
match load_ai_settings(app, state).await {
|
||||
Ok(s) => match s.provider {
|
||||
AiProvider::Fireworks => CONTEXT_BUDGET_CHARS_FIREWORKS,
|
||||
_ => CONTEXT_BUDGET_CHARS_OLLAMA,
|
||||
},
|
||||
Err(_) => CONTEXT_BUDGET_CHARS_OLLAMA,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user