fix: use provider-aware context budget so Fireworks doesn't show 150% on small threads

The chat usage badge was hardcoded to ~8K-token Ollama defaults (`CONTEXT_BUDGET_CHARS = 24_000`), which made every Fireworks session look 150%+ full after a few hops even though models like Kimi-K2 carry 256K context windows.

Now the budget is selected per provider:

- Ollama → 24K chars (~8K tok), unchanged
- Fireworks → 384K chars (~128K tok), a safe floor for the smallest Fireworks chat models (qwen2.5-coder 32K) while not stuffing the bar for the larger ones

Auto-compact thresholds and the % badge both read this back from the backend, so they now scale correctly when the user switches providers.
2026-05-06 23:11:56 +03:00
parent 96a54edcd0
commit 9a424dcd34
1 changed files with 22 additions and 4 deletions
--- a/src-tauri/src/commands/chat.rs
+++ b/src-tauri/src/commands/chat.rs
@@ -1,4 +1,4 @@
-use crate::commands::ai::{build_overview_context, call_chat_messages};
+use crate::commands::ai::{build_overview_context, call_chat_messages, load_ai_settings};
 use crate::commands::chat_tools::{
    find_queries_tool, get_columns_tool, list_databases_tool, list_tables_tool, save_query_tool,
    switch_database_tool,
@@ -27,9 +27,14 @@ const CELL_CHAR_CAP: usize = 200;
 /// Per text-tool-result character cap (list_tables, get_columns, etc).
 const TEXT_TOOL_CHAR_CAP: usize = 10_000;
 /// Soft cap on serialized history+system prompt characters before the user
-/// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens).
+/// is nudged to /compact. Tuned for Ollama defaults (~8K tokens at num_ctx=8192).
 /// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content.
-const CONTEXT_BUDGET_CHARS: u64 = 24_000;
+const CONTEXT_BUDGET_CHARS_OLLAMA: u64 = 24_000;
+/// Conservative default for managed providers (Fireworks). Most chat-capable
+/// Fireworks models ship with 32K–256K context windows; 384K chars (~128K tok)
+/// is a safe floor that won't trigger false /compact nags on normal sessions
+/// while still flagging genuinely runaway threads.
+const CONTEXT_BUDGET_CHARS_FIREWORKS: u64 = 384_000;
 /// Stop the loop when the model fails the same SQL hurdle this many times in a
 /// row. Beyond this, additional hops almost always burn the rest of the budget
 /// on identical retries; a definitive `final` with the error is more useful.
@@ -506,7 +511,20 @@ async fn compute_usage(
        .sum();
    ContextUsage {
        used_chars: used,
-        budget_chars: CONTEXT_BUDGET_CHARS,
+        budget_chars: provider_budget_chars(state, app).await,
+    }
+}
+
+/// Returns the soft context budget, in characters, for the currently-configured
+/// LLM provider. Any non-Fireworks provider, or a settings load error, gets the Ollama budget.
+async fn provider_budget_chars(state: &AppState, app: &AppHandle) -> u64 {
+    use crate::models::ai::AiProvider;
+    match load_ai_settings(app, state).await {
+        Ok(s) => match s.provider {
+            AiProvider::Fireworks => CONTEXT_BUDGET_CHARS_FIREWORKS,
+            _ => CONTEXT_BUDGET_CHARS_OLLAMA,
+        },
+        Err(_) => CONTEXT_BUDGET_CHARS_OLLAMA,
    }
 }