diff --git a/src-tauri/src/commands/chat.rs b/src-tauri/src/commands/chat.rs index 2774aa2..4b65bbe 100644 --- a/src-tauri/src/commands/chat.rs +++ b/src-tauri/src/commands/chat.rs @@ -1,4 +1,4 @@ -use crate::commands::ai::{build_overview_context, call_chat_messages}; +use crate::commands::ai::{build_overview_context, call_chat_messages, load_ai_settings}; use crate::commands::chat_tools::{ find_queries_tool, get_columns_tool, list_databases_tool, list_tables_tool, save_query_tool, switch_database_tool, @@ -27,9 +27,14 @@ const CELL_CHAR_CAP: usize = 200; /// Per text-tool-result character cap (list_tables, get_columns, etc). const TEXT_TOOL_CHAR_CAP: usize = 10_000; /// Soft cap on serialized history+system prompt characters before the user -/// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens). +/// is nudged to /compact. Tuned for Ollama defaults (~8K tokens at num_ctx=8192). /// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content. -const CONTEXT_BUDGET_CHARS: u64 = 24_000; +const CONTEXT_BUDGET_CHARS_OLLAMA: u64 = 24_000; +/// Default soft cap for managed providers (Fireworks). Most chat-capable +/// Fireworks models ship with 32K–256K context windows; 384K chars (~128K tok) +/// avoids false /compact nags on normal sessions while still flagging genuinely runaway threads. +/// NOTE(review): ~128K tok exceeds the 32K low end, so small-window models may overflow first. +const CONTEXT_BUDGET_CHARS_FIREWORKS: u64 = 384_000; /// Stop the loop when the model fails the same SQL hurdle this many times in a /// row. Beyond this, additional hops almost always burn the rest of the budget /// on identical retries; a definitive `final` with the error is more useful. @@ -506,7 +511,20 @@ async fn compute_usage( .sum(); ContextUsage { used_chars: used, - budget_chars: CONTEXT_BUDGET_CHARS, + budget_chars: provider_budget_chars(state, app).await, + } +} + +/// Returns the soft context budget for the currently-configured LLM provider: the +/// Fireworks budget for Fireworks; the Ollama budget otherwise or if settings can't be loaded. 
+async fn provider_budget_chars(state: &AppState, app: &AppHandle) -> u64 {
+    use crate::models::ai::AiProvider;
+    // Load failures and non-Fireworks providers share the Ollama budget.
+    match load_ai_settings(app, state).await {
+        Ok(cfg) if matches!(cfg.provider, AiProvider::Fireworks) => {
+            CONTEXT_BUDGET_CHARS_FIREWORKS
+        }
+        _ => CONTEXT_BUDGET_CHARS_OLLAMA,
     }
 }