diff --git a/src-tauri/src/commands/chat.rs b/src-tauri/src/commands/chat.rs index 7df16ed..dd5ceb3 100644 --- a/src-tauri/src/commands/chat.rs +++ b/src-tauri/src/commands/chat.rs @@ -7,7 +7,7 @@ use crate::commands::memory::{append_memory_core, read_memory_core}; use crate::commands::queries::execute_query_core; use crate::error::{TuskError, TuskResult}; use crate::models::ai::OllamaChatMessage; -use crate::models::chat::ChatMessage; +use crate::models::chat::{ChatMessage, ChatTurnResult, ContextUsage}; use crate::models::query_result::QueryResult; use crate::state::AppState; use chrono::Utc; @@ -26,6 +26,10 @@ const RUN_QUERY_SAMPLE_ROWS: usize = 10; const CELL_CHAR_CAP: usize = 200; /// Per text-tool-result character cap (list_tables, get_columns, etc). const TEXT_TOOL_CHAR_CAP: usize = 10_000; +/// Soft cap on serialized history+system prompt characters before the user +/// is nudged to /compact. Tuned for Ollama defaults (~4-8K tokens). +/// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content. +const CONTEXT_BUDGET_CHARS: u64 = 24_000; // --------------------------------------------------------------------------- // Action protocol @@ -418,13 +422,38 @@ fn build_history( // chat_send // --------------------------------------------------------------------------- +/// Estimate how many characters the next LLM call will serialize to history +/// (system prompt + conversation, after compression). This is the same data +/// path as the actual call, so the count is exact for the chosen budget unit. 
+async fn compute_usage(
+    state: &AppState,
+    app: &AppHandle,
+    connection_id: &str,
+    working: &[ChatMessage],
+) -> ContextUsage {
+    let overview = build_overview_context(state, connection_id)
+        .await
+        .unwrap_or_default();
+    let memory = read_memory_core(app, connection_id).unwrap_or_default();
+    let history = build_history(working, &overview, &memory);
+    // Count chars, not UTF-8 bytes (Cyrillic is 2 bytes/char); 16 ≈ JSON envelope overhead.
+    let used: u64 = history
+        .iter()
+        .map(|m| (m.role.chars().count() + m.content.chars().count() + 16) as u64)
+        .sum();
+    ContextUsage {
+        used_chars: used,
+        budget_chars: CONTEXT_BUDGET_CHARS,
+    }
+}
+
 #[tauri::command]
 pub async fn chat_send(
     app: AppHandle,
     state: State<'_, Arc<AppState>>,
     connection_id: String,
     messages: Vec<ChatMessage>,
-) -> TuskResult<Vec<ChatMessage>> {
+) -> TuskResult<ChatTurnResult> {
     let mut new_messages: Vec<ChatMessage> = Vec::new();
     let mut working: Vec<ChatMessage> = messages;
 
@@ -456,7 +485,11 @@ pub async fn chat_send(
                 };
                 new_messages.push(msg.clone());
                 working.push(msg);
-                return Ok(new_messages);
+                let usage = compute_usage(&state, &app, &connection_id, &working).await;
+                return Ok(ChatTurnResult {
+                    messages: new_messages,
+                    usage,
+                });
             }
         };
 
@@ -469,7 +502,11 @@ pub async fn chat_send(
                 };
                 new_messages.push(msg.clone());
                 working.push(msg);
-                return Ok(new_messages);
+                let usage = compute_usage(&state, &app, &connection_id, &working).await;
+                return Ok(ChatTurnResult {
+                    messages: new_messages,
+                    usage,
+                });
             }
             AgentAction::RunQuery { sql } => {
                 push_tool_call(
@@ -604,8 +641,13 @@ pub async fn chat_send(
         ),
         created_at: now_ms(),
     };
-    new_messages.push(msg);
-    Ok(new_messages)
+    new_messages.push(msg.clone());
+    working.push(msg);
+    let usage = compute_usage(&state, &app, &connection_id, &working).await;
+    Ok(ChatTurnResult {
+        messages: new_messages,
+        usage,
+    })
 }
 
 fn push_tool_call(
@@ -654,6 +696,143 @@ fn run_text_tool(outcome: TuskResult, tool: &str) -> ChatMessage {
     }
 }
 
+// ---------------------------------------------------------------------------
+// chat_compact
+// 
---------------------------------------------------------------------------
+
+/// Render the older-history portion of the thread as a compact text block
+/// for LLM-driven summarization. Skips QueryResult.rows (huge), keeping only
+/// row_count + column names; plain-text tool results are cut to 800 chars.
+fn render_thread_for_summary(messages: &[ChatMessage]) -> String {
+    let mut out = String::new();
+    for m in messages {
+        match m {
+            ChatMessage::User { text, .. } => {
+                out.push_str(&format!("USER: {}\n\n", text));
+            }
+            ChatMessage::Assistant { text, .. } => {
+                out.push_str(&format!("ASSISTANT: {}\n\n", text));
+            }
+            ChatMessage::ToolCall { tool, input_json, .. } => {
+                out.push_str(&format!("TOOL_CALL [{}]: {}\n\n", tool, input_json));
+            }
+            ChatMessage::ToolResult {
+                tool,
+                is_error,
+                text,
+                result,
+                ..
+            } => {
+                // Errors take precedence over any payload; a successful result
+                // with neither `result` nor `text` adds nothing to the output.
+                if *is_error {
+                    out.push_str(&format!(
+                        "TOOL_ERROR [{}]: {}\n\n",
+                        tool,
+                        text.as_deref().unwrap_or("")
+                    ));
+                } else if let Some(qr) = result {
+                    out.push_str(&format!(
+                        "TOOL_RESULT [{}]: {} rows; columns={}\n\n",
+                        tool,
+                        qr.row_count,
+                        qr.columns.join(", ")
+                    ));
+                } else if let Some(t) = text {
+                    let snippet: String = t.chars().take(800).collect();
+                    out.push_str(&format!("TOOL_RESULT [{}]: {}\n\n", tool, snippet));
+                }
+            }
+        }
+    }
+    out
+}
+
+/// Index of the last `User` message, or `messages.len()` (an out-of-range
+/// value that cannot be mistaken for a real position) when there is none.
+fn last_user_turn_index(messages: &[ChatMessage]) -> usize {
+    messages
+        .iter()
+        // `rposition` scans from the back, so the first hit is the latest user turn.
+        .rposition(|m| matches!(m, ChatMessage::User { .. }))
+        .unwrap_or(messages.len())
+}
+
+/// LLM-summarise the older portion of a chat thread.
+/// Returns thread = [ Assistant("📋 Compacted …"), ...messages from the last user turn onward ].
+/// If the thread has nothing to compact, returns it unchanged.
+///
+/// # Errors
+/// Returns `TuskError::Ai` when the summarisation call fails or when the
+/// model answers with an empty (whitespace-only) summary.
+#[tauri::command]
+pub async fn chat_compact(
+    app: AppHandle,
+    state: State<'_, Arc<AppState>>,
+    connection_id: String,
+    messages: Vec<ChatMessage>,
+) -> TuskResult<ChatTurnResult> {
+    // Preserve the user's most recent question (if any) untouched so the
+    // model can continue from it after compaction. Everything before goes
+    // into the summary. With no user message the split point is `len()`,
+    // so the whole thread lands in `older` and `recent` is empty.
+    let (older, recent) = messages.split_at(last_user_turn_index(&messages));
+
+    // Also covers the empty thread: nothing to summarise, return it as-is.
+    if older.is_empty() {
+        let usage = compute_usage(&state, &app, &connection_id, &messages).await;
+        return Ok(ChatTurnResult { messages, usage });
+    }
+
+    let convo = render_thread_for_summary(older);
+    let system = "You are a precise summarizer of a database analysis dialogue. \
+        Produce a SHORT summary in the SAME language the user spoke. \
+        Use 3-6 bullet points covering: the user's goal, key tables/columns/queries used, \
+        numerical findings, conclusions reached, any open questions. \
+        Be concrete with numbers and identifiers. Total length < 800 chars. \
+        Output the bullets directly with no preamble, no JSON, no markdown fences.";
+
+    let llm_messages = vec![
+        OllamaChatMessage {
+            role: "system".to_string(),
+            content: system.to_string(),
+        },
+        OllamaChatMessage {
+            role: "user".to_string(),
+            content: convo,
+        },
+    ];
+    let summary = call_ollama_chat_messages(&app, &state, llm_messages, None)
+        .await
+        .map_err(|e| TuskError::Ai(format!("Compact failed: {}", e)))?;
+
+    let cleaned = summary.trim();
+    // An empty summary would silently erase `older`; surface it as a failure.
+    if cleaned.is_empty() {
+        return Err(TuskError::Ai("Compact failed: model returned an empty summary".to_string()));
+    }
+    let compacted_msg = ChatMessage::Assistant {
+        id: new_id("asst"),
+        text: format!(
+            "📋 Compacted {} earlier message{}:\n\n{}",
+            older.len(),
+            if older.len() == 1 { "" } else { "s" },
+            cleaned
+        ),
+        created_at: now_ms(),
+    };
+
+    let mut out: Vec<ChatMessage> = Vec::with_capacity(1 + recent.len());
+    out.push(compacted_msg);
+    out.extend(recent.iter().cloned());
+
+    let usage = compute_usage(&state, &app, &connection_id, &out).await;
+    Ok(ChatTurnResult {
+        messages: out,
+        usage,
+    })
+}
+
 // ---------------------------------------------------------------------------
 // tests
 // ---------------------------------------------------------------------------
@@ -833,6 +1012,55 @@ mod tests {
         assert!(parse_agent_action(r#"{"action":"find_queries","text":""}"#).is_err());
     }
 
+    #[test]
+    fn last_user_turn_index_finds_last_user() {
+        let msgs = vec![
+            ChatMessage::User { id: "u1".into(), text: "first".into(), created_at: 1 },
+            ChatMessage::Assistant { id: "a1".into(), text: "ans".into(), created_at: 2 },
+            ChatMessage::User { id: "u2".into(), text: "second".into(), created_at: 3 },
+            ChatMessage::Assistant { id: "a2".into(), text: "ans2".into(), created_at: 4 },
+        ];
+        assert_eq!(last_user_turn_index(&msgs), 2);
+    }
+
+    #[test]
+    fn last_user_turn_index_returns_len_when_no_user() {
+        let msgs = vec![ChatMessage::Assistant {
+            id: "a1".into(),
+            text: "alone".into(),
+            created_at: 1,
+        }];
+        assert_eq!(last_user_turn_index(&msgs), msgs.len());
+    }
+
+    #[test]
+    fn render_thread_for_summary_includes_roles_and_skips_rows() {
+ 
let msgs = vec![ + ChatMessage::User { id: "u1".into(), text: "find users".into(), created_at: 1 }, + ChatMessage::ToolCall { id: "c1".into(), tool: "run_query".into(), input_json: r#"{"sql":"SELECT 1"}"#.into(), created_at: 2 }, + ChatMessage::ToolResult { + id: "r1".into(), + tool: "run_query".into(), + is_error: false, + text: None, + result: Some(QueryResult { + columns: vec!["id".into(), "name".into()], + types: vec!["INT4".into(), "TEXT".into()], + rows: vec![vec![Value::Number(1.into()), Value::String("alice".into())]; 1000], + row_count: 1000, + execution_time_ms: 12, + }), + created_at: 3, + }, + ]; + let rendered = render_thread_for_summary(&msgs); + assert!(rendered.contains("USER: find users")); + assert!(rendered.contains("TOOL_CALL [run_query]")); + assert!(rendered.contains("1000 rows")); + // Must NOT include the actual rows + assert!(!rendered.contains("alice")); + } + #[test] fn rejects_legacy_get_schema() { assert!(parse_agent_action(r#"{"action":"get_schema"}"#).is_err()); diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c6dff21..a8983c2 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -115,6 +115,7 @@ pub fn run() { commands::ai::fix_sql_error, // chat commands::chat::chat_send, + commands::chat::chat_compact, // memory commands::memory::get_memory, commands::memory::save_memory, diff --git a/src-tauri/src/models/chat.rs b/src-tauri/src/models/chat.rs index 857a4d9..fbde600 100644 --- a/src-tauri/src/models/chat.rs +++ b/src-tauri/src/models/chat.rs @@ -17,3 +17,18 @@ pub enum ChatMessage { }, } +/// Approximate model-context budget usage for the current chat thread. +/// Measured in characters of the serialized history that we send to the LLM. +/// Token estimate ≈ used_chars / 3 for mixed Cyrillic/ASCII content. 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContextUsage {
+    pub used_chars: u64, // size of the history sent to the model
+    pub budget_chars: u64, // soft cap that `used_chars` is compared against
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatTurnResult {
+    pub messages: Vec<ChatMessage>, // chat_send: new messages; chat_compact: whole thread
+    pub usage: ContextUsage, // usage measured after this turn
+}
+
diff --git a/src/components/chat/ChatPanel.tsx b/src/components/chat/ChatPanel.tsx
index 09e7fe5..94e7424 100644
--- a/src/components/chat/ChatPanel.tsx
+++ b/src/components/chat/ChatPanel.tsx
@@ -3,9 +3,15 @@ import { useChat } from "@/hooks/use-chat";
 import { ChatComposer } from "./ChatComposer";
 import { ChatMessageView } from "./ChatMessageView";
 import { Button } from "@/components/ui/button";
-import { Eraser, Sparkles } from "lucide-react";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { Eraser, Sparkles, Layers } from "lucide-react";
 import { useAppStore } from "@/stores/app-store";
 import { useAiSettings } from "@/hooks/use-ai";
+import type { ContextUsage } from "@/types";
 
 interface Props {
   tabId: string;
@@ -13,7 +19,7 @@ interface Props {
 }
 
 export function ChatPanel({ tabId, connectionId }: Props) {
-  const { messages, pending, send, clear } = useChat(tabId, connectionId);
+  const { messages, pending, usage, send, clear, compact } = useChat(tabId, connectionId);
   const dbFlavors = useAppStore((s) => s.dbFlavors);
   const flavor = dbFlavors[connectionId];
   const { data: aiSettings } = useAiSettings();
@@ -33,21 +39,39 @@ export function ChatPanel({ tabId, connectionId }: Props) {
AI Assistant - {flavor && · {flavor}} + {flavor && ( + + · {flavor} + + )} {aiSettings?.model && ( · {aiSettings.model} )}
- +
+ + + +
@@ -69,7 +93,7 @@ export function ChatPanel({ tabId, connectionId }: Props) { disabled={pending || !aiReady} placeholder={ aiReady - ? "Ask in plain language. The agent will browse schema and run read-only queries." + ? "Ask in plain language. /compact to summarise, /clear to wipe." : "Configure an AI model in Settings to enable chat." } /> @@ -78,6 +102,50 @@ export function ChatPanel({ tabId, connectionId }: Props) { ); } +function UsageBadge({ usage }: { usage: ContextUsage | undefined }) { + if (!usage || usage.budget_chars === 0) return null; + const ratio = Math.min(usage.used_chars / usage.budget_chars, 1.5); + const usedTok = Math.round(usage.used_chars / 3 / 100) / 10; // ~k-tokens with 1 decimal + const budgetTok = Math.round(usage.budget_chars / 3 / 100) / 10; + const percent = Math.round(ratio * 100); + + let toneClass = "text-muted-foreground/70"; + if (ratio >= 0.85) toneClass = "text-destructive"; + else if (ratio >= 0.6) toneClass = "text-amber-500"; + else if (ratio >= 0.3) toneClass = "text-emerald-500/80"; + + const trackClass = "h-1.5 w-12 overflow-hidden rounded-full bg-muted"; + let fillClass = "bg-emerald-500/70"; + if (ratio >= 0.85) fillClass = "bg-destructive"; + else if (ratio >= 0.6) fillClass = "bg-amber-500"; + + return ( + + +
+
+
+
+ + {usedTok}k / {budgetTok}k tok · {percent}% + +
+ + + Approximate context usage. {usage.used_chars.toLocaleString()} chars sent to the model + last turn out of {usage.budget_chars.toLocaleString()} budget. + {ratio >= 0.6 && " Type /compact (or click Compact) to summarise older history."} + + + ); +} + function PendingIndicator() { return (
@@ -102,6 +170,11 @@ function EmptyState({ aiReady, flavor }: { aiReady: boolean; flavor: string | un ? `Connected to ${flavor ?? "database"}. Try: "How many rows in each table?", "Top 10 customers by total spend", "Show me last week's orders".` : "Open Settings → AI to choose an Ollama model. Tusk will then assist with natural-language queries."}

+ {aiReady && ( +

+ Slash commands: /compact · /clear +

+ )}
); diff --git a/src/hooks/use-chat.ts b/src/hooks/use-chat.ts index b5ca65b..2a49246 100644 --- a/src/hooks/use-chat.ts +++ b/src/hooks/use-chat.ts @@ -1,10 +1,13 @@ import { useCallback } from "react"; -import { chatSend } from "@/lib/tauri"; +import { chatCompact, chatSend } from "@/lib/tauri"; import { useAppStore } from "@/stores/app-store"; import type { ChatMessage } from "@/types"; const EMPTY_THREAD: ChatMessage[] = []; +/// Auto-compact when serialized history exceeds this fraction of the budget. +const AUTO_COMPACT_THRESHOLD = 0.85; + function newId(prefix: string) { return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; } @@ -12,17 +15,80 @@ function newId(prefix: string) { export function useChat(tabId: string, connectionId: string) { const messages = useAppStore((s) => s.chatThreads[tabId] ?? EMPTY_THREAD); const pending = useAppStore((s) => Boolean(s.chatPending[tabId])); + const usage = useAppStore((s) => s.chatUsage[tabId]); const appendChatMessages = useAppStore((s) => s.appendChatMessages); + const replaceChatThread = useAppStore((s) => s.replaceChatThread); const clearChatThread = useAppStore((s) => s.clearChatThread); const setChatPending = useAppStore((s) => s.setChatPending); + const setChatUsage = useAppStore((s) => s.setChatUsage); + + const compact = useCallback(async (): Promise => { + const state = useAppStore.getState(); + if (state.chatPending[tabId]) return false; + const history = state.chatThreads[tabId] ?? 
[]; + if (history.length === 0) return false; + + setChatPending(tabId, true); + try { + const turn = await chatCompact(connectionId, history); + replaceChatThread(tabId, turn.messages); + setChatUsage(tabId, turn.usage); + return true; + } catch (err) { + appendChatMessages(tabId, [ + { + id: newId("err"), + role: "assistant", + text: `Compact failed: ${String(err)}`, + created_at: Date.now(), + }, + ]); + return false; + } finally { + setChatPending(tabId, false); + } + }, [ + tabId, + connectionId, + appendChatMessages, + replaceChatThread, + setChatPending, + setChatUsage, + ]); const send = useCallback( async (text: string) => { const trimmed = text.trim(); if (!trimmed) return; + + // Slash commands + if (trimmed === "/clear") { + clearChatThread(tabId); + return; + } + if (trimmed === "/compact") { + await compact(); + return; + } + const state = useAppStore.getState(); if (state.chatPending[tabId]) return; - const history = state.chatThreads[tabId] ?? []; + + // Auto-compact when prior turn pushed usage past threshold. + const lastUsage = state.chatUsage[tabId]; + if ( + lastUsage && + lastUsage.budget_chars > 0 && + lastUsage.used_chars / lastUsage.budget_chars > AUTO_COMPACT_THRESHOLD && + (state.chatThreads[tabId]?.length ?? 0) > 1 + ) { + const ok = await compact(); + if (!ok) return; // compact failed; bail out so the user can retry manually + } + + const after = useAppStore.getState(); + if (after.chatPending[tabId]) return; + const history = after.chatThreads[tabId] ?? 
[]; const userMsg: ChatMessage = { id: newId("user"), role: "user", @@ -32,8 +98,9 @@ export function useChat(tabId: string, connectionId: string) { appendChatMessages(tabId, [userMsg]); setChatPending(tabId, true); try { - const reply = await chatSend(connectionId, [...history, userMsg]); - appendChatMessages(tabId, reply); + const turn = await chatSend(connectionId, [...history, userMsg]); + appendChatMessages(tabId, turn.messages); + setChatUsage(tabId, turn.usage); } catch (err) { appendChatMessages(tabId, [ { @@ -47,10 +114,18 @@ export function useChat(tabId: string, connectionId: string) { setChatPending(tabId, false); } }, - [tabId, connectionId, appendChatMessages, setChatPending] + [ + tabId, + connectionId, + appendChatMessages, + clearChatThread, + compact, + setChatPending, + setChatUsage, + ] ); const clear = useCallback(() => clearChatThread(tabId), [tabId, clearChatThread]); - return { messages, pending, send, clear }; + return { messages, pending, usage, send, clear, compact }; } diff --git a/src/lib/tauri.ts b/src/lib/tauri.ts index 5a20e31..1633acc 100644 --- a/src/lib/tauri.ts +++ b/src/lib/tauri.ts @@ -18,6 +18,7 @@ import type { AppSettings, McpStatus, ChatMessage, + ChatTurnResult, } from "@/types"; // Connections @@ -220,7 +221,10 @@ export const fixSqlError = (connectionId: string, sql: string, errorMessage: str invoke("fix_sql_error", { connectionId, sql, errorMessage }); export const chatSend = (connectionId: string, messages: ChatMessage[]) => - invoke("chat_send", { connectionId, messages }); + invoke("chat_send", { connectionId, messages }); + +export const chatCompact = (connectionId: string, messages: ChatMessage[]) => + invoke("chat_compact", { connectionId, messages }); // Memory (per-connection markdown notes for the chat agent) export const getMemory = (connectionId: string) => diff --git a/src/stores/app-store.ts b/src/stores/app-store.ts index 262bfdf..7c82de0 100644 --- a/src/stores/app-store.ts +++ b/src/stores/app-store.ts 
@@ -1,5 +1,5 @@ import { create } from "zustand"; -import type { ChatMessage, ConnectionConfig, DbFlavor, Tab } from "@/types"; +import type { ChatMessage, ConnectionConfig, ContextUsage, DbFlavor, Tab } from "@/types"; interface AppState { connections: ConnectionConfig[]; @@ -14,6 +14,7 @@ interface AppState { pgVersion: string | null; chatThreads: Record; chatPending: Record; + chatUsage: Record; setConnections: (connections: ConnectionConfig[]) => void; setActiveConnectionId: (id: string | null) => void; @@ -31,8 +32,10 @@ interface AppState { setSidebarWidth: (width: number) => void; appendChatMessages: (tabId: string, messages: ChatMessage[]) => void; + replaceChatThread: (tabId: string, messages: ChatMessage[]) => void; clearChatThread: (tabId: string) => void; setChatPending: (tabId: string, pending: boolean) => void; + setChatUsage: (tabId: string, usage: ContextUsage) => void; } export const useAppStore = create((set) => ({ @@ -48,6 +51,7 @@ export const useAppStore = create((set) => ({ pgVersion: null, chatThreads: {}, chatPending: {}, + chatUsage: {}, setConnections: (connections) => set({ connections }), setActiveConnectionId: (id) => set({ activeConnectionId: id }), @@ -97,7 +101,9 @@ export const useAppStore = create((set) => ({ delete chatThreads[id]; const chatPending = { ...state.chatPending }; delete chatPending[id]; - return { tabs, activeTabId, chatThreads, chatPending }; + const chatUsage = { ...state.chatUsage }; + delete chatUsage[id]; + return { tabs, activeTabId, chatThreads, chatPending, chatUsage }; }), setActiveTabId: (id) => set({ activeTabId: id }), updateTab: (id, updates) => @@ -113,12 +119,23 @@ export const useAppStore = create((set) => ({ [tabId]: [...(state.chatThreads[tabId] ?? 
[]), ...messages], }, })), - clearChatThread: (tabId) => + replaceChatThread: (tabId, messages) => set((state) => ({ - chatThreads: { ...state.chatThreads, [tabId]: [] }, + chatThreads: { ...state.chatThreads, [tabId]: messages }, })), + clearChatThread: (tabId) => + set((state) => { + const chatThreads = { ...state.chatThreads, [tabId]: [] }; + const chatUsage = { ...state.chatUsage }; + delete chatUsage[tabId]; + return { chatThreads, chatUsage }; + }), setChatPending: (tabId, pending) => set((state) => ({ chatPending: { ...state.chatPending, [tabId]: pending }, })), + setChatUsage: (tabId, usage) => + set((state) => ({ + chatUsage: { ...state.chatUsage, [tabId]: usage }, + })), })); diff --git a/src/types/index.ts b/src/types/index.ts index 0b98943..d3da46d 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -205,3 +205,13 @@ export type ChatMessage = text?: string | null; result?: QueryResult | null; }); + +export interface ContextUsage { + used_chars: number; + budget_chars: number; +} + +export interface ChatTurnResult { + messages: ChatMessage[]; + usage: ContextUsage; +}