feat: chat context-usage display, /compact slash command, auto-compact

Adds visibility into how much of the model context window the chat agent is using and a way to free space when it fills up. Backend - New ContextUsage{used_chars, budget_chars} returned from chat_send alongside messages (return type ChatTurnResult). Computed by running build_history once at end of turn and summing the size of every resulting message (role + content + a fixed 16-character envelope allowance) — same data path as the actual LLM call, so the figure tracks what is really sent. - CONTEXT_BUDGET_CHARS = 24,000 (~6-8K tokens). Tuned for Ollama defaults; can be exposed via AiSettings later. - New chat_compact Tauri command. Splits the thread at the last user message, LLM-summarises everything before it (3-6 bullet points, language-aware, < 800 chars), and returns a thread of [Assistant("📋 Compacted N earlier messages: …"), <the last user message and everything after it, if any>]. The recent user turn is preserved untouched so the agent can keep answering it. - render_thread_for_summary skips QueryResult.rows entirely so a single large run_query can't blow the summariser's context. - 3 new unit tests (last_user_turn_index with and without a user message, render skipping rows). Frontend - ChatPanel header gets a usage badge: progress bar + `Xk / Yk tok · P%`; the label is color-coded muted (<30%) / green (<60%) / amber (<85%) / red (≥85%), and the bar is green below 60%, then amber, then red. Tooltip explains and nudges /compact when ≥60%. - Compact button next to Clear in the header. - Slash commands in ChatComposer: /compact, /clear. - Empty-state shows the slash-command hint. - Auto-compact: if the previous turn pushed usage past 85% AND the thread has more than one message, the next user turn first runs chat_compact transparently before chat_send. The compaction surfaces as a visible Assistant("📋 Compacted …") message so the user can see what the agent kept. - app-store gets chatUsage map per tab + replaceChatThread + setChatUsage actions; closeTab and clearChatThread clean up usage too. Verification: cargo check clean, cargo test --lib 53 pass (+3), tsc --noEmit clean, vitest run 20 pass.
2026-05-06 19:44:11 +03:00
parent b41c84dab8
commit 27fed0dbf8
8 changed files with 454 additions and 31 deletions
--- a/src-tauri/src/commands/chat.rs
+++ b/src-tauri/src/commands/chat.rs
@@ -7,7 +7,7 @@ use crate::commands::memory::{append_memory_core, read_memory_core};
 use crate::commands::queries::execute_query_core;
 use crate::error::{TuskError, TuskResult};
 use crate::models::ai::OllamaChatMessage;
-use crate::models::chat::ChatMessage;
+use crate::models::chat::{ChatMessage, ChatTurnResult, ContextUsage};
 use crate::models::query_result::QueryResult;
 use crate::state::AppState;
 use chrono::Utc;
@@ -26,6 +26,10 @@ const RUN_QUERY_SAMPLE_ROWS: usize = 10;
 const CELL_CHAR_CAP: usize = 200;
 /// Per text-tool-result character cap (list_tables, get_columns, etc).
 const TEXT_TOOL_CHAR_CAP: usize = 10_000;
+/// Soft cap on serialized history+system prompt characters before the user
+/// is nudged to /compact. Tuned for Ollama defaults (~6-8K tokens).
+/// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content.
+const CONTEXT_BUDGET_CHARS: u64 = 24_000;

 // ---------------------------------------------------------------------------
 // Action protocol
@@ -418,13 +422,38 @@ fn build_history(
 // chat_send
 // ---------------------------------------------------------------------------

+/// Estimate how many characters the next LLM call will serialize: system prompt
+/// plus conversation as produced by `build_history`, the same path the real call
+/// uses. Overview/memory lookup failures fall back to empty, so this never errors.
+async fn compute_usage(
+    state: &AppState,
+    app: &AppHandle,
+    connection_id: &str,
+    working: &[ChatMessage],
+) -> ContextUsage {
+    let overview = build_overview_context(state, connection_id)
+        .await
+        .unwrap_or_default();
+    let memory = read_memory_core(app, connection_id).unwrap_or_default();
+    let history = build_history(working, &overview, &memory);
+    // role (ASCII, ≤ 9) + content in chars, not UTF-8 bytes (Cyrillic is 2 bytes/char) + envelope
+    let used: u64 = history
+        .iter()
+        .map(|m| (m.role.len() + m.content.chars().count() + 16) as u64)
+        .sum();
+    ContextUsage {
+        used_chars: used,
+        budget_chars: CONTEXT_BUDGET_CHARS,
+    }
+}
+
 #[tauri::command]
 pub async fn chat_send(
    app: AppHandle,
    state: State<'_, Arc<AppState>>,
    connection_id: String,
    messages: Vec<ChatMessage>,
-) -> TuskResult<Vec<ChatMessage>> {
+) -> TuskResult<ChatTurnResult> {
    let mut new_messages: Vec<ChatMessage> = Vec::new();
    let mut working: Vec<ChatMessage> = messages;

@@ -456,7 +485,11 @@ pub async fn chat_send(
                };
                new_messages.push(msg.clone());
                working.push(msg);
-                return Ok(new_messages);
+                let usage = compute_usage(&state, &app, &connection_id, &working).await;
+                return Ok(ChatTurnResult {
+                    messages: new_messages,
+                    usage,
+                });
            }
        };

@@ -469,7 +502,11 @@ pub async fn chat_send(
                };
                new_messages.push(msg.clone());
                working.push(msg);
-                return Ok(new_messages);
+                let usage = compute_usage(&state, &app, &connection_id, &working).await;
+                return Ok(ChatTurnResult {
+                    messages: new_messages,
+                    usage,
+                });
            }
            AgentAction::RunQuery { sql } => {
                push_tool_call(
@@ -604,8 +641,13 @@ pub async fn chat_send(
        ),
        created_at: now_ms(),
    };
-    new_messages.push(msg);
-    Ok(new_messages)
+    new_messages.push(msg.clone());
+    working.push(msg);
+    let usage = compute_usage(&state, &app, &connection_id, &working).await;
+    Ok(ChatTurnResult {
+        messages: new_messages,
+        usage,
+    })
 }

 fn push_tool_call(
@@ -654,6 +696,143 @@ fn run_text_tool(outcome: TuskResult<String>, tool: &str) -> ChatMessage {
    }
 }

+// ---------------------------------------------------------------------------
+// chat_compact
+// ---------------------------------------------------------------------------
+
+/// Render the older-history portion of the thread as a compact text block
+/// for LLM-driven summarization. Skips QueryResult.rows (huge), keeps only
+/// row_count + columns; plain-text tool results are cut to 800 chars.
+fn render_thread_for_summary(messages: &[ChatMessage]) -> String {
+    let mut out = String::new();
+    for m in messages {
+        match m {
+            ChatMessage::User { text, .. } => {
+                out.push_str(&format!("USER: {}\n\n", text));
+            }
+            ChatMessage::Assistant { text, .. } => {
+                out.push_str(&format!("ASSISTANT: {}\n\n", text));
+            }
+            ChatMessage::ToolCall { tool, input_json, .. } => {
+                out.push_str(&format!("TOOL_CALL [{}]: {}\n\n", tool, input_json));
+            }
+            ChatMessage::ToolResult {
+                tool,
+                is_error,
+                text,
+                result,
+                ..
+            } => {
+                if *is_error {
+                    out.push_str(&format!(
+                        "TOOL_ERROR [{}]: {}\n\n",
+                        tool,
+                        text.as_deref().unwrap_or("")
+                    ));
+                    continue; // errors never fall through to the result/text rendering below
+                }
+                if let Some(qr) = result {
+                    out.push_str(&format!(
+                        "TOOL_RESULT [{}]: {} rows; columns={}\n\n",
+                        tool,
+                        qr.row_count,
+                        qr.columns.join(", ")
+                    ));
+                } else if let Some(t) = text {
+                    let snippet: String = t.chars().take(800).collect(); // char-based cut, safe on multi-byte text
+                    out.push_str(&format!("TOOL_RESULT [{}]: {}\n\n", tool, snippet));
+                } // a non-error result with neither `result` nor `text` emits nothing
+            }
+        }
+    }
+    out
+}
+
+/// Locate the most recent `User` message in the thread.
+/// Returns its index, or `messages.len()` when the thread has no user message.
+fn last_user_turn_index(messages: &[ChatMessage]) -> usize {
+    // `rposition` searches from the back, so the first hit is the last user turn.
+    messages
+        .iter()
+        .rposition(|m| matches!(m, ChatMessage::User { .. }))
+        .unwrap_or(messages.len())
+}
+
+/// LLM-summarise the older portion of a chat thread.
+/// Returns `[Assistant("📋 Compacted N earlier messages: …"), <last user message
+/// and everything after it>]`; a thread with nothing before its last user
+/// message (or an empty one) is returned unchanged without calling the LLM.
+///
+/// # Errors
+/// `TuskError::Ai` if the summariser fails or returns a blank summary — replacing
+/// real history with nothing would silently discard it.
+#[tauri::command]
+pub async fn chat_compact(
+    app: AppHandle,
+    state: State<'_, Arc<AppState>>,
+    connection_id: String,
+    messages: Vec<ChatMessage>,
+) -> TuskResult<ChatTurnResult> {
+    // Preserve the user's most recent question (if any) untouched so the model
+    // can continue from it after compaction; everything before it is summarised.
+    // With no user message the index equals `len()`, so `recent` is empty.
+    let split_at = last_user_turn_index(&messages);
+    let (older, recent) = messages.split_at(split_at);
+
+    if older.is_empty() {
+        let usage = compute_usage(&state, &app, &connection_id, &messages).await;
+        return Ok(ChatTurnResult { messages, usage });
+    }
+
+    let convo = render_thread_for_summary(older);
+    let system = "You are a precise summarizer of a database analysis dialogue. \
+         Produce a SHORT summary in the SAME language the user spoke. \
+         Use 3-6 bullet points covering: the user's goal, key tables/columns/queries used, \
+         numerical findings, conclusions reached, any open questions. \
+         Be concrete with numbers and identifiers. Total length < 800 chars. \
+         Output the bullets directly with no preamble, no JSON, no markdown fences.";
+
+    let llm_messages = vec![
+        OllamaChatMessage {
+            role: "system".to_string(),
+            content: system.to_string(),
+        },
+        OllamaChatMessage {
+            role: "user".to_string(),
+            content: convo,
+        },
+    ];
+    let summary = call_ollama_chat_messages(&app, &state, llm_messages, None)
+        .await
+        .map_err(|e| TuskError::Ai(format!("Compact failed: {}", e)))?;
+
+    let cleaned = summary.trim();
+    // A blank summary would wipe the history it replaces; surface it as a failure.
+    if cleaned.is_empty() {
+        return Err(TuskError::Ai("Compact failed: empty summary".to_string()));
+    }
+    let compacted_msg = ChatMessage::Assistant {
+        id: new_id("asst"),
+        text: format!(
+            "📋 Compacted {} earlier message{}:\n\n{}",
+            older.len(),
+            if older.len() == 1 { "" } else { "s" },
+            cleaned
+        ),
+        created_at: now_ms(),
+    };
+
+    let mut out: Vec<ChatMessage> = Vec::with_capacity(1 + recent.len());
+    out.push(compacted_msg);
+    out.extend(recent.iter().cloned());
+
+    let usage = compute_usage(&state, &app, &connection_id, &out).await;
+    Ok(ChatTurnResult {
+        messages: out,
+        usage,
+    })
+}
+
 // ---------------------------------------------------------------------------
 // tests
 // ---------------------------------------------------------------------------
@@ -833,6 +1012,55 @@ mod tests {
        assert!(parse_agent_action(r#"{"action":"find_queries","text":""}"#).is_err());
    }

+    #[test]
+    fn last_user_turn_index_finds_last_user() {
+        let msgs = vec![
+            ChatMessage::User { id: "u1".into(), text: "first".into(), created_at: 1 },
+            ChatMessage::Assistant { id: "a1".into(), text: "ans".into(), created_at: 2 },
+            ChatMessage::User { id: "u2".into(), text: "second".into(), created_at: 3 },
+            ChatMessage::Assistant { id: "a2".into(), text: "ans2".into(), created_at: 4 },
+        ];
+        assert_eq!(last_user_turn_index(&msgs), 2); // u2 (index 2) wins over the earlier u1 (index 0)
+    }
+
+    #[test]
+    fn last_user_turn_index_returns_len_when_no_user() {
+        let msgs = vec![ChatMessage::Assistant {
+            id: "a1".into(),
+            text: "alone".into(),
+            created_at: 1,
+        }];
+        assert_eq!(last_user_turn_index(&msgs), msgs.len()); // `len()` is the "no User message" sentinel
+    }
+
+    #[test]
+    fn render_thread_for_summary_includes_roles_and_skips_rows() {
+        let msgs = vec![
+            ChatMessage::User { id: "u1".into(), text: "find users".into(), created_at: 1 },
+            ChatMessage::ToolCall { id: "c1".into(), tool: "run_query".into(), input_json: r#"{"sql":"SELECT 1"}"#.into(), created_at: 2 },
+            ChatMessage::ToolResult {
+                id: "r1".into(),
+                tool: "run_query".into(),
+                is_error: false,
+                text: None,
+                result: Some(QueryResult {
+                    columns: vec!["id".into(), "name".into()],
+                    types: vec!["INT4".into(), "TEXT".into()],
+                    rows: vec![vec![Value::Number(1.into()), Value::String("alice".into())]; 1000], // bulky payload
+                    row_count: 1000,
+                    execution_time_ms: 12,
+                }),
+                created_at: 3,
+            },
+        ];
+        let rendered = render_thread_for_summary(&msgs);
+        assert!(rendered.contains("USER: find users"));
+        assert!(rendered.contains("TOOL_CALL [run_query]"));
+        assert!(rendered.contains("1000 rows"));
+        // Cell values must never reach the summariser prompt; only row_count and column names do.
+        assert!(!rendered.contains("alice"));
+    }
+
    #[test]
    fn rejects_legacy_get_schema() {
        assert!(parse_agent_action(r#"{"action":"get_schema"}"#).is_err());
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -115,6 +115,7 @@ pub fn run() {
            commands::ai::fix_sql_error,
            // chat
            commands::chat::chat_send,
+            commands::chat::chat_compact,
            // memory
            commands::memory::get_memory,
            commands::memory::save_memory,
--- a/src-tauri/src/models/chat.rs
+++ b/src-tauri/src/models/chat.rs
@@ -17,3 +17,18 @@ pub enum ChatMessage {
    },
 }

+/// Approximate model-context budget usage for the current chat thread.
+/// Measured in characters of the serialized history that we send to the LLM.
+/// Token estimate ≈ used_chars / 3 for mixed Cyrillic/ASCII content.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContextUsage {
+    pub used_chars: u64, // size of the history built for the next LLM call, incl. system prompt
+    pub budget_chars: u64, // soft cap the frontend compares `used_chars` against
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatTurnResult {
+    pub messages: Vec<ChatMessage>, // chat_send: only this turn's new messages; chat_compact: the whole thread
+    pub usage: ContextUsage, // computed over the thread as it stands after the command
+}
+
--- a/src/components/chat/ChatPanel.tsx
+++ b/src/components/chat/ChatPanel.tsx
@@ -3,9 +3,15 @@ import { useChat } from "@/hooks/use-chat";
 import { ChatComposer } from "./ChatComposer";
 import { ChatMessageView } from "./ChatMessageView";
 import { Button } from "@/components/ui/button";
-import { Eraser, Sparkles } from "lucide-react";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { Eraser, Sparkles, Layers } from "lucide-react";
 import { useAppStore } from "@/stores/app-store";
 import { useAiSettings } from "@/hooks/use-ai";
+import type { ContextUsage } from "@/types";

 interface Props {
  tabId: string;
@@ -13,7 +19,7 @@ interface Props {
 }

 export function ChatPanel({ tabId, connectionId }: Props) {
-  const { messages, pending, send, clear } = useChat(tabId, connectionId);
+  const { messages, pending, usage, send, clear, compact } = useChat(tabId, connectionId);
  const dbFlavors = useAppStore((s) => s.dbFlavors);
  const flavor = dbFlavors[connectionId];
  const { data: aiSettings } = useAiSettings();
@@ -33,11 +39,28 @@ export function ChatPanel({ tabId, connectionId }: Props) {
        <div className="flex items-center gap-2 text-xs text-muted-foreground">
          <Sparkles className="h-3.5 w-3.5 text-primary/70" />
          <span className="font-medium">AI Assistant</span>
-          {flavor && <span className="text-[10px] uppercase tracking-wider text-muted-foreground/60">· {flavor}</span>}
+          {flavor && (
+            <span className="text-[10px] uppercase tracking-wider text-muted-foreground/60">
+              · {flavor}
+            </span>
+          )}
          {aiSettings?.model && (
            <span className="text-[10px] text-muted-foreground/60">· {aiSettings.model}</span>
          )}
        </div>
+        <div className="flex items-center gap-2">
+          <UsageBadge usage={usage} />
+          <Button
+            size="xs"
+            variant="ghost"
+            onClick={() => compact()}
+            disabled={messages.length === 0 || pending}
+            title="Summarize older messages to free context (also: type /compact)"
+            className="h-6 gap-1 text-xs text-muted-foreground hover:text-foreground"
+          >
+            <Layers className="h-3 w-3" />
+            Compact
+          </Button>
          <Button
            size="xs"
            variant="ghost"
@@ -49,6 +72,7 @@ export function ChatPanel({ tabId, connectionId }: Props) {
            Clear
          </Button>
        </div>
+      </div>

      <div ref={scrollerRef} className="min-h-0 flex-1 overflow-y-auto">
        {messages.length === 0 && !pending ? (
@@ -69,7 +93,7 @@ export function ChatPanel({ tabId, connectionId }: Props) {
          disabled={pending || !aiReady}
          placeholder={
            aiReady
-              ? "Ask in plain language. The agent will browse schema and run read-only queries."
+              ? "Ask in plain language. /compact to summarise, /clear to wipe."
              : "Configure an AI model in Settings to enable chat."
          }
        />
@@ -78,6 +102,50 @@ export function ChatPanel({ tabId, connectionId }: Props) {
  );
 }

+function UsageBadge({ usage }: { usage: ContextUsage | undefined }) {
+  if (!usage || usage.budget_chars === 0) return null; // no usage recorded for this tab; also guards the division
+  const ratio = Math.min(usage.used_chars / usage.budget_chars, 1.5); // capped, so the label tops out at 150%
+  const usedTok = Math.round(usage.used_chars / 3 / 100) / 10; // ~k-tokens with 1 decimal
+  const budgetTok = Math.round(usage.budget_chars / 3 / 100) / 10;
+  const percent = Math.round(ratio * 100);
+
+  let toneClass = "text-muted-foreground/70"; // label tone: muted below 30%, then green, amber, red
+  if (ratio >= 0.85) toneClass = "text-destructive";
+  else if (ratio >= 0.6) toneClass = "text-amber-500";
+  else if (ratio >= 0.3) toneClass = "text-emerald-500/80";
+
+  const trackClass = "h-1.5 w-12 overflow-hidden rounded-full bg-muted";
+  let fillClass = "bg-emerald-500/70"; // bar fill: green below 60%, then amber, red from 85%
+  if (ratio >= 0.85) fillClass = "bg-destructive";
+  else if (ratio >= 0.6) fillClass = "bg-amber-500";
+
+  return (
+    <Tooltip>
+      <TooltipTrigger asChild>
+        <div className="flex items-center gap-1.5 text-[10px]">
+          <div className={trackClass}>
+            <div
+              className={fillClass}
+              style={{
+                height: "100%",
+                width: `${Math.min(ratio, 1) * 100}%`,
+              }}
+            />
+          </div>
+          <span className={toneClass}>
+            {usedTok}k / {budgetTok}k tok · {percent}%
+          </span>
+        </div>
+      </TooltipTrigger>
+      <TooltipContent side="bottom" className="max-w-[260px] text-xs">
+        Approximate context usage. {usage.used_chars.toLocaleString()} chars sent to the model
+        last turn out of {usage.budget_chars.toLocaleString()} budget.
+        {ratio >= 0.6 && " Type /compact (or click Compact) to summarise older history."}
+      </TooltipContent>
+    </Tooltip>
+  );
+}
+
 function PendingIndicator() {
  return (
    <div className="flex items-center gap-2 text-xs text-muted-foreground/70">
@@ -102,6 +170,11 @@ function EmptyState({ aiReady, flavor }: { aiReady: boolean; flavor: string | un
            ? `Connected to ${flavor ?? "database"}. Try: "How many rows in each table?", "Top 10 customers by total spend", "Show me last week's orders".`
            : "Open Settings → AI to choose an Ollama model. Tusk will then assist with natural-language queries."}
        </p>
+        {aiReady && (
+          <p className="text-[11px] text-muted-foreground/60">
+            Slash commands: <code>/compact</code> · <code>/clear</code>
+          </p>
+        )}
      </div>
    </div>
  );
--- a/src/hooks/use-chat.ts
+++ b/src/hooks/use-chat.ts
@@ -1,10 +1,13 @@
 import { useCallback } from "react";
-import { chatSend } from "@/lib/tauri";
+import { chatCompact, chatSend } from "@/lib/tauri";
 import { useAppStore } from "@/stores/app-store";
 import type { ChatMessage } from "@/types";

 const EMPTY_THREAD: ChatMessage[] = [];

+/// Auto-compact when serialized history exceeds this fraction of the budget.
+const AUTO_COMPACT_THRESHOLD = 0.85;
+
 function newId(prefix: string) {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
 }
@@ -12,17 +15,80 @@ function newId(prefix: string) {
 export function useChat(tabId: string, connectionId: string) {
  const messages = useAppStore((s) => s.chatThreads[tabId] ?? EMPTY_THREAD);
  const pending = useAppStore((s) => Boolean(s.chatPending[tabId]));
+  const usage = useAppStore((s) => s.chatUsage[tabId]);
  const appendChatMessages = useAppStore((s) => s.appendChatMessages);
+  const replaceChatThread = useAppStore((s) => s.replaceChatThread);
  const clearChatThread = useAppStore((s) => s.clearChatThread);
  const setChatPending = useAppStore((s) => s.setChatPending);
+  const setChatUsage = useAppStore((s) => s.setChatUsage);
+
+  const compact = useCallback(async (): Promise<boolean> => {
+    const state = useAppStore.getState();
+    if (state.chatPending[tabId]) return false;
+    const history = state.chatThreads[tabId] ?? [];
+    if (history.length === 0) return false;
+
+    setChatPending(tabId, true);
+    try {
+      const turn = await chatCompact(connectionId, history);
+      replaceChatThread(tabId, turn.messages);
+      setChatUsage(tabId, turn.usage);
+      return true;
+    } catch (err) {
+      appendChatMessages(tabId, [
+        {
+          id: newId("err"),
+          role: "assistant",
+          text: `Compact failed: ${String(err)}`,
+          created_at: Date.now(),
+        },
+      ]);
+      return false;
+    } finally {
+      setChatPending(tabId, false);
+    }
+  }, [
+    tabId,
+    connectionId,
+    appendChatMessages,
+    replaceChatThread,
+    setChatPending,
+    setChatUsage,
+  ]);

  const send = useCallback(
    async (text: string) => {
      const trimmed = text.trim();
      if (!trimmed) return;
+
+      // Slash commands
+      if (trimmed === "/clear") {
+        clearChatThread(tabId);
+        return;
+      }
+      if (trimmed === "/compact") {
+        await compact();
+        return;
+      }
+
      const state = useAppStore.getState();
      if (state.chatPending[tabId]) return;
-      const history = state.chatThreads[tabId] ?? [];
+
+      // Auto-compact when prior turn pushed usage past threshold.
+      const lastUsage = state.chatUsage[tabId];
+      if (
+        lastUsage &&
+        lastUsage.budget_chars > 0 &&
+        lastUsage.used_chars / lastUsage.budget_chars > AUTO_COMPACT_THRESHOLD &&
+        (state.chatThreads[tabId]?.length ?? 0) > 1
+      ) {
+        const ok = await compact();
+        if (!ok) return; // compact failed; bail out so the user can retry manually
+      }
+
+      const after = useAppStore.getState();
+      if (after.chatPending[tabId]) return;
+      const history = after.chatThreads[tabId] ?? [];
      const userMsg: ChatMessage = {
        id: newId("user"),
        role: "user",
@@ -32,8 +98,9 @@ export function useChat(tabId: string, connectionId: string) {
      appendChatMessages(tabId, [userMsg]);
      setChatPending(tabId, true);
      try {
-        const reply = await chatSend(connectionId, [...history, userMsg]);
-        appendChatMessages(tabId, reply);
+        const turn = await chatSend(connectionId, [...history, userMsg]);
+        appendChatMessages(tabId, turn.messages);
+        setChatUsage(tabId, turn.usage);
      } catch (err) {
        appendChatMessages(tabId, [
          {
@@ -47,10 +114,18 @@ export function useChat(tabId: string, connectionId: string) {
        setChatPending(tabId, false);
      }
    },
-    [tabId, connectionId, appendChatMessages, setChatPending]
+    [
+      tabId,
+      connectionId,
+      appendChatMessages,
+      clearChatThread,
+      compact,
+      setChatPending,
+      setChatUsage,
+    ]
  );

  const clear = useCallback(() => clearChatThread(tabId), [tabId, clearChatThread]);

-  return { messages, pending, send, clear };
+  return { messages, pending, usage, send, clear, compact };
 }
--- a/src/lib/tauri.ts
+++ b/src/lib/tauri.ts
@@ -18,6 +18,7 @@ import type {
  AppSettings,
  McpStatus,
  ChatMessage,
+  ChatTurnResult,
 } from "@/types";

 // Connections
@@ -220,7 +221,10 @@ export const fixSqlError = (connectionId: string, sql: string, errorMessage: str
  invoke<string>("fix_sql_error", { connectionId, sql, errorMessage });

 export const chatSend = (connectionId: string, messages: ChatMessage[]) =>
-  invoke<ChatMessage[]>("chat_send", { connectionId, messages });
+  invoke<ChatTurnResult>("chat_send", { connectionId, messages });
+
+export const chatCompact = (connectionId: string, messages: ChatMessage[]) =>
+  invoke<ChatTurnResult>("chat_compact", { connectionId, messages }); // result holds the whole replacement thread

 // Memory (per-connection markdown notes for the chat agent)
 export const getMemory = (connectionId: string) =>
--- a/src/stores/app-store.ts
+++ b/src/stores/app-store.ts
@@ -1,5 +1,5 @@
 import { create } from "zustand";
-import type { ChatMessage, ConnectionConfig, DbFlavor, Tab } from "@/types";
+import type { ChatMessage, ConnectionConfig, ContextUsage, DbFlavor, Tab } from "@/types";

 interface AppState {
  connections: ConnectionConfig[];
@@ -14,6 +14,7 @@ interface AppState {
  pgVersion: string | null;
  chatThreads: Record<string, ChatMessage[]>;
  chatPending: Record<string, boolean>;
+  chatUsage: Record<string, ContextUsage>;

  setConnections: (connections: ConnectionConfig[]) => void;
  setActiveConnectionId: (id: string | null) => void;
@@ -31,8 +32,10 @@ interface AppState {
  setSidebarWidth: (width: number) => void;

  appendChatMessages: (tabId: string, messages: ChatMessage[]) => void;
+  replaceChatThread: (tabId: string, messages: ChatMessage[]) => void;
  clearChatThread: (tabId: string) => void;
  setChatPending: (tabId: string, pending: boolean) => void;
+  setChatUsage: (tabId: string, usage: ContextUsage) => void;
 }

 export const useAppStore = create<AppState>((set) => ({
@@ -48,6 +51,7 @@ export const useAppStore = create<AppState>((set) => ({
  pgVersion: null,
  chatThreads: {},
  chatPending: {},
+  chatUsage: {},

  setConnections: (connections) => set({ connections }),
  setActiveConnectionId: (id) => set({ activeConnectionId: id }),
@@ -97,7 +101,9 @@ export const useAppStore = create<AppState>((set) => ({
      delete chatThreads[id];
      const chatPending = { ...state.chatPending };
      delete chatPending[id];
-      return { tabs, activeTabId, chatThreads, chatPending };
+      const chatUsage = { ...state.chatUsage };
+      delete chatUsage[id];
+      return { tabs, activeTabId, chatThreads, chatPending, chatUsage };
    }),
  setActiveTabId: (id) => set({ activeTabId: id }),
  updateTab: (id, updates) =>
@@ -113,12 +119,23 @@ export const useAppStore = create<AppState>((set) => ({
        [tabId]: [...(state.chatThreads[tabId] ?? []), ...messages],
      },
    })),
-  clearChatThread: (tabId) =>
+  replaceChatThread: (tabId, messages) =>
    set((state) => ({
-      chatThreads: { ...state.chatThreads, [tabId]: [] },
+      chatThreads: { ...state.chatThreads, [tabId]: messages },
    })),
+  clearChatThread: (tabId) =>
+    set((state) => {
+      const chatThreads = { ...state.chatThreads, [tabId]: [] };
+      const chatUsage = { ...state.chatUsage };
+      delete chatUsage[tabId];
+      return { chatThreads, chatUsage };
+    }),
  setChatPending: (tabId, pending) =>
    set((state) => ({
      chatPending: { ...state.chatPending, [tabId]: pending },
    })),
+  setChatUsage: (tabId, usage) =>
+    set((state) => ({
+      chatUsage: { ...state.chatUsage, [tabId]: usage },
+    })),
 }));
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -205,3 +205,13 @@ export type ChatMessage =
      text?: string | null;
      result?: QueryResult | null;
    });
+
+export interface ContextUsage {
+  used_chars: number; // size of the history the backend built for the next LLM call
+  budget_chars: number; // soft cap; used_chars / budget_chars drives the badge and auto-compact
+}
+
+export interface ChatTurnResult {
+  messages: ChatMessage[]; // chat_send: new messages to append; chat_compact: the full replacement thread
+  usage: ContextUsage; // usage of the thread after the command ran
+}