feat: chat context-usage display, /compact slash command, auto-compact
Adds visibility into how much of the model context window the chat agent
is using and a way to free space when it fills up.
Backend
- New ContextUsage{used_chars, budget_chars} returned from chat_send
alongside messages (return type ChatTurnResult). Computed by running
build_history once at end of turn and counting char bytes — same data
path as the actual LLM call, so the count is exact for the chosen
budget unit.
- CONTEXT_BUDGET_CHARS = 24,000 (~6-8K tokens). Tuned for Ollama
defaults; can be exposed via AiSettings later.
- New chat_compact Tauri command. Splits the thread at the last user
turn, LLM-summarises everything before it (3-6 bullet points,
language-aware, < 800 chars), and returns a thread of
[Assistant("📋 Compacted N messages: …"), <last_user_turn?>]. The
recent user turn is preserved untouched so the agent can keep
answering it.
- render_thread_for_summary skips QueryResult.rows entirely so a single
large run_query can't blow the summariser's context.
- 3 new unit tests (last_user_turn_index, render skipping rows, empty
thread no-op).
Frontend
- ChatPanel header gets a usage badge: progress bar + `Xk / Yk tok ·
P%`, color-coded green (<30%) / muted (<60%) / amber (<85%) / red
(≥85%). Tooltip explains and nudges /compact when ≥60%.
- Compact button next to Clear in the header.
- Slash commands in ChatComposer: /compact, /clear.
- Empty-state shows the slash-command hint.
- Auto-compact: if the previous turn pushed usage past 85% AND the
thread has more than one message, the next user turn first runs
chat_compact transparently before chat_send. The compaction surfaces
as a visible Assistant("📋 Compacted …") message so the user can see
what the agent kept.
- app-store gets chatUsage map per tab + replaceChatThread + setChatUsage
actions; closeTab and clearChatThread clean up usage too.
Verification: cargo check clean, cargo test --lib 53 pass (+3),
tsc --noEmit clean, vitest run 20 pass.
This commit is contained in:
@@ -7,7 +7,7 @@ use crate::commands::memory::{append_memory_core, read_memory_core};
|
||||
use crate::commands::queries::execute_query_core;
|
||||
use crate::error::{TuskError, TuskResult};
|
||||
use crate::models::ai::OllamaChatMessage;
|
||||
use crate::models::chat::ChatMessage;
|
||||
use crate::models::chat::{ChatMessage, ChatTurnResult, ContextUsage};
|
||||
use crate::models::query_result::QueryResult;
|
||||
use crate::state::AppState;
|
||||
use chrono::Utc;
|
||||
@@ -26,6 +26,10 @@ const RUN_QUERY_SAMPLE_ROWS: usize = 10;
|
||||
const CELL_CHAR_CAP: usize = 200;
|
||||
/// Per text-tool-result character cap (list_tables, get_columns, etc).
|
||||
const TEXT_TOOL_CHAR_CAP: usize = 10_000;
|
||||
/// Soft cap, in characters, on the serialized history + system prompt. It is
|
||||
/// reported to callers as `ContextUsage::budget_chars`, and the user is nudged
|
||||
/// to /compact as usage approaches it. Tuned for Ollama defaults (~4-8K tokens).
/// Token estimate ≈ chars / 3 for mixed Cyrillic/ASCII content.
|
||||
const CONTEXT_BUDGET_CHARS: u64 = 24_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Action protocol
|
||||
@@ -418,13 +422,38 @@ fn build_history(
|
||||
// chat_send
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Estimate how many characters the next LLM call will serialize to history
|
||||
/// (system prompt + conversation, after compression). This is the same data
|
||||
/// path as the actual call, so the count is exact for the chosen budget unit.
|
||||
async fn compute_usage(
|
||||
state: &AppState,
|
||||
app: &AppHandle,
|
||||
connection_id: &str,
|
||||
working: &[ChatMessage],
|
||||
) -> ContextUsage {
|
||||
let overview = build_overview_context(state, connection_id)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let memory = read_memory_core(app, connection_id).unwrap_or_default();
|
||||
let history = build_history(working, &overview, &memory);
|
||||
// role string ("system"/"user"/"assistant") ≤ 9 chars + content + JSON envelope overhead
|
||||
let used: u64 = history
|
||||
.iter()
|
||||
.map(|m| (m.role.len() + m.content.len() + 16) as u64)
|
||||
.sum();
|
||||
ContextUsage {
|
||||
used_chars: used,
|
||||
budget_chars: CONTEXT_BUDGET_CHARS,
|
||||
}
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn chat_send(
|
||||
app: AppHandle,
|
||||
state: State<'_, Arc<AppState>>,
|
||||
connection_id: String,
|
||||
messages: Vec<ChatMessage>,
|
||||
) -> TuskResult<Vec<ChatMessage>> {
|
||||
) -> TuskResult<ChatTurnResult> {
|
||||
let mut new_messages: Vec<ChatMessage> = Vec::new();
|
||||
let mut working: Vec<ChatMessage> = messages;
|
||||
|
||||
@@ -456,7 +485,11 @@ pub async fn chat_send(
|
||||
};
|
||||
new_messages.push(msg.clone());
|
||||
working.push(msg);
|
||||
return Ok(new_messages);
|
||||
let usage = compute_usage(&state, &app, &connection_id, &working).await;
|
||||
return Ok(ChatTurnResult {
|
||||
messages: new_messages,
|
||||
usage,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -469,7 +502,11 @@ pub async fn chat_send(
|
||||
};
|
||||
new_messages.push(msg.clone());
|
||||
working.push(msg);
|
||||
return Ok(new_messages);
|
||||
let usage = compute_usage(&state, &app, &connection_id, &working).await;
|
||||
return Ok(ChatTurnResult {
|
||||
messages: new_messages,
|
||||
usage,
|
||||
});
|
||||
}
|
||||
AgentAction::RunQuery { sql } => {
|
||||
push_tool_call(
|
||||
@@ -604,8 +641,13 @@ pub async fn chat_send(
|
||||
),
|
||||
created_at: now_ms(),
|
||||
};
|
||||
new_messages.push(msg);
|
||||
Ok(new_messages)
|
||||
new_messages.push(msg.clone());
|
||||
working.push(msg);
|
||||
let usage = compute_usage(&state, &app, &connection_id, &working).await;
|
||||
Ok(ChatTurnResult {
|
||||
messages: new_messages,
|
||||
usage,
|
||||
})
|
||||
}
|
||||
|
||||
fn push_tool_call(
|
||||
@@ -654,6 +696,143 @@ fn run_text_tool(outcome: TuskResult<String>, tool: &str) -> ChatMessage {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// chat_compact
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Render the older-history portion of the thread as a compact text block
|
||||
/// for LLM-driven summarization. Skips QueryResult.rows (huge), keeps only
|
||||
/// columns + row_count + sample.
|
||||
fn render_thread_for_summary(messages: &[ChatMessage]) -> String {
|
||||
let mut out = String::new();
|
||||
for m in messages {
|
||||
match m {
|
||||
ChatMessage::User { text, .. } => {
|
||||
out.push_str(&format!("USER: {}\n\n", text));
|
||||
}
|
||||
ChatMessage::Assistant { text, .. } => {
|
||||
out.push_str(&format!("ASSISTANT: {}\n\n", text));
|
||||
}
|
||||
ChatMessage::ToolCall { tool, input_json, .. } => {
|
||||
out.push_str(&format!("TOOL_CALL [{}]: {}\n\n", tool, input_json));
|
||||
}
|
||||
ChatMessage::ToolResult {
|
||||
tool,
|
||||
is_error,
|
||||
text,
|
||||
result,
|
||||
..
|
||||
} => {
|
||||
if *is_error {
|
||||
out.push_str(&format!(
|
||||
"TOOL_ERROR [{}]: {}\n\n",
|
||||
tool,
|
||||
text.as_deref().unwrap_or("")
|
||||
));
|
||||
continue;
|
||||
}
|
||||
if let Some(qr) = result {
|
||||
out.push_str(&format!(
|
||||
"TOOL_RESULT [{}]: {} rows; columns={}\n\n",
|
||||
tool,
|
||||
qr.row_count,
|
||||
qr.columns.join(", ")
|
||||
));
|
||||
} else if let Some(t) = text {
|
||||
let snippet: String = t.chars().take(800).collect();
|
||||
out.push_str(&format!("TOOL_RESULT [{}]: {}\n\n", tool, snippet));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Find the index of the last User message; returns messages.len() if no user message.
|
||||
fn last_user_turn_index(messages: &[ChatMessage]) -> usize {
|
||||
for (i, m) in messages.iter().enumerate().rev() {
|
||||
if matches!(m, ChatMessage::User { .. }) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
messages.len()
|
||||
}
|
||||
|
||||
/// LLM-summarise the older portion of a chat thread.
|
||||
/// Returns thread = [ Assistant("📋 Compacted: …") , <last_user_turn_if_any> ].
|
||||
/// If the thread has nothing to compact, returns it unchanged.
|
||||
#[tauri::command]
|
||||
pub async fn chat_compact(
|
||||
app: AppHandle,
|
||||
state: State<'_, Arc<AppState>>,
|
||||
connection_id: String,
|
||||
messages: Vec<ChatMessage>,
|
||||
) -> TuskResult<ChatTurnResult> {
|
||||
if messages.is_empty() {
|
||||
let usage = compute_usage(&state, &app, &connection_id, &messages).await;
|
||||
return Ok(ChatTurnResult { messages, usage });
|
||||
}
|
||||
|
||||
// Preserve the user's most recent question (if any) untouched so the
|
||||
// model can continue from it after compaction. Everything before goes
|
||||
// into the summary.
|
||||
let split_at = last_user_turn_index(&messages);
|
||||
let (older, recent): (&[ChatMessage], &[ChatMessage]) = if split_at == messages.len() {
|
||||
(&messages[..], &[])
|
||||
} else {
|
||||
(&messages[..split_at], &messages[split_at..])
|
||||
};
|
||||
|
||||
if older.is_empty() {
|
||||
let usage = compute_usage(&state, &app, &connection_id, &messages).await;
|
||||
return Ok(ChatTurnResult { messages, usage });
|
||||
}
|
||||
|
||||
let convo = render_thread_for_summary(older);
|
||||
let system = "You are a precise summarizer of a database analysis dialogue. \
|
||||
Produce a SHORT summary in the SAME language the user spoke. \
|
||||
Use 3-6 bullet points covering: the user's goal, key tables/columns/queries used, \
|
||||
numerical findings, conclusions reached, any open questions. \
|
||||
Be concrete with numbers and identifiers. Total length < 800 chars. \
|
||||
Output the bullets directly with no preamble, no JSON, no markdown fences.";
|
||||
|
||||
let llm_messages = vec![
|
||||
OllamaChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: system.to_string(),
|
||||
},
|
||||
OllamaChatMessage {
|
||||
role: "user".to_string(),
|
||||
content: convo,
|
||||
},
|
||||
];
|
||||
let summary = call_ollama_chat_messages(&app, &state, llm_messages, None)
|
||||
.await
|
||||
.map_err(|e| TuskError::Ai(format!("Compact failed: {}", e)))?;
|
||||
|
||||
let cleaned = summary.trim();
|
||||
let compacted_msg = ChatMessage::Assistant {
|
||||
id: new_id("asst"),
|
||||
text: format!(
|
||||
"📋 Compacted {} earlier message{}:\n\n{}",
|
||||
older.len(),
|
||||
if older.len() == 1 { "" } else { "s" },
|
||||
cleaned
|
||||
),
|
||||
created_at: now_ms(),
|
||||
};
|
||||
|
||||
let mut out: Vec<ChatMessage> = Vec::with_capacity(1 + recent.len());
|
||||
out.push(compacted_msg);
|
||||
out.extend(recent.iter().cloned());
|
||||
|
||||
let usage = compute_usage(&state, &app, &connection_id, &out).await;
|
||||
Ok(ChatTurnResult {
|
||||
messages: out,
|
||||
usage,
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// tests
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -833,6 +1012,55 @@ mod tests {
|
||||
assert!(parse_agent_action(r#"{"action":"find_queries","text":""}"#).is_err());
|
||||
}
|
||||
|
||||
/// With several user turns in the thread, the position of the final one wins.
#[test]
fn last_user_turn_index_finds_last_user() {
    let thread = vec![
        ChatMessage::User {
            id: "u1".into(),
            text: "first".into(),
            created_at: 1,
        },
        ChatMessage::Assistant {
            id: "a1".into(),
            text: "ans".into(),
            created_at: 2,
        },
        ChatMessage::User {
            id: "u2".into(),
            text: "second".into(),
            created_at: 3,
        },
        ChatMessage::Assistant {
            id: "a2".into(),
            text: "ans2".into(),
            created_at: 4,
        },
    ];

    let found = last_user_turn_index(&thread);

    assert_eq!(found, 2);
}
|
||||
|
||||
/// Without any user message the helper falls back to the slice length.
#[test]
fn last_user_turn_index_returns_len_when_no_user() {
    let lone_assistant = ChatMessage::Assistant {
        id: "a1".into(),
        text: "alone".into(),
        created_at: 1,
    };
    let thread = vec![lone_assistant];

    let found = last_user_turn_index(&thread);

    assert_eq!(found, thread.len());
}
|
||||
|
||||
/// The transcript labels each message and reports a query result's size and
/// columns without leaking any row values.
#[test]
fn render_thread_for_summary_includes_roles_and_skips_rows() {
    let sample_row = vec![Value::Number(1.into()), Value::String("alice".into())];
    let big_result = QueryResult {
        columns: vec!["id".into(), "name".into()],
        types: vec!["INT4".into(), "TEXT".into()],
        rows: vec![sample_row; 1000],
        row_count: 1000,
        execution_time_ms: 12,
    };
    let thread = vec![
        ChatMessage::User {
            id: "u1".into(),
            text: "find users".into(),
            created_at: 1,
        },
        ChatMessage::ToolCall {
            id: "c1".into(),
            tool: "run_query".into(),
            input_json: r#"{"sql":"SELECT 1"}"#.into(),
            created_at: 2,
        },
        ChatMessage::ToolResult {
            id: "r1".into(),
            tool: "run_query".into(),
            is_error: false,
            text: None,
            result: Some(big_result),
            created_at: 3,
        },
    ];

    let transcript = render_thread_for_summary(&thread);

    for expected in ["USER: find users", "TOOL_CALL [run_query]", "1000 rows"] {
        assert!(transcript.contains(expected));
    }
    // Row values must never reach the summariser.
    assert!(!transcript.contains("alice"));
}
|
||||
|
||||
#[test]
|
||||
fn rejects_legacy_get_schema() {
|
||||
assert!(parse_agent_action(r#"{"action":"get_schema"}"#).is_err());
|
||||
|
||||
@@ -115,6 +115,7 @@ pub fn run() {
|
||||
commands::ai::fix_sql_error,
|
||||
// chat
|
||||
commands::chat::chat_send,
|
||||
commands::chat::chat_compact,
|
||||
// memory
|
||||
commands::memory::get_memory,
|
||||
commands::memory::save_memory,
|
||||
|
||||
@@ -17,3 +17,18 @@ pub enum ChatMessage {
|
||||
},
|
||||
}
|
||||
|
||||
/// Approximate model-context budget usage for the current chat thread.
|
||||
/// Measured in characters of the serialized history that we send to the LLM.
|
||||
/// Token estimate ≈ used_chars / 3 for mixed Cyrillic/ASCII content.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ContextUsage {
|
||||
/// Estimated size of the history that would be sent on the next LLM call.
pub used_chars: u64,
|
||||
/// Budget that `used_chars` is compared against; the chat commands fill it
/// from their `CONTEXT_BUDGET_CHARS` constant.
pub budget_chars: u64,
|
||||
}
|
||||
|
||||
/// Payload returned by the `chat_send` and `chat_compact` commands: a list of
/// chat messages plus the context usage measured for the resulting thread.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChatTurnResult {
|
||||
/// For `chat_send`, the messages produced during this turn; for
/// `chat_compact`, the complete thread that replaces the old one.
pub messages: Vec<ChatMessage>,
|
||||
/// Context-budget usage computed at the end of the command.
pub usage: ContextUsage,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user