//! Chat agent tool handlers (chat v2). //! //! Each `*_tool` function returns a plain string formatted for direct injection //! into the LLM tool-result history. They reuse the schema helpers in //! `commands::ai` and `commands::schema` rather than re-implementing SQL. use crate::commands::ai::{ fetch_column_comments, fetch_columns, fetch_enum_types, fetch_foreign_keys_raw, fetch_gp_table_extras, fetch_table_comments, fetch_unique_constraints, format_table_block, ColumnInfo, }; use crate::commands::connections::{load_connection_config, switch_database_core}; use crate::commands::queries::execute_query_core; use crate::commands::saved_queries::{list_saved_queries_core, save_query_core}; use crate::commands::schema::{list_databases_core, list_tables_core}; use crate::db::sql_guard::ensure_readonly_sql; use crate::error::{TuskError, TuskResult}; use crate::models::saved_queries::SavedQuery; use crate::state::{AppState, CachedVec, DbFlavor}; use crate::utils::escape_ident; use sqlx::{PgPool, Row}; use std::collections::{BTreeMap, HashMap}; use std::time::{Duration, Instant}; use tauri::AppHandle; const TOOL_CACHE_TTL: Duration = Duration::from_secs(300); const MAX_TABLES_PER_GET_COLUMNS: usize = 20; const COLUMNS_TOOL_OUTPUT_CAP: usize = 15_000; // --------------------------------------------------------------------------- // list_databases // --------------------------------------------------------------------------- pub async fn list_databases_tool(state: &AppState, connection_id: &str) -> TuskResult { let dbs = list_databases_core(state, connection_id).await?; let active = active_db_name(state, connection_id).await; let mut out = format!("DATABASES ({}):", dbs.len()); for db in &dbs { if Some(db) == active.as_ref() { out.push_str(&format!("\n * {} (active)", db)); } else { out.push_str(&format!("\n {}", db)); } } Ok(out) } // --------------------------------------------------------------------------- // list_tables // --------------------------------------------------------------------------- pub async fn list_tables_tool( app: &AppHandle, state: &AppState, connection_id: &str, db: Option<&str>, ) -> TuskResult { let active = active_db_name(state, connection_id).await; let target = db.map(|s| s.to_string()).or_else(|| active.clone()); let target_name = match target.as_deref() { Some(n) => n.to_string(), None => return Err(TuskError::Custom("No active database selected.".into())), }; let same_as_active = active.as_deref() == Some(target_name.as_str()); let flavor = state.get_flavor(connection_id).await; let table_names = match (flavor, same_as_active) { (DbFlavor::ClickHouse, _) => list_tables_clickhouse(state, connection_id, &target_name).await?, (_, true) => list_tables_active_pg(state, connection_id).await?, (_, false) => list_tables_other_pg(app, state, connection_id, &target_name).await?, }; let header = if same_as_active { format!("TABLES IN ACTIVE DATABASE `{}` ({}):", target_name, table_names.len()) } else { format!("TABLES IN DATABASE `{}` ({}):", target_name, table_names.len()) }; let body: Vec = table_names.iter().map(|t| format!(" {}", t)).collect(); Ok(format!("{}\n{}", header, body.join("\n"))) } async fn list_tables_active_pg(state: &AppState, connection_id: &str) -> TuskResult> { let schemas = crate::commands::schema::list_schemas_core(state, connection_id).await?; let mut all: Vec = Vec::new(); for schema in &schemas { let tables = list_tables_core(state, connection_id, schema).await?; for t in tables { all.push(format!("{}.{}", schema, t.name)); } } Ok(all) } async fn list_tables_other_pg( app: &AppHandle, state: &AppState, connection_id: &str, target_db: &str, ) -> TuskResult> { let cache_key = (connection_id.to_string(), target_db.to_string()); if let Some(hit) = state.tables_by_db_cache.read().await.get(&cache_key).cloned() { if hit.cached_at.elapsed() < TOOL_CACHE_TTL { return Ok(hit.value); } } let config = load_connection_config(app, connection_id)?; let url = config.connection_url_for_db(target_db); let pool = PgPool::connect(&url).await.map_err(|e| { TuskError::Custom(format!( "Could not connect to database '{}' on this server: {}", target_db, e )) })?; let rows = sqlx::query( "SELECT table_schema, table_name FROM information_schema.tables \ WHERE table_schema NOT IN ('pg_catalog','information_schema','pg_toast','gp_toolkit') \ AND table_type = 'BASE TABLE' \ ORDER BY table_schema, table_name", ) .fetch_all(&pool) .await .map_err(TuskError::Database)?; pool.close().await; let names: Vec = rows .iter() .map(|r| format!("{}.{}", r.get::(0), r.get::(1))) .collect(); state.tables_by_db_cache.write().await.insert( cache_key, CachedVec { value: names.clone(), cached_at: Instant::now(), }, ); Ok(names) } async fn list_tables_clickhouse( state: &AppState, connection_id: &str, target_db: &str, ) -> TuskResult> { let client = state.get_ch_client(connection_id).await?; let escaped = target_db.replace('\\', "\\\\").replace('\'', "\\'"); let sql = format!( "SELECT name FROM system.tables WHERE database = '{}' ORDER BY name", escaped ); let rows = client.fetch_objects(&sql).await?; Ok(rows .iter() .filter_map(|r| r.get("name").and_then(|v| v.as_str()).map(String::from)) .collect()) } // --------------------------------------------------------------------------- // get_columns // --------------------------------------------------------------------------- pub async fn get_columns_tool( state: &AppState, connection_id: &str, tables: &[String], ) -> TuskResult { if tables.is_empty() { return Err(TuskError::Custom("get_columns requires at least one table.".into())); } if tables.len() > MAX_TABLES_PER_GET_COLUMNS { return Err(TuskError::Custom(format!( "Too many tables ({}); split into batches of ≤{}.", tables.len(), MAX_TABLES_PER_GET_COLUMNS ))); } let active_db = active_db_name(state, connection_id).await.unwrap_or_default(); // Normalise: accept "schema.table", "db.schema.table" (drop db if == active), // and "table" (assume schema "public" for PG, or active DB for CH). let parsed: Vec<(String, String, String)> = tables .iter() .map(|raw| normalise_table_ref(raw, &active_db)) .collect(); let flavor = state.get_flavor(connection_id).await; if matches!(flavor, DbFlavor::ClickHouse) { return get_columns_clickhouse(state, connection_id, &parsed).await; } get_columns_postgres(state, connection_id, &parsed).await } fn normalise_table_ref(raw: &str, active_db: &str) -> (String, String, String) { // Returns (schema, table, original_input_for_diagnostics) let trimmed = raw.trim().trim_matches('"').trim_matches('`'); let parts: Vec<&str> = trimmed.split('.').collect(); match parts.len() { 1 => ("public".to_string(), parts[0].to_string(), raw.to_string()), 2 => (parts[0].to_string(), parts[1].to_string(), raw.to_string()), 3 => { // "db.schema.table" — drop db prefix when it matches active let (db, schema, table) = (parts[0], parts[1], parts[2]); if db == active_db { (schema.to_string(), table.to_string(), raw.to_string()) } else { // Different DB requested — let the caller surface a not-found warning. // We still parse it as schema.table here. (schema.to_string(), table.to_string(), raw.to_string()) } } _ => ("public".to_string(), trimmed.to_string(), raw.to_string()), } } async fn get_columns_postgres( state: &AppState, connection_id: &str, requested: &[(String, String, String)], ) -> TuskResult { let pool = state.get_pool(connection_id).await?; let is_greenplum = matches!(state.get_flavor(connection_id).await, DbFlavor::Greenplum); let gp_major = state.get_gp_major(connection_id).await.unwrap_or(7); let (col_res, fk_res, enum_res, tbl_comm_res, col_comm_res, unique_res) = tokio::join!( fetch_columns(&pool), fetch_foreign_keys_raw(&pool), fetch_enum_types(&pool), fetch_table_comments(&pool), fetch_column_comments(&pool), fetch_unique_constraints(&pool), ); let all_cols = col_res?; let fk_rows = fk_res?; let enum_map = enum_res.unwrap_or_default(); let tbl_comments = tbl_comm_res.unwrap_or_default(); let col_comments = col_comm_res.unwrap_or_default(); let uniques = unique_res.unwrap_or_default(); let gp_extras = if is_greenplum { Some(fetch_gp_table_extras(&pool, gp_major).await) } else { None }; // Build (schema, table) → Vec let mut by_table: BTreeMap<(String, String), Vec> = BTreeMap::new(); for ci in &all_cols { by_table .entry((ci.schema.clone(), ci.table.clone())) .or_default() .push(ci.clone()); } let mut fk_inline: HashMap<(String, String, String), String> = HashMap::new(); for fk in &fk_rows { if fk.columns.len() == 1 && fk.ref_columns.len() == 1 { fk_inline.insert( (fk.schema.clone(), fk.table.clone(), fk.columns[0].clone()), format!("{}.{}({})", fk.ref_schema, fk.ref_table, fk.ref_columns[0]), ); } } let mut unique_map: HashMap<(String, String), Vec> = HashMap::new(); for (schema, table, cols) in &uniques { unique_map .entry((schema.clone(), table.clone())) .or_default() .push(cols.join(", ")); } let varchar_values: HashMap<(String, String, String), Vec> = HashMap::new(); let jsonb_keys: HashMap<(String, String, String), Vec> = HashMap::new(); let mut output: Vec = Vec::new(); let mut not_found: Vec = Vec::new(); for (schema, table, raw) in requested { match by_table.get(&(schema.clone(), table.clone())) { Some(cols) => { let full_name = format!("{}.{}", schema, table); format_table_block( &full_name, cols, &tbl_comments, &col_comments, &fk_inline, &enum_map, &unique_map, &varchar_values, &jsonb_keys, gp_extras.as_ref(), &mut output, ); } None => not_found.push(raw.clone()), } } if !not_found.is_empty() { let nearest = nearest_table_matches(&by_table, ¬_found); let header = format!( "WARNING: tables not found: {}.{}", not_found.join(", "), if nearest.is_empty() { String::new() } else { format!(" Nearest matches: {}.", nearest.join(", ")) } ); output.insert(0, header); output.insert(1, String::new()); } let mut text = output.join("\n"); if text.len() > COLUMNS_TOOL_OUTPUT_CAP { text.truncate(COLUMNS_TOOL_OUTPUT_CAP); text.push_str("\n... (output truncated)"); } Ok(text) } async fn get_columns_clickhouse( state: &AppState, connection_id: &str, requested: &[(String, String, String)], ) -> TuskResult { let client = state.get_ch_client(connection_id).await?; let active_db = client.database.clone(); let where_terms: Vec = requested .iter() .map(|(schema, table, _)| { // For CH, treat the parsed "schema" as the database name; if it equals // a PG-conventional default ("public"), substitute with active CH database. let dbn = if schema == "public" { active_db.clone() } else { schema.clone() }; format!( "(database = '{}' AND name = '{}')", dbn.replace('\'', "\\'"), table.replace('\'', "\\'") ) }) .collect(); let where_clause = where_terms.join(" OR "); let sql = format!( "SELECT database, table, name, type, default_expression, is_in_primary_key, comment, position \ FROM system.columns WHERE {} ORDER BY database, table, position", where_clause ); let rows = client.fetch_objects(&sql).await?; // Group by (database, table) let mut grouped: BTreeMap<(String, String), Vec<&serde_json::Map>> = BTreeMap::new(); for row in &rows { let dbn = row.get("database").and_then(|v| v.as_str()).unwrap_or("").to_string(); let tbl = row.get("table").and_then(|v| v.as_str()).unwrap_or("").to_string(); grouped.entry((dbn, tbl)).or_default().push(row); } // Track which requested tables were found let mut output = String::new(); let mut not_found: Vec = Vec::new(); for (schema, table, raw) in requested { let dbn = if schema == "public" { active_db.clone() } else { schema.clone() }; match grouped.get(&(dbn.clone(), table.clone())) { Some(cols) => { output.push_str(&format!("\nTABLE {}.{}\n", dbn, table)); for col in cols { let name = col.get("name").and_then(|v| v.as_str()).unwrap_or(""); let dtype = col.get("type").and_then(|v| v.as_str()).unwrap_or(""); let is_pk = matches!( col.get("is_in_primary_key"), Some(serde_json::Value::Number(n)) if n.as_i64() == Some(1) ) || matches!( col.get("is_in_primary_key"), Some(serde_json::Value::String(s)) if s == "1" ); let default = col.get("default_expression").and_then(|v| v.as_str()).unwrap_or(""); let comment = col.get("comment").and_then(|v| v.as_str()).unwrap_or(""); let mut line = format!(" {} {}", name, dtype); if is_pk { line.push_str(" [PK]"); } if !default.is_empty() { line.push_str(&format!(" DEFAULT {}", default)); } if !comment.is_empty() { line.push_str(&format!(" -- {}", comment)); } output.push_str(&line); output.push('\n'); } } None => not_found.push(raw.clone()), } } let mut header = String::new(); if !not_found.is_empty() { header.push_str(&format!( "WARNING: tables not found: {}\n\n", not_found.join(", ") )); } let mut combined = format!("{}{}", header, output.trim_start()); if combined.len() > COLUMNS_TOOL_OUTPUT_CAP { combined.truncate(COLUMNS_TOOL_OUTPUT_CAP); combined.push_str("\n... (output truncated)"); } Ok(combined) } fn nearest_table_matches( by_table: &BTreeMap<(String, String), Vec>, missing: &[String], ) -> Vec { let all: Vec = by_table .keys() .map(|(s, t)| format!("{}.{}", s, t)) .collect(); let mut hints: Vec = Vec::new(); for m in missing { let needle = m.to_lowercase(); let mut candidates: Vec<&String> = all .iter() .filter(|n| { let lower = n.to_lowercase(); lower.contains(&needle) || needle.contains(lower.split('.').last().unwrap_or("")) }) .take(3) .collect(); candidates.dedup(); for c in candidates { if !hints.contains(c) { hints.push(c.clone()); } } } hints } // --------------------------------------------------------------------------- // switch_database // --------------------------------------------------------------------------- pub async fn switch_database_tool( app: &AppHandle, state: &AppState, connection_id: &str, target_db: &str, ) -> TuskResult { let config = load_connection_config(app, connection_id)?; // Verify target exists in cluster let dbs = list_databases_core(state, connection_id).await?; if !dbs.iter().any(|d| d == target_db) { return Err(TuskError::Custom(format!( "Database '{}' does not exist on this server. Available: {}", target_db, dbs.join(", ") ))); } switch_database_core(state, &config, target_db).await?; Ok(format!("Switched active database to '{}'.", target_db)) } // --------------------------------------------------------------------------- // helpers // --------------------------------------------------------------------------- async fn active_db_name(state: &AppState, connection_id: &str) -> Option { let flavor = state.get_flavor(connection_id).await; if matches!(flavor, DbFlavor::ClickHouse) { return state .get_ch_client(connection_id) .await .ok() .map(|c| c.database.clone()); } let pool = state.get_pool(connection_id).await.ok()?; sqlx::query_scalar::<_, String>("SELECT current_database()") .fetch_one(&pool) .await .ok() } // --------------------------------------------------------------------------- // save_query / find_queries (chat v3 — F2) // --------------------------------------------------------------------------- const FIND_QUERIES_LIMIT: usize = 10; const FIND_QUERIES_SQL_PREVIEW_CHARS: usize = 500; pub async fn save_query_tool( app: &AppHandle, connection_id: &str, name: &str, sql: &str, ) -> TuskResult { let trimmed_name = name.trim(); let trimmed_sql = sql.trim(); if trimmed_name.is_empty() { return Err(TuskError::Custom("save_query: name must not be empty".into())); } if trimmed_sql.is_empty() { return Err(TuskError::Custom("save_query: sql must not be empty".into())); } let entry = SavedQuery { id: uuid::Uuid::new_v4().to_string(), name: trimmed_name.to_string(), sql: trimmed_sql.to_string(), connection_id: Some(connection_id.to_string()), created_at: chrono::Utc::now().to_rfc3339(), }; save_query_core(app, entry).await?; Ok(format!("Saved query \"{}\" — visible in sidebar → Saved.", trimmed_name)) } pub async fn find_queries_tool( app: &AppHandle, connection_id: &str, text: &str, ) -> TuskResult { let trimmed = text.trim(); if trimmed.is_empty() { return Err(TuskError::Custom("find_queries: text must not be empty".into())); } let all = list_saved_queries_core(app, Some(trimmed)).await?; let matches: Vec = all .into_iter() .filter(|q| q.connection_id.as_deref() == Some(connection_id)) .take(FIND_QUERIES_LIMIT) .collect(); if matches.is_empty() { return Ok(format!( "No saved queries match \"{}\" for this connection.", trimmed )); } let mut out = format!( "Saved queries matching \"{}\" ({}):", trimmed, matches.len() ); for q in &matches { let sql_preview: String = if q.sql.chars().count() > FIND_QUERIES_SQL_PREVIEW_CHARS { let truncated: String = q.sql.chars().take(FIND_QUERIES_SQL_PREVIEW_CHARS).collect(); format!("{}…", truncated) } else { q.sql.clone() }; out.push_str(&format!( "\n\n[{}] {}\n{}", q.created_at, q.name, sql_preview )); } Ok(out) } // --------------------------------------------------------------------------- // profile_table (PR2 — data-engineering tool) // --------------------------------------------------------------------------- const PROFILE_TABLE_MAX_COLUMNS: usize = 30; const PROFILE_TABLE_TOPK: usize = 5; pub async fn profile_table_tool( state: &AppState, connection_id: &str, table: &str, ) -> TuskResult { let active_db = active_db_name(state, connection_id).await.unwrap_or_default(); let (schema, tbl, _raw) = normalise_table_ref(table, &active_db); let flavor = state.get_flavor(connection_id).await; match flavor { DbFlavor::PostgreSQL | DbFlavor::Greenplum => { profile_table_postgres(state, connection_id, &schema, &tbl).await } DbFlavor::ClickHouse => profile_table_clickhouse(state, connection_id, &schema, &tbl).await, } } async fn profile_table_postgres( state: &AppState, connection_id: &str, schema: &str, table: &str, ) -> TuskResult { let pool = state.get_pool(connection_id).await?; let exists = sqlx::query_scalar::<_, i64>( "SELECT 1 FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid \ WHERE n.nspname = $1 AND c.relname = $2 LIMIT 1", ) .bind(schema) .bind(table) .fetch_optional(&pool) .await .map_err(TuskError::Database)?; if exists.is_none() { return Err(TuskError::Custom(format!( "Table '{}.{}' does not exist (or no privileges).", schema, table ))); } let last_analyze: Option> = sqlx::query_scalar( "SELECT GREATEST(last_analyze, last_autoanalyze) FROM pg_stat_user_tables \ WHERE schemaname = $1 AND relname = $2", ) .bind(schema) .bind(table) .fetch_optional(&pool) .await .ok() .flatten(); let stat_rows = sqlx::query( "SELECT attname, null_frac, n_distinct, \ most_common_vals::text, most_common_freqs, histogram_bounds::text \ FROM pg_stats \ WHERE schemaname = $1 AND tablename = $2 \ ORDER BY attname", ) .bind(schema) .bind(table) .fetch_all(&pool) .await .map_err(TuskError::Database)?; let mut out = format!("PROFILE {}.{}\n", schema, table); match last_analyze { Some(ts) => out.push_str(&format!("Last ANALYZE: {}\n", ts.to_rfc3339())), None => out.push_str("Last ANALYZE: never\n"), } if stat_rows.is_empty() { out.push_str(&format!( "\nNo statistics in pg_stats. Run: ANALYZE {}.{};\n", escape_ident(schema), escape_ident(table) )); return Ok(out); } let total = stat_rows.len(); let take = total.min(PROFILE_TABLE_MAX_COLUMNS); out.push_str(&format!("\n{} columns with stats\n", total)); for r in stat_rows.iter().take(take) { let attname: String = r.get(0); let null_frac: f32 = r.try_get(1).unwrap_or(0.0); let n_distinct: f32 = r.try_get(2).unwrap_or(0.0); let mcv_text: Option = r.try_get(3).ok(); let mcf_arr: Option> = r.try_get(4).ok(); let hist_text: Option = r.try_get(5).ok(); out.push_str(&format!("\n {}:\n", attname)); out.push_str(&format!(" null_frac: {:.4}\n", null_frac)); if n_distinct < 0.0 { out.push_str(&format!( " n_distinct: {:.3} (ratio of total rows)\n", -n_distinct )); } else { out.push_str(&format!(" n_distinct: {}\n", n_distinct as i64)); } if let Some(text) = hist_text.as_deref() { let bounds = parse_pg_array_text_local(text); if let (Some(min), Some(max)) = (bounds.first(), bounds.last()) { out.push_str(&format!(" range: {} … {}\n", min, max)); } } if let Some(text) = mcv_text.as_deref() { let vals = parse_pg_array_text_local(text); if !vals.is_empty() { let freqs = mcf_arr.unwrap_or_default(); let pairs: Vec = vals .iter() .take(PROFILE_TABLE_TOPK) .enumerate() .map(|(i, v)| match freqs.get(i) { Some(f) => format!("{}({:.3})", v, f), None => v.clone(), }) .collect(); out.push_str(&format!(" top: {}\n", pairs.join(", "))); } } } if total > take { out.push_str(&format!("\n…and {} more columns\n", total - take)); } Ok(out) } /// Local pg-array parser used by profile_table; mirrors `parse_pg_array_text` in ai.rs /// but kept local to avoid importing a private helper. fn parse_pg_array_text_local(s: &str) -> Vec { let s = s.trim(); let s = s.strip_prefix('{').unwrap_or(s); let s = s.strip_suffix('}').unwrap_or(s); if s.is_empty() { return Vec::new(); } let mut out = Vec::new(); let mut cur = String::new(); let mut in_quotes = false; let mut chars = s.chars().peekable(); while let Some(c) = chars.next() { match c { '"' if !in_quotes => in_quotes = true, '"' if in_quotes => { if chars.peek() == Some(&'"') { cur.push('"'); chars.next(); } else { in_quotes = false; } } ',' if !in_quotes => { out.push(std::mem::take(&mut cur)); } '\\' if in_quotes => { if let Some(next) = chars.next() { cur.push(next); } } other => cur.push(other), } } if !cur.is_empty() || s.ends_with(',') { out.push(cur); } out } async fn profile_table_clickhouse( state: &AppState, connection_id: &str, schema: &str, table: &str, ) -> TuskResult { let client = state.get_ch_client(connection_id).await?; let active_db = client.database.clone(); let dbn = if schema == "public" || schema.is_empty() { active_db } else { schema.to_string() }; let cols_sql = format!( "SELECT name, type FROM system.columns \ WHERE database = '{}' AND table = '{}' \ ORDER BY position LIMIT {}", dbn.replace('\'', "\\'"), table.replace('\'', "\\'"), PROFILE_TABLE_MAX_COLUMNS ); let col_rows = client.fetch_objects(&cols_sql).await?; if col_rows.is_empty() { return Err(TuskError::Custom(format!( "Table '{}.{}' does not exist (or no privileges).", dbn, table ))); } let mut select_parts: Vec = vec!["count() AS rows_total".to_string()]; let mut col_names: Vec = Vec::new(); let mut col_types: Vec = Vec::new(); for r in &col_rows { let name = r.get("name").and_then(|v| v.as_str()).unwrap_or("").to_string(); let dtype = r.get("type").and_then(|v| v.as_str()).unwrap_or("").to_string(); if name.is_empty() { continue; } col_names.push(name.clone()); col_types.push(dtype); let q = name.replace('`', "``"); select_parts.push(format!("countIf(`{}` IS NULL) AS null_{}", q, col_names.len())); select_parts.push(format!("uniqHLL12(`{}`) AS dist_{}", q, col_names.len())); select_parts.push(format!("toString(min(`{}`)) AS min_{}", q, col_names.len())); select_parts.push(format!("toString(max(`{}`)) AS max_{}", q, col_names.len())); select_parts.push(format!( "arrayStringConcat(arrayMap(x -> toString(x), topK({})(`{}`)), '|') AS top_{}", PROFILE_TABLE_TOPK, q, col_names.len() )); } let agg_sql = format!( "SELECT {} FROM `{}`.`{}`", select_parts.join(", "), dbn.replace('`', "``"), table.replace('`', "``") ); let agg_rows = client.fetch_objects(&agg_sql).await?; let row = agg_rows .first() .ok_or_else(|| TuskError::Custom("ClickHouse returned no row for profile aggregate".into()))?; let rows_total = row .get("rows_total") .and_then(|v| v.as_str().and_then(|s| s.parse::().ok()).or_else(|| v.as_i64())) .unwrap_or(0); let mut out = format!( "PROFILE {}.{}\nRows: {}\n{} columns profiled\n", dbn, table, rows_total, col_names.len() ); for (i, name) in col_names.iter().enumerate() { let n = i + 1; let nulls = row .get(&format!("null_{}", n)) .and_then(|v| v.as_str().and_then(|s| s.parse::().ok()).or_else(|| v.as_i64())) .unwrap_or(0); let dist = row .get(&format!("dist_{}", n)) .and_then(|v| v.as_str().and_then(|s| s.parse::().ok()).or_else(|| v.as_i64())) .unwrap_or(0); let min = row.get(&format!("min_{}", n)).and_then(|v| v.as_str()).unwrap_or(""); let max = row.get(&format!("max_{}", n)).and_then(|v| v.as_str()).unwrap_or(""); let top_raw = row.get(&format!("top_{}", n)).and_then(|v| v.as_str()).unwrap_or(""); out.push_str(&format!("\n {} ({}):\n", name, col_types[i])); let null_frac = if rows_total > 0 { nulls as f64 / rows_total as f64 } else { 0.0 }; out.push_str(&format!(" null_frac: {:.4}\n", null_frac)); out.push_str(&format!(" distinct (HLL): {}\n", dist)); if !min.is_empty() || !max.is_empty() { out.push_str(&format!(" range: {} … {}\n", min, max)); } if !top_raw.is_empty() { let top_vals: Vec<&str> = top_raw.split('|').take(PROFILE_TABLE_TOPK).collect(); out.push_str(&format!(" top: {}\n", top_vals.join(", "))); } } if col_rows.len() == PROFILE_TABLE_MAX_COLUMNS { out.push_str(&format!( "\n…showing first {} columns\n", PROFILE_TABLE_MAX_COLUMNS )); } Ok(out) } // --------------------------------------------------------------------------- // sample_data (PR2 — returns SQL string; dispatch site runs it through // execute_query_core so the QueryResult feeds the standard renderer) // --------------------------------------------------------------------------- pub async fn build_sample_sql( state: &AppState, connection_id: &str, table: &str, limit: u32, ) -> TuskResult { let active_db = active_db_name(state, connection_id).await.unwrap_or_default(); let (schema, tbl, _raw) = normalise_table_ref(table, &active_db); let flavor = state.get_flavor(connection_id).await; match flavor { DbFlavor::PostgreSQL | DbFlavor::Greenplum => { build_sample_sql_postgres(state, connection_id, &schema, &tbl, limit).await } DbFlavor::ClickHouse => { build_sample_sql_clickhouse(state, connection_id, &schema, &tbl, limit).await } } } async fn build_sample_sql_postgres( state: &AppState, connection_id: &str, schema: &str, table: &str, limit: u32, ) -> TuskResult { let pool = state.get_pool(connection_id).await?; // pg_class.reltuples is `real` (FLOAT4); decode as f32 then widen — sqlx is // strict and reading it directly as f64 fails with a type-mismatch error. let reltuples: f64 = sqlx::query_scalar::<_, f32>( "SELECT c.reltuples FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid \ WHERE n.nspname = $1 AND c.relname = $2", ) .bind(schema) .bind(table) .fetch_optional(&pool) .await .map_err(TuskError::Database)? .unwrap_or(0.0) as f64; let qualified = format!("{}.{}", escape_ident(schema), escape_ident(table)); if reltuples > 0.0 { let target = limit as f64 * 100.0 / reltuples; let percent = target.clamp(0.01, 100.0); Ok(format!( "SELECT * FROM {} TABLESAMPLE BERNOULLI({:.4}) LIMIT {}", qualified, percent, limit )) } else { Ok(format!( "SELECT * FROM {} ORDER BY random() LIMIT {}", qualified, limit )) } } async fn build_sample_sql_clickhouse( state: &AppState, connection_id: &str, schema: &str, table: &str, limit: u32, ) -> TuskResult { let client = state.get_ch_client(connection_id).await?; let active_db = client.database.clone(); let dbn = if schema == "public" || schema.is_empty() { active_db } else { schema.to_string() }; let info_sql = format!( "SELECT engine, sampling_key FROM system.tables \ WHERE database = '{}' AND name = '{}' LIMIT 1", dbn.replace('\'', "\\'"), table.replace('\'', "\\'") ); let rows = client.fetch_objects(&info_sql).await.unwrap_or_default(); let (engine, sampling_key) = match rows.first() { Some(r) => ( r.get("engine").and_then(|v| v.as_str()).unwrap_or("").to_string(), r.get("sampling_key").and_then(|v| v.as_str()).unwrap_or("").to_string(), ), None => (String::new(), String::new()), }; let qualified = format!( "`{}`.`{}`", dbn.replace('`', "``"), table.replace('`', "``") ); if engine.starts_with("Merge") && !sampling_key.trim().is_empty() { Ok(format!( "SELECT * FROM {} SAMPLE 0.01 LIMIT {}", qualified, limit )) } else { Ok(format!( "SELECT * FROM {} ORDER BY rand() LIMIT {}", qualified, limit )) } } // --------------------------------------------------------------------------- // explain_query (PR2) // --------------------------------------------------------------------------- pub async fn explain_query_tool( state: &AppState, connection_id: &str, sql: &str, ) -> TuskResult { let trimmed = sql.trim(); if trimmed.is_empty() { return Err(TuskError::Custom("explain_query: sql must not be empty".into())); } // Validate the user's statement BEFORE prefixing EXPLAIN so the error message // references their SQL, not the wrapper. ensure_readonly_sql also rejects any // forbidden keywords (INSERT/UPDATE/DELETE/...) even nested under EXPLAIN. ensure_readonly_sql(trimmed).map_err(|e| TuskError::Custom(e.to_string()))?; let flavor = state.get_flavor(connection_id).await; match flavor { DbFlavor::PostgreSQL | DbFlavor::Greenplum => { explain_query_postgres(state, connection_id, trimmed).await } DbFlavor::ClickHouse => explain_query_clickhouse(state, connection_id, trimmed).await, } } async fn explain_query_postgres( state: &AppState, connection_id: &str, sql: &str, ) -> TuskResult { let pool = state.get_pool(connection_id).await?; let plan_sql = format!("EXPLAIN (FORMAT JSON, ANALYZE, BUFFERS) {}", sql); let mut tx = pool.begin().await.map_err(TuskError::Database)?; sqlx::query("SET TRANSACTION READ ONLY") .execute(&mut *tx) .await .map_err(TuskError::Database)?; let row = sqlx::query(&plan_sql) .fetch_one(&mut *tx) .await .map_err(TuskError::Database)?; let _ = tx.rollback().await; let raw_json: serde_json::Value = match row.try_get::(0) { Ok(v) => v, Err(_) => { let s: String = row.try_get(0).map_err(TuskError::Database)?; serde_json::from_str(&s) .map_err(|e| TuskError::Custom(format!("EXPLAIN JSON parse failed: {}", e)))? } }; let plans = raw_json .as_array() .ok_or_else(|| TuskError::Custom("EXPLAIN JSON: expected array".into()))?; let plan = plans.first().and_then(|p| p.get("Plan")).ok_or_else(|| { TuskError::Custom("EXPLAIN JSON: missing top-level Plan node".into()) })?; let root_node = plan.get("Node Type").and_then(|v| v.as_str()).unwrap_or("?"); let total_cost = plan.get("Total Cost").and_then(|v| v.as_f64()).unwrap_or(0.0); let planning = plans .first() .and_then(|p| p.get("Planning Time").and_then(|v| v.as_f64())) .unwrap_or(0.0); let execution = plans .first() .and_then(|p| p.get("Execution Time").and_then(|v| v.as_f64())) .unwrap_or(0.0); let mut seq_scans: Vec = Vec::new(); let mut spilled: Vec = Vec::new(); let mut motions: Vec = Vec::new(); let mut max_skew: Option<(f64, String)> = None; walk_pg_plan(plan, &mut seq_scans, &mut spilled, &mut motions, &mut max_skew); let mut out = format!( "PLAN root: {}, total cost {:.1}\nPlanning: {:.2} ms Execution: {:.2} ms\n", root_node, total_cost, planning, execution ); if !seq_scans.is_empty() { out.push_str(&format!("Seq scans on: {}\n", seq_scans.join(", "))); } if !spilled.is_empty() { out.push_str(&format!("Spilled to disk: {}\n", spilled.join(", "))); } if !motions.is_empty() { out.push_str(&format!("Motions (Greenplum): {}\n", motions.join(", "))); } if let Some((ratio, node)) = max_skew { if ratio >= 5.0 { out.push_str(&format!( "Estimate skew: max plan/actual ratio = {:.1} on {}\n", ratio, node )); } } if seq_scans.is_empty() && spilled.is_empty() && motions.is_empty() { out.push_str("No obvious red flags.\n"); } Ok(out) } fn walk_pg_plan( node: &serde_json::Value, seq_scans: &mut Vec, spilled: &mut Vec, motions: &mut Vec, max_skew: &mut Option<(f64, String)>, ) { let node_type = node.get("Node Type").and_then(|v| v.as_str()).unwrap_or(""); if node_type == "Seq Scan" { let rel = node .get("Relation Name") .and_then(|v| v.as_str()) .unwrap_or("?"); let schema = node .get("Schema") .and_then(|v| v.as_str()) .map(|s| format!("{}.", s)) .unwrap_or_default(); seq_scans.push(format!("{}{}", schema, rel)); } if let Some(method) = node.get("Sort Method").and_then(|v| v.as_str()) { if method.contains("disk") || method.contains("external") { spilled.push(format!("Sort ({})", method)); } } if node_type.contains("Motion") { motions.push(node_type.to_string()); } let plan_rows = node.get("Plan Rows").and_then(|v| v.as_f64()).unwrap_or(0.0); let actual_rows = node.get("Actual Rows").and_then(|v| v.as_f64()).unwrap_or(0.0); if actual_rows > 0.0 && plan_rows > 0.0 { let ratio = (plan_rows / actual_rows).max(actual_rows / plan_rows); if max_skew.as_ref().map(|(r, _)| ratio > *r).unwrap_or(true) { *max_skew = Some((ratio, node_type.to_string())); } } if let Some(children) = node.get("Plans").and_then(|v| v.as_array()) { for child in children { walk_pg_plan(child, seq_scans, spilled, motions, max_skew); } } } async fn explain_query_clickhouse( state: &AppState, connection_id: &str, sql: &str, ) -> TuskResult { let client = state.get_ch_client(connection_id).await?; let plan_sql = format!("EXPLAIN PLAN {}", sql); let qr = client.execute_query(&plan_sql, true).await?; if qr.rows.is_empty() { return Ok("(empty plan)".to_string()); } let mut out = String::from("ClickHouse plan:\n"); for row in &qr.rows { if let Some(cell) = row.first() { if let Some(s) = cell.as_str() { out.push_str(s); out.push('\n'); } } } Ok(out) } // --------------------------------------------------------------------------- // detect_skew (PR2 — Greenplum-only) // --------------------------------------------------------------------------- pub async fn detect_skew_tool( state: &AppState, connection_id: &str, table: &str, ) -> TuskResult { let flavor = state.get_flavor(connection_id).await; if !matches!(flavor, DbFlavor::Greenplum) { return Ok("detect_skew is only available on Greenplum connections.".to_string()); } let active_db = active_db_name(state, connection_id).await.unwrap_or_default(); let (schema, tbl, _raw) = normalise_table_ref(table, &active_db); let qualified = format!("{}.{}", escape_ident(&schema), escape_ident(&tbl)); let sql = format!( "SELECT gp_segment_id, COUNT(*) AS n FROM {} GROUP BY 1 ORDER BY 1", qualified ); let qr = execute_query_core(state, connection_id, &sql).await?; let mut counts: Vec<(i64, i64)> = Vec::new(); for row in &qr.rows { let seg = row .get(0) .and_then(|v| v.as_i64().or_else(|| v.as_str().and_then(|s| s.parse().ok()))) .unwrap_or(0); let n = row .get(1) .and_then(|v| v.as_i64().or_else(|| v.as_str().and_then(|s| s.parse().ok()))) .unwrap_or(0); counts.push((seg, n)); } if counts.is_empty() { return Ok(format!("Table {}.{} is empty.", schema, tbl)); } let total: i64 = counts.iter().map(|(_, n)| *n).sum(); let max = counts.iter().map(|(_, n)| *n).max().unwrap_or(0); let min = counts.iter().map(|(_, n)| *n).min().unwrap_or(0); let avg = total as f64 / counts.len() as f64; let ratio = if avg > 0.0 { max as f64 / avg } else { 0.0 }; let mut out = format!( "Per-segment row distribution for {}.{}\nsegments: {} total rows: {}\nmin: {} max: {} avg: {:.0}\nskew ratio (max/avg): {:.2}", schema, tbl, counts.len(), total, min, max, avg, ratio ); if ratio > 1.5 { out.push_str(" ⚠ uneven distribution\n"); } else { out.push_str(" OK — within 1.5x of average\n"); } let pool = state.get_pool(connection_id).await?; if let Some(policy) = fetch_gp_distribution_for(&pool, &schema, &tbl).await { out.push_str(&format!("\nCurrent policy: {}\n", policy)); if ratio > 1.5 { out.push_str( "Hint: pick a higher-cardinality column. Run profile_table to compare n_distinct.\n", ); } } Ok(out) } /// Fetch the Greenplum DISTRIBUTED BY policy for a single table. Returns None if /// the catalog query fails (non-GP connection, missing privileges, etc.). async fn fetch_gp_distribution_for( pool: &PgPool, schema: &str, table: &str, ) -> Option { let row = sqlx::query( "SELECT COALESCE(\ (SELECT array_agg(a.attname ORDER BY ord.idx) \ FROM regexp_split_to_table(NULLIF(trim(p.distkey::text), ''), ' ') \ WITH ORDINALITY AS ord(attnum_str, idx) \ JOIN pg_attribute a \ ON a.attrelid = c.oid \ AND a.attnum::int = ord.attnum_str::int), \ ARRAY[]::text[] \ ) AS dist_columns \ FROM gp_distribution_policy p \ JOIN pg_class c ON p.localoid = c.oid \ JOIN pg_namespace n ON c.relnamespace = n.oid \ WHERE n.nspname = $1 AND c.relname = $2", ) .bind(schema) .bind(table) .fetch_optional(pool) .await .ok() .flatten()?; let cols: Vec = row.try_get(0).ok()?; Some(if cols.is_empty() { "DISTRIBUTED RANDOMLY".to_string() } else { format!("DISTRIBUTED BY ({})", cols.join(", ")) }) }