|
import logging
import json
-from typing import Optional, List, Any
+from typing import Optional, List, Any, Dict
import concurrent.futures
import atexit
import os
+import uuid

import clickhouse_connect
import chdb.session as chs
from clickhouse_connect.driver.binding import format_query_value
from dotenv import load_dotenv
from fastmcp import FastMCP
+from cachetools import TTLCache
from fastmcp.tools import Tool
from fastmcp.prompts import Prompt
from fastmcp.exceptions import ToolError
@@ -135,36 +137,250 @@ def list_databases(): |
    return json.dumps(databases)


-def list_tables(database: str, like: Optional[str] = None, not_like: Optional[str] = None):
-    """List available ClickHouse tables in a database, including schema, comment,
-    row count, and column count."""
-    logger.info(f"Listing tables in database '{database}'")
-    client = create_clickhouse_client()
-    query = f"SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment FROM system.tables WHERE database = {format_query_value(database)}"
+# Store pagination state for list_tables with 1-hour expiry
+# Using TTLCache from cachetools to automatically expire entries after 1 hour
+table_pagination_cache: TTLCache = TTLCache(maxsize=100, ttl=3600)  # 3600 seconds = 1 hour
+
+
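# Illustrative sketch only (demo_cache and the token keys are hypothetical, not part of
# this module): cachetools.TTLCache drops entries ttl seconds after insertion and evicts
# the least-recently-used entry once maxsize is exceeded.
demo_cache: TTLCache = TTLCache(maxsize=2, ttl=3600)
demo_cache["token-a"] = {"start_idx": 50}
demo_cache["token-b"] = {"start_idx": 100}
demo_cache["token-c"] = {"start_idx": 150}  # maxsize exceeded: "token-a" (LRU) is evicted
assert "token-a" not in demo_cache
# After ttl seconds the surviving entries expire, so `token in demo_cache` becomes False.
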
+def fetch_table_names_from_system(
+    client,
+    database: str,
+    like: Optional[str] = None,
+    not_like: Optional[str] = None,
+) -> List[str]:
+    """Get list of table names from system.tables.
+
+    Args:
+        client: ClickHouse client
+        database: Database name
+        like: Optional pattern to filter table names (LIKE)
+        not_like: Optional pattern to filter out table names (NOT LIKE)
+
+    Returns:
+        List of table names
+    """
+    query = f"SELECT name FROM system.tables WHERE database = {format_query_value(database)}"
    if like:
        query += f" AND name LIKE {format_query_value(like)}"

    if not_like:
        query += f" AND name NOT LIKE {format_query_value(not_like)}"

    result = client.query(query)
+    table_names = [row[0] for row in result.result_rows]
+    return table_names
+
+
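# Illustrative sketch only: with clickhouse_connect's format_query_value quoting string
# values, a call such as fetch_table_names_from_system(client, "default", like="user%")
# (hypothetical database and pattern) builds roughly:
#   SELECT name FROM system.tables WHERE database = 'default' AND name LIKE 'user%'
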
+def get_paginated_table_data(
+    client,
+    database: str,
+    table_names: List[str],
+    start_idx: int,
+    page_size: int,
+    include_detailed_columns: bool = True,
+) -> tuple[List[Table], int, bool]:
+    """Get detailed information for a page of tables.
+
+    Args:
+        client: ClickHouse client
+        database: Database name
+        table_names: List of all table names to paginate
+        start_idx: Starting index for pagination
+        page_size: Number of tables per page
+        include_detailed_columns: Whether to include detailed column metadata (default: True)
+
+    Returns:
+        Tuple of (list of Table objects, end index, has more pages)
+    """
+    end_idx = min(start_idx + page_size, len(table_names))
+    current_page_table_names = table_names[start_idx:end_idx]
+
+    if not current_page_table_names:
+        return [], end_idx, False
+
+    query = f"""
+        SELECT database, name, engine, create_table_query, dependencies_database,
+        dependencies_table, engine_full, sorting_key, primary_key, total_rows,
+        total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment
+        FROM system.tables
+        WHERE database = {format_query_value(database)}
+        AND name IN ({", ".join(format_query_value(name) for name in current_page_table_names)})
+    """

-    # Deserialize result as Table dataclass instances
+    result = client.query(query)
    tables = result_to_table(result.column_names, result.result_rows)

-    for table in tables:
-        column_data_query = f"SELECT database, table, name, type AS column_type, default_kind, default_expression, comment FROM system.columns WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"
-        column_data_query_result = client.query(column_data_query)
-        table.columns = [
-            c
-            for c in result_to_column(
+    if include_detailed_columns:
+        for table in tables:
+            column_data_query = f"""
+                SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
+                FROM system.columns
+                WHERE database = {format_query_value(database)}
+                AND table = {format_query_value(table.name)}
+            """
+            column_data_query_result = client.query(column_data_query)
+            table.columns = result_to_column(
                column_data_query_result.column_names,
                column_data_query_result.result_rows,
            )
-        ]
+    else:
+        for table in tables:
+            table.columns = []
+
+    return tables, end_idx, end_idx < len(table_names)
+
+
+def create_page_token(
+    database: str,
+    like: Optional[str],
+    not_like: Optional[str],
+    table_names: List[str],
+    end_idx: int,
+    include_detailed_columns: bool,
+) -> str:
+    """Create a new page token and store it in the cache.
+
+    Args:
+        database: Database name
+        like: LIKE pattern used to filter tables
+        not_like: NOT LIKE pattern used to filter tables
+        table_names: List of all table names
+        end_idx: Index to start from for the next page
+        include_detailed_columns: Whether to include detailed column metadata
+
+    Returns:
+        New page token
+    """
+    token = str(uuid.uuid4())
+    table_pagination_cache[token] = {
+        "database": database,
+        "like": like,
+        "not_like": not_like,
+        "table_names": table_names,
+        "start_idx": end_idx,
+        "include_detailed_columns": include_detailed_columns,
+    }
+    return token
+
+
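# Illustrative sketch only (the database and table names are hypothetical): the returned
# token is an opaque UUID string, and the cached state records where the next page resumes.
token = create_page_token(
    "default", None, None, ["t1", "t2", "t3"], end_idx=2, include_detailed_columns=True
)
state = table_pagination_cache[token]
assert state["start_idx"] == 2  # the next page starts at table_names[2]
assert state["table_names"] == ["t1", "t2", "t3"]
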
+def list_tables(
+    database: str,
+    like: Optional[str] = None,
+    not_like: Optional[str] = None,
+    page_token: Optional[str] = None,
+    page_size: int = 50,
+    include_detailed_columns: bool = True,
+) -> Dict[str, Any]:
+    """List available ClickHouse tables in a database, including schema, comment,
+    row count, and column count.
+
+    Args:
+        database: The database to list tables from
+        like: Optional LIKE pattern to filter table names
+        not_like: Optional NOT LIKE pattern to exclude table names
+        page_token: Token for pagination, obtained from a previous call
+        page_size: Number of tables to return per page (default: 50)
+        include_detailed_columns: Whether to include detailed column metadata (default: True).
+            When False, the columns array will be empty but create_table_query still contains
+            all column information. This reduces payload size for large schemas.
+
+    Returns:
+        A dictionary containing:
+        - tables: List of table information (as dictionaries)
+        - next_page_token: Token for the next page, or None if no more pages
+        - total_tables: Total number of tables matching the filters
+    """
+    logger.info(
+        "Listing tables in database '%s' with like=%s, not_like=%s, "
+        "page_token=%s, page_size=%s, include_detailed_columns=%s",
+        database,
+        like,
+        not_like,
+        page_token,
+        page_size,
+        include_detailed_columns,
+    )
+    client = create_clickhouse_client()
+
+    if page_token and page_token in table_pagination_cache:
+        cached_state = table_pagination_cache[page_token]
+        cached_include_detailed = cached_state.get("include_detailed_columns", True)
+
+        if (
+            cached_state["database"] != database
+            or cached_state["like"] != like
+            or cached_state["not_like"] != not_like
+            or cached_include_detailed != include_detailed_columns
+        ):
+            logger.warning(
+                "Page token %s is for a different database, filter, or metadata setting. "
+                "Ignoring token and starting from beginning.",
+                page_token,
+            )
+            page_token = None
+        else:
+            table_names = cached_state["table_names"]
+            start_idx = cached_state["start_idx"]
+
+            tables, end_idx, has_more = get_paginated_table_data(
+                client,
+                database,
+                table_names,
+                start_idx,
+                page_size,
+                include_detailed_columns,
+            )
+
+            next_page_token = None
+            if has_more:
+                next_page_token = create_page_token(
+                    database, like, not_like, table_names, end_idx, include_detailed_columns
+                )
+
+            del table_pagination_cache[page_token]
+
+            logger.info(
+                "Returned page with %s tables (total: %s), next_page_token=%s",
+                len(tables),
+                len(table_names),
+                next_page_token,
+            )
+            return {
+                "tables": [asdict(table) for table in tables],
+                "next_page_token": next_page_token,
+                "total_tables": len(table_names),
+            }
+
+    table_names = fetch_table_names_from_system(client, database, like, not_like)
+
+    start_idx = 0
+    tables, end_idx, has_more = get_paginated_table_data(
+        client,
+        database,
+        table_names,
+        start_idx,
+        page_size,
+        include_detailed_columns,
+    )
+
+    next_page_token = None
+    if has_more:
+        next_page_token = create_page_token(
+            database, like, not_like, table_names, end_idx, include_detailed_columns
+        )
+
+    logger.info(
+        "Found %s tables, returning %s with next_page_token=%s",
+        len(table_names),
+        len(tables),
+        next_page_token,
+    )

-    logger.info(f"Found {len(tables)} tables")
-    return [asdict(table) for table in tables]
+    return {
+        "tables": [asdict(table) for table in tables],
+        "next_page_token": next_page_token,
+        "total_tables": len(table_names),
+    }
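
# A minimal usage sketch, assuming list_tables is called directly as a Python function
# (an MCP client would pass the same arguments as tool parameters); collect_all_tables
# is a hypothetical helper, not part of this diff.
def collect_all_tables(database: str, page_size: int = 50) -> List[Dict[str, Any]]:
    """Follow next_page_token until every page has been fetched."""
    page = list_tables(database, page_size=page_size, include_detailed_columns=False)
    all_tables = list(page["tables"])
    while page["next_page_token"]:
        page = list_tables(
            database,
            page_size=page_size,
            include_detailed_columns=False,
            page_token=page["next_page_token"],
        )
        all_tables.extend(page["tables"])
    return all_tables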


def execute_query(query: str):
|