Skip to content

Commit ea18bdb

Browse files
Paginate list_tables (#92)
1 parent 21e6cb2 commit ea18bdb

File tree

8 files changed

+593
-47
lines changed

8 files changed

+593
-47
lines changed

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,17 @@ An MCP server for ClickHouse.
1919
* List all databases on your ClickHouse cluster.
2020

2121
* `list_tables`
22-
* List all tables in a database.
23-
* Input: `database` (string): The name of the database.
22+
* List tables in a database with pagination.
23+
* Required input: `database` (string).
24+
* Optional inputs:
25+
* `like` / `not_like` (string): Apply `LIKE` or `NOT LIKE` filters to table names.
26+
* `page_token` (string): Token returned by a previous call for fetching the next page.
27+
* `page_size` (int, default `50`): Number of tables returned per page.
28+
* `include_detailed_columns` (bool, default `true`): When `false`, omits column metadata for lighter responses while keeping the full `create_table_query`.
29+
* Response shape:
30+
* `tables`: Array of table objects for the current page.
31+
* `next_page_token`: Pass this value back to fetch the next page, or `null` when there are no more tables.
32+
* `total_tables`: Total count of tables that match the supplied filters.
2433

2534
### chDB Tools
2635

mcp_clickhouse/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
create_chdb_client,
99
run_chdb_select_query,
1010
chdb_initial_prompt,
11+
table_pagination_cache,
12+
fetch_table_names_from_system,
13+
get_paginated_table_data,
14+
create_page_token,
1115
)
1216

1317

@@ -26,4 +30,8 @@
2630
"create_chdb_client",
2731
"run_chdb_select_query",
2832
"chdb_initial_prompt",
33+
"table_pagination_cache",
34+
"fetch_table_names_from_system",
35+
"get_paginated_table_data",
36+
"create_page_token",
2937
]

mcp_clickhouse/mcp_server.py

Lines changed: 233 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import logging
22
import json
3-
from typing import Optional, List, Any
3+
from typing import Optional, List, Any, Dict
44
import concurrent.futures
55
import atexit
66
import os
7+
import uuid
78

89
import clickhouse_connect
910
import chdb.session as chs
1011
from clickhouse_connect.driver.binding import format_query_value
1112
from dotenv import load_dotenv
1213
from fastmcp import FastMCP
14+
from cachetools import TTLCache
1315
from fastmcp.tools import Tool
1416
from fastmcp.prompts import Prompt
1517
from fastmcp.exceptions import ToolError
@@ -135,36 +137,250 @@ def list_databases():
135137
return json.dumps(databases)
136138

137139

138-
def list_tables(database: str, like: Optional[str] = None, not_like: Optional[str] = None):
139-
"""List available ClickHouse tables in a database, including schema, comment,
140-
row count, and column count."""
141-
logger.info(f"Listing tables in database '{database}'")
142-
client = create_clickhouse_client()
143-
query = f"SELECT database, name, engine, create_table_query, dependencies_database, dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment FROM system.tables WHERE database = {format_query_value(database)}"
140+
# One-hour cache of list_tables pagination state, keyed by opaque page token.
# TTLCache (cachetools) evicts entries automatically once the TTL (3600 s) elapses,
# so abandoned pagination sessions cannot grow the cache without bound.
table_pagination_cache: TTLCache = TTLCache(maxsize=100, ttl=3600)
143+
144+
145+
def fetch_table_names_from_system(
    client,
    database: str,
    like: Optional[str] = None,
    not_like: Optional[str] = None,
) -> List[str]:
    """Return the names of tables in *database* from system.tables.

    Args:
        client: ClickHouse client
        database: Database name
        like: Optional pattern to keep only matching table names (LIKE)
        not_like: Optional pattern to exclude matching table names (NOT LIKE)

    Returns:
        List of table names
    """
    # Build the statement as clauses and join with single spaces so the final
    # query string is identical to concatenating the pieces directly.
    clauses = [
        f"SELECT name FROM system.tables WHERE database = {format_query_value(database)}"
    ]
    if like:
        clauses.append(f"AND name LIKE {format_query_value(like)}")
    if not_like:
        clauses.append(f"AND name NOT LIKE {format_query_value(not_like)}")

    result = client.query(" ".join(clauses))
    return [row[0] for row in result.result_rows]
172+
173+
174+
def get_paginated_table_data(
    client,
    database: str,
    table_names: List[str],
    start_idx: int,
    page_size: int,
    include_detailed_columns: bool = True,
) -> tuple[List[Table], int, bool]:
    """Fetch detailed metadata for a single page of tables.

    Args:
        client: ClickHouse client
        database: Database name
        table_names: Full list of table names being paginated
        start_idx: Index of the first table on this page
        page_size: Maximum number of tables per page
        include_detailed_columns: Whether to attach per-column metadata (default: True)

    Returns:
        Tuple of (Table objects for this page, index one past the page,
        whether more pages remain).
    """
    total = len(table_names)
    end_idx = min(start_idx + page_size, total)
    page_names = table_names[start_idx:end_idx]

    # An empty slice means the caller asked for a page past the end of the list.
    if not page_names:
        return [], end_idx, False

    quoted_names = ", ".join(format_query_value(name) for name in page_names)
    query = f"""
    SELECT database, name, engine, create_table_query, dependencies_database,
        dependencies_table, engine_full, sorting_key, primary_key, total_rows,
        total_bytes, total_bytes_uncompressed, parts, active_parts, total_marks, comment
    FROM system.tables
    WHERE database = {format_query_value(database)}
    AND name IN ({quoted_names})
    """

    result = client.query(query)
    tables = result_to_table(result.column_names, result.result_rows)

    for table in tables:
        if include_detailed_columns:
            column_query = f"""
            SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
            FROM system.columns
            WHERE database = {format_query_value(database)}
            AND table = {format_query_value(table.name)}
            """
            column_result = client.query(column_query)
            table.columns = result_to_column(
                column_result.column_names,
                column_result.result_rows,
            )
        else:
            # Columns are deliberately omitted; create_table_query on the Table
            # object still carries the full schema text.
            table.columns = []

    return tables, end_idx, end_idx < total
231+
232+
233+
def create_page_token(
    database: str,
    like: Optional[str],
    not_like: Optional[str],
    table_names: List[str],
    end_idx: int,
    include_detailed_columns: bool,
) -> str:
    """Register pagination state in the cache and return its opaque token.

    Args:
        database: Database name
        like: LIKE pattern used to filter tables
        not_like: NOT LIKE pattern used to filter tables
        table_names: Full list of table names being paginated
        end_idx: Index where the next page should start
        include_detailed_columns: Whether detailed column metadata is included

    Returns:
        Newly minted page token
    """
    # Snapshot everything needed to resume — including the filters, so a later
    # call can detect when the token is being replayed against a different query.
    state = {
        "database": database,
        "like": like,
        "not_like": not_like,
        "table_names": table_names,
        "start_idx": end_idx,
        "include_detailed_columns": include_detailed_columns,
    }
    token = str(uuid.uuid4())
    table_pagination_cache[token] = state
    return token
264+
265+
266+
def list_tables(
    database: str,
    like: Optional[str] = None,
    not_like: Optional[str] = None,
    page_token: Optional[str] = None,
    page_size: int = 50,
    include_detailed_columns: bool = True,
) -> Dict[str, Any]:
    """List available ClickHouse tables in a database, including schema, comment,
    row count, and column count.

    Args:
        database: The database to list tables from
        like: Optional LIKE pattern to filter table names
        not_like: Optional NOT LIKE pattern to exclude table names
        page_token: Token for pagination, obtained from a previous call
        page_size: Number of tables to return per page (default: 50)
        include_detailed_columns: Whether to include detailed column metadata (default: True).
            When False, the columns array will be empty but create_table_query still contains
            all column information. This reduces payload size for large schemas.

    Returns:
        A dictionary containing:
        - tables: List of table information (as dictionaries)
        - next_page_token: Token for the next page, or None if no more pages
        - total_tables: Total number of tables matching the filters
    """
    logger.info(
        "Listing tables in database '%s' with like=%s, not_like=%s, "
        "page_token=%s, page_size=%s, include_detailed_columns=%s",
        database,
        like,
        not_like,
        page_token,
        page_size,
        include_detailed_columns,
    )
    client = create_clickhouse_client()

    # Resolve pagination state: table_names stays None until we either restore
    # it from a valid cached token or fetch it fresh from system.tables.
    table_names: Optional[List[str]] = None
    start_idx = 0

    if page_token:
        cached_state = table_pagination_cache.get(page_token)
        if cached_state is None:
            # Token expired (1-hour TTL) or was never issued. Restarting from
            # the first page keeps the tool usable, but warn so the caller can
            # tell why the page sequence reset.
            logger.warning(
                "Page token %s not found or expired. Starting from the beginning.",
                page_token,
            )
        elif (
            cached_state["database"] != database
            or cached_state["like"] != like
            or cached_state["not_like"] != not_like
            # .get with default True tolerates tokens minted before the
            # include_detailed_columns flag existed in the cached state.
            or cached_state.get("include_detailed_columns", True) != include_detailed_columns
        ):
            logger.warning(
                "Page token %s is for a different database, filter, or metadata setting. "
                "Ignoring token and starting from beginning.",
                page_token,
            )
        else:
            table_names = cached_state["table_names"]
            start_idx = cached_state["start_idx"]
            # Tokens are single-use: delete the consumed one unconditionally so
            # last-page tokens don't linger in the cache until the TTL fires.
            del table_pagination_cache[page_token]

    if table_names is None:
        table_names = fetch_table_names_from_system(client, database, like, not_like)

    tables, end_idx, has_more = get_paginated_table_data(
        client,
        database,
        table_names,
        start_idx,
        page_size,
        include_detailed_columns,
    )

    next_page_token = None
    if has_more:
        next_page_token = create_page_token(
            database, like, not_like, table_names, end_idx, include_detailed_columns
        )

    logger.info(
        "Returned page with %s tables (total: %s), next_page_token=%s",
        len(tables),
        len(table_names),
        next_page_token,
    )
    return {
        "tables": [asdict(table) for table in tables],
        "next_page_token": next_page_token,
        "total_tables": len(table_names),
    }
168384

169385

170386
def execute_query(query: str):

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ dependencies = [
1212
"clickhouse-connect>=0.8.16",
1313
"truststore>=0.10",
1414
"chdb>=3.3.0",
15+
"cachetools>=5.5.0",
1516
]
1617

1718
[project.scripts]

0 commit comments

Comments
 (0)