Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ Paracelsus generates Entity Relationship Diagrams by reading your SQLAlchemy mod
- [Installation](#installation)
- [Basic CLI Usage](#basic-cli-usage)
- [Importing Models](#importing-models)
- [Include or Exclude tables](#include-or-exclude-tables)
- [Specify Column Sort Order](#specify-column-sort-order)
- [Omit Comments](#omit-comments)
- [Type Parameter Delimiter](#type-parameter-delimiter)
- [Generate Mermaid Diagrams](#generate-mermaid-diagrams)
- [Inject Mermaid Diagrams](#inject-mermaid-diagrams)
- [Creating Images](#creating-images)
- [pyproject.toml](#pyprojecttoml)
- [Alternative config files](#alternative-config-files)
- [Sponsorship](#sponsorship)

## Features
Expand Down Expand Up @@ -129,6 +134,24 @@ paracelsus graph example_app.models.base:Base \

By default, SQLAlchemy column comments are included in the generated mermaid diagrams. You can omit these comments using the `--omit-comments` flag, which [might improve](https://github.com/tedivm/paracelsus/issues/32) legibility.

### Type Parameter Delimiter

Some SQLAlchemy column types include parameters with commas, such as `NUMERIC(10, 2)` or `DECIMAL(8, 3)`. Since Mermaid's ER diagram parser uses commas as structural separators for attribute keys (PK, FK, UK), these commas can break diagram rendering.

Paracelsus automatically handles this by replacing commas in type parameters with a delimiter. By default, it uses a hyphen (`-`), converting `NUMERIC(10, 2)` to `NUMERIC(10-2)`.

You can customize this delimiter using the `--type-parameter-delimiter` option:

```bash
paracelsus graph example_app.models.base:Base \
--import-module "example_app.models.users" \
--type-parameter-delimiter "_"
```

This would convert `NUMERIC(10, 2)` to `NUMERIC(10_2)`.

**Note:** The delimiter cannot contain commas or spaces, as these characters would cause the same parsing issues in Mermaid diagrams.

### Generate Mermaid Diagrams


Expand Down Expand Up @@ -243,7 +266,7 @@ imports = [
]
```

This also allows users to set excludes, includes, and column sorting.
This also allows users to set excludes, includes, column sorting, and the type parameter delimiter.

```toml
[tool.paracelsus]
Expand All @@ -257,6 +280,7 @@ exclude_tables = [
column_sort = "preserve-order"
omit_comments = false
max_enum_members = 10
type_parameter_delimiter = "-" # Default is hyphen, cannot contain commas or spaces
```

### Alternative config files
Expand Down
32 changes: 28 additions & 4 deletions paracelsus/cli.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import re
import sys
from dataclasses import asdict
from pathlib import Path
from textwrap import dedent
from typing import List, Optional

import typer
from typing_extensions import Annotated
from dataclasses import asdict

from paracelsus.config import (
Formats,
MAX_ENUM_MEMBERS_DEFAULT,
SORT_DEFAULT,
ColumnSorts,
Formats,
Layouts,
ParacelsusSettingsForGraph,
ParacelsusSettingsForInject,
MAX_ENUM_MEMBERS_DEFAULT,
SORT_DEFAULT,
)

from .graph import get_graph_string, transformers
from .pyproject import get_pyproject_settings

Expand Down Expand Up @@ -112,6 +114,14 @@ def graph(
help="Specifies the layout of the diagram. Only applicable for mermaid format.",
),
] = None,
type_parameter_delimiter: Annotated[
Optional[str],
typer.Option(
"--type-parameter-delimiter",
help="Delimiter to use for type parameters in mermaid diagrams (e.g., NUMERIC(10-2)). Cannot contain commas or spaces.",
show_default="-",
),
] = None,
):
settings = get_pyproject_settings(config_file=config)

Expand All @@ -126,6 +136,9 @@ def graph(
omit_comments=omit_comments if omit_comments is not None else settings.omit_comments,
max_enum_members=max_enum_members if max_enum_members is not None else settings.max_enum_members,
layout=layout,
type_parameter_delimiter=type_parameter_delimiter
if type_parameter_delimiter is not None
else settings.type_parameter_delimiter,
)

graph_string = get_graph_string(
Expand Down Expand Up @@ -232,6 +245,14 @@ def inject(
help="Specifies the layout of the diagram. Only applicable for mermaid format.",
),
] = None,
type_parameter_delimiter: Annotated[
Optional[str],
typer.Option(
"--type-parameter-delimiter",
help="Delimiter to use for type parameters in mermaid diagrams (e.g., NUMERIC(10-2)). Cannot contain commas or spaces.",
show_default="-",
),
] = None,
):
settings = get_pyproject_settings(config_file=config)

Expand All @@ -247,6 +268,9 @@ def inject(
omit_comments=omit_comments if omit_comments is not None else settings.omit_comments,
max_enum_members=max_enum_members if max_enum_members is not None else settings.max_enum_members,
layout=layout,
type_parameter_delimiter=type_parameter_delimiter
if type_parameter_delimiter is not None
else settings.type_parameter_delimiter,
),
file=file,
replace_begin_tag=replace_begin_tag,
Expand Down
5 changes: 4 additions & 1 deletion paracelsus/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
from enum import Enum
from dataclasses import field, dataclass
from pathlib import Path
from typing import Final

Expand All @@ -24,6 +24,7 @@ class Layouts(str, Enum):
SORT_DEFAULT: Final[ColumnSorts] = ColumnSorts.key_based
OMIT_COMMENTS_DEFAULT: Final[bool] = False
MAX_ENUM_MEMBERS_DEFAULT: Final[int] = 3
TYPE_PARAMETER_DELIMITER_DEFAULT: Final[str] = "-"


def validate_layout(*, format: Formats, layout: Layouts | None) -> None:
Expand All @@ -46,6 +47,7 @@ class ParacelsusTomlConfig:
column_sort: ColumnSorts = SORT_DEFAULT
omit_comments: bool = OMIT_COMMENTS_DEFAULT
max_enum_members: int = MAX_ENUM_MEMBERS_DEFAULT
type_parameter_delimiter: str = TYPE_PARAMETER_DELIMITER_DEFAULT


@dataclass(frozen=True)
Expand All @@ -66,6 +68,7 @@ class ParacelsusSettingsForGraph:
omit_comments: bool
max_enum_members: int
layout: Layouts | None
type_parameter_delimiter: str

def __post_init__(self) -> None:
validate_layout(format=self.format, layout=self.layout)
Expand Down
20 changes: 16 additions & 4 deletions paracelsus/graph.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import importlib
import os
import re
import sys
from pathlib import Path
import re
from typing import List, Set, Optional, Dict, Union
from typing import Dict, List, Optional, Set, Union

from sqlalchemy.schema import MetaData

Expand Down Expand Up @@ -31,6 +31,7 @@ def get_graph_string(
omit_comments: bool = False,
max_enum_members: int = 0,
layout: Optional[Layouts] = None,
type_parameter_delimiter: str = "-",
) -> str:
# Update the PYTHON_PATH to allow more module imports.
sys.path.append(str(os.getcwd()))
Expand All @@ -57,7 +58,6 @@ def get_graph_string(
# Grab a transformer.
if format not in transformers:
raise ValueError(f"Unknown Format: {format}")
transformer = transformers[format]

# Keep only the tables which were included / not-excluded
include_tables = resolve_included_tables(
Expand All @@ -66,7 +66,19 @@ def get_graph_string(
filtered_metadata = filter_metadata(metadata=metadata, include_tables=include_tables)

# Save the graph structure to string.
return str(transformer(filtered_metadata, column_sort, omit_comments=omit_comments, layout=layout))
# Note: type_parameter_delimiter only applies to Mermaid transformer
if format in ["mermaid", "mmd"]:
return str(
Mermaid(
filtered_metadata,
column_sort,
omit_comments=omit_comments,
layout=layout,
type_parameter_delimiter=type_parameter_delimiter,
)
)
else:
return str(Dot(filtered_metadata, column_sort, omit_comments=omit_comments))


def resolve_included_tables(
Expand Down
44 changes: 42 additions & 2 deletions paracelsus/transformers/mermaid.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,51 @@
import logging
import re
import textwrap
from typing import Optional

import sqlalchemy
from sqlalchemy.sql.schema import Column, MetaData, Table

from .utils import sort_columns, is_unique
from paracelsus.config import Layouts

from .utils import is_unique, sort_columns

logger = logging.getLogger(__name__)


def sanitize_type_for_mermaid(type_str: str, delimiter: str = "-") -> str:
    """Replace commas in type parameters with a delimiter for Mermaid compatibility.

    Every comma inside a parenthesized parameter list is replaced, together
    with any whitespace around it, so ``FOO(1, 2, 3)`` becomes ``FOO(1-2-3)``.
    Text outside parentheses is returned unchanged. A parameter list is taken
    to end at the first closing parenthesis; nested parentheses get no special
    treatment.

    Args:
        type_str: The type string to sanitize (e.g., "NUMERIC(10, 2)")
        delimiter: The delimiter to use instead of commas (default: "-")

    Returns:
        Sanitized type string (e.g., "NUMERIC(10-2)")

    Raises:
        ValueError: If delimiter contains comma or space characters

    Note:
        Mermaid ER diagrams use commas as structural separators for attribute keys
        (PK, FK, UK), so commas in type parameters break the parser. This function
        replaces commas with a safe delimiter. See GitHub issue #51.
    """
    if "," in delimiter or " " in delimiter:
        raise ValueError(f"Type parameter delimiter cannot contain commas or spaces, got: {delimiter!r}")

    def _join_parameters(match: "re.Match[str]") -> str:
        # Split on every comma (swallowing the whitespace around it) and rejoin
        # with the delimiter. Using str.join keeps the delimiter literal, so
        # backslashes in it are never read as regex replacement escapes.
        return "(" + delimiter.join(re.split(r"\s*,\s*", match.group(1))) + ")"

    # Rewrite each parenthesized parameter list; lists without commas come back as-is.
    return re.sub(r"\(([^)]*)\)", _join_parameters, str(type_str))


class Mermaid:
comment_format: str = "mermaid"
metadata: MetaData
column_sort: str
omit_comments: bool
max_enum_members: int
layout: Optional[Layouts]
type_parameter_delimiter: str

def __init__(
self,
Expand All @@ -26,13 +54,21 @@ def __init__(
omit_comments: bool = False,
max_enum_members: int = 0,
layout: Optional[Layouts] = None,
type_parameter_delimiter: str = "-",
) -> None:
self.metadata = metaclass
self.column_sort = column_sort
self.omit_comments = omit_comments
self.max_enum_members = max_enum_members
self.layout: Optional[Layouts] = layout

# Validate delimiter doesn't contain commas or spaces
if "," in type_parameter_delimiter or " " in type_parameter_delimiter:
raise ValueError(
f"Type parameter delimiter cannot contain commas or spaces, got: {type_parameter_delimiter!r}"
)
self.type_parameter_delimiter = type_parameter_delimiter

def _table(self, table: Table) -> str:
output = f" {table.name}"
output += " {\n"
Expand All @@ -46,7 +82,10 @@ def _column(self, column: Column) -> str:
options = []
col_type = column.type
is_enum = isinstance(col_type, sqlalchemy.Enum)
column_str = f"ENUM {column.name}" if is_enum else f"{col_type} {column.name}"

# Sanitize type string to replace commas with delimiter (GitHub issue #51)
sanitized_type = sanitize_type_for_mermaid(str(col_type), self.type_parameter_delimiter)
column_str = f"ENUM {column.name}" if is_enum else f"{sanitized_type} {column.name}"

if column.primary_key:
if len(column.foreign_keys) > 0:
Expand Down Expand Up @@ -143,3 +182,4 @@ def __str__(self) -> str:
output += self._relationships(column)

return output
return output
23 changes: 22 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from typing import Literal
import pytest

import pytest
from typer.testing import CliRunner

from paracelsus.cli import app
Expand Down Expand Up @@ -306,3 +306,24 @@ def test_inject_cardinalities_mermaid(package_path: Path, expected_mermaid_cardi
generated_readme = (package_path / "README.md").read_text()

assert generated_readme == expected_mermaid_cardinalities_graph


def test_graph_with_custom_type_delimiter(package_path: Path):
    """Run the ``graph`` command with ``--type-parameter-delimiter`` and check it succeeds."""
    cli_args = ["graph", "example.base:Base"]
    cli_args += ["--import-module", "example.models"]
    cli_args += ["--python-dir", str(package_path)]
    cli_args += ["--type-parameter-delimiter", "_"]

    outcome = runner.invoke(app, cli_args)

    # Attach the CLI output to the failure message so a non-zero exit is debuggable.
    assert outcome.exit_code == 0, outcome.output
    # The generated diagram must still pass the shared mermaid checks.
    mermaid_assert(outcome.stdout)
14 changes: 12 additions & 2 deletions tests/transformers/test_dot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ def test_dot(metaclass):
dot_assert(graph_string)


def test_dot_column_sort_preserve_order(metaclass, dot_full_string_preseve_column_sort):
def test_dot_column_sort_preserve_order(metaclass):
dot = Dot(metaclass=metaclass, column_sort="preserve-order")
assert str(dot) == dot_full_string_preseve_column_sort
graph_string = str(dot)

# Verify structure and relationships are correct
dot_assert(graph_string)

# Verify preserve-order specific column ordering
# In preserve-order mode, columns should appear in the order they're defined
# users: id, display_name, created
assert graph_string.index("users") < graph_string.index("id")
assert graph_string.index("id") < graph_string.index("display_name")
assert graph_string.index("display_name") < graph_string.index("created")
Loading