Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ quartodoc:
- GT.cols_align
- GT.cols_width
- GT.cols_label
- GT.cols_label_with
- GT.cols_move
- GT.cols_move_to_start
- GT.cols_move_to_end
Expand Down
90 changes: 72 additions & 18 deletions great_tables/_boxhead.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Callable, TYPE_CHECKING

from ._locations import resolve_cols_c
from ._utils import _assert_list_is_subset
from ._utils import _assert_list_is_subset, _handle_units_syntax
from ._tbl_data import SelectExpr
from ._text import BaseText

Expand Down Expand Up @@ -114,8 +114,6 @@ def cols_label(
)
```
"""
from great_tables._helpers import UnitStr

cases = cases if cases is not None else {}
new_cases = cases | kwargs

Expand All @@ -132,24 +130,80 @@ def cols_label(
_assert_list_is_subset(mod_columns, set_list=column_names)

# Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})")
new_kwargs: dict[str, UnitStr | str | BaseText] = {}
new_kwargs = _handle_units_syntax(new_cases)

boxhead = self._boxhead._set_column_labels(new_kwargs)

return self._replace(_boxhead=boxhead)


def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr = None) -> GTSelf:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From pairing w/ Rich, @jrycw WDYT of switching the order of the parameters, so columns is before fn?

Looking through the cols_*() methods in the R library, column selection usually comes first. In the case of cols_align() where it doesn't, it sounds like it's a historical artifact. Keeping column first might help cement a pattern for cols_*() methods.

It makes sense you put it first in the PR, since it doesn't have a default. WDYT of us setting it to fn = None, and then erroring if fn is None?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@machow, nice suggestion! I definitely like your idea of introducing fn= to cols_label() using Polars syntax.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks to @machow for the comment! I think this is a good opportunity to introduce fn= to accept a callable in cols_label() as well.

By the way, I noticed that we have two test files for boxhead, test_boxhead.py and test__boxhead.py. I picked one to add the new test, but perhaps we should consider consolidating them in the future.

Copy link
Collaborator Author

@jrycw jrycw Mar 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a side note, we might consider using the pl.Expr.name attribute to handle Polars syntax. For example:

df_names.select(cs.starts_with("my").name.to_uppercase())
shape: (1, 2)
┌────────┬─────────┐
│ MY_COL ┆ MY_COL2 │
│ ---    ┆ ---     │
│ str    ┆ str     │
╞════════╪═════════╡
│ my_col ┆ my_col2 │
└────────┴─────────┘

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I've attempted to implement this in the latest commit. I admit it's a bit of a bold move, so feel free to set it aside for now.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here’s a simple example:

import polars as pl
import polars.selectors as cs
from great_tables import GT

df_names = pl.DataFrame({"my_col": ["my_col"], "my_col2": ["my_col2"]})

GT(df_names).cols_label_with(converter=cs.starts_with("my").name.to_uppercase())

image

Copy link
Collaborator Author

@jrycw jrycw Mar 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When passing a list of Polars expressions, if a column is referenced multiple times, we can either superimpose the transformations or follow a "last one wins" approach (current implementation).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for experimenting with this. If it's okay with you, I think we need a little bit more time to chew on this approach / run it past some folks, since it's a bit cutting edge in terms of external libraries using Polars selectors. I definitely think it'll be useful to figure out in the long run though

"""
Relabel one or more columns using a function.

The `cols_label_with()` function allows for modification of column labels through a supplied
function. By default, the function will be invoked on all column labels but this can be limited
to a subset via the `columns` parameter.

Parameters
----------
fn
A function that accepts a column label as input and returns a transformed label as output.

columns
The columns to target. Can either be a single column name or a series of column names
provided in a list.

Returns
-------
GT
The GT object is returned. This is the same object that the method is called on so that we
can facilitate method chaining.

Notes
-----
GT always selects columns using their name in the underlying data. This means that a column's
label is purely for final presentation.

for k, v in new_cases.items():
if isinstance(v, str):
unitstr_v = UnitStr.from_str(v)
Examples
--------
Let's use a subset of the `sp500` dataset to create a gt table.
```{python}
from great_tables import GT, md
from great_tables.data import sp500

if len(unitstr_v.units_str) == 1 and isinstance(unitstr_v.units_str[0], str):
new_kwargs[k] = unitstr_v.units_str[0]
else:
new_kwargs[k] = unitstr_v
gt = GT(sp500.head())
gt
```

elif isinstance(v, BaseText):
new_kwargs[k] = v
We can pass `str.upper` to the `fn` parameter to convert all column labels to uppercase.
```{python}
gt.cols_label_with(str.upper)
```

One useful use case is using `md()`, provided by **Great Tables**, to format column labels.
For example, the following code demonstrates how to make the `date` and `adj_close` column labels
bold using markdown syntax.
```{python}
gt.cols_label_with(lambda x: md(f"**{x}**"), columns=["date", "adj_close"])
```

else:
raise ValueError(
"Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting."
)
"""
# Get the full list of column names for the data
column_names = self._boxhead._get_columns()

if isinstance(columns, str):
columns = [columns]
_assert_list_is_subset(columns, set_list=column_names)
elif columns is None:
columns = column_names

sel_cols = resolve_cols_c(data=self, expr=columns)

new_cases = {col: fn(col) for col in sel_cols}

# Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})")
new_kwargs = _handle_units_syntax(new_cases)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the R package doesn't automatically apply unit syntax here. My understanding is that this makes .cols_label_with() a very plain custom function applier. WDYT of not handling unit syntax here?

We could tell people they can make it happen with define_units() etc.. so hopefully they'd be able to make it happen if they need it

I'm still getting a feel for some of these dynamics, so could be totally off

Copy link
Collaborator Author

@jrycw jrycw Mar 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@machow, no worries—happy to work on this. Regarding define_units(), I’m having some difficulty describing it since its definition and use cases are still a bit unclear to me (e.g., #446). Maybe you could help clarify this in the documentation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shoot, I can see how this is confusing. I'll take a look at the linked PR, and think about what might put units more front and center. I think it's okay for now to just omit any mention of units in this docstring.


boxhead = self._boxhead._set_column_labels(new_kwargs)

Expand Down
24 changes: 24 additions & 0 deletions great_tables/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from types import ModuleType
from typing import TYPE_CHECKING, Any, Iterable, Iterator

from ._helpers import UnitStr
from ._tbl_data import _get_cell, _set_cell, get_column_names, n_rows
from ._text import BaseText, _process_text

Expand Down Expand Up @@ -285,3 +286,26 @@ def _get_visible_cells(data: TblData) -> list[tuple[str, int]]:

def is_valid_http_schema(url: str) -> bool:
    """Report whether `url` begins with the `http://` or `https://` prefix."""
    # `str.startswith` accepts a tuple and matches if any prefix applies.
    return url.startswith(("http://", "https://"))


def _handle_units_syntax(cases: dict[str, str | BaseText]) -> dict[str, UnitStr | str | BaseText]:
    """
    Resolve units syntax in column labels (e.g., "Density ({{ppl / mi^2}})").

    Parameters
    ----------
    cases
        A mapping of column names to labels. Each label must be a string or a `BaseText`
        object.

    Returns
    -------
    dict[str, UnitStr | str | BaseText]
        A new mapping with the same keys. `BaseText` labels are stored unchanged. String
        labels are passed through `UnitStr.from_str()`: when the parsed result holds exactly
        one element and that element is a string, the plain string is stored; otherwise the
        `UnitStr` object is stored.

    Raises
    ------
    ValueError
        If any label is neither a string nor a `BaseText` object.
    """
    resolved: dict[str, UnitStr | str | BaseText] = {}

    for name, label in cases.items():
        # Reject unsupported label types up front
        if not isinstance(label, (str, BaseText)):
            raise ValueError(
                "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting."
            )

        if isinstance(label, str):
            parsed = UnitStr.from_str(label)
            parts = parsed.units_str

            # A lone string element means nothing needs the UnitStr wrapper
            is_plain_text = len(parts) == 1 and isinstance(parts[0], str)
            resolved[name] = parts[0] if is_plain_text else parsed
        else:
            resolved[name] = label

    return resolved
3 changes: 2 additions & 1 deletion great_tables/gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# Main gt imports ----
from ._body import body_reassemble
from ._boxhead import cols_align, cols_label
from ._boxhead import cols_align, cols_label, cols_label_with
from ._data_color import data_color
from ._export import as_latex, as_raw_html, save, show, write_raw_html
from ._formats import (
Expand Down Expand Up @@ -253,6 +253,7 @@ def __init__(
cols_align = cols_align
cols_width = cols_width
cols_label = cols_label
cols_label_with = cols_label_with
cols_move = cols_move
cols_move_to_start = cols_move_to_start
cols_move_to_end = cols_move_to_end
Expand Down
27 changes: 27 additions & 0 deletions tests/test__boxhead.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,33 @@ def test_cols_label_return_self_if_no_kwargs():
assert isinstance(unmodified_table, gt.GT)


def test_cols_label_with_relabel_columns():
    """Applying `str.lower` without a column selection lowercases every column label."""
    frame = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

    # No `columns=` argument is given, so the function is applied to all labels
    relabeled = gt.GT(frame).cols_label_with(str.lower)

    assert relabeled._boxhead._get_column_labels() == ["a", "b"]


def test_cols_label_with_relabel_columns_with_markdown():
    """A Markdown-producing function is applied only to the column selected via `columns=`."""
    frame = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

    def embolden(label):
        # Wrap the label in Markdown bold markers
        return gt.md(f"**{label}**")

    relabeled = gt.GT(frame).cols_label_with(embolden, columns="A")
    labels = relabeled._boxhead._get_column_labels()

    # Column "A" carries the Markdown text object; column "B" keeps its plain label
    assert labels[0].text == "**A**"
    assert labels[1] == "B"


def test_cols_align_default():
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
table = gt.GT(df)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_assert_str_list,
_assert_str_scalar,
_collapse_list_elements,
_handle_units_syntax,
_insert_into_list,
_match_arg,
_migrate_unformatted_to_output,
Expand Down Expand Up @@ -224,3 +225,21 @@ def test_migrate_unformatted_to_output_html():
)
def test_is_valid_http_schema(url: str):
assert is_valid_http_schema(url)


def test_handle_units_syntax():
    """Check that `_handle_units_syntax()` keeps keys and returns supported label types."""
    from great_tables._text import BaseText, Text

    text_label = Text(text="xyz")
    new_kwargs = _handle_units_syntax({"column_label_1": "abc", "column_label_2": text_label})

    # Every input column must appear in the result, in the same order
    assert list(new_kwargs) == ["column_label_1", "column_label_2"]

    # All resolved labels are strings or BaseText objects
    assert all(isinstance(v, (str, BaseText)) for v in new_kwargs.values())

    # BaseText labels are passed through untouched (same object, not a copy)
    assert new_kwargs["column_label_2"] is text_label


def test_handle_units_syntax_raises():
    """A label that is neither a string nor a BaseText object triggers a ValueError."""
    expected_message = (
        "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting."
    )

    with pytest.raises(ValueError) as exc_info:
        _handle_units_syntax({"column_label": 123})

    raised_message = exc_info.value.args[0]
    assert expected_message in raised_message
Loading