Skip to content

Commit 57e47c2

Browse files
authored
clean up the string output in read_nexus (#712)
1 parent 14a5c88 commit 57e47c2

File tree

5 files changed

+376
-223
lines changed

5 files changed

+376
-223
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
repos:
22
- repo: https://github.com/astral-sh/ruff-pre-commit
33
# Ruff version.
4-
rev: v0.12.0
4+
rev: v0.14.0
55
hooks:
66
# Run the linter.
77
- id: ruff

src/pynxtools/nexus/nexus.py

Lines changed: 90 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,91 @@ def get_inherited_hdf_nodes(
425425
return (class_path, nxdl_elem_path, elist)
426426

427427

428+
def safe_str(value, precision: int = 8) -> str:
429+
"""Return a deterministic string representation of arrays, lists, or scalars.
430+
431+
Floats are formatted consistently across systems to ensure deterministic
432+
output. Special handling is applied to simplify representation:
433+
- `0.0` → `'0.0'`
434+
- `1.0` → `'1'`
435+
- `1.50` → `'1.5'`
436+
- Non-integer floats keep up to `precision` decimals with trailing zeros
437+
and dots removed.
438+
439+
Arrays and lists are formatted elementwise using the same rules.
440+
441+
Args:
442+
value: The input value to format. Can be a scalar, list, tuple,
443+
NumPy array, or basic type such as int, float, str, or bytes.
444+
precision (int): Maximum number of decimal places for non-integer
445+
floats. Defaults to 8.
446+
447+
Returns:
448+
str: Deterministic string representation of the input.
449+
"""
450+
# Normalize NumPy scalar and 0D array types
451+
if isinstance(value, np.generic):
452+
value = value.item()
453+
elif isinstance(value, np.ndarray) and value.shape == ():
454+
value = value.item()
455+
456+
def format_float(value: float) -> str:
457+
"""Format a float deterministically."""
458+
if value == 0.0:
459+
return "0.0"
460+
if value.is_integer():
461+
return str(int(value))
462+
if abs(value) < 10**-precision or abs(value) >= 10 ** (precision + 1):
463+
return f"{value:.{precision}e}"
464+
return f"{value:.{precision}f}".rstrip("0").rstrip(".")
465+
466+
# --- Arrays ---
467+
if isinstance(value, np.ndarray):
468+
flat = value.flatten()
469+
formatted = []
470+
for v in flat:
471+
if isinstance(v, (np.generic, np.ndarray)):
472+
v = v.item()
473+
if isinstance(v, float):
474+
formatted.append(format_float(v))
475+
elif isinstance(v, (int, bool)):
476+
formatted.append(str(v))
477+
elif isinstance(v, str):
478+
formatted.append(v)
479+
elif isinstance(v, bytes):
480+
formatted.append(v.decode(errors="replace"))
481+
else:
482+
formatted.append(str(v))
483+
reshaped = np.array(formatted, dtype=object).reshape(value.shape)
484+
return np.array2string(
485+
reshaped,
486+
separator=", ",
487+
formatter={"all": lambda x: str(x)},
488+
max_line_width=1000000,
489+
threshold=6,
490+
)
491+
492+
# --- Lists / tuples ---
493+
if isinstance(value, list | tuple):
494+
formatted = [safe_str(v, precision) for v in value]
495+
return "[" + ", ".join(formatted) + "]"
496+
497+
# --- Floats ---
498+
if isinstance(value, float | np.floating):
499+
return format_float(float(value))
500+
501+
# --- Integers / booleans ---
502+
elif isinstance(value, (int, np.integer, bool, np.bool_)):
503+
return str(value)
504+
505+
# --- Strings / bytes ---
506+
elif isinstance(value, (bytes, str)):
507+
return value if isinstance(value, str) else value.decode(errors="replace")
508+
509+
# --- Fallback ---
510+
return str(value)
511+
512+
428513
def process_node(hdf_node, hdf_path, parser, logger, doc=True):
429514
"""Processes an hdf5 node.
430515
- it logs the node found and also checks for its attributes
@@ -436,11 +521,11 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True):
436521
if isinstance(hdf_node, h5py.Dataset):
437522
logger.debug(f"===== FIELD (/{hdf_path}): {hdf_node}")
438523
val = (
439-
str(decode_if_string(hdf_node[()])).split("\n")
524+
safe_str(decode_if_string(hdf_node[()])).split("\n")
440525
if len(hdf_node.shape) <= 1
441-
else str(decode_if_string(hdf_node[0])).split("\n")
526+
else safe_str(decode_if_string(hdf_node[0])).split("\n")
442527
)
443-
logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}")
528+
logger.debug(f"value: {val[0]}{' ...' if len(val) > 1 else ''}")
444529
else:
445530
logger.debug(
446531
f"===== GROUP (/{hdf_path} "
@@ -460,8 +545,8 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True):
460545
)
461546
for key, value in hdf_node.attrs.items():
462547
logger.debug(f"===== ATTRS (/{hdf_path}@{key})")
463-
val = str(decode_if_string(value)).split("\n")
464-
logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}")
548+
val = safe_str(decode_if_string(value)).split("\n")
549+
logger.debug(f"value: {val[0]}{' ...' if len(val) > 1 else ''}")
465550
(req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc, attr=key)
466551
if (
467552
parser is not None

src/pynxtools/nomad/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def _get_value(hdf_node):
149149
def decode_array(arr):
150150
result = []
151151
for x in arr:
152-
if isinstance(x, (np.ndarray, list)):
152+
if isinstance(x, np.ndarray | list):
153153
result.append(decode_array(x))
154154
else:
155155
result.append(str(decode_or_not(x)))

0 commit comments

Comments
 (0)