Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ classifiers = [
keywords = ["data format", "HDF5", "neutron scattering", "x-ray scattering"]
requires-python = ">=3.9"
dependencies = [
"chardet",
"colored",
"h5py",
"hdf5plugin",
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/nexus/completer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2019-2025, NeXpy Development Team.
# Copyright (c) 2019-2026, NeXpy Development Team.
#
# Author: Paul Kienzle, Ray Osborn
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/nexus/lock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2019-2022, NeXpy Development Team.
# Copyright (c) 2019-2026, NeXpy Development Team.
#
# Author: Paul Kienzle, Ray Osborn
#
Expand Down
13 changes: 7 additions & 6 deletions src/nexusformat/nexus/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,17 @@ def text(value):
if isinstance(value, bytes):
try:
_text = value.decode(NX_CONFIG['encoding'])
except UnicodeDecodeError:
if NX_CONFIG['encoding'] == 'utf-8':
_text = value.decode('latin-1')
else:
_text = value.decode('utf-8')
except (UnicodeDecodeError, KeyError, LookupError):
import chardet
detected = chardet.detect(value)
encoding = detected['encoding']
if not encoding:
encoding = 'latin-1'
_text = value.decode(encoding, errors='replace')
else:
_text = str(value)
return _text.replace('\x00', '').rstrip()


def is_text(value):
"""
Return True if the value represents text.
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nexusformat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2019-2021, NeXpy Development Team.
# Copyright (c) 2019-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxcheck.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2025, NeXpy Development Team.
# Copyright (c) 2025-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxconsolidate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2022, NeXpy Development Team.
# Copyright (c) 2022-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxdir.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2019-2021, NeXpy Development Team.
# Copyright (c) 2019-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxduplicate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2019-2021, NeXpy Development Team.
# Copyright (c) 2019-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxinspect.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2025, NeXpy Development Team.
# Copyright (c) 2025-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
2 changes: 1 addition & 1 deletion src/nexusformat/scripts/nxstack.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2015-2022, NeXpy Development Team.
# Copyright (c) 2015-2026, NeXpy Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
Expand Down
45 changes: 45 additions & 0 deletions tests/test_encodings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os

import pytest
from nexusformat.nexus import NXfield, NXentry, nxopen

def test_encoding_detection(tmpdir):
    """Round-trip a Latin-1 encoded byte string through a NeXus file.

    The text "Café" is encoded as Latin-1, stored in ``entry/name`` as a
    byte-string field, and read back through ``nxvalue``. If the value
    comes back as raw bytes, it must decode as Latin-1 to the original
    text and must fail to decode as UTF-8. Otherwise its string form
    must equal the original text.
    """
    expected = "Café"
    raw = expected.encode('latin-1')
    path = os.path.join(tmpdir, "encoding_test.nxs")

    with nxopen(path, 'w') as root:
        root['entry'] = NXentry()
        root['entry/name'] = NXfield(raw, dtype='S')

    with nxopen(path, 'r') as root:
        stored = root['entry/name'].nxvalue

        if not isinstance(stored, bytes):
            # The value was returned already converted to text.
            assert str(stored) == expected
        else:
            # Raw bytes: valid Latin-1, but not valid UTF-8.
            assert stored.decode('latin-1') == expected
            with pytest.raises(UnicodeDecodeError):
                stored.decode('utf-8')


@pytest.mark.parametrize("encoding", ["latin-1", "cp1252", "ascii"])
def test_multiple_encodings(tmpdir, encoding):
    """Round-trip "Test_Data" through a NeXus file for each encoding.

    The text is encoded with the parametrized ``encoding``, stored in
    ``entry/name`` as a byte-string field, and read back through
    ``nxvalue``. The retrieved value, decoded with the same encoding
    when it is raw bytes or converted with ``str`` otherwise, must
    equal the original text.
    """
    expected = "Test_Data"
    path = os.path.join(tmpdir, f"test_{encoding}.nxs")

    with nxopen(path, 'w') as root:
        root['entry'] = NXentry()
        root['entry/name'] = NXfield(expected.encode(encoding), dtype='S')

    with nxopen(path, 'r') as root:
        stored = root['entry/name'].nxvalue

        # Normalise to text whichever type the reader handed back.
        if isinstance(stored, bytes):
            recovered = stored.decode(encoding)
        else:
            recovered = str(stored)
        assert recovered == expected
Loading