-
Notifications
You must be signed in to change notification settings - Fork 3
Work on the SasData object definition #156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: refactor_24
Are you sure you want to change the base?
Changes from all commits
b8fb654
74b896c
10455fe
8366cd8
198c0a3
042eb29
94fcc04
2589732
7769f90
f569126
6b9f873
29213a6
6b732f8
a426c19
4b3fb72
4fb53e4
b855026
847b966
d55c377
d0baf0a
b742d2c
6cfd4e3
330a725
e66ff5c
f09c7fe
b2077bb
053d494
dd2ad3c
0729c39
85120f7
84760ee
ca16ca1
dec5669
3a82a27
db12067
c8abd6d
444a05f
9355316
5c1e0ce
e29eaa5
1836411
8335fc7
f92655b
f542286
0c6c4d2
bb6edb4
4815c98
fd9f15a
f18ebcb
11dabb5
4b9d963
71c09ef
32cf210
20aed4e
d83bd3c
f42113a
f5f753d
6e25baa
e78db91
213dbc2
6d457e0
8836bba
b2e4641
ccadd77
b5be36a
aa33dca
a243435
ffb6e05
74afe49
71f4a61
a652ab6
08b68e5
392b4ff
614c1e2
2778bbc
41f9b8b
6115f73
87d2c30
7dbfac7
6d198d6
cee1619
637e8a1
d673b3c
2e91c4f
1c058d2
d292b34
94e9c68
ec59240
f955710
7264c1d
bcb803a
a422741
058e827
f53d66b
60d4bfb
a4190d7
068cc09
f7dab12
5c7cb72
bf7244e
5512416
be963bb
5b3daf4
7c32f83
0da9553
e0db3b8
a606e9c
1e726cb
0abcf01
a2207fd
74cb534
d3931d0
01e7d95
a7206de
ab78e8c
f67ca5a
4fb6948
bc8415a
8592769
07e753a
dbf1afb
0e9e219
e21d9ab
2ce3491
4cb0e45
0716618
6e6e7ba
db6457f
fd8311f
2c76846
577d1d9
ffa2fb8
3803d0d
9d56eb1
ad94f77
c3f28f7
367967b
4d4252c
a8644a8
08e2fb9
d5e3a8b
7f2b413
de8e4e6
5e12205
cc305f9
61c62b0
af8e198
357f986
e907221
57f1e68
a15298e
5d11621
abb2f7b
839b39e
a731597
4ba6657
a1c00c8
174e68f
279cd95
df3b70f
192b43a
a5f0aa5
f3b53e4
3486b09
8a54ddb
d07e432
37cef0d
9b82cbc
bbe282d
bda9de3
012fe8b
f2dff42
b47297c
580c131
a111590
5732617
62238d6
ccec88e
94f7de6
da83c51
74881f5
cd225f8
618409c
736f0c5
998378b
125ca04
7d466cf
350eaf2
f642ef1
2f61308
c1bdc32
c2ad2af
7bd7c32
d03a440
bcefce7
5690d37
f639a11
fab834f
7489c95
d92362c
9c3e1e4
0f7bd91
ad64bf7
c7c3d4b
52f5314
f14c8e7
43269a1
b2d93c7
137ea54
41ae684
5414665
95ddb75
94ccd98
9e97400
a9f4d98
96cb964
9534ea1
1afeaa6
623043d
7c26dca
a0ab00a
ab48763
c99f173
e27d0cc
43cfd12
39e11a3
f9419a0
9b4a561
4acda72
e66117d
948759b
0037490
df12d24
775170e
6ffbaa8
bf15104
c37facd
431e6e8
ad82625
0a6b213
e19f214
76ace63
a45f18e
2d9364b
75bd590
dcb3fb3
7d1cbd6
7af5c33
78bcc6d
078cb36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| import pytest | ||
|
|
||
|
|
||
def pytest_addoption(parser):
    """Register the ``--show_plots`` command-line flag with pytest.

    When supplied on the command line, tests may display diagnostic
    plots; the flag defaults to off.
    """
    parser.addoption("--show_plots",
                     action="store_true",
                     default=False,
                     help="Display diagnostic plots during tests")
|
|
||
@pytest.fixture
def show_plots(request):
    """Fixture exposing the value of the ``--show_plots`` flag as a bool."""
    return request.config.getoption("--show_plots")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,7 @@ dynamic = [ | |
| ] | ||
| description = "Sas Data Loader application" | ||
| readme = "README.md" | ||
| requires-python = ">=3.9" | ||
| requires-python = ">=3.12" | ||
| license = { text = "BSD-3-Clause" } | ||
| authors = [ | ||
| {name = "SasView Team", email = "[email protected]"}, | ||
|
|
@@ -118,3 +118,27 @@ testpaths = [ | |
| norecursedirs = [ | ||
| "sasdata", | ||
| ] | ||
|
|
||
| [tool.ruff] | ||
| line-length = 120 | ||
|
|
||
| [tool.ruff.lint] | ||
| # The ruff rules are available at: https://docs.astral.sh/ruff/rules/ | ||
| select = ["E", # pycodestyle errors | ||
| "F", # pyflakes | ||
| "I", # isort | ||
| "UP", # pyupgrade | ||
| "SIM118", # Use `key in dict` instead of `key in dict.keys()` | ||
| "SIM300"] # Yoda condition detected | ||
|
|
||
|
|
||
| ignore = ["E501", # line too long (leave to formatter) | ||
| "UP008", # Use `super()` instead of `super(__class__, self)` | ||
| "UP031"] # Use format specifiers instead of percent format | ||
|
|
||
| [tool.ruff.lint.isort.sections] | ||
| # Group all SasView and SasModels imports into a separate section. | ||
| "sas" = ["sas", "sasmodels"] | ||
|
|
||
| [tool.ruff.lint.isort] | ||
| section-order = ["future", "standard-library", "third-party", "sas", "first-party", "local-folder"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| from abc import ABC, abstractmethod | ||
|
|
||
| import numpy as np | ||
| from exceptions import InterpretationError | ||
| from numpy._typing import ArrayLike | ||
| from quantities.quantity import Quantity | ||
| from util import is_increasing | ||
|
|
||
|
|
||
class Abscissa(ABC):
    """Coordinates (independent-variable values) attached to some ordinate data.

    Concrete subclasses (GridAbscissa, MeshgridAbscissa, ScatterAbscissa)
    encode how the axis arrays relate to the shape of the ordinate; use
    `Abscissa.determine` to pick the right one from raw data.
    """

    def __init__(self, axes: list[Quantity]):
        self._axes = axes
        self._dimensionality = len(axes)

    @property
    def dimensionality(self) -> int:
        """ Dimensionality of this data """
        return self._dimensionality

    @property
    @abstractmethod
    def is_grid(self) -> bool:
        """ Are these coordinates using a grid representation
        ( as opposed to a general list representation)

        is_grid = True: implies that the corresponding ordinate is n-dimensional tensor
        is_grid = False: implies that the corresponding ordinate is a 1D list

        If the data is one dimensional, is_grid=True

        """

    @property
    def axes(self) -> list[Quantity]:
        """ Axes of the data:

        If it's an (n1-by-n2-by-n3...) grid (is_grid=True): give the values for each axis, returning a list like
        [Quantity(length n1), Quantity(length n2), Quantity(length n3) ... ]

        If it is not grid data (is_grid=False), but n points on a general mesh, give one array for each dimension
        [Quantity(length n), Quantity(length n), Quantity(length n) ... ]

        """
        return self._axes

    @staticmethod
    def _determine_error_message(axis_arrays: list[np.ndarray], ordinate_shape: tuple) -> str:
        """ Error message for the `.determine` function"""
        shape_string = ", ".join([str(axis.shape) for axis in axis_arrays])
        return f"Cannot interpret array shapes axis: [{shape_string}], ordinate: {ordinate_shape}"

    @staticmethod
    def _from_matching_shapes(axis_data: list[Quantity[ArrayLike]],
                              axis_arrays: list[np.ndarray],
                              ordinate_shape: tuple) -> "Abscissa":
        """Resolve case 1 of `determine`: every axis shape equals the ordinate shape.

        Raises InterpretationError when the shapes admit no interpretation (case 1c).
        """
        # 1a: all axes are one-dimensional
        if all(axis.ndim == 1 for axis in axis_arrays):
            # 1a-i: a single, sorted 1D axis is grid data; anything else is scatter data
            if len(axis_arrays) == 1 and is_increasing(axis_arrays[0]):
                return GridAbscissa(axis_data)
            # 1a-i (unsorted) and 1a-ii
            return ScatterAbscissa(axis_data)

        # 1b: each axis carries one dimension per axis (meshgrid-style coordinate arrays)
        if all(axis.ndim == len(axis_arrays) for axis in axis_arrays):
            return MeshgridAbscissa(axis_data)

        # 1c: no consistent interpretation
        raise InterpretationError(Abscissa._determine_error_message(axis_arrays, ordinate_shape))

    @staticmethod
    def determine(axis_data: list[Quantity[ArrayLike]], ordinate_data: Quantity[ArrayLike]) -> "Abscissa":
        """ Get an Abscissa object that fits the combination of axes and data

        Different possibilities:
          1: axes_data[i].shape == axes_data[j].shape == ordinate_data.shape
             1a: axis_data[i] is 1D =>
                 1a-i:  len(axes_data) == 1 => Grid type or Scatter type depending on sortedness
                 1a-ii: len(axes_data) != 1 => Scatter type
             1b: axis_data[i] dimensionality matches len(axis_data) => Meshgrid type
             1c: other => Error
          2: (len(axes_data[0]), len(axes_data[1])... ) == ordinate_data.shape => Grid type
          3: None of the above => Error

        Raises InterpretationError when no interpretation fits.
        """
        ordinate_shape = np.array(ordinate_data.value).shape
        axis_arrays = [np.array(axis.value) for axis in axis_data]

        # Case 1: every axis array has exactly the same shape as the ordinate
        if all(axis.shape == ordinate_shape for axis in axis_arrays):
            return Abscissa._from_matching_shapes(axis_data, axis_arrays, ordinate_shape)

        # Case 2: 1D axes whose lengths jointly tile the ordinate's shape
        if (all(axis.ndim == 1 for axis in axis_arrays)
                and tuple(axis.shape[0] for axis in axis_arrays) == ordinate_shape):
            # Require that they are sorted
            if all(is_increasing(axis) for axis in axis_arrays):
                return GridAbscissa(axis_data)
            raise InterpretationError("Grid axes are not sorted")

        # Case 3: none of the above
        raise InterpretationError(Abscissa._determine_error_message(axis_arrays, ordinate_shape))
|
Comment on lines
+57
to
+107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❌ New issue: Bumpy Road Ahead
Comment on lines
+57
to
+107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❌ New issue: Deep, Nested Complexity |
||
|
|
||
class GridAbscissa(Abscissa):
    """Abscissa for data on a regular grid: one sorted 1D axis per dimension."""

    @property
    def is_grid(self):
        # Grid representation: the corresponding ordinate is an n-dimensional tensor.
        return True
|
|
||
class MeshgridAbscissa(Abscissa):
    """Abscissa whose axes are full n-dimensional coordinate arrays (meshgrid style)."""

    @property
    def is_grid(self):
        # Meshgrid coordinates still describe a grid: the ordinate is an n-D tensor.
        return True
|
|
||
class ScatterAbscissa(Abscissa):
    """Abscissa for scattered points: one coordinate list per dimension, ordinate is a 1D list."""

    @property
    def is_grid(self):
        # General list representation, not a grid.
        return False
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| import re | ||
| from dataclasses import dataclass, field | ||
| from typing import TypeVar | ||
|
|
||
# Known metadata entry names for each metadata category; used to build the
# default (empty) category structure for ASCII file metadata.
initial_metadata = {
    'source': ['name', 'radiation', 'type', 'probe_particle', 'beam_size_name', 'beam_size', 'beam_shape', 'wavelength', 'wavelength_min', 'wavelength_max', 'wavelength_spread'],
    'detector': ['name', 'distance', 'offset', 'orientation', 'beam_center', 'pixel_size', 'slit_length'],
    'aperture': ['name', 'type', 'size_name', 'size', 'distance'],
    'collimation': ['name', 'lengths'],
    'process': ['name', 'date', 'description', 'term', 'notes'],
    'sample': ['name', 'sample_id', 'thickness', 'transmission', 'temperature', 'position', 'orientation', 'details'],
    'transmission_spectrum': ['name', 'timestamp', 'transmission', 'transmission_deviation'],
    'magnetic': ['demagnetizing_field', 'saturation_magnetization', 'applied_magnetic_field', 'counting_index'],
    'other': ['title', 'run', 'definition']
}

# Matches one CamelCase word component: a capital letter followed by lowercase letters.
CASING_REGEX = r'[A-Z][a-z]*'

# First item has the highest precedence.
SEPARATOR_PRECEDENCE = [
    '_',
    '-',
]
# If none of these characters exist in that string, use casing. See init_separator

T = TypeVar('T')

# TODO: There may be a better place for this.
# Maps each measured quantity column name to its uncertainty counterpart.
pairings = {'I': 'dI', 'Q': 'dQ', 'Qx': 'dQx', 'Qy': 'dQy'}
pairing_error = {value: key for key, value in pairings.items()}
# Allows this to be bidirectional.
bidirectional_pairings = pairings | pairing_error
|
|
||
@dataclass
class AsciiMetadataCategory[T]:
    """A single metadata category (e.g. 'source') mapping entry names to values of type T."""
    values: dict[str, T] = field(default_factory=dict)
|
|
||
def default_categories() -> dict[str, AsciiMetadataCategory[str | int]]:
    """Build an empty metadata category for every known category name."""
    categories: dict[str, AsciiMetadataCategory[str | int]] = {}
    for category_name in initial_metadata:
        categories[category_name] = AsciiMetadataCategory()
    return categories
|
|
||
@dataclass
class AsciiReaderMetadata:
    """Metadata store for ASCII-loaded files.

    Metadata can be specified per file (string values), or for all files at
    once via the "master" metadata, whose integer values are indices into the
    components of each filename (as split by that file's separator).
    """

    # Key is the filename.
    filename_specific_metadata: dict[str, dict[str, AsciiMetadataCategory[str]]] = field(default_factory=dict)
    # True instead of str means use the casing to separate the filename.
    filename_separator: dict[str, str | bool] = field(default_factory=dict)
    master_metadata: dict[str, AsciiMetadataCategory[int]] = field(default_factory=default_categories)

    def init_separator(self, filename: str):
        """Choose the separator for *filename*: the first character from
        SEPARATOR_PRECEDENCE that occurs in the name, or True (casing-based)."""
        separator = next(filter(lambda c: c in SEPARATOR_PRECEDENCE, filename), True)
        self.filename_separator[filename] = separator

    def filename_components(self, filename: str, cut_off_extension: bool = True, capture: bool = False) -> list[str]:
        """Split the filename into several components based on the current separator for that file."""
        separator = self.filename_separator[filename]
        base_str = '({})' if capture else '{}'
        if isinstance(separator, str):
            # Escape the separator so any regex metacharacter is treated
            # literally (identical behavior for the current '_'/'-' separators,
            # robust if more separators are ever added).
            splitted = re.split(base_str.replace('{}', re.escape(separator)), filename)
        else:
            splitted = re.findall(base_str.replace('{}', CASING_REGEX), filename)
        # If the last component has a file extension, remove it.
        last_component = splitted[-1]
        if cut_off_extension and '.' in last_component:
            pos = last_component.index('.')
            last_component = last_component[:pos]
        splitted[-1] = last_component
        return splitted

    def purge_unreachable(self, filename: str):
        """Remove master metadata entries whose component index is out of range.

        This is used when the separator has changed. If, let's say, we now have
        2 components when there were 5 but the 3rd component was selected, this
        would now produce an index-out-of-range exception, so such entries are
        purged to stop exceptions from happening."""
        components = self.filename_components(filename)
        component_length = len(components)
        # Converting to list as this mutates the dictionary as it goes through it.
        for category_name, category in list(self.master_metadata.items()):
            for key, value in list(category.values.items()):
                if value >= component_length:
                    del self.master_metadata[category_name].values[key]

    def all_file_metadata(self, filename: str) -> dict[str, AsciiMetadataCategory[str]]:
        """Return all of the metadata known for the specified filename. This
        will combine the master metadata specified for all files with the
        metadata specific to that filename.

        Raises TypeError if a stored value is neither a string nor an int.
        """
        file_metadata = self.filename_specific_metadata[filename]
        components = self.filename_components(filename)
        # The ordering here is important. If there are conflicts, the second dictionary will override the first one.
        # Conflicts shouldn't really be happening anyway but if they do, we're gonna go with the master metadata taking
        # precedence for now.
        return_metadata: dict[str, AsciiMetadataCategory[str]] = {}
        for category_name, category in (file_metadata | self.master_metadata).items():
            combined_category_dict = category.values | self.master_metadata[category_name].values
            new_category_dict: dict[str, str] = {}
            for key, value in combined_category_dict.items():
                if isinstance(value, str):
                    new_category_dict[key] = value
                elif isinstance(value, int):
                    # Integer values index into the filename's components.
                    new_category_dict[key] = components[value]
                else:
                    raise TypeError(f'Invalid value for {key} in {category_name}')
            new_category = AsciiMetadataCategory(new_category_dict)
            return_metadata[category_name] = new_category
        return return_metadata

    def get_metadata(self, category: str, value: str, filename: str, error_on_not_found=False) -> str | None:
        """Get a particular piece of metadata for the filename.

        Returns None when not found, unless error_on_not_found is set, in
        which case a ValueError is raised."""
        components = self.filename_components(filename)

        # We prioritise the master metadata.

        # TODO: Assumes category in master_metadata exists. Is this a reasonable assumption? May need to make sure it is
        # definitely in the dictionary.
        if value in self.master_metadata[category].values:
            index = self.master_metadata[category].values[value]
            return components[index]
        target_category = self.filename_specific_metadata[filename][category].values
        if value in target_category:
            return target_category[value]
        if error_on_not_found:
            raise ValueError('value does not exist in metadata.')
        else:
            return None

    def update_metadata(self, category: str, key: str, filename: str, new_value: str | int):
        """Update the metadata for a filename. If the new_value is a string,
        then this new metadata will be specific to that file. Otherwise, if
        new_value is an integer, then this will represent the component of the
        filename that this metadata applies to all."""
        if isinstance(new_value, str):
            self.filename_specific_metadata[filename][category].values[key] = new_value
            # TODO: What about the master metadata? Until that's gone, that still takes precedence.
        elif isinstance(new_value, int):
            self.master_metadata[category].values[key] = new_value
        else:
            raise TypeError('Invalid type for new_value')

    def clear_metadata(self, category: str, key: str, filename: str):
        """Remove any metadata recorded for a certain filename."""
        category_obj = self.filename_specific_metadata[filename][category]
        if key in category_obj.values:
            del category_obj.values[key]
        if key in self.master_metadata[category].values:
            del self.master_metadata[category].values[key]

    def add_file(self, new_filename: str):
        """Add a filename to the metadata, filling it with some default
        categories."""
        # TODO: Fix typing here. Pyright is showing errors.
        self.filename_specific_metadata[new_filename] = default_categories()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Things to check once everything is in place: | ||
|
|
||
| 1) Do any centigrade fields read in incorrectly? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❌ New issue: Complex Method
Abscissa.determine has a cyclomatic complexity of 24, threshold = 9
Suppress