-
Notifications
You must be signed in to change notification settings - Fork 3
Work on the SasData object definition #156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: refactor_24
Are you sure you want to change the base?
Changes from all commits
b8fb654
74b896c
10455fe
8366cd8
198c0a3
042eb29
94fcc04
2589732
7769f90
f569126
6b9f873
29213a6
6b732f8
a426c19
4b3fb72
4fb53e4
b855026
847b966
d55c377
d0baf0a
b742d2c
6cfd4e3
330a725
e66ff5c
f09c7fe
b2077bb
053d494
dd2ad3c
0729c39
85120f7
84760ee
ca16ca1
dec5669
3a82a27
db12067
c8abd6d
444a05f
9355316
5c1e0ce
e29eaa5
1836411
8335fc7
f92655b
f542286
0c6c4d2
bb6edb4
4815c98
fd9f15a
f18ebcb
11dabb5
4b9d963
71c09ef
32cf210
20aed4e
d83bd3c
f42113a
f5f753d
6e25baa
e78db91
213dbc2
6d457e0
8836bba
b2e4641
ccadd77
b5be36a
aa33dca
a243435
ffb6e05
74afe49
71f4a61
a652ab6
08b68e5
392b4ff
614c1e2
2778bbc
41f9b8b
6115f73
87d2c30
7dbfac7
6d198d6
cee1619
637e8a1
d673b3c
2e91c4f
1c058d2
d292b34
94e9c68
ec59240
f955710
7264c1d
bcb803a
a422741
058e827
f53d66b
60d4bfb
a4190d7
068cc09
f7dab12
5c7cb72
bf7244e
5512416
be963bb
5b3daf4
7c32f83
0da9553
e0db3b8
a606e9c
1e726cb
0abcf01
a2207fd
74cb534
d3931d0
01e7d95
a7206de
ab78e8c
f67ca5a
4fb6948
bc8415a
8592769
07e753a
dbf1afb
0e9e219
e21d9ab
2ce3491
4cb0e45
0716618
6e6e7ba
db6457f
fd8311f
2c76846
577d1d9
ffa2fb8
3803d0d
9d56eb1
ad94f77
c3f28f7
367967b
4d4252c
a8644a8
08e2fb9
d5e3a8b
7f2b413
de8e4e6
5e12205
cc305f9
61c62b0
af8e198
357f986
e907221
57f1e68
a15298e
5d11621
abb2f7b
839b39e
a731597
4ba6657
a1c00c8
174e68f
279cd95
df3b70f
192b43a
a5f0aa5
f3b53e4
3486b09
8a54ddb
d07e432
37cef0d
9b82cbc
bbe282d
bda9de3
012fe8b
f2dff42
b47297c
580c131
a111590
5732617
62238d6
ccec88e
94f7de6
da83c51
74881f5
cd225f8
618409c
736f0c5
998378b
125ca04
7d466cf
350eaf2
f642ef1
2f61308
c1bdc32
c2ad2af
7bd7c32
d03a440
bcefce7
5690d37
f639a11
fab834f
7489c95
d92362c
9c3e1e4
0f7bd91
ad64bf7
c7c3d4b
52f5314
f14c8e7
43269a1
b2d93c7
137ea54
41ae684
5414665
95ddb75
94ccd98
9e97400
a9f4d98
96cb964
9534ea1
1afeaa6
623043d
7c26dca
a0ab00a
ab48763
c99f173
e27d0cc
43cfd12
39e11a3
f9419a0
9b4a561
4acda72
e66117d
948759b
0037490
df12d24
775170e
6ffbaa8
bf15104
c37facd
431e6e8
ad82625
0a6b213
e19f214
76ace63
a45f18e
2d9364b
75bd590
dcb3fb3
7d1cbd6
7af5c33
78bcc6d
078cb36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| import pytest | ||
|
|
||
|
|
||
def pytest_addoption(parser):
    """Register the ``--show_plots`` command-line flag with pytest.

    When supplied on the command line, tests may display diagnostic
    plots; the flag defaults to off.
    """
    parser.addoption("--show_plots",
                     action="store_true",
                     default=False,
                     help="Display diagnostic plots during tests")
|
|
||
@pytest.fixture
def show_plots(request):
    """Fixture exposing the value of the ``--show_plots`` flag as a bool."""
    return request.config.getoption("--show_plots")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,7 @@ dynamic = [ | |
| ] | ||
| description = "Sas Data Loader application" | ||
| readme = "README.md" | ||
| requires-python = ">=3.9" | ||
| requires-python = ">=3.12" | ||
| license = { text = "BSD-3-Clause" } | ||
| authors = [ | ||
| {name = "SasView Team", email = "[email protected]"}, | ||
|
|
@@ -118,3 +118,27 @@ testpaths = [ | |
| norecursedirs = [ | ||
| "sasdata", | ||
| ] | ||
|
|
||
| [tool.ruff] | ||
| line-length = 120 | ||
|
|
||
| [tool.ruff.lint] | ||
| # The ruff rules are available at: https://docs.astral.sh/ruff/rules/ | ||
| select = ["E", # pycodestyle errors | ||
| "F", # pyflakes | ||
| "I", # isort | ||
| "UP", # pyupgrade | ||
| "SIM118", # Use `key in dict` instead of `key in dict.keys()` | ||
| "SIM300"] # Yoda condition detected | ||
|
|
||
|
|
||
| ignore = ["E501", # line too long (leave to formatter) | ||
| "UP008", # Use `super()` instead of `super(__class__, self)` | ||
| "UP031"] # Use format specifiers instead of percent format | ||
|
|
||
| [tool.ruff.lint.isort.sections] | ||
| # Group all SasView and SasModels imports into a separate section. | ||
| "sas" = ["sas", "sasmodels"] | ||
|
|
||
| [tool.ruff.lint.isort] | ||
| section-order = ["future", "standard-library", "third-party", "sas", "first-party", "local-folder"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| from abc import ABC, abstractmethod | ||
|
|
||
| import numpy as np | ||
| from exceptions import InterpretationError | ||
| from numpy._typing import ArrayLike | ||
| from quantities.quantity import Quantity | ||
| from util import is_increasing | ||
|
|
||
|
|
||
class Abscissa(ABC):
    """Coordinates (independent-variable values) attached to some ordinate data.

    Concrete subclasses (GridAbscissa, MeshgridAbscissa, ScatterAbscissa)
    encode how the axis arrays relate to the shape of the ordinate; use
    `Abscissa.determine` to pick the right one from raw data.
    """

    def __init__(self, axes: list[Quantity]):
        self._axes = axes
        self._dimensionality = len(axes)

    @property
    def dimensionality(self) -> int:
        """ Dimensionality of this data """
        return self._dimensionality

    @property
    @abstractmethod
    def is_grid(self) -> bool:
        """ Are these coordinates using a grid representation
        ( as opposed to a general list representation)

        is_grid = True: implies that the corresponding ordinate is n-dimensional tensor
        is_grid = False: implies that the corresponding ordinate is a 1D list

        If the data is one dimensional, is_grid=True

        """

    @property
    def axes(self) -> list[Quantity]:
        """ Axes of the data:

        If it's an (n1-by-n2-by-n3...) grid (is_grid=True): give the values for each axis, returning a list like
        [Quantity(length n1), Quantity(length n2), Quantity(length n3) ... ]

        If it is not grid data (is_grid=False), but n points on a general mesh, give one array for each dimension
        [Quantity(length n), Quantity(length n), Quantity(length n) ... ]

        """
        return self._axes

    @staticmethod
    def _determine_error_message(axis_arrays: list[np.ndarray], ordinate_shape: tuple) -> str:
        """ Error message for the `.determine` function"""
        shape_string = ", ".join([str(axis.shape) for axis in axis_arrays])
        return f"Cannot interpret array shapes axis: [{shape_string}], ordinate: {ordinate_shape}"

    @staticmethod
    def _from_matching_shapes(axis_data: list[Quantity[ArrayLike]],
                              axis_arrays: list[np.ndarray],
                              ordinate_shape: tuple) -> "Abscissa":
        """Resolve case 1 of `determine`: every axis shape equals the ordinate shape.

        Raises InterpretationError when the shapes admit no interpretation (case 1c).
        """
        # 1a: all axes are one-dimensional
        if all(axis.ndim == 1 for axis in axis_arrays):
            # 1a-i: a single, sorted 1D axis is grid data; anything else is scatter data
            if len(axis_arrays) == 1 and is_increasing(axis_arrays[0]):
                return GridAbscissa(axis_data)
            # 1a-i (unsorted) and 1a-ii
            return ScatterAbscissa(axis_data)

        # 1b: each axis carries one dimension per axis (meshgrid-style coordinate arrays)
        if all(axis.ndim == len(axis_arrays) for axis in axis_arrays):
            return MeshgridAbscissa(axis_data)

        # 1c: no consistent interpretation
        raise InterpretationError(Abscissa._determine_error_message(axis_arrays, ordinate_shape))

    @staticmethod
    def determine(axis_data: list[Quantity[ArrayLike]], ordinate_data: Quantity[ArrayLike]) -> "Abscissa":
        """ Get an Abscissa object that fits the combination of axes and data

        Different possibilities:
          1: axes_data[i].shape == axes_data[j].shape == ordinate_data.shape
             1a: axis_data[i] is 1D =>
                 1a-i:  len(axes_data) == 1 => Grid type or Scatter type depending on sortedness
                 1a-ii: len(axes_data) != 1 => Scatter type
             1b: axis_data[i] dimensionality matches len(axis_data) => Meshgrid type
             1c: other => Error
          2: (len(axes_data[0]), len(axes_data[1])... ) == ordinate_data.shape => Grid type
          3: None of the above => Error

        Raises InterpretationError when no interpretation fits.
        """
        ordinate_shape = np.array(ordinate_data.value).shape
        axis_arrays = [np.array(axis.value) for axis in axis_data]

        # Case 1: every axis array has exactly the same shape as the ordinate
        if all(axis.shape == ordinate_shape for axis in axis_arrays):
            return Abscissa._from_matching_shapes(axis_data, axis_arrays, ordinate_shape)

        # Case 2: 1D axes whose lengths jointly tile the ordinate's shape
        if (all(axis.ndim == 1 for axis in axis_arrays)
                and tuple(axis.shape[0] for axis in axis_arrays) == ordinate_shape):
            # Require that they are sorted
            if all(is_increasing(axis) for axis in axis_arrays):
                return GridAbscissa(axis_data)
            raise InterpretationError("Grid axes are not sorted")

        # Case 3: none of the above
        raise InterpretationError(Abscissa._determine_error_message(axis_arrays, ordinate_shape))
|
Comment on lines
+57
to
+107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❌ New issue: Bumpy Road Ahead
Comment on lines
+57
to
+107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❌ New issue: Deep, Nested Complexity |
||
|
|
||
class GridAbscissa(Abscissa):
    """Abscissa for data on a regular grid: one sorted 1D axis per dimension."""

    @property
    def is_grid(self):
        # Grid representation: the corresponding ordinate is an n-dimensional tensor.
        return True
|
|
||
class MeshgridAbscissa(Abscissa):
    """Abscissa whose axes are full n-dimensional coordinate arrays (meshgrid style)."""

    @property
    def is_grid(self):
        # Meshgrid coordinates still describe a grid: the ordinate is an n-D tensor.
        return True
|
|
||
class ScatterAbscissa(Abscissa):
    """Abscissa for scattered points: one coordinate list per dimension, ordinate is a 1D list."""

    @property
    def is_grid(self):
        # General list representation, not a grid.
        return False
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| import re | ||
| from dataclasses import dataclass, field | ||
| from typing import TypeVar | ||
|
|
||
# Known metadata entry names for each metadata category; used to build the
# default (empty) category structure for ASCII file metadata.
initial_metadata = {
    'source': ['name', 'radiation', 'type', 'probe_particle', 'beam_size_name', 'beam_size', 'beam_shape', 'wavelength', 'wavelength_min', 'wavelength_max', 'wavelength_spread'],
    'detector': ['name', 'distance', 'offset', 'orientation', 'beam_center', 'pixel_size', 'slit_length'],
    'aperture': ['name', 'type', 'size_name', 'size', 'distance'],
    'collimation': ['name', 'lengths'],
    'process': ['name', 'date', 'description', 'term', 'notes'],
    'sample': ['name', 'sample_id', 'thickness', 'transmission', 'temperature', 'position', 'orientation', 'details'],
    'transmission_spectrum': ['name', 'timestamp', 'transmission', 'transmission_deviation'],
    'magnetic': ['demagnetizing_field', 'saturation_magnetization', 'applied_magnetic_field', 'counting_index'],
    'other': ['title', 'run', 'definition']
}

# Matches one CamelCase word component: a capital letter followed by lowercase letters.
CASING_REGEX = r'[A-Z][a-z]*'

# First item has the highest precedence.
SEPARATOR_PRECEDENCE = [
    '_',
    '-',
]
# If none of these characters exist in that string, use casing. See init_separator

T = TypeVar('T')

# TODO: There may be a better place for this.
# Maps each measured quantity column name to its uncertainty counterpart.
pairings = {'I': 'dI', 'Q': 'dQ', 'Qx': 'dQx', 'Qy': 'dQy'}
pairing_error = {value: key for key, value in pairings.items()}
# Allows this to be bidirectional.
bidirectional_pairings = pairings | pairing_error
|
|
||
@dataclass
class AsciiMetadataCategory[T]:
    """A single metadata category (e.g. 'source') mapping entry names to values of type T."""
    values: dict[str, T] = field(default_factory=dict)
|
|
||
def default_categories() -> dict[str, AsciiMetadataCategory[str | int]]:
    """Build an empty metadata category for every known category name."""
    categories: dict[str, AsciiMetadataCategory[str | int]] = {}
    for category_name in initial_metadata:
        categories[category_name] = AsciiMetadataCategory()
    return categories
|
|
||
@dataclass
class AsciiReaderMetadata:
    """Metadata store for ASCII-loaded files.

    Metadata can be specified per file (string values), or for all files at
    once via the "master" metadata, whose integer values are indices into the
    components of each filename (as split by that file's separator).
    """

    # Key is the filename.
    filename_specific_metadata: dict[str, dict[str, AsciiMetadataCategory[str]]] = field(default_factory=dict)
    # True instead of str means use the casing to separate the filename.
    filename_separator: dict[str, str | bool] = field(default_factory=dict)
    master_metadata: dict[str, AsciiMetadataCategory[int]] = field(default_factory=default_categories)

    def init_separator(self, filename: str):
        """Choose the separator for *filename*: the first character from
        SEPARATOR_PRECEDENCE that occurs in the name, or True (casing-based)."""
        separator = next(filter(lambda c: c in SEPARATOR_PRECEDENCE, filename), True)
        self.filename_separator[filename] = separator

    def filename_components(self, filename: str, cut_off_extension: bool = True, capture: bool = False) -> list[str]:
        """Split the filename into several components based on the current separator for that file."""
        separator = self.filename_separator[filename]
        base_str = '({})' if capture else '{}'
        if isinstance(separator, str):
            # Escape the separator so any regex metacharacter is treated
            # literally (identical behavior for the current '_'/'-' separators,
            # robust if more separators are ever added).
            splitted = re.split(base_str.replace('{}', re.escape(separator)), filename)
        else:
            splitted = re.findall(base_str.replace('{}', CASING_REGEX), filename)
        # If the last component has a file extension, remove it.
        last_component = splitted[-1]
        if cut_off_extension and '.' in last_component:
            pos = last_component.index('.')
            last_component = last_component[:pos]
        splitted[-1] = last_component
        return splitted

    def purge_unreachable(self, filename: str):
        """Remove master metadata entries whose component index is out of range.

        This is used when the separator has changed. If, let's say, we now have
        2 components when there were 5 but the 3rd component was selected, this
        would now produce an index-out-of-range exception, so such entries are
        purged to stop exceptions from happening."""
        components = self.filename_components(filename)
        component_length = len(components)
        # Converting to list as this mutates the dictionary as it goes through it.
        for category_name, category in list(self.master_metadata.items()):
            for key, value in list(category.values.items()):
                if value >= component_length:
                    del self.master_metadata[category_name].values[key]

    def all_file_metadata(self, filename: str) -> dict[str, AsciiMetadataCategory[str]]:
        """Return all of the metadata known for the specified filename. This
        will combine the master metadata specified for all files with the
        metadata specific to that filename.

        Raises TypeError if a stored value is neither a string nor an int.
        """
        file_metadata = self.filename_specific_metadata[filename]
        components = self.filename_components(filename)
        # The ordering here is important. If there are conflicts, the second dictionary will override the first one.
        # Conflicts shouldn't really be happening anyway but if they do, we're gonna go with the master metadata taking
        # precedence for now.
        return_metadata: dict[str, AsciiMetadataCategory[str]] = {}
        for category_name, category in (file_metadata | self.master_metadata).items():
            combined_category_dict = category.values | self.master_metadata[category_name].values
            new_category_dict: dict[str, str] = {}
            for key, value in combined_category_dict.items():
                if isinstance(value, str):
                    new_category_dict[key] = value
                elif isinstance(value, int):
                    # Integer values index into the filename's components.
                    new_category_dict[key] = components[value]
                else:
                    raise TypeError(f'Invalid value for {key} in {category_name}')
            new_category = AsciiMetadataCategory(new_category_dict)
            return_metadata[category_name] = new_category
        return return_metadata

    def get_metadata(self, category: str, value: str, filename: str, error_on_not_found=False) -> str | None:
        """Get a particular piece of metadata for the filename.

        Returns None when not found, unless error_on_not_found is set, in
        which case a ValueError is raised."""
        components = self.filename_components(filename)

        # We prioritise the master metadata.

        # TODO: Assumes category in master_metadata exists. Is this a reasonable assumption? May need to make sure it is
        # definitely in the dictionary.
        if value in self.master_metadata[category].values:
            index = self.master_metadata[category].values[value]
            return components[index]
        target_category = self.filename_specific_metadata[filename][category].values
        if value in target_category:
            return target_category[value]
        if error_on_not_found:
            raise ValueError('value does not exist in metadata.')
        else:
            return None

    def update_metadata(self, category: str, key: str, filename: str, new_value: str | int):
        """Update the metadata for a filename. If the new_value is a string,
        then this new metadata will be specific to that file. Otherwise, if
        new_value is an integer, then this will represent the component of the
        filename that this metadata applies to all."""
        if isinstance(new_value, str):
            self.filename_specific_metadata[filename][category].values[key] = new_value
            # TODO: What about the master metadata? Until that's gone, that still takes precedence.
        elif isinstance(new_value, int):
            self.master_metadata[category].values[key] = new_value
        else:
            raise TypeError('Invalid type for new_value')

    def clear_metadata(self, category: str, key: str, filename: str):
        """Remove any metadata recorded for a certain filename."""
        category_obj = self.filename_specific_metadata[filename][category]
        if key in category_obj.values:
            del category_obj.values[key]
        if key in self.master_metadata[category].values:
            del self.master_metadata[category].values[key]

    def add_file(self, new_filename: str):
        """Add a filename to the metadata, filling it with some default
        categories."""
        # TODO: Fix typing here. Pyright is showing errors.
        self.filename_specific_metadata[new_filename] = default_categories()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Things to check once everything is in place: | ||
|
|
||
| 1) Do any centigrade fields read in incorrectly? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❌ New issue: Complex Method
Abscissa.determine has a cyclomatic complexity of 24, threshold = 9
Suppress