From 300d737700823732dd0661610711736f860ca131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 7 Aug 2025 18:01:47 +0200 Subject: [PATCH 01/14] Add decathlon datalist description to load_decathlon_datalist --- monai/data/decathlon_datalist.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 14765dcfaa..cf2d652025 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -92,8 +92,34 @@ def load_decathlon_datalist( ) -> list[dict]: """Load image/label paths of decathlon challenge from JSON file - Json file is similar to what you get from http://medicaldecathlon.com/ - Those dataset.json files + JSON file should follow the format of the Medical Segmentation Decathlon + datalist.json files. The files are structured as follows: + { + "metadata_key_0": "metadata_value_0", + "metadata_key_1": "metadata_value_1", + ..., + "training": [ + {"image": "path/to/image_1.nii.gz", "label": "path/to/label_1.nii.gz"}, + {"image": "path/to/image_2.nii.gz", "label": "path/to/label_2.nii.gz"}, + ... + ], + "test": [ + "path/to/image_3.nii.gz", + "path/to/image_4.nii.gz", + ... + ] + } + + The metadata keys are optional for loading the datalist, but include the following + string items: + "name", "description", "reference", "licence", "release", "tensorImageSize", + two dict items, "modality" (keyed by channel index), and "labels" (keyed by label index), + and two integer items, "numTraining" and "numTest", with the number of items. + + The "training" key contains a list of direcitonaries, each of which has at least + the "image" and "label" keys, the latter of which is a path for segmentation data. + Each item can also include a "fold" key for cross-validation purposes. + The "test" key contains a list of image paths, without labels. Args: data_list_file_path: the path to the json file of datalist. 
From f0dde7acbe6d8ab3bf9d5a057fbd234134a53e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Tue, 12 Aug 2025 09:36:10 +0200 Subject: [PATCH 02/14] small clarification --- monai/data/decathlon_datalist.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index cf2d652025..672feeeed3 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -160,8 +160,9 @@ def load_decathlon_datalist( def load_decathlon_properties(data_property_file_path: PathLike, property_keys: Sequence[str] | str) -> dict: - """Load the properties from the JSON file contains data property with specified `property_keys`. - + """Extract the properties with the specified keys from the Decathlon JSON file. + See under `load_decathlon_datalist` for the expected keys in the Decathlon challenge. + Args: data_property_file_path: the path to the JSON file of data properties. property_keys: expected keys to load from the JSON file, for example, we have these keys From 2648b84dce3a4d7fe2686129f92cc034a39bfaba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Tue, 12 Aug 2025 09:48:58 +0200 Subject: [PATCH 03/14] add note on input format to auto_runner --- monai/apps/auto3dseg/auto_runner.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py index 191e4e99a9..b7478626a5 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -194,6 +194,13 @@ class AutoRunner: ├── segresnet2d_0 # network scripts/configs/checkpoints and pickle object of the algo └── swinunetr_0 # network scripts/configs/checkpoints and pickle object of the algo + Notes: + The input config requires at least the following keys: + - ``modality``: the modality of the data, e.g. "ct", "mri", etc. + - ``datalist``: the path to the datalist file in JSON format. 
+ - ``dataroot``: the root directory of the data files. + + For the datalist file format, see the description under monai.data.decathlon_datalist.load_decathlon_datalist. """ analyze_params: dict | None From 86c90857bf263e960c990f5f3ae24d5e116cdf7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 15:00:21 +0200 Subject: [PATCH 04/14] add link to decathlon --- monai/data/decathlon_datalist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 672feeeed3..875dfe5036 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -93,7 +93,8 @@ def load_decathlon_datalist( """Load image/label paths of decathlon challenge from JSON file JSON file should follow the format of the Medical Segmentation Decathlon - datalist.json files. The files are structured as follows: + datalist.json files, see http://medicaldecathlon.com. + The files are structured as follows: { "metadata_key_0": "metadata_value_0", "metadata_key_1": "metadata_value_1", From 48afc88ca77dea92f65bbea7132ec75e0a07cbdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 15:43:39 +0200 Subject: [PATCH 05/14] fix typo --- monai/data/decathlon_datalist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 875dfe5036..6144190e0c 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -117,7 +117,7 @@ def load_decathlon_datalist( two dict items, "modality" (keyed by channel index), and "labels" (keyed by label index), and two integer items, "numTraining" and "numTest", with the number of items. 
- The "training" key contains a list of direcitonaries, each of which has at least + The "training" key contains a list of dictionaries, each of which has at least the "image" and "label" keys, the latter of which is a path for segmentation data. Each item can also include a "fold" key for cross-validation purposes. The "test" key contains a list of image paths, without labels. From 761306a695a80c5c293354253e901b9659517c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 15:58:06 +0200 Subject: [PATCH 06/14] formatting --- monai/apps/auto3dseg/auto_runner.py | 2 +- monai/data/decathlon_datalist.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py index b7478626a5..d15041c59c 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -194,7 +194,7 @@ class AutoRunner: ├── segresnet2d_0 # network scripts/configs/checkpoints and pickle object of the algo └── swinunetr_0 # network scripts/configs/checkpoints and pickle object of the algo - Notes: + Notes: The input config requires at least the following keys: - ``modality``: the modality of the data, e.g. "ct", "mri", etc. - ``datalist``: the path to the datalist file in JSON format. diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 6144190e0c..8bf3fc91a5 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -163,7 +163,7 @@ def load_decathlon_datalist( def load_decathlon_properties(data_property_file_path: PathLike, property_keys: Sequence[str] | str) -> dict: """Extract the properties with the specified keys from the Decathlon JSON file. See under `load_decathlon_datalist` for the expected keys in the Decathlon challenge. - + Args: data_property_file_path: the path to the JSON file of data properties. 
property_keys: expected keys to load from the JSON file, for example, we have these keys From ed384d717daec8a0f9b8d96ee217c7e59f012e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 16:36:56 +0200 Subject: [PATCH 07/14] doc formatting --- monai/data/decathlon_datalist.py | 57 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 8bf3fc91a5..5a87fd7f96 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -95,31 +95,34 @@ def load_decathlon_datalist( JSON file should follow the format of the Medical Segmentation Decathlon datalist.json files, see http://medicaldecathlon.com. The files are structured as follows: - { - "metadata_key_0": "metadata_value_0", - "metadata_key_1": "metadata_value_1", - ..., - "training": [ - {"image": "path/to/image_1.nii.gz", "label": "path/to/label_1.nii.gz"}, - {"image": "path/to/image_2.nii.gz", "label": "path/to/label_2.nii.gz"}, - ... - ], - "test": [ - "path/to/image_3.nii.gz", - "path/to/image_4.nii.gz", - ... - ] - } - - The metadata keys are optional for loading the datalist, but include the following - string items: - "name", "description", "reference", "licence", "release", "tensorImageSize", - two dict items, "modality" (keyed by channel index), and "labels" (keyed by label index), - and two integer items, "numTraining" and "numTest", with the number of items. - The "training" key contains a list of dictionaries, each of which has at least - the "image" and "label" keys, the latter of which is a path for segmentation data. - Each item can also include a "fold" key for cross-validation purposes. + .. 
code-block:: json + + { + "metadata_key_0": "metadata_value_0", + "metadata_key_1": "metadata_value_1", + ..., + "training": [ + {"image": "path/to/image_1.nii.gz", "label": "path/to/label_1.nii.gz"}, + {"image": "path/to/image_2.nii.gz", "label": "path/to/label_2.nii.gz"}, + ... + ], + "test": [ + "path/to/image_3.nii.gz", + "path/to/image_4.nii.gz", + ... + ] + } + + + The metadata keys are optional for loading the datalist, but include: + - some string items: ``name``, ``description``, ``reference``, ``licence``, ``release``, ``tensorImageSize`` + - two dict items: ``modality`` (keyed by channel index), and ``labels`` (keyed by label index) + - and two integer items: ``numTraining`` and ``numTest``, with the number of items. + + The ``training`` key contains a list of dictionaries, each of which has at least + the ``image`` and ``label`` keys, the latter of which is a path (for segmentation data). + Each item can also include a ``fold`` key for cross-validation purposes. The "test" key contains a list of image paths, without labels. Args: @@ -134,11 +137,11 @@ def load_decathlon_datalist( Returns a list of data items, each of which is a dict keyed by element names, for example: - .. code-block:: + .. 
code-block:: python [ - {'image': '/workspace/data/chest_19.nii.gz', 'label': 0}, - {'image': '/workspace/data/chest_31.nii.gz', 'label': 1} + {'image': '/workspace/data/chest_19.nii.gz', 'label': '/workspace/labels/chest_19.nii.gz}, + {'image': '/workspace/data/chest_31.nii.gz', 'label': '/workspace/labels/chest_31.nii.gz'}, ] """ From b46ed4110d01eafa32bd6df77d08d9da88fd50da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 16:39:52 +0200 Subject: [PATCH 08/14] add space --- monai/data/decathlon_datalist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 5a87fd7f96..5f71be0f14 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -125,6 +125,8 @@ def load_decathlon_datalist( Each item can also include a ``fold`` key for cross-validation purposes. The "test" key contains a list of image paths, without labels. + + Args: data_list_file_path: the path to the json file of datalist. is_segmentation: whether the datalist is for segmentation task, default is True. 
From 0b9f596dc9873a8cb01d39cd280f5e80c0d77077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 16:41:45 +0200 Subject: [PATCH 09/14] =?UTF-8?q?DCO=20Remediation=20Commit=20for=20Dani?= =?UTF-8?q?=C3=ABl=20Nobbe=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I, Daniël Nobbe , hereby add my Signed-off-by to this commit: 300d737700823732dd0661610711736f860ca131 I, Daniël Nobbe , hereby add my Signed-off-by to this commit: f0dde7acbe6d8ab3bf9d5a057fbd234134a53e38 I, Daniël Nobbe , hereby add my Signed-off-by to this commit: 2648b84dce3a4d7fe2686129f92cc034a39bfaba I, Daniël Nobbe , hereby add my Signed-off-by to this commit: 86c90857bf263e960c990f5f3ae24d5e116cdf7f I, Daniël Nobbe , hereby add my Signed-off-by to this commit: 48afc88ca77dea92f65bbea7132ec75e0a07cbdd I, Daniël Nobbe , hereby add my Signed-off-by to this commit: 761306a695a80c5c293354253e901b9659517c45 I, Daniël Nobbe , hereby add my Signed-off-by to this commit: ed384d717daec8a0f9b8d96ee217c7e59f012e00 Signed-off-by: Daniël Nobbe --- monai/data/decathlon_datalist.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 5f71be0f14..5a87fd7f96 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -125,8 +125,6 @@ def load_decathlon_datalist( Each item can also include a ``fold`` key for cross-validation purposes. The "test" key contains a list of image paths, without labels. - - Args: data_list_file_path: the path to the json file of datalist. is_segmentation: whether the datalist is for segmentation task, default is True. 
From ac2ef23c55c7696ec6a839bbc6b0ce511b84694d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 16:46:06 +0200 Subject: [PATCH 10/14] =?UTF-8?q?DCO=20Remediation=20Commit=20for=20Dani?= =?UTF-8?q?=C3=ABl=20Nobbe=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I, Daniël Nobbe , hereby add my Signed-off-by to this commit: b46ed4110d01eafa32bd6df77d08d9da88fd50da Signed-off-by: Daniël Nobbe --- monai/data/decathlon_datalist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 5a87fd7f96..adf0df1aaf 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -125,6 +125,8 @@ def load_decathlon_datalist( Each item can also include a ``fold`` key for cross-validation purposes. The "test" key contains a list of image paths, without labels. + + Args: data_list_file_path: the path to the json file of datalist. is_segmentation: whether the datalist is for segmentation task, default is True. From 716f4d8196123a4c01df10e781e23925c724feaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Thu, 14 Aug 2025 17:43:07 +0200 Subject: [PATCH 11/14] change highlighting to python to allow ... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniël Nobbe --- monai/data/decathlon_datalist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index adf0df1aaf..0444e39cc4 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -96,7 +96,7 @@ def load_decathlon_datalist( datalist.json files, see http://medicaldecathlon.com. The files are structured as follows: - .. code-block:: json + .. 
code-block:: python { "metadata_key_0": "metadata_value_0", From 2d9c3515583b58b22351ae3eb7f6c11fa8af47fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Fri, 15 Aug 2025 17:16:00 +0200 Subject: [PATCH 12/14] modify docstrings based on coderabbit feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniël Nobbe --- monai/apps/auto3dseg/auto_runner.py | 11 ++++++----- monai/data/decathlon_datalist.py | 8 ++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py index d15041c59c..1fac92b407 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -194,13 +194,14 @@ class AutoRunner: ├── segresnet2d_0 # network scripts/configs/checkpoints and pickle object of the algo └── swinunetr_0 # network scripts/configs/checkpoints and pickle object of the algo - Notes: + The input config requires at least the following keys: - - ``modality``: the modality of the data, e.g. "ct", "mri", etc. - - ``datalist``: the path to the datalist file in JSON format. - - ``dataroot``: the root directory of the data files. + - ``modality``: the modality of the data, e.g. "ct", "mri", etc. + - ``datalist``: the path to the datalist file in JSON format. + - ``dataroot``: the root directory of the data files. - For the datalist file format, see the description under monai.data.decathlon_datalist.load_decathlon_datalist. + For the datalist file format, see the description under :py:func:`monai.data.decathlon_datalist.load_decathlon_datalist`. + Note that the AutoRunner will use the "validation" key in the datalist file if it exists, otherwise it will do cross-validation, by default with five folds (this is hardcoded). 
""" analyze_params: dict | None diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 0444e39cc4..f0352cd10d 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -121,10 +121,10 @@ def load_decathlon_datalist( - and two integer items: ``numTraining`` and ``numTest``, with the number of items. The ``training`` key contains a list of dictionaries, each of which has at least - the ``image`` and ``label`` keys, the latter of which is a path (for segmentation data). + the ``image`` and ``label`` keys. + The image and label are loaded by :py:func:`monai.data.transforms.LoadImaged`, so both can be either a single file path or a list of file paths, in which case they are loaded as multi-channel images. Each item can also include a ``fold`` key for cross-validation purposes. - The "test" key contains a list of image paths, without labels. - + The "test" key contains a list of image paths, without labels, MONAI also supports a "validation" list with the same format as the "training" list. Args: @@ -142,7 +142,7 @@ def load_decathlon_datalist( .. 
code-block:: python [ - {'image': '/workspace/data/chest_19.nii.gz', 'label': '/workspace/labels/chest_19.nii.gz}, + {'image': '/workspace/data/chest_19.nii.gz', 'label': '/workspace/labels/chest_19.nii.gz'}, {'image': '/workspace/data/chest_31.nii.gz', 'label': '/workspace/labels/chest_31.nii.gz'}, ] From d172e2761b887d4526f3c2cd96b0d4ae926efd09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Nobbe?= Date: Fri, 15 Aug 2025 17:28:27 +0200 Subject: [PATCH 13/14] style fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniël Nobbe --- monai/apps/auto3dseg/auto_runner.py | 5 +++-- monai/data/decathlon_datalist.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py index 1fac92b407..78ffeb99a6 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -200,8 +200,9 @@ class AutoRunner: - ``datalist``: the path to the datalist file in JSON format. - ``dataroot``: the root directory of the data files. - For the datalist file format, see the description under :py:func:`monai.data.decathlon_datalist.load_decathlon_datalist`. - Note that the AutoRunner will use the "validation" key in the datalist file if it exists, otherwise it will do cross-validation, by default with five folds (this is hardcoded). + For the datalist file format, see the description under :py:func:`monai.data.load_decathlon_datalist`. + Note that the AutoRunner will use the "validation" key in the datalist file if it exists, otherwise + it will do cross-validation, by default with five folds (this is hardcoded). 
""" analyze_params: dict | None diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index f0352cd10d..0f101e2095 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -122,9 +122,11 @@ def load_decathlon_datalist( The ``training`` key contains a list of dictionaries, each of which has at least the ``image`` and ``label`` keys. - The image and label are loaded by :py:func:`monai.data.transforms.LoadImaged`, so both can be either a single file path or a list of file paths, in which case they are loaded as multi-channel images. + The image and label are loaded by :py:func:`monai.transforms.LoadImaged`, so both can be either + a single file path or a list of file paths, in which case they are loaded as multi-channel images. Each item can also include a ``fold`` key for cross-validation purposes. - The "test" key contains a list of image paths, without labels, MONAI also supports a "validation" list with the same format as the "training" list. + The "test" key contains a list of image paths, without labels. MONAI also supports a "validation" list + with the same format as the "training" list. Args: From f6b50d8a5a8387f1c3226cdfbc6d362bd2eef776 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 Aug 2025 15:29:07 +0000 Subject: [PATCH 14/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/auto3dseg/auto_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py index 78ffeb99a6..28ba2a88f9 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -201,7 +201,7 @@ class AutoRunner: - ``dataroot``: the root directory of the data files. For the datalist file format, see the description under :py:func:`monai.data.load_decathlon_datalist`. 
- Note that the AutoRunner will use the "validation" key in the datalist file if it exists, otherwise + Note that the AutoRunner will use the "validation" key in the datalist file if it exists, otherwise it will do cross-validation, by default with five folds (this is hardcoded). """