Skip to content

Commit b4dec2c

Browse files
Add DWHC datasets 41-60 (#105)
* Add DWHC datasets 41-60 * [github-action] formatting fixes Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent ae8ced2 commit b4dec2c

22 files changed

+737
-0
lines changed

docs/datasets.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,27 @@ The Torchhd library provides many popular built-in datasets to work with.
5959
HayesRoth
6060
HeartCleveland
6161
HeartHungarian
62+
HeartSwitzerland
63+
HeartVa
64+
Hepatitis
65+
HillValley
66+
HorseColic
67+
IlpdIndianLiver
68+
ImageSegmentation
69+
Ionosphere
70+
Iris
71+
LedDisplay
72+
Lenses
73+
Letter
74+
Libras
75+
LowResSpect
76+
LungCancer
77+
Lymphography
78+
Magic
79+
Mammographic
80+
Miniboone
81+
MolecBiolPromoter
82+
6283

6384
Base classes
6485
------------------------

torchhd/datasets/__init__.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,26 @@
4949
from torchhd.datasets.hayes_roth import HayesRoth
5050
from torchhd.datasets.heart_cleveland import HeartCleveland
5151
from torchhd.datasets.heart_hungarian import HeartHungarian
52+
from torchhd.datasets.heart_switzerland import HeartSwitzerland
53+
from torchhd.datasets.heart_va import HeartVa
54+
from torchhd.datasets.hepatitis import Hepatitis
55+
from torchhd.datasets.hill_valley import HillValley
56+
from torchhd.datasets.horse_colic import HorseColic
57+
from torchhd.datasets.ilpd_indian_liver import IlpdIndianLiver
58+
from torchhd.datasets.image_segmentation import ImageSegmentation
59+
from torchhd.datasets.ionosphere import Ionosphere
60+
from torchhd.datasets.iris import Iris
61+
from torchhd.datasets.led_display import LedDisplay
62+
from torchhd.datasets.lenses import Lenses
63+
from torchhd.datasets.letter import Letter
64+
from torchhd.datasets.libras import Libras
65+
from torchhd.datasets.low_res_spect import LowResSpect
66+
from torchhd.datasets.lung_cancer import LungCancer
67+
from torchhd.datasets.lymphography import Lymphography
68+
from torchhd.datasets.magic import Magic
69+
from torchhd.datasets.mammographic import Mammographic
70+
from torchhd.datasets.miniboone import Miniboone
71+
from torchhd.datasets.molec_biol_promoter import MolecBiolPromoter
5272

5373

5474
__all__ = [
@@ -103,4 +123,24 @@
103123
"HayesRoth",
104124
"HeartCleveland",
105125
"HeartHungarian",
126+
"HeartSwitzerland",
127+
"HeartVa",
128+
"Hepatitis",
129+
"HillValley",
130+
"HorseColic",
131+
"IlpdIndianLiver",
132+
"ImageSegmentation",
133+
"Ionosphere",
134+
"Iris",
135+
"LedDisplay",
136+
"Lenses",
137+
"Letter",
138+
"Libras",
139+
"LowResSpect",
140+
"LungCancer",
141+
"Lymphography",
142+
"Magic",
143+
"Mammographic",
144+
"Miniboone",
145+
"MolecBiolPromoter",
106146
]

torchhd/datasets/heart_switzerland.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class HeartSwitzerland(DatasetFourFold):
6+
"""`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "heart-switzerland"
28+
classes: List[str] = [
29+
"0",
30+
"1",
31+
"2",
32+
"3",
33+
"4",
34+
]

torchhd/datasets/heart_va.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class HeartVa(DatasetFourFold):
6+
"""`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "heart-va"
28+
classes: List[str] = [
29+
"0",
30+
"1",
31+
"2",
32+
"3",
33+
"4",
34+
]

torchhd/datasets/hepatitis.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Hepatitis(DatasetFourFold):
6+
"""`Hepatitis <https://archive.ics.uci.edu/ml/datasets/hepatitis>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "hepatitis"
28+
classes: List[str] = [
29+
"die",
30+
"live",
31+
]

torchhd/datasets/hill_valley.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class HillValley(DatasetTrainTest):
6+
"""`Hill-Valley <https://archive.ics.uci.edu/ml/datasets/hill-valley>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "hill-valley"
24+
classes: List[str] = [
25+
"valley",
26+
"hill",
27+
]

torchhd/datasets/horse_colic.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class HorseColic(DatasetTrainTest):
6+
"""`Horse Colic <https://archive.ics.uci.edu/ml/datasets/Horse+Colic>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "horse-colic"
24+
classes: List[str] = [
25+
"Yes, it had surgery",
26+
"It was treated without surgery",
27+
]

torchhd/datasets/ilpd_indian_liver.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class IlpdIndianLiver(DatasetFourFold):
6+
"""`ILPD (Indian Liver Patient Dataset) <https://archive.ics.uci.edu/ml/datasets/ILPD+(Indian+Liver+Patient+Dataset)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "ilpd-indian-liver"
28+
classes: List[str] = [
29+
"liver patient",
30+
"not liver patient",
31+
]
torchhd/datasets/image_segmentation.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class ImageSegmentation(DatasetTrainTest):
6+
"""`Image Segmentation <https://archive.ics.uci.edu/ml/datasets/image+segmentation>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "image-segmentation"
24+
classes: List[str] = [
25+
"brickface",
26+
"sky",
27+
"foliage",
28+
"cement",
29+
"window",
30+
"path",
31+
"grass",
32+
]

torchhd/datasets/ionosphere.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Ionosphere(DatasetFourFold):
6+
"""`Ionosphere <https://archive.ics.uci.edu/ml/datasets/ionosphere>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "ionosphere"
28+
classes: List[str] = [
29+
"good",
30+
"bad",
31+
]

0 commit comments

Comments
 (0)