Skip to content

Commit b1ceb26

Browse files
DWHC datasets 91 121 (#107)
* Add last batch of DWHC datasets * [github-action] formatting fixes Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 981263b commit b1ceb26

33 files changed

+1110
-0
lines changed

docs/datasets.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,37 @@ The Torchhd library provides many popular built-in datasets to work with.
109109
PostOperative
110110
PrimaryTumor
111111
Ringnorm
112+
Seeds
113+
Semeion
114+
Soybean
115+
Spambase
116+
Spect
117+
Spectf
118+
StatlogAustralianCredit
119+
StatlogGermanCredit
120+
StatlogHeart
121+
StatlogImage
122+
StatlogLandsat
123+
StatlogShuttle
124+
StatlogVehicle
125+
SteelPlates
126+
SyntheticControl
127+
Teaching
128+
Thyroid
129+
TicTacToe
130+
Titanic
131+
Trains
132+
Twonorm
133+
VertebralColumn2Clases
134+
VertebralColumn3Clases
135+
WallFollowing
136+
Waveform
137+
WaveformNoise
138+
Wine
139+
WineQualityRed
140+
WineQualityWhite
141+
Yeast
142+
Zoo
112143

113144
Base classes
114145
------------------------

torchhd/datasets/__init__.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,37 @@
9999
from torchhd.datasets.post_operative import PostOperative
100100
from torchhd.datasets.primary_tumor import PrimaryTumor
101101
from torchhd.datasets.ringnorm import Ringnorm
102+
from torchhd.datasets.seeds import Seeds
103+
from torchhd.datasets.semeion import Semeion
104+
from torchhd.datasets.soybean import Soybean
105+
from torchhd.datasets.spambase import Spambase
106+
from torchhd.datasets.spect import Spect
107+
from torchhd.datasets.spectf import Spectf
108+
from torchhd.datasets.statlog_australian_credit import StatlogAustralianCredit
109+
from torchhd.datasets.statlog_german_credit import StatlogGermanCredit
110+
from torchhd.datasets.statlog_heart import StatlogHeart
111+
from torchhd.datasets.statlog_image import StatlogImage
112+
from torchhd.datasets.statlog_landsat import StatlogLandsat
113+
from torchhd.datasets.statlog_shuttle import StatlogShuttle
114+
from torchhd.datasets.statlog_vehicle import StatlogVehicle
115+
from torchhd.datasets.steel_plates import SteelPlates
116+
from torchhd.datasets.synthetic_control import SyntheticControl
117+
from torchhd.datasets.teaching import Teaching
118+
from torchhd.datasets.thyroid import Thyroid
119+
from torchhd.datasets.tic_tac_toe import TicTacToe
120+
from torchhd.datasets.titanic import Titanic
121+
from torchhd.datasets.trains import Trains
122+
from torchhd.datasets.twonorm import Twonorm
123+
from torchhd.datasets.vertebral_column_2clases import VertebralColumn2Clases
124+
from torchhd.datasets.vertebral_column_3clases import VertebralColumn3Clases
125+
from torchhd.datasets.wall_following import WallFollowing
126+
from torchhd.datasets.waveform import Waveform
127+
from torchhd.datasets.waveform_noise import WaveformNoise
128+
from torchhd.datasets.wine import Wine
129+
from torchhd.datasets.wine_quality_red import WineQualityRed
130+
from torchhd.datasets.wine_quality_white import WineQualityWhite
131+
from torchhd.datasets.yeast import Yeast
132+
from torchhd.datasets.zoo import Zoo
102133

103134
__all__ = [
104135
"BeijingAirQuality",
@@ -202,4 +233,35 @@
202233
"PostOperative",
203234
"PrimaryTumor",
204235
"Ringnorm",
236+
"Seeds",
237+
"Semeion",
238+
"Soybean",
239+
"Spambase",
240+
"Spect",
241+
"Spectf",
242+
"StatlogAustralianCredit",
243+
"StatlogGermanCredit",
244+
"StatlogHeart",
245+
"StatlogImage",
246+
"StatlogLandsat",
247+
"StatlogShuttle",
248+
"StatlogVehicle",
249+
"SteelPlates",
250+
"SyntheticControl",
251+
"Teaching",
252+
"Thyroid",
253+
"TicTacToe",
254+
"Titanic",
255+
"Trains",
256+
"Twonorm",
257+
"VertebralColumn2Clases",
258+
"VertebralColumn3Clases",
259+
"WallFollowing",
260+
"Waveform",
261+
"WaveformNoise",
262+
"Wine",
263+
"WineQualityRed",
264+
"WineQualityWhite",
265+
"Yeast",
266+
"Zoo",
205267
]

torchhd/datasets/seeds.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Seeds(DatasetFourFold):
6+
"""`Seeds <https://archive.ics.uci.edu/ml/datasets/seeds>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "seeds"
28+
classes: List[str] = [
29+
"Kama",
30+
"Rosa",
31+
"Canadian",
32+
]

torchhd/datasets/semeion.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Semeion(DatasetFourFold):
6+
"""`Semeion Handwritten Digit <https://archive.ics.uci.edu/ml/datasets/semeion+handwritten+digit>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "semeion"
28+
classes: List[str] = [
29+
"0",
30+
"1",
31+
"2",
32+
"3",
33+
"4",
34+
"5",
35+
"6",
36+
"7",
37+
"8",
38+
"9",
39+
]

torchhd/datasets/soybean.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Soybean(DatasetTrainTest):
6+
"""`Soybean (Large) <https://archive.ics.uci.edu/ml/datasets/Soybean+(Large)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "soybean"
24+
classes: List[str] = [
25+
"diaporthe-stem-canker",
26+
"charcoal-rot",
27+
"rhizoctonia-root-rot",
28+
"phytophthora-rot",
29+
"brown-stem-rot",
30+
"powdery-mildew",
31+
"downy-mildew",
32+
"brown-spot",
33+
"bacterial-blight",
34+
"bacterial-pustule",
35+
"purple-seed-stain",
36+
"anthracnose",
37+
"phyllosticta-leaf-spot",
38+
"alternarialeaf-spot",
39+
"frog-eye-leaf-spot",
40+
"diaporthe-pod-&-stem-blight",
41+
"cyst-nematode",
42+
"herbicide-injury",
43+
]

torchhd/datasets/spambase.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Spambase(DatasetFourFold):
6+
"""`Spambase <https://archive.ics.uci.edu/ml/datasets/spambase>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "spambase"
28+
classes: List[str] = [
29+
"non-spam",
30+
"spam",
31+
]

torchhd/datasets/spect.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Spect(DatasetTrainTest):
6+
"""`SPECT Heart Data <https://archive.ics.uci.edu/ml/datasets/spect+heart>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "spect"
24+
classes: List[str] = [
25+
"normal",
26+
"abnormal",
27+
]

torchhd/datasets/spectf.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Spectf(DatasetTrainTest):
6+
"""`SPECTF Heart Data <https://archive.ics.uci.edu/ml/datasets/SPECTF+Heart>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "spectf"
24+
classes: List[str] = [
25+
"normal",
26+
"abnormal",
27+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class StatlogAustralianCredit(DatasetFourFold):
6+
"""`Statlog (Australian Credit Approval) <https://archive.ics.uci.edu/ml/datasets/statlog+(australian+credit+approval)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "statlog-australian-credit"
28+
classes: List[str] = [
29+
"+",
30+
"-",
31+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class StatlogGermanCredit(DatasetFourFold):
6+
"""`Statlog (German Credit Data) <https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "statlog-german-credit"
28+
classes: List[str] = [
29+
"Good",
30+
"Bad",
31+
]

0 commit comments

Comments
 (0)