-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataset_Classifier_Tool.py
More file actions
133 lines (103 loc) · 5.55 KB
/
Dataset_Classifier_Tool.py
File metadata and controls
133 lines (103 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 22:26:58 2022
@author: Antonio Vispi
"""
import argparse
import os
import numpy as np
import skimage.io as img
def extrapolate_parameters(input_percentages, input_classes):
    """Parse the '%'-delimited percentage and class-name strings.

    Both arguments use the form ``%a%b%c%``: every field is the text found
    between two consecutive ``%`` characters. Text before the first or after
    the last ``%`` is ignored.

    Args:
        input_percentages: delimited string of numeric values, one per class.
        input_classes: delimited string of class names, in the same order.

    Returns:
        A ``(percentages, classes)`` tuple: a float NumPy array and a list of
        strings of the same length. When no field is found, or the two
        strings hold a different number of fields, an error message is
        printed and an empty array and an empty list are returned.

    Raises:
        TypeError: if either argument is not a string.
        ValueError: if a percentage field cannot be converted to a float.
    """
    delimiter = '%'
    for text in (input_percentages, input_classes):
        if not isinstance(text, str):
            raise TypeError(
                "expected a '%'-delimited string, got " + type(text).__name__
            )

    # split() yields the text around every delimiter; dropping the first and
    # last pieces keeps exactly the fields enclosed by two delimiters. A
    # string without any delimiter therefore yields no field instead of
    # requesting a negatively sized array.
    raw_percentages = input_percentages.split(delimiter)[1:-1]
    classes = input_classes.split(delimiter)[1:-1]
    percentages = np.array([float(value) for value in raw_percentages], dtype=float)

    if len(classes) != len(percentages) or not classes:
        print('Error! Double-check that the percentage vector and the class vector are in the recommended format.')
        return np.zeros(0), []
    return percentages, classes
def counter(directory):
    """Return how many entries ``os.listdir`` reports for *directory*.

    Every entry is counted, whatever its kind; *directory* may be given as
    ``str`` or ``bytes``.
    """
    entries = os.listdir(directory)
    return len(entries)
# Informational banner announcing the 70/20/10 training/test/validation split.
# NOTE(review): indentation was lost in this copy of the file; these two calls
# appear to be module-level statements (executed on import) - confirm.
print('Image splitting is 70% for the training set, 20% for the test set, and 10% for the validation set.')
print('\n')
def _save_subset(indices, names, source_dir, target_dir):
    """Re-save the images selected by *indices* into *target_dir*.

    Each file is decoded with ``img.imread`` and written back with
    ``img.imsave`` under its original name (so it is re-encoded, not copied
    byte-for-byte). Returns the number of images written.
    """
    saved = 0
    for index in indices:
        name = names[index]
        image = img.imread(os.path.join(source_dir, name))
        img.imsave(os.path.join(target_dir, name), image)
        saved += 1
    return saved


def make_dataset(path_in, path_out, percentages_in, classes_in, classes_out):
    """Build a test/training/validation image dataset under *path_out*.

    For every input class a random selection of its images is taken and split
    20% / 70% / 10% into ``test_set/test_set/<class>``,
    ``training_set/training_set/<class>`` and ``val_set/val_set/<class>``.
    A ``classes.txt`` file listing the output class names, one per line, is
    written to *path_out*.

    Args:
        path_in: directory holding one sub-directory per input class.
        path_out: destination directory; created when missing.
        percentages_in: '%'-delimited string with, per class, the fraction of
            its images to use (1 means all of them). Values outside [0, 1]
            are clamped to that range.
        classes_in: '%'-delimited string naming the sub-directories of
            *path_in* to read, in order.
        classes_out: '%'-delimited string naming the matching output class
            directories, in the same order.

    Returns:
        None. If the class lists cannot be paired, or are empty, a message is
        printed and nothing is written.
    """
    percentages, classes = extrapolate_parameters(percentages_in, classes_in)
    percentages, classes_output = extrapolate_parameters(percentages_in, classes_out)
    if len(classes_output) != len(classes):
        print('Mistake! Double check that the names of the input and output classes are in the right order and in equal number')
        # Going on would pair classes with the wrong folders or fail with an
        # IndexError halfway through, so stop before touching the disk.
        return
    if len(classes) == 0:
        print('No classes to process: nothing was written.')
        return

    subsets = ('test_set', 'training_set', 'val_set')
    os.makedirs(path_out, exist_ok=True)
    for subset in subsets:
        for class_name in classes_output:
            os.makedirs(os.path.join(path_out, subset, subset, class_name), exist_ok=True)

    # Record the output class names, one per line.
    string = ''.join(class_name + '\n' for class_name in classes_output)
    with open(os.path.join(path_out, 'classes.txt'), 'w') as fp:
        fp.write(string)
    print("File .txt just created:")
    print('\n')
    print(string)

    for i, (class_in, class_out) in enumerate(zip(classes, classes_output)):
        source_dir = os.path.join(path_in, class_in)
        all_images = os.listdir(source_dir)
        images_num = len(all_images)

        # Clamp the requested fraction so the slice bounds below can never
        # run past the images that actually exist.
        fraction = min(max(float(percentages[i]), 0.0), 1.0)
        selected = int(round(images_num * fraction))
        test_end = int(round(((images_num * fraction) / 100) * 20))
        train_end = int(round(((images_num * fraction) / 100) * 90))

        # Half-open slices [0, test_end), [test_end, train_end) and
        # [train_end, selected) give the 20/70/10 split with no image
        # skipped or duplicated; an empty folder yields three empty slices.
        order = np.random.permutation(images_num)
        targets = [os.path.join(path_out, subset, subset, class_out) for subset in subsets]

        # TEST SET
        counter_test = _save_subset(order[:test_end], all_images, source_dir, targets[0])
        print('A total of '+str(counter_test)+' images of the '+class_out+' class were saved in the test set...')
        # TRAINING SET
        counter_train = _save_subset(order[test_end:train_end], all_images, source_dir, targets[1])
        print('A total of '+str(counter_train)+' images of the '+class_out+' class were saved in the training set...')
        # VALIDATION SET
        counter_val = _save_subset(order[train_end:selected], all_images, source_dir, targets[2])
        print('A total of '+str(counter_val)+' images of the '+class_out+' class were saved in the validation set...')

        print('A total of '+str(counter_test+counter_train+counter_val)+' images of the '+class_out+' class were saved.')
        print('\n')
    print('The dataset was successfully defined or updated.')
def main():
    """Parse the command-line options and build the dataset.

    All five options are mandatory; they are forwarded unchanged to
    ``make_dataset``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path_in', required=True, help='input path of the directory containing all classes')
    parser.add_argument('--path_out', required=True, help='output path for saving the entire dataset')
    # argparse applies %-formatting to help strings, so a literal percent
    # sign has to be written as '%%' or printing the help fails.
    parser.add_argument('--percentages_in', required=True, help='Ordered percentages of each single class [0,1], where 1 means 100%%, of the images to be taken from the input dataset.')
    parser.add_argument('--classes_in', required=True, help='Ordered names of all classes belonging to path_in')
    parser.add_argument('--classes_out', required=True, help='Ordered names of all classes belonging to the output destination path (path_out).')
    args = parser.parse_args()
    make_dataset(args.path_in, args.path_out, args.percentages_in, args.classes_in, args.classes_out)


# Run the command-line interface only when the file is executed as a script.
if __name__ == '__main__':
    main()