-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
66 lines (54 loc) · 2.54 KB
/
main.py
File metadata and controls
66 lines (54 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.backend import clear_session
from sklearn.model_selection import KFold
import numpy as np
from model import create_model, create_encoder
from data_loading import load_data, clean_sentence
from plot_results import HistoriesStorage, plot_model_histories
from hyperparameters import *
tweets_data = load_data('Data_tweets.csv')
tweets_data = tweets_data[[1, 6]]
tweets_data.columns = ['Class', 'Tweet']
# One-hot-encoding classes
def substitute_classes(x):
"""Maps classes (0,2,4) to indexes (0,1,2)"""
sub_dict = {0: 0, 2: 1, 4: 2}
return sub_dict[x]
# Clean tweets
inputs = np.array(tweets_data['Tweet'].apply(lambda x: clean_sentence(x)))
# One-hot-encode classes
targets = tweets_data["Class"].apply(lambda x: substitute_classes(x))
targets = to_categorical(targets, num_classes=3)
# Fit encoder on input data - create vocabulary of NUM_WORDS most frequent words
encoder = create_encoder(inputs)
test_accuracies = []
histories_storage = HistoriesStorage()
kfold = KFold(n_splits=NUM_FOLDS, shuffle=True)
fold_no = 1
# Perform a model cross-validation for obtaining more reliable results of the model
for train_indexes, test_indexes in kfold.split(inputs, targets):
clear_session() # restart Keras global status
train_inputs, train_targets, test_inputs, test_targets = inputs[train_indexes], targets[train_indexes], \
inputs[test_indexes], targets[test_indexes]
model = create_model(encoder)
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print('------------------------------------------------------------------------')
print(f'Training for fold {fold_no} ...')
history = model.fit(train_inputs, train_targets,
batch_size=BATCH_SIZE,
epochs=NUMBER_OF_EPOCHS)
histories_storage.store_history(history)
train_loss, train_acc = model.evaluate(train_inputs, train_targets, verbose=False)
print(f'Train Loss: {train_loss}')
print(f'Train Accuracy: {train_acc}')
test_loss, test_acc = model.evaluate(test_inputs, test_targets, verbose=False)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_acc}')
test_accuracies.append(test_acc)
fold_no += 1
print(f'Best Test acc: {max(test_accuracies)}')
print(f'Worst Test acc: {min(test_accuracies)}')
print(f'Avg Test acc: {np.mean(test_accuracies)}')
plot_model_histories(histories_storage, 'accuracy')