"""
Title: Timeseries Classification with LSTM and CNN Models
Author: [Georgios Sklavounakos](https://github.com/gsklavounakos)
Date created: 2025/06/05
Last modified: 2025/06/05
Description: Comparing LSTM and 1D CNN models for timeseries classification on the FordA dataset from the UCR/UEA archive.
Accelerator: GPU
"""

"""
## Introduction

This example demonstrates how to perform timeseries classification using two deep learning models:
a Long Short-Term Memory (LSTM) model, which processes sequences with recurrent layers, and a 1D Convolutional
Neural Network (CNN) model, which uses convolutions to detect local temporal patterns. We use the FordA dataset
from the [UCR/UEA archive](https://www.cs.ucr.edu/~eamonn/time_series_data_2018/), which contains univariate
timeseries of engine noise measurements for binary classification (fault detection). Both models are trained
and evaluated under identical conditions so that their performance can be compared directly.

The workflow includes:
- Loading and preprocessing the FordA dataset
- Building and training LSTM and CNN models
- Evaluating and comparing their performance
- Visualizing training metrics
"""

"""
## Setup
"""

import numpy as np
import keras
from keras import layers
import matplotlib.pyplot as plt

"""
## Load the Data: FordA Dataset

### Dataset Description

The FordA dataset contains 3601 training instances and 1320 testing instances, each a univariate timeseries of
500 timesteps representing engine noise measurements. The task is to classify whether a fault is present (label 1)
or not (label -1). The data is z-normalized (mean=0, std=1) and sourced from the UCR/UEA archive. For details, see
[the dataset description](http://www.j-wichard.de/publications/FordPaper.pdf).
"""

def readucr(filename):
    """Read a UCR timeseries dataset from a tab-separated (TSV) file."""
    data = np.loadtxt(filename, delimiter="\t")
    y = data[:, 0].astype(int)
    x = data[:, 1:].astype(np.float32)
    return x, y

root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv")
x_test, y_test = readucr(root_url + "FordA_TEST.tsv")

"""
## Visualize the Data

We plot one example timeseries from each class to understand the data's characteristics.
"""

classes = np.unique(np.concatenate((y_train, y_test), axis=0))

plt.figure(figsize=(8, 4))
for c in classes:
    c_x_train = x_train[y_train == c]
    plt.plot(c_x_train[0], label=f"class {c}")
plt.title("Sample Time Series from Each Class")
plt.xlabel("Timestep")
plt.ylabel("Amplitude")
plt.legend(loc="best")
plt.show()
plt.close()

"""
## Preprocess the Data

The timeseries are already z-normalized and of fixed length (500 timesteps). We reshape the data to
`(samples, timesteps, 1)` so that each univariate series becomes a sequence with a single feature channel,
the input format expected by both the LSTM and Conv1D layers. We also remap the labels from {-1, 1} to {0, 1}
for binary classification, and shuffle the training set so that the validation split taken by `fit()` is representative.
"""

x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

# Standardize labels: map {-1, 1} to {0, 1}
y_train = np.where(y_train == -1, 0, y_train)
y_test = np.where(y_test == -1, 0, y_test)

# Shuffle training data for validation split
idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

num_classes = len(np.unique(y_train))

"""
## Build the Models

We define two models:
1. **LSTM Model**: A recurrent model with two LSTM layers for sequential processing, with dropout for regularization.
2. **CNN Model**: A convolutional model with 1D convolutions, batch normalization, max-pooling, and global average pooling,
inspired by [this paper](https://arxiv.org/abs/1611.06455).

Both models use similar hyperparameters for a fair comparison.
"""

def build_lstm_model(input_shape, num_classes):
    """Build an LSTM-based model for timeseries classification."""
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3),
        layers.LSTM(32, dropout=0.3, recurrent_dropout=0.3),
        layers.Dense(16, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),
    ])
    return model

def build_cnn_model(input_shape, num_classes):
    """Build a 1D CNN-based model for timeseries classification."""
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv1D(filters=64, kernel_size=7, padding="same", activation="relu"),
        layers.BatchNormalization(),
        layers.MaxPooling1D(pool_size=2),
        layers.Dropout(0.3),
        layers.Conv1D(filters=128, kernel_size=5, padding="same", activation="relu"),
        layers.BatchNormalization(),
        layers.MaxPooling1D(pool_size=2),
        layers.Dropout(0.3),
        layers.GlobalAveragePooling1D(),
        layers.Dense(16, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),
    ])
    return model

input_shape = x_train.shape[1:]
lstm_model = build_lstm_model(input_shape, num_classes)
cnn_model = build_cnn_model(input_shape, num_classes)

"""
## Train the Models

We train both models with identical settings: Adam optimizer, sparse categorical crossentropy loss,
and early stopping to prevent overfitting. We save the best model weights based on validation loss
and reload them for evaluation.
"""

epochs = 100
batch_size = 32

def train_model(model, model_name, x_train, y_train, x_test, y_test):
    """Train and evaluate a model, return history and test metrics."""
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )

    history = model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                f"best_{model_name}_model.keras",
                save_best_only=True,
                monitor="val_loss",
            ),
            keras.callbacks.EarlyStopping(
                monitor="val_loss",
                patience=20,
                restore_best_weights=True,
            ),
        ],
        verbose=1,
    )

    # Load the best checkpoint for evaluation
    best_model = keras.models.load_model(f"best_{model_name}_model.keras")
    test_loss, test_acc = best_model.evaluate(x_test, y_test, verbose=0)
    print(f"{model_name} Test Accuracy: {test_acc:.4f}, Loss: {test_loss:.4f}")

    return history, test_acc, test_loss

# Train LSTM model
print("Training LSTM model...")
lstm_history, lstm_test_acc, lstm_test_loss = train_model(lstm_model, "LSTM", x_train, y_train, x_test, y_test)

# Train CNN model
print("Training CNN model...")
cnn_history, cnn_test_acc, cnn_test_loss = train_model(cnn_model, "CNN", x_train, y_train, x_test, y_test)

"""
## Visualize Training Metrics

We plot the training and validation accuracy and loss for both models to compare their performance.
"""

def plot_training_metrics(histories, model_names):
    """Plot training and validation accuracy/loss for multiple models."""
    plt.figure(figsize=(12, 4))

    # Accuracy plot
    plt.subplot(1, 2, 1)
    for history, name in zip(histories, model_names):
        plt.plot(history.history["sparse_categorical_accuracy"], label=f"{name} Train")
        plt.plot(history.history["val_sparse_categorical_accuracy"], linestyle="--", label=f"{name} Val")
    plt.title("Model Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    # Loss plot
    plt.subplot(1, 2, 2)
    for history, name in zip(histories, model_names):
        plt.plot(history.history["loss"], label=f"{name} Train")
        plt.plot(history.history["val_loss"], linestyle="--", label=f"{name} Val")
    plt.title("Model Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    plt.tight_layout()
    plt.show()

plot_training_metrics([lstm_history, cnn_history], ["LSTM", "CNN"])

"""
## Evaluate and Compare Models

We compare the test accuracy and loss of both models to assess their performance.
"""

print("\nModel Comparison:")
print(f"LSTM Test Accuracy: {lstm_test_acc:.4f}, Loss: {lstm_test_loss:.4f}")
print(f"CNN Test Accuracy: {cnn_test_acc:.4f}, Loss: {cnn_test_loss:.4f}")

"""
## Conclusions

This example compared an LSTM-based model and a 1D CNN-based model for timeseries classification
on the FordA dataset. The LSTM model leverages sequential dependencies, while the CNN model captures
local temporal patterns through convolutions. In practice, the CNN tends to converge in fewer epochs,
helped by batch normalization and global average pooling, while the LSTM can capture longer-range
dependencies but is slower to train because of its step-by-step recurrence.

To improve performance, consider:
- Tuning hyperparameters (e.g., number of layers, units, or kernel sizes) using Keras Tuner.
- Adding further regularization (e.g., L2 regularization) to prevent overfitting.
- Experimenting with hybrid architectures combining LSTM and CNN layers.
- Using data augmentation techniques for timeseries, such as jittering or scaling (a minimal sketch follows below).
"""