
Commit c8f1794

docs(readme): update quick examples
1 parent bfbe103 commit c8f1794

1 file changed: 110 additions, 0 deletions

README.md

Lines changed: 110 additions & 0 deletions
@@ -138,6 +138,116 @@ model.save('my_model.json')
model = Model.load('my_model.json')
```

### Image Compression

```python
X, y = fetch_openml('Fashion-MNIST', version=1, return_X_y=True, as_frame=False)
X = X.astype('float32') / 255.

X = X.reshape(-1, 28, 28, 1)

X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

autoencoder = Autoencoder(random_state=42, skip_connections=True)

# Encoder: 28x28x1 -> 14x14x16 -> 7x7x32 -> 64-dimensional bottleneck
autoencoder.add_encoder_layer(Input((28, 28, 1)))
autoencoder.add_encoder_layer(Conv2D(16, kernel_size=(3, 3), strides=(2, 2), activation='relu', padding='same'))
autoencoder.add_encoder_layer(Conv2D(32, kernel_size=(3, 3), strides=(2, 2), activation='relu', padding='same'))

autoencoder.add_encoder_layer(Flatten())
autoencoder.add_encoder_layer(Dense(64, activation='relu'))  # Bottleneck

# Decoder: 64 -> 7x7x32 -> 14x14x16 -> 28x28x1
autoencoder.add_decoder_layer(Dense(7 * 7 * 32, activation='relu'))
autoencoder.add_decoder_layer(Reshape((7, 7, 32)))

autoencoder.add_decoder_layer(UpSampling2D(size=(2, 2)))  # Output: 14x14x32
autoencoder.add_decoder_layer(Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same'))

autoencoder.add_decoder_layer(UpSampling2D(size=(2, 2)))  # Output: 28x28x16
autoencoder.add_decoder_layer(Conv2D(1, kernel_size=(3, 3), activation='sigmoid', padding='same'))  # Output: 28x28x1

autoencoder.compile(encoder_loss='mse', decoder_loss='mse', encoder_optimizer='adam', decoder_optimizer='adam', verbose=True)

history = autoencoder.fit(X_train, epochs=5, batch_size=256, validation_data=(X_test,), verbose=True)
```
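
A quick way to check the compression quality is to encode and reconstruct the held-out images. A minimal sketch continuing from the snippet above, assuming the `Autoencoder` exposes `encode` and `predict` methods (hypothetical names, check the API reference):

```python
# Hypothetical usage sketch: `encode` and `predict` are assumed method names,
# not confirmed API; adapt to the actual Autoencoder interface.
codes = autoencoder.encode(X_test)             # compressed codes from the 64-unit bottleneck
reconstructions = autoencoder.predict(X_test)  # decoded 28x28x1 images

print(codes.shape)            # expected: (n_test, 64)
print(reconstructions.shape)  # expected: (n_test, 28, 28, 1)
```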

### Image Generation

```python
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
n_classes = np.unique(y_train).shape[0]

# Flatten images
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Normalize pixel values
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Labels to categorical
y_train = one_hot_encode(y_train, n_classes)
y_test = one_hot_encode(y_test, n_classes)

noise_dim = 32

# Generator: noise vector -> 784-dimensional flattened image
generator = Sequential()
generator.add(Input(noise_dim))
generator.add(Dense(128, input_dim=noise_dim, activation='relu'))
generator.add(Dense(784, activation='sigmoid'))

# Discriminator: flattened image -> real/fake probability
discriminator = Sequential()
discriminator.add(Input(784))
discriminator.add(Dense(128, input_dim=784, activation='relu'))
discriminator.add(Dense(1, activation='sigmoid'))

gan = GAN(latent_dim=noise_dim)

gan.compile(generator, discriminator, generator_optimizer='adam', discriminator_optimizer='adam', loss_function='bce', verbose=True)

history = gan.fit(x_train, epochs=40, batch_size=128, plot_generated=True)
```
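
After training, new images can be sampled by pushing random noise through the generator. A minimal sketch continuing from the snippet above, assuming the generator `Sequential` exposes a `predict` method (hypothetical name, check the API reference):

```python
import numpy as np

# Hypothetical usage sketch: `generator.predict` is an assumed method name,
# not confirmed API; adapt to the actual Sequential/GAN interface.
noise = np.random.normal(0, 1, size=(16, noise_dim))  # 16 random latent vectors
samples = generator.predict(noise)                    # shape: (16, 784)
images = samples.reshape(-1, 28, 28)                  # back to 28x28 grayscale images
```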

### Text Generation (translation example)

```python
df = pd.read_csv("dataset.tsv", sep="\t")
df.iloc[:, 1] = df.iloc[:, 1].apply(lambda x: re.sub(r'\\x[a-fA-F0-9]{2}|\\u[a-fA-F0-9]{4}|\xa0|\u202f', ' ', x))  # remove escaped and non-breaking unicode characters

LIMIT = 1000
fr_sentences = df.iloc[:, 1].values.tolist()[0:LIMIT]
en_sentences = df.iloc[:, 3].values.tolist()[0:LIMIT]

# Empty filters, otherwise the tokenizer would strip special characters, including punctuation
fr_tokenizer = Tokenizer(filters="", mode="word")
en_tokenizer = Tokenizer(filters="", mode="word")

fr_tokenizer.fit_on_texts(fr_sentences, preprocess_ponctuation=True)
en_tokenizer.fit_on_texts(en_sentences, preprocess_ponctuation=True)

X = fr_tokenizer.texts_to_sequences(fr_sentences, preprocess_ponctuation=True, add_special_tokens=True)
y = en_tokenizer.texts_to_sequences(en_sentences, preprocess_ponctuation=True, add_special_tokens=True)

max_len_x = max(len(seq) for seq in X)
max_len_y = max(len(seq) for seq in y)
max_seq_len = max(max_len_x, max_len_y)

vocab_size_fr = len(fr_tokenizer.word_index)
vocab_size_en = len(en_tokenizer.word_index)
max_vocab_size = max(vocab_size_fr, vocab_size_en)

X = pad_sequences(X, max_length=max_seq_len, padding='post', pad_value=fr_tokenizer.PAD_IDX)
y = pad_sequences(y, max_length=max_seq_len, padding='post', pad_value=en_tokenizer.PAD_IDX)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = Transformer(src_vocab_size=vocab_size_fr, tgt_vocab_size=vocab_size_en, d_model=512, n_heads=8, n_encoder_layers=8, n_decoder_layers=10, d_ff=2048, dropout_rate=0.1, max_sequence_length=max_seq_len, random_state=42)

model.compile(loss_function="cels", optimizer=Adam(learning_rate=5e-5, beta_1=0.9, beta_2=0.98, epsilon=1e-9, clip_norm=1.0), verbose=True)

history = model.fit(x_train, y_train, epochs=50, batch_size=32, verbose=True, callbacks=[EarlyStopping(monitor='loss', patience=20), LearningRateScheduler(schedule="warmup_cosine", initial_learning_rate=5e-5, verbose=True)], validation_data=(x_test, y_test), metrics=['bleu_score'])
```
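
Once trained, the model can translate an unseen French sentence by tokenizing and padding it exactly as the training data was. A minimal sketch continuing from the snippet above, assuming a `predict` method on `Transformer` and a `sequences_to_texts` method on `Tokenizer` (hypothetical names, check the API reference):

```python
# Hypothetical usage sketch: `model.predict` and `en_tokenizer.sequences_to_texts`
# are assumed method names, not confirmed API; adapt to the actual interface.
sentence = "Bonjour, comment allez-vous ?"
seq = fr_tokenizer.texts_to_sequences([sentence], preprocess_ponctuation=True, add_special_tokens=True)
seq = pad_sequences(seq, max_length=max_seq_len, padding='post', pad_value=fr_tokenizer.PAD_IDX)

pred_ids = model.predict(seq)                      # predicted target token ids
print(en_tokenizer.sequences_to_texts(pred_ids))   # decoded English translation
```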

## 📜 Output of the example file

### Here is the decision boundary on a Binary Classification (breast cancer dataset):
