Skip to content

Commit 90e339a

Browse files
committed
feat: add Transformer model and layer architecture (wip)
1 parent 75c31f7 commit 90e339a

File tree

8 files changed

+1671
-64
lines changed

8 files changed

+1671
-64
lines changed

examples/classification-regression/sentiment_analysis.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
},
6969
{
7070
"cell_type": "code",
71-
"execution_count": 3,
71+
"execution_count": null,
7272
"metadata": {
7373
"ExecuteTime": {
7474
"end_time": "2024-11-14T19:16:46.274852700Z",
@@ -240,7 +240,7 @@
240240
},
241241
{
242242
"data": {
243-
"text/plain": ""
243+
"text/plain": []
244244
},
245245
"execution_count": 6,
246246
"metadata": {},

examples/generation/transformer-text-generation/transformer-for-translation.ipynb

Lines changed: 390 additions & 0 deletions
Large diffs are not rendered by default.

neuralnetlib/layers.py

Lines changed: 761 additions & 26 deletions
Large diffs are not rendered by default.

neuralnetlib/losses.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,21 @@ def from_config(config: dict) -> 'LossFunction':
3333
@staticmethod
3434
def from_name(name: str) -> "LossFunction":
3535
name = name.lower().replace("_", "")
36-
if name == "mse":
36+
if name == "mse" or name == "meansquarederror":
3737
return MeanSquaredError()
38-
elif name == "bce":
38+
elif name == "bce" or name == "binarycrossentropy":
3939
return BinaryCrossentropy()
40-
elif name == "cce":
40+
elif name == "cce" or name == "categorycrossentropy":
4141
return CategoricalCrossentropy()
42-
elif name == "scce":
42+
elif name == "scce" or name == "sparsecategoricalcrossentropy":
4343
return SparseCategoricalCrossentropy()
44-
elif name == "mae":
44+
elif name == "mae" or name == "meanabsoluteerror":
4545
return MeanAbsoluteError()
46-
elif name == "kld":
46+
elif name == "kld" or name == "kullbackleiblerdivergence":
4747
return KullbackLeiblerDivergence()
48-
elif name.startswith("huber"):
48+
elif name == "sequencecrossentropy" or name == "sce":
49+
return SequenceCrossEntropy()
50+
elif name.startswith("huber") and len(name.split("_")) == 2:
4951
delta = float(name.split("_")[-1])
5052
return HuberLoss(delta)
5153
else:
@@ -162,4 +164,36 @@ def derivative(self, mu: np.ndarray, log_var: np.ndarray) -> tuple:
162164
return d_mu, d_log_var
163165

164166
def __str__(self):
165-
return "KullbackLeiblerDivergence"
167+
return "KullbackLeiblerDivergence"
168+
169+
170+
class SequenceCrossEntropy(LossFunction):
    """Cross-entropy between integer target sequences and per-step predictions.

    ``y_true`` is a 2-D array of integer class indices (batch, sequence) and
    ``y_pred`` is a 3-D array whose last axis is indexed by those class
    indices. ``y_pred`` is expected to hold probabilities: it is clipped to
    ``[1e-10, 1.0]`` before the logarithm / reciprocal is taken.

    Positions whose label lies outside ``[0, y_pred.shape[-1])`` contribute
    nothing to the loss or the gradient, but they are still counted in the
    ``batch * sequence`` normalisation factor.
    """

    def __init__(self):
        super().__init__()

    @staticmethod
    def _target_positions(y_true, num_classes):
        """Return ``(rows, cols, classes)`` index arrays for the usable labels."""
        labels = np.asarray(y_true)
        # Reject negative labels explicitly: a bare ``label < num_classes``
        # test lets e.g. -1 through, and NumPy would then index from the end
        # of the class axis instead of ignoring the position.
        usable = (labels >= 0) & (labels < num_classes)
        rows, cols = np.nonzero(usable)
        return rows, cols, labels[rows, cols].astype(np.intp)

    def __call__(self, y_true, y_pred):
        """Return the summed negative log-likelihood divided by batch * sequence.

        Args:
            y_true: 2-D array of integer class indices.
            y_pred: 3-D array of predicted probabilities; last axis is the class axis.

        Returns:
            Scalar loss value.
        """
        batch_size, seq_len = y_true.shape
        y_pred = np.clip(y_pred, 1e-10, 1.0)

        rows, cols, classes = self._target_positions(y_true, y_pred.shape[-1])
        # Only the probability assigned to the target class matters, so gather
        # those entries directly rather than building a dense one-hot tensor.
        loss = -np.sum(np.log(y_pred[rows, cols, classes]))
        loss = loss / (batch_size * seq_len)
        return loss

    def derivative(self, y_true, y_pred):
        """Return the gradient of the loss with respect to ``y_pred``.

        Args:
            y_true: 2-D array of integer class indices.
            y_pred: 3-D array of predicted probabilities; last axis is the class axis.

        Returns:
            Array shaped like ``y_pred``: ``-1 / p`` at each usable target
            entry (``p`` being the clipped prediction), zero elsewhere, all
            divided by batch * sequence.
        """
        batch_size, seq_len = y_true.shape
        y_pred = np.clip(y_pred, 1e-10, 1.0)

        rows, cols, classes = self._target_positions(y_true, y_pred.shape[-1])
        grad = np.zeros_like(y_pred)
        grad[rows, cols, classes] = -1.0 / y_pred[rows, cols, classes]

        grad = grad / (batch_size * seq_len)
        return grad

neuralnetlib/metrics.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def __init__(self, name: str):
2323
def _get_function_by_name(self, name: str):
2424
if name in ['accuracy', 'accuracy_score', 'accuracy-score', 'acc']:
2525
return accuracy_score
26+
elif name in ['sparse_categorical_accuracy', 'sparse-categorical-accuracy', 'sparse_acc']:
27+
return sparse_categorical_accuracy_score
2628
elif name in ['f1', 'f1_score', 'f1-score']:
2729
return f1_score
2830
elif name in ['recall', 'recall_score', 'recall-score', 'sensitivity', 'rec']:
@@ -64,6 +66,24 @@ def accuracy_score(y_pred: np.ndarray, y_true: np.ndarray, threshold: float = 0.
6466
return np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1))
6567

6668

69+
def sparse_categorical_accuracy_score(y_pred: np.ndarray, y_true: np.ndarray, threshold: float = 0.5) -> float:
    """Return the fraction of samples whose predicted class equals the integer label.

    Args:
        y_pred: Per-class scores of shape (n_samples, n_classes); the predicted
            class is the argmax along axis 1. A 1-D array or a single-column
            2-D array is treated as binary scores and compared to ``threshold``.
        y_true: Integer class indices, either 1-D or a single-column 2-D array.
        threshold: Cut-off applied to single-column predictions; a score
            ``>= threshold`` is read as class 1, otherwise class 0.

    Returns:
        The mean of the element-wise match between predicted and true classes.

    Raises:
        ValueError: If ``y_true`` has more than one dimension and its second
            dimension is not of size 1.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)

    if y_true.ndim > 1:
        if y_true.shape[1] == 1:
            y_true = y_true.ravel()
        else:
            raise ValueError("y_true should be a 1D array of shape (n_samples,) containing integer class indices")

    if y_pred.ndim == 2 and y_pred.shape[1] == 1:
        # argmax over a single column is always 0, which would silently score
        # every sample as class 0; threshold the lone score column instead.
        predicted_classes = (y_pred[:, 0] >= threshold).astype(int)
    else:
        predicted_classes = np.argmax(y_pred, axis=1)

    return np.mean(predicted_classes == y_true)
85+
86+
6787
def precision_score(y_pred: np.ndarray, y_true: np.ndarray, threshold: float = 0.5) -> float:
6888
y_pred, y_true = _reshape_inputs(y_pred, y_true)
6989
if y_pred.shape[1] == 1:

0 commit comments

Comments
 (0)