Skip to content
This repository was archived by the owner on Apr 12, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 182 additions & 1 deletion iids/classifier/mlclassifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,189 @@ def __init__(self, data, features_list="all", criterion="gini", splitter="best",
self.data = data[features_list]


def train(self):
    """Fit ``model`` on the training split and score it on the test split.

    The names ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    are not defined in this method; they must be resolvable from the
    enclosing module when the method is called.

    Returns:
        tuple: ``(model, score)`` where ``score`` is the value returned by
        ``model.score(X_test, y_test)``.
    """
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(f'The score by {model} is {score}')
    return (model, score)

def save_model(self, model, filename):
    """Save *model* to disk with ``joblib.dump``.

    Args:
        model: Object to serialise (typically the fitted estimator).
        filename: Destination passed unchanged to ``joblib.dump``.

    Returns:
        bool: ``True`` once the dump has completed.

    Raises:
        IOError: If the dump fails for any reason. The underlying exception
            is attached as ``__cause__``.
    """
    try:
        joblib.dump(model, filename)
    except Exception as e:
        # Surface every failure as IOError while keeping the root cause.
        raise IOError("Error saving model data to disk: {}".format(str(e))) from e
    print("Model saved to the disk")
    return True


class RandomForest():
    """Data holder plus train/save helpers for a random-forest classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): presumably the estimator is built elsewhere from the same
    values -- confirm against the caller.

    Args:
        criterion ... class_weight: Random-forest hyperparameters (unused
            inside this class).
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, criterion="gini", min_samples_split=2, min_samples_leaf=1,
                 min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None,
                 min_impurity_decrease=0.0, bootstrap=True, n_jobs=-1, verbose=1,
                 ccp_alpha=0.0, max_samples=None, n_estimators=100, max_depth=2,
                 random_state=0, class_weight='balanced',
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.
        NOTE(review): the original comment says ``model`` is declared as a
        global in views.py; a global of another module is not visible here
        unless it is imported -- confirm.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True



class KNeighbors():
    """Data holder plus train/save helpers for a k-nearest-neighbours classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): presumably the estimator is built elsewhere from the same
    values -- confirm against the caller.

    Args:
        algorithm ... weights: Nearest-neighbour hyperparameters (unused
            inside this class).
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=3, p=2, weights='uniform',
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True



class GaussianProcess():
    """Data holder plus train/save helpers for a Gaussian-process classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): presumably the estimator is built elsewhere from the same
    values -- confirm against the caller.

    Args:
        copy_X_train ... warm_start: Gaussian-process hyperparameters (unused
            inside this class). The ``kernel`` default is evaluated once,
            when the class body is executed.
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, copy_X_train=True, kernel=1**2 * RBF(length_scale=1),
                 max_iter_predict=100, multi_class='one_vs_rest', n_jobs=None,
                 n_restarts_optimizer=0, optimizer='fmin_l_bfgs_b', random_state=None,
                 warm_start=False,
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    # The original carried a second, body-less `def train(self):` stub above
    # this one, which is a syntax error; only the real definition is kept.
    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True


class AdaBoost():
    """Data holder plus train/save helpers for an AdaBoost classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): presumably the estimator is built elsewhere from the same
    values -- confirm against the caller.

    Args:
        algorithm ... random_state: AdaBoost hyperparameters (unused inside
            this class).
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                 n_estimators=50, random_state=None,
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True



class KNN():
    """Data holder plus train/save helpers for a k-nearest-neighbours classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): this class has the same signature and methods as
    ``KNeighbors`` in this file -- confirm whether both are needed.

    Args:
        algorithm ... weights: Nearest-neighbour hyperparameters (unused
            inside this class).
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=3, p=2, weights='uniform',
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True

class MLP():
    """Data holder plus train/save helpers for a multi-layer-perceptron classifier.

    The hyperparameter arguments are accepted but are not forwarded to any
    estimator inside this class.
    NOTE(review): presumably the estimator is built elsewhere from the same
    values -- confirm against the caller.

    Args:
        activation ... warm_start: MLP hyperparameters (unused inside this
            class).
        data: Container indexed with ``data[features_list]`` when a subset
            of features is requested.
        features_list: ``"all"`` to keep ``data`` as is, otherwise the key
            used to index ``data``.

    Raises:
        ValueError: If a feature subset is requested without ``data``.
    """

    def __init__(self, activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
                 beta_2=0.999, early_stopping=False, epsilon=1e-08,
                 hidden_layer_sizes=(100,), learning_rate='constant',
                 learning_rate_init=0.001, max_fun=15000, max_iter=1000, momentum=0.9,
                 n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
                 random_state=None, shuffle=True, solver='adam', tol=0.0001,
                 validation_fraction=0.1, verbose=False, warm_start=False,
                 data=None, features_list="all"):
        # `data` / `features_list` were read by the original body without
        # being parameters; they are appended so positional callers of the
        # hyperparameters keep working.
        self.features_list = features_list
        # The isinstance guard keeps array-like selections from being compared
        # element-wise against the string "all".
        if isinstance(features_list, str) and features_list == "all":
            self.data = data
        else:
            if data is None:
                raise ValueError("data is required when selecting a subset of features")
            self.data = data[features_list]

    # Fit, score and save helpers.

    def train(self):
        """Fit ``model`` on the training split and score it on the test split.

        ``model``, ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not
        defined in this class and must be resolvable from the module.
        NOTE(review): the original comment says ``model`` is declared as a
        global in views.py; a global of another module is not visible here
        unless it is imported -- confirm.

        Returns:
            tuple: ``(model, score)``.
        """
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print(f'The score by {model} is {score}')
        return model, score

    def save_model(self, model, filename):
        """Save *model* to *filename* with ``joblib.dump``.

        Returns:
            bool: ``True`` once the dump has completed.

        Raises:
            IOError: If the dump fails; the cause is chained.
        """
        try:
            joblib.dump(model, filename)
        except Exception as e:
            raise IOError("Error saving model data to disk: {}".format(str(e))) from e
        print("Model saved to the disk")
        return True

# TODO: parallelize data processing for faster training
# TODO: instead of taking the whole dataset, take one input at a time



195 changes: 195 additions & 0 deletions iids/classifier/nnclassifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#neural net classifier
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable


class CNN(nn.Module):
    """Two-convolution image classifier with a three-layer linear head.

    Feature extractor: 3x3 conv (padding 1) -> batch norm -> ReLU ->
    3x3 conv (no padding) -> batch norm -> ReLU -> 2x2 max pool.
    Head: Linear 144 -> 512 -> 256 -> ``n_class`` with no activation between
    the linear layers. The head expects 144 flattened features, i.e.
    16 channels x 3 x 3, which an 8x8 input produces.

    Args:
        in_dim: Number of input channels.
        n_class: Number of output scores per sample.
    """

    def __init__(self, in_dim, n_class):
        super().__init__()

        feature_layers = [
            nn.Conv2d(in_dim, 6, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Linear(144, 512),
            nn.Linear(512, 256),
            nn.Linear(256, n_class),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class scores for a batch ``x``."""
        features = self.conv(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


class RNNModel(nn.Module):
    """ReLU ``nn.RNN`` followed by a linear layer on the last time step.

    Args:
        input_dim: Feature size of each time step.
        hidden_dim: Hidden-state size of the RNN.
        layer_dim: Number of stacked RNN layers.
        output_dim: Size of the final linear output.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim,
                          batch_first=True, nonlinearity='relu')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` (batch first) and project the last step.

        Args:
            x: Tensor indexed as ``(batch, step, feature)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # Build the zero initial state on the input's device and dtype so the
        # model also works after `.cuda()` / `.double()`; the original always
        # created it as a CPU float32 tensor.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         dtype=x.dtype, device=x.device)
        out, _ = self.rnn(x, h0)
        # Only the hidden output of the final time step feeds the classifier.
        return self.fc(out[:, -1, :])




class Autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel inputs.

    The encoder applies two 5x5 convolutions (3 -> 6 -> 16 channels), each
    followed by ReLU; the decoder mirrors it with two 5x5 transposed
    convolutions (16 -> 6 -> 3 channels), each followed by ReLU.
    """

    def __init__(self):
        super().__init__()

        encoder_layers = (
            nn.Conv2d(3, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
        )
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = (
            nn.ConvTranspose2d(16, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(6, 3, kernel_size=5),
            nn.ReLU(inplace=True),
        )
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        return self.decoder(self.encoder(x))


class SOM(nn.Module):
    """Self-organising-map grid of ``m * n`` units with ``dim``-sized weights.

    Args:
        m: Number of grid rows.
        n: Number of grid columns.
        dim: Length of every weight vector.
        niter: Stored as ``self.niter``; not used by the methods of this class.
        alpha: Stored as a float; defaults to ``0.3``.
        sigma: Stored as a float; defaults to ``max(m, n) / 2.0``.

    Attributes set here: ``weights`` is a ``(m * n, dim)`` tensor drawn from
    ``torch.randn``; ``locations`` is a ``(m * n, 2)`` long tensor of
    ``[row, col]`` grid coordinates in row-major order. Both are plain tensor
    attributes, not registered parameters or buffers.
    """

    def __init__(self, m, n, dim, niter, alpha=None, sigma=None):
        super().__init__()
        self.m = m
        self.n = n
        self.dim = dim
        self.niter = niter
        self.alpha = 0.3 if alpha is None else float(alpha)
        self.sigma = max(m, n) / 2.0 if sigma is None else float(sigma)

        self.weights = torch.randn(m * n, dim)
        # An explicit dtype keeps this independent of the platform's default
        # numpy integer width.
        grid = np.array(list(self.neuron_locations()))
        self.locations = torch.as_tensor(grid, dtype=torch.long)
        self.pdist = nn.PairwiseDistance(p=2)

    def get_weights(self):
        """Return the ``(m * n, dim)`` weight tensor."""
        return self.weights

    def get_locations(self):
        """Return the ``(m * n, 2)`` tensor of grid coordinates."""
        return self.locations

    def neuron_locations(self):
        """Yield ``np.array([row, col])`` for every unit in row-major order."""
        for i in range(self.m):
            for j in range(self.n):
                yield np.array([i, j])

    def map_vects(self, input_vects):
        """Map each input vector to the grid location of its closest unit.

        Args:
            input_vects: Iterable of vectors of length ``dim``.

        Returns:
            list: One ``[row, col]`` location tensor per input vector, chosen
            by smallest Euclidean distance to the weight vectors.
        """
        mapped = []
        for vect in input_vects:
            candidate = torch.as_tensor(vect, dtype=self.weights.dtype)
            # One vectorised distance computation instead of a Python-level
            # scan over every unit.
            distances = torch.norm(self.weights - candidate, dim=1)
            mapped.append(self.locations[int(torch.argmin(distances))])
        return mapped

    def forward(self, x, it):
        """Return the grid location of the best-matching unit for ``x``.

        Args:
            x: One input vector of length ``dim``.
            it: Accepted for interface compatibility; not used here.

        Returns:
            Long tensor ``[row, col]`` of the unit whose weights are closest
            to ``x``. The original computed this value and discarded it.

        No weight update is performed here.
        NOTE(review): a training step using ``it``, ``niter``, ``alpha`` and
        ``sigma`` looks intended but is not present -- confirm.
        """
        # Repeat x once per unit so it can be compared row-wise with weights.
        tiled = torch.stack([x] * (self.m * self.n))
        dists = self.pdist(tiled, self.weights)
        _, bmu_index = torch.min(dists, 0)
        bmu_loc = self.locations[bmu_index, :].squeeze()
        return bmu_loc


# Neural-network hyperparameters; the intent is to read them from a JSON
# config later. `learning_rate`, `num_epoches` and `USE_GPU` are read by
# neural_train(); `batch_size` is not used anywhere in this file's visible code.
batch_size = 128
learning_rate = 1e-2
num_epoches = 5
# True when a CUDA device is available.
USE_GPU = torch.cuda.is_available()

# Network trained by neural_train(). It is instantiated when the module is
# imported: 1 input channel, 23 output classes.
model = CNN(1, 23) #would be set in config
# Alternative architectures, left as placeholders. The names and arguments in
# these lines do not match the classes defined in this file (RNNModel,
# Autoencoder, and SOM, which takes m, n, dim, niter) and must be adjusted
# before they can be enabled.
#model = SOM(1, 23)
#model = RNN(1, 23)
#model = AE(1, 23)

def neural_train():
    """Train the module-level ``model`` with SGD and evaluate it every epoch.

    For each of ``num_epoches`` epochs the function runs one pass over
    ``dataset.train_dataloader`` with cross-entropy loss, prints the mean
    loss and accuracy, evaluates on ``dataset.test_dataloader`` without
    gradient tracking, prints the test loss and accuracy, and saves the
    whole model object to ``filepath`` with ``torch.save``.

    Reads the module-level names ``model``, ``USE_GPU``, ``learning_rate``,
    ``num_epoches``, ``dataset`` and ``filepath``. ``dataset`` and
    ``filepath`` are not defined in the visible part of this file and must
    be provided by the module. ``model`` is rebound to its CUDA copy when
    ``USE_GPU`` is true.
    """
    global model

    if USE_GPU:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(num_epoches):
        print('epoch {}'.format(epoch + 1))
        print('*' * 10)

        # Evaluation below switches to eval mode; switch back so batch-norm
        # statistics keep updating in every epoch, not only the first one.
        model.train()
        running_loss = 0.0
        running_acc = 0.0
        for img, label in dataset.train_dataloader:
            if USE_GPU:
                img = img.cuda()
                label = label.cuda()
            # Forward pass
            out = model(img)
            loss = criterion(out, label)
            # Weight the batch-mean loss by the batch size so the epoch
            # average is per sample.
            running_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            running_acc += (pred == label).sum().item()
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
            epoch + 1, running_loss / (len(dataset.train_dataset)), running_acc / (len(
                dataset.train_dataset))))

        model.eval()
        eval_loss = 0
        eval_acc = 0
        # no_grad() replaces the removed `volatile=True` flag, which no longer
        # disables autograd.
        with torch.no_grad():
            for img, label in dataset.test_dataloader:
                if USE_GPU:
                    img = img.cuda()
                    label = label.cuda()
                out = model(img)
                loss = criterion(out, label)
                eval_loss += loss.item() * label.size(0)
                _, pred = torch.max(out, 1)
                eval_acc += (pred == label).sum().item()
        print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
            dataset.test_dataset)), eval_acc / (len(dataset.test_dataset))))
        # NOTE(review): assumed to run once per epoch, overwriting the same
        # path -- confirm the intended placement.
        torch.save(model, filepath)
        print()
Loading