diff --git a/README.md b/README.md
index 0f5a90b..fb273bf 100644
--- a/README.md
+++ b/README.md
@@ -65,3 +65,7 @@ You can specify arguments for training, or it will follow the default sets in th
 ### Testing ###
 Use the `testing/test_physics_metrics.py` for now.
+Please make sure that training and testing are configured with the same NN model:
+```
+cp training/models.py testing
+```
diff --git a/testing/models.py b/testing/models.py
new file mode 100644
index 0000000..117ffeb
--- /dev/null
+++ b/testing/models.py
@@ -0,0 +1,230 @@
+"""
+Save models here
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+import torch_geometric.nn as pyg_nn
+import torch_geometric.utils as pyg_utils
+import math
+from math import pi
+
+
+class GNNStack(torch.nn.Module):
+    def __init__(self, input_dim, hidden_dim, output_dim, args):
+        # since we do not need phi and eta in x
+        input_dim = input_dim - 2
+
+        super(GNNStack, self).__init__()
+        conv_model = self.build_conv_model(args.model_type)
+        self.convs = nn.ModuleList()
+        self.convs.append(conv_model(input_dim, hidden_dim))
+        assert (args.num_layers >= 1), 'Number of layers is not >=1'
+        for l in range(args.num_layers - 1):
+            self.convs.append(conv_model(hidden_dim, hidden_dim))
+
+        # post-message-passing
+        self.post_mp = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Dropout(args.dropout),
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Dropout(args.dropout),
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Linear(hidden_dim, output_dim),
+        )
+        self.lamb = args.lamb
+        self.grlparam = args.grlparam
+        self.grl = GradientReverseLayer(self.grlparam)
+        self.post_da = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Dropout(args.dropout),
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ), nn.Dropout(args.dropout),
+            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(
+            ),
+            nn.Linear(hidden_dim, output_dim),
+        )
+
+        self.dropout = args.dropout
+        self.beta_one = nn.parameter.Parameter(torch.rand(1))
+        self.num_layers = args.num_layers
+
+
+    def build_conv_model(self, model_type):
+        if model_type == 'GraphSage':
+            return GraphSage
+        elif model_type == 'Gated':
+            return Gated_model
+
+    def forward(self, data):
+        num_feature = data.num_feature_actual[0].item()
+        original_x = data.x[:, 0:num_feature]
+        train_mask = data.x[:, num_feature]
+        train_default = data.x[:, (num_feature + 1):]
+
+        x = torch.transpose(original_x, 0, 1) * (1 - train_mask) + \
+            torch.transpose(train_default, 0, 1) * train_mask
+        x = torch.transpose(x, 0, 1)
+
+        edge_index = data.edge_index
+        eta_pos = 0
+        phi_pos = 1
+        eta = x[:, eta_pos]
+        phi = x[:, phi_pos]
+        # x = x[:, 2:-1]
+        x = x[:, 2:]
+
+        # x = self.before_mp(x)
+
+        for layer in self.convs:
+            x = layer(x, edge_index, eta, phi)
+            # x = self.batchnorm(x)
+            x = F.relu(x)
+            x = F.dropout(x, self.dropout, training=self.training)
+
+        x_cl = self.post_mp(x)
+        x_da = self.grl(x)
+        x_da = self.post_da(x_da)
+
+        x_cl = torch.sigmoid(x_cl)
+        x_da = torch.sigmoid(x_da)
+
+        return x_cl, x_da
+
+    def loss(self, pred, label, domain_discrimiant, domain_label):
+        # loss = nn.CrossEntropyLoss()
+        """
+        weight = torch.zeros_like(label)
+        weight[label == 1] = 2
+        weight[label == 0] = 0.2
+        """
+        loss = nn.BCELoss()
+        eps = 1e-10
+        temp = loss(pred + eps, label)
+        temp_da = loss(domain_discrimiant + eps, domain_label)
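+        # Total objective: classification BCE plus a lamb-weighted domain BCE.
+        # Because x_da passed through the gradient-reversal layer in forward(),
+        # minimizing the second term adversarially pushes the shared encoder
+        # toward domain-invariant features (DANN-style, per the grlparam flag).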
return temp + self.lamb*temp_da + + +class GraphSage(pyg_nn.MessagePassing): + + def __init__(self, in_channels, out_channels, reducer='meansacct', + normalize_embedding=True): + super(GraphSage, self).__init__(aggr='mean') + in_channels = in_channels + self.lin = torch.nn.Linear(2 * in_channels + 1, out_channels) + self.agg_lin = torch.nn.Linear( + out_channels + 2 * in_channels + 1, out_channels) + + if normalize_embedding: + self.normalize_emb = True + + def forward(self, x, edge_index, eta, phi): + # x = torch.cat((x, eta.view(-1, 1)), dim=1) + # x = torch.cat((x, phi.view(-1, 1)), dim=1) + num_nodes = x.size(0) + return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x) + + def message(self, x_j, edge_index, size, x_i, x): + self.node_count = torch.tensor(size[0]).type(torch.float32).to('cuda') + x_g = torch.mean(x, dim=0) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x_i.size()[0], 1) + x_g = x_g.repeat(x_i.size()[0], 1) + x_j = torch.cat((x_j, x_g, log_count), dim=1) + x_j = self.lin(x_j) + return x_j + + def update(self, aggr_out, x): + x_g = torch.mean(x, dim=0) + x_g = x_g.repeat(x.size()[0], 1) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x.size()[0], 1) + aggr_out = torch.cat((aggr_out, x, x_g, log_count), dim=-1) + aggr_out = self.agg_lin(aggr_out) + aggr_out = F.relu(aggr_out) + if self.normalize_emb: + aggr_out = F.normalize(aggr_out, p=2, dim=-1) + + return aggr_out + +class GradientReverseFunction(Function): + + @staticmethod + def forward(ctx, x, coeff = 1.): + ctx.coeff = coeff + output = x * 1.0 + return output + + @staticmethod + def backward(ctx, grad_output): + return grad_output.neg() * ctx.coeff, None + + +class GradientReverseLayer(nn.Module): + def __init__(self,coeff): + super(GradientReverseLayer, self).__init__() + self.coeff = coeff + def forward(self, x): + return GradientReverseFunction.apply(x,self.coeff) + +class Gated_model(pyg_nn.MessagePassing): + def __init__(self, in_channels, out_channels, normalize_embedding=True): + super(Gated_model, self).__init__(aggr='mean') + # last sclar = d_eta, d_phi, d_R, append x_i, x_j, x_g so * 3,+1 for log count + new_x_input = 3 * (in_channels) + 3 + 1 + self.x_dim = new_x_input + self.lin_m2 = torch.nn.Linear(new_x_input, 1) + # also append x and x_g, so + 2 * in_channels, +1 for log count in the global node + self.lin_m5 = torch.nn.Linear(new_x_input + 2 * in_channels + 1, 1) + self.lin_m5_g1 = torch.nn.Linear(in_channels, out_channels) + self.lin_m5_g2 = torch.nn.Linear(new_x_input, out_channels) + + def forward(self, x, edge_index, eta, phi): + num_nodes = x.size(0) + x = torch.cat((x, eta.view(-1, 1)), dim=1) + x = torch.cat((x, phi.view(-1, 1)), dim=1) + return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x) + + def message(self, x_j, x_i, edge_index, size, x): + self.node_count = torch.tensor(size[0]).type(torch.float32).to('cuda') + # eta at position 0 + dif_eta_phi = x_j[:, -2: x_j.size()[1]] - x_i[:, -2: x_i.size()[1]] + + # make sure delta within 2pi + indices = dif_eta_phi[:, 1] > pi + temp = torch.ceil( + (dif_eta_phi[:, 1][indices] - pi) / (2 * pi)) * (2 * pi) + dif_eta_phi[:, 1][indices] = dif_eta_phi[:, 1][indices] - temp + + delta_r = torch.sum(torch.sqrt(dif_eta_phi ** 2), dim=1).reshape(-1, 1) + + x = x[:, 0:-2] + x_i = x_i[:, 0:-2] + x_j = x_j[:, 0:-2] + x_g = torch.mean(x, dim=0) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x_i.size()[0], 1) + x_g = x_g.repeat(x_i.size()[0], 1) + x_j = 
torch.cat((x_j, x_i, x_g, dif_eta_phi,
+                          delta_r, log_count), dim=1)
+        M_1 = self.lin_m2(x_j)
+        M_2 = torch.sigmoid(M_1)
+        x_j = x_j * M_2
+        return x_j
+
+    def update(self, aggr_out, x):
+        x = x[:, 0:-2]
+        x_g = torch.mean(x, dim=0)
+        log_count = torch.log(self.node_count)
+        log_count = log_count.repeat(x.size()[0], 1)
+        x_g = x_g.repeat(x.size()[0], 1)
+        aggr_out_temp = aggr_out
+        aggr_out = torch.cat((aggr_out, x, x_g, log_count), dim=1)
+        aggr_out = torch.sigmoid(self.lin_m5(aggr_out))
+        aggr_out = F.relu(aggr_out * self.lin_m5_g1(x) +
+                          (1 - aggr_out) * self.lin_m5_g2(aggr_out_temp))
+        return aggr_out
diff --git a/testing/test_physics_metrics.py b/testing/test_physics_metrics.py
index 29a5c03..3f168db 100644
--- a/testing/test_physics_metrics.py
+++ b/testing/test_physics_metrics.py
@@ -142,15 +142,19 @@ class Args(object):
     def __init__(self, model_type='Gated', do_boost=False, extralayers=False):
         self.model_type = model_type
-        self.num_layers = 3
+        self.num_layers = 6
         self.batch_size = 1
-        self.hidden_dim = 20
-        self.dropout = 0
+        self.hidden_dim = 33
+        self.dropout = 0.3
         self.opt = 'adam'
         self.weight_decay = 0
-        self.lr = 0.01
+        self.lr = 5.4e-4
         self.do_boost = do_boost
         self.extralayers = extralayers
+        self.nLV = 1
+        self.nPU = 23
+        self.lamb = 1.77e-5
+        self.grlparam = 0


 class PerformanceMetrics(object):
@@ -165,7 +169,7 @@ def __init__(self):
         dR_diff = 0.


-def clusterJets(pt, eta, phi, ifAK8, ptcut=0., deltaR=0.4):
+def clusterJets(pt, eta, phi, ifAK8, ptcut=0., deltaR=0.4, chg=False):
     """
     cluster the jets based on the array of pt, eta, phi,
     of all particles (masses are assumed to be zero),
@@ -185,7 +189,9 @@
     Ptmin = 30
     if ifAK8:
         Ptmin = 300
-    jets = sequence.inclusive_jets(ptmin=Ptmin)
+    if chg:
+        Ptmin = 150
+    jets = sequence.inclusive_jets(ptmin=Ptmin)

     #charged only
     #jets = sequence.inclusive_jets(ptmin=20)
@@ -296,6 +302,7 @@ def postProcessing(data, preds,ifAK8):
     pt = np.array(data.x[:, 2].cpu().detach())
     eta = np.array(data.x[:, 0].cpu().detach())
     phi = np.array(data.x[:, 1].cpu().detach())
+    pdgId = np.array(data.x[:, 3].cpu().detach())
     puppi = np.array(data.pWeight.cpu().detach())
     puppichg = np.array(data.pWeightchg.cpu().detach())
     # puppi = np.array(data.x[:,data.num_feature_actual[0].item()-1].cpu().detach())
@@ -304,6 +311,7 @@
     pt_truth = np.array(data.GenPart_nump[:, 2].cpu().detach())
     eta_truth = np.array(data.GenPart_nump[:, 0].cpu().detach())
     phi_truth = np.array(data.GenPart_nump[:, 1].cpu().detach())
+    pdgId_truth = np.array(data.GenPart_nump[:, 3].cpu().detach())

     # print (pt)
     # print(truth)
@@ -311,8 +319,7 @@
     # pred2 = np.array(pred2[:, 0].cpu().detach())
     # set all particle masses to zero
     mass = np.zeros(pt.shape[0])
-
-
+
     # remove pt < 0.5 particles
     pt[pt < 0.01] = 0
@@ -334,7 +341,14 @@
     # truth information
     # pt_truth = pt * truth
-
+    pt_chsinfo = np.array(data.x[:, 2].cpu().detach())
+    pt_truth_chsinfo = np.array(data.GenPart_nump[:, 2].cpu().detach())
+    pt_chsinfo[neutral_index] = 0
+    pt_truth_chsinfo[Gencharge==0] = 0
+    pt_chsinfo = pt_chsinfo*puppi
+    jets_truth_chsinfo = clusterJets(pt_truth_chsinfo, eta_truth, phi_truth, ifAK8)
+    jets_chsinfo = clusterJets(pt_chsinfo, eta, phi, ifAK8)
+    performances_jet_chs_info = compareJets(jets_truth_chsinfo, jets_chsinfo)

     if testneu == 1:
         chargeOnly = 0
     else:
         chargeOnly = 1

     if chargeOnly == 1 :
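         # charged-only mode: zero the neutral candidates on the reco side and
         # the charge-0 particles on the gen side before clustering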
pt[neutral_index] = 0 pt_truth[Gencharge==0] = 0 + + #chg test + ptchg = [] + for i in range(len(pt)): + ptchg.append(pt[i]) + ptchg = np.array(ptchg) + ptchg[neutral_index] = 0 + ptchg_chs = ptchg * puppi + #photon only + #pt[pdgId==130] = 0 + #pt_truth[pdgId_truth==130] = 0 + + # puppi information if testneu == 1: @@ -384,6 +411,10 @@ def postProcessing(data, preds,ifAK8): jets_puppi = clusterJets(pt_puppi, eta, phi, ifAK8) njets_puppi, pt_jets_puppi, eta_jets_puppi, phi_jets_puppi, mass_jets_puppi = ExtractJet(jets_puppi) performances_jet_puppi = compareJets(jets_truth, jets_puppi) + + #chg test + jets_chg_chs = clusterJets(ptchg_chs, eta, phi, ifAK8, chg=True) + performances_jet_chg_chs = compareJets(jets_truth, jets_chg_chs) jets_puppi_wcut = clusterJets(pt_puppi_wcut, eta, phi, ifAK8) njets_pf, pt_jets_pf, eta_jets_pf, phi_jets_pf, mass_jets_pf = ExtractJet(jets_puppi_wcut) @@ -469,7 +500,7 @@ def postProcessing(data, preds,ifAK8): - return met_truth,performances_jet_CHS, performances_jet_puppi, met_puppi, performances_jet_puppi_wcut, met_puppi_wcut, performances_jet_pred, mets_pred, neu_pred, neu_puppi, chlv_pred, chpu_pred, chlv_puppi, chpu_puppi, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS + return met_truth,performances_jet_chs_info,performances_jet_chg_chs,performances_jet_CHS, performances_jet_puppi, met_puppi, performances_jet_puppi_wcut, met_puppi_wcut, performances_jet_pred, mets_pred, neu_pred, neu_puppi, chlv_pred, chpu_pred, chlv_puppi, chpu_puppi, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS def test(filelists, models={}): @@ -477,10 +508,12 @@ def test(filelists, models={}): for model in models.values(): model.to('cuda:0') model.eval() - + + performances_jet_chs_info = [] performances_jet_puppi = [] performances_jet_CHS = [] performances_jet_puppi_wcut = [] + performances_jet_chg_chs = [] mets_truth = [] mets_puppi = [] @@ -538,8 +571,8 @@ def test(filelists, models={}): data = DataLoader(dataset, batch_size=1) loader = data - ifAK8 = 0 - if (ifile=="data_pickle/dataset_graph_puppi_test_Wjets4000"): + ifAK8 = 1 + if (ifile=="../data_pickle/dataset_graph_puppi_test_Zjets4000"): ifAK8 = 1 @@ -565,13 +598,15 @@ def test(filelists, models={}): # print("pred here: ", pred) preds.append(pred) - met_truth, perfs_jet_CHS, perfs_jet_puppi, met_puppi, perfs_jet_puppi_wcut, met_puppi_wcut, perfs_jet_pred, mets_fromF_pred, neus_pred, neus_puppi, chlvs_pred, chpus_pred, chlvs_puppi, chpus_puppi, Njets_pf, Njets_pred, Njets_puppi, Njets_truth, Njets_CHS, Pt_jets_pf, Pt_jets_pred, Pt_jets_puppi, Pt_jets_truth, Pt_jets_CHS, Eta_jets_pf, Eta_jets_pred, Eta_jets_puppi, Eta_jets_truth, Eta_jets_CHS, Phi_jets_pf, Phi_jets_pred, Phi_jets_puppi, Phi_jets_truth, Phi_jets_CHS, Mass_jets_pf, Mass_jets_pred, Mass_jets_puppi, Mass_jets_truth, Mass_jets_CHS = postProcessing( + met_truth,perfs_jet_chs_info, perfs_jet_chg_chs, perfs_jet_CHS, perfs_jet_puppi, met_puppi, perfs_jet_puppi_wcut, 
met_puppi_wcut, perfs_jet_pred, mets_fromF_pred, neus_pred, neus_puppi, chlvs_pred, chpus_pred, chlvs_puppi, chpus_puppi, Njets_pf, Njets_pred, Njets_puppi, Njets_truth, Njets_CHS, Pt_jets_pf, Pt_jets_pred, Pt_jets_puppi, Pt_jets_truth, Pt_jets_CHS, Eta_jets_pf, Eta_jets_pred, Eta_jets_puppi, Eta_jets_truth, Eta_jets_CHS, Phi_jets_pf, Phi_jets_pred, Phi_jets_puppi, Phi_jets_truth, Phi_jets_CHS, Mass_jets_pf, Mass_jets_pred, Mass_jets_puppi, Mass_jets_truth, Mass_jets_CHS = postProcessing( data, preds, ifAK8) # perfs_jet_puppi, perfs_jet_puppi_wcut, perfs_jet_pred, perfs_jet_pred2, met_truth, met_puppi, met_puppi_wcut, met_pred, met_pred2 = postProcessing(data, preds) - + + performances_jet_chs_info += perfs_jet_chs_info performances_jet_puppi += perfs_jet_puppi performances_jet_CHS += perfs_jet_CHS performances_jet_puppi_wcut += perfs_jet_puppi_wcut + performances_jet_chg_chs += perfs_jet_chg_chs # performances_jet_pred += perfs_jet_pred # performances_jet_pred2 += perfs_jet_pred2 @@ -640,7 +675,7 @@ def test(filelists, models={}): fp.close() - return mets_truth, performances_jet_CHS, performances_jet_puppi, mets_puppi, performances_jet_puppi_wcut, mets_puppi_wcut, performances_jet_pred, mets_pred, neu_weight, neu_puppiweight, chlv_weight, chpu_weight, chlv_puppiweight, chpu_puppiweight, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS + return mets_truth,performances_jet_chs_info, performances_jet_chg_chs, performances_jet_CHS, performances_jet_puppi, mets_puppi, performances_jet_puppi_wcut, mets_puppi_wcut, performances_jet_pred, mets_pred, neu_weight, neu_puppiweight, chlv_weight, chpu_weight, chlv_puppiweight, chpu_puppiweight, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS def main(modelname, filelists): @@ -655,7 +690,7 @@ def main(modelname, filelists): # run the tests #filelists = ["../data_pickle/dataset_graph_puppi_test_40004000"] - mets_truth, performances_jet_CHS, performances_jet_puppi, mets_puppi, performances_jet_puppi_wcut, mets_puppi_wcut, performances_jet_pred, mets_pred, neu_weight, neu_puppiweight, chlv_weight, chpu_weight, chlv_puppiweight, chpu_puppiweight, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS = test( + mets_truth,performances_jet_chs_info,performances_jet_chg_chs, performances_jet_CHS, performances_jet_puppi, mets_puppi, performances_jet_puppi_wcut, mets_puppi_wcut, performances_jet_pred, mets_pred, neu_weight, neu_puppiweight, chlv_weight, chpu_weight, chlv_puppiweight, chpu_puppiweight, njets_pf, njets_pred, njets_puppi, njets_truth, njets_CHS, pt_jets_pf, pt_jets_pred, pt_jets_puppi, pt_jets_truth, pt_jets_CHS, eta_jets_pf, eta_jets_pred, 
eta_jets_puppi, eta_jets_truth, eta_jets_CHS, phi_jets_pf, phi_jets_pred, phi_jets_puppi, phi_jets_truth, phi_jets_CHS, mass_jets_pf, mass_jets_pred, mass_jets_puppi, mass_jets_truth, mass_jets_CHS = test( filelists, modelcolls) # plot the differences @@ -694,6 +729,16 @@ def getStat(input): for perf in performances_jet_CHS]) plt.hist(mass_diff, bins=40, range=(-1, 1), histtype='step', color='orange', linewidth=linewidth, density=True, label=r'CHS, $\mu={:10.2f}$, $\sigma={:10.2f}$, counts:'.format(*(getStat(mass_diff)))+str(len(mass_diff))) + if testneu: + mass_diff = np.array([getattr(perf, "mass_diff") + for perf in performances_jet_chs_info]) + plt.hist(mass_diff, bins=40, range=(-1, 1), histtype='step', color='purple', linewidth=linewidth, + density=True, label=r'CHS chgonly, $\mu={:10.2f}$, $\sigma={:10.2f}$, counts:'.format(*(getStat(mass_diff)))+str(len(mass_diff))) + + #mass_diff = np.array([getattr(perf, "mass_diff") + # for perf in performances_jet_chg_chs]) + #plt.hist(mass_diff, bins=40, range=(-1, 1), histtype='step', color='purple', linewidth=linewidth, + # density=True, label=r'CHS,charged, $\mu={:10.2f}$, $\sigma={:10.2f}$, counts:'.format(*(getStat(mass_diff)))+str(len(mass_diff))) # plt.xlim(-1.0,1.3) plt.xlabel(r"Jet Mass $(m_{reco} - m_{truth})/m_{truth}$") plt.ylabel('density') @@ -1007,6 +1052,10 @@ def getStat(input): for perf in performances_jet_CHS]) plt.hist(pt_diff, bins=40, range=(-0.3, 0.3), histtype='step', color='orange', linewidth=linewidth, density=True, label=r'CHS, $\mu={:10.3f}$, $\sigma={:10.3f}$, counts:'.format(*(getStat(pt_diff)))+str(len(pt_diff))) + #pt_diff = np.array([getattr(perf, "pt_diff") + # for perf in performances_jet_chg_chs]) + #plt.hist(pt_diff, bins=40, range=(-0.3, 0.3), histtype='step', color='purple', linewidth=linewidth, + # density=True, label=r'CHS,charged, $\mu={:10.3f}$, $\sigma={:10.3f}$, counts:'.format(*(getStat(pt_diff)))+str(len(pt_diff))) # plt.xlim(0,40) plt.ylim(0, 10) plt.xlabel(r"Jet $p_{T}$ $(p^{reco}_{T} - p^{truth}_{T})/p^{truth}_{T}$") @@ -1041,6 +1090,6 @@ def getStat(input): if __name__ == '__main__': - modelname = "test/best_valid_model_nPU20_deeper.pt" - filelists = ["../data_pickle/dataset_graph_puppi_test_Wjets4000"] + modelname = "test/best_valid_model_nPU23_JackparamWjets.pt" + filelists = ["../data_pickle/dataset_graph_puppi_test_WjetsDR84000"] main(modelname, filelists) diff --git a/testing/utils.py b/testing/utils.py new file mode 100644 index 0000000..7dbe858 --- /dev/null +++ b/testing/utils.py @@ -0,0 +1,521 @@ +import torch +import torch.optim as optim +from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score +import matplotlib as mpl +import imageio +import matplotlib.pyplot as plt +import mplhep as hep +mpl.use("pdf") +#hep.set_style(hep.style.ROOT) +hep.style.use(hep.style.ROOT) +import numpy as np +import pickle +from copy import deepcopy +import random +import os + +class RunningAverage(): + """A simple class that maintains the running average of a quantity + Example:se + ``` + loss_avg = RunningAverage() + loss_avg.update(2) + loss_avg.update(4) + loss_avg() = 3 + ``` + """ + + def __init__(self): + self.steps = 0. + self.total = 0. 
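+        # steps counts update() calls and total accumulates their values,
+        # so __call__() returns the running mean total / steps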
+ + def update(self, val): + self.total += float(val) + self.steps += 1 + + def __call__(self): + return self.total / float(self.steps) + + +def parse_optimizer(parser): + opt_parser = parser.add_argument_group() + opt_parser.add_argument('--opt', dest='opt', type=str, + help='Type of optimizer') + opt_parser.add_argument('--opt-scheduler', dest='opt_scheduler', type=str, + help='Type of optimizer scheduler. By default none') + opt_parser.add_argument('--opt-restart', dest='opt_restart', type=int, + help='Number of epochs before restart (by default set to 0 which means no restart)') + opt_parser.add_argument('--opt-decay-step', dest='opt_decay_step', type=int, + help='Number of epochs before decay') + opt_parser.add_argument('--opt-decay-rate', dest='opt_decay_rate', type=float, + help='Learning rate decay ratio') + opt_parser.add_argument('--lr', dest='lr', type=float, + help='Learning rate.') + opt_parser.add_argument('--clip', dest='clip', type=float, + help='Gradient clipping.') + opt_parser.add_argument('--weight_decay', type=float, + help='Optimizer weight decay.') + + +def build_optimizer(args, params): + weight_decay = args.weight_decay + scheduler = None + filter_fn = filter(lambda p: p.requires_grad, params) + if args.opt == 'adam': + optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay) + elif args.opt == 'sgd': + optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay) + elif args.opt == 'rmsprop': + optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay) + elif args.opt == 'adagrad': + optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay) + if args.opt_scheduler == 'none': + return None, optimizer + elif args.opt_scheduler == 'step': + scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate) + elif args.opt_scheduler == 'cos': + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart) + return scheduler, optimizer + + +def generate_random_mask(nparticles, nselect, LV_index, PU_index): + """ + randomly select nselect particles from LV_index and + nselect particles from PU_index, + return the indices of the selected + """ + nselect = min(LV_index.shape[0], PU_index.shape[0], nselect) + + # generate the index for LV and PU samples for training mask + gen_index_LV = random.sample(range(LV_index.shape[0]), nselect) + selected_LV_train = LV_index[gen_index_LV] + + gen_index_PU = random.sample(range(PU_index.shape[0]), nselect) + selected_PU_train = PU_index[gen_index_PU] + + training_mask = np.concatenate((selected_LV_train, selected_PU_train), axis=None) + + # construct mask vector for training and testing + mask_training = torch.zeros(nparticles, 1) + mask_training[training_mask, 0] = 1 + + return mask_training + + +def get_acc(truth, prediction, cut=0.5): + truth = truth.astype('int32') + predict = np.copy(prediction) + predict[predict > cut] = 1 + predict[predict < cut] = 0 + predict = predict.astype('int32') + acc = accuracy_score(truth, predict) + return acc + + +def get_auc(truth, prediction): + auc = roc_auc_score(truth, prediction) + return auc + + +def plot_roc(truths, predictions, legends, postfix="", dir_name='.', saveTo=None): + """ + plot the roc based on the truth (label), and the list of predictions + """ + colors = ['b', 'g', 'r', 'm'] + plt.figure() + for i in range(len(predictions)): + truth = truths[i] + prediction = predictions[i] + legend = legends[i] + + fpr, tpr, thresholds = roc_curve(truth, 
prediction) + auc = get_auc(truth, prediction) + plt.plot(fpr, tpr, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%", + linestyle='solid', linewidth=2, color=colors[i]) + # print(legend, 'auc (%)', auc*100) + + # Save to file if needed + if saveTo is not None: + file = open(saveTo, 'wb') + pickle.dump([fpr, tpr], file) + file.close() + + plt.xlabel('False positive rate') + plt.ylabel('True positive rate') + plt.xlim((0, 1)) + plt.ylim((0, 1)) + plt.grid() + plt.legend(loc=4, fontsize=28) + plt.savefig(dir_name + "roc_" + postfix + ".pdf", bbox_inches='tight') + plt.close() + +def plot_roc_logscale(truths, predictions, legends, postfix="",dir_name='.', saveTo=None): + """ + plot the roc based on the truth (label), and the list of predictions + """ + colors = ['b', 'g', 'r', 'm'] + plt.figure() + for i in range(len(predictions)): + truth = truths[i] + prediction = predictions[i] + legend = legends[i] + + fpr, tpr, thresholds = roc_curve(truth, prediction) + auc = get_auc(truth, prediction) + plt.plot(fpr, tpr, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%", + linestyle='solid', linewidth=2, color=colors[i]) + + # Save to file if needed + if saveTo is not None: + file = open(saveTo, 'wb') + pickle.dump([fpr, tpr], file) + file.close() + + #plt.xlabel('False positive rate') + #plt.ylabel('True positive rate') + plt.xlim([1e-4, 0.01]) + plt.xscale('log') + plt.ylim([1e-4, 1]) + plt.yscale('log') + plt.grid() + #plt.legend(loc=4, fontsize=25) + plt.savefig(dir_name + "/roc_logscale_cut" + postfix + ".pdf", bbox_inches='tight') + plt.close() + +def plot_roc_lowerleft(truths, predictions, legends, postfix="", dir_name = '.',saveTo=None): + """ + plot the roc based on the truth (label), and the list of predictions + """ + colors = ['b', 'g', 'r', 'm'] + plt.figure() + max_tpr = 0 + for i in range(len(predictions)): + truth = truths[i] + prediction = predictions[i] + legend = legends[i] + + fpr, tpr, thresholds = roc_curve(truth, prediction) + truncate_index = np.where(fpr < 0.2)[0].size + fpr_truncate = fpr[0:truncate_index] + tpr_truncate = tpr[0:truncate_index] + cur_tpr = np.max(tpr_truncate) + if cur_tpr > max_tpr: + max_tpr = cur_tpr + auc = get_auc(truth, prediction) + plt.plot(fpr_truncate, tpr_truncate, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%", + linestyle='solid', linewidth=2, color=colors[i]) + # print(legend, 'auc (%)', auc*100) + + # Save to file if needed + if saveTo is not None: + file = open(saveTo, 'wb') + pickle.dump([fpr, tpr], file) + file.close() + + #plt.xlabel('False positive rate') + #plt.ylabel('True positive rate') + plt.grid() + #plt.legend(loc=4) + plt.savefig(dir_name + "/lowerleft_roc_" + postfix + ".pdf", bbox_inches='tight') + plt.close() + +def plot_hist2d(pts, weights, yname='Weight', postfix="", dir_name='.'): + """ + plot the (puppi)Weights vs pt 2D histogram + """ + plt.figure() + h = plt.hist2d(pts, weights, bins=20, range=[[0, 4.0], [0, 1.0]], norm=mpl.colors.LogNorm()) + plt.colorbar(h[3]) + plt.xlabel('p_{T} [GeV]') + plt.ylabel(yname) + plt.grid() + plt.savefig(dir_name + "/hist2d_pt_vs_" + yname.replace(' ', '') + "_" + postfix + ".pdf") + plt.close() + + +def plot_discriminator(epoch, vals, legends=['LV', 'PU'], postfix="", label="Discriminator", bins=50, xaxisrange=(0, 1), dir_name='.'): + """ + plot the distriminator distribution + """ + sub_dir = "prob_plots" + parent_dir = "./" + dir_name + + path = os.path.join(parent_dir, sub_dir) + + isdir 
= os.path.isdir(path) + if isdir == False: + os.mkdir(os.path.join(parent_dir, sub_dir)) + + plt.figure() + for i in range(len(vals)): + val = vals[i] + legend = legends[i] + plt.hist(val, bins=50, range=xaxisrange, density=True, histtype='step', label=legend) + plt.ylabel('A.U.') + plt.xlabel(label + str(epoch)) + plt.legend(loc=4) + filename = dir_name + "/prob_plots/Distriminator_" + postfix + "_" + str(epoch) +".png" + plt.savefig(filename) + plt.close() + + return filename + + +def plot_training( + epochs_train, epochs_test, loss_graph_train, + loss_graph, auc_graph_train, train_accuracy, auc_graph_train_puppi, train_accuracy_puppi, + loss_graph_test, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi, + auc_graph_neu_train, auc_graph_train_puppi_neu, + auc_graph_neu_test, auc_graph_test_puppi_neu, + postfix=".pdf", dir_name='.'): + # print(epochs_train) + # print(epochs_test) + # print(loss_graph) + # print(auc_graph_test) + + # loss + plt.figure() + #plt.plot(epochs_train, loss_graph, label='train', linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, loss_graph_train, label = 'train_avg', linestyle = 'solid', linewidth = 1, color = 'g') + plt.plot(epochs_test, loss_graph_test, label='Validation', linestyle='solid', linewidth=1, color='b') + plt.xlabel('Epochs') + plt.ylabel('loss') + plt.legend(loc=4) + plt.savefig(dir_name + "/loss_graph" + postfix) + plt.close() + + # auc + plt.figure() + plt.plot(epochs_test, auc_graph_train, label="train", linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, auc_graph_test, label="Validation", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, auc_graph_train_puppi, label="PUPPI train", linestyle='dashed', linewidth=1, + color='g') + plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1, + color='g') + plt.plot(epochs_test, auc_graph_neu_train, label="Neu train", linestyle='solid', linewidth=1, + color='orange') + plt.plot(epochs_test, auc_graph_neu_test, label="neu Validation", linestyle='solid', linewidth=1, color='cyan') + plt.plot(epochs_test, auc_graph_train_puppi_neu, label="PUPPI Neu train", linestyle='dashed', + linewidth=1, color='m') + plt.plot(epochs_test, auc_graph_test_puppi_neu, label="PUPPI Neu Validation", linestyle='solid', linewidth=1, + color='m') + plt.xlabel('Epochs') + plt.ylabel('auc') + plt.legend(loc=4) + plt.savefig(dir_name + "/auc_graph_train" + postfix) + plt.close() + + # accuracy + plt.figure() + plt.plot(epochs_test, train_accuracy, label="train", linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, test_accuracy, label="Validation", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, train_accuracy_puppi, label="PUPPI train", linestyle='dashed', linewidth=1, + color='g') + plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1, + color='g') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend(loc=4) + plt.savefig(dir_name + "/accuracy_graph" + postfix) + plt.clf() + plt.close() + +def plot_training_fullsim( + epochs_train, epochs_test, loss_graph_train, + loss_graph, auc_graph_train, train_accuracy, auc_graph_train_puppi, train_accuracy_puppi, + loss_graph_test, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi, + postfix=".pdf", dir_name='.'): + # print(epochs_train) + # print(epochs_test) + # print(loss_graph) + # print(auc_graph_test) + + # loss + plt.figure() + #plt.plot(epochs_train, 
loss_graph, label='train', linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, loss_graph_train, label = 'train_avg', linestyle = 'solid', linewidth = 1, color = 'g') + plt.plot(epochs_test, loss_graph_test, label='Validation', linestyle='solid', linewidth=1, color='b') + plt.xlabel('Epochs') + plt.ylabel('loss') + plt.legend(loc=4) + plt.savefig(dir_name + "/loss_graph" + postfix) + plt.close() + + # auc + plt.figure() + plt.plot(epochs_test, auc_graph_train, label="train", linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, auc_graph_test, label="Validation", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, auc_graph_train_puppi, label="PUPPI train", linestyle='dashed', linewidth=1, + color='g') + plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1, + color='g') + plt.xlabel('Epochs') + plt.ylabel('auc') + plt.legend(loc=4) + plt.savefig(dir_name + "/auc_graph_train" + postfix) + plt.close() + + # accuracy + plt.figure() + plt.plot(epochs_test, train_accuracy, label="train", linestyle='solid', linewidth=1, color='r') + plt.plot(epochs_test, test_accuracy, label="Validation", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, train_accuracy_puppi, label="PUPPI train", linestyle='dashed', linewidth=1, + color='g') + plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1, + color='g') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend(loc=4) + plt.savefig(dir_name + "/accuracy_graph" + postfix) + plt.clf() + plt.close() + +def plot_testing(epochs_test, + loss_graph_test, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi, dir_name + ): + # loss + postfix = "_test.pdf" + plt.figure() + plt.plot(epochs_test, loss_graph_test, label='test', linestyle='solid', linewidth=1, color='b') + plt.xlabel('Epochs') + plt.ylabel('loss') + plt.legend(loc=4) + plt.savefig(dir_name + "/loss_graph" + postfix) + plt.close() + + # auc + plt.figure() + plt.plot(epochs_test, auc_graph_test, label="test", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI test", linestyle='solid', linewidth=1, + color='g') + plt.xlabel('Epochs') + plt.ylabel('auc') + plt.legend(loc=4) + plt.savefig(dir_name + "/auc_graph_train" + postfix) + plt.close() + + # accuracy + plt.figure() + plt.plot(epochs_test, test_accuracy, label="test", linestyle='solid', linewidth=1, color='b') + plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI test", linestyle='solid', linewidth=1, + color='g') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend(loc=4) + plt.savefig(dir_name + "/accuracy_graph" + postfix) + plt.clf() + plt.close() + +def plot_kinematics(dataset, dir_name): + """ + plot the kinematic distribution of given particles + """ + pts = None + etas = None + phis = None + chgs = None + fromLVs = None + weights = None + chgMasks = None + neuMasks = None + + isfirst = True + for graph in dataset: + num_features = graph.num_feature_actual + features = graph.x.cpu().numpy() + mask = features[:, num_features] + mask_neu = graph.mask_neu[:, 0].cpu().numpy() + pt = features[:, 2] + eta = features[:, 0] + phi = features[:, 1] + chg = features[:, 3] + fromLV = features[:, 4] + weight = features[:, 5] + + if not isfirst: + chgMasks = np.concatenate((chgMasks, mask), 0) + neuMasks = np.concatenate((neuMasks, mask_neu), 0) + pts = np.concatenate((pts, pt), 0) + etas = np.concatenate((etas, eta), 0) + 
phis = np.concatenate((phis, phi), 0) + chgs = np.concatenate((chgs, chg), 0) + fromLVs = np.concatenate((fromLVs, fromLV), 0) + weights = np.concatenate((weights, weight), 0) + else: + chgMasks = mask + neuMasks = mask_neu + pts = pt + etas = eta + phis = phi + chgs = chg + fromLVs = fromLV + weights = weight + isfirst = False + + chgMasks = chgMasks.astype(int) + neuMasks = neuMasks.astype(int) + + plt.figure() + plt.hist(pts[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(pts[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.yscale('log') + plt.xlabel('p_{T} [GeV]') + plt.legend(loc=4) + plt.savefig(dir_name + "pt.pdf") + plt.close() + + plt.figure() + plt.hist(etas[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(etas[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.xlabel('eta') + plt.legend(loc=4) + plt.savefig(dir_name + "/eta.pdf") + plt.close() + + plt.figure() + plt.hist(phis[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(phis[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.xlabel('phi') + plt.legend(loc=4) + plt.savefig(dir_name + "/phi.pdf") + plt.close() + + plt.figure() + plt.hist(fromLVs[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(fromLVs[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.xlabel('fromLVs') + plt.legend(loc=4) + plt.savefig(dir_name + "/fromLVs.pdf") + plt.close() + + plt.figure() + plt.hist(chgs[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(chgs[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.xlabel('charge') + plt.legend(loc=4) + plt.savefig(dir_name + "/charge.pdf") + plt.close() + + plt.figure() + plt.hist(weights[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg') + plt.hist(weights[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu') + plt.ylabel('A.U.') + plt.xlabel('weights') + plt.legend(loc=4) + plt.savefig(dir_name + "/weights.pdf") + plt.close() + + +def make_gif(figures, postfix="Train", dir_name='.'): + """ + make the fig based on the list of figures + """ + with imageio.get_writer(dir_name + '/result_' + postfix + ".gif") as writer: + for fig in figures: + image = imageio.imread(fig) + writer.append_data(image) diff --git a/training/models.py b/training/models.py new file mode 100644 index 0000000..117ffeb --- /dev/null +++ b/training/models.py @@ -0,0 +1,230 @@ +""" +Save models here +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +import torch_geometric.nn as pyg_nn +import torch_geometric.utils as pyg_utils +import math +from math import pi + + +class GNNStack(torch.nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, args): + # since we do not need phi and eta in x + input_dim = input_dim - 2 + + super(GNNStack, self).__init__() + conv_model = self.build_conv_model(args.model_type) + self.convs = nn.ModuleList() + self.convs.append(conv_model(input_dim, hidden_dim)) + assert (args.num_layers >= 1), 'Number of layers is not >=1' + for l in range(args.num_layers - 1): + self.convs.append(conv_model(hidden_dim, hidden_dim)) + + # post-message-passing + self.post_mp = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.ReLU( + ), 
nn.Dropout(args.dropout), + nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Dropout(args.dropout), + nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Linear(hidden_dim, output_dim), + ) + self.lamb = args.lamb + self.grlparam = args.grlparam + self.grl = GradientReverseLayer(self.grlparam) + self.post_da = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.ReLU( + ), nn.Dropout(args.dropout), + nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ),nn.Dropout(args.dropout), + nn.Linear(hidden_dim, hidden_dim),nn.ReLU( + ), + nn.Linear(hidden_dim, output_dim), + ) + + self.dropout = args.dropout + self.beta_one = nn.parameter.Parameter(torch.rand(1)) + self.num_layers = args.num_layers + + + def build_conv_model(self, model_type): + if model_type == 'GraphSage': + return GraphSage + elif model_type == 'Gated': + return Gated_model + + def forward(self, data): + num_feature = data.num_feature_actual[0].item() + original_x = data.x[:, 0:num_feature] + train_mask = data.x[:, num_feature] + train_default = data.x[:, (num_feature + 1):] + + x = torch.transpose(original_x, 0, 1) * (1 - train_mask) + \ + torch.transpose(train_default, 0, 1) * train_mask + x = torch.transpose(x, 0, 1) + + edge_index = data.edge_index + eta_pos = 0 + phi_pos = 1 + eta = x[:, eta_pos] + phi = x[:, phi_pos] + # x = x[:, 2:-1] + x = x[:, 2:] + + # x = self.before_mp(x) + + for layer in self.convs: + x = layer(x, edge_index, eta, phi) + # x = self.batchnorm(x) + x = F.relu(x) + x = F.dropout(x, self.dropout, training=self.training) + + x_cl = self.post_mp(x) + x_da = self.grl(x) + x_da = self.post_da(x_da) + + x_cl = torch.sigmoid(x_cl) + x_da = torch.sigmoid(x_da) + + return x_cl, x_da + + def loss(self, pred, label, domain_discrimiant, domain_label): + # loss = nn.CrossEntropyLoss() + """ + weight = torch.zeros_like(label) + weight[label == 1] = 2 + weight[label == 0] = 0.2 + """ + loss = nn.BCELoss() + eps = 1e-10 + temp = loss(pred + eps, label) + temp_da = loss(domain_discrimiant + eps, domain_label) + return temp + self.lamb*temp_da + + +class GraphSage(pyg_nn.MessagePassing): + + def __init__(self, in_channels, out_channels, reducer='meansacct', + normalize_embedding=True): + super(GraphSage, self).__init__(aggr='mean') + in_channels = in_channels + self.lin = torch.nn.Linear(2 * in_channels + 1, out_channels) + self.agg_lin = torch.nn.Linear( + out_channels + 2 * in_channels + 1, out_channels) + + if normalize_embedding: + self.normalize_emb = True + + def forward(self, x, edge_index, eta, phi): + # x = torch.cat((x, eta.view(-1, 1)), dim=1) + # x = torch.cat((x, phi.view(-1, 1)), dim=1) + num_nodes = x.size(0) + return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x) + + def message(self, x_j, edge_index, size, x_i, x): + self.node_count = torch.tensor(size[0]).type(torch.float32).to('cuda') + x_g = torch.mean(x, dim=0) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x_i.size()[0], 1) + x_g = x_g.repeat(x_i.size()[0], 1) + x_j = torch.cat((x_j, x_g, log_count), dim=1) + x_j = self.lin(x_j) + return x_j + + def update(self, aggr_out, x): + x_g = torch.mean(x, dim=0) + x_g = x_g.repeat(x.size()[0], 1) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x.size()[0], 1) + aggr_out = torch.cat((aggr_out, x, x_g, log_count), dim=-1) + aggr_out = self.agg_lin(aggr_out) + aggr_out = F.relu(aggr_out) + if self.normalize_emb: + 
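+            # L2-normalize the updated node embeddings, as in the original
+            # GraphSAGE, so downstream layers see unit-length feature vectors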
aggr_out = F.normalize(aggr_out, p=2, dim=-1) + + return aggr_out + +class GradientReverseFunction(Function): + + @staticmethod + def forward(ctx, x, coeff = 1.): + ctx.coeff = coeff + output = x * 1.0 + return output + + @staticmethod + def backward(ctx, grad_output): + return grad_output.neg() * ctx.coeff, None + + +class GradientReverseLayer(nn.Module): + def __init__(self,coeff): + super(GradientReverseLayer, self).__init__() + self.coeff = coeff + def forward(self, x): + return GradientReverseFunction.apply(x,self.coeff) + +class Gated_model(pyg_nn.MessagePassing): + def __init__(self, in_channels, out_channels, normalize_embedding=True): + super(Gated_model, self).__init__(aggr='mean') + # last sclar = d_eta, d_phi, d_R, append x_i, x_j, x_g so * 3,+1 for log count + new_x_input = 3 * (in_channels) + 3 + 1 + self.x_dim = new_x_input + self.lin_m2 = torch.nn.Linear(new_x_input, 1) + # also append x and x_g, so + 2 * in_channels, +1 for log count in the global node + self.lin_m5 = torch.nn.Linear(new_x_input + 2 * in_channels + 1, 1) + self.lin_m5_g1 = torch.nn.Linear(in_channels, out_channels) + self.lin_m5_g2 = torch.nn.Linear(new_x_input, out_channels) + + def forward(self, x, edge_index, eta, phi): + num_nodes = x.size(0) + x = torch.cat((x, eta.view(-1, 1)), dim=1) + x = torch.cat((x, phi.view(-1, 1)), dim=1) + return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x) + + def message(self, x_j, x_i, edge_index, size, x): + self.node_count = torch.tensor(size[0]).type(torch.float32).to('cuda') + # eta at position 0 + dif_eta_phi = x_j[:, -2: x_j.size()[1]] - x_i[:, -2: x_i.size()[1]] + + # make sure delta within 2pi + indices = dif_eta_phi[:, 1] > pi + temp = torch.ceil( + (dif_eta_phi[:, 1][indices] - pi) / (2 * pi)) * (2 * pi) + dif_eta_phi[:, 1][indices] = dif_eta_phi[:, 1][indices] - temp + + delta_r = torch.sum(torch.sqrt(dif_eta_phi ** 2), dim=1).reshape(-1, 1) + + x = x[:, 0:-2] + x_i = x_i[:, 0:-2] + x_j = x_j[:, 0:-2] + x_g = torch.mean(x, dim=0) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x_i.size()[0], 1) + x_g = x_g.repeat(x_i.size()[0], 1) + x_j = torch.cat((x_j, x_i, x_g, dif_eta_phi, + delta_r, log_count), dim=1) + M_1 = self.lin_m2(x_j) + M_2 = torch.sigmoid(M_1) + x_j = x_j * M_2 + return x_j + + def update(self, aggr_out, x): + x = x[:, 0:-2] + x_g = torch.mean(x, dim=0) + log_count = torch.log(self.node_count) + log_count = log_count.repeat(x.size()[0], 1) + x_g = x_g.repeat(x.size()[0], 1) + aggr_out_temp = aggr_out + aggr_out = torch.cat((aggr_out, x, x_g, log_count), dim=1) + aggr_out = torch.sigmoid(self.lin_m5(aggr_out)) + aggr_out = F.relu(aggr_out * self.lin_m5_g1(x) + + (1 - aggr_out) * self.lin_m5_g2(aggr_out_temp)) + return aggr_out diff --git a/training/test_physics_metrics.py b/training/test_physics_metrics.py index 6a4c40b..433a409 100644 --- a/training/test_physics_metrics.py +++ b/training/test_physics_metrics.py @@ -179,7 +179,7 @@ def clusterJets(pt, eta, phi, ptcut=0., deltaR=0.4): event = np.column_stack((pt_wptcut, eta_wptcut, phi_wptcut, mass_wptcut)) event.dtype = DTYPE_PTEPM sequence = cluster(event, R=deltaR, p=-1) - jets = sequence.inclusive_jets(ptmin=300) + jets = sequence.inclusive_jets(ptmin=150) #charged only #jets = sequence.inclusive_jets(ptmin=20) @@ -330,7 +330,7 @@ def postProcessing(data, preds): # pt_truth = pt * truth - if testneu == 1: + if testneu == 1: chargeOnly = 0 else: chargeOnly = 1 @@ -687,7 +687,7 @@ def getStat(input): # plt.xlim(-1.0,1.3) plt.xlabel(r"Jet Mass $(m_{reco} - 
m_{truth})/m_{truth}$") plt.ylabel('density') - plt.ylim(0, 3.6) + plt.ylim(0, 6) plt.rc('legend', fontsize=fontsize) plt.legend() plt.savefig("Jet_mass_diff.pdf") @@ -946,7 +946,7 @@ def getStat(input): plt.hist(pt_diff, bins=40, range=(-0.3, 0.3), histtype='step', color='orange', linewidth=linewidth, density=True, label=r'CHS, $\mu={:10.3f}$, $\sigma={:10.3f}$, counts:'.format(*(getStat(pt_diff)))+str(len(pt_diff))) # plt.xlim(0,40) - plt.ylim(0, 7) + plt.ylim(0, 10) plt.xlabel(r"Jet $p_{T}$ $(p^{reco}_{T} - p^{truth}_{T})/p^{truth}_{T}$") plt.ylabel('density') plt.rc('legend', fontsize=fontsize) @@ -981,4 +981,4 @@ def getStat(input): if __name__ == '__main__': modelname = "test/best_valid_model_nPU20_deeper.pt" filelists = ["../data_pickle/dataset_graph_puppi_test_Zjets4000"] - main(modelname, filelists) + main(modelname, filelists) \ No newline at end of file diff --git a/training/train_semi.py b/training/train_semi.py index 2be2995..76723af 100644 --- a/training/train_semi.py +++ b/training/train_semi.py @@ -5,6 +5,7 @@ import pickle import random import numpy as np +import torch.nn as nn import argparse import torch from torch_geometric.data import DataLoader @@ -43,6 +44,14 @@ def arg_parse(): help='Number of training epochs') parser.add_argument('--pulevel', type=int, help='pileup level for the dataset') + parser.add_argument('--nLV', type=int, + help='LV for the dataset') + parser.add_argument('--nPU', type=int, + help='PU for the dataset') + parser.add_argument('--lamb', type=float, + help='lambda for domain adaptation') + parser.add_argument('--grlparam', type=float, + help='grl param for domain adaptation') parser.add_argument('--training_path', type=str, help='path for training graphs') parser.add_argument('--validation_path', type=str, @@ -51,46 +60,69 @@ def arg_parse(): help='directory to save trained model and plots') parser.set_defaults(model_type='Gated', - num_layers=3, + num_layers=6, batch_size=4, - hidden_dim=20, - dropout=0, + hidden_dim=33, + dropout=0.3, opt='adam', weight_decay=0, - lr=0.001, + lr=5.4e-4, pulevel=80, - training_path="../data_pickle/dataset_graph_puppi_20000", - validation_path="../data_pickle/dataset_graph_puppi_val_4000", - save_dir="test", + nLV=1, + nPU=23, + lamb = 5e-3, #1.77e-3, + grlparam = 1, #1:Open DANN, 0:Close DANN + training_path="../data_pickle/dataset_graph_puppi_WjetsDR820000", + validation_path="../data_pickle/dataset_graph_puppi_val_WjetsDR84000", + save_dir="testZDR8_conv7_DANN_JackBestParamLV1_WjetsLdx500", ) return parser.parse_args() -def train(dataset, dataset_validation, args, batchsize): +def train(dataset, dataset_validation, trial, args, batchsize, tunning): directory = args.save_dir # parent_dir = "/home/gpaspala/new_Pileup_GNN/Pileup_GNN/fast_simulation/" # path = os.path.join(parent_dir, directory) path = directory isdir = os.path.isdir(path) - + print("args:"+str(args.dropout)) if isdir == False: os.mkdir(path) start = timer() - rotate_mask = 5 - # rotate_mask = 2 - if args.pulevel == 20: - rotate_mask = 8 - num_select_LV = 3 - num_select_PU = 45 - elif args.pulevel == 80: - num_select_LV = 5 - num_select_PU = 11 + if tunning: + + args.dropout = trial.suggest_float("dropout", 0.0, 0.1, step=0.025) + rotate_mask = trial.suggest_int("rotate_mask", 7, 12) + + num_select_LV = trial.suggest_int("num_select_LV", 2, 10) + num_select_PU = trial.suggest_int("num_select_PU", 5, 20) + + + g = [args.dropout, rotate_mask, num_select_LV, num_select_PU] + s = "" + for i in range(len(g)): + s = s + str(g[i]) + " " + f = 
open("datphys6dropout.txt", "w") + f.write(s) + f.close() + else: - num_select_LV = 6 - num_select_PU = 282 + + rotate_mask = 12 + # rotate_mask = 2 + if args.pulevel == 20: + rotate_mask = 8 + num_select_LV = 3 + num_select_PU = 45 + elif args.pulevel == 80: + num_select_LV = args.nLV + num_select_PU = args.nPU + else: + num_select_LV = 6 + num_select_PU = 282 generate_mask(dataset, rotate_mask, num_select_LV, num_select_PU) generate_mask(dataset_validation, 1, num_select_LV, num_select_PU) @@ -100,6 +132,12 @@ def train(dataset, dataset_validation, args, batchsize): model = models.GNNStack( dataset[0].num_feature_actual, args.hidden_dim, 1, args) + + if torch.cuda.device_count() > 1: + print("Let's use", torch.cuda.device_count(), "GPUs!") + # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs + n_gpu = torch.cuda.device_count() + model = nn.DataParallel(model) model.to(device) scheduler, opt = utils.build_optimizer(args, model.parameters()) @@ -115,6 +153,7 @@ def train(dataset, dataset_validation, args, batchsize): loss_graph_train = [] loss_graph_train_hybrid = [] loss_graph_valid = [] + loss_graph_valid_hybrid = [] auc_graph_train = [] auc_graph_train_hybrid = [] auc_graph_valid = [] @@ -135,6 +174,10 @@ def train(dataset, dataset_validation, args, batchsize): valid_accuracy_puppi_neu = [] train_fig_names = [] valid_fig_names = [] + train_metricPUPPI = [] + train_metricSSL = [] + valid_metricPUPPI = [] + valid_metricSSL = [] train_graph_SSLMassdiffMu = [] train_graph_PUPPIMassdiffMu = [] @@ -156,6 +199,8 @@ def train(dataset, dataset_validation, args, batchsize): count_event = 0 best_validation_auc = 0 + best_validation_SSLMassSigma = 999 + best_valid_SSLMassdiffMu = 0 converge = False converge_num_event = 0 last_steady_event = 0 @@ -182,24 +227,31 @@ def train(dataset, dataset_validation, args, batchsize): feature_with_mask[:, -num_feature:]), 1) batch = batch.to(device) - pred, _ = model.forward(batch) + pred, d_da = model.forward(batch) label = batch.y + label_da = batch.random_mask_neu[:, 0] + train_mask = batch.x[:, num_feature] # print("train mask: ", torch.sum(train_mask)) if train_mask_all != None: train_mask_all = torch.cat((train_mask_all, train_mask), 0) else: train_mask_all = train_mask - + + NeutralTag = np.zeros(len(label)) + NeutralTag[batch.Neutral_index] = 1 # Tell which is neu label = label[train_mask == 1] label = label.type(torch.float) label = label.view(-1, 1) pred = pred[train_mask == 1] - + label_da = label_da[(train_mask == 1) | (batch.random_mask_neu[:, 0] == 1 )] + label_da = label_da.type(torch.float) + label_da = label_da.view(-1, 1) + d_da = d_da[(train_mask == 1) | (batch.random_mask_neu[:, 0] == 1 )] # print("pred: ", pred) # print("label: ", label) - loss = model.loss(pred, label) + loss = model.loss(pred, label, d_da, label_da) cur_loss += loss.item() opt.zero_grad() loss.backward() @@ -241,6 +293,7 @@ def train(dataset, dataset_validation, args, batchsize): loss_graph_valid.append(valid_loss) loss_graph_train.append(training_loss) loss_graph_train_hybrid.append(training_loss_hybrid) + loss_graph_valid_hybrid.append(valid_loss_hybrid) auc_graph_train_puppi.append(train_puppi_auc) auc_graph_valid_puppi.append(valid_puppi_auc) auc_graph_train.append(train_auc) @@ -270,6 +323,8 @@ def train(dataset, dataset_validation, args, batchsize): train_graph_SSLPtSigma.append(train_SSLPtSigma) train_graph_PUPPIPtdiffMu.append(train_PUPPIPtdiffMu) train_graph_PUPPIPtSigma.append(train_PUPPIPtSigma) + 
train_metricPUPPI.append(valid_PUPPIMassSigma/(1-abs(valid_PUPPIMassdiffMu))) + train_metricSSL.append(valid_SSLMassSigma/(1-abs(valid_SSLMassdiffMu))) valid_graph_SSLMassdiffMu.append(valid_SSLMassdiffMu) valid_graph_PUPPIMassdiffMu.append(valid_PUPPIMassdiffMu) @@ -279,12 +334,21 @@ def train(dataset, dataset_validation, args, batchsize): valid_graph_SSLPtSigma.append(valid_SSLPtSigma) valid_graph_PUPPIPtdiffMu.append(valid_PUPPIPtdiffMu) valid_graph_PUPPIPtSigma.append(valid_PUPPIPtSigma) - - if valid_auc > best_validation_auc: - best_validation_auc = valid_auc - print("model is saved in " + path + "/best_valid_model_nPU11_deeper.pt") - torch.save(model.state_dict(), path + - "/best_valid_model_nPU11_deeper.pt") + valid_metricPUPPI.append(valid_PUPPIMassSigma/(1-abs(valid_PUPPIMassdiffMu))) + valid_metricSSL.append(valid_SSLMassSigma/(1-abs(valid_SSLMassdiffMu))) + + #if valid_auc > best_validation_auc: + #best_validation_auc = valid_auc + if (valid_SSLMassSigma/(1-abs(valid_SSLMassdiffMu))) < (best_validation_SSLMassSigma/(1-abs(best_valid_SSLMassdiffMu))): + best_validation_SSLMassSigma = valid_SSLMassSigma + best_valid_SSLMassdiffMu = valid_SSLMassdiffMu + print("model is saved in " + path + "/best_valid_model_nPU11_deeper_Zjets.pt") + if isinstance(model, torch.nn.DataParallel): + model_state_dict = model.module.state_dict() + else: + model_state_dict = model.state_dict() + torch.save(model_state_dict, path + + "/best_valid_model_nPU11_deeper_Zjets.pt") if valid_loss >= lowest_valid_loss: print( @@ -303,8 +367,9 @@ def train(dataset, dataset_validation, args, batchsize): else: print("lowest valid loss " + str(valid_loss)) lowest_valid_loss = valid_loss - - if count_event == 5000: + if count_event % 5000 == 0: + args.lr = args.lr*0.8 + if count_event == 35000: converge = True break @@ -314,10 +379,10 @@ def train(dataset, dataset_validation, args, batchsize): training_time = end - start print("training time " + str(training_time)) - utils.plot_training(epochs_train, epochs_valid, loss_graph_train, + utils.plot_training(epochs_train, epochs_valid, loss_graph_train, loss_graph_train_hybrid, loss_graph, auc_graph_train, train_accuracy_neu, auc_graph_train_puppi, - train_accuracy_puppi_neu, - loss_graph_valid, auc_graph_valid, valid_accuracy_neu, auc_graph_valid_puppi, + train_accuracy_puppi_neu, train_metricPUPPI, valid_metricPUPPI, train_metricSSL, valid_metricSSL, + loss_graph_valid, loss_graph_valid_hybrid, auc_graph_valid, valid_accuracy_neu, auc_graph_valid_puppi, valid_accuracy_puppi_neu, auc_graph_neu_train, auc_graph_train_puppi_neu, auc_graph_neu_valid, auc_graph_valid_puppi_neu, dir_name=args.save_dir @@ -400,7 +465,8 @@ def test(loader, model, indicator, epoch, args, modelcolls, pathname): with torch.no_grad(): num_feature = data.num_feature_actual[0].item() test_mask = data.x[:, num_feature] - + mask_neu = data.mask_neu[:, 0] + random_mask_neu = data.random_mask_neu[:, 0] data.x = torch.cat( (data.x[:, 0:num_feature], test_mask.view(-1, 1), data.x[:, -num_feature:]), 1) data = data.to(device) @@ -409,6 +475,7 @@ def test(loader, model, indicator, epoch, args, modelcolls, pathname): # puppi = data.x[:, data.num_feature_actual[0].item() - 1] puppi = data.pWeight label = data.y + label_da = data.random_mask_neu[:, 0] if pred_all != None: pred_all = torch.cat((pred_all, pred), 0) @@ -421,7 +488,7 @@ def test(loader, model, indicator, epoch, args, modelcolls, pathname): puppi_all = puppi label_all = label - mask_neu = data.mask_neu[:, 0] + if test_mask_all != None: test_mask_all 
= torch.cat((test_mask_all, test_mask), 0) @@ -432,12 +499,19 @@ def test(loader, model, indicator, epoch, args, modelcolls, pathname): label = label[test_mask == 1] pred = pred[test_mask == 1] - pred_hybrid = pred_hybrid[test_mask == 1] + pred_hybrid = pred_hybrid[(test_mask == 1) | (random_mask_neu == 1 )] label = label.type(torch.float) label = label.view(-1, 1) - total_loss += model.loss(pred, label).item() * data.num_graphs - total_loss_hybrid += model.loss(pred_hybrid, - label).item() * data.num_graphs + label_da = label_da[(test_mask == 1) | (random_mask_neu == 1 )] + label_da = label_da.type(torch.float) + label_da = label_da.view(-1, 1) + LossBCE = nn.BCELoss() + epsi = 1e-10 + total_loss += LossBCE(pred+epsi, label).item() * data.num_graphs + total_loss_hybrid += LossBCE(pred_hybrid, label_da).item() * data.num_graphs + #total_loss += model.loss(pred, label, pred_hybrid, label_da).item() * data.num_graphs + #total_loss_hybrid += model.loss(pred, + # label, pred_hybrid, label_da).item() * data.num_graphs if indicator == 0: total_loss /= min(epoch, len(loader.dataset)) @@ -659,7 +733,7 @@ def generate_mask(dataset, num_mask, num_select_LV, num_select_PU): graph.num_mask = num_mask -def generate_neu_mask(dataset): +def generate_neu_mask(dataset, args): # all neutrals with pt cuts are masked for evaluation for graph in dataset: nparticles = graph.num_nodes @@ -668,27 +742,83 @@ def generate_neu_mask(dataset): Neutral_feature = graph.x[Neutral_index] Neutral_index = Neutral_index[torch.where( Neutral_feature[:, 2] > 0.5)[0]] - + nLVPU = args.nLV + args.nPU + random_mask_index = np.random.choice(Neutral_index,nLVPU,replace = False) mask_neu = torch.zeros(nparticles, 1) + random_mask_neu = torch.zeros(nparticles, 1) mask_neu[Neutral_index, 0] = 1 + random_mask_neu[random_mask_index, 0] = 1 graph.mask_neu = mask_neu + graph.random_mask_neu = random_mask_neu return dataset - - -def main(): +def black_box_function(trial): args = arg_parse() - print("model type: ", args.model_type) - - # load the constructed graphs with open(args.training_path, "rb") as fp: dataset = pickle.load(fp) with open(args.validation_path, "rb") as fp: dataset_validation = pickle.load(fp) + generate_neu_mask(dataset,args) + generate_neu_mask(dataset_validation,args) + return train(dataset, dataset_validation, trial, args, 1, True) + +class Configuration(object): + + def __init__(self): + args = arg_parse() + self.lr = args.lr + self.dropout = args.dropout + self.batch_size = args.batch_size + self.nPU = 7 + self.nLV = 4 + self.rotate_mask = 5 + + + + def tunningMode(self): + mstart = timer() + study = optuna.create_study(study_name='Bayesian Optimization with Optuna', direction="minimize") + study.optimize(black_box_function, n_trials=9) + trial = study.best_trial + print('Best quant: {}'.format(trial.value)) + s = "Best hyperparameters: {}".format(trial.params) + s2 = str(trial.number) + f = open("datphys6dropout.txt", "w") + f.write(s) + f.write(s2) + f.close() + mend = timer() + mbotime = mend - mstart + print("main time " + str(mbotime)) + fig1 = optuna.visualization.plot_optimization_history(study, target_name = 'perf_auc mean') + fig2 = optuna.visualization.plot_param_importances(study) + fig3 = optuna.visualization.plot_edf(study, target_name = 'perf_auc mean') + fig1.write_image(file = "opt_history4var.png", format = 'png') + fig2.write_image(file = "param_importances4var.png", format='png') + fig3.write_image(file = "edf4var.png", format='png') + + + def trainingMode(self): + args = arg_parse() + 
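+        # hard-coded override of the parser default (dropout=0.3) for plain,
+        # non-tuning training runs; remove this line to use the CLI value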
+
+class Configuration(object):
+
+    def __init__(self):
+        args = arg_parse()
+        self.lr = args.lr
+        self.dropout = args.dropout
+        self.batch_size = args.batch_size
+        self.nPU = 7
+        self.nLV = 4
+        self.rotate_mask = 5
+
+    def tunningMode(self):
+        mstart = timer()
+        study = optuna.create_study(study_name='Bayesian Optimization with Optuna', direction="minimize")
+        study.optimize(black_box_function, n_trials=9)
+        trial = study.best_trial
+        print('Best quant: {}'.format(trial.value))
+        s = "Best hyperparameters: {}".format(trial.params)
+        s2 = str(trial.number)
+        f = open("datphys6dropout.txt", "w")
+        f.write(s)
+        f.write(s2)
+        f.close()
+        mend = timer()
+        mbotime = mend - mstart
+        print("main time " + str(mbotime))
+        fig1 = optuna.visualization.plot_optimization_history(study, target_name='perf_auc mean')
+        fig2 = optuna.visualization.plot_param_importances(study)
+        fig3 = optuna.visualization.plot_edf(study, target_name='perf_auc mean')
+        fig1.write_image(file="opt_history4var.png", format='png')
+        fig2.write_image(file="param_importances4var.png", format='png')
+        fig3.write_image(file="edf4var.png", format='png')
+
+    def trainingMode(self):
+        args = arg_parse()
+        args.dropout = 0.1
+        print("model type: ", args.model_type)
+
+        # load the constructed graphs
+        with open(args.training_path, "rb") as fp:
+            dataset = pickle.load(fp)
+        with open(args.validation_path, "rb") as fp:
+            dataset_validation = pickle.load(fp)
+
+        generate_neu_mask(dataset, args)
+        generate_neu_mask(dataset_validation, args)
+        trial = 1
+        train(dataset, dataset_validation, trial, args, 1, False)
+
+def main():
+
+    config = Configuration()
-    generate_neu_mask(dataset)
-    generate_neu_mask(dataset_validation)
-    train(dataset, dataset_validation, args, 1)
+    config.trainingMode()

 if __name__ == '__main__':
     main()
diff --git a/training/utils.py b/training/utils.py
new file mode 100644
index 0000000..611048a
--- /dev/null
+++ b/training/utils.py
@@ -0,0 +1,553 @@
+import torch
+import torch.optim as optim
+from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score
+import matplotlib as mpl
+import imageio
+import matplotlib.pyplot as plt
+import mplhep as hep
+mpl.use("pdf")
+# hep.set_style(hep.style.ROOT)
+hep.style.use(hep.style.ROOT)
+import numpy as np
+import pickle
+from copy import deepcopy
+import random
+import os
+
+class RunningAverage():
+    """A simple class that maintains the running average of a quantity
+    Example:
+    ```
+    loss_avg = RunningAverage()
+    loss_avg.update(2)
+    loss_avg.update(4)
+    loss_avg()  # returns 3.0
+    ```
+    """
+
+    def __init__(self):
+        self.steps = 0.
+        self.total = 0.
+
+    def update(self, val):
+        self.total += float(val)
+        self.steps += 1
+
+    def __call__(self):
+        return self.total / float(self.steps)
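`RunningAverage` keeps only a count and a running total, so it can absorb one scalar per batch without storing the history. A short usage sketch:
```
loss_avg = RunningAverage()
for batch_loss in (0.9, 0.7, 0.5):
    loss_avg.update(batch_loss)
print(loss_avg())  # 0.7, the mean of everything seen so far
```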
+
+def parse_optimizer(parser):
+    opt_parser = parser.add_argument_group()
+    opt_parser.add_argument('--opt', dest='opt', type=str,
+                            help='Type of optimizer')
+    opt_parser.add_argument('--opt-scheduler', dest='opt_scheduler', type=str,
+                            help='Type of optimizer scheduler. By default none')
+    opt_parser.add_argument('--opt-restart', dest='opt_restart', type=int,
+                            help='Number of epochs before restart (by default set to 0 which means no restart)')
+    opt_parser.add_argument('--opt-decay-step', dest='opt_decay_step', type=int,
+                            help='Number of epochs before decay')
+    opt_parser.add_argument('--opt-decay-rate', dest='opt_decay_rate', type=float,
+                            help='Learning rate decay ratio')
+    opt_parser.add_argument('--lr', dest='lr', type=float,
+                            help='Learning rate.')
+    opt_parser.add_argument('--clip', dest='clip', type=float,
+                            help='Gradient clipping.')
+    opt_parser.add_argument('--weight_decay', type=float,
+                            help='Optimizer weight decay.')
+
+
+def build_optimizer(args, params):
+    weight_decay = args.weight_decay
+    scheduler = None
+    filter_fn = filter(lambda p: p.requires_grad, params)
+    if args.opt == 'adam':
+        optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
+    elif args.opt == 'sgd':
+        optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay)
+    elif args.opt == 'rmsprop':
+        optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)
+    elif args.opt == 'adagrad':
+        optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)
+    if args.opt_scheduler == 'none':
+        return None, optimizer
+    elif args.opt_scheduler == 'step':
+        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
+    elif args.opt_scheduler == 'cos':
+        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
+    return scheduler, optimizer
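`build_optimizer` returns a `(scheduler, optimizer)` pair, with `scheduler` equal to `None` when `--opt-scheduler` is `none`. A usage sketch, where `model`, `num_epochs`, and `run_one_epoch` are placeholders rather than names from this repository:
```
# assumes `args` came from a parser that went through parse_optimizer()
scheduler, optimizer = build_optimizer(args, model.parameters())
for epoch in range(num_epochs):
    run_one_epoch(model, optimizer)  # hypothetical training step
    if scheduler is not None:
        scheduler.step()
```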
+
+def generate_random_mask(nparticles, nselect, LV_index, PU_index):
+    """
+    randomly select nselect particles from LV_index and
+    nselect particles from PU_index,
+    return the indices of the selected
+    """
+    nselect = min(LV_index.shape[0], PU_index.shape[0], nselect)
+
+    # generate the index for LV and PU samples for training mask
+    gen_index_LV = random.sample(range(LV_index.shape[0]), nselect)
+    selected_LV_train = LV_index[gen_index_LV]
+
+    gen_index_PU = random.sample(range(PU_index.shape[0]), nselect)
+    selected_PU_train = PU_index[gen_index_PU]
+
+    training_mask = np.concatenate((selected_LV_train, selected_PU_train), axis=None)
+
+    # construct mask vector for training and testing
+    mask_training = torch.zeros(nparticles, 1)
+    mask_training[training_mask, 0] = 1
+
+    return mask_training
+
+
+def get_acc(truth, prediction, cut=0.5):
+    truth = truth.astype('int32')
+    predict = np.copy(prediction)
+    # use >= so that scores exactly at the cut are classified consistently
+    predict[predict >= cut] = 1
+    predict[predict < cut] = 0
+    predict = predict.astype('int32')
+    acc = accuracy_score(truth, predict)
+    return acc
+
+
+def get_auc(truth, prediction):
+    auc = roc_auc_score(truth, prediction)
+    return auc
+
+
+def plot_roc(truths, predictions, legends, postfix="", dir_name='.', saveTo=None):
+    """
+    plot the roc based on the truth (label), and the list of predictions
+    """
+    colors = ['b', 'g', 'r', 'm']
+    plt.figure()
+    for i in range(len(predictions)):
+        truth = truths[i]
+        prediction = predictions[i]
+        legend = legends[i]
+
+        fpr, tpr, thresholds = roc_curve(truth, prediction)
+        auc = get_auc(truth, prediction)
+        plt.plot(fpr, tpr, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%",
+                 linestyle='solid', linewidth=2, color=colors[i])
+        # print(legend, 'auc (%)', auc*100)
+
+        # Save to file if needed
+        if saveTo is not None:
+            file = open(saveTo, 'wb')
+            pickle.dump([fpr, tpr], file)
+            file.close()
+
+    plt.xlabel('False positive rate')
+    plt.ylabel('True positive rate')
+    plt.xlim((0, 1))
+    plt.ylim((0, 1))
+    plt.grid()
+    plt.legend(loc=4, fontsize=28)
+    plt.savefig(dir_name + "/roc_" + postfix + ".pdf", bbox_inches='tight')
+    plt.close()
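A typical call compares the SSL output against the PUPPI weight on the same truth labels; the array names here are illustrative:
```
# truths/predictions are parallel lists, one entry per curve
plot_roc([label_neu, label_neu], [pred_ssl_neu, puppi_weight_neu],
         legends=["SSL", "PUPPI"], postfix="neutral", dir_name=args.save_dir)
```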
+
+def plot_roc_logscale(truths, predictions, legends, postfix="", dir_name='.', saveTo=None):
+    """
+    plot the roc based on the truth (label), and the list of predictions
+    """
+    colors = ['b', 'g', 'r', 'm']
+    plt.figure()
+    for i in range(len(predictions)):
+        truth = truths[i]
+        prediction = predictions[i]
+        legend = legends[i]
+
+        fpr, tpr, thresholds = roc_curve(truth, prediction)
+        auc = get_auc(truth, prediction)
+        plt.plot(fpr, tpr, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%",
+                 linestyle='solid', linewidth=2, color=colors[i])
+
+        # Save to file if needed
+        if saveTo is not None:
+            file = open(saveTo, 'wb')
+            pickle.dump([fpr, tpr], file)
+            file.close()
+
+    # plt.xlabel('False positive rate')
+    # plt.ylabel('True positive rate')
+    plt.xlim([1e-4, 0.01])
+    plt.xscale('log')
+    plt.ylim([1e-4, 1])
+    plt.yscale('log')
+    plt.grid()
+    # plt.legend(loc=4, fontsize=25)
+    plt.savefig(dir_name + "/roc_logscale_cut" + postfix + ".pdf", bbox_inches='tight')
+    plt.close()
+
+def plot_roc_lowerleft(truths, predictions, legends, postfix="", dir_name='.', saveTo=None):
+    """
+    plot the roc based on the truth (label), and the list of predictions
+    """
+    colors = ['b', 'g', 'r', 'm']
+    plt.figure()
+    max_tpr = 0
+    for i in range(len(predictions)):
+        truth = truths[i]
+        prediction = predictions[i]
+        legend = legends[i]
+
+        fpr, tpr, thresholds = roc_curve(truth, prediction)
+        truncate_index = np.where(fpr < 0.2)[0].size
+        fpr_truncate = fpr[0:truncate_index]
+        tpr_truncate = tpr[0:truncate_index]
+        cur_tpr = np.max(tpr_truncate)
+        if cur_tpr > max_tpr:
+            max_tpr = cur_tpr
+        auc = get_auc(truth, prediction)
+        plt.plot(fpr_truncate, tpr_truncate, label=legend + ", auc=" + np.format_float_positional(auc * 100, precision=2) + "%",
+                 linestyle='solid', linewidth=2, color=colors[i])
+        # print(legend, 'auc (%)', auc*100)
+
+        # Save to file if needed
+        if saveTo is not None:
+            file = open(saveTo, 'wb')
+            pickle.dump([fpr, tpr], file)
+            file.close()
+
+    # plt.xlabel('False positive rate')
+    # plt.ylabel('True positive rate')
+    plt.grid()
+    # plt.legend(loc=4)
+    plt.savefig(dir_name + "/lowerleft_roc_" + postfix + ".pdf", bbox_inches='tight')
+    plt.close()
+
+def plot_hist2d(pts, weights, yname='Weight', postfix="", dir_name='.'):
+    """
+    plot the (puppi)Weights vs pt 2D histogram
+    """
+    plt.figure()
+    h = plt.hist2d(pts, weights, bins=20, range=[[0, 4.0], [0, 1.0]], norm=mpl.colors.LogNorm())
+    plt.colorbar(h[3])
+    plt.xlabel('$p_T$ [GeV]')
+    plt.ylabel(yname)
+    plt.grid()
+    plt.savefig(dir_name + "/hist2d_pt_vs_" + yname.replace(' ', '') + "_" + postfix + ".pdf")
+    plt.close()
+
+
+def plot_discriminator(epoch, vals, legends=['LV', 'PU'], postfix="", label="Discriminator", bins=50, xaxisrange=(0, 1), dir_name='.'):
+    """
+    plot the discriminator distribution
+    """
+    sub_dir = "prob_plots"
+    parent_dir = "./" + dir_name
+
+    path = os.path.join(parent_dir, sub_dir)
+
+    isdir = os.path.isdir(path)
+    if isdir == False:
+        os.mkdir(os.path.join(parent_dir, sub_dir))
+
+    plt.figure()
+    for i in range(len(vals)):
+        val = vals[i]
+        legend = legends[i]
+        # honor the bins argument (this histogram was hard-coded to 50 bins before)
+        plt.hist(val, bins=bins, range=xaxisrange, density=True, histtype='step', label=legend)
+    plt.ylabel('A.U.')
+    plt.xlabel(label + str(epoch))
+    plt.legend(loc=4)
+    filename = dir_name + "/prob_plots/Discriminator_" + postfix + "_" + str(epoch) + ".png"
+    plt.savefig(filename)
+    plt.close()
+
+    return filename
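`plot_discriminator` returns the path of the png it writes, which is exactly what `make_gif` (defined at the bottom of this file) consumes. A usage sketch with illustrative score arrays and a hypothetical epoch loop:
```
figures = []
for epoch in range(num_epochs):  # hypothetical loop
    fig = plot_discriminator(epoch, [scores[labels == 1], scores[labels == 0]],
                             legends=['LV', 'PU'], postfix="valid",
                             dir_name=args.save_dir)
    figures.append(fig)
```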
+
+
+def plot_training(
+        epochs_train, epochs_test, loss_graph_train, loss_graph_train_hybrid,
+        loss_graph, auc_graph_train, train_accuracy, auc_graph_train_puppi, train_accuracy_puppi,
+        train_metricPUPPI, test_metricPUPPI, train_metricSSL, test_metricSSL,
+        loss_graph_test, loss_graph_test_hybrid, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi,
+        auc_graph_neu_train, auc_graph_train_puppi_neu,
+        auc_graph_neu_test, auc_graph_test_puppi_neu,
+        postfix=".pdf", dir_name='.'):
+    # print(epochs_train)
+    # print(epochs_test)
+    # print(loss_graph)
+    # print(auc_graph_test)
+
+    # classification loss
+    plt.figure()
+    # plt.plot(epochs_train, loss_graph, label='train', linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, loss_graph_train, label='train_avg', linestyle='solid', linewidth=1, color='g')
+    plt.plot(epochs_test, loss_graph_test, label='Validation', linestyle='solid', linewidth=1, color='b')
+    plt.xlabel('Epochs')
+    plt.ylabel('loss')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/loss_graph" + postfix)
+    plt.close()
+
+    # domain-adaptation loss
+    plt.figure()
+    plt.plot(epochs_test, loss_graph_train_hybrid, label='train_avg', linestyle='solid', linewidth=1, color='g')
+    plt.plot(epochs_test, loss_graph_test_hybrid, label='Validation', linestyle='solid', linewidth=1, color='b')
+    plt.xlabel('Epochs')
+    plt.ylabel('loss_da')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/loss_graph_da" + postfix)
+    plt.close()
+
+    plt.figure()
+    plt.plot(epochs_test, train_metricPUPPI, label='train_PUPPI', linestyle='solid', linewidth=1, color='g')
+    plt.plot(epochs_test, train_metricSSL, label='train_SSL', linestyle='solid', linewidth=1, color='r')
+    plt.xlabel('Epochs')
+    plt.ylabel('sigma/(1-|mu|)')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/physics_metrics_train" + postfix)
+    plt.close()
+
+    plt.figure()
+    plt.plot(epochs_test, test_metricPUPPI, label='Valid_PUPPI', linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, test_metricSSL, label='Valid_SSL', linestyle='solid', linewidth=1, color='y')
+    plt.xlabel('Epochs')
+    plt.ylabel('sigma/(1-|mu|)')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/physics_metrics_valid" + postfix)
+    plt.close()
+
+    # auc
+    plt.figure()
+    plt.plot(epochs_test, auc_graph_train, label="train", linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, auc_graph_test, label="Validation", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, auc_graph_train_puppi, label="PUPPI train", linestyle='dashed', linewidth=1,
+             color='g')
+    plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1,
+             color='g')
+    plt.plot(epochs_test, auc_graph_neu_train, label="Neu train", linestyle='solid', linewidth=1,
+             color='orange')
+    plt.plot(epochs_test, auc_graph_neu_test, label="Neu Validation", linestyle='solid', linewidth=1, color='cyan')
+    plt.plot(epochs_test, auc_graph_train_puppi_neu, label="PUPPI Neu train", linestyle='dashed',
+             linewidth=1, color='m')
+    plt.plot(epochs_test, auc_graph_test_puppi_neu, label="PUPPI Neu Validation", linestyle='solid', linewidth=1,
+             color='m')
+    plt.xlabel('Epochs')
+    plt.ylabel('auc')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/auc_graph_train" + postfix)
+    plt.close()
+
+    # accuracy
+    plt.figure()
+    plt.plot(epochs_test, train_accuracy, label="train", linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, test_accuracy, label="Validation", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, train_accuracy_puppi, label="PUPPI train", linestyle='dashed', linewidth=1,
+             color='g')
+    plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1,
+             color='g')
+    plt.xlabel('Epochs')
+    plt.ylabel('Accuracy')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/accuracy_graph" + postfix)
+    plt.clf()
+    plt.close()
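The sigma/(1-|mu|) curves plotted above are the checkpointing metric used in `train()`. A minimal sketch, assuming `sigma` and `mu` are the width and bias of the fitted mass (or pt) response, as the `*MassSigma`/`*MassdiffMu` variable names suggest:
```
def response_metric(sigma, mu):
    # smaller is better: a narrow response (small sigma) and a
    # small bias (|mu| near 0) both push the metric down
    return sigma / (1 - abs(mu))
```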
+
+def plot_training_fullsim(
+        epochs_train, epochs_test, loss_graph_train,
+        loss_graph, auc_graph_train, train_accuracy, auc_graph_train_puppi, train_accuracy_puppi,
+        loss_graph_test, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi,
+        postfix=".pdf", dir_name='.'):
+    # loss
+    plt.figure()
+    # plt.plot(epochs_train, loss_graph, label='train', linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, loss_graph_train, label='train_avg', linestyle='solid', linewidth=1, color='g')
+    plt.plot(epochs_test, loss_graph_test, label='Validation', linestyle='solid', linewidth=1, color='b')
+    plt.xlabel('Epochs')
+    plt.ylabel('loss')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/loss_graph" + postfix)
+    plt.close()
+
+    # auc
+    plt.figure()
+    plt.plot(epochs_test, auc_graph_train, label="train", linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, auc_graph_test, label="Validation", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, auc_graph_train_puppi, label="PUPPI train", linestyle='dashed', linewidth=1,
+             color='g')
+    plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1,
+             color='g')
+    plt.xlabel('Epochs')
+    plt.ylabel('auc')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/auc_graph_train" + postfix)
+    plt.close()
+
+    # accuracy
+    plt.figure()
+    plt.plot(epochs_test, train_accuracy, label="train", linestyle='solid', linewidth=1, color='r')
+    plt.plot(epochs_test, test_accuracy, label="Validation", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, train_accuracy_puppi, label="PUPPI train", linestyle='dashed', linewidth=1,
+             color='g')
+    plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI Validation", linestyle='solid', linewidth=1,
+             color='g')
+    plt.xlabel('Epochs')
+    plt.ylabel('Accuracy')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/accuracy_graph" + postfix)
+    plt.clf()
+    plt.close()
+
+def plot_testing(epochs_test,
+                 loss_graph_test, auc_graph_test, test_accuracy, auc_graph_test_puppi, test_accuracy_puppi, dir_name
+                 ):
+    # loss
+    postfix = "_test.pdf"
+    plt.figure()
+    plt.plot(epochs_test, loss_graph_test, label='test', linestyle='solid', linewidth=1, color='b')
+    plt.xlabel('Epochs')
+    plt.ylabel('loss')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/loss_graph" + postfix)
+    plt.close()
+
+    # auc
+    plt.figure()
+    plt.plot(epochs_test, auc_graph_test, label="test", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, auc_graph_test_puppi, label="PUPPI test", linestyle='solid', linewidth=1,
+             color='g')
+    plt.xlabel('Epochs')
+    plt.ylabel('auc')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/auc_graph_train" + postfix)
+    plt.close()
+
+    # accuracy
+    plt.figure()
+    plt.plot(epochs_test, test_accuracy, label="test", linestyle='solid', linewidth=1, color='b')
+    plt.plot(epochs_test, test_accuracy_puppi, label="PUPPI test", linestyle='solid', linewidth=1,
+             color='g')
+    plt.xlabel('Epochs')
+    plt.ylabel('Accuracy')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/accuracy_graph" + postfix)
+    plt.clf()
+    plt.close()
+
+def plot_kinematics(dataset, dir_name):
+    """
+    plot the kinematic distribution of given particles
+    """
+    pts = None
+    etas = None
+    phis = None
+    chgs = None
+    fromLVs = None
+    weights = None
+    chgMasks = None
+    neuMasks = None
+
+    isfirst = True
+    for graph in dataset:
+        # cast to a plain int so the numpy column indexing below works
+        num_features = int(graph.num_feature_actual)
+        features = graph.x.cpu().numpy()
+        mask = features[:, num_features]
+        mask_neu = graph.mask_neu[:, 0].cpu().numpy()
+        pt = features[:, 2]
+        eta = features[:, 0]
+        phi = features[:, 1]
+        chg = features[:, 3]
+        fromLV = features[:, 4]
+        weight = features[:, 5]
+
+        if not isfirst:
+            chgMasks = np.concatenate((chgMasks, mask), 0)
+            neuMasks = np.concatenate((neuMasks, mask_neu), 0)
+            pts = np.concatenate((pts, pt), 0)
+            etas = np.concatenate((etas, eta), 0)
+            phis = np.concatenate((phis, phi), 0)
+            chgs = np.concatenate((chgs, chg), 0)
+            fromLVs = np.concatenate((fromLVs, fromLV), 0)
+            weights = np.concatenate((weights, weight), 0)
+        else:
+            chgMasks = mask
+            neuMasks = mask_neu
+            pts = pt
+            etas = eta
+            phis = phi
+            chgs = chg
+            fromLVs = fromLV
+            weights = weight
+            isfirst = False
+
+    chgMasks = chgMasks.astype(int)
+    neuMasks = neuMasks.astype(int)
+
+    plt.figure()
+    plt.hist(pts[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(pts[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.yscale('log')
+    plt.xlabel('$p_T$ [GeV]')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/pt.pdf")
+    plt.close()
+
+    plt.figure()
+    plt.hist(etas[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(etas[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.xlabel('eta')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/eta.pdf")
+    plt.close()
+
+    plt.figure()
+    plt.hist(phis[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(phis[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.xlabel('phi')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/phi.pdf")
+    plt.close()
+
+    plt.figure()
+    plt.hist(fromLVs[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(fromLVs[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.xlabel('fromLVs')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/fromLVs.pdf")
+    plt.close()
+
+    plt.figure()
+    plt.hist(chgs[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(chgs[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.xlabel('charge')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/charge.pdf")
+    plt.close()
+
+    plt.figure()
+    plt.hist(weights[chgMasks == 1], bins=50, density=True, histtype='step', label='Chg')
+    plt.hist(weights[neuMasks == 1], bins=50, density=True, histtype='step', label='Neu')
+    plt.ylabel('A.U.')
+    plt.xlabel('weights')
+    plt.legend(loc=4)
+    plt.savefig(dir_name + "/weights.pdf")
+    plt.close()
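`plot_kinematics` only needs a list of graphs carrying the usual `x` and `mask_neu` attributes, so it can be pointed at the pickled training set directly; a usage sketch following the loading pattern used in `trainingMode`:
```
with open(args.training_path, "rb") as fp:
    dataset = pickle.load(fp)
plot_kinematics(dataset, dir_name=args.save_dir)
```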
+
+
+def make_gif(figures, postfix="Train", dir_name='.'):
+    """
+    make the gif based on the list of figures
+    """
+    with imageio.get_writer(dir_name + '/result_' + postfix + ".gif") as writer:
+        for fig in figures:
+            image = imageio.imread(fig)
+            writer.append_data(image)
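A usage sketch, stitching the per-epoch discriminator plots into one animation:
```
# `figures` is the list of paths collected from plot_discriminator above
make_gif(figures, postfix="Validation", dir_name=args.save_dir)
```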