
Commit 17e08a8

Merge pull request #26 from MGXlab/develop
Develop
2 parents c246436 + 0dd1995 commit 17e08a8

22 files changed: +32793 / -2400 lines changed

README.md

Lines changed: 4 additions & 3 deletions
@@ -27,7 +27,7 @@ import dnngior.gapfill_class.Gapfill
 Gapfill(path_to_model)
 ```

-You may find examples of gap-filling a genome scale reconstruction (GEM) with `dnngior` with a complete or a defined medium in this [example notebook](tutorials/example.ipynb). `dnngior` can gapfill both ModelSEED and BiGG models, to gapfill BiGG models you need to specify modeltype.
+You may find examples of gap-filling a genome scale reconstruction (GEM) with `dnngior` with a complete or a defined medium in this [example notebook](tutorials/gapfilling_example.ipynb). `dnngior` can gapfill both ModelSEED and BiGG models, to gapfill BiGG models you need to specify modeltype.

 ```python
 Gapfill(path_to_BiGG_model, modeltype='BiGG')
@@ -48,12 +48,13 @@ Alternatively you can find additional custom Neural Networks for several taxonom

 ## License

+
 Please see [License](LICENSE)


 ## Cite

-The paper that will accompany the tool is currrently available as preprint:\
-https://www.biorxiv.org/content/10.1101/2023.07.10.548314v2
+The paper that will accompany the tool is can be found here:\
+https://www.cell.com/iscience/fulltext/S2589-0042(24)02574-4

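A minimal usage sketch of the two calls quoted in the README hunk above. The top-level import and the model file names are assumptions for illustration; the README context line shows `dnngior.gapfill_class.Gapfill`, so the exact import path may differ.

```python
# Sketch only: import path and model file names are assumed, not verified against the package.
from dnngior import Gapfill  # assumed public import

# ModelSEED reconstruction: modeltype defaults to ModelSEED
gapfilled_seed = Gapfill("my_modelseed_model.sbml")              # hypothetical path

# BiGG reconstruction: modeltype must be given explicitly, as the README notes
gapfilled_bigg = Gapfill("my_bigg_model.xml", modeltype='BiGG')  # hypothetical path
```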
dnngior/MSEED_reactions.py

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ def parseStoichOnt(self, stoichiometry):

         #For empty reaction
         if(stoichiometry == ""):
-            return rxn_cpds_array
+            return rxn_cpds_dict

         for rgt in stoichiometry.split(";"):
             (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
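For context, a small stand-alone sketch of the parsing logic this hunk touches: on an empty stoichiometry string the method now returns the (empty) `rxn_cpds_dict`, the dictionary type built by the loop below, instead of `rxn_cpds_array`. The helper name and return layout here are hypothetical simplifications, not the actual `MSEED_reactions` code.

```python
# Hypothetical, simplified stand-in for parseStoichOnt(), for illustration only.
def parse_stoichiometry(stoichiometry):
    rxn_cpds_dict = {}
    # Empty reaction: return the empty dict (the type callers expect), per the fix above.
    if stoichiometry == "":
        return rxn_cpds_dict
    # ModelSEED-style reagents: "coeff:compound:compartment:index:name" joined by ";"
    for rgt in stoichiometry.split(";"):
        coeff, cpd, cpt, index, name = rgt.split(":", 4)
        rxn_cpds_dict[(cpd, cpt, index)] = float(coeff)
    return rxn_cpds_dict

print(parse_stoichiometry(""))                                              # {}
print(parse_stoichiometry("-1:cpd00001:0:0:H2O;1:cpd00009:0:0:Phosphate"))  # two reagents
```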

dnngior/NN_Predictor.py

Lines changed: 32 additions & 6 deletions
@@ -7,19 +7,40 @@
 import pandas as pd
 import cobra.core.model as cobra_model
 from dnngior.reaction_class import Reaction
+from dnngior.variables import *
 import os
 import sys
 from math import exp
 from pathlib import Path

 class NN:
-    def __init__(self, modeltype=None, path=None, custom=None):
+    def __init__(self, path=None, modeltype=None, custom=None):
         '''
         Light version of the model, saves space, uses only numpy and cobra and no tensorflow
         '''

-        self.path=path
-        self.__get_pseudo_network()
+        if custom:
+            self.network = custom[0]
+            self.modeltype = custom[1]
+            self.rxn_keys = custom[2]
+        else:
+            if path:
+                self.path=path
+
+            elif modeltype:
+                if modeltype == 'ModelSEED':
+                    self.path = TRAINED_NN_MSEED
+                elif modeltype == 'BiGG':
+                    self.path = TRAINED_NN_BIGG
+                else:
+                    print("Modeltype: {} not recognized, defaulting to ModelSEED".format(modeltype))
+                    self.path = TRAINED_NN_MSEED
+            else:
+                print("No path or modeltype provided, defaulting to ModelSEED")
+                self.path = TRAINED_NN_MSEED
+            self.__get_pseudo_network()
+
+


     #Function that loads the Neural network; path is path to .h5 file
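Based on the hunk above, the constructor now resolves its source in the order custom > path > modeltype, falling back to the bundled ModelSEED network. A short usage sketch (the .npz file name is hypothetical):

```python
from dnngior.NN_Predictor import NN

nn_default = NN()                           # no arguments: falls back to TRAINED_NN_MSEED
nn_bigg    = NN(modeltype='BiGG')           # bundled BiGG network (TRAINED_NN_BIGG)
nn_custom  = NN(path='my_trained_nn.npz')   # hypothetical path to a user-trained lite network
```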
@@ -53,10 +74,10 @@ def predict(self, input):
             #check if reaction class
             input2 = self.__convert_reaction_list(set(input.reactions))
         elif isinstance(input, pd.DataFrame):
-            input.reindex(self.rxn_keys)
+            input1b = input.reindex(self.rxn_keys).fillna(0.0)
             df_columns = input.columns
             #Transpose because rows need to be different models for the network
-            input2 = np.asarray(input.T)
+            input2 = np.asarray(input1b.T)
         elif isinstance(input, dict):
             #check if dictionary, get list of reactions and convert
             input2 = self.__convert_reaction_list([i for i in input if input[i]==1])
@@ -75,6 +96,9 @@ def predict(self, input):
         else:
             single_input=False

+        if not input2.shape[1] == self.network[0][0].shape[0]:
+            raise Exception("Input size ({}) does not match network ({})".format(input2.shape[1], len(self.rxn_keys)))
+
         a = input2
         for layer in self.network:
             a = a.clip(0)
@@ -85,6 +109,8 @@
         prediction = dict(zip(self.rxn_keys, np.squeeze(prediction)))
         if isinstance(input, pd.DataFrame):
             prediction = pd.DataFrame(index=self.rxn_keys, columns=df_columns, data=prediction.T)
+            if len(prediction.index) != len(input.index):
+                print('Warning mismatch input vs prediction ({})'.format(len(prediction.index) - len(input.index)))
         return prediction

     #function that generates a binary input based on a list of reaction ids
@@ -106,7 +132,7 @@ def __convert_reaction_list(self, reaction_set):
                     b_input.append(1)
                 else:
                     b_input.append(0)
-            print("#reactions not found in keys: ", len(set(reaction_set)) - sum(b_input), '/', len(reaction_set))
+            print("#reactions not found in NN-keys: ", len(set(reaction_set)) - sum(b_input), '/', len(reaction_set))
         except:
             raise Exception("Conversion failed")

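The predict() hunks above add a reindex(...).fillna(0.0) step for DataFrame input and a shape check against the first network layer. A sketch of calling it on a presence/absence table; the reaction id and genome names are made up for illustration.

```python
import pandas as pd
from dnngior.NN_Predictor import NN

nn = NN(modeltype='ModelSEED')

# rows = reaction ids, columns = genomes; reactions missing from the index are
# filled with 0.0 by the reindex step added in this commit
presence = pd.DataFrame({'genome_A': [1], 'genome_B': [0]},
                        index=['rxn00001_c0'])   # hypothetical reaction id
predictions = nn.predict(presence)               # DataFrame indexed by nn.rxn_keys
```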
dnngior/NN_Trainer.py

Lines changed: 45 additions & 22 deletions
@@ -17,7 +17,7 @@
 import sys
 from tensorflow import compat, config, dtypes

-import dnngior.NN_Predictor
+from dnngior.NN_Predictor import NN

 # Tensorflow; please consider: https://www.tensorflow.org/api_docs/python/tf/compat/v1/disable_eager_execution
 compat.v1.disable_eager_execution()
@@ -31,7 +31,7 @@ def noise_data(i, noise_0, noise_1, del_p, con_p):
     ----------
     i : numpy array, required
         an array of 0s and 1s which you want to noisify
-    noise_0 : numpy array, required
+    noise_0 : numpy array, requiredimport dnngior.NN_Predictor
         fraction of 0s to change to 1s
     noise_1 : numpy array, required
         fraction of 1s to change to 0s
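The docstring in this hunk describes noise_0 as the fraction of 0s flipped to 1s and noise_1 as the fraction of 1s flipped to 0s. A hypothetical illustration of that idea (not dnngior's actual noise_data(), and ignoring the del_p/con_p options):

```python
import numpy as np

def add_noise(binary, frac_0_to_1, frac_1_to_0, seed=None):
    """Flip a fraction of the 0s to 1s and a fraction of the 1s to 0s (illustrative only)."""
    rng = np.random.default_rng(seed)
    noisy = binary.copy()
    zeros = np.flatnonzero(binary == 0)
    ones = np.flatnonzero(binary == 1)
    noisy[rng.choice(zeros, size=int(len(zeros) * frac_0_to_1), replace=False)] = 1
    noisy[rng.choice(ones, size=int(len(ones) * frac_1_to_0), replace=False)] = 0
    return noisy

print(add_noise(np.array([0, 0, 1, 1, 1, 0]), frac_0_to_1=0.5, frac_1_to_0=0.3, seed=0))
```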
@@ -73,7 +73,7 @@ def noise_data(i, noise_0, noise_1, del_p, con_p):
         o = temp
     return o

-def generate_training_set(data,nuplo, min_con, max_con, min_for, max_for, del_p, con_p):
+def generate_feature(data, nuplo, min_con, max_con, min_for, max_for, del_p, con_p):
     """
     Function to generate the dataset for training (feature).
     PARAMETERS:
@@ -150,7 +150,7 @@ def custom_loss(y_true, y_pred): #y_true is the label, y_pred is the prediction
         return bias*(1-y_true)*loss+(1-bias)*y_true*loss # return the biased loss y_true are all cases where prediction shouold be 1, 1-y_true all cases where prediction should be one, can scale between these two classes
     return custom_loss

-def train(data, modeltype,rxn_keys=None,labels = None,validation_split=0.0,nuplo=30, min_con=0, max_con=0, min_for=0.05, max_for=0.3, con_p=None, del_p = None, nlayers=1, nnodes=256, nepochs=10, b_size=32, dropout=0.1, bias_0=0.3, maskI=True, save=False, name='noname', output_path='', return_history=False):
+def train(data, modeltype,rxn_keys=None,labels = None,validation_split=0.0,nuplo=30, min_con=0, max_con=0, min_for=0.05, max_for=0.3, con_p=None, del_p = None, nlayers=1, nnodes=256, nepochs=10, b_size=32, dropout=0.1, bias_0=0.3, maskI=True, save=True, output_path='dnngior_predictor.npz', return_history=False, return_full_network=False):
     """
     Most important function, creates actual NN, there are many optional parameters

@@ -168,7 +168,7 @@ def train(data, modeltype,rxn_keys=None,labels = None,validation_split=0.0,nuplo

     TRAINING PARAMETERS:
     -------
-    see generate_training_set() ^
+    see generate_feature() ^

     NETWORK PARAMETERS
     -------------
@@ -200,39 +200,57 @@ def train(data, modeltype,rxn_keys=None,labels = None,validation_split=0.0,nuplo
     SAVING PARAMETERS:

     save: boolean, optional
-        Whether you want to save the network, default = False
-    name: string, optional
-        name of your network, default='noname'
+        Whether you want to save the network, default = True
     output_path: string,
-        where output, default=''
+        Where to save the network, file_extension that work are .h5 and .npz
+        all other file_extensions defailt to npz (lite network)
+        default='dnngior_predictor.npz'
+
+    OPTIONAL RETURNS:
+
     return_history: boolean, optional
         If you want training history
-
+        default = False
+    return_full_network: boolean, optional
+        if you want to return the lite_network or full tensorflow object
+        default = False
     Returns:
     -------------
     trainedNN
         NN class containing network, rxn_keys and modeltype
-    history: type, if history=True
+    history: if history=True
         history of training
     """

     print("Num GPUs Available: ", len(config.list_physical_devices('GPU')))

+    if os.path.exists(output_path):
+        print("# WARNING: overwriting savefile")
+    elif os.access(os.path.dirname(output_path), os.W_OK):
+        print("Saving network at: {}".format(output_path))
+    else:
+        Exception("Can not save at: {}".format(output_path))
+
     if(isinstance(data, pd.DataFrame)):
         rxn_keys = data.index
         ndata = np.asarray(data, dtype=np.float32).T
     elif rxn_keys is None:
         raise(Exception('Provide DataFrame or rxn_keys'))

-    #create feature and labels from training data
+    #create feature from training data
     if(labels is None):
-        labels = np.repeat(np.copy(ndata), nuplo, axis=0).astype(np.float32)
+        feature = np.repeat(np.copy(ndata), nuplo, axis=0).astype(np.float32)
         print('using data as labels')
     else:
-        labels = np.repeat(np.copy(labels), nuplo, axis=0).astype(np.float32)
+        if(isinstance(labels, pd.DataFrame)):
+            rxn_keys = data.index
+            nlabels = np.asarray(labels, dtype=np.float32).T
+        else:
+            nlabels = labels.astype(np.float32).T
+        feature = np.repeat(np.copy(nlabels), nuplo, axis=0).astype(np.float32)
         print("using user provided labels")

-    train_data = generate_training_set(ndata, nuplo, min_con, max_con, min_for, max_for, del_p, con_p)
+    train_data = generate_feature(ndata, nuplo, min_con, max_con, min_for, max_for, del_p, con_p)

     print('dataset created')
     nmodels, nreactions = ndata.shape
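A tiny numpy sketch of the duplication step in this hunk: each row of the (transposed) reaction table is repeated `nuplo` times so that the noised training inputs produced by generate_feature() line up row-for-row with un-noised target rows. The toy array is made up for illustration.

```python
import numpy as np

ndata = np.array([[1, 0, 1],     # genome 1 (rows are genomes after the .T above)
                  [0, 1, 1]],    # genome 2
                 dtype=np.float32)
nuplo = 3
targets = np.repeat(np.copy(ndata), nuplo, axis=0)  # each genome row repeated 3 times
print(targets.shape)                                # (6, 3)
```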
@@ -253,23 +271,28 @@ def train(data, modeltype,rxn_keys=None,labels = None,validation_split=0.0,nuplo
     #print summary of model
     network.summary()
     #train model, history can be used to observe training
-    history = network.fit(train_data, labels, validation_split = validation_split, epochs = nepochs, shuffle=True, batch_size = b_size, verbose=1)
+    history = network.fit(train_data, feature, validation_split = validation_split, epochs = nepochs, shuffle=True, batch_size = b_size, verbose=1)
     pseudo_network = []
     for i in range(0, len(network.layers),2):
         pseudo_network.append(network.layers[i].get_weights())
     pseudo_network = np.asarray(pseudo_network, dtype=object)
     #save Network
     if(save):
-        if(save == 'h5'):
-            network_path = os.path.join(output_path, "{}.h5".format(name))
-            with h5py.File(model_path, mode='w') as f:
+        if(output_path.endswith('.h5')):
+            with h5py.File(output_path, mode='w') as f:
                 network.save(f)
                 f.attrs['modeltype'] = modeltype
                 f.create_dataset("rxn_keys", data =[n.encode("ascii", "ignore") for n in rxn_keys])
         else:
-            network_path = os.path.join(output_path, "{}.npz".format(name))
-            np.savez(network_path,network=pseudo_network, modeltype=modeltype,rxn_keys=rxn_keys)
-    trainedNN = NN_Predictor.NN(custom=[pseudo_network,rxn_keys,modeltype])
+            if not output_path.endswith('.npz'):
+                file_extension = output_path.split('.')[-1]
+                print('{} not recognized, saving as .npz (lite) instead'.format(file_extension))
+                output_path.replace(file_extension, '.npz')
+            np.savez(output_path,network=pseudo_network, modeltype=modeltype,rxn_keys=rxn_keys)
+    if return_full_network:
+        trainedNN = NN(custom=[network,modeltype,rxn_keys])
+    else:
+        trainedNN = NN(custom=[pseudo_network,modeltype,rxn_keys])
     if return_history:
         return trainedNN, history
     else:
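Putting the new saving parameters together, a usage sketch: per this hunk, an output_path ending in .h5 keeps the full tensorflow model, anything else is written as a lite .npz. The toy DataFrame, file names, and the tiny nepochs are illustrative assumptions, not a recommended training setup.

```python
import numpy as np
import pandas as pd
from dnngior.NN_Trainer import train

# toy presence/absence table: rows = reaction ids, columns = genomes (all hypothetical)
data = pd.DataFrame(np.random.randint(0, 2, size=(50, 10)),
                    index=['rxn%05d_c0' % i for i in range(50)],
                    columns=['genome_%d' % i for i in range(10)])

# lite network written to .npz (the default style)
trained = train(data, modeltype='ModelSEED', nepochs=1, output_path='toy_network.npz')

# full tensorflow model saved to .h5 and returned wrapped in the NN class
trained_full, history = train(data, modeltype='ModelSEED', nepochs=1,
                              output_path='toy_network.h5',
                              return_full_network=True, return_history=True)
```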
